From 56299143957a0eb2a021505cfff56f9c9c27dd86 Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Tue, 9 Jun 2026 23:29:02 +0800
Subject: [PATCH] {"schema":"decodex/commit/1","summary":"Add knowledge
 compilation real-world memory fixtures","authority":"XY-848"}

---
 Makefile.toml                                 |  57 ++
 .../knowledge/entity_concept_issue_pages.json | 372 ++++++++++++
 .../pages/concept_derived_knowledge_pages.md  |  27 +
 .../knowledge/pages/entity_qdrant_rebuild.md  |  26 +
 .../pages/issue_xy848_knowledge_pages.md      |  24 +
 .../pages/project_elf_benchmark_suite.md      |  36 ++
 .../knowledge/project_page_rebuild.json       | 311 ++++++++++
 .../src/bin/real_world_job_benchmark.rs       | 549 ++++++++++++++++--
 .../tests/real_world_job_benchmark.rs         | 145 ++++-
 docs/guide/benchmarking/index.md              |   7 +-
 .../benchmarking/live_baseline_benchmark.md   |  19 +
 .../real_world_agent_memory_benchmark.md      |  15 +
 .../real_world_agent_memory_benchmark_v1.md   |  62 ++
 13 files changed, 1603 insertions(+), 47 deletions(-)
 create mode 100644 apps/elf-eval/fixtures/real_world_memory/knowledge/entity_concept_issue_pages.json
 create mode 100644 apps/elf-eval/fixtures/real_world_memory/knowledge/pages/concept_derived_knowledge_pages.md
 create mode 100644 apps/elf-eval/fixtures/real_world_memory/knowledge/pages/entity_qdrant_rebuild.md
 create mode 100644 apps/elf-eval/fixtures/real_world_memory/knowledge/pages/issue_xy848_knowledge_pages.md
 create mode 100644 apps/elf-eval/fixtures/real_world_memory/knowledge/pages/project_elf_benchmark_suite.md
 create mode 100644 apps/elf-eval/fixtures/real_world_memory/knowledge/project_page_rebuild.json

diff --git a/Makefile.toml b/Makefile.toml
index e9982276..03373f46 100644
--- a/Makefile.toml
+++ b/Makefile.toml
@@ -702,6 +702,63 @@ args = [
 ]
 
 
+# Real-world memory knowledge benchmark
+# | task                               | type      | cwd |
+# | ---------------------------------- | --------- | --- |
+# | real-world-memory-knowledge        | composite | |
+# | real-world-memory-knowledge-json   | command   | |
+# | real-world-memory-knowledge-report | command   | |
+
+[tasks.real-world-memory-knowledge]  # Composite entry point: defines no command, only depends on the -report task.
+workspace = false
+dependencies = [
+	"real-world-memory-knowledge-report",
+]
+
+[tasks.real-world-memory-knowledge-json]  # Invokes `real_world_job_benchmark run` on the knowledge fixtures directory; --out points at the JSON report path.
+workspace = false
+command = "cargo"
+args = [
+	"run",
+	"-p",
+	"elf-eval",
+	"--bin",
+	"real_world_job_benchmark",
+	"--",
+	"run",
+	"--fixtures",
+	"apps/elf-eval/fixtures/real_world_memory/knowledge",
+	"--out",
+	"tmp/real-world-memory/knowledge-report.json",
+	"--run-id",
+	"real-world-memory-knowledge",
+	"--adapter-id",
+	"fixture_knowledge",
+	"--adapter-name",
+	"ELF knowledge fixture",
+]
+
+[tasks.real-world-memory-knowledge-report]  # Invokes `real_world_job_benchmark publish`; --report is the same path the -json task passes as --out.
+workspace = false
+dependencies = [
+	"real-world-memory-knowledge-json",
+]
+command = "cargo"
+args = [
+	"run",
+	"-p",
+	"elf-eval",
+	"--bin",
+	"real_world_job_benchmark",
+	"--",
+	"publish",
+	"--report",
+	"tmp/real-world-memory/knowledge-report.json",
+	"--out",
+	"tmp/real-world-memory/knowledge-report.md",
+]
+
+
 # Meta
 # | task   | type      | cwd |
 # | ------ | --------- | --- |
diff --git a/apps/elf-eval/fixtures/real_world_memory/knowledge/entity_concept_issue_pages.json b/apps/elf-eval/fixtures/real_world_memory/knowledge/entity_concept_issue_pages.json
new file mode 100644
index 00000000..f65f78e2
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_memory/knowledge/entity_concept_issue_pages.json
@@ -0,0 +1,372 @@
+{
+  "schema": "elf.real_world_job/v1",
+  "job_id": "knowledge-entity-concept-002",
+  "suite": "knowledge_compilation",
+  "title": "Compile entity, concept, and issue timeline pages with stale lint",
+  "corpus": {
+    "corpus_id": "real-world-memory-knowledge-synthetic-2026-06-09",
+    "profile": "synthetic",
+    "items": [
+      {
+        "evidence_id": "qdrant-rebuild-entity",
+        "kind": "note",
+        "text": "Entity fact: Qdrant is a derived rebuildable index for ELF candidate retrieval; Postgres vectors are the source used to rebuild it.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_memory_fixture/v1",
+          "ref": {
+            "fixture": "entity_concept_issue_pages",
+            "evidence_id": "qdrant-rebuild-entity"
+          }
+        },
+        "created_at": "2026-06-09T02:00:00Z"
+      },
+      {
+        "evidence_id": "derived-pages-concept",
+        "kind": "decision",
+        "text": "Concept fact: Derived knowledge pages compile current truth, history, backlinks, and lint findings from source notes and events.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_memory_fixture/v1",
+          "ref": {
+            "fixture": "entity_concept_issue_pages",
+            "evidence_id": "derived-pages-concept"
+          }
+        },
+        "created_at": "2026-06-09T02:05:00Z"
+      },
+      {
+        "evidence_id": "xy848-current-timeline",
+        "kind": "issue",
+        "text": "Current issue timeline: XY-848 adds knowledge compilation benchmark cases and keeps generated pages as pointer-backed benchmark artifacts.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_memory_fixture/v1",
+          "ref": {
+            "fixture": "entity_concept_issue_pages",
+            "evidence_id": "xy848-current-timeline"
+          }
+        },
+        "created_at": "2026-06-09T02:10:00Z"
+      },
+      {
+        "evidence_id": "old-qdrant-authoritative-trap",
+        "kind": "note",
+        "text": "Stale fact: Qdrant became the authoritative source for compiled knowledge pages.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_memory_fixture/v1",
+          "ref": {
+            "fixture": "entity_concept_issue_pages",
+            "evidence_id": "old-qdrant-authoritative-trap"
+          }
+        },
+        "created_at": "2026-06-08T02:00:00Z"
+      }
+    ],
+    "adapter_response": {
+      "adapter_id": "fixture_knowledge",
+      "answer": {
+        "content": "Generated entity, concept, and issue timeline pages cite Qdrant rebuild evidence, derived-page concept evidence, and the current XY-848 timeline; stale Qdrant-authoritative text is linted, and one rebuild explains allowed ordering variance.",
+        "claims": [
+          {
+            "claim_id": "qdrant_rebuild_entity",
+            "text": "The Qdrant entity page states that Qdrant is derived and rebuildable from Postgres-held vectors.",
+            "evidence_ids": ["qdrant-rebuild-entity"],
+            "confidence": "high"
+          },
+          {
+            "claim_id": "derived_pages_concept",
+            "text": "The derived-pages concept page compiles current truth, history, backlinks, and lint findings from source notes and events.",
+            "evidence_ids": ["derived-pages-concept"],
+            "confidence": "high"
+          },
+          {
+            "claim_id": "issue_timeline_current",
+            "text": "The XY-848 issue timeline page records that generated pages are pointer-backed benchmark artifacts.",
+            "evidence_ids": ["xy848-current-timeline"],
+            "confidence": "high"
+          }
+        ],
+        "evidence_ids": [
+          "qdrant-rebuild-entity",
+          "derived-pages-concept",
+          "xy848-current-timeline"
+        ],
+        "pages": [
+          {
+            "page_id": "entity:qdrant-rebuild",
+            "page_type": "entity",
+            "title": "Qdrant Rebuild Entity Page",
+            "path": "apps/elf-eval/fixtures/real_world_memory/knowledge/pages/entity_qdrant_rebuild.md",
+            "sections": [
+              {
+                "section_id": "current-truth",
+                "heading": "Current Truth",
+                "role": "current_truth",
+                "content": "Qdrant is derived and rebuildable; Postgres vectors remain the source used for rebuild.",
+                "evidence_ids": ["qdrant-rebuild-entity"],
+                "timeline_event_ids": ["qdrant-current-fact"]
+              },
+              {
+                "section_id": "history",
+                "heading": "History",
+                "role": "history",
+                "content": "The stale claim that Qdrant became authoritative is recorded only as lint evidence.",
+                "evidence_ids": ["old-qdrant-authoritative-trap"],
+                "timeline_event_ids": ["qdrant-stale-fact"]
+              }
+            ],
+            "backlinks": [
+              "project:elf-benchmark-suite",
+              "concept:derived-knowledge-pages"
+            ],
+            "lint_findings": [
+              {
+                "finding_id": "lint-old-qdrant-authoritative",
+                "finding_type": "stale_claim",
+                "severity": "error",
+                "text": "The old Qdrant-authoritative claim conflicts with the current derived-index evidence.",
+                "evidence_ids": ["old-qdrant-authoritative-trap"],
+                "trap_id": "old-qdrant-authoritative"
+              }
+            ],
+            "rebuild": {
+              "first_hash": "blake3:2ac0d7d7e03088fe3171e41c19f3ea1097b07b1d7ddc891f9aa81311d476e001",
+              "second_hash": "blake3:2ac0d7d7e03088fe3171e41c19f3ea1097b07b1d7ddc891f9aa81311d476e001",
+              "deterministic": true,
+              "allowed_variance": []
+            }
+          },
+          {
+            "page_id": "concept:derived-knowledge-pages",
+            "page_type": "concept",
+            "title": "Derived Knowledge Pages Concept Page",
+            "path": "apps/elf-eval/fixtures/real_world_memory/knowledge/pages/concept_derived_knowledge_pages.md",
+            "sections": [
+              {
+                "section_id": "compiled-truth",
+                "heading": "Compiled Truth",
+                "role": "current_truth",
+                "content": "Derived knowledge pages compile current truth, history, backlinks, and lint findings from source notes and events.",
+                "evidence_ids": ["derived-pages-concept"],
+                "timeline_event_ids": ["derived-pages-concept-recorded"]
+              },
+              {
+                "section_id": "backlinks",
+                "heading": "Backlinks",
+                "role": "backlinks",
+                "content": "The concept links to the Qdrant rebuild entity and the XY-848 issue timeline.",
+                "evidence_ids": ["derived-pages-concept", "xy848-current-timeline"],
+                "timeline_event_ids": ["xy848-current-scope"]
+              }
+            ],
+            "backlinks": [
+              "entity:qdrant-rebuild",
+              "issue:xy848-knowledge-pages"
+            ],
+            "lint_findings": [],
+            "rebuild": {
+              "first_hash": "blake3:498016f1d39a6a0a5241b0c640c30f0720eb9dbdd73b167fdce95b4387d9699a",
+              "second_hash": "blake3:498016f1d39a6a0a5241b0c640c30f0720eb9dbdd73b167fdce95b4387d9699b",
+              "deterministic": false,
+              "allowed_variance": [
+                "Backlink order may differ before canonical sort is applied; fixture report records the variance and still compares normalized page sections."
+              ]
+            }
+          },
+          {
+            "page_id": "issue:xy848-knowledge-pages",
+            "page_type": "issue_timeline",
+            "title": "XY-848 Knowledge Pages Issue Timeline",
+            "path": "apps/elf-eval/fixtures/real_world_memory/knowledge/pages/issue_xy848_knowledge_pages.md",
+            "sections": [
+              {
+                "section_id": "current-state",
+                "heading": "Current State",
+                "role": "current_truth",
+                "content": "XY-848 adds knowledge compilation benchmark cases and marks generated pages as pointer-backed benchmark artifacts.",
+                "evidence_ids": ["xy848-current-timeline"],
+                "timeline_event_ids": ["xy848-current-scope"]
+              },
+              {
+                "section_id": "linked-pages",
+                "heading": "Linked Pages",
+                "role": "backlinks",
+                "content": "The issue timeline links to the Qdrant rebuild entity and derived-knowledge-pages concept pages.",
+                "evidence_ids": ["qdrant-rebuild-entity", "derived-pages-concept"],
+                "timeline_event_ids": ["qdrant-current-fact", "derived-pages-concept-recorded"]
+              }
+            ],
+            "backlinks": [
+              "entity:qdrant-rebuild",
+              "concept:derived-knowledge-pages"
+            ],
+            "lint_findings": [],
+            "rebuild": {
+              "first_hash": "blake3:fed9c4af9f53e787fcb91a4900b6137d728a72b60629ca049a6da57260be682d",
+              "second_hash": "blake3:fed9c4af9f53e787fcb91a4900b6137d728a72b60629ca049a6da57260be682d",
+              "deterministic": true,
+              "allowed_variance": []
+            }
+          }
+        ],
+        "latency_ms": 3.1,
+        "cost": {
+          "currency": "USD",
+          "amount": 0.0,
+          "input_tokens": 0,
+          "output_tokens": 0
+        }
+      }
+    }
+  },
+  "timeline": [
+    {
+      "event_id": "qdrant-stale-fact",
+      "ts": "2026-06-08T02:00:00Z",
+      "actor": "agent",
+      "action": "recorded_stale_fact",
+      "evidence_ids": ["old-qdrant-authoritative-trap"],
+      "summary": "A stale note incorrectly said Qdrant became authoritative."
+    },
+    {
+      "event_id": "qdrant-current-fact",
+      "ts": "2026-06-09T02:00:00Z",
+      "actor": "agent",
+      "action": "recorded_current_fact",
+      "evidence_ids": ["qdrant-rebuild-entity"],
+      "summary": "The current Qdrant fact says it is derived and rebuildable from Postgres-held vectors."
+    },
+    {
+      "event_id": "derived-pages-concept-recorded",
+      "ts": "2026-06-09T02:05:00Z",
+      "actor": "agent",
+      "action": "recorded_concept",
+      "evidence_ids": ["derived-pages-concept"],
+      "summary": "Derived pages compile current truth, history, backlinks, and lint findings from source notes and events."
+    },
+    {
+      "event_id": "xy848-current-scope",
+      "ts": "2026-06-09T02:10:00Z",
+      "actor": "operator",
+      "action": "recorded_issue_scope",
+      "evidence_ids": ["xy848-current-timeline"],
+      "summary": "XY-848 keeps generated knowledge pages as pointer-backed benchmark artifacts."
+    }
+  ],
+  "prompt": {
+    "role": "user",
+    "content": "Compile entity, concept, and issue timeline pages for the knowledge suite and identify stale claims plus rebuild variance.",
+    "job_mode": "compile",
+    "constraints": [
+      "cite_evidence",
+      "lint_stale_claims",
+      "include_backlinks",
+      "explain_allowed_rebuild_variance"
+    ]
+  },
+  "expected_answer": {
+    "must_include": [
+      {
+        "claim_id": "qdrant_rebuild_entity",
+        "text": "The Qdrant entity page states that Qdrant is derived and rebuildable from Postgres-held vectors."
+      },
+      {
+        "claim_id": "derived_pages_concept",
+        "text": "The derived-pages concept page compiles current truth, history, backlinks, and lint findings from source notes and events."
+      },
+      {
+        "claim_id": "issue_timeline_current",
+        "text": "The XY-848 issue timeline page records that generated pages are pointer-backed benchmark artifacts."
+      }
+    ],
+    "must_not_include": [
+      "Qdrant became the authoritative source for compiled knowledge pages."
+    ],
+    "evidence_links": {
+      "qdrant_rebuild_entity": ["qdrant-rebuild-entity"],
+      "derived_pages_concept": ["derived-pages-concept"],
+      "issue_timeline_current": ["xy848-current-timeline"]
+    },
+    "answer_type": "compiled_knowledge",
+    "accepted_alternates": [],
+    "requires_caveat": false,
+    "requires_refusal": false
+  },
+  "required_evidence": [
+    {
+      "evidence_id": "qdrant-rebuild-entity",
+      "claim_id": "qdrant_rebuild_entity",
+      "requirement": "cite",
+      "quote": "Qdrant is a derived rebuildable index"
+    },
+    {
+      "evidence_id": "derived-pages-concept",
+      "claim_id": "derived_pages_concept",
+      "requirement": "cite",
+      "quote": "current truth, history, backlinks, and lint findings"
+    },
+    {
+      "evidence_id": "xy848-current-timeline",
+      "claim_id": "issue_timeline_current",
+      "requirement": "use",
+      "quote": "pointer-backed benchmark artifacts"
+    }
+  ],
+  "negative_traps": [
+    {
+      "trap_id": "old-qdrant-authoritative",
+      "type": "stale_fact",
+      "evidence_ids": ["old-qdrant-authoritative-trap"],
+      "failure_if_used": true
+    }
+  ],
+  "scoring_rubric": {
+    "dimensions": {
+      "answer_correctness": {
+        "weight": 0.25,
+        "max_points": 1.0,
+        "criteria": "States current entity, concept, and issue timeline truth."
+      },
+      "evidence_grounding": {
+        "weight": 0.3,
+        "max_points": 1.0,
+        "criteria": "Every page section traces to source notes or timeline events."
+      },
+      "trap_avoidance": {
+        "weight": 0.2,
+        "max_points": 1.0,
+        "criteria": "Stale Qdrant-authoritative claim is detected as lint evidence."
+      },
+      "workflow_helpfulness": {
+        "weight": 0.15,
+        "max_points": 1.0,
+        "criteria": "Pages include backlinks and useful current-truth/history surfaces."
+      },
+      "lifecycle_behavior": {
+        "weight": 0.1,
+        "max_points": 1.0,
+        "criteria": "Rebuild records are deterministic enough or explain allowed variance."
+      }
+    },
+    "pass_threshold": 0.8,
+    "hard_fail_rules": [
+      "unsupported high-confidence claim about a required decision or fact",
+      "use of a negative trap marked failure_if_used = true"
+    ]
+  },
+  "allowed_uncertainty": {
+    "can_answer_unknown": true,
+    "acceptable_phrases": [
+      "The fixture does not provide that evidence."
+    ],
+    "fallback_action": "cite_partial_evidence"
+  },
+  "tags": [
+    "synthetic",
+    "knowledge",
+    "no_live_claim",
+    "benchmark_artifact"
+  ]
+}
diff --git a/apps/elf-eval/fixtures/real_world_memory/knowledge/pages/concept_derived_knowledge_pages.md b/apps/elf-eval/fixtures/real_world_memory/knowledge/pages/concept_derived_knowledge_pages.md
new file mode 100644
index 00000000..88fb9fc4
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_memory/knowledge/pages/concept_derived_knowledge_pages.md
@@ -0,0 +1,27 @@
+# Derived Knowledge Pages Concept Page
+
+Benchmark artifact only: this page is a derived fixture for `knowledge_compilation`
+scoring. It is not authoritative production truth.
+
+## Compiled Truth
+
+Derived knowledge pages compile current truth, history, backlinks, and lint findings
+from source notes and events.
+
+Sources: `derived-pages-concept`, `derived-pages-concept-recorded`.
+
+## Backlinks
+
+The concept links to the Qdrant rebuild entity and the XY-848 issue timeline.
+
+Sources: `derived-pages-concept`, `xy848-current-timeline`, `xy848-current-scope`.
+
+Backlinks:
+
+- `entity:qdrant-rebuild`
+- `issue:xy848-knowledge-pages`
+
+## Rebuild Note
+
+Allowed variance: backlink order may differ before canonical sort is applied; the
+fixture report records the variance and compares normalized page sections.
diff --git a/apps/elf-eval/fixtures/real_world_memory/knowledge/pages/entity_qdrant_rebuild.md b/apps/elf-eval/fixtures/real_world_memory/knowledge/pages/entity_qdrant_rebuild.md
new file mode 100644
index 00000000..d2b28c05
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_memory/knowledge/pages/entity_qdrant_rebuild.md
@@ -0,0 +1,26 @@
+# Qdrant Rebuild Entity Page
+
+Benchmark artifact only: this page is a derived fixture for `knowledge_compilation`
+scoring. It is not authoritative production truth.
+
+## Current Truth
+
+Qdrant is derived and rebuildable; Postgres vectors remain the source used for rebuild.
+
+Sources: `qdrant-rebuild-entity`, `qdrant-current-fact`.
+
+## History
+
+The stale claim that Qdrant became authoritative is recorded only as lint evidence.
+
+Sources: `old-qdrant-authoritative-trap`, `qdrant-stale-fact`.
+
+## Lint
+
+- `lint-old-qdrant-authoritative`: stale claim; the old Qdrant-authoritative claim
+  conflicts with the current derived-index evidence.
+
+## Backlinks
+
+- `project:elf-benchmark-suite`
+- `concept:derived-knowledge-pages`
diff --git a/apps/elf-eval/fixtures/real_world_memory/knowledge/pages/issue_xy848_knowledge_pages.md b/apps/elf-eval/fixtures/real_world_memory/knowledge/pages/issue_xy848_knowledge_pages.md
new file mode 100644
index 00000000..ac665951
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_memory/knowledge/pages/issue_xy848_knowledge_pages.md
@@ -0,0 +1,24 @@
+# XY-848 Knowledge Pages Issue Timeline
+
+Benchmark artifact only: this page is a derived fixture for `knowledge_compilation`
+scoring. It is not authoritative production truth.
+
+## Current State
+
+XY-848 adds knowledge compilation benchmark cases and marks generated pages as
+pointer-backed benchmark artifacts.
+
+Sources: `xy848-current-timeline`, `xy848-current-scope`.
+
+## Linked Pages
+
+The issue timeline links to the Qdrant rebuild entity and derived-knowledge-pages
+concept pages.
+
+Sources: `qdrant-rebuild-entity`, `derived-pages-concept`,
+`qdrant-current-fact`, `derived-pages-concept-recorded`.
+
+Backlinks:
+
+- `entity:qdrant-rebuild`
+- `concept:derived-knowledge-pages`
diff --git a/apps/elf-eval/fixtures/real_world_memory/knowledge/pages/project_elf_benchmark_suite.md b/apps/elf-eval/fixtures/real_world_memory/knowledge/pages/project_elf_benchmark_suite.md
new file mode 100644
index 00000000..de6d403c
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_memory/knowledge/pages/project_elf_benchmark_suite.md
@@ -0,0 +1,36 @@
+# ELF Benchmark Suite Knowledge Page
+
+Benchmark artifact only: this page is a derived fixture for `knowledge_compilation`
+scoring. It is not authoritative production truth.
+
+## Current Truth
+
+Generated knowledge pages remain derived benchmark artifacts and source notes stay
+authoritative.
+
+Sources: `elf-knowledge-current-truth`, `knowledge-current-truth-recorded`.
+
+## History
+
+The suite borrows llm-wiki lint, gbrain compiled_truth plus timeline, and graphify
+report ideas without copying their source-of-truth assumptions.
+
+Sources: `elf-knowledge-history`, `knowledge-patterns-selected`.
+
+## XY-848 Timeline
+
+XY-848 requires project pages, entity/concept pages, issue timelines, current truth
+plus history, stale linting, backlinks, and rebuild determinism.
+
+Sources: `xy848-issue-timeline`, `xy848-scope-recorded`.
+
+## Private Corpus Summary
+
+Unsupported: the fixture does not contain private production corpus evidence for a
+private-corpus knowledge-page quality claim.
+
+## Backlinks
+
+- `entity:qdrant-rebuild`
+- `concept:derived-knowledge-pages`
+- `issue:xy848-knowledge-pages`
diff --git a/apps/elf-eval/fixtures/real_world_memory/knowledge/project_page_rebuild.json b/apps/elf-eval/fixtures/real_world_memory/knowledge/project_page_rebuild.json
new file mode 100644
index 00000000..de6fd359
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_memory/knowledge/project_page_rebuild.json
@@ -0,0 +1,311 @@
+{
+  "schema": "elf.real_world_job/v1",
+  "job_id": "knowledge-project-page-001",
+  "suite": "knowledge_compilation",
+  "title": "Compile a pointer-backed project page with current truth and history",
+  "corpus": {
+    "corpus_id": "real-world-memory-knowledge-synthetic-2026-06-09",
+    "profile": "synthetic",
+    "items": [
+      {
+        "evidence_id": "elf-knowledge-current-truth",
+        "kind": "note",
+        "text": "Current truth: The ELF knowledge benchmark must keep generated pages derived from notes and source refs; source notes stay authoritative and generated pages are not production truth.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_memory_fixture/v1",
+          "ref": {
+            "fixture": "project_page_rebuild",
+            "evidence_id": "elf-knowledge-current-truth"
+          }
+        },
+        "created_at": "2026-06-09T01:00:00Z"
+      },
+      {
+        "evidence_id": "elf-knowledge-history",
+        "kind": "decision",
+        "text": "History: The knowledge compilation suite follows llm-wiki query-save-lint, gbrain compiled_truth plus timeline, and graphify graph report patterns while preserving ELF provenance boundaries.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_memory_fixture/v1",
+          "ref": {
+            "fixture": "project_page_rebuild",
+            "evidence_id": "elf-knowledge-history"
+          }
+        },
+        "created_at": "2026-06-09T01:05:00Z"
+      },
+      {
+        "evidence_id": "xy848-issue-timeline",
+        "kind": "issue",
+        "text": "Issue timeline: XY-848 asks for project pages, entity/concept pages, issue timelines, current truth plus history, stale-claim linting, backlinks, and rebuild determinism.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_memory_fixture/v1",
+          "ref": {
+            "fixture": "project_page_rebuild",
+            "evidence_id": "xy848-issue-timeline"
+          }
+        },
+        "created_at": "2026-06-09T01:10:00Z"
+      },
+      {
+        "evidence_id": "old-authoritative-page-trap",
+        "kind": "compiled_page",
+        "text": "Stale claim: Generated knowledge pages are authoritative production truth and can replace source notes.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_memory_fixture/v1",
+          "ref": {
+            "fixture": "project_page_rebuild",
+            "evidence_id": "old-authoritative-page-trap"
+          }
+        },
+        "created_at": "2026-06-08T01:00:00Z"
+      }
+    ],
+    "adapter_response": {
+      "adapter_id": "fixture_knowledge",
+      "answer": {
+        "content": "Generated benchmark page `project_elf_benchmark_suite.md` keeps ELF source notes authoritative, cites current truth and history, links the XY-848 issue timeline, flags one unsupported summary, and rebuilds deterministically.",
+        "claims": [
+          {
+            "claim_id": "derived_not_authoritative",
+            "text": "Generated knowledge pages remain derived benchmark artifacts, not authoritative production truth.",
+            "evidence_ids": ["elf-knowledge-current-truth"],
+            "confidence": "high"
+          },
+          {
+            "claim_id": "reference_patterns",
+            "text": "The page shape uses llm-wiki lint, gbrain compiled truth plus timeline, and graphify report patterns while preserving ELF provenance.",
+            "evidence_ids": ["elf-knowledge-history"],
+            "confidence": "high"
+          },
+          {
+            "claim_id": "rebuild_deterministic",
+            "text": "The project page rebuild produced the same page hash in two fixture rebuild passes.",
+            "evidence_ids": ["xy848-issue-timeline"],
+            "confidence": "high"
+          }
+        ],
+        "evidence_ids": [
+          "elf-knowledge-current-truth",
+          "elf-knowledge-history",
+          "xy848-issue-timeline"
+        ],
+        "pages": [
+          {
+            "page_id": "project:elf-benchmark-suite",
+            "page_type": "project",
+            "title": "ELF Benchmark Suite Knowledge Page",
+            "path": "apps/elf-eval/fixtures/real_world_memory/knowledge/pages/project_elf_benchmark_suite.md",
+            "sections": [
+              {
+                "section_id": "current-truth",
+                "heading": "Current Truth",
+                "role": "current_truth",
+                "content": "Generated knowledge pages remain derived benchmark artifacts and source notes stay authoritative.",
+                "evidence_ids": ["elf-knowledge-current-truth"],
+                "timeline_event_ids": ["knowledge-current-truth-recorded"]
+              },
+              {
+                "section_id": "history",
+                "heading": "History",
+                "role": "history",
+                "content": "The suite borrows llm-wiki lint, gbrain compiled_truth plus timeline, and graphify report ideas without copying their source-of-truth assumptions.",
+                "evidence_ids": ["elf-knowledge-history"],
+                "timeline_event_ids": ["knowledge-patterns-selected"]
+              },
+              {
+                "section_id": "issue-timeline",
+                "heading": "XY-848 Timeline",
+                "role": "timeline",
+                "content": "XY-848 requires project pages, entity/concept pages, issue timelines, current truth plus history, stale linting, backlinks, and rebuild determinism.",
+                "evidence_ids": ["xy848-issue-timeline"],
+                "timeline_event_ids": ["xy848-scope-recorded"]
+              },
+              {
+                "section_id": "unsupported-private-summary",
+                "heading": "Private Corpus Summary",
+                "role": "summary",
+                "content": "The fixture does not contain private production corpus evidence for a private-corpus knowledge-page quality claim.",
+                "evidence_ids": [],
+                "timeline_event_ids": [],
+                "unsupported_reason": "No private production corpus item is present in this synthetic benchmark fixture."
+              }
+            ],
+            "backlinks": [
+              "entity:qdrant-rebuild",
+              "concept:derived-knowledge-pages",
+              "issue:xy848-knowledge-pages"
+            ],
+            "lint_findings": [
+              {
+                "finding_id": "lint-old-authoritative-page-trap",
+                "finding_type": "stale_claim",
+                "severity": "error",
+                "text": "The stale authoritative-page claim conflicts with current source-of-truth evidence.",
+                "evidence_ids": ["old-authoritative-page-trap"],
+                "trap_id": "old-authoritative-page"
+              }
+            ],
+            "rebuild": {
+              "first_hash": "blake3:93b78a1d6e8e0f7a5c761b0c3c1e311adf3a5c0f8e0f3999d5e6f4012c4a8481",
+              "second_hash": "blake3:93b78a1d6e8e0f7a5c761b0c3c1e311adf3a5c0f8e0f3999d5e6f4012c4a8481",
+              "deterministic": true,
+              "allowed_variance": []
+            }
+          }
+        ],
+        "latency_ms": 2.5,
+        "cost": {
+          "currency": "USD",
+          "amount": 0.0,
+          "input_tokens": 0,
+          "output_tokens": 0
+        }
+      }
+    }
+  },
+  "timeline": [
+    {
+      "event_id": "knowledge-current-truth-recorded",
+      "ts": "2026-06-09T01:00:00Z",
+      "actor": "agent",
+      "action": "recorded_current_truth",
+      "evidence_ids": ["elf-knowledge-current-truth"],
+      "summary": "Current truth says generated pages are derived and source notes stay authoritative."
+    },
+    {
+      "event_id": "knowledge-patterns-selected",
+      "ts": "2026-06-09T01:05:00Z",
+      "actor": "agent",
+      "action": "selected_reference_patterns",
+      "evidence_ids": ["elf-knowledge-history"],
+      "summary": "The suite uses llm-wiki, gbrain, and graphify as reference patterns."
+    },
+    {
+      "event_id": "xy848-scope-recorded",
+      "ts": "2026-06-09T01:10:00Z",
+      "actor": "operator",
+      "action": "recorded_issue_scope",
+      "evidence_ids": ["xy848-issue-timeline"],
+      "summary": "XY-848 defines the required knowledge page benchmark dimensions."
+    }
+  ],
+  "prompt": {
+    "role": "user",
+    "content": "Compile a project knowledge page for the ELF benchmark suite and report whether every section is cited or flagged unsupported.",
+    "job_mode": "compile",
+    "constraints": [
+      "cite_evidence",
+      "derived_pages_not_authoritative",
+      "flag_unsupported_sections",
+      "report_rebuild_determinism"
+    ]
+  },
+  "expected_answer": {
+    "must_include": [
+      {
+        "claim_id": "derived_not_authoritative",
+        "text": "Generated knowledge pages remain derived benchmark artifacts, not authoritative production truth."
+      },
+      {
+        "claim_id": "reference_patterns",
+        "text": "The page shape uses llm-wiki lint, gbrain compiled truth plus timeline, and graphify report patterns while preserving ELF provenance."
+      },
+      {
+        "claim_id": "rebuild_deterministic",
+        "text": "The project page rebuild produced the same page hash in two fixture rebuild passes."
+      }
+    ],
+    "must_not_include": [
+      "Generated knowledge pages are authoritative production truth.",
+      "The fixture proves private-corpus knowledge-page quality."
+    ],
+    "evidence_links": {
+      "derived_not_authoritative": ["elf-knowledge-current-truth"],
+      "reference_patterns": ["elf-knowledge-history"],
+      "rebuild_deterministic": ["xy848-issue-timeline"]
+    },
+    "answer_type": "compiled_knowledge",
+    "accepted_alternates": [],
+    "requires_caveat": false,
+    "requires_refusal": false
+  },
+  "required_evidence": [
+    {
+      "evidence_id": "elf-knowledge-current-truth",
+      "claim_id": "derived_not_authoritative",
+      "requirement": "cite",
+      "quote": "source notes stay authoritative"
+    },
+    {
+      "evidence_id": "elf-knowledge-history",
+      "claim_id": "reference_patterns",
+      "requirement": "cite",
+      "quote": "llm-wiki query-save-lint, gbrain compiled_truth plus timeline, and graphify graph report patterns"
+    },
+    {
+      "evidence_id": "xy848-issue-timeline",
+      "claim_id": "rebuild_deterministic",
+      "requirement": "use",
+      "quote": "rebuild determinism"
+    }
+  ],
+  "negative_traps": [
+    {
+      "trap_id": "old-authoritative-page",
+      "type": "stale_fact",
+      "evidence_ids": ["old-authoritative-page-trap"],
+      "failure_if_used": true
+    }
+  ],
+  "scoring_rubric": {
+    "dimensions": {
+      "answer_correctness": {
+        "weight": 0.25,
+        "max_points": 1.0,
+        "criteria": "States current derived-page truth and reference pattern rationale."
+      },
+      "evidence_grounding": {
+        "weight": 0.3,
+        "max_points": 1.0,
+        "criteria": "Every generated page section cites source notes/events or is flagged unsupported."
+      },
+      "trap_avoidance": {
+        "weight": 0.2,
+        "max_points": 1.0,
+        "criteria": "Stale authoritative-page claim is linted and not used as current truth."
+      },
+      "workflow_helpfulness": {
+        "weight": 0.15,
+        "max_points": 1.0,
+        "criteria": "Compiled page includes current truth, history, issue timeline, and backlinks."
+      },
+      "lifecycle_behavior": {
+        "weight": 0.1,
+        "max_points": 1.0,
+        "criteria": "Rebuild record is deterministic enough for regression comparison."
+      }
+    },
+    "pass_threshold": 0.8,
+    "hard_fail_rules": [
+      "unsupported high-confidence claim about a required decision or fact",
+      "use of a negative trap marked failure_if_used = true"
+    ]
+  },
+  "allowed_uncertainty": {
+    "can_answer_unknown": true,
+    "acceptable_phrases": [
+      "The fixture does not provide that evidence."
+    ],
+    "fallback_action": "cite_partial_evidence"
+  },
+  "tags": [
+    "synthetic",
+    "knowledge",
+    "no_live_claim",
+    "benchmark_artifact"
+  ]
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark.rs b/apps/elf-eval/src/bin/real_world_job_benchmark.rs
index 42e6c496..f5a5fee6 100644
--- a/apps/elf-eval/src/bin/real_world_job_benchmark.rs
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark.rs
@@ -352,6 +352,8 @@ struct ProducedAnswer {
 	claims: Vec<ProducedClaim>,
 	#[serde(default)]
 	evidence_ids: Vec<String>,
+	#[serde(default)]
+	pages: Vec<DerivedPageArtifact>,
 	#[serde(skip_serializing_if = "Option::is_none")]
 	latency_ms: Option<f64>,
 	#[serde(skip_serializing_if = "Option::is_none")]
@@ -371,6 +373,58 @@ struct ProducedClaim {
 	confidence: Option<String>,
 }
 
+#[derive(Clone, Debug, Deserialize, Serialize)]
+struct DerivedPageArtifact { // One generated page carried in `ProducedAnswer::pages`.
+	page_id: String, // Must be non-blank; quoted in validation errors.
+	page_type: String, // Must be non-blank.
+	title: String, // Must be non-blank.
+	#[serde(skip_serializing_if = "Option::is_none")]
+	path: Option<String>, // Left out of serialized output when `None`.
+	#[serde(default)]
+	sections: Vec<DerivedPageSection>, // Empty when the key is absent.
+	#[serde(default)]
+	backlinks: Vec<String>, // Each entry must be non-blank.
+	#[serde(default)]
+	lint_findings: Vec<DerivedPageLintFinding>, // Empty when the key is absent.
+	#[serde(skip_serializing_if = "Option::is_none")]
+	rebuild: Option<DerivedPageRebuild>, // A missing record is scored as a rebuild failure.
+}
+
+#[derive(Clone, Debug, Deserialize, Serialize)]
+struct DerivedPageSection { // One section of a derived page.
+	section_id: String, // Must be non-blank.
+	heading: String, // Must be non-blank.
+	role: String, // Must be non-blank; "summary" is tallied separately when flagged.
+	content: String, // Must be non-blank.
+	#[serde(default)]
+	evidence_ids: Vec<String>, // Must name known evidence; non-empty marks the section traced.
+	#[serde(default)]
+	timeline_event_ids: Vec<String>, // Must name known events; non-empty also marks it traced.
+	#[serde(skip_serializing_if = "Option::is_none")]
+	unsupported_reason: Option<String>, // Non-blank reason flags an untraced section.
+}
+
+#[derive(Clone, Debug, Deserialize, Serialize)]
+struct DerivedPageLintFinding { // One lint finding attached to a derived page.
+	finding_id: String, // Must be non-blank.
+	finding_type: String, // Must be non-blank.
+	severity: String, // Must be non-blank.
+	text: String, // Must be non-blank.
+	#[serde(default)]
+	evidence_ids: Vec<String>, // Overlap with a trap's evidence counts as detecting that trap.
+	#[serde(skip_serializing_if = "Option::is_none")]
+	trap_id: Option<String>, // Equality with a trap's id counts as detecting that trap.
+}
+
+#[derive(Clone, Debug, Deserialize, Serialize)]
+struct DerivedPageRebuild { // Rebuild record comparing two page hashes.
+	first_hash: String, // Must be non-blank.
+	second_hash: String, // Must be non-blank.
+	deterministic: bool, // Only honored when both hashes are equal.
+	#[serde(default)]
+	allowed_variance: Vec<String>, // Any entry makes the rebuild acceptable regardless of hashes.
+}
+
 #[derive(Clone, Debug, Deserialize)]
 struct ConsolidationFixture {
 	#[serde(default)]
@@ -622,6 +676,8 @@ struct ReportSummary {
 	operator_ux_gap_count: usize,
 	#[serde(default)]
 	consolidation: ConsolidationSummaryReport,
+	#[serde(skip_serializing_if = "Option::is_none")]
+	knowledge: Option<KnowledgeSummary>,
 }
 
 #[derive(Debug, Default, Deserialize, Serialize)]
@@ -635,6 +691,23 @@ struct ConsolidationSummaryReport {
 	executable_gap_count: usize,
 }
 
+#[derive(Clone, Debug, Default, Deserialize, Serialize)]
+struct KnowledgeSummary { // Knowledge metrics aggregated over the jobs that reported them.
+	job_count: usize, // Jobs that reported knowledge metrics.
+	page_count: usize,
+	section_count: usize,
+	backlink_count: usize,
+	pages_with_backlinks: usize,
+	citation_coverage: f64, // Traced sections / all sections.
+	stale_claim_detection: f64, // Detected / total stale traps; 1.0 when there are none.
+	rebuild_determinism: f64, // Acceptable rebuilds / pages.
+	backlink_coverage: f64, // Pages with backlinks / pages.
+	page_usefulness: f64, // Mean of the per-job page_usefulness values.
+	unsupported_summary_count: usize,
+	untraced_section_count: usize,
+	allowed_variance_count: usize, // Pages whose rebuild lists any allowed variance.
+}
+
 #[derive(Debug, Deserialize, Serialize)]
 struct SuiteReport {
 	suite_id: String,
@@ -682,6 +755,8 @@ struct JobReport {
 	latency_ms: Option<f64>,
 	cost: Option<CostReport>,
 	trace_explainability: Option<TraceExplainability>,
+	#[serde(skip_serializing_if = "Option::is_none")]
+	knowledge: Option<KnowledgeJobMetrics>,
 	trap_ids_used: Vec<String>,
 	dimension_scores: Vec<DimensionScoreReport>,
 	reason: String,
@@ -787,6 +862,29 @@ struct UnsupportedClaimReport {
 	evidence_ids: Vec<String>,
 }
 
+#[derive(Clone, Debug, Default, Deserialize, Serialize)]
+struct KnowledgeJobMetrics { // Per-job counters and ratios for derived knowledge pages.
+	page_count: usize,
+	section_count: usize,
+	traced_section_count: usize, // Sections with evidence ids or timeline event ids.
+	flagged_unsupported_section_count: usize, // Untraced sections with a non-blank reason.
+	untraced_section_count: usize, // Sections that are neither traced nor flagged.
+	unsupported_summary_count: usize, // Flagged sections whose role is "summary".
+	backlink_count: usize,
+	pages_with_backlinks: usize,
+	stale_trap_count: usize, // Traps of type "stale_fact" with failure_if_used set.
+	stale_traps_detected: usize, // Stale traps matched by at least one lint finding.
+	rebuild_page_count: usize, // Incremented once per page, with or without a record.
+	deterministic_rebuild_count: usize, // Rebuilds judged acceptable.
+	rebuild_failure_count: usize, // Missing or unacceptable rebuild records.
+	allowed_variance_count: usize, // Pages whose rebuild lists any allowed variance.
+	citation_coverage: f64, // traced_section_count / section_count.
+	stale_claim_detection: f64, // Detected / total stale traps; 1.0 when there are none.
+	rebuild_determinism: f64, // deterministic_rebuild_count / page_count.
+	backlink_coverage: f64, // pages_with_backlinks / page_count.
+	page_usefulness: f64, // Mean of the four ratios above, rounded by round3.
+}
+
 #[derive(Clone, Debug, Default, Deserialize, Serialize)]
 struct EvolutionSummary {
 	stale_answer_count: usize,
@@ -832,6 +930,7 @@ struct JobScoring {
 	hard_fail_hits: Vec<String>,
 	unsupported_claims: Vec<UnsupportedClaimReport>,
 	wrong_result_count: usize,
+	knowledge: Option<KnowledgeJobMetrics>,
 	trap_ids_used: Vec<String>,
 	dimension_scores: Vec<DimensionScoreReport>,
 	reason: String,
@@ -859,6 +958,10 @@ struct FailureCounts {
 	review_action_failures: usize,
 	source_mutations: usize,
 	blocking_executable_gaps: usize,
+	untraced_page_sections: usize,
+	missed_stale_findings: usize,
+	rebuild_failures: usize,
+	page_usefulness_failures: usize,
 }
 
 #[derive(Debug, Default)]
@@ -976,6 +1079,7 @@ fn validate_job(job: &RealWorldJob, path: &Path) -> Result<()> {
 	validate_expected_answer(job, path)?;
 	validate_required_evidence(job, path)?;
 	validate_consolidation_fixture(job, path)?;
+	validate_adapter_response(job, path)?;
 	validate_scoring_rubric(job, path)?;
 	validate_allowed_uncertainty(job, path)?;
 	validate_operator_debug(job, path)?;
@@ -1241,6 +1345,93 @@ fn validate_consolidation_proposal(
 	Ok(())
 }
 
+fn validate_adapter_response(job: &RealWorldJob, path: &Path) -> Result<()> {
+	// Fixtures without an adapter response carry no pages to validate.
+	let Some(response) = job.corpus.adapter_response.as_ref() else {
+		return Ok(());
+	};
+
+	let known_evidence = corpus_evidence_ids(job);
+	let known_events = timeline_event_ids(job);
+
+	response.answer.pages.iter().try_for_each(|page| {
+		validate_page_artifact(page, path, &known_evidence, &known_events)
+	})
+}
+
+fn validate_page_artifact( // Rejects a derived page with blank required fields or dangling ids.
+	page: &DerivedPageArtifact,
+	path: &Path, // Fixture path, used only in error messages.
+	evidence_ids: &BTreeSet<String>, // Evidence ids that sections and findings may cite.
+	event_ids: &BTreeSet<String>, // Timeline event ids that sections may cite.
+) -> Result<()> {
+	if page.page_id.trim().is_empty()
+		|| page.page_type.trim().is_empty()
+		|| page.title.trim().is_empty()
+	{
+		return Err(eyre::eyre!("{} has an incomplete derived page.", path.display()));
+	}
+
+	for section in &page.sections {
+		if section.section_id.trim().is_empty()
+			|| section.heading.trim().is_empty()
+			|| section.role.trim().is_empty()
+			|| section.content.trim().is_empty()
+		{
+			return Err(eyre::eyre!(
+				"{} page {} has an incomplete section.",
+				path.display(),
+				page.page_id
+			));
+		}
+
+		for evidence_id in &section.evidence_ids { // Every cited evidence id must resolve.
+			ensure_known_evidence(path, evidence_ids, evidence_id)?;
+		}
+		for event_id in &section.timeline_event_ids { // Every cited event id must resolve.
+			ensure_known_event(path, event_ids, event_id)?;
+		}
+	}
+	for backlink in &page.backlinks { // Backlink targets are only checked for non-blank text.
+		if backlink.trim().is_empty() {
+			return Err(eyre::eyre!(
+				"{} page {} has an empty backlink.",
+				path.display(),
+				page.page_id
+			));
+		}
+	}
+	for finding in &page.lint_findings {
+		if finding.finding_id.trim().is_empty()
+			|| finding.finding_type.trim().is_empty()
+			|| finding.severity.trim().is_empty()
+			|| finding.text.trim().is_empty()
+		{
+			return Err(eyre::eyre!(
+				"{} page {} has an incomplete lint finding.",
+				path.display(),
+				page.page_id
+			));
+		}
+
+		for evidence_id in &finding.evidence_ids { // trap_id is not validated here.
+			ensure_known_evidence(path, evidence_ids, evidence_id)?;
+		}
+	}
+
+	if let Some(rebuild) = &page.rebuild // Only hash presence is checked, not hash equality.
+		&& (rebuild.first_hash.trim().is_empty() || rebuild.second_hash.trim().is_empty())
+	{
+		return Err(eyre::eyre!(
+			"{} page {} has an incomplete rebuild record.",
+			path.display(),
+			page.page_id
+		));
+	}
+
+	Ok(())
+}
+
 fn validate_scoring_rubric(job: &RealWorldJob, path: &Path) -> Result<()> {
 	if !(0.0..=1.0).contains(&job.scoring_rubric.pass_threshold) {
 		return Err(eyre::eyre!("{} has invalid pass_threshold.", path.display()));
@@ -1595,6 +1786,22 @@ fn corpus_text_by_id(job: &RealWorldJob) -> BTreeMap<&str, &str> {
 		.collect()
 }
 
+fn timeline_event_ids(job: &RealWorldJob) -> BTreeSet<String> {
+	job.timeline.iter().map(|entry| &entry.event_id).cloned().collect()
+}
+
+fn ensure_known_event(path: &Path, known: &BTreeSet<String>, event_id: &str) -> Result<()> {
+	if known.contains(event_id) {
+		return Ok(());
+	}
+	// Report the fixture path so the dangling reference is easy to locate.
+	Err(eyre::eyre!(
+		"{} references unknown timeline event id {}.",
+		path.display(),
+		event_id
+	))
+}
+
 fn build_report(jobs: &[RealWorldJob], args: &RunArgs) -> Result<RealWorldReport> {
 	if jobs.is_empty() {
 		return Err(eyre::eyre!("At least one real_world_job fixture is required."));
@@ -1654,6 +1861,7 @@ fn score_job(job: &RealWorldJob) -> JobScoring {
 			hard_fail_hits: Vec::new(),
 			unsupported_claims: Vec::new(),
 			wrong_result_count: 0,
+			knowledge: None,
 			trap_ids_used,
 			dimension_scores: declared_not_encoded_dimension_scores(job),
 			reason: job
@@ -1669,7 +1877,11 @@ fn score_job(job: &RealWorldJob) -> JobScoring {
 	let missing_claims = missing_required_claims(job, answer);
 	let forbidden_claims = forbidden_claim_hits(job, answer);
 	let missing_evidence = missing_required_evidence(job, &produced_evidence);
+	let knowledge = knowledge_metrics(job, answer);
 	let mut unsupported_claims = unsupported_claims(job, answer);
+
+	unsupported_claims.extend(unsupported_page_claims(answer));
+
 	let operator_counts = operator_debug_failure_counts(job);
 	let latency_violations = latency_violations(job, answer);
 	let hard_fail_hits = hard_fail_hits(job, &unsupported_claims, &trap_ids_used);
@@ -1698,6 +1910,12 @@ fn score_job(job: &RealWorldJob) -> JobScoring {
 		review_action_failures: review_action_failures(consolidation.as_ref()),
 		source_mutations: consolidation.as_ref().map_or(0, |report| report.source_mutation_count),
 		blocking_executable_gaps: blocking_executable_gaps(consolidation.as_ref()),
+		untraced_page_sections: knowledge
+			.as_ref()
+			.map_or(0, |metrics| metrics.untraced_section_count),
+		missed_stale_findings: knowledge.as_ref().map_or(0, missed_stale_finding_count),
+		rebuild_failures: knowledge.as_ref().map_or(0, |metrics| metrics.rebuild_failure_count),
+		page_usefulness_failures: knowledge.as_ref().map_or(0, page_usefulness_failure_count),
 	};
 	let dimension_scores = dimension_scores(job, &counts);
 	let normalized_score = normalized_score(&dimension_scores);
@@ -1713,7 +1931,11 @@ fn score_job(job: &RealWorldJob) -> JobScoring {
 		+ counts.update_rationale_missing
 		+ counts.proposal_usefulness_failures
 		+ counts.lineage_failures
-		+ counts.review_action_failures;
+		+ counts.review_action_failures
+		+ counts.untraced_page_sections
+		+ counts.missed_stale_findings
+		+ counts.rebuild_failures
+		+ counts.page_usefulness_failures;
 	let status = job_status(
 		normalized_score,
 		job.scoring_rubric.pass_threshold,
@@ -1735,6 +1957,7 @@ fn score_job(job: &RealWorldJob) -> JobScoring {
 		hard_fail_hits,
 		unsupported_claims,
 		wrong_result_count,
+		knowledge,
 		trap_ids_used,
 		dimension_scores,
 		reason,
@@ -1789,6 +2012,7 @@ fn synthetic_answer(job: &RealWorldJob) -> &ProducedAnswer {
 		content: String::new(),
 		claims: Vec::new(),
 		evidence_ids: Vec::new(),
+		pages: Vec::new(),
 		latency_ms: None,
 		cost: None,
 		trace_explainability: None,
@@ -2024,6 +2248,145 @@ fn unsupported_claim_report(claim: &ProducedClaim, reason: &str) -> UnsupportedC
 	}
 }
 
+fn unsupported_page_claims(answer: &ProducedAnswer) -> Vec<UnsupportedClaimReport> {
+	let mut reports = Vec::new();
+
+	for page in &answer.pages {
+		for section in &page.sections {
+			// Traced or explicitly flagged sections are not hidden unsupported claims.
+			if section_is_traced(section) || section_is_flagged_unsupported(section) {
+				continue;
+			}
+			reports.push(UnsupportedClaimReport {
+				suite_id: String::new(),
+				job_id: String::new(),
+				claim_id: Some(format!("{}:{}", page.page_id, section.section_id)),
+				claim_text: bounded_text(section.content.as_str(), 240),
+				reason:
+					"derived page section has no source evidence and is not flagged unsupported"
+						.to_string(),
+				evidence_ids: section.evidence_ids.clone(),
+			});
+		}
+	}
+
+	reports
+}
+
+fn knowledge_metrics(job: &RealWorldJob, answer: &ProducedAnswer) -> Option<KnowledgeJobMetrics> {
+	if answer.pages.is_empty() {
+		return None;
+	}
+
+	// Resolve the stale traps once; both the total and the detected count use them.
+	let traps = stale_traps(job);
+	let mut metrics = KnowledgeJobMetrics {
+		page_count: answer.pages.len(),
+		stale_trap_count: traps.len(),
+		..KnowledgeJobMetrics::default()
+	};
+
+	for page in &answer.pages {
+		accumulate_page_metrics(page, &mut metrics);
+	}
+
+	metrics.stale_traps_detected =
+		traps.iter().filter(|trap| page_artifacts_detect_stale_trap(&answer.pages, trap)).count();
+	metrics.citation_coverage = ratio(metrics.traced_section_count, metrics.section_count);
+	metrics.stale_claim_detection =
+		ratio_or_full(metrics.stale_traps_detected, metrics.stale_trap_count);
+	metrics.rebuild_determinism = ratio(metrics.deterministic_rebuild_count, metrics.page_count);
+	metrics.backlink_coverage = ratio(metrics.pages_with_backlinks, metrics.page_count);
+
+	// Page usefulness is the unweighted mean of the four ratios.
+	let ratio_sum = metrics.citation_coverage
+		+ metrics.stale_claim_detection
+		+ metrics.rebuild_determinism
+		+ metrics.backlink_coverage;
+	metrics.page_usefulness = round3(ratio_sum / 4.0);
+
+	Some(metrics)
+}
+
+fn stale_traps(job: &RealWorldJob) -> Vec<&NegativeTrap> {
+	// Only stale-fact traps that fail the job when used are counted.
+	let wanted = |trap: &&NegativeTrap| trap.trap_type == "stale_fact" && trap.failure_if_used;
+
+	job.negative_traps.iter().filter(wanted).collect()
+}
+
+fn accumulate_page_metrics(page: &DerivedPageArtifact, metrics: &mut KnowledgeJobMetrics) {
+	let backlink_total = page.backlinks.len();
+
+	metrics.backlink_count += backlink_total;
+	metrics.pages_with_backlinks += usize::from(backlink_total > 0);
+	metrics.section_count += page.sections.len();
+
+	// Each section lands in exactly one bucket: traced, flagged, or untraced.
+	for section in &page.sections {
+		if section_is_traced(section) {
+			metrics.traced_section_count += 1;
+			continue;
+		}
+		if !section_is_flagged_unsupported(section) {
+			metrics.untraced_section_count += 1;
+			continue;
+		}
+
+		metrics.flagged_unsupported_section_count += 1;
+		metrics.unsupported_summary_count += usize::from(section.role == "summary");
+	}
+
+	// Every page counts toward the rebuild total; a missing record is a failure.
+	metrics.rebuild_page_count += 1;
+
+	match page.rebuild.as_ref() {
+		Some(record) => {
+			metrics.allowed_variance_count += usize::from(!record.allowed_variance.is_empty());
+
+			if rebuild_is_acceptable(record) {
+				metrics.deterministic_rebuild_count += 1;
+			} else {
+				metrics.rebuild_failure_count += 1;
+			}
+		},
+		None => metrics.rebuild_failure_count += 1,
+	}
+}
+
+fn section_is_traced(section: &DerivedPageSection) -> bool {
+	!(section.evidence_ids.is_empty() && section.timeline_event_ids.is_empty())
+}
+
+fn section_is_flagged_unsupported(section: &DerivedPageSection) -> bool {
+	matches!(section.unsupported_reason.as_deref(), Some(reason) if !reason.trim().is_empty())
+}
+
+fn rebuild_is_acceptable(rebuild: &DerivedPageRebuild) -> bool {
+	let hashes_agree = rebuild.deterministic && rebuild.first_hash == rebuild.second_hash;
+	hashes_agree || !rebuild.allowed_variance.is_empty()
+}
+
+fn page_artifacts_detect_stale_trap(pages: &[DerivedPageArtifact], trap: &NegativeTrap) -> bool {
+	// A finding matches by naming the trap or by citing any of its evidence.
+	let matches_trap = |finding: &DerivedPageLintFinding| {
+		let names_trap = finding.trap_id.as_deref() == Some(trap.trap_id.as_str());
+		let cites_trap_evidence =
+			finding.evidence_ids.iter().any(|id| trap.evidence_ids.contains(id));
+		names_trap || cites_trap_evidence
+	};
+
+	pages.iter().flat_map(|page| page.lint_findings.iter()).any(matches_trap)
+}
+
+fn missed_stale_finding_count(metrics: &KnowledgeJobMetrics) -> usize {
+	usize::saturating_sub(metrics.stale_trap_count, metrics.stale_traps_detected)
+}
+
+fn page_usefulness_failure_count(metrics: &KnowledgeJobMetrics) -> usize {
+	usize::from(metrics.page_usefulness < 0.8)
+}
+
 fn hard_fail_hits(
 	job: &RealWorldJob,
 	unsupported_claims: &[UnsupportedClaimReport],
@@ -2095,18 +2458,21 @@ fn dimension_score(dimension_id: &str, max_points: f64, counts: &FailureCounts)
 				|| counts.operator_debug_repair_unclear > 0
 				|| counts.conflict_detection_missing > 0
 				|| counts.proposal_usefulness_failures > 0
-				|| counts.review_action_failures > 0,
+				|| counts.review_action_failures > 0
+				|| counts.page_usefulness_failures > 0,
 		"evidence_grounding" =>
 			counts.missing_evidence > 0
 				|| counts.unsupported_claims > 0
-				|| counts.lineage_failures > 0,
-		"trap_avoidance" => counts.trap_uses > 0,
+				|| counts.lineage_failures > 0
+				|| counts.untraced_page_sections > 0,
+		"trap_avoidance" => counts.trap_uses > 0 || counts.missed_stale_findings > 0,
 		"uncertainty_handling" => counts.unsupported_claims > 0,
 		"lifecycle_behavior" =>
 			counts.stale_answers > 0
 				|| counts.conflict_detection_missing > 0
 				|| counts.update_rationale_missing > 0
-				|| counts.source_mutations > 0,
+				|| counts.source_mutations > 0
+				|| counts.rebuild_failures > 0,
 		"source_immutability" => counts.source_mutations > 0,
 		"proposal_usefulness" => counts.proposal_usefulness_failures > 0,
 		"lineage_completeness" => counts.lineage_failures > 0,
@@ -2180,42 +2546,17 @@ fn job_status(
 }
 
 fn job_reason(status: TypedStatus, counts: &FailureCounts, normalized_score: f64) -> String {
+	let wrong_result_signal_count = wrong_result_signal_count(counts);
+
 	match status {
 		TypedStatus::Pass => format!("Job passed with normalized_score {normalized_score:.3}."),
 		TypedStatus::UnsupportedClaim => format!(
 			"Job produced {} unsupported claim(s), {} wrong-result signal(s), {} latency violation(s), and normalized_score {normalized_score:.3}.",
-			counts.unsupported_claims,
-			counts.missing_claims
-				+ counts.forbidden_claims
-				+ counts.missing_evidence
-				+ counts.trap_uses
-				+ counts.operator_debug_missing
-				+ counts.operator_debug_raw_sql
-				+ counts.operator_debug_trace_gaps
-				+ counts.operator_debug_repair_unclear
-				+ counts.conflict_detection_missing
-				+ counts.update_rationale_missing
-				+ counts.proposal_usefulness_failures
-				+ counts.lineage_failures
-				+ counts.review_action_failures,
-			counts.latency_violations
+			counts.unsupported_claims, wrong_result_signal_count, counts.latency_violations
 		),
 		TypedStatus::WrongResult => format!(
 			"Job produced {} wrong-result signal(s), {} latency violation(s), and normalized_score {normalized_score:.3}.",
-			counts.missing_claims
-				+ counts.forbidden_claims
-				+ counts.missing_evidence
-				+ counts.trap_uses
-				+ counts.operator_debug_missing
-				+ counts.operator_debug_raw_sql
-				+ counts.operator_debug_trace_gaps
-				+ counts.operator_debug_repair_unclear
-				+ counts.conflict_detection_missing
-				+ counts.update_rationale_missing
-				+ counts.proposal_usefulness_failures
-				+ counts.lineage_failures
-				+ counts.review_action_failures,
-			counts.latency_violations
+			wrong_result_signal_count, counts.latency_violations
 		),
 		TypedStatus::LifecycleFail => format!(
 			"Job produced {} source mutation(s) and normalized_score {normalized_score:.3}.",
@@ -2229,6 +2570,26 @@ fn job_reason(status: TypedStatus, counts: &FailureCounts, normalized_score: f64
 	}
 }
 
+fn wrong_result_signal_count(counts: &FailureCounts) -> usize { // Total reported as "wrong-result signal(s)".
+	counts.missing_claims
+		+ counts.forbidden_claims
+		+ counts.missing_evidence
+		+ counts.trap_uses
+		+ counts.operator_debug_missing
+		+ counts.operator_debug_raw_sql
+		+ counts.operator_debug_trace_gaps
+		+ counts.operator_debug_repair_unclear
+		+ counts.conflict_detection_missing
+		+ counts.update_rationale_missing
+		+ counts.proposal_usefulness_failures
+		+ counts.lineage_failures
+		+ counts.review_action_failures
+		+ counts.untraced_page_sections // Knowledge-page signals start here.
+		+ counts.missed_stale_findings
+		+ counts.rebuild_failures
+		+ counts.page_usefulness_failures
+}
+
 fn job_report(job: &RealWorldJob, scoring: JobScoring) -> JobReport {
 	let answer = produced_answer(job);
 	let metrics = job_metrics(job, answer);
@@ -2266,6 +2627,7 @@ fn job_report(job: &RealWorldJob, scoring: JobScoring) -> JobReport {
 		latency_ms: answer.latency_ms,
 		cost: answer.cost.clone(),
 		trace_explainability: answer.trace_explainability.clone(),
+		knowledge: scoring.knowledge,
 		trap_ids_used: scoring.trap_ids_used,
 		dimension_scores: scoring.dimension_scores,
 		reason: scoring.reason,
@@ -2747,6 +3109,7 @@ fn report_summary(jobs: &[JobReport], suites: &[SuiteReport]) -> ReportSummary {
 			.map(|debug| debug.ux_gaps.len())
 			.sum(),
 		consolidation: consolidation_summary(jobs),
+		knowledge: knowledge_summary(jobs),
 		..ReportSummary::default()
 	};
 
@@ -2821,6 +3184,10 @@ fn ratio_or(numerator: usize, denominator: usize, empty_value: f64) -> f64 {
 	if denominator == 0 { empty_value } else { round3(numerator as f64 / denominator as f64) }
 }
 
+fn ratio_or_full(numerator: usize, denominator: usize) -> f64 { // `ratio_or` with 1.0 for a zero denominator.
+	ratio_or(numerator, denominator, 1.0)
+}
+
 fn consolidation_summary(jobs: &[JobReport]) -> ConsolidationSummaryReport {
 	let reports = jobs.iter().filter_map(|job| job.consolidation.as_ref()).collect::<Vec<_>>();
 
@@ -2854,6 +3221,60 @@ fn consolidation_summary(jobs: &[JobReport]) -> ConsolidationSummaryReport {
 	}
 }
 
+fn knowledge_summary(jobs: &[JobReport]) -> Option<KnowledgeSummary> { // Aggregates per-job knowledge metrics.
+	let knowledge_jobs = jobs.iter().filter_map(|job| job.knowledge.as_ref()).collect::<Vec<_>>();
+
+	if knowledge_jobs.is_empty() { // No job reported knowledge metrics: no summary at all.
+		return None;
+	}
+
+	let job_count = knowledge_jobs.len();
+	let page_count = knowledge_jobs.iter().map(|metrics| metrics.page_count).sum::<usize>();
+	let section_count = knowledge_jobs.iter().map(|metrics| metrics.section_count).sum::<usize>();
+	let traced_section_count =
+		knowledge_jobs.iter().map(|metrics| metrics.traced_section_count).sum::<usize>();
+	let stale_trap_count =
+		knowledge_jobs.iter().map(|metrics| metrics.stale_trap_count).sum::<usize>();
+	let stale_traps_detected =
+		knowledge_jobs.iter().map(|metrics| metrics.stale_traps_detected).sum::<usize>();
+	let deterministic_rebuild_count =
+		knowledge_jobs.iter().map(|metrics| metrics.deterministic_rebuild_count).sum::<usize>();
+	let rebuild_page_count =
+		knowledge_jobs.iter().map(|metrics| metrics.rebuild_page_count).sum::<usize>();
+	let backlink_count = knowledge_jobs.iter().map(|metrics| metrics.backlink_count).sum::<usize>();
+	let pages_with_backlinks =
+		knowledge_jobs.iter().map(|metrics| metrics.pages_with_backlinks).sum::<usize>();
+	let page_usefulness = round3( // Mean of per-job scores, not recomputed from pooled counts.
+		knowledge_jobs.iter().map(|metrics| metrics.page_usefulness).sum::<f64>()
+			/ job_count as f64,
+	);
+
+	Some(KnowledgeSummary {
+		job_count,
+		page_count,
+		section_count,
+		backlink_count,
+		pages_with_backlinks,
+		citation_coverage: ratio(traced_section_count, section_count), // Pooled across jobs.
+		stale_claim_detection: ratio_or_full(stale_traps_detected, stale_trap_count),
+		rebuild_determinism: ratio(deterministic_rebuild_count, rebuild_page_count),
+		backlink_coverage: ratio(pages_with_backlinks, page_count),
+		page_usefulness,
+		unsupported_summary_count: knowledge_jobs
+			.iter()
+			.map(|metrics| metrics.unsupported_summary_count)
+			.sum(),
+		untraced_section_count: knowledge_jobs
+			.iter()
+			.map(|metrics| metrics.untraced_section_count)
+			.sum(),
+		allowed_variance_count: knowledge_jobs
+			.iter()
+			.map(|metrics| metrics.allowed_variance_count)
+			.sum(),
+	})
+}
+
 fn mean_score(jobs: &[JobReport]) -> f64 {
 	if jobs.is_empty() {
 		return 0.0;
@@ -2983,6 +3404,7 @@ fn render_markdown(report: &RealWorldReport, report_path: &Path) -> String {
 	render_markdown_evolution(&mut out, report);
 	render_markdown_trace_explainability(&mut out, report);
 	render_markdown_consolidation(&mut out, report);
+	render_markdown_knowledge(&mut out, report);
 	render_markdown_unsupported_claims(&mut out, report);
 	render_markdown_follow_ups(&mut out, report);
 	render_markdown_semantics(&mut out, report);
@@ -3094,6 +3516,28 @@ fn render_markdown_header(out: &mut String, report: &RealWorldReport, report_pat
 		report.summary.trace_incomplete_count
 	));
 	out.push_str(&format!("- Operator UX gaps: `{}`\n", report.summary.operator_ux_gap_count));
+
+	if let Some(knowledge) = &report.summary.knowledge {
+		out.push_str(&format!(
+			"- Knowledge citation coverage: `{:.3}`\n",
+			knowledge.citation_coverage
+		));
+		out.push_str(&format!(
+			"- Stale claim detection: `{:.3}`\n",
+			knowledge.stale_claim_detection
+		));
+		out.push_str(&format!("- Rebuild determinism: `{:.3}`\n", knowledge.rebuild_determinism));
+		out.push_str(&format!(
+			"- Backlinks: `{}` total, `{:.3}` page coverage\n",
+			knowledge.backlink_count, knowledge.backlink_coverage
+		));
+		out.push_str(&format!("- Page usefulness: `{:.3}`\n", knowledge.page_usefulness));
+		out.push_str(&format!(
+			"- Unsupported summary count: `{}`\n",
+			knowledge.unsupported_summary_count
+		));
+	}
+
 	out.push_str(&format!(
 		"- Private corpus redaction: `{}`\n\n",
 		md_inline(report.private_corpus_redaction.policy.as_str())
@@ -3451,6 +3895,42 @@ fn render_markdown_consolidation_gaps(out: &mut String, report: &RealWorldReport
 	out.push('\n');
 }
 
+fn render_markdown_knowledge(out: &mut String, report: &RealWorldReport) {
+	// Pair each job with its metrics up front so the loop needs no second check.
+	let rows = report
+		.jobs
+		.iter()
+		.filter_map(|job| job.knowledge.as_ref().map(|metrics| (job, metrics)))
+		.collect::<Vec<_>>();
+
+	if rows.is_empty() {
+		return;
+	}
+
+	out.push_str("## Knowledge Page Metrics\n\n");
+	out.push_str("| Job | Pages | Sections | Citation Coverage | Stale Claim Detection | Rebuild Determinism | Page Usefulness | Backlinks | Unsupported Summaries | Untraced Sections | Allowed Variance |\n");
+	out.push_str("| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |\n");
+
+	for (job, metrics) in rows {
+		out.push_str(&format!(
+			"| {} | {} | {} | `{:.3}` | `{:.3}` | `{:.3}` | `{:.3}` | {} | {} | {} | {} |\n",
+			md_cell(job.job_id.as_str()),
+			metrics.page_count,
+			metrics.section_count,
+			metrics.citation_coverage,
+			metrics.stale_claim_detection,
+			metrics.rebuild_determinism,
+			metrics.page_usefulness,
+			metrics.backlink_count,
+			metrics.unsupported_summary_count,
+			metrics.untraced_section_count,
+			metrics.allowed_variance_count
+		));
+	}
+
+	out.push('\n');
+}
+
 fn render_markdown_unsupported_claims(out: &mut String, report: &RealWorldReport) {
 	out.push_str("## Unsupported Claims\n\n");
 
@@ -3520,6 +4000,7 @@ fn render_markdown_semantics(out: &mut String, report: &RealWorldReport) {
 	);
 	out.push_str("- `unsupported_claim`: a job produced a substantive claim not supported by the fixture evidence links.\n");
 	out.push_str("- `not_encoded`: a suite has no checked-in fixture, or an encoded fixture declares a capability gap so no pass/fail claim is allowed.\n\n");
+	out.push_str("For `knowledge_compilation` jobs, generated pages are benchmark artifacts. Page sections must cite source evidence or timeline events, or be explicitly flagged as unsupported. Flagged unsupported summaries are counted separately from hidden unsupported claims.\n\n");
 	out.push_str("## Suites With `not_encoded` Status\n\n");
 
 	if report.not_encoded_suites.is_empty() {
diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
index 9f6b7217..cc665cb4 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -48,6 +48,10 @@ fn consolidation_fixture_dir() -> PathBuf {
 	real_world_memory_fixture_dir().join("consolidation")
 }
 
+fn knowledge_fixture_dir() -> PathBuf { // `knowledge` subdirectory of the real-world memory fixtures.
+	real_world_memory_fixture_dir().join("knowledge")
+}
+
 fn run_json_report_from(fixtures: PathBuf) -> Result<Value> {
 	let output = Command::new(env!("CARGO_BIN_EXE_real_world_job_benchmark"))
 		.arg("run")
@@ -150,7 +154,7 @@ fn smoke_fixture_produces_typed_json_report() -> Result<()> {
 fn runner_discovers_nested_fixture_layout() -> Result<()> {
 	let report = run_json_report_from(fixture_root())?;
 
-	assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(25));
+	assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(27));
 
 	Ok(())
 }
@@ -256,6 +260,77 @@ fn consolidation_fixtures_report_reviewable_proposal_metrics() -> Result<()> {
 	Ok(())
 }
 
+#[test]
+fn knowledge_fixtures_report_page_metrics() -> Result<()> {
+	let report = run_json_report_from(knowledge_fixture_dir())?;
+
+	assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(2));
+	assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(2));
+	assert_eq!(report.pointer("/summary/unsupported_claim_count").and_then(Value::as_u64), Some(0));
+	assert_eq!(report.pointer("/summary/wrong_result_count").and_then(Value::as_u64), Some(0));
+	assert_eq!(report.pointer("/summary/knowledge/page_count").and_then(Value::as_u64), Some(4));
+	assert_eq!(
+		report.pointer("/summary/knowledge/section_count").and_then(Value::as_u64),
+		Some(10)
+	);
+	assert_eq!(
+		report.pointer("/summary/knowledge/citation_coverage").and_then(Value::as_f64),
+		Some(0.9)
+	);
+	assert_eq!(
+		report.pointer("/summary/knowledge/stale_claim_detection").and_then(Value::as_f64),
+		Some(1.0)
+	);
+	assert_eq!(
+		report.pointer("/summary/knowledge/rebuild_determinism").and_then(Value::as_f64),
+		Some(1.0)
+	);
+	assert_eq!(
+		report.pointer("/summary/knowledge/backlink_count").and_then(Value::as_u64),
+		Some(9)
+	);
+	assert_eq!(
+		report.pointer("/summary/knowledge/pages_with_backlinks").and_then(Value::as_u64),
+		Some(4)
+	);
+	assert_eq!(
+		report.pointer("/summary/knowledge/backlink_coverage").and_then(Value::as_f64),
+		Some(1.0)
+	);
+	assert_eq!(
+		report.pointer("/summary/knowledge/page_usefulness").and_then(Value::as_f64),
+		Some(0.969)
+	);
+	assert_eq!(
+		report.pointer("/summary/knowledge/unsupported_summary_count").and_then(Value::as_u64),
+		Some(1)
+	);
+	assert_eq!(
+		report.pointer("/summary/knowledge/allowed_variance_count").and_then(Value::as_u64),
+		Some(1)
+	);
+
+	let suites = array_at(&report, "/suites")?;
+	let knowledge_suite = find_by_field(suites, "/suite_id", "knowledge_compilation")?;
+
+	assert_eq!(knowledge_suite.pointer("/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(knowledge_suite.pointer("/encoded_job_count").and_then(Value::as_u64), Some(2));
+
+	let jobs = array_at(&report, "/jobs")?;
+	let project_page_job = find_by_field(jobs, "/job_id", "knowledge-project-page-001")?;
+
+	assert_eq!(
+		project_page_job.pointer("/knowledge/unsupported_summary_count").and_then(Value::as_u64),
+		Some(1)
+	);
+	assert_eq!(
+		project_page_job.pointer("/knowledge/untraced_section_count").and_then(Value::as_u64),
+		Some(0)
+	);
+
+	Ok(())
+}
+
 #[test]
 fn generated_json_report_renders_markdown() -> Result<()> {
 	let report = run_json_report()?;
@@ -295,23 +370,70 @@ fn generated_json_report_renders_markdown() -> Result<()> {
 	Ok(())
 }
 
+#[test]
+fn knowledge_json_report_renders_markdown_metrics() -> Result<()> {
+	let report = run_json_report_from(knowledge_fixture_dir())?;
+	let temp_dir = env::temp_dir().join(format!("elf-real-world-knowledge-test-{}", process::id()));
+
+	fs::create_dir_all(&temp_dir)?;
+
+	let report_path = temp_dir.join("knowledge-report.json");
+	let markdown_path = temp_dir.join("knowledge-report.md");
+
+	fs::write(&report_path, serde_json::to_vec_pretty(&report)?)?;
+
+	let output = Command::new(env!("CARGO_BIN_EXE_real_world_job_benchmark"))
+		.arg("publish")
+		.arg("--report")
+		.arg(&report_path)
+		.arg("--out")
+		.arg(&markdown_path)
+		.output()?;
+
+	assert!(
+		output.status.success(),
+		"real_world_job publisher failed: {}",
+		String::from_utf8_lossy(&output.stderr),
+	);
+
+	let markdown = fs::read_to_string(markdown_path)?;
+
+	assert!(markdown.contains("Knowledge Page Metrics"));
+	assert!(markdown.contains("Knowledge citation coverage"));
+	assert!(markdown.contains("Backlinks: `9` total"));
+	assert!(markdown.contains("Unsupported summary count"));
+	assert!(markdown.contains("knowledge-project-page-001"));
+	assert!(markdown.contains("knowledge-entity-concept-002"));
+
+	Ok(())
+}
+
+fn assert_root_knowledge_summary(report: &Value) {
+	assert_eq!(report.pointer("/summary/knowledge/job_count").and_then(Value::as_u64), Some(2));
+	assert_eq!(report.pointer("/summary/knowledge/page_count").and_then(Value::as_u64), Some(4));
+	assert_eq!(
+		report.pointer("/summary/knowledge/page_usefulness").and_then(Value::as_f64),
+		Some(0.969)
+	);
+}
+
 #[test]
 fn real_world_memory_fixtures_report_aggregate_metrics() -> Result<()> {
 	let report = run_json_report_from(real_world_memory_fixture_dir())?;
 
-	assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(25));
-	assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(23));
+	assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(27));
+	assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(25));
 	assert_eq!(report.pointer("/summary/wrong_result").and_then(Value::as_u64), Some(1));
 	assert_eq!(report.pointer("/summary/not_encoded").and_then(Value::as_u64), Some(1));
 	assert_eq!(report.pointer("/summary/unsupported_claim_count").and_then(Value::as_u64), Some(0));
 	assert_eq!(report.pointer("/summary/wrong_result_count").and_then(Value::as_u64), Some(3));
 	assert_eq!(
 		report.pointer("/summary/expected_evidence_recall").and_then(Value::as_f64),
-		Some(0.929)
+		Some(0.938)
 	);
 	assert_eq!(
 		report.pointer("/summary/irrelevant_context_ratio").and_then(Value::as_f64),
-		Some(0.022)
+		Some(0.02)
 	);
 	assert_eq!(report.pointer("/summary/stale_retrieval_count").and_then(Value::as_u64), Some(1));
 	assert_eq!(report.pointer("/summary/stale_answer_count").and_then(Value::as_u64), Some(0));
@@ -341,12 +463,12 @@ fn real_world_memory_fixtures_report_aggregate_metrics() -> Result<()> {
 	);
 	assert_eq!(
 		report.pointer("/summary/evidence_required_count").and_then(Value::as_u64),
-		Some(49)
+		Some(55)
 	);
-	assert_eq!(report.pointer("/summary/evidence_covered_count").and_then(Value::as_u64), Some(46));
-	assert_eq!(report.pointer("/summary/evidence_coverage").and_then(Value::as_f64), Some(0.939));
-	assert_eq!(report.pointer("/summary/source_ref_coverage").and_then(Value::as_f64), Some(0.939));
-	assert_eq!(report.pointer("/summary/quote_coverage").and_then(Value::as_f64), Some(0.939));
+	assert_eq!(report.pointer("/summary/evidence_covered_count").and_then(Value::as_u64), Some(52));
+	assert_eq!(report.pointer("/summary/evidence_coverage").and_then(Value::as_f64), Some(0.945));
+	assert_eq!(report.pointer("/summary/source_ref_coverage").and_then(Value::as_f64), Some(0.945));
+	assert_eq!(report.pointer("/summary/quote_coverage").and_then(Value::as_f64), Some(0.945));
 	assert_eq!(
 		report.pointer("/summary/trace_explainability_count").and_then(Value::as_u64),
 		Some(1)
@@ -370,6 +492,8 @@ fn real_world_memory_fixtures_report_aggregate_metrics() -> Result<()> {
 		Some(1)
 	);
 
+	assert_root_knowledge_summary(&report);
+
 	let suites = array_at(&report, "/suites")?;
 
 	for suite_id in [
@@ -379,6 +503,7 @@ fn real_world_memory_fixtures_report_aggregate_metrics() -> Result<()> {
 		"capture_integration",
 		"personalization",
 		"consolidation",
+		"knowledge_compilation",
 	] {
 		let suite = find_by_field(suites, "/suite_id", suite_id)?;
 
diff --git a/docs/guide/benchmarking/index.md b/docs/guide/benchmarking/index.md
index 2829e253..a0409e6d 100644
--- a/docs/guide/benchmarking/index.md
+++ b/docs/guide/benchmarking/index.md
@@ -38,7 +38,8 @@ cleanup, use `docs/guide/single_user_production.md`.
   operator-debugging UX report with trace/viewer links, raw-SQL avoidance, root-cause
   step counts, dropped-candidate visibility, and repair-action clarity.
 - `real_world_agent_memory_benchmark.md`: operator overview for the v1 real-world
-  agent memory benchmark contract, including suite taxonomy and typed report states.
+  agent memory benchmark contract, including suite taxonomy, typed report states, and
+  the knowledge-compilation fixture task.
 - `real_world_memory_evolution.md`: run and interpret the checked-in memory evolution
   jobs for current facts, historical facts, stale traps, conflicts, update rationales,
   and temporal graph limitations.
@@ -50,8 +51,8 @@ cleanup, use `docs/guide/single_user_production.md`.
   summaries and durable scripts.
 - Keep generated real-world job smoke JSON and Markdown under `tmp/real-world-job/`;
   commit fixture schemas, smoke fixtures, runner code, and durable docs only.
-- Keep generated real-world memory trust/personalization JSON and Markdown under
-  `tmp/real-world-memory/`; commit fixtures, runner code, and durable docs only.
+- Keep generated real-world memory trust/personalization/knowledge JSON and Markdown
+  under `tmp/real-world-memory/`; commit fixtures, runner code, and durable docs only.
 - Link the newest decision-relevant report from README and this index.
 - When benchmark semantics change, update `live_baseline_benchmark.md` and the
   relevant spec before publishing a new result.
diff --git a/docs/guide/benchmarking/live_baseline_benchmark.md b/docs/guide/benchmarking/live_baseline_benchmark.md
index 31294eee..5d5f0387 100644
--- a/docs/guide/benchmarking/live_baseline_benchmark.md
+++ b/docs/guide/benchmarking/live_baseline_benchmark.md
@@ -374,6 +374,25 @@ The consolidation fixtures live under
 proposal payloads, source lineage, review action outcomes, executable gaps, and source
 mutation count. They do not claim live scheduled consolidation-worker generation.
 
+To run the checked-in knowledge-compilation and page-rebuild fixtures:
+
+```sh
+cargo make real-world-memory-knowledge
+```
+
+Artifacts:
+
+```text
+tmp/real-world-memory/knowledge-report.json
+tmp/real-world-memory/knowledge-report.md
+```
+
+The knowledge fixtures live under
+`apps/elf-eval/fixtures/real_world_memory/knowledge/`. They score derived page
+citation coverage, stale-claim linting, rebuild determinism, backlink coverage, page
+usefulness, and explicitly flagged unsupported summaries. Generated pages are
+benchmark artifacts, not source-truth replacements.
+
 ## Clean Up
 
 ```sh
diff --git a/docs/guide/benchmarking/real_world_agent_memory_benchmark.md b/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
index 16f63169..305ec553 100644
--- a/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
+++ b/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
@@ -245,6 +245,21 @@ These fixtures encode proposal expectations only. They do not claim that a live
 scheduled consolidation worker generated the proposals; the report records that missing
 primitive as an executable gap with a follow-up issue title.
 
+Current checked-in knowledge-compilation increment:
+
+```sh
+cargo make real-world-memory-knowledge
+```
+
+This parses `apps/elf-eval/fixtures/real_world_memory/knowledge/`, writes
+`tmp/real-world-memory/knowledge-report.json`, and renders
+`tmp/real-world-memory/knowledge-report.md`. The fixtures include synthetic project,
+entity, concept, and issue-timeline page artifacts. Generated pages are benchmark
+artifacts only: every section must cite source evidence or timeline events, or it must
+be explicitly flagged unsupported. The report publishes citation coverage, stale claim
+detection, rebuild determinism, aggregate backlink counts and page coverage, page
+usefulness, unsupported summary count, and untraced section count.
+
 Do not generate large fixtures or update production-adoption verdicts while adding the
 contract. The current adoption gate remains an existing benchmark decision until new
 real-world job reports are implemented and published.
diff --git a/docs/spec/real_world_agent_memory_benchmark_v1.md b/docs/spec/real_world_agent_memory_benchmark_v1.md
index 9cad1941..d1aefae9 100644
--- a/docs/spec/real_world_agent_memory_benchmark_v1.md
+++ b/docs/spec/real_world_agent_memory_benchmark_v1.md
@@ -191,6 +191,65 @@ An answer that states a required claim without any acceptable evidence link is a
 `unsupported_claim` unless the job's `allowed_uncertainty` explicitly permits an
 uncited low-confidence statement.
 
+### Optional `adapter_response.answer.pages`
+
+Knowledge-compilation fixtures MAY include generated page artifacts in
+`corpus.adapter_response.answer.pages[]`. These page artifacts are benchmark outputs,
+not authoritative source truth. Any checked-in generated page fixture MUST be clearly
+marked as a benchmark artifact.
+
+Each page entry MUST include these fields, except those marked optional:
+
+- `page_id`: stable page identifier, such as `project:elf-benchmark-suite`.
+- `page_type`: `project`, `entity`, `concept`, `issue_timeline`, or another
+  fixture-defined type.
+- `title`: human-readable page title.
+- `path`: optional fixture path for a checked-in benchmark artifact page.
+- `sections`: generated page sections.
+- `backlinks`: zero or more page, entity, concept, issue, or evidence identifiers.
+- `lint_findings`: zero or more stale, unsupported, or contradiction findings.
+- `rebuild`: optional rebuild comparison record.
+
+Each `sections[]` entry MUST include these fields, except those marked optional:
+
+- `section_id`
+- `heading`
+- `role`: examples include `current_truth`, `history`, `timeline`, `backlinks`, and
+  `summary`.
+- `content`: bounded fixture text.
+- `evidence_ids`: zero or more ids from `corpus.items[]`.
+- `timeline_event_ids`: zero or more ids from `timeline[]`.
+- `unsupported_reason`: optional reason why the section is intentionally unsupported.
+
+Every generated page section MUST trace back to at least one `evidence_id` or
+`timeline_event_id`, or it MUST include `unsupported_reason`. A section that lacks both
+trace evidence and an unsupported flag is an `unsupported_claim`. A section with
+`role = "summary"` and `unsupported_reason` is counted as an unsupported summary, but it
+is not a hidden unsupported claim because the page explicitly marks the gap.
+
+Each `lint_findings[]` entry SHOULD include:
+
+- `finding_id`
+- `finding_type`: for example `stale_claim`, `unsupported_section`, or
+  `contradiction`.
+- `severity`
+- `text`
+- `evidence_ids`
+- `trap_id`: optional link to `negative_traps[]`.
+
+Each `rebuild` record SHOULD include:
+
+- `first_hash`
+- `second_hash`
+- `deterministic`: true when repeat rebuilds produced byte-stable output.
+- `allowed_variance`: explanations for accepted non-semantic variance.
+
+Knowledge-compilation reports SHOULD include citation coverage, stale claim detection,
+rebuild determinism, page usefulness, backlink counts, unsupported summary count, and
+untraced section count. Rebuild results are acceptable only when repeated output is
+deterministic enough for regression comparison or every allowed variance is explicitly
+reported.
+
 ### `negative_traps`
 
 Negative traps MUST be explicit so systems are tested against realistic memory failure
@@ -387,6 +446,9 @@ Reports MUST include:
   stages, especially for wrong-result stage attribution;
 - per-suite typed status and score distribution;
 - unsupported claim list with claim text or a bounded redacted description;
+- for encoded knowledge-compilation jobs with page artifacts: citation coverage, stale
+  claim detection, rebuild determinism, page usefulness, backlink counts, unsupported
+  summary count, and untraced section count;
 - explicit `not_encoded` suite list;
 - private-corpus redaction policy when private fixtures are used.
 - capture/integration coverage classes when any fixture declares `capture_behaviors`,