hack-ink · yvette-carlisle · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026
diff --git a/Makefile.toml b/Makefile.toml
@@ -839,6 +839,9 @@ args = [
 # | real-world-memory-knowledge        | composite | |
 # | real-world-memory-knowledge-json   | command   | |
 # | real-world-memory-knowledge-report | command   | |
+# | real-world-first-generation-oss        | composite | |
+# | real-world-first-generation-oss-json   | command   | |
+# | real-world-first-generation-oss-report | command   | |
 # | ragflow-docker-smoke                    | command   | |
 # | lightrag-docker-context-smoke           | command   | |
 # | graphrag-docker-smoke                   | command   | |
@@ -933,6 +936,55 @@ args = [
 	"tmp/real-world-memory/knowledge-report.md",
 ]
 
+[tasks.real-world-first-generation-oss] # Composite entry point: defines no command of its own.
+workspace = false
+dependencies = [
+	"real-world-first-generation-oss-report", # The -report task itself depends on the -json task.
+]
+
+[tasks.real-world-first-generation-oss-json] # Invokes the benchmark binary's `run` subcommand on the first_generation_oss fixtures.
+workspace = false
+command = "cargo"
+args = [
+	"run",
+	"-p",
+	"elf-eval",
+	"--bin",
+	"real_world_job_benchmark",
+	"--", # Everything after this separator is passed to the binary, not to cargo.
+	"run",
+	"--fixtures",
+	"apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss",
+	"--out",
+	"tmp/real-world-memory/first-generation-oss/report.json", # Same path the -report task passes as `--report`; keep the two in sync.
+	"--run-id",
+	"first-generation-oss-continuity-source-store",
+	"--adapter-id",
+	"fixture_first_generation_oss", # Same id as `adapter_response.adapter_id` in the agentmemory fixture JSON.
+	"--adapter-name",
+	"First-generation OSS fixture coverage",
+]
+
+[tasks.real-world-first-generation-oss-report] # Invokes the benchmark binary's `publish` subcommand on the JSON report.
+workspace = false
+dependencies = [
+	"real-world-first-generation-oss-json", # Its `--out` path is the `--report` input below.
+]
+command = "cargo"
+args = [
+	"run",
+	"-p",
+	"elf-eval",
+	"--bin",
+	"real_world_job_benchmark",
+	"--", # Everything after this separator is passed to the binary, not to cargo.
+	"publish",
+	"--report",
+	"tmp/real-world-memory/first-generation-oss/report.json", # Must equal `--out` of the -json task.
+	"--out",
+	"tmp/real-world-memory/first-generation-oss/report.md",
+]
+
 
 # External memory pattern radar
 # | task                               | type      | cwd |

diff --git a/README.md b/README.md
@@ -170,8 +170,16 @@ provider-backed ELF evidence was required.
   ELF passes trace hydration, candidate-drop visibility, selected-but-not-narrated
   evidence, replay-command availability, and repair-action clarity. qmd ties replay
   command and repair-action clarity but is `wrong_result` for trace hydration and
-  candidate-drop stage visibility. OpenMemory UI/export and claude-mem viewer flows
-  remain blocked or not encoded, so this is not a broad viewer-product claim.
+  candidate-drop stage visibility. OpenMemory UI/export remains blocked, and
+  claude-mem viewer flows remain blocked until Docker-contained hook/viewer evidence
+  exists, so this is not a broad viewer-product claim.
+- First-generation OSS continuity/source-store follow-up after XY-925:
+  `cargo make real-world-first-generation-oss` emits a fixture-backed external-adapter
+  slice for agentmemory, memsearch, and claude-mem with 6 jobs (4 pass, 2 blocked) and
+  full evidence/source-ref/quote coverage. It selects agentmemory's durable local path,
+  adds memsearch canonical Markdown source-store and retrieval-debug prompt coverage,
+  and records claude-mem progressive-disclosure/retrieval-repair coverage while
+  keeping hook and viewer/operator workflows blocked.
 - Expanded adapter-pack coverage after XY-834: the real-world external adapter
   manifest now includes `research_gate` records for RAGFlow, LightRAG, GraphRAG,
   Graphiti/Zep, Letta, LangGraph, nanograph, llm-wiki, gbrain, and deeper
@@ -201,14 +209,16 @@ provider-backed ELF evidence was required.
   source refs, write-policy redaction audit counts, evidence binding, and no secret
   leakage. qmd remains `not_encoded` for this suite. agentmemory capture comparison is
   blocked by mocked/in-memory storage, and claude-mem hook/viewer capture remains
-  untested, so no broad capture-breadth superiority claim is allowed.
+  blocked until Docker-contained hook/viewer capture evidence exists, so no broad
+  capture-breadth superiority claim is allowed.
 - The benchmark runner and report publisher are checked in and Docker-isolated:
   `cargo make baseline-live-docker`, `cargo make baseline-backfill-docker`,
   `cargo make baseline-production-private-addendum`,
   `cargo make baseline-backfill-10k-docker`,
   `cargo make baseline-backfill-100k-docker`,
   `cargo make baseline-soak-docker`, `cargo make baseline-live-report`,
-  `cargo make real-world-memory-live-adapters`, and
+  `cargo make real-world-memory-live-adapters`,
+  `cargo make real-world-first-generation-oss`, and
   `cargo make baseline-live-docker-clean`. Expensive 100k and long-soak profiles
   are opt-in and do not run in normal checks.
 
@@ -225,6 +235,7 @@ Detailed evidence and interpretation:
 - [Graph/RAG Scored Smoke Adapter Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-graph-rag-scored-smoke-adapter-report.md)
 - [mem0/OpenMemory History and UI Export Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-mem0-openmemory-history-ui-export-report.md)
 - [Capture/Write-Policy Live Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-capture-write-policy-live-report.md)
+- [First-Generation OSS Continuity and Source-Store Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-first-generation-oss-continuity-source-store-report.md)
 - [Live Baseline Benchmark Runbook](docs/guide/benchmarking/live_baseline_benchmark.md)
 - [Single-User Production Runbook](docs/guide/single_user_production.md)
 - Benchmark contract:
@@ -303,6 +314,7 @@ Detailed comparison, mechanism-level analysis, and source map:
 - [Graph/RAG Scored Smoke Adapter Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-graph-rag-scored-smoke-adapter-report.md)
 - [mem0/OpenMemory History and UI Export Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-mem0-openmemory-history-ui-export-report.md)
 - [Capture/Write-Policy Live Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-capture-write-policy-live-report.md)
+- [First-Generation OSS Continuity and Source-Store Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-first-generation-oss-continuity-source-store-report.md)
 - [Live Baseline Benchmark Runbook](docs/guide/benchmarking/live_baseline_benchmark.md)
 - [Real-World Agent Memory Benchmark](docs/guide/benchmarking/real_world_agent_memory_benchmark.md)
 - [External Memory Improvement Plan](docs/guide/research/external_memory_improvement_plan.md)

diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/agentmemory_durable_capture_path_blocked.json b/apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/agentmemory_durable_capture_path_blocked.json
@@ -0,0 +1,208 @@
+{
+  "schema": "elf.real_world_job/v1",
+  "job_id": "first-gen-agentmemory-durable-capture-blocked-001",
+  "suite": "capture_integration",
+  "title": "Select the durable agentmemory capture path before scoring hooks",
+  "encoding": {
+    "status": "blocked",
+    "reason": "agentmemory's current Docker baseline still uses a process-local SDK/KV mock, so work-resume and write-policy hook capture cannot be scored until a persistent local session, KV, and index path survives a fresh process.",
+    "follow_up": {
+      "title": "Wire agentmemory durable local session capture for work-resume jobs",
+      "reason": "The fair path is a Docker-contained adapter that persists the agentmemory observation log, KV store, and searchable index between capture and replay processes."
+    }
+  },
+  "corpus": {
+    "corpus_id": "first-generation-oss-agentmemory-2026-06-11",
+    "profile": "external_adapter",
+    "items": [
+      {
+        "evidence_id": "agentmemory-selected-durable-path",
+        "kind": "adapter_plan",
+        "text": "Selected agentmemory path: run capture hooks into a Docker-local session directory, persist the SDK KV store and searchable index, restart a fresh process, then score work_resume and write-policy prompts against that recovered store.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "agentmemory_durable_capture_path_blocked",
+            "evidence_id": "agentmemory-selected-durable-path"
+          },
+          "locator": {
+            "quote": "persist the SDK KV store and searchable index"
+          }
+        },
+        "created_at": "2026-06-11T10:00:00Z"
+      },
+      {
+        "evidence_id": "agentmemory-mock-boundary",
+        "kind": "adapter_blocker",
+        "text": "Current blocker: the live-baseline adapter registers agentmemory functions against a process-local StateKV Map and in-memory index, so it cannot prove cold-start recovery or hook capture durability.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "agentmemory_durable_capture_path_blocked",
+            "evidence_id": "agentmemory-mock-boundary"
+          },
+          "locator": {
+            "quote": "process-local StateKV Map and in-memory index"
+          }
+        },
+        "created_at": "2026-06-11T10:01:00Z"
+      },
+      {
+        "evidence_id": "agentmemory-pass-decoy",
+        "kind": "adapter_state",
+        "text": "Decoy: agentmemory same-corpus retrieval passing through the mock proves durable coding-agent continuity and write-policy capture.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "agentmemory_durable_capture_path_blocked",
+            "evidence_id": "agentmemory-pass-decoy"
+          }
+        },
+        "created_at": "2026-06-11T09:59:00Z"
+      }
+    ],
+    "adapter_response": {
+      "adapter_id": "fixture_first_generation_oss",
+      "answer": {
+        "content": "agentmemory remains blocked for durable work-resume and write-policy hook capture. The selected local path is a Docker-contained session directory that persists the SDK KV store and searchable index across a fresh process; the current StateKV Map and in-memory index cannot prove that.",
+        "claims": [
+          {
+            "claim_id": "selected_durable_path",
+            "text": "The selected local path persists the SDK KV store and searchable index across a fresh process.",
+            "evidence_ids": ["agentmemory-selected-durable-path"],
+            "confidence": "high"
+          },
+          {
+            "claim_id": "current_mock_blocker",
+            "text": "The current StateKV Map and in-memory index cannot prove durable continuity.",
+            "evidence_ids": ["agentmemory-mock-boundary"],
+            "confidence": "high"
+          }
+        ],
+        "evidence_ids": ["agentmemory-selected-durable-path", "agentmemory-mock-boundary"],
+        "latency_ms": 1.0,
+        "cost": {
+          "currency": "USD",
+          "amount": 0.0,
+          "input_tokens": 0,
+          "output_tokens": 0
+        }
+      }
+    },
+    "capture_behaviors": {
+      "blocked": [
+        "agentmemory durable hook capture waits for a persistent Docker-local session, KV, and index path."
+      ],
+      "notes": [
+        "Same-corpus mock retrieval is not promoted into work-resume or capture integration pass evidence."
+      ]
+    }
+  },
+  "timeline": [
+    {
+      "event_id": "agentmemory-durable-path-selected",
+      "ts": "2026-06-11T10:00:00Z",
+      "actor": "benchmark",
+      "action": "selected_durable_adapter_path",
+      "evidence_ids": ["agentmemory-selected-durable-path"],
+      "summary": "The next fair agentmemory path must persist capture state across a fresh process."
+    },
+    {
+      "event_id": "agentmemory-mock-blocker-preserved",
+      "ts": "2026-06-11T10:01:00Z",
+      "actor": "benchmark",
+      "action": "kept_blocked_state",
+      "evidence_ids": ["agentmemory-mock-boundary"],
+      "summary": "The current in-memory adapter remains blocked for durable continuity."
+    }
+  ],
+  "prompt": {
+    "role": "user",
+    "content": "What local agentmemory path should be used for work-resume and write-policy capture, and can the current mock be scored?",
+    "job_mode": "operate",
+    "constraints": ["cite_evidence", "state_blockers", "do_not_promote_mock_smoke"]
+  },
+  "expected_answer": {
+    "must_include": [
+      {
+        "claim_id": "selected_durable_path",
+        "text": "The selected local path persists the SDK KV store and searchable index across a fresh process."
+      },
+      {
+        "claim_id": "current_mock_blocker",
+        "text": "The current StateKV Map and in-memory index cannot prove durable continuity."
+      }
+    ],
+    "must_not_include": [
+      "same-corpus retrieval passing through the mock proves durable coding-agent continuity"
+    ],
+    "evidence_links": {
+      "selected_durable_path": ["agentmemory-selected-durable-path"],
+      "current_mock_blocker": ["agentmemory-mock-boundary"]
+    },
+    "answer_type": "blocked_plan",
+    "accepted_alternates": [],
+    "requires_caveat": true,
+    "requires_refusal": false
+  },
+  "required_evidence": [
+    {
+      "evidence_id": "agentmemory-selected-durable-path",
+      "claim_id": "selected_durable_path",
+      "requirement": "cite",
+      "quote": "persist the SDK KV store and searchable index"
+    },
+    {
+      "evidence_id": "agentmemory-mock-boundary",
+      "claim_id": "current_mock_blocker",
+      "requirement": "cite",
+      "quote": "process-local StateKV Map and in-memory index"
+    }
+  ],
+  "negative_traps": [
+    {
+      "trap_id": "mock-smoke-durable-pass",
+      "type": "unsupported_prior",
+      "evidence_ids": ["agentmemory-pass-decoy"],
+      "failure_if_used": true
+    }
+  ],
+  "scoring_rubric": {
+    "dimensions": {
+      "uncertainty_handling": {
+        "weight": 0.35,
+        "max_points": 1.0,
+        "criteria": "Keeps the durable path blocked until persistent state is proven."
+      },
+      "workflow_helpfulness": {
+        "weight": 0.25,
+        "max_points": 1.0,
+        "criteria": "Names the concrete local path needed for the next adapter."
+      },
+      "evidence_grounding": {
+        "weight": 0.25,
+        "max_points": 1.0,
+        "criteria": "Cites the selected path and the current mock boundary."
+      },
+      "trap_avoidance": {
+        "weight": 0.15,
+        "max_points": 1.0,
+        "criteria": "Does not promote the mock same-corpus smoke into durable continuity proof."
+      }
+    },
+    "pass_threshold": 0.85,
+    "hard_fail_rules": [
+      "unsupported high-confidence claim about a required decision or fact",
+      "use of a negative trap marked failure_if_used = true"
+    ]
+  },
+  "allowed_uncertainty": {
+    "can_answer_unknown": false,
+    "acceptable_phrases": [],
+    "fallback_action": "state_blocker"
+  },
+  "tags": ["external_adapter", "agentmemory", "capture_integration", "blocked", "no_live_claim"]
+}