Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions Makefile.toml
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,8 @@ args = [
# | baseline-live-docker | command | |
# | baseline-live-report | command | |
# | baseline-live-docker-clean | command | |
# | baseline-production-synthetic | command | |
# | baseline-production-private | command | |

[tasks.baseline-live-docker]
workspace = false
Expand Down Expand Up @@ -327,6 +329,22 @@ args = [
"--remove-orphans",
]

# Runs the baseline runner container with the `production-synthetic` profile.
#
# The inline bash script:
#   1. Resolves the current commit with `git rev-parse HEAD` and appends
#      `+dirty` when `git status --porcelain` prints anything.
#   2. Reads ELF_BASELINE_PROJECTS from the environment and falls back to
#      `ELF` when it is unset or empty.
#   3. Exports ELF_BASELINE_ELF_HEAD, ELF_BASELINE_PROJECTS, and
#      ELF_BASELINE_PROFILE=production-synthetic, then invokes
#      `docker compose -f docker-compose.baseline.yml run --build --rm baseline-runner`.
#
# `set -euo pipefail` aborts the task on the first failing command; the
# `|| true` after `printenv` keeps an unset ELF_BASELINE_PROJECTS from
# tripping that.
#
# NOTE(review): `printenv` plus an explicit `-z` test is presumably used
# instead of a brace-style default expansion so that cargo-make's own
# variable substitution in `args` never touches the script - confirm before
# simplifying.
[tasks.baseline-production-synthetic]
workspace = false
command = "bash"
args = [
"-lc",
"set -euo pipefail; head=\"$(git rev-parse HEAD)\"; if [ -n \"$(git status --porcelain)\" ]; then head=\"$head+dirty\"; fi; selected_projects=\"$(printenv ELF_BASELINE_PROJECTS || true)\"; if [ -z \"$selected_projects\" ]; then selected_projects=\"ELF\"; fi; export ELF_BASELINE_ELF_HEAD=\"$head\"; export ELF_BASELINE_PROJECTS=\"$selected_projects\"; export ELF_BASELINE_PROFILE=production-synthetic; docker compose -f docker-compose.baseline.yml run --build --rm baseline-runner",
]

# Runs the baseline runner container with the `production-private` profile.
#
# Requires ELF_BASELINE_PRODUCTION_CORPUS_MANIFEST to be set and non-empty in
# the calling environment; otherwise the script prints an error to stderr and
# exits with status 1 before touching git or docker. Only non-emptiness is
# checked - the script does not verify that the value names an existing file.
#
# After the guard, the inline bash script:
#   1. Resolves the current commit with `git rev-parse HEAD` and appends
#      `+dirty` when `git status --porcelain` prints anything.
#   2. Reads ELF_BASELINE_PROJECTS from the environment and falls back to
#      `ELF` when it is unset or empty.
#   3. Exports ELF_BASELINE_ELF_HEAD, ELF_BASELINE_PROJECTS, and
#      ELF_BASELINE_PROFILE=production-private, then invokes
#      `docker compose -f docker-compose.baseline.yml run --build --rm baseline-runner`.
#
# NOTE(review): the script never re-exports or forwards the manifest variable
# itself; presumably docker-compose.baseline.yml passes it through to the
# container - confirm against the compose file.
#
# NOTE(review): `printenv` plus explicit `-z` tests are presumably used
# instead of brace-style default expansion so that cargo-make's own variable
# substitution in `args` never touches the script - confirm before
# simplifying.
[tasks.baseline-production-private]
workspace = false
command = "bash"
args = [
"-lc",
"set -euo pipefail; manifest=\"$(printenv ELF_BASELINE_PRODUCTION_CORPUS_MANIFEST || true)\"; if [ -z \"$manifest\" ]; then echo \"ELF_BASELINE_PRODUCTION_CORPUS_MANIFEST is required for baseline-production-private\" >&2; exit 1; fi; head=\"$(git rev-parse HEAD)\"; if [ -n \"$(git status --porcelain)\" ]; then head=\"$head+dirty\"; fi; selected_projects=\"$(printenv ELF_BASELINE_PROJECTS || true)\"; if [ -z \"$selected_projects\" ]; then selected_projects=\"ELF\"; fi; export ELF_BASELINE_ELF_HEAD=\"$head\"; export ELF_BASELINE_PROJECTS=\"$selected_projects\"; export ELF_BASELINE_PROFILE=production-private; docker compose -f docker-compose.baseline.yml run --build --rm baseline-runner",
]


# Meta
# | task | type | cwd |
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ embeddings.
Detailed evidence and interpretation:

- [Live Baseline Benchmark Report - June 9, 2026](docs/guide/benchmarking/2026-06-09-live-baseline-report.md)
- [Synthetic Production Corpus Benchmark Report - June 9, 2026](docs/guide/benchmarking/2026-06-09-production-corpus-report.md)
- [Live Baseline Benchmark Runbook](docs/guide/benchmarking/live_baseline_benchmark.md)

Quick comparison snapshot (objective/high-level).
Expand Down Expand Up @@ -177,6 +178,7 @@ Project signature strengths (what each does especially well):
Detailed comparison, mechanism-level analysis, and source map:

- [Live Baseline Benchmark Report - June 9, 2026](docs/guide/benchmarking/2026-06-09-live-baseline-report.md)
- [Synthetic Production Corpus Benchmark Report - June 9, 2026](docs/guide/benchmarking/2026-06-09-production-corpus-report.md)
- [Live Baseline Benchmark Runbook](docs/guide/benchmarking/live_baseline_benchmark.md)
- [External Memory Improvement Plan](docs/guide/research/external_memory_improvement_plan.md)
- [Detailed External Comparison](docs/guide/research/comparison_external_projects.md)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
{
"schema": "elf.production_corpus_manifest/v1",
"manifest_id": "synthetic-coding-agent-prod-corpus-2026-06-09",
"description": "Synthetic, sanitized production-style coding-agent memory corpus for ELF adoption benchmarking.",
"evidence": [
{
"evidence_id": "issue-xy812-resume",
"category": "issue",
"title": "XY-812 Resume Lane",
"text": "XY-812 resume lane uses branch y/elf-xy-812. The next command is `cargo make trace-gate`; the stale blocker cleared after PR #108 merged."
},
{
"evidence_id": "pr-110-review",
"category": "pr",
"title": "PR 110 Review Status",
"text": "PR #110 is review-ready for the ELF viewer lane. It passed `cargo make checks` and waits for the non-draft review handoff."
},
{
"evidence_id": "worktree-xy791-repair",
"category": "worktree",
"title": "XY-791 Strict Config Repair",
"text": "Worktree XY-791 recovered strict-config repair after rebase. The exact gate was `cargo make fmt && cargo make lint-fix && cargo make checks`."
},
{
"evidence_id": "runbook-live-baseline",
"category": "runbook",
"title": "Private Production Corpus Runbook",
"text": "Private production fixtures use `ELF_BASELINE_PRODUCTION_CORPUS_MANIFEST` with `cargo make baseline-production-private` and stay out of git."
},
{
"evidence_id": "decision-qdrant-derived",
"category": "decision",
"title": "Qdrant Derived Index Decision",
"text": "Decision: Qdrant remains a rebuildable derived index. Postgres stores source-of-truth vectors, notes, chunks, and audit rows."
},
{
"evidence_id": "blocker-stale-qwen-key",
"category": "blocker",
"title": "Stale Provider Key Blocker",
"text": "Stale blocker: missing Qwen key applied only to provider stress runs. The synthetic production corpus uses local deterministic embeddings."
},
{
"evidence_id": "recovery-xy640-ledger",
"category": "recovery_note",
"title": "XY-640 Ledger Replay Recovery",
"text": "Recovery note: XY-640 ledger replay resumes from checkpoint `ledger-replay-42` and verifies the retained lane with `cargo make test`."
},
{
"evidence_id": "decision-xy818-supersedes",
"category": "decision",
"title": "Superseded Command Decision",
"text": "Update case: old command `cargo make lint` was superseded by `cargo make lint-fix` for Decodex ELF lanes."
}
],
"queries": [
{
"query_id": "q-resume-lane",
"task": "resume_lane",
"query": "How do I resume XY-812 and what command is next?",
"expected_evidence_ids": ["issue-xy812-resume"],
"allowed_alternate_evidence_ids": [],
"expected_terms": ["XY-812", "cargo make trace-gate"]
},
{
"query_id": "q-recover-exact-command",
"task": "recover_exact_command",
"query": "Recover the exact repair gate command for XY-791 strict config.",
"expected_evidence_ids": ["worktree-xy791-repair"],
"allowed_alternate_evidence_ids": ["runbook-live-baseline"],
"expected_terms": ["XY-791", "cargo make fmt && cargo make lint-fix && cargo make checks"]
},
{
"query_id": "q-explain-stale-blocker",
"task": "explain_stale_blocker",
"query": "Why is the missing Qwen key blocker stale for the synthetic production corpus?",
"expected_evidence_ids": ["blocker-stale-qwen-key"],
"allowed_alternate_evidence_ids": [],
"expected_terms": ["missing Qwen key", "local deterministic embeddings"]
},
{
"query_id": "q-find-prior-decision",
"task": "find_prior_decision",
"query": "What prior decision explains why Qdrant can be rebuilt?",
"expected_evidence_ids": ["decision-qdrant-derived"],
"allowed_alternate_evidence_ids": [],
"expected_terms": ["Qdrant", "rebuildable derived index"]
},
{
"query_id": "q-compare-project-status",
"task": "compare_project_status",
"query": "Compare PR #110 and XY-640 status.",
"expected_evidence_ids": ["pr-110-review"],
"allowed_alternate_evidence_ids": ["recovery-xy640-ledger"],
"expected_terms": ["PR #110", "review-ready"]
},
{
"query_id": "q-detect-contradiction-update",
"task": "detect_contradiction_update",
"query": "Which command superseded cargo make lint for Decodex ELF lanes?",
"expected_evidence_ids": ["decision-xy818-supersedes"],
"allowed_alternate_evidence_ids": [],
"expected_terms": ["cargo make lint-fix", "superseded"]
}
]
}
Loading