Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions Makefile.toml
Original file line number Diff line number Diff line change
Expand Up @@ -825,6 +825,7 @@ args = [
# | lightrag-docker-context-smoke | command | |
# | graphrag-docker-smoke | command | |
# | graphiti-zep-docker-temporal-smoke | command | |
# | graphify-docker-graph-report-smoke | command | |

[tasks.ragflow-docker-smoke]
workspace = false
Expand Down Expand Up @@ -857,6 +858,14 @@ args = [
"set -euo pipefail; start=\"$(printenv ELF_GRAPHITI_ZEP_SMOKE_START || true)\"; status=0; if [ \"$start\" = \"1\" ]; then docker compose -f docker-compose.baseline.yml --profile graphiti-zep up -d graphiti-falkordb; fi; docker compose -f docker-compose.baseline.yml run --build --rm -e ELF_GRAPHITI_ZEP_SMOKE_RUN -e ELF_GRAPHITI_ZEP_SMOKE_REPORT_DIR -e ELF_GRAPHITI_ZEP_SMOKE_WORK_DIR -e ELF_GRAPHITI_ZEP_SMOKE_INSTALL -e ELF_GRAPHITI_ZEP_VERSION -e ELF_GRAPHITI_ZEP_PACKAGE -e ELF_GRAPHITI_ZEP_REF -e ELF_GRAPHITI_ZEP_API_BASE -e ELF_GRAPHITI_ZEP_API_KEY -e ELF_GRAPHITI_ZEP_LLM_MODEL -e ELF_GRAPHITI_ZEP_EMBEDDING_MODEL -e ELF_GRAPHITI_ZEP_FALKORDB_HOST -e ELF_GRAPHITI_ZEP_FALKORDB_PORT -e ELF_GRAPHITI_ZEP_FALKORDB_DATABASE -e ELF_GRAPHITI_ZEP_TIMEOUT_SECONDS -e ELF_GRAPHITI_ZEP_STARTUP_ATTEMPTS -e ELF_GRAPHITI_ZEP_STARTUP_INTERVAL_SECONDS baseline-runner python3 scripts/graphiti-zep-docker-temporal-smoke.py || status=$?; if [ \"$start\" = \"1\" ]; then docker compose -f docker-compose.baseline.yml --profile graphiti-zep stop graphiti-falkordb >/dev/null 2>&1 || true; fi; exit \"$status\"",
]

# graphify Docker graph/report smoke task.
# Runs scripts/graphify-docker-graph-report-smoke.py with python3 inside the
# `baseline-runner` service defined in docker-compose.baseline.yml.
[tasks.graphify-docker-graph-report-smoke]
# NOTE(review): presumably keeps cargo-make from fanning this task out to every
# workspace member — confirm against the cargo-make workspace documentation.
workspace = false
command = "bash"
args = [
# -l: login shell, -c: execute the following string as the script.
"-lc",
# `set -euo pipefail` aborts on the first failing command, unset variable, or
# failed pipeline stage, so the task's exit status is the compose run's status.
# `run --build --rm` rebuilds the service image before starting and removes the
# one-off container afterwards.
# Each `-e NAME` is given without a value; per Docker's documented behaviour this
# forwards the caller's current value of NAME into the container. The
# ELF_GRAPHIFY_* names are consumed by the smoke script, which is not visible here.
"set -euo pipefail; docker compose -f docker-compose.baseline.yml run --build --rm -e ELF_GRAPHIFY_SMOKE_RUN -e ELF_GRAPHIFY_SMOKE_REPORT_DIR -e ELF_GRAPHIFY_SMOKE_WORK_DIR -e ELF_GRAPHIFY_SMOKE_INSTALL -e ELF_GRAPHIFY_PACKAGE -e ELF_GRAPHIFY_REF -e ELF_GRAPHIFY_TIMEOUT_SECONDS -e ELF_GRAPHIFY_QUERY_BUDGET baseline-runner python3 scripts/graphify-docker-graph-report-smoke.py",
]

[tasks.real-world-memory-knowledge]
workspace = false
dependencies = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1943,46 +1943,61 @@
"evidence_class": "research_gate",
"docker_default": true,
"host_global_installs_required": false,
"overall_status": "not_encoded",
"overall_status": "blocked",
"setup": {
"status": "not_encoded",
"evidence": "XY-882 marks graphify as an adapter_candidate for a Docker-only CLI/materializer path, but no adapter is implemented."
"status": "blocked",
"evidence": "XY-889 adds a Docker-only graph/report smoke command. The checked-in manifest remains a research gate until a generated artifact reaches graphify graph/report output.",
"command": "cargo make graphify-docker-graph-report-smoke",
"artifact": "tmp/real-world-memory/graphify-smoke/graphify-smoke.json"
},
"run": {
"status": "not_encoded",
"evidence": "No graphify graph/report build is encoded."
"status": "blocked",
"evidence": "The smoke installs graphify in a container-local venv, runs over a generated public corpus, and records typed setup/runtime failure if graph/report build or query output is unavailable.",
"command": "cargo make graphify-docker-graph-report-smoke",
"artifact": "tmp/real-world-memory/graphify-smoke/summary.json"
},
"result": {
"status": "not_encoded",
"evidence": "No graph-navigation or knowledge-compilation result is claimed."
"status": "blocked",
"evidence": "No graph-navigation or knowledge-compilation quality result is claimed from the checked-in research gate. Generated smoke artifacts may become live_real_world only after graph.json, GRAPH_REPORT.md, and graphify query output map to generated evidence ids.",
"artifact": "tmp/real-world-memory/graphify-smoke/graphify-smoke.json"
},
"capabilities": [
{
"capability": "docker_cli_boundary",
"status": "blocked",
"evidence": "The smoke uses docker-compose.baseline.yml baseline-runner, a container-local Python venv, and isolated assistant config paths; it does not install host-global assistant hooks."
},
{
"capability": "graph_report_generation",
"status": "not_encoded",
"evidence": "Graph reports and query output have a candidate scoring path, but they are not executed by the runner."
"status": "blocked",
"evidence": "The smoke captures graphify-out/graph.json, GRAPH_REPORT.md, cache metadata, command logs, build time, graph size, and report size when build succeeds."
},
{
"capability": "real_world_job_adapter",
"status": "blocked",
"evidence": "The smoke maps node labels, edge types, confidence tags, source files, source locations, report text, and query output to generated real_world_job evidence ids when graphify reaches output."
},
{
"capability": "multimodal_code_graph",
"status": "not_encoded",
"evidence": "Multimodal graph extraction is a reference capability but not scored."
"evidence": "Multimodal extraction for videos, images, PDFs, or broad codebase understanding is a reference capability but not scored by this smoke."
},
{
"capability": "real_world_job_adapter",
"capability": "quality_or_scale_claim",
"status": "not_encoded",
"evidence": "No graphify materializer exists."
"evidence": "The smoke does not claim broad graph quality, private corpus behavior, scale, or authoritative memory-store behavior."
}
],
"suites": [
{
"suite_id": "knowledge_compilation",
"status": "not_encoded",
"evidence": "Graph report citation and lint behavior are not scored."
"status": "blocked",
"evidence": "The generated smoke can exercise graph/report evidence mapping for one generated knowledge-compilation fixture, but the checked-in record stays blocked until a live artifact reaches graph/report output."
},
{
"suite_id": "retrieval",
"status": "not_encoded",
"evidence": "Graph-guided query output is not mapped to required evidence."
"status": "blocked",
"evidence": "Graph-guided query output is mapped only for the generated smoke when available; broad retrieval quality scoring remains unclaimed."
},
{
"suite_id": "work_resume",
Expand All @@ -1995,6 +2010,16 @@
"kind": "source",
"ref": "https://github.com/safishamsi/graphify",
"status": "real"
},
{
"kind": "command",
"ref": "cargo make graphify-docker-graph-report-smoke",
"status": "blocked"
},
{
"kind": "artifact",
"ref": "tmp/real-world-memory/graphify-smoke/graphify-smoke.json",
"status": "blocked"
}
],
"execution_metadata": {
Expand All @@ -2010,14 +2035,15 @@
"evidence": "Official CLI, output artifact, query, and source-location contract."
}
],
"setup_path": "Install graphify inside Docker, build a graph/report from a generated corpus, and export query evidence without installing host-global assistant hooks.",
"runtime_boundary": "Docker-only CLI/materializer run over mounted benchmark corpus.",
"resource_expectation": "Graph build cost scales with corpus and model choices; record build time, graph size, and generated report size.",
"setup_path": "Run cargo make graphify-docker-graph-report-smoke to install graphify in Docker, build graph/report artifacts from a generated public corpus, and export query evidence without installing host-global assistant hooks.",
"runtime_boundary": "docker-compose.baseline.yml baseline-runner, container-local Python venv, isolated HOME/config paths, generated public corpus, and artifacts under tmp/real-world-memory/graphify-smoke.",
"resource_expectation": "Graph build cost scales with corpus and model choices; generated artifacts record package reference, provider/model boundary, build time, graph size, report size, cache size, timeout, and retry behavior.",
"retry_guidance": [
"Start with a generated public code/document corpus.",
"Score graph-guided answers only when report nodes cite source evidence IDs."
"Run cargo make graphify-docker-graph-report-smoke first; setup/runtime failures must remain typed artifacts, not pass claims.",
"Do not use graphify host assistant hook installs or operator-owned assistant configuration as proof.",
"Score graph-guided answers only when graph.json, GRAPH_REPORT.md, and graphify query output map to generated evidence ids."
],
"research_depth": "D1 feasibility verdict: adapter_candidate (XY-882); research_gate only, adapter not encoded"
"research_depth": "D1 feasibility verdict plus XY-889 Docker graph/report smoke implementation; checked-in record remains research_gate unless a generated artifact reaches graphify output"
},
"follow_up": {
"title": "[ELF benchmark adapter] Implement graphify Docker graph-report adapter",
Expand Down
57 changes: 42 additions & 15 deletions apps/elf-eval/tests/real_world_job_benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -257,13 +257,13 @@ fn assert_external_adapter_manifest_summary(report: &Value) {
report
.pointer("/external_adapters/summary/overall_status_counts/blocked")
.and_then(Value::as_u64),
Some(5)
Some(6)
);
assert_eq!(
report
.pointer("/external_adapters/summary/overall_status_counts/not_encoded")
.and_then(Value::as_u64),
Some(8)
Some(7)
);
assert_eq!(
report
Expand All @@ -281,7 +281,7 @@ fn assert_external_adapter_manifest_summary(report: &Value) {
report
.pointer("/external_adapters/summary/suite_status_counts/blocked")
.and_then(Value::as_u64),
Some(9)
Some(11)
);
}

Expand All @@ -297,6 +297,7 @@ fn assert_external_adapter_manifest_records(report: &Value) -> Result<()> {
let lightrag = find_by_field(adapters, "/adapter_id", "lightrag_research_gate")?;
let graphrag = find_by_field(adapters, "/adapter_id", "graphrag_research_gate")?;
let graphiti_zep = find_by_field(adapters, "/adapter_id", "graphiti_zep_research_gate")?;
let graphify = find_by_field(adapters, "/adapter_id", "graphify_research_gate")?;
let qmd_deep = find_by_field(adapters, "/adapter_id", "qmd_deep_profile_gate")?;

assert_eq!(elf.pointer("/evidence_class").and_then(Value::as_str), Some("fixture_backed"));
Expand Down Expand Up @@ -364,38 +365,64 @@ fn assert_external_adapter_manifest_records(report: &Value) -> Result<()> {
Some("cargo make graphrag-docker-smoke")
);
assert_eq!(graphrag.pointer("/suites/1/status").and_then(Value::as_str), Some("not_encoded"));

assert_graphiti_zep_adapter(graphiti_zep);
assert_graphify_adapter(graphify);

assert_eq!(
graphiti_zep.pointer("/evidence_class").and_then(Value::as_str),
Some("research_gate")
qmd_deep.pointer("/capabilities/2/status").and_then(Value::as_str),
Some("unsupported")
);
assert_eq!(graphiti_zep.pointer("/overall_status").and_then(Value::as_str), Some("blocked"));

Ok(())
}

/// Asserts the checked-in manifest record for the Graphiti/Zep research gate.
///
/// `adapter` is the JSON object for the `graphiti_zep_research_gate` entry. Each
/// field is looked up by JSON Pointer and compared as a string, so a missing or
/// non-string field yields `None` and fails the corresponding assertion.
///
/// # Panics
///
/// Panics if any of the checked fields (evidence class, overall status, setup
/// and run commands, first suite id/status, research depth) differs from the
/// expected value.
fn assert_graphiti_zep_adapter(adapter: &Value) {
    assert_eq!(adapter.pointer("/evidence_class").and_then(Value::as_str), Some("research_gate"));
    assert_eq!(adapter.pointer("/overall_status").and_then(Value::as_str), Some("blocked"));
    assert_eq!(
        adapter.pointer("/setup/command").and_then(Value::as_str),
        Some("cargo make graphiti-zep-docker-temporal-smoke")
    );
    assert_eq!(
        adapter.pointer("/run/command").and_then(Value::as_str),
        Some(
            "ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make graphiti-zep-docker-temporal-smoke"
        )
    );
    // Only the first suite entry is pinned here.
    assert_eq!(
        adapter.pointer("/suites/0/suite_id").and_then(Value::as_str),
        Some("memory_evolution")
    );
    assert_eq!(adapter.pointer("/suites/0/status").and_then(Value::as_str), Some("blocked"));
    assert_eq!(
        adapter.pointer("/execution_metadata/research_depth").and_then(Value::as_str),
        Some(
            "D2 feasibility plus XY-888 Docker temporal smoke implementation; checked-in record remains research_gate unless a generated artifact reaches Graphiti search output"
        )
    );
}

fn assert_graphify_adapter(adapter: &Value) {
assert_eq!(adapter.pointer("/evidence_class").and_then(Value::as_str), Some("research_gate"));
assert_eq!(adapter.pointer("/overall_status").and_then(Value::as_str), Some("blocked"));
assert_eq!(
qmd_deep.pointer("/capabilities/2/status").and_then(Value::as_str),
Some("unsupported")
adapter.pointer("/setup/command").and_then(Value::as_str),
Some("cargo make graphify-docker-graph-report-smoke")
);
assert_eq!(
adapter.pointer("/suites/0/suite_id").and_then(Value::as_str),
Some("knowledge_compilation")
);
assert_eq!(adapter.pointer("/suites/0/status").and_then(Value::as_str), Some("blocked"));
assert_eq!(adapter.pointer("/suites/1/suite_id").and_then(Value::as_str), Some("retrieval"));
assert_eq!(adapter.pointer("/suites/1/status").and_then(Value::as_str), Some("blocked"));
assert_eq!(
adapter.pointer("/execution_metadata/research_depth").and_then(Value::as_str),
Some(
"D1 feasibility verdict plus XY-889 Docker graph/report smoke implementation; checked-in record remains research_gate unless a generated artifact reaches graphify output"
)
);

Ok(())
}

fn assert_live_sweep_record(adapter: &Value) -> Result<()> {
Expand Down
Loading