diff --git a/.env.example b/.env.example index 472b5a1..d94ae60 100644 --- a/.env.example +++ b/.env.example @@ -26,3 +26,13 @@ SESSION_SECRET= # Shared secret gating the write endpoints (POST /run, /packs/{id}/decision). Held server-side only # (read API + the dashboard proxies); never exposed to the browser. Generate: openssl rand -hex 16. RUN_API_TOKEN= + +# ===== Sift Memory (Voyage AI, DBRE-side read-only retrieval) ===== +# Used only to retrieve/rerank DBRE guidance for Run Review. It must never be exposed to +# the browser and never participates in approval, mutation, or verification decisions. +VOYAGE_API_KEY= +VOYAGE_EMBED_MODEL=voyage-4-lite +VOYAGE_RERANK_MODEL=rerank-2.5-lite +VOYAGE_MEMORY_TOP_K=3 +VOYAGE_MEMORY_PREFILTER_K=8 +VOYAGE_MEMORY_MAX_DOCS=12 diff --git a/agents/agent.py b/agents/agent.py index ccd943f..e829f24 100644 --- a/agents/agent.py +++ b/agents/agent.py @@ -34,23 +34,26 @@ class AgentRole(StrEnum): INSTRUCTIONS = { AgentRole.FULL: ( - "You are a MongoDB performance engineer. Use the native Mongo tools to read the " - "slow query, compare candidates, diagnose the ESR-correct index, and explain the " - "rationale. Return compact JSON with evidence, candidates, experiments, " - "recommended_index, and rationale. Never create or drop an index during diagnosis." + "You are a MongoDB performance engineer. The request gives a captured slow query; pass " + "it verbatim as the `query_json` argument to every tool. Use the native Mongo tools to " + "read the slow query, compare candidates, diagnose the ESR-correct index, and explain " + "the rationale. Return compact JSON with evidence, candidates, recommended_index, and " + "rationale. Never create or drop an index during diagnosis." ), AgentRole.DIAGNOSE: ( - "You are the Diagnose Agent. Run explain_slow_query and diagnose_candidate. " - "Return compact JSON with before evidence, diagnosis, and recommended_index. " - "Never create or drop an index." + "You are the Diagnose Agent. Run explain_slow_query then diagnose_candidate, passing the " + "request's query as the `query_json` argument to each. Return compact JSON with before " + "evidence, diagnosis, and recommended_index. Never create or drop an index." ), AgentRole.CANDIDATE: ( - "You are the Candidate Agent. Run compare_candidate_indexes. Return compact JSON " - "with candidate metrics and the winner. Never create or drop an index." + "You are the Candidate Agent. Run compare_candidate_indexes, passing the request's query " + "as the `query_json` argument. Return compact JSON with candidate metrics and the winner. " + "Never create or drop an index." ), AgentRole.RATIONALE: ( - "You are the Rationale Agent. Run rationalize_recommendation. Return compact JSON " - "with recommended_index and rationale grounded in evidence. Never create or drop an index." + "You are the Rationale Agent. Run rationalize_recommendation, passing the request's query " + "as the `query_json` argument. Return compact JSON with recommended_index and rationale " + "grounded in evidence. Never create or drop an index." ), } diff --git a/agents/deploy.py b/agents/deploy.py index cc7602b..ebbbf29 100644 --- a/agents/deploy.py +++ b/agents/deploy.py @@ -130,7 +130,14 @@ def teardown(resource_name: str) -> None: # pragma: no cover - live deploy print(f"TEARDOWN=engine_deleted resource={resource_name}") +_SMOKE_QUERY_JSON = ( + '{"filter": {"purchaseMethod": "Phone", "customer.age": {"$gte": 55, "$lte": 75}}, ' + '"sort": [["saleDate", -1]], "limit": 15}' +) + + def _smoke_prompt(role: AgentRole) -> str: + q = f"Pass this exact JSON string as the `query_json` argument to every tool:\n{_SMOKE_QUERY_JSON}\n" prompts = { AgentRole.DIAGNOSE: ( "Run explain_slow_query and diagnose_candidate. Return compact JSON with " @@ -151,7 +158,7 @@ def _smoke_prompt(role: AgentRole) -> str: "and rationale. Do not mutate the database." ), } - return prompts[role] + return f"{prompts[role]}\n\n{q}" async def smoke( diff --git a/agents/native_mongo_tools.py b/agents/native_mongo_tools.py index 1aaff90..5df8e29 100644 --- a/agents/native_mongo_tools.py +++ b/agents/native_mongo_tools.py @@ -1,23 +1,22 @@ """Python-native Mongo tools for Agent Engine. -These tools are read-only. They mirror the deterministic controller's demo fixture -query and never create or drop indexes. +These tools are read-only and query-parameterised: each one takes the captured slow query as a +JSON string (`query_json`) and explains/diagnoses THAT query against the live collection. They +never create or drop an index — the ESR-correct index is derived deterministically and is built +only after a human approves the evidence hash. """ from __future__ import annotations -from collections.abc import Mapping +import json +from collections.abc import Mapping, Sequence from typing import Any -from controller.demo_fixture import COLL, DB, LIMIT, QUERY_FILTER, QUERY_SORT +from controller.demo_fixture import COLL, DB from controller.diagnosis import diagnose from controller.explain import capture_evidence, get_connection_string from controller.schemas import Evidence -INDEX_B_NAME = "esr_wrong_B" -INDEX_C_NAME = "esr_right_C" -INDEX_B_KEYS = (("storeLocation", 1), ("customer.age", 1), ("saleDate", -1)) -INDEX_C_KEYS = (("storeLocation", 1), ("saleDate", -1), ("customer.age", 1)) NAMESPACE = f"{DB}.{COLL}" @@ -31,104 +30,166 @@ def _require_connection_string() -> str: ) -def _capture_with_hint(hint: str | list[tuple[str, int]]) -> Evidence: +def _parse_query( + query_json: str | Mapping[str, Any], +) -> tuple[dict[str, Any], list[tuple[str, int]], int]: + """Parse the query the orchestrator passes to every tool. + + Accepts {"filter": ..., "sort": [["f", -1]], "limit": N} or a {"query": {...}} wrapper. + """ + data = json.loads(query_json) if isinstance(query_json, str) else dict(query_json) + spec = data.get("query", data) if isinstance(data, dict) else {} + query_filter = dict(spec.get("filter", {})) + query_sort = [(str(field), int(direction)) for field, direction in spec.get("sort", [])] + limit = int(spec.get("limit", 20)) + return query_filter, query_sort, limit + + +def _capture( + query_filter: Mapping[str, Any], query_sort: Sequence[tuple[str, int]], limit: int +) -> Evidence: + """Capture the query's NATURAL explain plan (no index hint), read-only.""" from pymongo import MongoClient client = MongoClient(_require_connection_string()) try: - return capture_evidence(client[DB][COLL], QUERY_FILTER, QUERY_SORT, LIMIT, hint=hint) + return capture_evidence(client[DB][COLL], query_filter, query_sort, limit) finally: client.close() -def _evidence_payload(evidence: Evidence, *, hint: str | list[tuple[str, int]]) -> dict[str, Any]: +def _query_echo( + query_filter: Mapping[str, Any], query_sort: Sequence[tuple[str, int]], limit: int +) -> dict[str, Any]: return { - "namespace": NAMESPACE, - "hint": hint, - "evidence": evidence.model_dump(mode="json"), - "metrics": evidence.metrics.model_dump(mode="json"), + "filter": dict(query_filter), + "sort": [[field, direction] for field, direction in query_sort], + "limit": limit, } -def _candidate_payload( - name: str, index_spec: tuple[tuple[str, int], ...], evidence: Evidence -) -> dict[str, Any]: +def _recommended_spec(diagnosis: Any) -> list[list[Any]]: + return [[field, direction] for field, direction in diagnosis.recommendation.index_spec] + + +def explain_slow_query(query_json: str) -> dict[str, Any]: + """Explain the given slow query's natural plan and return its before-evidence. + + Args: + query_json: JSON string of the query, e.g. + {"filter": {"purchaseMethod": "Phone", "customer.age": {"$gte": 55, "$lte": 75}}, + "sort": [["saleDate", -1]], "limit": 15} + """ + query_filter, query_sort, limit = _parse_query(query_json) + evidence = _capture(query_filter, query_sort, limit) return { - "name": name, - "index_spec": [[field, direction] for field, direction in index_spec], - "has_blocking_sort": evidence.metrics.has_blocking_sort, - "total_keys_examined": evidence.metrics.total_keys_examined, - "docs_examined": evidence.metrics.docs_examined, - "docs_returned": evidence.metrics.docs_returned, - "stages": list(evidence.metrics.stages), + "namespace": NAMESPACE, + "query": _query_echo(query_filter, query_sort, limit), + "evidence": evidence.model_dump(mode="json"), + "metrics": evidence.metrics.model_dump(mode="json"), } -def explain_slow_query() -> dict[str, Any]: - """Capture the canonical slow-query evidence by hinting the known wrong ESR index.""" - evidence = _capture_with_hint(INDEX_B_NAME) - return _evidence_payload(evidence, hint=INDEX_B_NAME) +def compare_candidate_indexes(query_json: str) -> dict[str, Any]: + """Compare the query's current plan against the ESR-recommended index (read-only). + The current plan is measured; the recommended index is derived deterministically and is NOT + created here — diagnosis never mutates. The recommendation is the winner. -def compare_candidate_indexes() -> dict[str, Any]: - """Compare wrong-vs-correct ESR candidates using read-only hinted explains.""" - wrong = _capture_with_hint(INDEX_B_NAME) - correct = _capture_with_hint(list(INDEX_C_KEYS)) + Args: + query_json: JSON string {"filter": ..., "sort": ..., "limit": ...}. + """ + query_filter, query_sort, limit = _parse_query(query_json) + current = _capture(query_filter, query_sort, limit) + diagnosis = diagnose( + query_filter, + query_sort, + has_blocking_sort=current.metrics.has_blocking_sort, + current_index=None, + ) return { "namespace": NAMESPACE, - "query": {"filter": QUERY_FILTER, "sort": QUERY_SORT, "limit": LIMIT}, + "query": _query_echo(query_filter, query_sort, limit), "candidates": [ - _candidate_payload(INDEX_B_NAME, INDEX_B_KEYS, wrong), - _candidate_payload(INDEX_C_NAME, INDEX_C_KEYS, correct), + { + "name": "current_plan", + "index_spec": None, + "has_blocking_sort": current.metrics.has_blocking_sort, + "total_keys_examined": current.metrics.total_keys_examined, + "docs_examined": current.metrics.docs_examined, + "docs_returned": current.metrics.docs_returned, + "stages": list(current.metrics.stages), + }, + { + "name": "esr_recommended", + "index_spec": _recommended_spec(diagnosis), + }, ], - "winner": INDEX_C_NAME - if correct.metrics.total_keys_examined < wrong.metrics.total_keys_examined - else INDEX_B_NAME, + "winner": "esr_recommended", } -def diagnose_candidate() -> dict[str, Any]: - """Run the deterministic ESR diagnosis from live slow-query evidence.""" - evidence = _capture_with_hint(INDEX_B_NAME) +def diagnose_candidate(query_json: str) -> dict[str, Any]: + """Capture natural evidence for the query and run the deterministic ESR diagnosis. + + Args: + query_json: JSON string {"filter": ..., "sort": ..., "limit": ...}. + """ + query_filter, query_sort, limit = _parse_query(query_json) + evidence = _capture(query_filter, query_sort, limit) diagnosis = diagnose( - QUERY_FILTER, - QUERY_SORT, + query_filter, + query_sort, has_blocking_sort=evidence.metrics.has_blocking_sort, - current_index=INDEX_B_NAME, + current_index=None, ) return { "namespace": NAMESPACE, "source": "deterministic_esr", "before": evidence.model_dump(mode="json"), "diagnosis": diagnosis.model_dump(mode="json"), + "recommended_index": _recommended_spec(diagnosis), } -def rationalize_recommendation() -> dict[str, Any]: - """Return a concise evidence-grounded rationale for the ESR recommendation.""" - slow = _capture_with_hint(INDEX_B_NAME) - fast = _capture_with_hint(list(INDEX_C_KEYS)) +def rationalize_recommendation(query_json: str) -> dict[str, Any]: + """Return an evidence-grounded rationale for the ESR recommendation (read-only). + + Args: + query_json: JSON string {"filter": ..., "sort": ..., "limit": ...}. + """ + query_filter, query_sort, limit = _parse_query(query_json) + current = _capture(query_filter, query_sort, limit) diagnosis = diagnose( - QUERY_FILTER, - QUERY_SORT, - has_blocking_sort=slow.metrics.has_blocking_sort, - current_index=INDEX_B_NAME, + query_filter, + query_sort, + has_blocking_sort=current.metrics.has_blocking_sort, + current_index=None, + ) + spec = _recommended_spec(diagnosis) + keys = current.metrics.total_keys_examined + docs = current.metrics.docs_examined + returned = current.metrics.docs_returned + order = ", ".join(f"{field}:{direction}" for field, direction in spec) + problem = ( + "performs a blocking in-memory SORT" + if current.metrics.has_blocking_sort + else "over-scans the collection" ) - before_keys = slow.metrics.total_keys_examined - after_keys = fast.metrics.total_keys_examined return { "namespace": NAMESPACE, - "recommended_index": diagnosis.recommendation.model_dump(mode="json")["index_spec"], + "recommended_index": spec, "rationale": ( - "Index B puts the range field before the sort field, causing a blocking SORT " - f"and {before_keys} keys examined. ESR index C moves saleDate before " - f"customer.age, removes the SORT stage, and examines {after_keys} keys." + f"The query examines {keys} keys and {docs} documents to return {returned}, and " + f"{problem}. The ESR-ordered index ({order}) places equality fields first, the sort " + "field next, and range fields last, so the index supplies the requested order and " + "removes the blocking SORT." ), "evidence": { - "before_keys_examined": before_keys, - "after_keys_examined": after_keys, - "before_has_blocking_sort": slow.metrics.has_blocking_sort, - "after_has_blocking_sort": fast.metrics.has_blocking_sort, + "keys_examined": keys, + "docs_examined": docs, + "docs_returned": returned, + "has_blocking_sort": current.metrics.has_blocking_sort, }, } diff --git a/api/agent_engine.py b/api/agent_engine.py index 7dae6d0..80cb57c 100644 --- a/api/agent_engine.py +++ b/api/agent_engine.py @@ -265,31 +265,20 @@ def _role_prompt( query_sort: list[tuple[str, int]], limit: int, ) -> str: - payload = { - "run_id": run_id, - "namespace": namespace, - "query": {"filter": query_filter, "sort": query_sort, "limit": limit}, - } - prompts = { - "explain_slow_query": ( - "Run exactly one tool: explain_slow_query. Return compact JSON containing " - "`before` evidence and the tool name. Do not call any mutation tools." - ), - "compare_candidate_indexes": ( - "Run exactly one tool: compare_candidate_indexes. Return compact JSON containing " - "candidate metrics, winner, and the tool name. Do not call any mutation tools." - ), - "diagnose_candidate": ( - "Run exactly one tool: diagnose_candidate. Return compact JSON containing " - "`before`, `diagnosis`, `recommended_index`, and the tool name. Do not call " - "any mutation tools." - ), - "rationalize_recommendation": ( - "Run exactly one tool: rationalize_recommendation. Return compact JSON containing " - "`recommended_index`, `rationale`, and the tool name. Do not call any mutation tools." - ), + query_json = json.dumps( + {"filter": query_filter, "sort": [[f, d] for f, d in query_sort], "limit": limit} + ) + returns = { + "explain_slow_query": "`before` evidence and the tool name", + "compare_candidate_indexes": "candidate metrics, winner, and the tool name", + "diagnose_candidate": "`before`, `diagnosis`, `recommended_index`, and the tool name", + "rationalize_recommendation": "`recommended_index`, `rationale`, and the tool name", } - return f"{prompts[tool_name]}\n\n{json.dumps(payload, sort_keys=True)}" + return ( + f"Run exactly one tool: {tool_name}. Pass this exact JSON string as its `query_json` " + f"argument:\n{query_json}\n(run_id={run_id}, namespace={namespace}). Return compact JSON " + f"containing {returns[tool_name]}. Do not call any other tool or any mutation tool." + ) @dataclass(frozen=True) diff --git a/api/memory.py b/api/memory.py new file mode 100644 index 0000000..29babfa --- /dev/null +++ b/api/memory.py @@ -0,0 +1,377 @@ +"""Read-only DBRE memory retrieval for Run Review. + +Sift Memory is out-of-band presentation context. It is not part of EvidencePack v1 and +never changes diagnosis, approval, mutation, or verification decisions. +""" + +from __future__ import annotations + +import json +import math +import os +from dataclasses import dataclass +from datetime import UTC, datetime +from typing import Any, Literal, Protocol +from urllib.error import HTTPError, URLError +from urllib.request import Request, urlopen + +from pydantic import BaseModel, Field + +from controller.schemas import EvidencePack + + +class VoyageMemoryError(RuntimeError): + pass + + +@dataclass(frozen=True) +class VoyageMemoryConfig: + api_key: str = "" + embed_model: str = "voyage-4-lite" + rerank_model: str = "rerank-2.5-lite" + top_k: int = 3 + prefilter_k: int = 8 + max_docs: int = 12 + namespace: str = "dbre-runbook" + + +@dataclass(frozen=True) +class MemoryDocument: + id: str + title: str + text: str + tags: tuple[str, ...] = () + + +class MemoryGuidance(BaseModel): + id: str + title: str + summary: str + reason: str + source: Literal["voyage", "local"] + score: float = Field(ge=0) + tags: list[str] = Field(default_factory=list) + + +class MemoryResponse(BaseModel): + configured: bool + run_id: str + status: Literal["unconfigured", "ok", "empty", "fallback", "failed"] + mutation_authority: bool = False + guidance: list[MemoryGuidance] = Field(default_factory=list) + models: dict[str, str] + query: str | None = None + namespace: str | None = None + retrieved_at: str | None = None + message: str | None = None + + +class VoyageClientProtocol(Protocol): + def embed(self, texts: list[str], *, model: str, input_type: str) -> list[list[float]]: ... + def rerank( + self, query: str, documents: list[str], *, model: str, top_k: int + ) -> list[tuple[int, float]]: ... + + +MEMORY_DOCUMENTS: tuple[MemoryDocument, ...] = ( + MemoryDocument( + "blocking-sort-esr", + "Blocking SORT with ESR index order", + "When explain contains a blocking SORT, prefer a compound index ordered by equality " + "fields first, sort fields next, and range fields last.", + ("SORT", "ESR", "index"), + ), + MemoryDocument( + "high-docs-examined", + "High docs examined against a small result set", + "A high docsExamined-to-returned ratio means the current index is not selective enough " + "for the query shape. Verify docs examined, keys examined, or execution time improves.", + ("docsExamined", "selectivity", "verification"), + ), + MemoryDocument( + "hash-bound-approval", + "Approval must bind to the evidence hash", + "The operator approves the hash derived from before evidence and the recommendation. " + "If the hash changes, reject the decision as stale and keep mutation blocked.", + ("approval", "hash", "gate"), + ), + MemoryDocument( + "verification-rail", + "Strict verification rail", + "Mark VERIFIED only when blocking SORT is gone, the selected index is evidenced in the " + "plan, and at least one metric improves. Otherwise record failed checks.", + ("verify", "SORT", "metrics"), + ), + MemoryDocument( + "captured-query-natural-plan", + "Captured queries diagnose their natural plan", + "For real workload captures, diagnose the natural plan without forcing the old demo " + "hint. The captured filter, sort, and limit drive the ESR candidate.", + ("captured-query", "workload", "explain"), + ), + MemoryDocument( + "index-backed-sort", + "Index-backed sort is the intended after state", + "Removing a blocking SORT does not mean results are unsorted. MongoDB can read the " + "compound index in the requested order without materializing a blocking sort stage.", + ("sort", "index", "after"), + ), + MemoryDocument( + "rollback-visibility", + "Show rollback visibility before automation", + "Expose the rollback command or index name clearly in the evidence trail. Automatic " + "rollback can wait; the operator still needs to see what would be reversed.", + ("rollback", "audit", "operator"), + ), + MemoryDocument( + "agent-read-only-boundary", + "Agents recommend; controller decides", + "Reasoning agents and read-only tools provide evidence and narrative context only. " + "Deterministic Python validates, selects, hashes, applies after approval, and verifies.", + ("agents", "read-only", "controller"), + ), +) + + +class VoyageClient: + def __init__(self, api_key: str, *, timeout: float = 12.0) -> None: + self._api_key = api_key + self._timeout = timeout + + def _post(self, path: str, payload: dict[str, Any]) -> dict[str, Any]: + req = Request( + f"https://api.voyageai.com{path}", + data=json.dumps(payload).encode("utf-8"), + headers={ + "Authorization": f"Bearer {self._api_key}", + "Accept": "application/json", + "Content-Type": "application/json", + }, + method="POST", + ) + try: + with urlopen(req, timeout=self._timeout) as res: + return json.loads(res.read().decode("utf-8")) + except (HTTPError, URLError, TimeoutError, json.JSONDecodeError) as exc: + raise VoyageMemoryError("Voyage request failed") from exc + + def embed(self, texts: list[str], *, model: str, input_type: str) -> list[list[float]]: + data = self._post( + "/v1/embeddings", + {"model": model, "input": texts, "input_type": input_type}, + ) + rows = data.get("data", []) + embeddings: list[list[float]] = [] + for row in rows: + embedding = row.get("embedding") + if not isinstance(embedding, list): + raise VoyageMemoryError("Voyage embedding response was malformed") + embeddings.append([float(value) for value in embedding]) + return embeddings + + def rerank( + self, query: str, documents: list[str], *, model: str, top_k: int + ) -> list[tuple[int, float]]: + data = self._post( + "/v1/rerank", + {"model": model, "query": query, "documents": documents, "top_k": top_k}, + ) + ranked: list[tuple[int, float]] = [] + for row in data.get("data", []): + index = row.get("index") + score = row.get("relevance_score", row.get("score", 0.0)) + if isinstance(index, int): + ranked.append((index, float(score))) + return ranked + + +class VoyageMemoryService: + def __init__( + self, + config: VoyageMemoryConfig, + *, + client: VoyageClientProtocol | None = None, + documents: tuple[MemoryDocument, ...] = MEMORY_DOCUMENTS, + ) -> None: + self._config = config + self._client = client or VoyageClient(config.api_key) + self._documents = documents + + def lookup(self, pack: EvidencePack) -> MemoryResponse: + models = {"embed": self._config.embed_model, "rerank": self._config.rerank_model} + if not self._config.api_key: + return MemoryResponse( + configured=False, + run_id=pack.run_id, + status="unconfigured", + models=models, + message="Sift Memory is not configured.", + ) + + query = build_memory_query(pack) + docs = self._documents[: max(0, self._config.max_docs)] + if not docs: + return MemoryResponse( + configured=True, + run_id=pack.run_id, + status="empty", + models=models, + query=query, + namespace=self._config.namespace, + retrieved_at=_now(), + message="No Sift Memory documents are available.", + ) + + try: + query_embedding = self._single_embedding( + query, model=self._config.embed_model, input_type="query" + ) + doc_embeddings = self._client.embed( + [doc.text for doc in docs], model=self._config.embed_model, input_type="document" + ) + prefiltered = sorted( + enumerate(doc_embeddings), + key=lambda pair: _cosine(query_embedding, pair[1]), + reverse=True, + )[: max(self._config.top_k, self._config.prefilter_k)] + candidates = [docs[index] for index, _ in prefiltered] + ranked = self._client.rerank( + query, + [doc.text for doc in candidates], + model=self._config.rerank_model, + top_k=self._config.top_k, + ) + guidance = [ + _guidance(candidates[index], pack, score, source="voyage") + for index, score in ranked[: self._config.top_k] + if 0 <= index < len(candidates) + ] + return MemoryResponse( + configured=True, + run_id=pack.run_id, + status="ok" if guidance else "empty", + guidance=guidance, + models=models, + query=query, + namespace=self._config.namespace, + retrieved_at=_now(), + message=None if guidance else "No relevant Sift Memory guidance found.", + ) + except VoyageMemoryError: + guidance = [ + _guidance(doc, pack, 0.1, source="local") for doc in _local_docs(pack, docs) + ] + return MemoryResponse( + configured=True, + run_id=pack.run_id, + status="fallback", + guidance=guidance[: self._config.top_k], + models=models, + query=query, + namespace=self._config.namespace, + retrieved_at=_now(), + message="Voyage request failed; returned local guidance.", + ) + + def _single_embedding(self, text: str, *, model: str, input_type: str) -> list[float]: + rows = self._client.embed([text], model=model, input_type=input_type) + if len(rows) != 1: + raise VoyageMemoryError("Voyage returned an unexpected embedding count") + return rows[0] + + +def get_memory_service() -> VoyageMemoryService: + return VoyageMemoryService( + VoyageMemoryConfig( + api_key=os.getenv("VOYAGE_API_KEY", "").strip(), + embed_model=os.getenv("VOYAGE_EMBED_MODEL", "voyage-4-lite"), + rerank_model=os.getenv("VOYAGE_RERANK_MODEL", "rerank-2.5-lite"), + top_k=_env_int("VOYAGE_MEMORY_TOP_K", 3), + prefilter_k=_env_int("VOYAGE_MEMORY_PREFILTER_K", 8), + max_docs=_env_int("VOYAGE_MEMORY_MAX_DOCS", 12), + ) + ) + + +def build_memory_query(pack: EvidencePack) -> str: + before = pack.before.model_dump(mode="json") + metrics = before["metrics"] + trace = " | ".join(event.summary for event in pack.agent_trace[:4]) + return "\n".join( + ( + f"namespace: {pack.namespace}", + f"status: {pack.status.value}", + f"finding: {pack.finding.problem}", + f"severity: {pack.finding.severity.value}", + f"query: {json.dumps(before['query'], sort_keys=True)}", + f"metrics: docs_examined={metrics['docs_examined']} " + f"docs_returned={metrics['docs_returned']} " + f"keys_examined={metrics['total_keys_examined']} " + f"millis={metrics['millis']} stages={','.join(metrics.get('stages', []))}", + f"recommendation: {json.dumps(pack.recommendation.index_spec)}", + f"rationale: {pack.recommendation.rationale}", + f"trace: {trace}", + ) + ) + + +def _guidance( + doc: MemoryDocument, + pack: EvidencePack, + score: float, + *, + source: Literal["voyage", "local"], +) -> MemoryGuidance: + return MemoryGuidance( + id=doc.id, + title=doc.title, + summary=doc.text, + reason=_reason(doc, pack), + source=source, + score=max(0.0, score), + tags=list(doc.tags), + ) + + +def _local_docs(pack: EvidencePack, docs: tuple[MemoryDocument, ...]) -> list[MemoryDocument]: + tokens = set(build_memory_query(pack).lower().replace("_", " ").split()) + return sorted( + docs, + key=lambda doc: ( + len(tokens.intersection(doc.text.lower().split())) + + (4 if pack.before.metrics.has_blocking_sort and "SORT" in doc.tags else 0) + ), + reverse=True, + )[:3] + + +def _reason(doc: MemoryDocument, pack: EvidencePack) -> str: + if pack.before.metrics.has_blocking_sort and ("SORT" in doc.tags or "sort" in doc.tags): + return "This run has a blocking SORT in the before explain plan." + if pack.before.metrics.docs_examined > max(pack.before.metrics.docs_returned * 100, 1000): + return "This run has a high docs-examined to returned-docs ratio." + if "approval" in doc.tags: + return "This run is governed by a hash-bound approval gate." + return "This guidance matches the selected run's evidence and recommendation context." + + +def _cosine(a: list[float], b: list[float]) -> float: + if not a or not b or len(a) != len(b): + return 0.0 + dot = sum(x * y for x, y in zip(a, b, strict=True)) + mag_a = math.sqrt(sum(x * x for x in a)) + mag_b = math.sqrt(sum(y * y for y in b)) + if mag_a == 0 or mag_b == 0: + return 0.0 + return dot / (mag_a * mag_b) + + +def _env_int(name: str, default: int) -> int: + try: + return int(os.getenv(name, str(default))) + except ValueError: + return default + + +def _now() -> str: + return datetime.now(UTC).isoformat() diff --git a/api/routes.py b/api/routes.py index 0ba90f4..bcfbc3d 100644 --- a/api/routes.py +++ b/api/routes.py @@ -8,7 +8,8 @@ from fastapi.responses import JSONResponse from pydantic import BaseModel -from api.auth import optional_dbre_identity +from api.auth import ROLE_DBRE, optional_dbre_identity, require_role +from api.memory import MemoryResponse, VoyageMemoryService, get_memory_service from api.workload import WorkloadService, get_workload_service_optional from controller.auth import Identity from controller.orchestrator import ApprovalTicket, issue_approval_ticket @@ -64,6 +65,7 @@ def get_engine() -> Engine: StoreDep = Annotated[PackStore, Depends(get_store)] EngineDep = Annotated[Engine, Depends(get_engine)] +MemoryDep = Annotated[VoyageMemoryService, Depends(get_memory_service)] @router.get("/health") @@ -84,6 +86,19 @@ def get_pack(run_id: str, store: StoreDep) -> dict: return pack.model_dump(mode="json") +@router.get("/packs/{run_id}/memory", response_model=MemoryResponse) +def get_pack_memory( + run_id: str, + store: StoreDep, + memory: MemoryDep, + _identity: Annotated[Identity, Depends(require_role(ROLE_DBRE))], +) -> MemoryResponse: + pack = store.get_pack(run_id) + if pack is None: + raise HTTPException(status_code=404, detail=f"pack '{run_id}' not found") + return memory.lookup(pack) + + class RunRequest(BaseModel): run_id: str | None = None captured_query_id: str | None = None @@ -123,6 +138,14 @@ async def trigger_run( assert_safe_query(query.query_filter, query.query_sort) except WorkloadSpecError as exc: raise HTTPException(status_code=422, detail=str(exc)) from exc + # A query with neither a filter nor a sort has no equality/sort/range structure, so the + # ESR rule yields an empty index — undiagnosable. Reject cleanly rather than letting the + # frozen Recommendation(min_length=1) raise a 500 deep in the controller/agent path. + if not query.query_filter and not query.query_sort: + raise HTTPException( + status_code=422, + detail="captured query has no filter or sort to index; nothing to diagnose", + ) pack = await engine.diagnose(run_id, query) store.save_pack(pack) return pack.model_dump(mode="json") diff --git a/api/server.py b/api/server.py index a9edb25..c34f9dc 100644 --- a/api/server.py +++ b/api/server.py @@ -138,6 +138,8 @@ async def diagnose(self, run_id: str, query: QueryInput | None = None) -> Eviden current_index = INDEX_B_NAME agent_failure: Exception | None = None + # The Agent Engine narrates and supplies before-evidence for the captured (or fixture) + # query; the deterministic controller still computes and hash-binds the recommendation. if self._diagnosis_agent is not None: try: return await run_agent_diagnosis( @@ -245,14 +247,18 @@ def create_app( target = client[NAMESPACE_DB][NAMESPACE_COLL] workload_service = MongoWorkloadService(target, state_db["query_log"]) if engine is None: - # Production requires the split Agent Engine; a local connection runs the - # deterministic controller (no Vertex), which still produces a valid pack. - agent = ( - diagnosis_agent_from_env(require_split=True, allow_legacy=False) - if secret_mode - else None + # Use the split Agent Engine when configured — mandatory in production + # (secret_mode), optional locally. The agent narrates and supplies before- + # evidence; the deterministic controller always computes and hash-binds the + # recommendation. Fallback keeps a transient Vertex error from failing the run + # — the pack is still valid because the controller is the source of truth. + agent = diagnosis_agent_from_env(require_split=secret_mode, allow_legacy=False) + engine = _LiveEngine( + conn, + agent, + MongoLedgerStore(state_db), + allow_agent_fallback=agent is not None, ) - engine = _LiveEngine(conn, agent, MongoLedgerStore(state_db)) else: packs_dir = Path(os.getenv("PACKS_DIR", "runs")) store = LocalFilePackStore(packs_dir) if packs_dir.exists() else _EmptyPackStore() diff --git a/dashboard/app/console/page.tsx b/dashboard/app/console/page.tsx index a911d9f..37a8554 100644 --- a/dashboard/app/console/page.tsx +++ b/dashboard/app/console/page.tsx @@ -1,5 +1,6 @@ import { getSession, getSessionToken } from "@/lib/auth"; import { WorkloadConsole } from "@/components/WorkloadConsole"; +import { Tour, type TourStep } from "@/components/Tour"; interface Preset { key: string; @@ -23,8 +24,32 @@ async function loadPresets(): Promise { } } +const USER_TOUR: TourStep[] = [ + { + title: "Welcome to Sift", + body: "This is your workload console. Run real, read-only queries against the live collection — Sift captures each one's explain evidence for the DBRE to triage.", + }, + { + title: "Quick workloads", + body: "Click a preset to run a realistic query. An amber dot is a slow 'trap' shape; a green dot is healthy. Run a few to build up a workload.", + }, + { + title: "Or build your own", + body: "Pick a store or method, an age range, a sort field and a limit, then Run query. Every query stays read-only and capped — you can't hurt the cluster.", + }, + { + title: "Read the verdict", + body: "Each run shows the real plan: a blocking SORT, docs examined vs returned, and the stages — captured and attributed to you for the DBRE to fix.", + }, +]; + export default async function ConsolePage() { const session = await getSession(); const presets = await loadPresets(); - return ; + return ( + <> + + + + ); } diff --git a/dashboard/app/dbre/page.tsx b/dashboard/app/dbre/page.tsx index 22f4744..7b35002 100644 --- a/dashboard/app/dbre/page.tsx +++ b/dashboard/app/dbre/page.tsx @@ -1,5 +1,6 @@ import { getSessionToken } from "@/lib/auth"; import { SlowQueryQueue, type SlowQuery } from "@/components/SlowQueryQueue"; +import { Tour, type TourStep } from "@/components/Tour"; async function loadSlowQueries(): Promise<{ rows: SlowQuery[]; error: string | null }> { const base = (process.env.API_URL ?? process.env.NEXT_PUBLIC_API_URL)?.trim(); @@ -17,7 +18,31 @@ async function loadSlowQueries(): Promise<{ rows: SlowQuery[]; error: string | n } } +const DBRE_TOUR: TourStep[] = [ + { + title: "The slow-query queue", + body: "These are the actual slowest queries your users ran, ranked by explain evidence — blocking sort, collection scan, over-scan ratio — not noisy wall-clock time.", + }, + { + title: "Diagnose the worst one", + body: "Click Diagnose on a row. Sift derives the ESR-correct index from the query's own equality/sort/range shape and opens an EvidencePack — no mutation yet.", + }, + { + title: "Review, then approve", + body: "Check the finding and the evidence hash, then approve. The approval is bound to that exact hash: the agent only recommends — you decide what gets applied.", + }, + { + title: "Verification proves it", + body: "On approve, the backend applies the index and re-explains: the blocking SORT is gone and docs-examined collapses. The pack moves to VERIFIED.", + }, +]; + export default async function DbreQueuePage() { const { rows, error } = await loadSlowQueries(); - return ; + return ( + <> + + + + ); } diff --git a/dashboard/app/globals.css b/dashboard/app/globals.css index 30f0436..5a110b2 100644 --- a/dashboard/app/globals.css +++ b/dashboard/app/globals.css @@ -1,26 +1,26 @@ /* - * DBRE console — design system. + * Sift — design system (Slate kit). * - * Deliberately NON-default (issue #10 acceptance criterion): no Inter, no - * purple gradient, no Lucide. This is a dark engineering-operations aesthetic - * fitting a database-reliability instrument: - * - Headings / data: JetBrains Mono (monospace — echoes explain output) - * - Body: IBM Plex Sans - * - Palette: deep slate background, amber = attention/pending, green = good - * plan, red = regression. Accents borrowed from query-plan semantics. + * Dark, dense, dev-tooling aesthetic for a database-reliability instrument — + * the lineage of Linear / Vercel / Raycast. Single-family type discipline: + * - Sans + data: Geist + * - Mono (explain output, IDs, hashes): Geist Mono + * - Surfaces: cool near-blue blacks. Accent = signal cyan (matches the mark). + * amber = attention/pending, green = good plan, red = regression — kept from + * query-plan semantics as the brand's signal palette. * - Icons: Phosphor (set in components). */ :root { - --bg: #0d1117; - --bg-panel: #161b22; - --bg-elevated: #1c2230; - --border: #2a3340; - --border-bright: #3b4656; + --bg: #0a0b0f; + --bg-panel: #13151b; + --bg-elevated: #1b1e27; + --border: #262934; + --border-bright: #3a3e4d; - --text: #e6edf3; - --text-dim: #9aa7b4; - --text-faint: #5c6b7a; + --text: #eef0f6; + --text-dim: #a0a4b5; + --text-faint: #5a5e6e; --amber: #f5b342; /* pending / attention */ --amber-dim: #6b5320; @@ -28,12 +28,12 @@ --green-dim: #1f5b41; --red: #f06a5e; /* regression / blocking */ --red-dim: #5e2722; - --cyan: #5cc8e6; /* neutral data accent */ + --cyan: #5cc8e6; /* signal accent — matches the Sift mark */ --mono: var(--font-mono), ui-monospace, "Cascadia Code", "Fira Code", monospace; --sans: var(--font-sans), system-ui, -apple-system, sans-serif; - --radius: 6px; + --radius: 8px; } * { diff --git a/dashboard/app/layout.tsx b/dashboard/app/layout.tsx index 996dc3c..1d8d7d9 100644 --- a/dashboard/app/layout.tsx +++ b/dashboard/app/layout.tsx @@ -1,26 +1,25 @@ import type { Metadata } from "next"; -import { JetBrains_Mono, IBM_Plex_Sans } from "next/font/google"; +import { Geist, Geist_Mono } from "next/font/google"; import "./globals.css"; import { SidebarNav } from "@/components/SidebarNav"; import { getSession } from "@/lib/auth"; import layout from "./layout.module.css"; -// Non-default typography (see globals.css rationale). -const mono = JetBrains_Mono({ +// Slate kit typography: Geist + Geist Mono — single-family discipline (see globals.css). +const sans = Geist({ subsets: ["latin"], - variable: "--font-mono", + variable: "--font-sans", display: "swap", }); -const sans = IBM_Plex_Sans({ +const mono = Geist_Mono({ subsets: ["latin"], - weight: ["400", "500", "600", "700"], - variable: "--font-sans", + variable: "--font-mono", display: "swap", }); export const metadata: Metadata = { - title: "DBRE Console — Evidence-Driven Agent", + title: "Sift — Evidence-Driven DBRE", description: "Operator console for the Evidence-Driven DBRE agent: run workloads, triage slow queries, and approve the index fix.", }; diff --git a/dashboard/app/login/page.tsx b/dashboard/app/login/page.tsx index 52a6c11..eeed724 100644 --- a/dashboard/app/login/page.tsx +++ b/dashboard/app/login/page.tsx @@ -2,8 +2,9 @@ import { useState } from "react"; import { useRouter } from "next/navigation"; -import { Database, SignIn } from "@phosphor-icons/react/dist/ssr"; +import { SignIn } from "@phosphor-icons/react/dist/ssr"; +import { Logo } from "@/components/Logo"; import styles from "./login.module.css"; export default function LoginPage() { @@ -45,8 +46,8 @@ export default function LoginPage() {
- - DBRE Console + + Sift

Sign in to run database workloads, or to triage the slowest queries. diff --git a/dashboard/app/run-review/page.tsx b/dashboard/app/run-review/page.tsx index 874480c..5621c59 100644 --- a/dashboard/app/run-review/page.tsx +++ b/dashboard/app/run-review/page.tsx @@ -1,5 +1,6 @@ import { AgentRunView } from "@/components/AgentRunView"; import { loadPack } from "@/lib/api"; +import { loadSiftMemory } from "@/lib/siftMemory.server"; export const dynamic = "force-dynamic"; @@ -14,6 +15,14 @@ export default async function RunReviewPage({ }) { const { run_id } = await searchParams; const { pack, source, notice } = await loadPack(run_id); + const memory = source === "live" ? await loadSiftMemory(pack.run_id) : null; - return ; + return ( + + ); } diff --git a/dashboard/app/runs/[run_id]/page.tsx b/dashboard/app/runs/[run_id]/page.tsx index 89e9728..8f9ae32 100644 --- a/dashboard/app/runs/[run_id]/page.tsx +++ b/dashboard/app/runs/[run_id]/page.tsx @@ -1,5 +1,6 @@ import { AgentRunView } from "@/components/AgentRunView"; import { loadPack } from "@/lib/api"; +import { loadSiftMemory } from "@/lib/siftMemory.server"; export const dynamic = "force-dynamic"; @@ -14,6 +15,14 @@ export default async function RunPage({ }) { const { run_id } = await params; const { pack, source, notice } = await loadPack(run_id); + const memory = source === "live" ? await loadSiftMemory(pack.run_id) : null; - return ; + return ( + + ); } diff --git a/dashboard/app/system-map/page.tsx b/dashboard/app/system-map/page.tsx index e663d23..b7abe0a 100644 --- a/dashboard/app/system-map/page.tsx +++ b/dashboard/app/system-map/page.tsx @@ -41,6 +41,7 @@ export default async function SystemMapPage() {

+
diff --git a/dashboard/app/system-map/system-map.module.css b/dashboard/app/system-map/system-map.module.css index 3592a51..f8b1e59 100644 --- a/dashboard/app/system-map/system-map.module.css +++ b/dashboard/app/system-map/system-map.module.css @@ -51,17 +51,11 @@ .tierRow { display: grid; - grid-template-columns: repeat(3, 1fr); + grid-template-columns: repeat(auto-fit, minmax(190px, 1fr)); gap: 12px; width: 100%; } -@media (max-width: 640px) { - .tierRow { - grid-template-columns: 1fr; - } -} - .box { display: flex; flex-direction: column; diff --git a/dashboard/components/AgentRunView.tsx b/dashboard/components/AgentRunView.tsx index 0c13f63..eb7edf5 100644 --- a/dashboard/components/AgentRunView.tsx +++ b/dashboard/components/AgentRunView.tsx @@ -14,11 +14,37 @@ import { EvidencePanel } from "@/components/EvidencePanel"; import { ApprovalGatePanel } from "@/components/ApprovalGatePanel"; import { StatusPill } from "@/components/StatusPill"; import { TracePanel } from "@/components/TracePanel"; +import { SiftMemoryPanel } from "@/components/SiftMemoryPanel"; +import { Tour, type TourStep } from "@/components/Tour"; import type { EvidencePack } from "@/lib/evidence"; import { displayStatus, isVerificationFailed } from "@/lib/evidence"; import type { PackSource } from "@/lib/api"; +import type { SiftMemoryResult } from "@/lib/siftMemory"; import styles from "@/app/run-review.module.css"; +const RUNREVIEW_TOUR: TourStep[] = [ + { + title: "Reviewing one diagnosis", + body: "This is the EvidencePack for a single captured query. The agent has recommended a fix — but nothing has touched the database yet. The decision is yours.", + }, + { + title: "The approval gate", + body: "Status is PENDING APPROVAL · MUTATION BLOCKED. The full evidence hash binds your approval to exactly this evidence — approve a different pack and the hash won't match, so a stale fix can't slip through.", + }, + { + title: "Why it's safe", + body: "Safety Authority spells out the trust model: agents are read-only and only recommend, deterministic Python picks the winner, you approve, mutation is backend-only after that, then it re-explains to verify. The agent never writes to the database.", + }, + { + title: "The evidence", + body: "Scroll down for the HIGH finding (the blocking SORT), the ESR index recommendation, and the before/after explain-plan diff. The 'after' side fills in once it's verified.", + }, + { + title: "Approve → apply → verify", + body: "Click 'Approve this evidence hash': the backend creates the recommended index and re-explains — the SORT disappears and docs-examined collapses, and the run flips to VERIFIED. 'Reject' records your decision and changes nothing.", + }, +]; + /** * Run review for a DBRE-selected diagnosis. The run is produced from the slow-query queue (the * "Diagnose" action runs POST /run on the captured query); this view renders the resulting pack @@ -29,10 +55,12 @@ export function AgentRunView({ initialPack, initialSource, initialNotice, + initialMemory, }: { initialPack: EvidencePack; initialSource: PackSource; initialNotice?: string; + initialMemory?: SiftMemoryResult | null; }) { const [pack, setPack] = useState(initialPack); const source = initialSource; @@ -42,6 +70,7 @@ export function AgentRunView({ return (
+
@@ -74,6 +103,7 @@ export function AgentRunView({ evidenceHash={pack.evidence_hash} ledgerPersisted={source === "live"} /> +
; + return ( + // eslint-disable-next-line @next/next/no-img-element + setFailed(true)} + style={{ display: "block" }} + /> + ); +} diff --git a/dashboard/components/SidebarNav.tsx b/dashboard/components/SidebarNav.tsx index b202f0f..264284b 100644 --- a/dashboard/components/SidebarNav.tsx +++ b/dashboard/components/SidebarNav.tsx @@ -4,7 +4,6 @@ import { useEffect, useRef, useState } from "react"; import Link from "next/link"; import { usePathname, useRouter } from "next/navigation"; import { - Database, SquaresFour, MagnifyingGlass, TreeStructure, @@ -16,6 +15,7 @@ import { List, X, } from "@phosphor-icons/react/dist/ssr"; +import { Logo } from "@/components/Logo"; import type { Session } from "@/lib/session"; import styles from "./SidebarNav.module.css"; @@ -83,8 +83,8 @@ export function SidebarNav({ session }: { session: Session }) { {/* mobile top bar with hamburger */}
- - DBRE Console + + Sift
+ + {open && step && ( +
+
+
+ {title} + +
+

{step.title}

+

{step.body}

+
+ {steps.map((_, n) => ( + + ))} +
+
+ {i > 0 ? ( + + ) : ( + + )} + + {i + 1} / {steps.length} + + {last ? ( + + ) : ( + + )} +
+
+
+ )} + + ); +} diff --git a/dashboard/lib/__tests__/siftMemory.test.ts b/dashboard/lib/__tests__/siftMemory.test.ts new file mode 100644 index 0000000..e5f2124 --- /dev/null +++ b/dashboard/lib/__tests__/siftMemory.test.ts @@ -0,0 +1,143 @@ +import { readFileSync } from "node:fs"; +import { describe, expect, it } from "vitest"; +import { + formatMemoryScore, + memoryHitSummary, + memoryHitTitle, + normalizeSiftMemory, +} from "@/lib/siftMemory"; +import type { SiftMemoryGuidance, SiftMemoryResult } from "@/lib/siftMemory"; + +const baseMemory = { + run_id: "run-1", + mutation_authority: false, + models: { embed: "voyage-4-lite", rerank: "rerank-2.5-lite" }, +}; + +describe("normalizeSiftMemory", () => { + it("maps missing memory data to the not configured state", () => { + expect(normalizeSiftMemory(undefined)).toMatchObject({ + state: "not_configured", + label: "Not configured", + hits: [], + }); + }); + + it("maps explicit not_configured memory data", () => { + const memory: SiftMemoryResult = { + ...baseMemory, + status: "unconfigured", + configured: false, + guidance: [], + }; + expect(normalizeSiftMemory(memory)).toMatchObject({ + state: "not_configured", + query: null, + hits: [], + }); + }); + + it("maps configured memory with no results", () => { + const memory: SiftMemoryResult = { + ...baseMemory, + status: "empty", + configured: true, + query: "blocking sort customer age", + guidance: [], + }; + expect(normalizeSiftMemory(memory)).toMatchObject({ + state: "configured_no_results", + label: "Configured · no results", + query: "blocking sort customer age", + hits: [], + }); + }); + + it("maps configured memory with results", () => { + const memory: SiftMemoryResult = { + ...baseMemory, + status: "ok", + configured: true, + guidance: [ + { + id: "mem-1", + title: "ESR order", + summary: "Equality, Sort, Range ordering.", + reason: "Same blocking SORT.", + source: "voyage", + score: 0.92, + tags: ["ESR"], + }, + { + id: "mem-2", + title: "Rejected order", + summary: "Prior candidate rejected because range came before sort.", + reason: "Same shape.", + source: "voyage", + score: 0.75, + tags: [], + }, + ], + namespace: "dbre", + retrieved_at: "2026-06-08T14:00:00Z", + }; + const result = normalizeSiftMemory(memory); + expect(result).toMatchObject({ + state: "configured_with_results", + label: "2 results", + namespace: "dbre", + retrievedAt: "2026-06-08T14:00:00Z", + }); + expect(result.hits).toHaveLength(2); + }); + + it("maps failed memory retrieval", () => { + const memory: SiftMemoryResult = { + ...baseMemory, + status: "failed", + configured: true, + guidance: [], + message: "vector store unavailable", + }; + expect(normalizeSiftMemory(memory)).toMatchObject({ + state: "failed", + label: "Failed", + error: "vector store unavailable", + }); + }); +}); + +describe("memory result formatting", () => { + it("uses stable fallbacks for title and summary", () => { + const hit = { + id: "abc", + title: "", + summary: "", + reason: "Same query shape.", + source: "local", + score: 0.1, + tags: [], + } satisfies SiftMemoryGuidance; + expect(memoryHitTitle(hit, 0)).toBe("abc"); + expect(memoryHitSummary(hit)).toBe("Same query shape."); + }); + + it("formats normalized and raw scores", () => { + expect(formatMemoryScore(0.847)).toBe("0.85"); + expect(formatMemoryScore(12.345)).toBe("12.3"); + expect(formatMemoryScore(undefined)).toBeNull(); + }); +}); + +describe("voyage key never reaches the client", () => { + // The client-shipped memory modules must never reference the Voyage key. + // The key is read server-side only (read API), never NEXT_PUBLIC_* or browser-bound. + const clientFiles = ["../siftMemory.ts", "../../components/SiftMemoryPanel.tsx"]; + for (const rel of clientFiles) { + it(`${rel} references no Voyage API key`, () => { + const src = readFileSync(new URL(rel, import.meta.url), "utf8"); + expect(src).not.toMatch(/VOYAGE_API_KEY/); + expect(src).not.toMatch(/NEXT_PUBLIC_VOYAGE/); + }); + } +}); diff --git a/dashboard/lib/siftMemory.server.ts b/dashboard/lib/siftMemory.server.ts new file mode 100644 index 0000000..fad5f55 --- /dev/null +++ b/dashboard/lib/siftMemory.server.ts @@ -0,0 +1,19 @@ +import { apiBaseUrl } from "@/lib/api"; +import { getSessionToken } from "@/lib/auth"; +import type { SiftMemoryResult } from "@/lib/siftMemory"; + +export async function loadSiftMemory(runId: string): Promise { + const base = apiBaseUrl(); + const token = await getSessionToken(); + if (!base || !token) return null; + try { + const res = await fetch(`${base}/packs/${encodeURIComponent(runId)}/memory`, { + headers: { accept: "application/json", authorization: `Bearer ${token}` }, + cache: "no-store", + }); + if (!res.ok) return null; + return (await res.json()) as SiftMemoryResult; + } catch { + return null; + } +} diff --git a/dashboard/lib/siftMemory.ts b/dashboard/lib/siftMemory.ts new file mode 100644 index 0000000..9f1a362 --- /dev/null +++ b/dashboard/lib/siftMemory.ts @@ -0,0 +1,107 @@ +export type SiftMemoryStatus = "unconfigured" | "ok" | "empty" | "fallback" | "failed"; + +export interface SiftMemoryGuidance { + id: string; + title: string; + summary: string; + reason: string; + source: "voyage" | "local"; + score: number; + tags: string[]; +} + +export interface SiftMemoryResult { + configured: boolean; + run_id: string; + status: SiftMemoryStatus; + mutation_authority: boolean; + guidance: SiftMemoryGuidance[]; + models: { embed: string; rerank: string }; + query?: string | null; + namespace?: string | null; + retrieved_at?: string | null; + message?: string | null; +} + +export type SiftMemoryViewState = + | "configured_with_results" + | "configured_no_results" + | "not_configured" + | "failed"; + +export interface SiftMemoryViewModel { + state: SiftMemoryViewState; + label: string; + summary: string; + query: string | null; + hits: SiftMemoryGuidance[]; + error: string | null; + namespace: string | null; + retrievedAt: string | null; +} + +function cleanHits(results: SiftMemoryGuidance[] | undefined): SiftMemoryGuidance[] { + if (!Array.isArray(results)) return []; + return results.filter((hit) => hit && typeof hit === "object"); +} + +export function normalizeSiftMemory( + memory: SiftMemoryResult | null | undefined, +): SiftMemoryViewModel { + const base = { + query: memory?.query?.trim() || null, + hits: cleanHits(memory?.guidance), + error: memory?.message?.trim() || null, + namespace: memory?.namespace?.trim() || null, + retrievedAt: memory?.retrieved_at?.trim() || null, + }; + + if (memory?.status === "failed") { + return { + ...base, + state: "failed", + label: "Failed", + summary: "Memory retrieval failed before context could be attached to this run.", + }; + } + + if (!memory || memory.status === "unconfigured" || memory.configured === false) { + return { + ...base, + hits: [], + state: "not_configured", + label: "Not configured", + summary: "No Sift Memory retrieval context is attached to this EvidencePack yet.", + }; + } + + if (base.hits.length === 0) { + return { + ...base, + state: "configured_no_results", + label: "Configured · no results", + summary: "Sift Memory retrieval ran and returned no matching context for this query.", + }; + } + + return { + ...base, + state: "configured_with_results", + label: `${base.hits.length} result${base.hits.length === 1 ? "" : "s"}`, + summary: "Sift Memory retrieved read-only context for this run.", + }; +} + +export function memoryHitTitle(hit: SiftMemoryGuidance, index: number): string { + return hit.title?.trim() || hit.id?.trim() || `Memory result ${index + 1}`; +} + +export function memoryHitSummary(hit: SiftMemoryGuidance): string { + return hit.summary?.trim() || hit.reason?.trim() || "No summary provided."; +} + +export function formatMemoryScore(score: number | undefined): string | null { + if (typeof score !== "number" || Number.isNaN(score)) return null; + if (score > 1) return score.toFixed(1); + return score.toFixed(2); +} diff --git a/dashboard/public/brand/sift-mark-aperture.svg b/dashboard/public/brand/sift-mark-aperture.svg new file mode 100644 index 0000000..9a04d47 --- /dev/null +++ b/dashboard/public/brand/sift-mark-aperture.svg @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + + + diff --git a/dashboard/public/brand/sift-mark-rail.svg b/dashboard/public/brand/sift-mark-rail.svg new file mode 100644 index 0000000..2dbf7ff --- /dev/null +++ b/dashboard/public/brand/sift-mark-rail.svg @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + + + diff --git a/dashboard/public/brand/sift-mark-signal.svg b/dashboard/public/brand/sift-mark-signal.svg new file mode 100644 index 0000000..8dccc31 --- /dev/null +++ b/dashboard/public/brand/sift-mark-signal.svg @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + + diff --git a/dashboard/public/dbre-mark.svg b/dashboard/public/dbre-mark.svg new file mode 100644 index 0000000..0230b3a --- /dev/null +++ b/dashboard/public/dbre-mark.svg @@ -0,0 +1,58 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dashboard/public/logo.svg b/dashboard/public/logo.svg new file mode 100644 index 0000000..eda535f --- /dev/null +++ b/dashboard/public/logo.svg @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + + diff --git a/tests/unit/test_api.py b/tests/unit/test_api.py index a78fbc7..9f8cf4d 100644 --- a/tests/unit/test_api.py +++ b/tests/unit/test_api.py @@ -773,6 +773,17 @@ def test_run_rejects_unsafe_captured_filter() -> None: assert client.post("/run", json={"captured_query_id": "evil"}).status_code == 422 +def test_run_rejects_degenerate_captured_query() -> None: + # No filter and no sort -> empty ESR index -> undiagnosable. Must 422, not 500. + captured = {"bare": {"query": {"filter": {}, "sort": [], "limit": 20}}} + engine = FakeEngine() + client = TestClient( + create_app(FakePackStore([]), engine, workload_service=FakeWorkloadService(captured)) + ) + assert client.post("/run", json={"captured_query_id": "bare"}).status_code == 422 + assert engine.queries == [] # rejected before reaching diagnosis + + def test_run_fixture_path_passes_no_query() -> None: engine = FakeEngine() client = TestClient(create_app(FakePackStore([]), engine)) diff --git a/tests/unit/test_memory.py b/tests/unit/test_memory.py new file mode 100644 index 0000000..5586847 --- /dev/null +++ b/tests/unit/test_memory.py @@ -0,0 +1,223 @@ +from fastapi.testclient import TestClient + +from api.memory import ( + MemoryDocument, + VoyageMemoryConfig, + VoyageMemoryError, + VoyageMemoryService, +) +from api.server import create_app +from controller.auth import Identity, make_session_token +from controller.ledger import evidence_hash +from controller.schemas import ( + ApprovalGate, + ApprovalGateState, + Evidence, + EvidenceMetrics, + EvidencePack, + Finding, + PackStatus, + Recommendation, + Severity, +) + + +def _minimal_pack(run_id: str = "run-memory") -> EvidencePack: + before = Evidence( + query={ + "filter": {"storeLocation": "Denver", "customer.age": {"$gte": 30, "$lte": 50}}, + "sort": [("saleDate", -1)], + "limit": 20, + }, + explain_plan={"stage": "SORT", "inputStage": {"stage": "IXSCAN"}}, + metrics=EvidenceMetrics( + docs_examined=1000, + docs_returned=20, + millis=12, + total_keys_examined=1000, + stages=("IXSCAN", "FETCH", "SORT"), + ), + ) + recommendation = Recommendation( + index_spec=(("storeLocation", 1), ("saleDate", -1), ("customer.age", 1)), + rationale="ESR order removes the blocking sort.", + ) + hash_value = evidence_hash({"evidence": before, "recommendation": recommendation}) + return EvidencePack( + run_id=run_id, + namespace="sample.sales", + status=PackStatus.DIAGNOSED, + before=before, + finding=Finding( + problem="Query performs a blocking in-memory SORT.", + severity=Severity.HIGH, + evidence_refs=("explain",), + ), + recommendation=recommendation, + approval_gate=ApprovalGate( + gate_id=f"{run_id}:gate", + state=ApprovalGateState.PENDING_APPROVAL, + required_hash=hash_value, + mutation_allowed=False, + ), + evidence_hash=hash_value, + created_at="2026-06-01T00:00:00Z", + ) + + +class _Store: + def __init__(self, packs: list[EvidencePack]) -> None: + self._packs = {pack.run_id: pack for pack in packs} + + def list_packs(self) -> list[EvidencePack]: + return list(self._packs.values()) + + def get_pack(self, run_id: str) -> EvidencePack | None: + return self._packs.get(run_id) + + def save_pack(self, pack: EvidencePack) -> None: + self._packs[pack.run_id] = pack + + +class _FakeVoyageClient: + def __init__(self) -> None: + self.embed_calls: list[tuple[list[str], str, str]] = [] + self.rerank_calls: list[tuple[str, list[str], str, int]] = [] + + def embed(self, texts: list[str], *, model: str, input_type: str) -> list[list[float]]: + self.embed_calls.append((texts, model, input_type)) + if input_type == "query": + return [[1.0, 0.0]] + return [[0.0, 1.0], [1.0, 0.0], [0.5, 0.5]][: len(texts)] + + def rerank( + self, query: str, documents: list[str], *, model: str, top_k: int + ) -> list[tuple[int, float]]: + self.rerank_calls.append((query, documents, model, top_k)) + return [(1, 0.91), (0, 0.42)] + + +def test_memory_endpoint_returns_unconfigured_when_voyage_key_absent(monkeypatch) -> None: + monkeypatch.delenv("VOYAGE_API_KEY", raising=False) + monkeypatch.setenv("SESSION_SECRET", "memory-secret") + client = TestClient(create_app(_Store([_minimal_pack()]))) + dbre = make_session_token(Identity("dbre", "DBRE", "dbre"), "memory-secret") + + resp = client.get("/packs/run-memory/memory", headers={"Authorization": f"Bearer {dbre}"}) + + assert resp.status_code == 200 + body = resp.json() + assert body["run_id"] == "run-memory" + assert body["status"] == "unconfigured" + assert body["configured"] is False + assert body["mutation_authority"] is False + assert body["guidance"] == [] + assert body["models"] == {"embed": "voyage-4-lite", "rerank": "rerank-2.5-lite"} + + +def test_memory_endpoint_unknown_pack_returns_404(monkeypatch) -> None: + monkeypatch.delenv("VOYAGE_API_KEY", raising=False) + monkeypatch.setenv("SESSION_SECRET", "memory-secret") + client = TestClient(create_app(_Store([]))) + dbre = make_session_token(Identity("dbre", "DBRE", "dbre"), "memory-secret") + + resp = client.get("/packs/missing/memory", headers={"Authorization": f"Bearer {dbre}"}) + + assert resp.status_code == 404 + + +def test_memory_endpoint_requires_dbre_session(monkeypatch) -> None: + monkeypatch.delenv("VOYAGE_API_KEY", raising=False) + monkeypatch.setenv("SESSION_SECRET", "memory-secret") + client = TestClient(create_app(_Store([_minimal_pack()]))) + assert client.get("/packs/run-memory/memory").status_code == 401 + user = make_session_token(Identity("dev", "Dev", "user"), "memory-secret") + assert ( + client.get("/packs/run-memory/memory", headers={"Authorization": f"Bearer {user}"}) + ).status_code == 403 + dbre = make_session_token(Identity("dbre", "DBRE", "dbre"), "memory-secret") + assert ( + client.get("/packs/run-memory/memory", headers={"Authorization": f"Bearer {dbre}"}) + ).status_code == 200 + + +def test_memory_service_prefilters_with_embeddings_and_reranks() -> None: + docs = ( + MemoryDocument("doc-a", "Approval", "Hash gate review."), + MemoryDocument("doc-b", "ESR", "Blocking SORT guidance."), + MemoryDocument("doc-c", "Verify", "Re-explain after apply."), + ) + fake = _FakeVoyageClient() + service = VoyageMemoryService( + VoyageMemoryConfig( + api_key="test-key", + embed_model="embed-test", + rerank_model="rerank-test", + top_k=2, + prefilter_k=3, + max_docs=3, + ), + client=fake, + documents=docs, + ) + + response = service.lookup(_minimal_pack()) + + assert response.status == "ok" + assert response.configured is True + assert [item.id for item in response.guidance] == ["doc-c", "doc-b"] + assert [item.source for item in response.guidance] == ["voyage", "voyage"] + assert fake.embed_calls[0][1:] == ("embed-test", "query") + assert fake.embed_calls[1][1:] == ("embed-test", "document") + assert fake.rerank_calls[0][2:] == ("rerank-test", 2) + + +def test_memory_service_returns_empty_when_docs_are_capped_to_zero() -> None: + service = VoyageMemoryService( + VoyageMemoryConfig(api_key="test-key", max_docs=0), + client=_FakeVoyageClient(), + documents=(MemoryDocument("doc", "Title", "Body"),), + ) + + response = service.lookup(_minimal_pack()) + + assert response.status == "empty" + assert response.guidance == [] + + +def test_memory_service_falls_back_to_local_guidance_when_voyage_fails() -> None: + class FailingVoyageClient: + def embed(self, texts: list[str], *, model: str, input_type: str) -> list[list[float]]: + raise VoyageMemoryError("network failure") + + def rerank( + self, query: str, documents: list[str], *, model: str, top_k: int + ) -> list[tuple[int, float]]: + raise AssertionError("rerank should not run when embeddings fail") + + docs = ( + MemoryDocument("sort", "Blocking sort", "DBRE guidance for blocking SORT and ESR."), + MemoryDocument("other", "Other", "Unrelated guidance."), + ) + service = VoyageMemoryService( + VoyageMemoryConfig(api_key="test-key", top_k=1, max_docs=2), + client=FailingVoyageClient(), + documents=docs, + ) + + response = service.lookup(_minimal_pack()) + + assert response.status == "fallback" + assert response.configured is True + assert response.guidance[0].id == "sort" + assert response.guidance[0].source == "local" + assert response.message == "Voyage request failed; returned local guidance." + + +def test_memory_stays_out_of_evidence_pack_v1() -> None: + # Sift Memory is out-of-band: it must never be a field of EvidencePack v1. + assert "memory" not in EvidencePack.model_fields + assert "guidance" not in EvidencePack.model_fields + dumped = _minimal_pack().model_dump(mode="json") + assert "memory" not in dumped + assert "guidance" not in dumped diff --git a/tests/unit/test_tools.py b/tests/unit/test_tools.py index 0785fcf..ed09891 100644 --- a/tests/unit/test_tools.py +++ b/tests/unit/test_tools.py @@ -1,3 +1,5 @@ +import json + import pytest from agents import native_mongo_tools @@ -5,6 +7,8 @@ from controller.schemas import Evidence, EvidenceMetrics QUERY_FILTER = {"storeLocation": "Denver", "customer.age": {"$gte": 30, "$lte": 50}} +QUERY_SORT = [["saleDate", -1]] +QUERY_JSON = json.dumps({"filter": QUERY_FILTER, "sort": QUERY_SORT, "limit": 20}) EXPECTED_C = [["storeLocation", 1], ["saleDate", -1], ["customer.age", 1]] @@ -164,63 +168,100 @@ def test_native_tool_module_has_no_index_mutation_calls(): def test_native_explain_slow_query_returns_canonical_evidence(monkeypatch): monkeypatch.setattr( native_mongo_tools, - "_capture_with_hint", - lambda hint: _evidence(has_blocking_sort=True, keys_examined=17209), + "_capture", + lambda query_filter, query_sort, limit: _evidence( + has_blocking_sort=True, keys_examined=17209 + ), ) - payload = native_mongo_tools.explain_slow_query() + payload = native_mongo_tools.explain_slow_query(QUERY_JSON) assert payload["namespace"] == "sample_supplies.sales_agent_demo" - assert payload["hint"] == "esr_wrong_B" + assert payload["query"]["filter"] == QUERY_FILTER + assert payload["query"]["sort"] == QUERY_SORT assert payload["metrics"]["total_keys_examined"] == 17209 assert payload["metrics"]["has_blocking_sort"] is True -def test_native_compare_candidate_indexes_selects_c(monkeypatch): - def fake_capture(hint): - if hint == "esr_wrong_B": - return _evidence(has_blocking_sort=True, keys_examined=17209) - return _evidence(has_blocking_sort=False, keys_examined=64) - - monkeypatch.setattr(native_mongo_tools, "_capture_with_hint", fake_capture) +def test_native_compare_candidate_indexes_selects_esr(monkeypatch): + monkeypatch.setattr( + native_mongo_tools, + "_capture", + lambda query_filter, query_sort, limit: _evidence( + has_blocking_sort=True, keys_examined=17209 + ), + ) - payload = native_mongo_tools.compare_candidate_indexes() + payload = native_mongo_tools.compare_candidate_indexes(QUERY_JSON) - assert payload["winner"] == "esr_right_C" - assert payload["candidates"][0]["name"] == "esr_wrong_B" - assert payload["candidates"][1]["name"] == "esr_right_C" + assert payload["winner"] == "esr_recommended" + assert payload["candidates"][0]["name"] == "current_plan" + assert payload["candidates"][0]["has_blocking_sort"] is True + assert payload["candidates"][1]["name"] == "esr_recommended" assert payload["candidates"][1]["index_spec"] == EXPECTED_C def test_native_diagnose_candidate_uses_deterministic_esr(monkeypatch): monkeypatch.setattr( native_mongo_tools, - "_capture_with_hint", - lambda hint: _evidence(has_blocking_sort=True, keys_examined=17209), + "_capture", + lambda query_filter, query_sort, limit: _evidence( + has_blocking_sort=True, keys_examined=17209 + ), ) - payload = native_mongo_tools.diagnose_candidate() + payload = native_mongo_tools.diagnose_candidate(QUERY_JSON) assert payload["source"] == "deterministic_esr" assert payload["diagnosis"]["recommendation"]["index_spec"] == EXPECTED_C + assert payload["recommended_index"] == EXPECTED_C assert payload["diagnosis"]["finding"]["severity"] == "high" def test_native_rationalize_recommendation_is_evidence_grounded(monkeypatch): - def fake_capture(hint): - if hint == "esr_wrong_B": - return _evidence(has_blocking_sort=True, keys_examined=17209) - return _evidence(has_blocking_sort=False, keys_examined=64) - - monkeypatch.setattr(native_mongo_tools, "_capture_with_hint", fake_capture) + monkeypatch.setattr( + native_mongo_tools, + "_capture", + lambda query_filter, query_sort, limit: _evidence( + has_blocking_sort=True, keys_examined=17209 + ), + ) - payload = native_mongo_tools.rationalize_recommendation() + payload = native_mongo_tools.rationalize_recommendation(QUERY_JSON) assert payload["recommended_index"] == EXPECTED_C assert "17209" in payload["rationale"] - assert "64" in payload["rationale"] - assert payload["evidence"]["before_has_blocking_sort"] is True - assert payload["evidence"]["after_has_blocking_sort"] is False + assert payload["evidence"]["keys_examined"] == 17209 + assert payload["evidence"]["has_blocking_sort"] is True + + +def test_native_rationalize_recommendation_handles_no_blocking_sort(monkeypatch): + monkeypatch.setattr( + native_mongo_tools, + "_capture", + lambda query_filter, query_sort, limit: _evidence( + has_blocking_sort=False, keys_examined=64 + ), + ) + + payload = native_mongo_tools.rationalize_recommendation(QUERY_JSON) + + assert "over-scans the collection" in payload["rationale"] + assert "blocking in-memory SORT" not in payload["rationale"] + assert payload["evidence"]["has_blocking_sort"] is False + + +def test_parse_query_defaults_empty_to_browse_shape(): + assert native_mongo_tools._parse_query("{}") == ({}, [], 20) + + +def test_parse_query_unwraps_wrapper_and_coerces_sort_direction(): + f, s, limit = native_mongo_tools._parse_query( + json.dumps({"query": {"filter": QUERY_FILTER, "sort": [["saleDate", "-1"]], "limit": 15}}) + ) + assert f == QUERY_FILTER + assert s == [("saleDate", -1)] # string direction coerced to int + assert limit == 15 def test_native_connection_string_error_names_agent_engine_secret_name(monkeypatch): @@ -232,7 +273,7 @@ def test_native_connection_string_error_names_agent_engine_secret_name(monkeypat native_mongo_tools._require_connection_string() -def test_native_capture_with_hint_uses_env_connection_and_closes_client(monkeypatch): +def test_native_capture_uses_env_connection_and_closes_client(monkeypatch): explained = { "queryPlanner": { "winningPlan": { @@ -255,9 +296,10 @@ def test_native_capture_with_hint_uses_env_connection_and_closes_client(monkeypa monkeypatch.setattr(pymongo, "MongoClient", _FakeMongoClient) - evidence = native_mongo_tools._capture_with_hint("esr_wrong_B") + evidence = native_mongo_tools._capture(QUERY_FILTER, [("saleDate", -1)], 20) assert evidence.metrics.total_keys_examined == 17209 assert evidence.metrics.has_blocking_sort is True - assert _FakeMongoClient.collection.cursor.hinted_with == "esr_wrong_B" + assert _FakeMongoClient.collection.cursor.hinted_with is None # natural plan, no hint + assert _FakeMongoClient.collection.last_find["filter"] == QUERY_FILTER assert _FakeMongoClient.closed is True