From 39a7f213a64c24d129ffdfab3267edcf3c48ec04 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Tue, 26 May 2026 15:49:33 +0200 Subject: [PATCH] feat(api,ui): showcase planning + knowledge lifecycle (#315) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds two new phases to the in-process /showcase demo pipeline on the showcase_rich scenario (PRP-40): planning (2 steps — scenario_simulate_and_save, multi_plan_compare) and knowledge (3 steps — embedding_provider_probe, rag_index_subset, rag_retrieve_probe). Both phases insert BEFORE the verify phase via a relative anchor so PRP-39 (sibling, parallel) rebases cleanly. Backend additive contracts: - IndexProjectDocsRequest.path_prefix: str | None = None on app/features/rag/schemas.py — restricts the docs/ root scan to a sub-path with a path-traversal guard. Default None preserves wholesale-scan behavior. - _parse_artifact_key + _embedding_provider_reachable helpers on app/features/demo/pipeline.py — R16 (scenarios.run_id is the artifact key, not model_run.run_id) and provider-presence-only probe per security-patterns.md. - DemoContext gains scenario_artifact_key / price_cut_scenario_id / holiday_scenario_id / embedding_unreachable fields (None on demo_minimal). Showcase_rich step count: 14 → 19. The knowledge phase SKIPs gracefully when no embedding provider is reachable; pipeline still goes green. Frontend mirrors the lockstep contract: PHASE_DEFS.ts ALL_STEPS gains five new entries + PHASE_ORDER + PHASE_LABEL add planning / knowledge; showcase.tsx resolveInspectHref switch deep-links the new step cards into /visualize/planner (with optional ?scenario_id), /knowledge, and /admin. Five new mini-summary helpers on demo-step-card.tsx render the per-step detail strips. Vertical-slice rule preserved — demo never imports scenarios / rag / config / registry; every call goes through httpx.ASGITransport. Docs: docs/_base/API_CONTRACTS.md notes the new path_prefix field and the two new phases; docs/_base/RUNBOOKS.md adds five new step failure-mode entries to the "Showcase page pipeline fails at step X" section. --- app/features/demo/pipeline.py | 389 ++++++++++ app/features/demo/tests/test_pipeline.py | 688 +++++++++++++++++- app/features/rag/schemas.py | 16 + app/features/rag/service.py | 18 +- app/features/rag/tests/test_service.py | 56 ++ docs/_base/API_CONTRACTS.md | 5 +- docs/_base/RUNBOOKS.md | 10 +- .../src/components/demo/PHASE_DEFS.test.ts | 14 +- frontend/src/components/demo/PHASE_DEFS.ts | 31 +- .../src/components/demo/demo-step-card.tsx | 152 ++++ frontend/src/hooks/use-demo-pipeline.test.ts | 12 +- frontend/src/pages/showcase.tsx | 36 +- 12 files changed, 1387 insertions(+), 40 deletions(-) diff --git a/app/features/demo/pipeline.py b/app/features/demo/pipeline.py index 09ced393..5c96f64c 100644 --- a/app/features/demo/pipeline.py +++ b/app/features/demo/pipeline.py @@ -25,6 +25,7 @@ import hashlib import json import math +import re import shutil import time from collections.abc import AsyncIterator, Awaitable, Callable @@ -218,6 +219,12 @@ class DemoContext: original_demo_alias_run_id: str | None = None batch_id: str | None = None batch_status: str | None = None + # PRP-40 — additive fields for the planning + knowledge phases (set on + # SHOWCASE_RICH runs only; remain None on demo_minimal / sparse). + scenario_artifact_key: str | None = None + price_cut_scenario_id: str | None = None + holiday_scenario_id: str | None = None + embedding_unreachable: bool = False # ============================================================================= @@ -262,6 +269,71 @@ def _llm_key_present() -> bool: return False +# PRP-40 — artifact-key parser for /scenarios/* run_id resolution. Two ID +# spaces: model_run.run_id (32-char UUID-hex) vs scenarios.run_id (12-char +# artifact key parsed from `model_{KEY}.joblib`). Memory anchor: +# [[scenario-run-id-vs-registry-run-id]]. +_ARTIFACT_KEY_RE = re.compile(r"model_([0-9a-f]+)(?:\.joblib)?$") + + +def _parse_artifact_key(artifact_uri: str) -> str: + """Extract the 12-char artifact-key from a registry artifact_uri. + + V1 demo: 'demo/{model_type}-model_{KEY}.joblib' -> KEY + V2: 'artifacts/models/model_{KEY}.joblib' -> KEY + """ + match = _ARTIFACT_KEY_RE.search(artifact_uri) + if match is None: + raise ValueError(f"Cannot parse artifact-key from artifact_uri: {artifact_uri!r}") + return match.group(1) + + +# PRP-40 — curated 5-file user-guide corpus indexed by the knowledge phase. +# The path_prefix RAG indexing additive contract scopes discovery to this +# subset (memory anchor: [[rag-runtime-config-and-corpus-state]] — keep the +# blast radius small). +_USER_GUIDE_CURATED_FILES: frozenset[str] = frozenset( + { + "docs/user-guide/getting-started.md", + "docs/user-guide/dashboard-guide.md", + "docs/user-guide/feature-reference.md", + "docs/user-guide/agents-and-rag-guide.md", + "docs/user-guide/advanced-forecasting-guide.md", + } +) + + +async def _embedding_provider_reachable(client: _Client) -> tuple[bool, str]: + """Probe whether the configured RAG embedding provider is reachable. + + Mirrors ``_llm_key_present()`` for the embedding provider. Returns + ``(reachable, provider_name)``. Logs key NAME only, never the value + (security-patterns.md). + + - openai -> bool(settings.openai_api_key) + - ollama -> live-probe via GET /config/providers/health (reads the + ollama entry's ``reachable`` field) + """ + settings = get_settings() + provider = settings.rag_embedding_provider + if provider == "openai": + return (bool(settings.openai_api_key), provider) + if provider == "ollama": + # GET /config/providers/health returns a list (per + # ConfigService.get_provider_health); _Client.request wraps top-level + # JSON arrays as ``{"_raw": [...]}`` (pipeline.py:158-159). + try: + body = await client.request("knowledge[probe]", "GET", "/config/providers/health") + except _StepError: + return (False, provider) + items = body.get("_raw", []) + if isinstance(items, list): + for entry in items: + if isinstance(entry, dict) and entry.get("provider") == "ollama": + return (bool(entry.get("reachable")), provider) + return (False, provider) + + def _select_winner( backtest_results: dict[str, dict[str, float]], ) -> tuple[str, float] | None: @@ -1032,6 +1104,302 @@ async def step_register(ctx: DemoContext, client: _Client) -> StepResult: ) +# ============================================================================= +# PRP-40 — Planning + Knowledge phase steps (SHOWCASE_RICH only) +# ============================================================================= + + +async def step_scenario_simulate_and_save(ctx: DemoContext, client: _Client) -> StepResult: + """PRP-40 — save a 10% price-cut scenario against the champion run. + + Resolves the demo-production champion -> artifact_uri -> 12-char artifact + key, then POSTs /scenarios to run the simulation AND persist it as + ``showcase-price-cut-10pct`` in one round-trip. R16 — scenarios.run_id is + the artifact key, not model_run.run_id. + """ + if ctx.date_end is None: + return ("fail", "no date_end on ctx (status step did not populate it)", {}) + + # (1) Resolve alias -> registry run_id (32-char uuid). + alias_body = await client.request( + "scenario_simulate_and_save[alias]", + "GET", + f"/registry/aliases/{DEMO_ALIAS}", + ) + winner_run_id = alias_body.get("run_id") + if not isinstance(winner_run_id, str): + return ("fail", f"{DEMO_ALIAS} alias has no run_id", {}) + + # (2) Resolve run -> artifact_uri. + run_body = await client.request( + "scenario_simulate_and_save[run]", + "GET", + f"/registry/runs/{winner_run_id}", + ) + artifact_uri = run_body.get("artifact_uri") + if not isinstance(artifact_uri, str): + return ("fail", f"run {winner_run_id[:8]}... has no artifact_uri", {}) + + # (3) Parse the 12-char artifact key. + try: + artifact_key = _parse_artifact_key(artifact_uri) + except ValueError as exc: + return ("fail", str(exc), {}) + ctx.scenario_artifact_key = artifact_key + + # (4+5) Build a price-cut assumption inside the forecast horizon and persist. + # POST /scenarios runs the simulation internally and stores the resulting + # ScenarioComparison in the response, so we don't need a separate + # /scenarios/simulate round-trip. + horizon_start = ctx.date_end + timedelta(days=1) + horizon_end = ctx.date_end + timedelta(days=DEMO_HORIZON) + assumptions = { + "price": { + "change_pct": -0.10, + "start_date": horizon_start.isoformat(), + "end_date": horizon_end.isoformat(), + } + } + plan_body = await client.request( + "scenario_simulate_and_save[save]", + "POST", + "/scenarios", + json_body={ + "name": "showcase-price-cut-10pct", + "run_id": artifact_key, + "horizon": DEMO_HORIZON, + "assumptions": assumptions, + "tags": ["showcase", "price"], + }, + ) + scenario_id_raw = plan_body.get("scenario_id") + if isinstance(scenario_id_raw, str): + ctx.price_cut_scenario_id = scenario_id_raw + + comparison = plan_body.get("comparison") or {} + method = comparison.get("method", "unknown") if isinstance(comparison, dict) else "unknown" + units_delta_raw = comparison.get("units_delta", 0.0) if isinstance(comparison, dict) else 0.0 + revenue_delta_raw = ( + comparison.get("revenue_delta", 0.0) if isinstance(comparison, dict) else 0.0 + ) + try: + units_delta = float(units_delta_raw) + except (TypeError, ValueError): + units_delta = 0.0 + try: + revenue_delta = float(revenue_delta_raw) + except (TypeError, ValueError): + revenue_delta = 0.0 + + return ( + "pass", + ( + f"plan=showcase-price-cut-10pct method={method} " + f"Δunits={units_delta:+.1f} Δrevenue={revenue_delta:+.2f}" + ), + { + "scenario_id": scenario_id_raw, + "method": method, + "units_delta": units_delta, + "revenue_delta": revenue_delta, + "winner_run_id": winner_run_id, + "artifact_key": artifact_key, + }, + ) + + +async def step_multi_plan_compare(ctx: DemoContext, client: _Client) -> StepResult: + """PRP-40 — save the holiday plan and rank both plans by revenue_delta. + + WARN (not FAIL) when the second-plan save fails so the visitor still sees + the first plan was saved (R19 partial-success surfacing). + """ + if ctx.price_cut_scenario_id is None or ctx.scenario_artifact_key is None: + return ("fail", "price_cut plan not saved by previous step", {}) + if ctx.date_end is None: + return ("fail", "no date_end on ctx", {}) + + # (1) Build a one-day holiday inside the horizon and persist plan #2. + holiday_day = (ctx.date_end + timedelta(days=DEMO_HORIZON // 2)).isoformat() + try: + plan_body = await client.request( + "multi_plan_compare[save]", + "POST", + "/scenarios", + json_body={ + "name": "showcase-holiday-uplift", + "run_id": ctx.scenario_artifact_key, + "horizon": DEMO_HORIZON, + "assumptions": {"holiday": {"dates": [holiday_day]}}, + "tags": ["showcase", "holiday"], + }, + ) + except _StepError as exc: + return ( + "warn", + f"holiday-plan save failed: {exc}; price-cut plan still saved", + {"price_cut_scenario_id": ctx.price_cut_scenario_id}, + ) + holiday_id_raw = plan_body.get("scenario_id") + if not isinstance(holiday_id_raw, str): + return ("warn", "holiday-plan save returned no scenario_id", {}) + ctx.holiday_scenario_id = holiday_id_raw + + # (2+3) Rank both plans by revenue_delta. + compare_body = await client.request( + "multi_plan_compare[compare]", + "POST", + "/scenarios/compare", + json_body={ + "scenario_ids": [ctx.price_cut_scenario_id, holiday_id_raw], + "rank_by": "revenue_delta", + }, + ) + scenarios_raw = compare_body.get("scenarios") or [] + # Filter on the runtime type up-front so the local list is typed as a + # ``list[dict[str, Any]]`` and downstream calls don't need to re-check. + scenarios_list: list[dict[str, Any]] = ( + [s for s in scenarios_raw if isinstance(s, dict)] if isinstance(scenarios_raw, list) else [] + ) + if not scenarios_list: + return ("fail", "/scenarios/compare returned empty ranked list", {}) + winner = scenarios_list[0] + winner_id = winner.get("scenario_id", "unknown") + winner_name = winner.get("name", "unknown") + ranked = [ + { + "scenario_id": s.get("scenario_id"), + "name": s.get("name"), + "units_delta": s.get("units_delta"), + "revenue_delta": s.get("revenue_delta"), + "rank": s.get("rank"), + } + for s in scenarios_list + ] + return ( + "pass", + f"winner={winner_name} ranked_by=revenue_delta", + { + "winner_scenario_id": winner_id, + "winner_name": winner_name, + "ranked_by": "revenue_delta", + "ranked": ranked, + }, + ) + + +async def step_embedding_provider_probe(ctx: DemoContext, client: _Client) -> StepResult: + """PRP-40 — probe the configured embedding provider. Always PASS. + + When reachable, downstream knowledge steps run normally. When unreachable, + sets ``ctx.embedding_unreachable=True`` so the next two steps SKIP with a + clear detail; the pipeline still goes green. + """ + reachable, provider = await _embedding_provider_reachable(client) + ctx.embedding_unreachable = not reachable + logger.info( + "demo.embedding_provider_probe", + provider=provider, + reachable=reachable, + ) + detail = ( + f"provider={provider} reachable={reachable}" + if reachable + else f"provider={provider} unreachable — knowledge phase will skip" + ) + return ("pass", detail, {"provider": provider, "reachable": reachable}) + + +async def step_rag_index_subset(ctx: DemoContext, client: _Client) -> StepResult: + """PRP-40 — index the curated 5-file docs/user-guide corpus. + + SKIPs when ``ctx.embedding_unreachable`` is set (by the prior probe step). + Uses the additive ``path_prefix`` field on IndexProjectDocsRequest so the + blast radius stays scoped to the user-guide subset. + """ + if ctx.embedding_unreachable: + return ("skip", "embedding provider unreachable", {}) + + body = await client.request( + "rag_index_subset", + "POST", + "/rag/index/project-docs", + json_body={ + "include_docs": True, + "include_prps": False, + "include_root": False, + "path_prefix": "docs/user-guide", + }, + ) + results = body.get("results") or [] + total_chunks = int(body.get("total_chunks", 0)) + failed = int(body.get("failed", 0)) + indexed = int(body.get("indexed", 0)) + updated = int(body.get("updated", 0)) + unchanged = int(body.get("unchanged", 0)) + curated_hits = sum( + 1 + for r in results + if isinstance(r, dict) and r.get("source_path") in _USER_GUIDE_CURATED_FILES + ) + return ( + "pass", + f"files_indexed={curated_hits}/5 chunks={total_chunks} failed={failed}", + { + "total_files": int(body.get("total_files", 0)), + "indexed": indexed, + "updated": updated, + "unchanged": unchanged, + "failed": failed, + "total_chunks": total_chunks, + "curated_hits": curated_hits, + }, + ) + + +async def step_rag_retrieve_probe(ctx: DemoContext, client: _Client) -> StepResult: + """PRP-40 — semantic-retrieve probe against the curated corpus. + + SKIPs when ``ctx.embedding_unreachable``. WARN (not FAIL) on zero hits so + a green-but-empty corpus still lets the pipeline go green. + """ + if ctx.embedding_unreachable: + return ("skip", "embedding provider unreachable", {}) + + body = await client.request( + "rag_retrieve_probe", + "POST", + "/rag/retrieve", + json_body={"query": "How do I run the demo pipeline?", "top_k": 3}, + ) + results = body.get("results") or [] + if not results: + return ( + "warn", + "no hits — corpus indexed but query did not match", + { + "results_count": 0, + "total_chunks_searched": body.get("total_chunks_searched", 0), + }, + ) + top = results[0] if isinstance(results, list) else {} + title = top.get("source_path", "unknown") if isinstance(top, dict) else "unknown" + score_raw = top.get("relevance_score", 0.0) if isinstance(top, dict) else 0.0 + try: + score = float(score_raw) + except (TypeError, ValueError): + score = 0.0 + return ( + "pass", + f"top hit: {title} (score={score:.3f})", + { + "results_count": len(results), + "top_source_path": title, + "top_relevance_score": score, + }, + ) + + async def step_verify(ctx: DemoContext, client: _Client) -> StepResult: """SHA-256 artifact-integrity check via the public verify endpoint. @@ -1619,6 +1987,10 @@ async def step_cleanup(ctx: DemoContext, client: _Client) -> StepResult: PHASE_DECISION = "decision" # PRP-39 — new portfolio phase, inserted between decision and verify. PHASE_PORTFOLIO = "portfolio" +# PRP-40 — planning + knowledge phases inserted AFTER portfolio, BEFORE verify +# on SHOWCASE_RICH. +PHASE_PLANNING = "planning" +PHASE_KNOWLEDGE = "knowledge" PHASE_VERIFY = "verify" PHASE_AGENT = "agent" PHASE_CLEANUP = "cleanup" @@ -1650,6 +2022,9 @@ def _phase_table(scenario: ScenarioPreset) -> list[PhaseStep]: ] # PRP-39 — new portfolio phase, empty under demo_minimal/sparse. portfolio_steps: list[tuple[str, StepFn]] = [] + # PRP-40 — planning + knowledge default to empty; populated on SHOWCASE_RICH. + planning_steps: list[tuple[str, StepFn]] = [] + knowledge_steps: list[tuple[str, StepFn]] = [] verify_steps: list[tuple[str, StepFn]] = [("verify", step_verify)] agent_steps: list[tuple[str, StepFn]] = [("agent", step_agent)] cleanup_steps: list[tuple[str, StepFn]] = [("cleanup", step_cleanup)] @@ -1667,12 +2042,26 @@ def _phase_table(scenario: ScenarioPreset) -> list[PhaseStep]: ] # PRP-39 — new portfolio phase has its one step under showcase_rich. portfolio_steps = [("batch_preset", step_batch_preset)] + # PRP-40 — planning + knowledge phases live in the SHOWCASE_RICH branch. + planning_steps = [ + ("scenario_simulate_and_save", step_scenario_simulate_and_save), + ("multi_plan_compare", step_multi_plan_compare), + ] + knowledge_steps = [ + ("embedding_provider_probe", step_embedding_provider_probe), + ("rag_index_subset", step_rag_index_subset), + ("rag_retrieve_probe", step_rag_retrieve_probe), + ] rows: list[PhaseStep] = [] rows += [(PHASE_DATA, name, fn) for name, fn in data_steps] rows += [(PHASE_MODELING, name, fn) for name, fn in modeling_steps] rows += [(PHASE_DECISION, name, fn) for name, fn in decision_steps] # PRP-39 — INSERT portfolio BEFORE verify (relative anchor). rows += [(PHASE_PORTFOLIO, name, fn) for name, fn in portfolio_steps] + # PRP-40 — planning + knowledge inserted AFTER portfolio, BEFORE verify + # (relative anchor; both are no-ops outside SHOWCASE_RICH). + rows += [(PHASE_PLANNING, name, fn) for name, fn in planning_steps] + rows += [(PHASE_KNOWLEDGE, name, fn) for name, fn in knowledge_steps] rows += [(PHASE_VERIFY, name, fn) for name, fn in verify_steps] rows += [(PHASE_AGENT, name, fn) for name, fn in agent_steps] rows += [(PHASE_CLEANUP, name, fn) for name, fn in cleanup_steps] diff --git a/app/features/demo/tests/test_pipeline.py b/app/features/demo/tests/test_pipeline.py index 782e2157..93e697a8 100644 --- a/app/features/demo/tests/test_pipeline.py +++ b/app/features/demo/tests/test_pipeline.py @@ -9,7 +9,7 @@ from __future__ import annotations from types import SimpleNamespace -from typing import Any +from typing import Any, cast from fastapi import FastAPI @@ -31,6 +31,8 @@ def _canned_response( json_body: dict[str, Any] | None, artifact_path: str, wapes: dict[str, float], + *, + method: str = "POST", ) -> dict[str, Any]: """Return a canned 2xx body for a given endpoint path.""" if path == "/health": @@ -95,6 +97,112 @@ def _canned_response( return response if path == "/registry/runs": return {"run_id": "demo-run-abc123def456"} + # PRP-40 — planning + knowledge endpoints (showcase_rich only). + if path == "/registry/aliases/demo-production": + return {"alias_name": "demo-production", "run_id": "demo-run-abc123def456"} + if ( + method == "GET" + and path.startswith("/registry/runs/") + and not path.endswith("/verify") + and not path.endswith("/feature-metadata") + ): + # GET /registry/runs/{run_id} returns artifact_uri. + return { + "run_id": "demo-run-abc123def456", + "artifact_uri": "demo/seasonal_naive-model_abc123def456.joblib", + "status": "success", + } + if path == "/scenarios": + # POST /scenarios runs the simulation and stores the snapshot. + assert json_body is not None + name = json_body.get("name", "") + scenario_id = f"scn-{name}" + units_delta = -25.0 if "price" in name else 18.5 + revenue_delta = -180.0 if "price" in name else 220.0 + return { + "scenario_id": scenario_id, + "name": name, + "store_id": 7, + "product_id": 3, + "run_id": json_body.get("run_id", ""), + "horizon": json_body.get("horizon", 14), + "method": "heuristic", + "created_at": "2026-05-26T10:00:00Z", + "assumptions": json_body.get("assumptions", {}), + "comparison": { + "method": "heuristic", + "units_delta": units_delta, + "revenue_delta": revenue_delta, + "units_delta_pct": 0.0, + }, + "tags": json_body.get("tags", []), + } + if path == "/scenarios/compare": + assert json_body is not None + ids = json_body.get("scenario_ids", []) + # Holiday plan ranks first (higher revenue_delta in the canned data). + ordered = [ + { + "scenario_id": ids[1] if len(ids) > 1 else "scn-unknown", + "name": "showcase-holiday-uplift", + "units_delta": 18.5, + "revenue_delta": 220.0, + "coverage_verdict": "ok", + "rank": 1, + }, + { + "scenario_id": ids[0] if ids else "scn-unknown", + "name": "showcase-price-cut-10pct", + "units_delta": -25.0, + "revenue_delta": -180.0, + "coverage_verdict": "ok", + "rank": 2, + }, + ] + return {"scenarios": ordered, "chart_data": []} + if path == "/config/providers/health": + # _Client wraps top-level JSON arrays as {"_raw": [...]}. + return { + "_raw": [ + {"provider": "ollama", "reachable": True, "detail": "ok", "models": []}, + { + "provider": "openai", + "reachable": True, + "detail": "key present", + "models": [], + }, + ] + } + if path == "/rag/index/project-docs": + # Canned 5-file curated index — every test target file present. + from app.features.demo.pipeline import _USER_GUIDE_CURATED_FILES + + results = [ + {"source_path": p, "status": "indexed", "chunks_created": 4, "error": None} + for p in sorted(_USER_GUIDE_CURATED_FILES) + ] + return { + "results": results, + "total_files": 5, + "indexed": 5, + "updated": 0, + "unchanged": 0, + "failed": 0, + "total_chunks": 20, + "duration_ms": 120.0, + } + if path == "/rag/retrieve": + return { + "results": [ + { + "source_path": "docs/user-guide/getting-started.md", + "content": "demo content...", + "relevance_score": 0.87, + "chunk_index": 0, + } + ], + "total_chunks_searched": 20, + } if path == "/seeder/phase2-enrichment": return { "success": True, @@ -142,7 +250,10 @@ def _canned_response( "config_diff": {}, "metrics_diff": {}, } - if path.startswith("/registry/runs/"): # PATCH pending->running->success + if path.startswith("/registry/runs/"): + # PATCH pending->running->success returns an empty (200) body. The + # PRP-40 GET /registry/runs/{run_id} branch is handled earlier in + # this function (returns artifact_uri). return {} if path == "/registry/aliases": return {} @@ -209,19 +320,31 @@ async def request( json_body: dict[str, Any] | None = None, ) -> dict[str, Any]: self.calls.append((method, path)) - return _canned_response(path, json_body, artifact_path, wapes) + return _canned_response(path, json_body, artifact_path, wapes, method=method) return _FakeClient -def _fake_settings(registry_root: str) -> SimpleNamespace: - """Fake settings: usable registry root, no LLM keys (agent step skips).""" +def _fake_settings( + registry_root: str, + *, + rag_embedding_provider: str = "openai", + openai_api_key: str = "sk-test", +) -> SimpleNamespace: + """Fake settings: usable registry root, no agent LLM key (agent skips). + + ``rag_embedding_provider`` defaults to "openai" with a present key so the + PRP-40 knowledge phase runs to completion in test fixtures; the + knowledge-skip tests override via ``rag_embedding_provider="openai"`` + + ``openai_api_key=""`` (or "ollama" with an unreachable canned probe). + """ return SimpleNamespace( registry_artifact_root=registry_root, agent_default_model="anthropic:claude-test", anthropic_api_key="", - openai_api_key="", + openai_api_key=openai_api_key, google_api_key="", + rag_embedding_provider=rag_embedding_provider, ) @@ -445,13 +568,17 @@ def test_phase_table_demo_minimal_matches_legacy_11_steps(): ] -def test_phase_table_showcase_rich_adds_v2_steps(): - """PRP-38/39 — phase_table for SHOWCASE_RICH adds 3+4 steps; phase order stable. +def test_phase_table_showcase_rich_adds_v2_decision_portfolio_planning_knowledge_steps(): + """PRP-38 + PRP-39 + PRP-40 — phase_table for SHOWCASE_RICH is the canonical 23 rows. PRP-38 shipped 3 (phase2_enrichment, historical_backfill, v2_train). - PRP-39 adds 4 more (champion_compat_compare, stale_alias_trigger, - safer_promote_flow, batch_preset) AND a new ``portfolio`` phase between - ``decision`` and ``verify``. Total: 18 rows across 7 phases. + PRP-39 added 4 (champion_compat_compare, stale_alias_trigger, + safer_promote_flow, batch_preset) plus a new ``portfolio`` phase between + ``decision`` and ``verify``. + PRP-40 adds 5 (scenario_simulate_and_save, multi_plan_compare, + embedding_provider_probe, rag_index_subset, rag_retrieve_probe) under two + new ``planning`` + ``knowledge`` phases, AFTER portfolio and BEFORE verify + via a relative anchor. Total: 23 rows across 9 phases. """ rows = pipeline._phase_table(ScenarioPreset.SHOWCASE_RICH) by_phase_step = [(p, s) for p, s, _fn in rows] @@ -471,8 +598,14 @@ def test_phase_table_showcase_rich_adds_v2_steps(): ("decision", "champion_compat_compare"), ("decision", "stale_alias_trigger"), ("decision", "safer_promote_flow"), - # PRP-39 — new portfolio phase between decision and verify. + # PRP-39 — portfolio phase between decision and verify. ("portfolio", "batch_preset"), + # PRP-40 — planning + knowledge phases after portfolio, before verify. + ("planning", "scenario_simulate_and_save"), + ("planning", "multi_plan_compare"), + ("knowledge", "embedding_provider_probe"), + ("knowledge", "rag_index_subset"), + ("knowledge", "rag_retrieve_probe"), ("verify", "verify"), ("agent", "agent"), ("cleanup", "cleanup"), @@ -581,12 +714,14 @@ async def test_run_pipeline_showcase_rich_runs_v2_and_buckets(monkeypatch, tmp_p assert final.data["v2_run_id"] == "demo-run-abc123def456" -async def test_run_pipeline_showcase_rich_emits_18_steps(monkeypatch, tmp_path): - """PRP-38/39 — SHOWCASE_RICH adds 3+4 new steps (11 -> 18 total). +async def test_run_pipeline_showcase_rich_emits_23_steps(monkeypatch, tmp_path): + """PRP-38 + PRP-39 + PRP-40 — SHOWCASE_RICH = 11 base + 3 PRP-38 + 4 PRP-39 + 5 PRP-40 = 23 total steps. - PRP-38 shipped 14 (11 + phase2_enrichment + historical_backfill + v2_train). - PRP-39 adds 4 more (champion_compat_compare + stale_alias_trigger + + PRP-38 shipped 3 (phase2_enrichment + historical_backfill + v2_train). + PRP-39 added 4 (champion_compat_compare + stale_alias_trigger + safer_promote_flow + batch_preset). + PRP-40 adds 5 (scenario_simulate_and_save + multi_plan_compare + + embedding_provider_probe + rag_index_subset + rag_retrieve_probe). """ artifact = tmp_path / "artifacts" / "models" / "model_x.joblib" artifact.parent.mkdir(parents=True, exist_ok=True) @@ -598,10 +733,10 @@ async def test_run_pipeline_showcase_rich_emits_18_steps(monkeypatch, tmp_path): req = DemoRunRequest(scenario=ScenarioPreset.SHOWCASE_RICH) events = [e async for e in pipeline.run_pipeline(app=_FAKE_APP, req=req)] completes = [e for e in events if e.event_type == "step_complete"] - assert len(completes) == 18 - # Every event reports total_steps=18 + assert len(completes) == 23 + # Every event reports total_steps=23 for ev in completes: - assert ev.total_steps == 18 + assert ev.total_steps == 23 # ============================================================================= @@ -781,3 +916,518 @@ async def test_cleanup_skips_when_nothing_to_restore_or_close(monkeypatch, tmp_p assert status == "skip" assert data["alias_restored"] is False assert data["agent_session_closed"] is False + + +# ============================================================================= +# PRP-40 — Helpers + planning/knowledge step unit tests +# ============================================================================= + + +def test_parse_artifact_key_v1_demo_path(): + """PRP-40 — V1 demo path: 'demo/{model_type}-model_{KEY}.joblib'.""" + key = pipeline._parse_artifact_key("demo/seasonal_naive-model_abc123def456.joblib") + assert key == "abc123def456" + + +def test_parse_artifact_key_v2_artifacts_models_path(): + """PRP-40 — V2 path: 'artifacts/models/model_{KEY}.joblib'.""" + key = pipeline._parse_artifact_key("artifacts/models/model_deadbeef0042.joblib") + assert key == "deadbeef0042" + + +def test_parse_artifact_key_rejects_unparseable(): + """PRP-40 — a malformed artifact_uri raises ValueError (not a silent miss).""" + import pytest + + with pytest.raises(ValueError, match="Cannot parse artifact-key"): + pipeline._parse_artifact_key("not-a-model-uri.bin") + + +class _RecordingClient: + """A minimal stand-in for pipeline._Client recording every call. + + Tests pass a dict of (method, path) -> response body; missing entries + raise AssertionError so unexpected requests show up loudly. + """ + + def __init__( + self, + _app: Any, + responses: dict[tuple[str, str], Any] | None = None, + errors: dict[tuple[str, str], pipeline._StepError] | None = None, + ) -> None: + self._responses = responses or {} + self._errors = errors or {} + self.calls: list[tuple[str, str, dict[str, Any] | None]] = [] + + async def __aenter__(self) -> _RecordingClient: + return self + + async def __aexit__(self, *_exc: object) -> None: + return None + + async def request( + self, + step: str, + method: str, + path: str, + *, + json_body: dict[str, Any] | None = None, + ) -> dict[str, Any]: + self.calls.append((method, path, json_body)) + key = (method, path) + if key in self._errors: + raise self._errors[key] + if key in self._responses: + response = self._responses[key] + if not isinstance(response, dict): + raise AssertionError( + f"canned response for {key!r} must be a dict, got {type(response)}" + ) + return cast("dict[str, Any]", response) + raise AssertionError(f"unexpected request: method={method!r} path={path!r}") + + +def _as_client(rec: _RecordingClient) -> pipeline._Client: + """Cast a _RecordingClient stand-in to the real _Client type for typecheckers. + + The stand-in is structurally compatible with pipeline._Client (same async + ``request`` signature) but mypy can't see that since the real _Client is + not declared as a Protocol. ``cast`` is the load-bearing escape hatch. + """ + return cast("pipeline._Client", rec) + + +def _make_showcase_ctx(scenario: ScenarioPreset = ScenarioPreset.SHOWCASE_RICH) -> Any: + """Build a DemoContext positioned at the start of the planning phase.""" + from datetime import date as date_type + + ctx = pipeline.DemoContext(seed=42, skip_seed=True, reset=False, scenario=scenario) + ctx.store_id = 7 + ctx.product_id = 3 + ctx.date_start = date_type(2024, 1, 1) + ctx.date_end = date_type(2024, 12, 31) + ctx.winner_model_type = "seasonal_naive" + ctx.winning_run_id = "demo-run-abc123def456" + return ctx + + +async def test_scenario_simulate_and_save_happy_path(): + """PRP-40 — happy path: resolves alias -> run -> artifact_key, saves plan.""" + ctx = _make_showcase_ctx() + client = _RecordingClient( + None, + responses={ + ( + "GET", + "/registry/aliases/demo-production", + ): {"alias_name": "demo-production", "run_id": "uuid-32-char"}, + ("GET", "/registry/runs/uuid-32-char"): { + "run_id": "uuid-32-char", + "artifact_uri": "demo/seasonal_naive-model_abc123def456.joblib", + }, + ("POST", "/scenarios"): { + "scenario_id": "scn-001", + "comparison": { + "method": "heuristic", + "units_delta": -25.5, + "revenue_delta": -180.0, + }, + }, + }, + ) + status, detail, data = await pipeline.step_scenario_simulate_and_save(ctx, _as_client(client)) + assert status == "pass" + assert ctx.scenario_artifact_key == "abc123def456" + assert ctx.price_cut_scenario_id == "scn-001" + assert data["method"] == "heuristic" + assert data["units_delta"] == -25.5 + assert data["revenue_delta"] == -180.0 + assert data["artifact_key"] == "abc123def456" + assert "showcase-price-cut-10pct" in detail + assert "heuristic" in detail + # The POST /scenarios body carried the additive price assumption. + save_call = next(c for c in client.calls if c[0] == "POST" and c[1] == "/scenarios") + body = save_call[2] + assert body is not None + assert body["name"] == "showcase-price-cut-10pct" + assert body["run_id"] == "abc123def456" + assert body["assumptions"]["price"]["change_pct"] == -0.10 + assert body["tags"] == ["showcase", "price"] + + +async def test_scenario_simulate_and_save_missing_alias_fails(): + """PRP-40 — alias missing run_id -> FAIL with clear detail.""" + ctx = _make_showcase_ctx() + client = _RecordingClient( + None, + responses={ + ("GET", "/registry/aliases/demo-production"): {"alias_name": "demo-production"}, + }, + ) + status, detail, _ = await pipeline.step_scenario_simulate_and_save(ctx, _as_client(client)) + assert status == "fail" + assert "no run_id" in detail + + +async def test_scenario_simulate_and_save_unparseable_artifact_uri_fails(): + """PRP-40 — artifact_uri the regex can't parse -> FAIL.""" + ctx = _make_showcase_ctx() + client = _RecordingClient( + None, + responses={ + ("GET", "/registry/aliases/demo-production"): {"run_id": "uuid"}, + ("GET", "/registry/runs/uuid"): {"artifact_uri": "garbage-path.bin"}, + }, + ) + status, detail, _ = await pipeline.step_scenario_simulate_and_save(ctx, _as_client(client)) + assert status == "fail" + assert "artifact-key" in detail + + +async def test_multi_plan_compare_happy_path(): + """PRP-40 — happy path: second-plan save + compare returns ranked list.""" + ctx = _make_showcase_ctx() + ctx.price_cut_scenario_id = "scn-price" + ctx.scenario_artifact_key = "abc123def456" + client = _RecordingClient( + None, + responses={ + ("POST", "/scenarios"): { + "scenario_id": "scn-holiday", + "comparison": {"method": "heuristic"}, + }, + ("POST", "/scenarios/compare"): { + "scenarios": [ + { + "scenario_id": "scn-holiday", + "name": "showcase-holiday-uplift", + "units_delta": 18.5, + "revenue_delta": 220.0, + "coverage_verdict": "ok", + "rank": 1, + }, + { + "scenario_id": "scn-price", + "name": "showcase-price-cut-10pct", + "units_delta": -25.5, + "revenue_delta": -180.0, + "coverage_verdict": "ok", + "rank": 2, + }, + ], + }, + }, + ) + status, detail, data = await pipeline.step_multi_plan_compare(ctx, _as_client(client)) + assert status == "pass" + assert ctx.holiday_scenario_id == "scn-holiday" + assert data["winner_scenario_id"] == "scn-holiday" + assert data["winner_name"] == "showcase-holiday-uplift" + assert data["ranked_by"] == "revenue_delta" + assert len(data["ranked"]) == 2 + assert "winner=showcase-holiday-uplift" in detail + + +async def test_multi_plan_compare_second_save_failure_emits_warn(): + """PRP-40 R19 — second-plan POST 4xx -> WARN, NOT FAIL (partial success).""" + ctx = _make_showcase_ctx() + ctx.price_cut_scenario_id = "scn-price" + ctx.scenario_artifact_key = "abc123def456" + client = _RecordingClient( + None, + errors={ + ("POST", "/scenarios"): pipeline._StepError( + "multi_plan_compare[save]", + 422, + {"title": "Unprocessable Entity", "detail": "horizon out of range"}, + ), + }, + ) + status, detail, data = await pipeline.step_multi_plan_compare(ctx, _as_client(client)) + assert status == "warn" + assert "holiday-plan save failed" in detail + assert data["price_cut_scenario_id"] == "scn-price" + + +async def test_multi_plan_compare_fails_without_price_cut_plan(): + """PRP-40 — missing prior-step state -> FAIL with clear detail.""" + ctx = _make_showcase_ctx() # price_cut_scenario_id stays None + client = _RecordingClient(None) + status, detail, _ = await pipeline.step_multi_plan_compare(ctx, _as_client(client)) + assert status == "fail" + assert "price_cut plan not saved" in detail + + +async def test_embedding_provider_probe_reachable_openai(monkeypatch, tmp_path): + """PRP-40 — openai key present -> PASS + ctx.embedding_unreachable=False.""" + monkeypatch.setattr( + pipeline, + "get_settings", + lambda: _fake_settings( + str(tmp_path / "reg"), + rag_embedding_provider="openai", + openai_api_key="sk-test", + ), + ) + ctx = _make_showcase_ctx() + client = _RecordingClient(None) + status, detail, data = await pipeline.step_embedding_provider_probe(ctx, _as_client(client)) + assert status == "pass" + assert ctx.embedding_unreachable is False + assert data["provider"] == "openai" + assert data["reachable"] is True + assert "reachable=True" in detail + + +async def test_embedding_provider_probe_unreachable_openai(monkeypatch, tmp_path): + """PRP-40 — openai with empty key -> PASS + ctx.embedding_unreachable=True.""" + monkeypatch.setattr( + pipeline, + "get_settings", + lambda: _fake_settings( + str(tmp_path / "reg"), + rag_embedding_provider="openai", + openai_api_key="", + ), + ) + ctx = _make_showcase_ctx() + client = _RecordingClient(None) + status, detail, data = await pipeline.step_embedding_provider_probe(ctx, _as_client(client)) + assert status == "pass" + assert ctx.embedding_unreachable is True + assert data["reachable"] is False + assert "knowledge phase will skip" in detail + + +async def test_embedding_provider_probe_ollama_reachable(monkeypatch, tmp_path): + """PRP-40 — ollama provider live-probed via /config/providers/health.""" + monkeypatch.setattr( + pipeline, + "get_settings", + lambda: _fake_settings( + str(tmp_path / "reg"), + rag_embedding_provider="ollama", + ), + ) + ctx = _make_showcase_ctx() + # _Client wraps a list response as {"_raw": [...]}. + client = _RecordingClient( + None, + responses={ + ("GET", "/config/providers/health"): { + "_raw": [ + {"provider": "ollama", "reachable": True, "detail": "ok", "models": []}, + ] + }, + }, + ) + status, _detail, data = await pipeline.step_embedding_provider_probe(ctx, _as_client(client)) + assert status == "pass" + assert ctx.embedding_unreachable is False + assert data["provider"] == "ollama" + assert data["reachable"] is True + + +async def test_embedding_provider_probe_ollama_unreachable(monkeypatch, tmp_path): + """PRP-40 — ollama probe returns reachable=False -> ctx.embedding_unreachable.""" + monkeypatch.setattr( + pipeline, + "get_settings", + lambda: _fake_settings( + str(tmp_path / "reg"), + rag_embedding_provider="ollama", + ), + ) + ctx = _make_showcase_ctx() + client = _RecordingClient( + None, + responses={ + ("GET", "/config/providers/health"): { + "_raw": [ + {"provider": "ollama", "reachable": False, "detail": "down", "models": []}, + ] + }, + }, + ) + status, _, data = await pipeline.step_embedding_provider_probe(ctx, _as_client(client)) + assert status == "pass" + assert ctx.embedding_unreachable is True + assert data["reachable"] is False + + +async def test_rag_index_subset_happy_path(): + """PRP-40 — index subset returns curated_hits + total_chunks counters.""" + ctx = _make_showcase_ctx() + results = [ + {"source_path": p, "status": "indexed", "chunks_created": 4, "error": None} + for p in sorted(pipeline._USER_GUIDE_CURATED_FILES) + ] + client = _RecordingClient( + None, + responses={ + ("POST", "/rag/index/project-docs"): { + "results": results, + "total_files": 5, + "indexed": 5, + "updated": 0, + "unchanged": 0, + "failed": 0, + "total_chunks": 20, + "duration_ms": 120.0, + }, + }, + ) + status, detail, data = await pipeline.step_rag_index_subset(ctx, _as_client(client)) + assert status == "pass" + assert data["curated_hits"] == 5 + assert data["total_chunks"] == 20 + assert "files_indexed=5/5" in detail + # The POST body carried path_prefix="docs/user-guide". + body = client.calls[0][2] + assert body is not None + assert body["path_prefix"] == "docs/user-guide" + assert body["include_docs"] is True + assert body["include_prps"] is False + assert body["include_root"] is False + + +async def test_rag_index_subset_skips_when_provider_unreachable(): + """PRP-40 — skip with detail 'embedding provider unreachable'; no HTTP call.""" + ctx = _make_showcase_ctx() + ctx.embedding_unreachable = True + client = _RecordingClient(None) # any call would AssertionError + status, detail, _ = await pipeline.step_rag_index_subset(ctx, _as_client(client)) + assert status == "skip" + assert "unreachable" in detail + assert client.calls == [] + + +async def test_rag_retrieve_probe_happy_path(): + """PRP-40 — top hit + similarity score surface on PASS.""" + ctx = _make_showcase_ctx() + client = _RecordingClient( + None, + responses={ + ("POST", "/rag/retrieve"): { + "results": [ + { + "source_path": "docs/user-guide/getting-started.md", + "content": "...", + "relevance_score": 0.87, + "chunk_index": 0, + } + ], + "total_chunks_searched": 20, + }, + }, + ) + status, detail, data = await pipeline.step_rag_retrieve_probe(ctx, _as_client(client)) + assert status == "pass" + assert data["results_count"] == 1 + assert data["top_source_path"] == "docs/user-guide/getting-started.md" + assert data["top_relevance_score"] == 0.87 + assert "score=0.870" in detail + + +async def test_rag_retrieve_probe_zero_hits_emits_warn(): + """PRP-40 — empty results list -> WARN (not FAIL); pipeline still green.""" + ctx = _make_showcase_ctx() + client = _RecordingClient( + None, + responses={ + ("POST", "/rag/retrieve"): {"results": [], "total_chunks_searched": 0}, + }, + ) + status, detail, data = await pipeline.step_rag_retrieve_probe(ctx, _as_client(client)) + assert status == "warn" + assert "no hits" in detail + assert data["results_count"] == 0 + + +async def test_rag_retrieve_probe_skips_when_provider_unreachable(): + """PRP-40 — skip when ctx.embedding_unreachable; no HTTP call.""" + ctx = _make_showcase_ctx() + ctx.embedding_unreachable = True + client = _RecordingClient(None) + status, detail, _ = await pipeline.step_rag_retrieve_probe(ctx, _as_client(client)) + assert status == "skip" + assert "unreachable" in detail + assert client.calls == [] + + +async def test_run_pipeline_showcase_rich_runs_planning_and_knowledge(monkeypatch, tmp_path): + """PRP-40 — end-to-end SHOWCASE_RICH reaches the 5 new steps + greens.""" + artifact = tmp_path / "artifacts" / "models" / "model_abc123def456.joblib" + artifact.parent.mkdir(parents=True, exist_ok=True) + artifact.write_bytes(b"v2 bundle bytes") + monkeypatch.setattr(pipeline, "get_settings", lambda: _fake_settings(str(tmp_path / "reg"))) + # prophet_like wins so v2 path is exercised end-to-end. + wapes = { + "naive": 0.30, + "seasonal_naive": 0.18, + "moving_average": 0.25, + "prophet_like": 0.10, + } + monkeypatch.setattr(pipeline, "_Client", _build_fake_client(str(artifact), wapes)) + + req = DemoRunRequest(scenario=ScenarioPreset.SHOWCASE_RICH) + events = [e async for e in pipeline.run_pipeline(app=_FAKE_APP, req=req)] + by_name = {e.step_name: e for e in events if e.event_type == "step_complete"} + + # The 5 new step cards all emitted terminal statuses. + assert by_name["scenario_simulate_and_save"].status == "pass" + assert by_name["scenario_simulate_and_save"].data["method"] == "heuristic" + assert by_name["multi_plan_compare"].status == "pass" + assert by_name["multi_plan_compare"].data["ranked_by"] == "revenue_delta" + assert by_name["embedding_provider_probe"].status == "pass" + assert by_name["embedding_provider_probe"].data["reachable"] is True + assert by_name["rag_index_subset"].status == "pass" + assert by_name["rag_index_subset"].data["curated_hits"] == 5 + assert by_name["rag_retrieve_probe"].status == "pass" + assert "docs/user-guide" in by_name["rag_retrieve_probe"].data["top_source_path"] + + # Pipeline still greens. + final = events[-1] + assert final.event_type == "pipeline_complete" + assert final.status == "pass" + + +async def test_run_pipeline_showcase_rich_skips_knowledge_when_provider_unreachable( + monkeypatch, tmp_path +): + """PRP-40 C3 — every embedding provider unreachable -> 3x skip; pipeline green.""" + artifact = tmp_path / "artifacts" / "models" / "model_abc123def456.joblib" + artifact.parent.mkdir(parents=True, exist_ok=True) + artifact.write_bytes(b"v2 bundle bytes") + monkeypatch.setattr( + pipeline, + "get_settings", + lambda: _fake_settings( + str(tmp_path / "reg"), + rag_embedding_provider="openai", + openai_api_key="", # the embedding provider key is absent. + ), + ) + wapes = { + "naive": 0.30, + "seasonal_naive": 0.18, + "moving_average": 0.25, + "prophet_like": 0.10, + } + monkeypatch.setattr(pipeline, "_Client", _build_fake_client(str(artifact), wapes)) + + req = DemoRunRequest(scenario=ScenarioPreset.SHOWCASE_RICH) + events = [e async for e in pipeline.run_pipeline(app=_FAKE_APP, req=req)] + by_name = {e.step_name: e for e in events if e.event_type == "step_complete"} + + # Probe still PASSES (always emits pass; just flags downstream to skip). + assert by_name["embedding_provider_probe"].status == "pass" + assert by_name["embedding_provider_probe"].data["reachable"] is False + # Downstream knowledge steps skip. + assert by_name["rag_index_subset"].status == "skip" + assert by_name["rag_retrieve_probe"].status == "skip" + # Pipeline still greens. + final = events[-1] + assert final.event_type == "pipeline_complete" + assert final.status == "pass" diff --git a/app/features/rag/schemas.py b/app/features/rag/schemas.py index 44dbac2d..f338db1d 100644 --- a/app/features/rag/schemas.py +++ b/app/features/rag/schemas.py @@ -190,6 +190,11 @@ class IndexProjectDocsRequest(BaseModel): include_docs: Index markdown discovered under docs/**. include_prps: Index markdown discovered under PRPs/**. include_root: Index the root allow-list (README/AGENTS/CHANGELOG). + path_prefix: Optional repo-relative sub-path under docs/ to restrict + discovery to (PRP-40). When None (default), scanning is wholesale + (back-compat). Only applies when ``include_docs`` is True; the + ``include_prps`` / ``include_root`` branches are unaffected. + A path-traversal guard rejects values that escape the project root. """ model_config = ConfigDict(extra="forbid") @@ -199,6 +204,17 @@ class IndexProjectDocsRequest(BaseModel): include_root: bool = Field( default=True, description="Index README.md / AGENTS.md / CHANGELOG.md" ) + # PRP-40 — additive sub-path filter for the docs/ root. None preserves + # back-compat (wholesale rglob). + path_prefix: str | None = Field( + default=None, + max_length=200, + description=( + "Optional repo-relative path under docs/ to restrict discovery to " + "(e.g. 'docs/user-guide'). When None (default), discovery scans " + "every docs/**/*.md (back-compat)." + ), + ) class ProjectDocResult(BaseModel): diff --git a/app/features/rag/service.py b/app/features/rag/service.py index 8229bb33..ea967e98 100644 --- a/app/features/rag/service.py +++ b/app/features/rag/service.py @@ -276,7 +276,22 @@ def _discover_project_doc_files( found: list[tuple[Path, str]] = [] if request.include_docs: - found += [(p, "docs") for p in (self._base_dir / "docs").rglob("*.md")] + # PRP-40 — optional sub-path filter under docs/. None preserves + # back-compat (wholesale rglob). + if request.path_prefix: + base = self._base_dir.resolve() + candidate = (self._base_dir / request.path_prefix).resolve() + # Guard against path traversal — candidate MUST be inside the + # project root (security-patterns.md path-traversal section). + try: + candidate.relative_to(base) + except ValueError as exc: + raise ValueError( + f"path_prefix escapes the project root: {request.path_prefix!r}" + ) from exc + found += [(p, "docs") for p in candidate.rglob("*.md")] + else: + found += [(p, "docs") for p in (self._base_dir / "docs").rglob("*.md")] if request.include_prps: found += [(p, "prp") for p in (self._base_dir / "PRPs").rglob("*.md")] @@ -317,6 +332,7 @@ async def index_project_docs( include_docs=request.include_docs, include_prps=request.include_prps, include_root=request.include_root, + path_prefix=request.path_prefix, ) results: list[ProjectDocResult] = [] diff --git a/app/features/rag/tests/test_service.py b/app/features/rag/tests/test_service.py index 836bc84b..d3330fff 100644 --- a/app/features/rag/tests/test_service.py +++ b/app/features/rag/tests/test_service.py @@ -156,6 +156,62 @@ def test_root_allow_list_only(self, tmp_path): names = {p.name for p, _ in found} assert names == {"README.md"} + # --- PRP-40 — additive path_prefix sub-path filter --- + + def test_path_prefix_scopes_docs_discovery(self, tmp_path): + """PRP-40 — path_prefix='docs/user-guide' restricts docs scan to that subtree.""" + (tmp_path / "docs" / "user-guide").mkdir(parents=True) + (tmp_path / "docs" / "other").mkdir() + (tmp_path / "docs" / "user-guide" / "intro.md").write_text("# A", encoding="utf-8") + (tmp_path / "docs" / "other" / "internal.md").write_text("# B", encoding="utf-8") + service = RAGService(base_dir=str(tmp_path)) + + found = service._discover_project_doc_files( + IndexProjectDocsRequest( + include_docs=True, + include_prps=False, + include_root=False, + path_prefix="docs/user-guide", + ) + ) + + rels = {p.relative_to(tmp_path).as_posix() for p, _ in found} + assert rels == {"docs/user-guide/intro.md"} + + def test_path_prefix_none_preserves_wholesale_scan(self, tmp_path): + """PRP-40 — path_prefix=None (default) keeps the existing wholesale rglob behaviour.""" + (tmp_path / "docs").mkdir() + (tmp_path / "docs" / "a.md").write_text("# A", encoding="utf-8") + (tmp_path / "docs" / "deep").mkdir() + (tmp_path / "docs" / "deep" / "b.md").write_text("# B", encoding="utf-8") + service = RAGService(base_dir=str(tmp_path)) + + found = service._discover_project_doc_files( + IndexProjectDocsRequest(include_docs=True, include_prps=False, include_root=False) + ) + + rels = {p.relative_to(tmp_path).as_posix() for p, _ in found} + assert rels == {"docs/a.md", "docs/deep/b.md"} + + def test_index_project_docs_rejects_path_traversal(self, tmp_path): + """PRP-40 — path_prefix that escapes base_dir raises ValueError. + + Load-bearing security surface — `path_prefix` lands in an `rglob` call, + so a traversal-prefix MUST be rejected at the discovery layer + (security-patterns.md path-traversal rule). + """ + service = RAGService(base_dir=str(tmp_path)) + + with pytest.raises(ValueError, match="escapes the project root"): + service._discover_project_doc_files( + IndexProjectDocsRequest( + include_docs=True, + include_prps=False, + include_root=False, + path_prefix="../../etc", + ) + ) + class TestRAGServiceIndexDocument: """Tests for index_document method.""" diff --git a/docs/_base/API_CONTRACTS.md b/docs/_base/API_CONTRACTS.md index bc67be07..232b7da3 100644 --- a/docs/_base/API_CONTRACTS.md +++ b/docs/_base/API_CONTRACTS.md @@ -46,7 +46,7 @@ All endpoints serve JSON; error responses use `application/problem+json` (RFC 78 | jobs | GET | `/jobs/{job_id}` | Status + result JSON | | jobs | DELETE | `/jobs/{job_id}` | Cancel pending | | rag | POST | `/rag/index` | Index a markdown/openapi document; idempotent via content hash | -| rag | POST | `/rag/index/project-docs` | Bulk-index bundled `docs/`, `PRPs/`, and root markdown; per-file + aggregate summary; idempotent via content hash; `502` if the embedding provider fails | +| rag | POST | `/rag/index/project-docs` | Bulk-index bundled `docs/`, `PRPs/`, and root markdown; per-file + aggregate summary; idempotent via content hash; `502` if the embedding provider fails. **PRP-40** — body accepts an additive Optional `path_prefix: str \| None` (default `None`) that restricts the docs/ root scan to a sub-path (e.g. `docs/user-guide`); a path-traversal-escaping prefix returns `422 application/problem+json`. | | rag | POST | `/rag/retrieve` | Semantic search (HNSW), top-k with similarity threshold | | rag | GET | `/rag/sources` | List indexed sources | | rag | DELETE | `/rag/sources/{source_id}` | Delete source + cascaded chunks | @@ -91,7 +91,8 @@ Drives the end-to-end demo pipeline for the dashboard Showcase page. Verified ag - `pipeline_complete` — final event; `data` carries `winner_model_type`, `winner_wape`, `winning_run_id`, `alias`, `wall_clock_s`, `v2_run_id` (PRP-38; null when no V2 run was registered). - `error` — bad start frame or a concurrent run already in progress; one event, then the server closes. - Concurrency: a module-level `asyncio.Lock` allows one pipeline at a time. A second `POST /demo/run` returns `409`; a second `WS /demo/stream` receives one `error` event. -- PRP-38 — `scenario="showcase_rich"` extends the data phase with `phase2_enrichment` + `historical_backfill` steps and the modeling phase with `v2_train` (one V2 `prophet_like` run). Total step count: 14 for `showcase_rich`, 11 for `demo_minimal` and `sparse`. Phase ids are `data` / `modeling` / `decision` / `verify` / `agent` / `cleanup` (6 phases). +- PRP-38 — `scenario="showcase_rich"` extends the data phase with `phase2_enrichment` + `historical_backfill` steps and the modeling phase with `v2_train` (one V2 `prophet_like` run). Phase ids are `data` / `modeling` / `decision` / `verify` / `agent` / `cleanup` (6 phases). +- PRP-40 — `scenario="showcase_rich"` ALSO adds two phases inserted BEFORE `verify`: `planning` (2 steps — `scenario_simulate_and_save`, `multi_plan_compare`) and `knowledge` (3 steps — `embedding_provider_probe`, `rag_index_subset`, `rag_retrieve_probe`). Total step count: 19 for `showcase_rich`, 11 for `demo_minimal` and `sparse`. Phase ids on `showcase_rich` are `data` / `modeling` / `decision` / `planning` / `knowledge` / `verify` / `agent` / `cleanup` (8 phases). The knowledge steps SKIP gracefully when the embedding provider is unreachable; the pipeline still goes green. ## Async Events / Queues diff --git a/docs/_base/RUNBOOKS.md b/docs/_base/RUNBOOKS.md index 43b8c12d..990fbf76 100644 --- a/docs/_base/RUNBOOKS.md +++ b/docs/_base/RUNBOOKS.md @@ -123,7 +123,15 @@ uv run python scripts/run_demo.py --seed 42 --quiet 2>&1 | tee demo.log 15. **`batch_preset` step shows ⚠️ "batch poll timed out at 90s" (PRP-39, `showcase_rich` only)** — the batch's 18 sub-jobs together exceeded the poll-timeout budget. Cause: a slow-feature-pipeline branch makes each grain×model pair take longer than expected; on a developer laptop with limited CPU 18 jobs can exceed 90 s under load. Fix: visit `/visualize/batch/{batch_id}` to follow the run to completion; the step is `warn` (non-fatal), so the pipeline still goes green. 16. **`batch_preset` step fails with `HTTP 422 -- Unprocessable Entity` from `/batch/forecasting` (PRP-39, `showcase_rich` only)** — `BatchSubmitRequest` validation rejected the body. Common causes: (a) `BatchScope.kind` casing drift (must be lowercase `"manual"`); (b) `operation` value drift (must be `"train"` / `"predict"` / `"backtest"` / `"train_backtest_register"`, NOT `"forecasting"`); (c) the discovered `store_ids` / `product_ids` list is empty because `step_status` did not seed the grain. Fix: re-tick `Re-seed first`; verify the discovery returns at least 3 stores + 2 products. 17. **`cleanup` step shows `alias restored=False` in detail (PRP-39 R15, `showcase_rich` only)** — the `POST /registry/aliases` restore call returned non-2xx. Cause: the original alias target was archived between the swap and the cleanup (an `agent_require_approval` archive_run tool fire by an operator during the demo). Fix: re-create the alias manually pointing at the V2 winner. The cleanup step warns and continues so the run still goes green. -**Notes:** the `POST /demo/run` body and `WS /demo/stream` events are documented in `docs/_base/API_CONTRACTS.md`. The pipeline mirrors `scripts/run_demo.py`; the per-step diagnosis for `make demo` above applies to the same steps. PRP-38 added the `scenario` field on `DemoRunRequest` (defaults to `demo_minimal`) and the additive `phase_name` / `phase_index` / `phase_total` fields on every `StepEvent`. PRP-39 added four new steps (`champion_compat_compare`, `stale_alias_trigger`, `safer_promote_flow`, `batch_preset`) and a new `portfolio` phase between `decision` and `verify`. +18. **`scenario_simulate_and_save` step fails with `Cannot parse artifact-key from artifact_uri` (PRP-40, `showcase_rich` only)** — the `demo-production` alias's run has an `artifact_uri` the `_parse_artifact_key` regex can't match (`r"model_([0-9a-f]+)(?:\.joblib)?$"`). Causes: a backfilled run with an irregular `artifact_uri`, or a forecasting-slice change to the model-path convention. Fix: inspect the run via `GET /registry/aliases/demo-production` → `GET /registry/runs/{run_id}`, confirm `artifact_uri` matches one of the V1 (`demo/{model_type}-model_{KEY}.joblib`) or V2 (`artifacts/models/model_{KEY}.joblib`) shapes, then either re-run the showcase (the next `register` step rewrites the artifact_uri) or extend `_ARTIFACT_KEY_RE` if a new shape is intentional. +19. **`multi_plan_compare` step shows ⚠️ with `holiday-plan save failed: ...; price-cut plan still saved` (PRP-40, `showcase_rich` only)** — the second `POST /scenarios` returned 4xx (most likely 422). The price-cut plan was still saved (partial success — R19), so the run keeps going green. Fix: read the RFC 7807 body in the detail; common causes are a horizon out of range or a malformed `holiday.dates` payload. Re-running the showcase regenerates both plans from scratch. +20. **`embedding_provider_probe` step shows ✅ but `reachable=False` (PRP-40, `showcase_rich` only)** — expected when no embedding provider is configured. The probe always emits PASS so the pipeline still greens; downstream `rag_index_subset` and `rag_retrieve_probe` will emit ⏭️ skip with `detail="embedding provider unreachable"`. Fix only if you want the knowledge phase to run: set `OPENAI_API_KEY` (when `RAG_EMBEDDING_PROVIDER=openai`) or start Ollama on `OLLAMA_BASE_URL` (when `RAG_EMBEDDING_PROVIDER=ollama`), then re-run. +21. **`rag_index_subset` step fails with `path_prefix escapes the project root` (PRP-40, `showcase_rich` only)** — the demo step hard-codes `path_prefix="docs/user-guide"`, so a real-world hit means `RAGService._base_dir` no longer points at the repo root (e.g. a misconfigured container start). Fix: confirm the backend was started from the repo root (or that `RAGService(base_dir=...)` was constructed with the right path); rerun the showcase. The path-traversal guard is load-bearing security — never relax it. +22. **`rag_retrieve_probe` step shows ⚠️ with `no hits — corpus indexed but query did not match` (PRP-40, `showcase_rich` only)** — the 5-file corpus was indexed (the prior step PASSed) but the canned query `"How do I run the demo pipeline?"` returned zero hits. Common cause: the embedding-provider was switched mid-showcase and indexed chunks are now orphaned (memory anchor: `[[rag-runtime-config-and-corpus-state]]`); the pgvector column has one fixed dimension per provider. Fix: stick to one provider, or clear the RAG corpus (`DELETE /rag/sources/{id}` per source) and re-run. + +> ⚠️ **RAG embedding-dim mismatch can orphan chunks (R4).** PRP-40 indexes a curated 5-file subset; if the operator switches the embedding provider mid-showcase, indexed chunks orphan (pgvector assumes one fixed dimension per column). PRP-40 does NOT ship a `clear_rag` UI toggle — that's a future PRP. Stick to one provider for the showcase run. + +**Notes:** the `POST /demo/run` body and `WS /demo/stream` events are documented in `docs/_base/API_CONTRACTS.md`. The pipeline mirrors `scripts/run_demo.py`; the per-step diagnosis for `make demo` above applies to the same steps. PRP-38 added the `scenario` field on `DemoRunRequest` (defaults to `demo_minimal`) and the additive `phase_name` / `phase_index` / `phase_total` fields on every `StepEvent`. PRP-39 added four new steps (`champion_compat_compare`, `stale_alias_trigger`, `safer_promote_flow`, `batch_preset`) and a new `portfolio` phase between `decision` and `verify`. PRP-40 adds the `planning` + `knowledge` phases (5 steps inserted after `portfolio`, before `verify`) and the additive `IndexProjectDocsRequest.path_prefix` field on the RAG slice. ### release-please skipped the bump after a dev → main merge **Symptoms:** `dev → main` PR is merged, `CD Release` workflow on `main` completes in ~10s, **no Release PR** is opened. release-please log shows `No user facing commits found since - skipping`. diff --git a/frontend/src/components/demo/PHASE_DEFS.test.ts b/frontend/src/components/demo/PHASE_DEFS.test.ts index 5a836392..ab486487 100644 --- a/frontend/src/components/demo/PHASE_DEFS.test.ts +++ b/frontend/src/components/demo/PHASE_DEFS.test.ts @@ -27,7 +27,7 @@ describe('PHASE_DEFS lockstep with backend _phase_table', () => { ]) }) - it('showcase_rich -> the 18-step sequence with PRP-38 V2 + PRP-39 decision/portfolio rows', () => { + it('showcase_rich -> the 23-step sequence with PRP-38 V2 + PRP-39 decision/portfolio + PRP-40 planning/knowledge rows', () => { const tuples = phaseDefsForScenario('showcase_rich').map((d) => [d.phase, d.step]) expect(tuples).toEqual([ ['data', 'precheck'], @@ -45,8 +45,14 @@ describe('PHASE_DEFS lockstep with backend _phase_table', () => { ['decision', 'champion_compat_compare'], ['decision', 'stale_alias_trigger'], ['decision', 'safer_promote_flow'], - // PRP-39 — new portfolio phase between decision and verify. + // PRP-39 — portfolio phase between decision and verify. ['portfolio', 'batch_preset'], + // PRP-40 — planning + knowledge phases after portfolio, before verify. + ['planning', 'scenario_simulate_and_save'], + ['planning', 'multi_plan_compare'], + ['knowledge', 'embedding_provider_probe'], + ['knowledge', 'rag_index_subset'], + ['knowledge', 'rag_retrieve_probe'], ['verify', 'verify'], ['agent', 'agent'], ['cleanup', 'cleanup'], @@ -59,12 +65,14 @@ describe('PHASE_DEFS lockstep with backend _phase_table', () => { expect(sparse).toEqual(minimal) }) - it('PHASE_ORDER contains exactly the seven canonical phases (PRP-39 adds portfolio)', () => { + it('PHASE_ORDER contains exactly the nine canonical phases (PRP-39 adds portfolio, PRP-40 adds planning + knowledge)', () => { expect(PHASE_ORDER).toEqual([ 'data', 'modeling', 'decision', 'portfolio', + 'planning', + 'knowledge', 'verify', 'agent', 'cleanup', diff --git a/frontend/src/components/demo/PHASE_DEFS.ts b/frontend/src/components/demo/PHASE_DEFS.ts index 9307f4ca..b91c95b3 100644 --- a/frontend/src/components/demo/PHASE_DEFS.ts +++ b/frontend/src/components/demo/PHASE_DEFS.ts @@ -20,7 +20,15 @@ export interface PhaseDef { /** * The complete set of step definitions used by either DEMO_MINIMAL (legacy - * 11 steps) or SHOWCASE_RICH (PRP-38 added 3; PRP-39 adds 4 more = 18 steps). + * 11 steps) or SHOWCASE_RICH (11 + 3 PRP-38 + 4 PRP-39 + 5 PRP-40 = 23 steps). + * + * PRP-39 adds four steps (champion_compat_compare, stale_alias_trigger, + * safer_promote_flow under the existing decision phase, plus batch_preset + * under a new portfolio phase between decision and verify). + * + * PRP-40 adds five steps grouped under two new phases ("planning" and + * "knowledge"), inserted after portfolio and BEFORE verify via relative + * anchors. * * Order matters: each row's (phase, step) tuple list is what the lockstep * test asserts equals the backend's `_phase_table(scenario)` output for @@ -44,12 +52,19 @@ const ALL_STEPS: ReadonlyArray = [ { phase: 'decision', step: 'safer_promote_flow', label: 'Safer Promote walkthrough' }, // PRP-39 — new portfolio phase, between decision and verify. { phase: 'portfolio', step: 'batch_preset', label: 'Portfolio batch (quick baseline sweep)' }, + // PRP-40 — planning + knowledge phases, after portfolio, before verify. + { phase: 'planning', step: 'scenario_simulate_and_save', label: 'Simulate & save plan' }, + { phase: 'planning', step: 'multi_plan_compare', label: 'Compare plans' }, + { phase: 'knowledge', step: 'embedding_provider_probe', label: 'Probe embedding provider' }, + { phase: 'knowledge', step: 'rag_index_subset', label: 'Index user-guide corpus' }, + { phase: 'knowledge', step: 'rag_retrieve_probe', label: 'Semantic-retrieve probe' }, { phase: 'verify', step: 'verify', label: 'Verify artifact' }, { phase: 'agent', step: 'agent', label: 'Agent chat' }, { phase: 'cleanup', step: 'cleanup', label: 'Cleanup' }, ] as const const SHOWCASE_RICH_STEP_NAMES = new Set([ + // PRP-38 'phase2_enrichment', 'historical_backfill', 'v2_train', @@ -58,6 +73,12 @@ const SHOWCASE_RICH_STEP_NAMES = new Set([ 'stale_alias_trigger', 'safer_promote_flow', 'batch_preset', + // PRP-40 + 'scenario_simulate_and_save', + 'multi_plan_compare', + 'embedding_provider_probe', + 'rag_index_subset', + 'rag_retrieve_probe', ]) /** Return the PhaseDef list for one scenario (lockstep with backend). */ @@ -76,6 +97,9 @@ export const PHASE_LABEL: Record = { decision: 'Decision', // PRP-39 — new portfolio phase between decision and verify. portfolio: 'Portfolio', + // PRP-40 — planning + knowledge phases (showcase_rich only). + planning: 'Planning', + knowledge: 'Knowledge', verify: 'Verify', agent: 'Agent', cleanup: 'Cleanup', @@ -86,8 +110,11 @@ export const PHASE_ORDER: readonly string[] = [ 'data', 'modeling', 'decision', - // PRP-39 — new portfolio phase between decision and verify. + // PRP-39 — portfolio phase between decision and verify. 'portfolio', + // PRP-40 — planning + knowledge inserted after portfolio, before verify. + 'planning', + 'knowledge', 'verify', 'agent', 'cleanup', diff --git a/frontend/src/components/demo/demo-step-card.tsx b/frontend/src/components/demo/demo-step-card.tsx index 7a3a8c34..eddf2f09 100644 --- a/frontend/src/components/demo/demo-step-card.tsx +++ b/frontend/src/components/demo/demo-step-card.tsx @@ -109,6 +109,131 @@ function ChampionCompatDetail({ data }: { data: Record }) { ) } +/* =========================================================================== + * PRP-40 — planning + knowledge mini summaries + * =========================================================================== */ + +function formatSignedNumber(value: unknown, fractionDigits = 1): string { + if (typeof value !== 'number' || !Number.isFinite(value)) return 'n/a' + return value >= 0 ? `+${value.toFixed(fractionDigits)}` : value.toFixed(fractionDigits) +} + +/** scenario_simulate_and_save — plan name + method + Δunits + Δrevenue. */ +function ScenarioSummary({ data }: { data: Record }) { + const scenarioId = typeof data.scenario_id === 'string' ? data.scenario_id : null + const method = typeof data.method === 'string' ? data.method : null + if (!scenarioId && !method) return null + return ( +
+ + plan: showcase-price-cut-10pct + + {method && ( + method: {method} + )} + + Δunits: {formatSignedNumber(data.units_delta, 1)} + + + Δrevenue: {formatSignedNumber(data.revenue_delta, 2)} + +
+ ) +} + +/** multi_plan_compare — winner + ranked_by + per-plan deltas. */ +function CompareSummary({ data }: { data: Record }) { + const winnerName = typeof data.winner_name === 'string' ? data.winner_name : null + const rankedBy = typeof data.ranked_by === 'string' ? data.ranked_by : null + const rankedRaw = data.ranked + const ranked = Array.isArray(rankedRaw) ? rankedRaw : [] + if (!winnerName && ranked.length === 0) return null + return ( +
+
+ {winnerName && ( + + 🏆 winner: {winnerName} + + )} + {rankedBy && ( + + ranked_by: {rankedBy} + + )} +
+
+ {ranked.map((row, idx) => { + if (!row || typeof row !== 'object') return null + const r = row as Record + const name = typeof r.name === 'string' ? r.name : `plan-${idx}` + const unitsDelta = r.units_delta + const revenueDelta = r.revenue_delta + return ( +
+ {name} + + Δunits {formatSignedNumber(unitsDelta, 1)} · + Δrev {formatSignedNumber(revenueDelta, 2)} + +
+ ) + })} +
+
+ ) +} + +/** embedding_provider_probe — provider chip + reachable badge. */ +function ProviderChip({ data }: { data: Record }) { + const provider = typeof data.provider === 'string' ? data.provider : null + const reachable = data.reachable === true + if (!provider) return null + return ( +
+ provider: {provider} + + {reachable ? '✅ reachable' : '⏭️ unreachable'} + +
+ ) +} + +/** rag_index_subset — curated_hits/5 + chunks + failed. */ +function IndexSummary({ data }: { data: Record }) { + const curatedHits = typeof data.curated_hits === 'number' ? data.curated_hits : null + const totalChunks = typeof data.total_chunks === 'number' ? data.total_chunks : null + const failed = typeof data.failed === 'number' ? data.failed : null + if (curatedHits === null && totalChunks === null) return null + return ( +
+ {curatedHits !== null && ( + + files: {curatedHits}/5 + + )} + {totalChunks !== null && ( + + chunks: {totalChunks} + + )} + {failed !== null && failed > 0 && ( + + failed: {failed} + + )} +
+ ) +} + /** PRP-39 — stale-alias trigger mini-summary chip-line. */ function StaleAliasDetail({ data }: { data: Record }) { const aliasName = typeof data.alias_name === 'string' ? data.alias_name : null @@ -163,6 +288,23 @@ function BatchPresetDetail({ data }: { data: Record }) { ) } +/** rag_retrieve_probe — top hit title + similarity score. */ +function RetrieveSummary({ data }: { data: Record }) { + const topSource = typeof data.top_source_path === 'string' ? data.top_source_path : null + const score = typeof data.top_relevance_score === 'number' ? data.top_relevance_score : null + if (!topSource) return null + return ( +
+ top: {topSource} + {score !== null && ( + + score: {score.toFixed(3)} + + )} +
+ ) +} + interface DemoStepCardProps { step: DemoStep index: number @@ -223,6 +365,16 @@ export function DemoStepCard({ step, index, inspectHref }: DemoStepCardProps) { {step.name === 'stale_alias_trigger' && } {step.name === 'safer_promote_flow' && } {step.name === 'batch_preset' && } + {/* PRP-40 — planning + knowledge mini summaries. */} + {step.name === 'scenario_simulate_and_save' && ( + + )} + {step.name === 'multi_plan_compare' && } + {step.name === 'embedding_provider_probe' && ( + + )} + {step.name === 'rag_index_subset' && } + {step.name === 'rag_retrieve_probe' && } {showInspect && (