Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
389 changes: 389 additions & 0 deletions app/features/demo/pipeline.py

Large diffs are not rendered by default.

688 changes: 669 additions & 19 deletions app/features/demo/tests/test_pipeline.py

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions app/features/rag/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,11 @@ class IndexProjectDocsRequest(BaseModel):
include_docs: Index markdown discovered under docs/**.
include_prps: Index markdown discovered under PRPs/**.
include_root: Index the root allow-list (README/AGENTS/CHANGELOG).
path_prefix: Optional repo-relative sub-path under docs/ to restrict
discovery to (PRP-40). When None (default), scanning is wholesale
(back-compat). Only applies when ``include_docs`` is True; the
``include_prps`` / ``include_root`` branches are unaffected.
A path-traversal guard rejects values that escape the project root.
"""

model_config = ConfigDict(extra="forbid")
Expand All @@ -199,6 +204,17 @@ class IndexProjectDocsRequest(BaseModel):
include_root: bool = Field(
default=True, description="Index README.md / AGENTS.md / CHANGELOG.md"
)
# PRP-40 — additive sub-path filter for the docs/ root. None preserves
# back-compat (wholesale rglob).
path_prefix: str | None = Field(
default=None,
max_length=200,
description=(
"Optional repo-relative path under docs/ to restrict discovery to "
"(e.g. 'docs/user-guide'). When None (default), discovery scans "
"every docs/**/*.md (back-compat)."
),
)


class ProjectDocResult(BaseModel):
Expand Down
18 changes: 17 additions & 1 deletion app/features/rag/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,22 @@ def _discover_project_doc_files(
found: list[tuple[Path, str]] = []

if request.include_docs:
found += [(p, "docs") for p in (self._base_dir / "docs").rglob("*.md")]
# PRP-40 — optional sub-path filter under docs/. None preserves
# back-compat (wholesale rglob).
if request.path_prefix:
base = self._base_dir.resolve()
candidate = (self._base_dir / request.path_prefix).resolve()
# Guard against path traversal — candidate MUST be inside the
# project root (security-patterns.md path-traversal section).
try:
candidate.relative_to(base)
except ValueError as exc:
raise ValueError(
f"path_prefix escapes the project root: {request.path_prefix!r}"
) from exc
found += [(p, "docs") for p in candidate.rglob("*.md")]
else:
found += [(p, "docs") for p in (self._base_dir / "docs").rglob("*.md")]

if request.include_prps:
found += [(p, "prp") for p in (self._base_dir / "PRPs").rglob("*.md")]
Expand Down Expand Up @@ -317,6 +332,7 @@ async def index_project_docs(
include_docs=request.include_docs,
include_prps=request.include_prps,
include_root=request.include_root,
path_prefix=request.path_prefix,
)

results: list[ProjectDocResult] = []
Expand Down
56 changes: 56 additions & 0 deletions app/features/rag/tests/test_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,62 @@ def test_root_allow_list_only(self, tmp_path):
names = {p.name for p, _ in found}
assert names == {"README.md"}

# --- PRP-40 — additive path_prefix sub-path filter ---

def test_path_prefix_scopes_docs_discovery(self, tmp_path):
"""PRP-40 — path_prefix='docs/user-guide' restricts docs scan to that subtree."""
# Arrange: two sibling sub-trees under docs/, each holding one markdown file,
# so the assertion can tell "scoped" apart from "wholesale" discovery.
(tmp_path / "docs" / "user-guide").mkdir(parents=True)
(tmp_path / "docs" / "other").mkdir()
(tmp_path / "docs" / "user-guide" / "intro.md").write_text("# A", encoding="utf-8")
(tmp_path / "docs" / "other" / "internal.md").write_text("# B", encoding="utf-8")
service = RAGService(base_dir=str(tmp_path))

# Act: enable only the docs branch (PRPs and root allow-list are off) and
# scope it to the docs/user-guide sub-path.
found = service._discover_project_doc_files(
IndexProjectDocsRequest(
include_docs=True,
include_prps=False,
include_root=False,
path_prefix="docs/user-guide",
)
)

# Assert on paths relative to tmp_path: the file under docs/other must be absent.
rels = {p.relative_to(tmp_path).as_posix() for p, _ in found}
assert rels == {"docs/user-guide/intro.md"}

def test_path_prefix_none_preserves_wholesale_scan(self, tmp_path):
"""PRP-40 — path_prefix=None (default) keeps the existing wholesale rglob behaviour."""
# Arrange: one markdown file directly under docs/ and one in a nested
# directory, so a recursive scan is distinguishable from a shallow one.
(tmp_path / "docs").mkdir()
(tmp_path / "docs" / "a.md").write_text("# A", encoding="utf-8")
(tmp_path / "docs" / "deep").mkdir()
(tmp_path / "docs" / "deep" / "b.md").write_text("# B", encoding="utf-8")
service = RAGService(base_dir=str(tmp_path))

# Act: path_prefix is deliberately omitted so the field takes its default (None).
found = service._discover_project_doc_files(
IndexProjectDocsRequest(include_docs=True, include_prps=False, include_root=False)
)

# Assert: both the top-level and the nested file are discovered.
rels = {p.relative_to(tmp_path).as_posix() for p, _ in found}
assert rels == {"docs/a.md", "docs/deep/b.md"}

def test_index_project_docs_rejects_path_traversal(self, tmp_path):
"""PRP-40 — path_prefix that escapes base_dir raises ValueError.

Load-bearing security surface — `path_prefix` lands in an `rglob` call,
so a traversal-prefix MUST be rejected at the discovery layer
(security-patterns.md path-traversal rule).
"""
# No files are created under tmp_path: the test only exercises the guard,
# which is expected to raise instead of returning a discovery result.
service = RAGService(base_dir=str(tmp_path))

# "../../etc" climbs two levels above base_dir, so it points outside the
# project root; `match` pins the guard's error message.
with pytest.raises(ValueError, match="escapes the project root"):
service._discover_project_doc_files(
IndexProjectDocsRequest(
include_docs=True,
include_prps=False,
include_root=False,
path_prefix="../../etc",
)
)


class TestRAGServiceIndexDocument:
"""Tests for index_document method."""
Expand Down
5 changes: 3 additions & 2 deletions docs/_base/API_CONTRACTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ All endpoints serve JSON; error responses use `application/problem+json` (RFC 78
| jobs | GET | `/jobs/{job_id}` | Status + result JSON |
| jobs | DELETE | `/jobs/{job_id}` | Cancel pending |
| rag | POST | `/rag/index` | Index a markdown/openapi document; idempotent via content hash |
| rag | POST | `/rag/index/project-docs` | Bulk-index bundled `docs/`, `PRPs/`, and root markdown; per-file + aggregate summary; idempotent via content hash; `502` if the embedding provider fails |
| rag | POST | `/rag/index/project-docs` | Bulk-index bundled `docs/`, `PRPs/`, and root markdown; per-file + aggregate summary; idempotent via content hash; `502` if the embedding provider fails. **PRP-40** — body accepts an additive Optional `path_prefix: str \| None` (default `None`) that restricts the docs/ root scan to a sub-path (e.g. `docs/user-guide`); a path-traversal-escaping prefix returns `422 application/problem+json`. |
| rag | POST | `/rag/retrieve` | Semantic search (HNSW), top-k with similarity threshold |
| rag | GET | `/rag/sources` | List indexed sources |
| rag | DELETE | `/rag/sources/{source_id}` | Delete source + cascaded chunks |
Expand Down Expand Up @@ -91,7 +91,8 @@ Drives the end-to-end demo pipeline for the dashboard Showcase page. Verified ag
- `pipeline_complete` — final event; `data` carries `winner_model_type`, `winner_wape`, `winning_run_id`, `alias`, `wall_clock_s`, `v2_run_id` (PRP-38; null when no V2 run was registered).
- `error` — bad start frame or a concurrent run already in progress; one event, then the server closes.
- Concurrency: a module-level `asyncio.Lock` allows one pipeline at a time. A second `POST /demo/run` returns `409`; a second `WS /demo/stream` receives one `error` event.
- PRP-38 — `scenario="showcase_rich"` extends the data phase with `phase2_enrichment` + `historical_backfill` steps and the modeling phase with `v2_train` (one V2 `prophet_like` run). Total step count: 14 for `showcase_rich`, 11 for `demo_minimal` and `sparse`. Phase ids are `data` / `modeling` / `decision` / `verify` / `agent` / `cleanup` (6 phases).
- PRP-38 — `scenario="showcase_rich"` extends the data phase with `phase2_enrichment` + `historical_backfill` steps and the modeling phase with `v2_train` (one V2 `prophet_like` run). Phase ids are `data` / `modeling` / `decision` / `verify` / `agent` / `cleanup` (6 phases).
- PRP-40 — `scenario="showcase_rich"` ALSO adds two phases inserted after `portfolio` and BEFORE `verify`: `planning` (2 steps — `scenario_simulate_and_save`, `multi_plan_compare`) and `knowledge` (3 steps — `embedding_provider_probe`, `rag_index_subset`, `rag_retrieve_probe`). Total step count: 23 for `showcase_rich` (11 base + 3 PRP-38 + 4 PRP-39 + 5 PRP-40), 11 for `demo_minimal` and `sparse`. Phase ids on `showcase_rich` are `data` / `modeling` / `decision` / `portfolio` / `planning` / `knowledge` / `verify` / `agent` / `cleanup` (9 phases). The knowledge steps SKIP gracefully when the embedding provider is unreachable; the pipeline still goes green.

## Async Events / Queues

Expand Down
10 changes: 9 additions & 1 deletion docs/_base/RUNBOOKS.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,15 @@ uv run python scripts/run_demo.py --seed 42 --quiet 2>&1 | tee demo.log
15. **`batch_preset` step shows ⚠️ "batch poll timed out at 90s" (PRP-39, `showcase_rich` only)** — the batch's 18 sub-jobs together exceeded the poll-timeout budget. Cause: a slow-feature-pipeline branch makes each grain×model pair take longer than expected; on a developer laptop with limited CPU 18 jobs can exceed 90 s under load. Fix: visit `/visualize/batch/{batch_id}` to follow the run to completion; the step is `warn` (non-fatal), so the pipeline still goes green.
16. **`batch_preset` step fails with `HTTP 422 -- Unprocessable Entity` from `/batch/forecasting` (PRP-39, `showcase_rich` only)** — `BatchSubmitRequest` validation rejected the body. Common causes: (a) `BatchScope.kind` casing drift (must be lowercase `"manual"`); (b) `operation` value drift (must be `"train"` / `"predict"` / `"backtest"` / `"train_backtest_register"`, NOT `"forecasting"`); (c) the discovered `store_ids` / `product_ids` list is empty because `step_status` did not seed the grain. Fix: re-tick `Re-seed first`; verify the discovery returns at least 3 stores + 2 products.
17. **`cleanup` step shows `alias restored=False` in detail (PRP-39 R15, `showcase_rich` only)** — the `POST /registry/aliases` restore call returned non-2xx. Cause: the original alias target was archived between the swap and the cleanup (an `agent_require_approval` archive_run tool fire by an operator during the demo). Fix: re-create the alias manually pointing at the V2 winner. The cleanup step warns and continues so the run still goes green.
**Notes:** the `POST /demo/run` body and `WS /demo/stream` events are documented in `docs/_base/API_CONTRACTS.md`. The pipeline mirrors `scripts/run_demo.py`; the per-step diagnosis for `make demo` above applies to the same steps. PRP-38 added the `scenario` field on `DemoRunRequest` (defaults to `demo_minimal`) and the additive `phase_name` / `phase_index` / `phase_total` fields on every `StepEvent`. PRP-39 added four new steps (`champion_compat_compare`, `stale_alias_trigger`, `safer_promote_flow`, `batch_preset`) and a new `portfolio` phase between `decision` and `verify`.
18. **`scenario_simulate_and_save` step fails with `Cannot parse artifact-key from artifact_uri` (PRP-40, `showcase_rich` only)** — the `demo-production` alias's run has an `artifact_uri` the `_parse_artifact_key` regex can't match (`r"model_([0-9a-f]+)(?:\.joblib)?$"`). Causes: a backfilled run with an irregular `artifact_uri`, or a forecasting-slice change to the model-path convention. Fix: inspect the run via `GET /registry/aliases/demo-production` → `GET /registry/runs/{run_id}`, confirm `artifact_uri` matches one of the V1 (`demo/{model_type}-model_{KEY}.joblib`) or V2 (`artifacts/models/model_{KEY}.joblib`) shapes, then either re-run the showcase (the next `register` step rewrites the artifact_uri) or extend `_ARTIFACT_KEY_RE` if a new shape is intentional.
19. **`multi_plan_compare` step shows ⚠️ with `holiday-plan save failed: ...; price-cut plan still saved` (PRP-40, `showcase_rich` only)** — the second `POST /scenarios` returned 4xx (most likely 422). The price-cut plan was still saved (partial success — R19), so the run keeps going green. Fix: read the RFC 7807 body in the detail; common causes are a horizon out of range or a malformed `holiday.dates` payload. Re-running the showcase regenerates both plans from scratch.
20. **`embedding_provider_probe` step shows ✅ but `reachable=False` (PRP-40, `showcase_rich` only)** — expected when no embedding provider is configured. The probe always emits PASS so the pipeline still goes green; downstream `rag_index_subset` and `rag_retrieve_probe` will emit ⏭️ skip with `detail="embedding provider unreachable"`. Fix only if you want the knowledge phase to run: set `OPENAI_API_KEY` (when `RAG_EMBEDDING_PROVIDER=openai`) or start Ollama on `OLLAMA_BASE_URL` (when `RAG_EMBEDDING_PROVIDER=ollama`), then re-run.
21. **`rag_index_subset` step fails with `path_prefix escapes the project root` (PRP-40, `showcase_rich` only)** — the demo step hard-codes `path_prefix="docs/user-guide"`, so a real-world hit means `RAGService._base_dir` no longer points at the repo root (e.g. a misconfigured container start). Fix: confirm the backend was started from the repo root (or that `RAGService(base_dir=...)` was constructed with the right path); rerun the showcase. The path-traversal guard is load-bearing security — never relax it.
22. **`rag_retrieve_probe` step shows ⚠️ with `no hits — corpus indexed but query did not match` (PRP-40, `showcase_rich` only)** — the 5-file corpus was indexed (the prior step PASSed) but the canned query `"How do I run the demo pipeline?"` returned zero hits. Common cause: the embedding-provider was switched mid-showcase and indexed chunks are now orphaned (memory anchor: `[[rag-runtime-config-and-corpus-state]]`); the pgvector column has one fixed dimension per provider. Fix: stick to one provider, or clear the RAG corpus (`DELETE /rag/sources/{id}` per source) and re-run.

> ⚠️ **RAG embedding-dim mismatch can orphan chunks (R4).** PRP-40 indexes a curated 5-file subset; if the operator switches the embedding provider mid-showcase, indexed chunks orphan (pgvector assumes one fixed dimension per column). PRP-40 does NOT ship a `clear_rag` UI toggle — that's a future PRP. Stick to one provider for the showcase run.

**Notes:** the `POST /demo/run` body and `WS /demo/stream` events are documented in `docs/_base/API_CONTRACTS.md`. The pipeline mirrors `scripts/run_demo.py`; the per-step diagnosis for `make demo` above applies to the same steps. PRP-38 added the `scenario` field on `DemoRunRequest` (defaults to `demo_minimal`) and the additive `phase_name` / `phase_index` / `phase_total` fields on every `StepEvent`. PRP-39 added four new steps (`champion_compat_compare`, `stale_alias_trigger`, `safer_promote_flow`, `batch_preset`) and a new `portfolio` phase between `decision` and `verify`. PRP-40 adds the `planning` + `knowledge` phases (5 steps inserted after `portfolio`, before `verify`) and the additive `IndexProjectDocsRequest.path_prefix` field on the RAG slice.

### release-please skipped the bump after a dev → main merge
**Symptoms:** `dev → main` PR is merged, `CD Release` workflow on `main` completes in ~10s, **no Release PR** is opened. release-please log shows `No user facing commits found since <sha> - skipping`.
Expand Down
14 changes: 11 additions & 3 deletions frontend/src/components/demo/PHASE_DEFS.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ describe('PHASE_DEFS lockstep with backend _phase_table', () => {
])
})

it('showcase_rich -> the 18-step sequence with PRP-38 V2 + PRP-39 decision/portfolio rows', () => {
it('showcase_rich -> the 23-step sequence with PRP-38 V2 + PRP-39 decision/portfolio + PRP-40 planning/knowledge rows', () => {
const tuples = phaseDefsForScenario('showcase_rich').map((d) => [d.phase, d.step])
expect(tuples).toEqual([
['data', 'precheck'],
Expand All @@ -45,8 +45,14 @@ describe('PHASE_DEFS lockstep with backend _phase_table', () => {
['decision', 'champion_compat_compare'],
['decision', 'stale_alias_trigger'],
['decision', 'safer_promote_flow'],
// PRP-39 — new portfolio phase between decision and verify.
// PRP-39 — portfolio phase between decision and verify.
['portfolio', 'batch_preset'],
// PRP-40 — planning + knowledge phases after portfolio, before verify.
['planning', 'scenario_simulate_and_save'],
['planning', 'multi_plan_compare'],
['knowledge', 'embedding_provider_probe'],
['knowledge', 'rag_index_subset'],
['knowledge', 'rag_retrieve_probe'],
['verify', 'verify'],
['agent', 'agent'],
['cleanup', 'cleanup'],
Expand All @@ -59,12 +65,14 @@ describe('PHASE_DEFS lockstep with backend _phase_table', () => {
expect(sparse).toEqual(minimal)
})

it('PHASE_ORDER contains exactly the seven canonical phases (PRP-39 adds portfolio)', () => {
it('PHASE_ORDER contains exactly the nine canonical phases (PRP-39 adds portfolio, PRP-40 adds planning + knowledge)', () => {
expect(PHASE_ORDER).toEqual([
'data',
'modeling',
'decision',
'portfolio',
'planning',
'knowledge',
'verify',
'agent',
'cleanup',
Expand Down
31 changes: 29 additions & 2 deletions frontend/src/components/demo/PHASE_DEFS.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,15 @@ export interface PhaseDef {

/**
* The complete set of step definitions used by either DEMO_MINIMAL (legacy
* 11 steps) or SHOWCASE_RICH (PRP-38 added 3; PRP-39 adds 4 more = 18 steps).
* 11 steps) or SHOWCASE_RICH (11 + 3 PRP-38 + 4 PRP-39 + 5 PRP-40 = 23 steps).
*
* PRP-39 adds four steps (champion_compat_compare, stale_alias_trigger,
* safer_promote_flow under the existing decision phase, plus batch_preset
* under a new portfolio phase between decision and verify).
*
* PRP-40 adds five steps grouped under two new phases ("planning" and
* "knowledge"), inserted after portfolio and BEFORE verify via relative
* anchors.
*
* Order matters: each row's (phase, step) tuple list is what the lockstep
* test asserts equals the backend's `_phase_table(scenario)` output for
Expand All @@ -44,12 +52,19 @@ const ALL_STEPS: ReadonlyArray<PhaseDef> = [
{ phase: 'decision', step: 'safer_promote_flow', label: 'Safer Promote walkthrough' },
// PRP-39 — new portfolio phase, between decision and verify.
{ phase: 'portfolio', step: 'batch_preset', label: 'Portfolio batch (quick baseline sweep)' },
// PRP-40 — planning + knowledge phases, after portfolio, before verify.
{ phase: 'planning', step: 'scenario_simulate_and_save', label: 'Simulate & save plan' },
{ phase: 'planning', step: 'multi_plan_compare', label: 'Compare plans' },
{ phase: 'knowledge', step: 'embedding_provider_probe', label: 'Probe embedding provider' },
{ phase: 'knowledge', step: 'rag_index_subset', label: 'Index user-guide corpus' },
{ phase: 'knowledge', step: 'rag_retrieve_probe', label: 'Semantic-retrieve probe' },
{ phase: 'verify', step: 'verify', label: 'Verify artifact' },
{ phase: 'agent', step: 'agent', label: 'Agent chat' },
{ phase: 'cleanup', step: 'cleanup', label: 'Cleanup' },
] as const

const SHOWCASE_RICH_STEP_NAMES = new Set([
// PRP-38
'phase2_enrichment',
'historical_backfill',
'v2_train',
Expand All @@ -58,6 +73,12 @@ const SHOWCASE_RICH_STEP_NAMES = new Set([
'stale_alias_trigger',
'safer_promote_flow',
'batch_preset',
// PRP-40
'scenario_simulate_and_save',
'multi_plan_compare',
'embedding_provider_probe',
'rag_index_subset',
'rag_retrieve_probe',
])

/** Return the PhaseDef list for one scenario (lockstep with backend). */
Expand All @@ -76,6 +97,9 @@ export const PHASE_LABEL: Record<string, string> = {
decision: 'Decision',
// PRP-39 — new portfolio phase between decision and verify.
portfolio: 'Portfolio',
// PRP-40 — planning + knowledge phases (showcase_rich only).
planning: 'Planning',
knowledge: 'Knowledge',
verify: 'Verify',
agent: 'Agent',
cleanup: 'Cleanup',
Expand All @@ -86,8 +110,11 @@ export const PHASE_ORDER: readonly string[] = [
'data',
'modeling',
'decision',
// PRP-39 — new portfolio phase between decision and verify.
// PRP-39 — portfolio phase between decision and verify.
'portfolio',
// PRP-40 — planning + knowledge inserted after portfolio, before verify.
'planning',
'knowledge',
'verify',
'agent',
'cleanup',
Expand Down
Loading