From 91d05470ad22f4755a7bf2c906cbbed4742c11cc Mon Sep 17 00:00:00 2001 From: saagpatel Date: Wed, 3 Jun 2026 03:24:46 -0700 Subject: [PATCH] feat(risk): persist GitHub slug in truth + multi-key risk lookups (cross-source identity) --- src/excel_export_truth_helpers.py | 10 +++++++- src/portfolio_truth_reconcile.py | 1 + src/portfolio_truth_types.py | 5 ++++ src/report_enrichment.py | 21 ++++++++++++++-- tests/test_load_risk_truth.py | 37 ++++++++++++++++++++++++++++ tests/test_report_enrichment_risk.py | 22 +++++++++++++++++ 6 files changed, 93 insertions(+), 3 deletions(-) create mode 100644 tests/test_load_risk_truth.py diff --git a/src/excel_export_truth_helpers.py b/src/excel_export_truth_helpers.py index ad85c71..ca59331 100644 --- a/src/excel_export_truth_helpers.py +++ b/src/excel_export_truth_helpers.py @@ -22,10 +22,18 @@ def load_risk_truth(truth_dir: Path | None) -> tuple[dict[str, str], dict[str, i risk_lookup: dict[str, str] = {} tier_counts: dict[str, int] = {} for project in truth_data.get("projects") or []: - display_name = str((project.get("identity") or {}).get("display_name") or "") + identity = project.get("identity") or {} + display_name = str(identity.get("display_name") or "") risk_tier = str((project.get("risk") or {}).get("risk_tier") or "") if display_name: risk_lookup[display_name] = risk_tier + # Also key by the GitHub repo name so workbook surfaces that look up by audit + # metadata.name resolve risk when it differs from the local-dir display_name + # (e.g. "Signal & Noise" vs "signal-noise"). tier_counts is incremented once + # per project below, so the alias does not inflate the aggregate posture. 
+ slug = str(identity.get("repo_full_name") or "").rsplit("/", 1)[-1] + if slug and slug not in risk_lookup: + risk_lookup[slug] = risk_tier if risk_tier: tier_counts[risk_tier] = tier_counts.get(risk_tier, 0) + 1 diff --git a/src/portfolio_truth_reconcile.py b/src/portfolio_truth_reconcile.py index d98ed29..b9ea76f 100644 --- a/src/portfolio_truth_reconcile.py +++ b/src/portfolio_truth_reconcile.py @@ -347,6 +347,7 @@ def _build_truth_project( section_marker=_resolve_section_marker(relative_path, group_entry, raw_project), section_label=_resolve_section_label(group_entry, raw_project), has_git=bool(raw_project["has_git"]), + repo_full_name=str(raw_project.get("repo_full_name") or ""), ) declared_values = { diff --git a/src/portfolio_truth_types.py b/src/portfolio_truth_types.py index 40b1aca..ad93384 100644 --- a/src/portfolio_truth_types.py +++ b/src/portfolio_truth_types.py @@ -42,6 +42,11 @@ class IdentityFields: section_marker: str section_label: str has_git: bool + # GitHub "owner/repo" from the local git remote, when present. Lets risk and + # other truth-keyed overlays be matched by the GitHub repo name (audit + # metadata.name) and not only the local-dir display_name, which often differ + # (e.g. "Signal & Noise" vs "signal-noise"). 
+ repo_full_name: str = "" def to_dict(self) -> dict[str, Any]: return dataclasses.asdict(self) diff --git a/src/report_enrichment.py b/src/report_enrichment.py index eb2dd9c..cf3dea3 100644 --- a/src/report_enrichment.py +++ b/src/report_enrichment.py @@ -173,14 +173,24 @@ def build_risk_lookup(output_dir: Path | None) -> dict[str, dict[str, str]]: return {} lookup: dict[str, dict[str, str]] = {} for project in truth.get("projects") or []: - name = str((project.get("identity") or {}).get("display_name") or "") + identity = project.get("identity") or {} + name = str(identity.get("display_name") or "") if not name: continue risk = project.get("risk") or {} - lookup[name] = { + entry = { "risk_tier": str(risk.get("risk_tier") or "baseline"), "risk_summary": str(risk.get("risk_summary") or ""), } + lookup[name] = entry + # Alias each project under its GitHub repo name so render consumers that key + # by audit metadata.name resolve risk for repos whose local-dir display_name + # differs (e.g. "Signal & Noise" vs "signal-noise"). The alias is the SAME + # entry object; _extract_risk_posture deduplicates by object id() so it never + # inflates the aggregate counts. + slug = str(identity.get("repo_full_name") or "").rsplit("/", 1)[-1] + if slug and slug not in lookup: + lookup[slug] = entry return lookup @@ -191,7 +201,14 @@ def _extract_risk_posture(output_dir: Path | None) -> dict[str, Any]: return {} tier_counts: dict[str, int] = {} top_elevated: list[dict[str, Any]] = [] + seen: set[int] = set() for name, entry in lookup.items(): + # build_risk_lookup aliases each project under both its display_name and its + # GitHub slug (same entry object); count each project exactly once. Insertion + # order puts the display_name first, so top_elevated uses the human name.
+ if id(entry) in seen: + continue + seen.add(id(entry)) tier = entry["risk_tier"] tier_counts[tier] = tier_counts.get(tier, 0) + 1 if tier == "elevated": diff --git a/tests/test_load_risk_truth.py b/tests/test_load_risk_truth.py new file mode 100644 index 0000000..e86a69b --- /dev/null +++ b/tests/test_load_risk_truth.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +import json +from pathlib import Path + +from src.excel_export_truth_helpers import load_risk_truth + + +def test_load_risk_truth_keys_by_slug_and_display_name(tmp_path: Path) -> None: + truth = { + "projects": [ + { + "identity": { + "display_name": "Signal & Noise", + "repo_full_name": "saagpatel/signal-noise", + }, + "risk": {"risk_tier": "elevated"}, + } + ] + } + (tmp_path / "portfolio-truth-latest.json").write_text(json.dumps(truth)) + risk_lookup, posture = load_risk_truth(tmp_path) + # Findable by both the local-dir display name and the GitHub slug. + assert risk_lookup["Signal & Noise"] == "elevated" + assert risk_lookup["signal-noise"] == "elevated" + # The slug alias must NOT double-count the aggregate posture. 
+ assert posture["elevated"] == 1 + + +def test_load_risk_truth_no_slug_when_repo_full_name_absent(tmp_path: Path) -> None: + truth = { + "projects": [{"identity": {"display_name": "PlainRepo"}, "risk": {"risk_tier": "moderate"}}] + } + (tmp_path / "portfolio-truth-latest.json").write_text(json.dumps(truth)) + risk_lookup, posture = load_risk_truth(tmp_path) + assert risk_lookup == {"PlainRepo": "moderate"} + assert posture["moderate"] == 1 diff --git a/tests/test_report_enrichment_risk.py b/tests/test_report_enrichment_risk.py index bd012fd..47a2d50 100644 --- a/tests/test_report_enrichment_risk.py +++ b/tests/test_report_enrichment_risk.py @@ -105,6 +105,28 @@ def test_build_risk_lookup_empty_when_no_output_dir() -> None: assert build_risk_lookup(None) == {} +def test_build_risk_lookup_also_keys_by_github_slug(tmp_path: Path) -> None: + # Truth keys by local-dir display_name, but render consumers look up by the + # GitHub repo name (audit metadata.name). When the truth identity carries the + # repo_full_name, risk must also be findable by that GitHub slug. + truth = { + "schema_version": "0.5.0", + "projects": [ + { + "identity": { + "display_name": "Signal & Noise", + "repo_full_name": "saagpatel/signal-noise", + }, + "risk": {"risk_tier": "elevated", "risk_summary": "Weak context."}, + } + ], + } + (tmp_path / "portfolio-truth-latest.json").write_text(json.dumps(truth)) + lookup = build_risk_lookup(tmp_path) + assert lookup["Signal & Noise"]["risk_tier"] == "elevated" + assert lookup["signal-noise"]["risk_tier"] == "elevated" + + def test_extract_risk_posture_still_derived_from_same_source(tmp_path: Path) -> None: # _extract_risk_posture is reimplemented on top of build_risk_lookup; aggregate # counts must still match the per-repo lookup.