From 91d05470ad22f4755a7bf2c906cbbed4742c11cc Mon Sep 17 00:00:00 2001
From: saagpatel <saagarpatel08@gmail.com>
Date: Wed, 3 Jun 2026 03:24:46 -0700
Subject: [PATCH] feat(risk): persist GitHub slug in truth + multi-key risk
 lookups (cross-source identity)

---
 src/excel_export_truth_helpers.py    | 10 +++++++-
 src/portfolio_truth_reconcile.py     |  1 +
 src/portfolio_truth_types.py         |  5 ++++
 src/report_enrichment.py             | 21 ++++++++++++++--
 tests/test_load_risk_truth.py        | 37 ++++++++++++++++++++++++++++
 tests/test_report_enrichment_risk.py | 22 +++++++++++++++++
 6 files changed, 93 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_load_risk_truth.py

diff --git a/src/excel_export_truth_helpers.py b/src/excel_export_truth_helpers.py
index ad85c71..ca59331 100644
--- a/src/excel_export_truth_helpers.py
+++ b/src/excel_export_truth_helpers.py
@@ -22,10 +22,18 @@ def load_risk_truth(truth_dir: Path | None) -> tuple[dict[str, str], dict[str, i
     risk_lookup: dict[str, str] = {}
     tier_counts: dict[str, int] = {}
     for project in truth_data.get("projects") or []:
-        display_name = str((project.get("identity") or {}).get("display_name") or "")
+        identity = project.get("identity") or {}
+        display_name = str(identity.get("display_name") or "")
         risk_tier = str((project.get("risk") or {}).get("risk_tier") or "")
         if display_name:
             risk_lookup[display_name] = risk_tier
+        # Also key by the GitHub repo name (the last "owner/repo" segment) so workbook
+        # surfaces that look up by audit metadata.name still resolve risk when that name
+        # differs from the local-dir display_name (e.g. "Signal & Noise" vs "signal-noise").
+        # The alias never replaces an existing key; tier_counts still counts each project once.
+        slug = str(identity.get("repo_full_name") or "").rsplit("/", 1)[-1]
+        if slug and slug not in risk_lookup:
+            risk_lookup[slug] = risk_tier
         if risk_tier:
             tier_counts[risk_tier] = tier_counts.get(risk_tier, 0) + 1
 
diff --git a/src/portfolio_truth_reconcile.py b/src/portfolio_truth_reconcile.py
index d98ed29..b9ea76f 100644
--- a/src/portfolio_truth_reconcile.py
+++ b/src/portfolio_truth_reconcile.py
@@ -347,6 +347,7 @@ def _build_truth_project(
         section_marker=_resolve_section_marker(relative_path, group_entry, raw_project),
         section_label=_resolve_section_label(group_entry, raw_project),
         has_git=bool(raw_project["has_git"]),
+        repo_full_name=str(raw_project.get("repo_full_name") or ""),
     )
 
     declared_values = {
diff --git a/src/portfolio_truth_types.py b/src/portfolio_truth_types.py
index 40b1aca..ad93384 100644
--- a/src/portfolio_truth_types.py
+++ b/src/portfolio_truth_types.py
@@ -42,6 +42,11 @@ class IdentityFields:
     section_marker: str
     section_label: str
     has_git: bool
+    # GitHub "owner/repo" from the local git remote; empty string when absent. Lets
+    # risk and other truth-keyed overlays be matched by the GitHub repo name (audit
+    # metadata.name) as well as the local-dir display_name, since the two often
+    # differ (e.g. "Signal & Noise" vs "signal-noise").
+    repo_full_name: str = ""
 
     def to_dict(self) -> dict[str, Any]:
         return dataclasses.asdict(self)
diff --git a/src/report_enrichment.py b/src/report_enrichment.py
index eb2dd9c..cf3dea3 100644
--- a/src/report_enrichment.py
+++ b/src/report_enrichment.py
@@ -173,14 +173,24 @@ def build_risk_lookup(output_dir: Path | None) -> dict[str, dict[str, str]]:
         return {}
     lookup: dict[str, dict[str, str]] = {}
     for project in truth.get("projects") or []:
-        name = str((project.get("identity") or {}).get("display_name") or "")
+        identity = project.get("identity") or {}
+        name = str(identity.get("display_name") or "")
         if not name:
             continue
         risk = project.get("risk") or {}
-        lookup[name] = {
+        entry = {
             "risk_tier": str(risk.get("risk_tier") or "baseline"),
             "risk_summary": str(risk.get("risk_summary") or ""),
         }
+        lookup[name] = entry
+        # Alias each project under its GitHub repo name so render consumers that key
+        # by audit metadata.name resolve risk for repos whose local-dir display_name
+        # differs (e.g. "Signal & Noise" vs "signal-noise"). The alias never replaces
+        # an existing key and is the SAME entry object, so _extract_risk_posture can
+        # deduplicate by object identity and the aggregate counts are not inflated.
+        slug = str(identity.get("repo_full_name") or "").rsplit("/", 1)[-1]
+        if slug and slug not in lookup:
+            lookup[slug] = entry
     return lookup
 
 
@@ -191,7 +201,14 @@ def _extract_risk_posture(output_dir: Path | None) -> dict[str, Any]:
         return {}
     tier_counts: dict[str, int] = {}
     top_elevated: list[dict[str, Any]] = []
+    seen: set[int] = set()
     for name, entry in lookup.items():
+        # build_risk_lookup aliases each project under both its display_name and its
+        # GitHub slug (same entry object); count each project exactly once. Insertion
+        # order puts the display_name first, so top_elevated uses the human name.
+        if id(entry) in seen:
+            continue
+        seen.add(id(entry))
         tier = entry["risk_tier"]
         tier_counts[tier] = tier_counts.get(tier, 0) + 1
         if tier == "elevated":
diff --git a/tests/test_load_risk_truth.py b/tests/test_load_risk_truth.py
new file mode 100644
index 0000000..e86a69b
--- /dev/null
+++ b/tests/test_load_risk_truth.py
@@ -0,0 +1,37 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+from src.excel_export_truth_helpers import load_risk_truth
+
+
+def test_load_risk_truth_keys_by_slug_and_display_name(tmp_path: Path) -> None:
+    truth = {
+        "projects": [
+            {
+                "identity": {
+                    "display_name": "Signal & Noise",
+                    "repo_full_name": "saagpatel/signal-noise",
+                },
+                "risk": {"risk_tier": "elevated"},
+            }
+        ]
+    }
+    (tmp_path / "portfolio-truth-latest.json").write_text(json.dumps(truth))
+    risk_lookup, posture = load_risk_truth(tmp_path)
+    # Findable by both the local-dir display name and the GitHub slug.
+    assert risk_lookup["Signal & Noise"] == "elevated"
+    assert risk_lookup["signal-noise"] == "elevated"
+    # The slug alias must NOT double-count the aggregate posture.
+    assert posture["elevated"] == 1
+
+
+def test_load_risk_truth_no_slug_when_repo_full_name_absent(tmp_path: Path) -> None:
+    truth = {
+        "projects": [{"identity": {"display_name": "PlainRepo"}, "risk": {"risk_tier": "moderate"}}]
+    }
+    (tmp_path / "portfolio-truth-latest.json").write_text(json.dumps(truth))
+    risk_lookup, posture = load_risk_truth(tmp_path)
+    assert risk_lookup == {"PlainRepo": "moderate"}
+    assert posture["moderate"] == 1
diff --git a/tests/test_report_enrichment_risk.py b/tests/test_report_enrichment_risk.py
index bd012fd..47a2d50 100644
--- a/tests/test_report_enrichment_risk.py
+++ b/tests/test_report_enrichment_risk.py
@@ -105,6 +105,28 @@ def test_build_risk_lookup_empty_when_no_output_dir() -> None:
     assert build_risk_lookup(None) == {}
 
 
+def test_build_risk_lookup_also_keys_by_github_slug(tmp_path: Path) -> None:
+    # Truth keys by local-dir display_name, but render consumers look up by the
+    # GitHub repo name (audit metadata.name). When the truth identity carries the
+    # repo_full_name, risk must also be findable by that GitHub slug.
+    truth = {
+        "schema_version": "0.5.0",
+        "projects": [
+            {
+                "identity": {
+                    "display_name": "Signal & Noise",
+                    "repo_full_name": "saagpatel/signal-noise",
+                },
+                "risk": {"risk_tier": "elevated", "risk_summary": "Weak context."},
+            }
+        ],
+    }
+    (tmp_path / "portfolio-truth-latest.json").write_text(json.dumps(truth))
+    lookup = build_risk_lookup(tmp_path)
+    assert lookup["Signal & Noise"]["risk_tier"] == "elevated"
+    assert lookup["signal-noise"]["risk_tier"] == "elevated"
+
+
 def test_extract_risk_posture_still_derived_from_same_source(tmp_path: Path) -> None:
     # _extract_risk_posture is reimplemented on top of build_risk_lookup; aggregate
     # counts must still match the per-repo lookup.