From 956800e235cb4bb83332576669b97f48fdd9a8bd Mon Sep 17 00:00:00 2001
From: Suhaib Mujahid
Date: Thu, 11 Jun 2026 21:57:54 -0400
Subject: [PATCH] Add Anthropic GCP WIF support

Introduce GCP Workload Identity Federation support for Anthropic by
adding an anthropic_wif module that fetches Google-signed OIDC identity
tokens, writes them to a private file, and keeps them refreshed on a
background thread (atomic writes, configurable refresh interval).

Runtime now calls anthropic_wif.configure() before agent startup to wire
the token file into ANTHROPIC_IDENTITY_TOKEN_FILE; configure is a no-op
when federation is not enabled or when ANTHROPIC_API_KEY is set.

A non-finite refresh interval (nan/inf) falls back to the default, and a
failed refresh is retried on a short cadence instead of a full interval
so the token on disk is replaced before it expires.

Also add google-auth to dependencies.
---
 .../hackbot_runtime/anthropic_wif.py          | 240 ++++++++++++++++++
 .../hackbot_runtime/runtime.py                |  13 +
 libs/hackbot-runtime/pyproject.toml           |   1 +
 .../tests/test_anthropic_wif.py               | 235 +++++++++++++++++
 uv.lock                                       |   2 +
 5 files changed, 491 insertions(+)
 create mode 100644 libs/hackbot-runtime/hackbot_runtime/anthropic_wif.py
 create mode 100644 libs/hackbot-runtime/tests/test_anthropic_wif.py

diff --git a/libs/hackbot-runtime/hackbot_runtime/anthropic_wif.py b/libs/hackbot-runtime/hackbot_runtime/anthropic_wif.py
new file mode 100644
index 0000000000..1692376d69
--- /dev/null
+++ b/libs/hackbot-runtime/hackbot_runtime/anthropic_wif.py
@@ -0,0 +1,240 @@
+"""GCP Workload Identity Federation for Anthropic API auth.
+
+On GCP (Cloud Run, Cloud Functions, GCE, GKE) a workload can fetch a
+Google-signed OIDC identity token from the instance metadata server (via the
+``google-auth`` library) and exchange it for a short-lived Anthropic access
+token, instead of shipping a static ``ANTHROPIC_API_KEY``. The Anthropic SDK and
+the Claude Code CLI (which the
+claude-agent-sdk spawns) do the exchange and refresh themselves, driven entirely
+by environment variables:
+
+    ``ANTHROPIC_FEDERATION_RULE_ID``   federation rule (``fdrl_…``)  ── set by deploy
+    ``ANTHROPIC_ORGANIZATION_ID``      Anthropic org id              ── set by deploy
+    ``ANTHROPIC_SERVICE_ACCOUNT_ID``   target service account        ── set by deploy
+    ``ANTHROPIC_WORKSPACE_ID``         workspace (optional)          ── set by deploy
+    ``ANTHROPIC_IDENTITY_TOKEN_FILE``  path to the Google JWT        ── managed here
+
+The deploy provisions the four federation ids; this module owns the fifth. It
+fetches the Google identity token, writes it to a private file, points
+``ANTHROPIC_IDENTITY_TOKEN_FILE`` at it, and keeps it fresh in a background
+thread — Google identity tokens live ~1h, and the SDK re-reads the file on every
+exchange, so a periodically-rewritten file transparently keeps auth alive for
+runs longer than a token's lifetime.
+
+When ``ANTHROPIC_FEDERATION_RULE_ID`` is absent the runtime is in API-key mode
+and this module is inert, so local/compose runs keep using ``ANTHROPIC_API_KEY``.
+
+See https://platform.claude.com/docs/en/manage-claude/wif-providers/gcp
+"""
+
+from __future__ import annotations
+
+import logging
+import math
+import os
+import tempfile
+import threading
+from pathlib import Path
+
+import google.auth.transport.requests
+import google.oauth2.id_token
+from google.auth.exceptions import GoogleAuthError
+
+from hackbot_runtime.providers import ProviderError
+
+log = logging.getLogger("hackbot_runtime.anthropic_wif")
+
+# Audience the metadata token is minted for; must match the federation rule's
+# ``audience`` matcher on the Anthropic side.
+ANTHROPIC_AUDIENCE = "https://api.anthropic.com"
+
+# Presence of the federation rule id is what flips the runtime from API-key mode
+# to WIF mode — the deploy sets it only on federation-enabled workloads.
+RULE_ID_ENV = "ANTHROPIC_FEDERATION_RULE_ID"
+TOKEN_FILE_ENV = "ANTHROPIC_IDENTITY_TOKEN_FILE"
+REFRESH_INTERVAL_ENV = "ANTHROPIC_IDENTITY_TOKEN_REFRESH_SECONDS"
+
+# Google identity tokens expire after ~1h; refresh well inside that window so
+# the file always carries plenty of remaining lifetime for the SDK to exchange.
+DEFAULT_REFRESH_INTERVAL = 1800
+
+# After a failed refresh, retry on this shorter cadence instead of waiting a
+# full interval: with the default 30 min interval one missed refresh would
+# otherwise leave the ~1h token on disk until about the moment it expires.
+RETRY_INTERVAL = 60
+
+# Set once configure() succeeds; keeps the daemon refresher alive for the
+# process lifetime and makes a second configure() call a no-op.
+_refresher: _TokenFileRefresher | None = None
+
+
+def is_enabled() -> bool:
+    """True when the deploy provisioned a federation rule (WIF mode)."""
+    return bool(os.environ.get(RULE_ID_ENV))
+
+
+def fetch_gcp_identity_token(audience: str = ANTHROPIC_AUDIENCE) -> str:
+    """Fetch a Google-signed OIDC identity token for ``audience``.
+
+    Delegates to ``google.oauth2.id_token``, which sources the token from the
+    GCE/Cloud Run/GKE metadata server (or a ``GOOGLE_APPLICATION_CREDENTIALS``
+    service-account file for local testing). On the metadata path it requests
+    ``format=full``, so the token carries the ``email`` claim the federation rule
+    matches on — without it the exchange fails with ``invalid_grant``.
+    """
+    request = google.auth.transport.requests.Request()
+    try:
+        token = google.oauth2.id_token.fetch_id_token(request, audience)
+    except GoogleAuthError as exc:
+        # Surface provider-credential failures as a single ProviderError type so
+        # callers (and the refresh loop) handle them uniformly.
+        raise ProviderError(
+            f"Failed to fetch a Google identity token for Anthropic federation: {exc}"
+        ) from exc
+    if not token:
+        raise ProviderError(
+            "Google returned an empty identity token; the workload has no usable "
+            "service account."
+        )
+    return token
+
+
+def _refresh_interval() -> float:
+    """Return the refresh cadence in seconds from ``REFRESH_INTERVAL_ENV``.
+
+    Falls back to ``DEFAULT_REFRESH_INTERVAL`` (with a warning) when the value
+    is not a number or not a positive finite number. ``float()`` accepts ``nan``
+    and ``inf``, but neither is a usable ``Event.wait()`` timeout, so they must
+    not reach the refresh loop. An unset or empty variable means the default.
+    """
+    raw = os.environ.get(REFRESH_INTERVAL_ENV)
+    if not raw:
+        return DEFAULT_REFRESH_INTERVAL
+    try:
+        value = float(raw)
+    except ValueError:
+        log.warning(
+            "%s=%r is not a number; falling back to %ss",
+            REFRESH_INTERVAL_ENV,
+            raw,
+            DEFAULT_REFRESH_INTERVAL,
+        )
+        return DEFAULT_REFRESH_INTERVAL
+    if not math.isfinite(value) or value <= 0:
+        log.warning(
+            "%s=%s is not a positive finite number; falling back to %ss",
+            REFRESH_INTERVAL_ENV,
+            raw,
+            DEFAULT_REFRESH_INTERVAL,
+        )
+        return DEFAULT_REFRESH_INTERVAL
+    return value
+
+
+def _default_token_path() -> Path:
+    """A private, process-owned path for the identity token file.
+
+    ``mkdtemp`` gives a 0700 directory so the bearer token isn't world-readable.
+    """
+    return Path(tempfile.mkdtemp(prefix="anthropic-wif-")) / "identity-token"
+
+
+class _TokenFileRefresher:
+    """Keeps ``token_file`` populated with a fresh Google identity token.
+
+    The first write happens synchronously in :meth:`start` so a metadata-server
+    failure surfaces immediately (fail fast) rather than as an opaque auth error
+    once the agent is mid-run. Subsequent writes run on a daemon thread.
+    """
+
+    def __init__(
+        self, token_file: Path, interval: float, audience: str = ANTHROPIC_AUDIENCE
+    ):
+        self._token_file = token_file
+        self._interval = interval
+        self._audience = audience
+        self._stop = threading.Event()
+        self._thread = threading.Thread(
+            target=self._loop, name="anthropic-wif-refresh", daemon=True
+        )
+
+    def _write_token(self) -> None:
+        """Atomically replace the token file so the SDK never reads a partial write."""
+        token = fetch_gcp_identity_token(self._audience)
+        self._token_file.parent.mkdir(parents=True, exist_ok=True)
+        fd, tmp = tempfile.mkstemp(dir=self._token_file.parent, prefix=".identity-")
+        try:
+            with os.fdopen(fd, "w", encoding="utf-8") as handle:
+                handle.write(token)
+            os.replace(tmp, self._token_file)
+        except BaseException:
+            try:
+                os.unlink(tmp)
+            except OSError:
+                pass
+            raise
+
+    def _loop(self) -> None:
+        # wait() returns True only when stop() is set, so the loop exits cleanly
+        # on shutdown and otherwise re-writes the token after each delay.
+        delay = self._interval
+        while not self._stop.wait(delay):
+            try:
+                self._write_token()
+            except (ProviderError, OSError) as exc:
+                # A transient metadata blip is survivable: the previous token is
+                # still on disk and valid for a while, so log and keep the loop
+                # alive rather than crashing the thread. Retry on the shorter
+                # cadence so the ageing token is replaced before it expires.
+                delay = min(self._interval, RETRY_INTERVAL)
+                log.warning(
+                    "Failed to refresh Anthropic WIF identity token; "
+                    "serving the previously written token: %s",
+                    exc,
+                )
+            else:
+                delay = self._interval
+
+    def start(self) -> None:
+        """Write the first token synchronously, then start the refresh thread."""
+        self._write_token()
+        self._thread.start()
+
+    def stop(self) -> None:
+        """Signal the refresh loop to exit."""
+        self._stop.set()
+
+
+def configure() -> bool:
+    """Wire up Anthropic WIF auth for the Claude SDK/CLI when the deploy enables it.
+
+    Returns ``True`` when WIF was configured, ``False`` in API-key mode. Idempotent:
+    a second call while the refresher is already running is a no-op.
+    """
+    global _refresher
+    if not is_enabled():
+        return False
+    if _refresher is not None:
+        return True
+
+    if os.environ.get("ANTHROPIC_API_KEY"):
+        log.error(
+            "ANTHROPIC_API_KEY is set while Workload Identity Federation is "
+            "configured; the API key takes precedence and shadows WIF. Unset it "
+            "if you intend to authenticate via federation."
+        )
+        return False
+
+    token_file = Path(os.environ.get(TOKEN_FILE_ENV) or _default_token_path())
+    interval = _refresh_interval()
+    refresher = _TokenFileRefresher(token_file, interval)
+    refresher.start()
+    os.environ[TOKEN_FILE_ENV] = str(token_file)
+    _refresher = refresher
+    log.info(
+        "Anthropic auth: GCP Workload Identity Federation "
+        "(identity token file %s, refresh every %ss)",
+        token_file,
+        interval,
+    )
+    return True
diff --git a/libs/hackbot-runtime/hackbot_runtime/runtime.py b/libs/hackbot-runtime/hackbot_runtime/runtime.py
index d60234aac4..f9099bd5e5 100644
--- a/libs/hackbot-runtime/hackbot_runtime/runtime.py
+++ b/libs/hackbot-runtime/hackbot_runtime/runtime.py
@@ -9,6 +9,7 @@
 
 from pydantic import ValidationError
 
+from hackbot_runtime import anthropic_wif
 from hackbot_runtime.config import HackbotConfig, load_config
 from hackbot_runtime.context import HackbotContext
 from hackbot_runtime.results import HackbotAgentResult
@@ -32,6 +33,16 @@
 _AGENT_LOG_KEY = "logs/agent.log"
 
 
+def _configure_auth() -> None:
+    """Set up the model-provider credentials before the agent runs.
+
+    For now only Anthropic WIF is supported; this is where other providers will
+    be wired in when we start supporting them.
+    """
+    if anthropic_wif.configure():
+        log.info("Configured Anthropic WIF authentication")
+
+
 def _configure_logging() -> None:
     if not logging.getLogger().handlers:
         logging.basicConfig(
@@ -172,6 +183,7 @@ def run(entrypoint: AgentMain, config: ConfigArg = None) -> NoReturn:
         raise SystemExit(2)
 
     try:
+        _configure_auth()
         outcome: object = entrypoint(ctx)
     except Exception as exc:
         log.exception("Agent raised an exception")
@@ -187,6 +199,7 @@ def run_async(entrypoint: AsyncAgentMain, config: ConfigArg = None) -> NoReturn:
         raise SystemExit(2)
 
     try:
+        _configure_auth()
         outcome: object = asyncio.run(entrypoint(ctx))
     except Exception as exc:
         log.exception("Agent raised an exception")
diff --git a/libs/hackbot-runtime/pyproject.toml b/libs/hackbot-runtime/pyproject.toml
index 00a4bee4a1..8186fc62e3 100644
--- a/libs/hackbot-runtime/pyproject.toml
+++ b/libs/hackbot-runtime/pyproject.toml
@@ -6,6 +6,7 @@ requires-python = ">=3.12"
 dependencies = [
     "requests>=2.32.0",
     "pydantic-settings>=2.1.0",
+    "google-auth>=2.0.0",
     "agent-tools",
 ]
 
diff --git a/libs/hackbot-runtime/tests/test_anthropic_wif.py b/libs/hackbot-runtime/tests/test_anthropic_wif.py
new file mode 100644
index 0000000000..bc41237609
--- /dev/null
+++ b/libs/hackbot-runtime/tests/test_anthropic_wif.py
@@ -0,0 +1,235 @@
+"""Tests for GCP Workload Identity Federation auth setup."""
+
+import logging
+import os
+
+import pytest
+from google.auth.exceptions import GoogleAuthError
+from hackbot_runtime import anthropic_wif
+from hackbot_runtime.providers import ProviderError
+
+
+@pytest.fixture(autouse=True)
+def _reset_module_state():
+    """Stop any refresher a test started and clear the module-level singleton."""
+    yield
+    if anthropic_wif._refresher is not None:
+        anthropic_wif._refresher.stop()
+    anthropic_wif._refresher = None
+
+
+@pytest.fixture
+def _no_federation_env(monkeypatch):
+    for var in (
+        anthropic_wif.RULE_ID_ENV,
+        anthropic_wif.TOKEN_FILE_ENV,
+        anthropic_wif.REFRESH_INTERVAL_ENV,
+    ):
+        monkeypatch.delenv(var, raising=False)
+
+
+def test_is_enabled_follows_rule_id(monkeypatch):
+    monkeypatch.delenv(anthropic_wif.RULE_ID_ENV, raising=False)
+    assert anthropic_wif.is_enabled() is False
+    monkeypatch.setenv(anthropic_wif.RULE_ID_ENV, "fdrl_abc")
+    assert anthropic_wif.is_enabled() is True
+
+
+def test_configure_is_inert_without_federation(_no_federation_env, monkeypatch):
+    called = False
+
+    def _should_not_fetch(*_a, **_k):
+        nonlocal called
+        called = True
+        return "tok"
+
+    monkeypatch.setattr(anthropic_wif, "fetch_gcp_identity_token", _should_not_fetch)
+
+    assert anthropic_wif.configure() is False
+    assert called is False
+    assert anthropic_wif.TOKEN_FILE_ENV not in os.environ
+
+
+def test_configure_writes_token_and_sets_env(_no_federation_env, monkeypatch, tmp_path):
+    token_file = tmp_path / "identity-token"
+    monkeypatch.setenv(anthropic_wif.RULE_ID_ENV, "fdrl_abc")
+    monkeypatch.setenv(anthropic_wif.TOKEN_FILE_ENV, str(token_file))
+    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+    monkeypatch.setattr(
+        anthropic_wif, "fetch_gcp_identity_token", lambda *a, **k: "google.jwt.token"
+    )
+
+    assert anthropic_wif.configure() is True
+
+    assert token_file.read_text() == "google.jwt.token"
+    assert os.environ[anthropic_wif.TOKEN_FILE_ENV] == str(token_file)
+
+
+def test_configure_refuses_when_api_key_set(
+    _no_federation_env, monkeypatch, tmp_path, caplog
+):
+    monkeypatch.setenv(anthropic_wif.RULE_ID_ENV, "fdrl_abc")
+    monkeypatch.setenv(anthropic_wif.TOKEN_FILE_ENV, str(tmp_path / "identity-token"))
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-leftover")
+    monkeypatch.setattr(
+        anthropic_wif, "fetch_gcp_identity_token", lambda *a, **k: "tok"
+    )
+
+    with caplog.at_level(logging.ERROR, logger=anthropic_wif.log.name):
+        assert anthropic_wif.configure() is False
+
+    # A key set alongside federation is flagged as an error and left untouched.
+    assert any(
+        rec.levelno == logging.ERROR and "ANTHROPIC_API_KEY" in rec.message
+        for rec in caplog.records
+    )
+    assert os.environ["ANTHROPIC_API_KEY"] == "sk-leftover"
+    assert anthropic_wif._refresher is None
+
+
+def test_configure_is_idempotent(_no_federation_env, monkeypatch, tmp_path):
+    monkeypatch.setenv(anthropic_wif.RULE_ID_ENV, "fdrl_abc")
+    monkeypatch.setenv(anthropic_wif.TOKEN_FILE_ENV, str(tmp_path / "tok"))
+    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+
+    calls = 0
+
+    def _fetch(*_a, **_k):
+        nonlocal calls
+        calls += 1
+        return "tok"
+
+    monkeypatch.setattr(anthropic_wif, "fetch_gcp_identity_token", _fetch)
+
+    assert anthropic_wif.configure() is True
+    assert anthropic_wif.configure() is True
+    # Second call short-circuits: no new refresher, no extra fetch.
+    assert calls == 1
+
+
+def test_configure_defaults_token_path_when_unset(_no_federation_env, monkeypatch):
+    monkeypatch.setenv(anthropic_wif.RULE_ID_ENV, "fdrl_abc")
+    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+    monkeypatch.setattr(
+        anthropic_wif, "fetch_gcp_identity_token", lambda *a, **k: "tok"
+    )
+
+    assert anthropic_wif.configure() is True
+
+    path = os.environ[anthropic_wif.TOKEN_FILE_ENV]
+    assert "anthropic-wif-" in path
+    assert os.path.exists(path)
+
+
+def test_fetch_uses_google_auth_with_audience(monkeypatch):
+    captured = {}
+
+    def _fake_fetch(request, audience):
+        captured["request"] = request
+        captured["audience"] = audience
+        return "signed.jwt.value"
+
+    monkeypatch.setattr(
+        anthropic_wif.google.oauth2.id_token, "fetch_id_token", _fake_fetch
+    )
+
+    token = anthropic_wif.fetch_gcp_identity_token()
+
+    assert token == "signed.jwt.value"
+    assert captured["audience"] == anthropic_wif.ANTHROPIC_AUDIENCE
+    assert captured["request"] is not None
+
+
+def test_fetch_wraps_google_auth_error(monkeypatch):
+    def _raise(*_a, **_k):
+        raise GoogleAuthError("metadata server unreachable")
+
+    monkeypatch.setattr(anthropic_wif.google.oauth2.id_token, "fetch_id_token", _raise)
+    with pytest.raises(ProviderError, match="Failed to fetch a Google identity token"):
+        anthropic_wif.fetch_gcp_identity_token()
+
+
+def test_fetch_rejects_empty(monkeypatch):
+    monkeypatch.setattr(
+        anthropic_wif.google.oauth2.id_token, "fetch_id_token", lambda *a, **k: ""
+    )
+    with pytest.raises(ProviderError, match="empty identity token"):
+        anthropic_wif.fetch_gcp_identity_token()
+
+
+@pytest.mark.parametrize(
+    "raw, expected",
+    [
+        (None, anthropic_wif.DEFAULT_REFRESH_INTERVAL),
+        ("900", 900),
+        ("not-a-number", anthropic_wif.DEFAULT_REFRESH_INTERVAL),
+        ("0", anthropic_wif.DEFAULT_REFRESH_INTERVAL),
+        ("-5", anthropic_wif.DEFAULT_REFRESH_INTERVAL),
+        ("nan", anthropic_wif.DEFAULT_REFRESH_INTERVAL),
+        ("inf", anthropic_wif.DEFAULT_REFRESH_INTERVAL),
+    ],
+)
+def test_refresh_interval_parsing(monkeypatch, raw, expected):
+    if raw is None:
+        monkeypatch.delenv(anthropic_wif.REFRESH_INTERVAL_ENV, raising=False)
+    else:
+        monkeypatch.setenv(anthropic_wif.REFRESH_INTERVAL_ENV, raw)
+    assert anthropic_wif._refresh_interval() == expected
+
+
+def test_token_file_write_is_atomic_and_clean(monkeypatch, tmp_path):
+    token_file = tmp_path / "identity-token"
+    monkeypatch.setattr(
+        anthropic_wif, "fetch_gcp_identity_token", lambda *a, **k: "the.jwt"
+    )
+    refresher = anthropic_wif._TokenFileRefresher(token_file, interval=1800)
+
+    refresher._write_token()
+
+    assert token_file.read_text() == "the.jwt"
+    # No leftover temp files from the atomic replace.
+    assert list(tmp_path.iterdir()) == [token_file]
+
+
+def test_refresh_loop_survives_provider_error(monkeypatch, tmp_path):
+    refresher = anthropic_wif._TokenFileRefresher(tmp_path / "tok", interval=0)
+    calls = []
+
+    def _boom(*_a, **_k):
+        calls.append(1)
+        # Stop after the first failure so the loop exits on its next tick.
+        refresher.stop()
+        raise ProviderError("transient")
+
+    monkeypatch.setattr(anthropic_wif, "fetch_gcp_identity_token", _boom)
+
+    # A ProviderError mid-run is logged and swallowed, not raised out of _loop.
+    refresher._loop()
+
+    assert calls == [1]
+
+
+def test_refresh_loop_retries_sooner_after_failure(monkeypatch, tmp_path):
+    refresher = anthropic_wif._TokenFileRefresher(tmp_path / "tok", interval=1800)
+    waits = []
+    outcomes = iter([ProviderError("transient"), "fresh.jwt"])
+
+    def _wait(timeout):
+        waits.append(timeout)
+        # Let two ticks run, then report "stopped" so the loop exits.
+        return len(waits) > 2
+
+    def _fetch(*_a, **_k):
+        outcome = next(outcomes)
+        if isinstance(outcome, Exception):
+            raise outcome
+        return outcome
+
+    monkeypatch.setattr(refresher._stop, "wait", _wait)
+    monkeypatch.setattr(anthropic_wif, "fetch_gcp_identity_token", _fetch)
+
+    refresher._loop()
+
+    # Full interval first, the short retry after the failure, then back to normal.
+    assert waits == [1800, anthropic_wif.RETRY_INTERVAL, 1800]
+    assert (tmp_path / "tok").read_text() == "fresh.jwt"
diff --git a/uv.lock b/uv.lock
index 98b9d7bf13..9712e4de05 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2206,6 +2206,7 @@ version = "0.1.0"
 source = { editable = "libs/hackbot-runtime" }
 dependencies = [
     { name = "agent-tools" },
+    { name = "google-auth" },
     { name = "pydantic-settings" },
     { name = "requests" },
 ]
@@ -2221,6 +2222,7 @@ requires-dist = [
     { name = "agent-tools", editable = "libs/agent-tools" },
     { name = "agent-tools", extras = ["claude-sdk"], marker = "extra == 'claude-sdk'", editable = "libs/agent-tools" },
     { name = "claude-agent-sdk", marker = "extra == 'claude-sdk'", specifier = ">=0.1.30" },
+    { name = "google-auth", specifier = ">=2.0.0" },
     { name = "pydantic-settings", specifier = ">=2.1.0" },
     { name = "requests", specifier = ">=2.32.0" },
 ]