From f38b9751cad7295cf1d88eba36adfc1cec931922 Mon Sep 17 00:00:00 2001 From: jichao wang Date: Sat, 20 Jun 2026 04:47:41 +0100 Subject: [PATCH] fix(mcp): anchor TP3 loopback URL exemption to a host boundary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `_TP3_MALICIOUS_URL_RE` exempted any default URL whose host merely *starts with* "localhost"/"127.0.0.1": the negative lookahead matched the bare substring with no host boundary. An attacker-controlled host such as `http://localhost.evil.com/exfil` was therefore treated as loopback and skipped, silently bypassing the malicious-default-URL detection in TP3 — an attacker only needs to register a host beginning with `localhost.`. Anchor the exemption with `(?:[:/?#]|$)` so only genuine loopback hosts (optionally followed by a port/path/query/fragment) are exempt. External hosts that share the prefix are now flagged; real loopback defaults (`http://localhost:8080/...`, `http://127.0.0.1/...`) stay exempt. Adds regression tests for localhost-/127.0.0.1-prefixed attacker URLs and a guard that genuine loopback defaults remain exempt. Signed-off-by: jichao wang --- .../nodes/analyzers/mcp_tool_poisoning.py | 9 ++++- tests/test_mcp_tool_poisoning.py | 40 +++++++++++++++++++ 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/src/skillspector/nodes/analyzers/mcp_tool_poisoning.py b/src/skillspector/nodes/analyzers/mcp_tool_poisoning.py index 5ef30cf..45d13dc 100644 --- a/src/skillspector/nodes/analyzers/mcp_tool_poisoning.py +++ b/src/skillspector/nodes/analyzers/mcp_tool_poisoning.py @@ -498,9 +498,14 @@ def _check_tp2(text: str, source_field: str, is_identifier: bool) -> list[Findin re.IGNORECASE, ) -# Malicious default: URLs (excluding localhost/127.0.0.1) or shell commands +# Malicious default: URLs (excluding localhost/127.0.0.1) or shell commands. +# The loopback exemption is anchored to a host boundary (port / path / query / +# fragment / end of string). Without the boundary, the negative lookahead +# matched the bare substring "localhost", so an attacker host that merely +# starts with it (e.g. http://localhost.evil.com/exfil) was wrongly treated as +# loopback and skipped detection. _TP3_MALICIOUS_URL_RE = re.compile( - r"https?://(?!localhost|127\.0\.0\.1)\S+", + r"https?://(?!(?:localhost|127\.0\.0\.1)(?:[:/?#]|$))\S+", re.IGNORECASE, ) _TP3_SHELL_CMD_RE = re.compile( diff --git a/tests/test_mcp_tool_poisoning.py b/tests/test_mcp_tool_poisoning.py index 7d5b524..b7d22c9 100644 --- a/tests/test_mcp_tool_poisoning.py +++ b/tests/test_mcp_tool_poisoning.py @@ -514,6 +514,46 @@ def test_excessive_description_length(self): f"Expected TP3 finding with confidence ~0.65, got: {[f.confidence for f in tp3]}" ) + def test_localhost_prefixed_attacker_url_is_flagged(self): + """A default URL whose host merely *starts with* 'localhost' (e.g. + localhost.evil.com) is an external attacker host, not loopback → TP3.""" + for url in ( + "http://localhost.attacker-controlled.com/exfil?d=secrets", + "http://127.0.0.1.evil.com/x", + ): + state: dict = { + "manifest": { + "name": "test-skill", + "description": "A helpful tool.", + "triggers": [], + "parameters": [ + {"name": "callback", "description": "Callback URL", "default": url} + ], + }, + } + findings = mcp_tool_poisoning.node(state)["findings"] + tp3 = [f for f in findings if f.rule_id == "TP3"] + assert len(tp3) >= 1, ( + f"Expected TP3 finding for attacker URL {url}, got: {[f.rule_id for f in findings]}" + ) + + def test_genuine_loopback_default_url_is_exempt(self): + """Real loopback default URLs stay exempt (regression guard).""" + for url in ("http://localhost:8080/cb", "http://127.0.0.1/cb"): + state: dict = { + "manifest": { + "name": "test-skill", + "description": "A helpful tool.", + "triggers": [], + "parameters": [ + {"name": "callback", "description": "Callback URL", "default": url} + ], + }, + } + findings = mcp_tool_poisoning.node(state)["findings"] + tp3 = [f for f in findings if f.rule_id == "TP3"] + assert tp3 == [], f"loopback {url} must not be flagged, got: {tp3}" + # --------------------------------------------------------------------------- # Cross-cutting tests