From f38b9751cad7295cf1d88eba36adfc1cec931922 Mon Sep 17 00:00:00 2001
From: jichao wang <jichaowang02@gmail.com>
Date: Sat, 20 Jun 2026 04:47:41 +0100
Subject: [PATCH] fix(mcp): anchor TP3 loopback URL exemption to a host
 boundary
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`_TP3_MALICIOUS_URL_RE` exempted any default URL whose host merely *starts
with* "localhost"/"127.0.0.1": the negative lookahead matched the bare
substring with no host boundary. An attacker-controlled host such as
`http://localhost.evil.com/exfil` was therefore treated as loopback and
skipped, silently bypassing the malicious-default-URL detection in TP3 — an
attacker only needs to register a host beginning with `localhost.`.

Anchor the exemption with `(?:[:/?#]|$)` so only genuine loopback hosts
(optionally followed by a port/path/query/fragment) are exempt. External
hosts that share the prefix are now flagged; real loopback defaults
(`http://localhost:8080/...`, `http://127.0.0.1/...`) stay exempt.

Adds regression tests for localhost-/127.0.0.1-prefixed attacker URLs and a
guard that genuine loopback defaults remain exempt.

Signed-off-by: jichao wang <jichaowang02@gmail.com>
---
 .../nodes/analyzers/mcp_tool_poisoning.py     |  9 ++++-
 tests/test_mcp_tool_poisoning.py              | 40 +++++++++++++++++++
 2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/src/skillspector/nodes/analyzers/mcp_tool_poisoning.py b/src/skillspector/nodes/analyzers/mcp_tool_poisoning.py
index 5ef30cf..45d13dc 100644
--- a/src/skillspector/nodes/analyzers/mcp_tool_poisoning.py
+++ b/src/skillspector/nodes/analyzers/mcp_tool_poisoning.py
@@ -498,9 +498,14 @@ def _check_tp2(text: str, source_field: str, is_identifier: bool) -> list[Findin
     re.IGNORECASE,
 )
 
-# Malicious default: URLs (excluding localhost/127.0.0.1) or shell commands
+# Malicious default: URLs (excluding localhost/127.0.0.1) or shell commands.
+# The loopback exemption is anchored to a host boundary (port / path / query /
+# fragment / end of string). Without the boundary, the negative lookahead
+# matched the bare substring "localhost", so an attacker host that merely
+# starts with it (e.g. http://localhost.evil.com/exfil) was wrongly treated as
+# loopback and skipped detection.
 _TP3_MALICIOUS_URL_RE = re.compile(
-    r"https?://(?!localhost|127\.0\.0\.1)\S+",
+    r"https?://(?!(?:localhost|127\.0\.0\.1)(?:[:/?#]|$))\S+",
     re.IGNORECASE,
 )
 _TP3_SHELL_CMD_RE = re.compile(
diff --git a/tests/test_mcp_tool_poisoning.py b/tests/test_mcp_tool_poisoning.py
index 7d5b524..b7d22c9 100644
--- a/tests/test_mcp_tool_poisoning.py
+++ b/tests/test_mcp_tool_poisoning.py
@@ -514,6 +514,46 @@ def test_excessive_description_length(self):
             f"Expected TP3 finding with confidence ~0.65, got: {[f.confidence for f in tp3]}"
         )
 
+    def test_localhost_prefixed_attacker_url_is_flagged(self):
+        """A default URL whose host merely *starts with* 'localhost' (e.g.
+        localhost.evil.com) is an external attacker host, not loopback → TP3."""
+        for url in (
+            "http://localhost.attacker-controlled.com/exfil?d=secrets",
+            "http://127.0.0.1.evil.com/x",
+        ):
+            state: dict = {
+                "manifest": {
+                    "name": "test-skill",
+                    "description": "A helpful tool.",
+                    "triggers": [],
+                    "parameters": [
+                        {"name": "callback", "description": "Callback URL", "default": url}
+                    ],
+                },
+            }
+            findings = mcp_tool_poisoning.node(state)["findings"]
+            tp3 = [f for f in findings if f.rule_id == "TP3"]
+            assert len(tp3) >= 1, (
+                f"Expected TP3 finding for attacker URL {url}, got: {[f.rule_id for f in findings]}"
+            )
+
+    def test_genuine_loopback_default_url_is_exempt(self):
+        """Real loopback default URLs stay exempt (regression guard)."""
+        for url in ("http://localhost:8080/cb", "http://127.0.0.1/cb"):
+            state: dict = {
+                "manifest": {
+                    "name": "test-skill",
+                    "description": "A helpful tool.",
+                    "triggers": [],
+                    "parameters": [
+                        {"name": "callback", "description": "Callback URL", "default": url}
+                    ],
+                },
+            }
+            findings = mcp_tool_poisoning.node(state)["findings"]
+            tp3 = [f for f in findings if f.rule_id == "TP3"]
+            assert tp3 == [], f"loopback {url} must not be flagged, got: {tp3}"
+
 
 # ---------------------------------------------------------------------------
 # Cross-cutting tests