diff --git a/crates/trusted-server-core/src/html_processor.rs b/crates/trusted-server-core/src/html_processor.rs index ee67bdc1..c00b0879 100644 --- a/crates/trusted-server-core/src/html_processor.rs +++ b/crates/trusted-server-core/src/html_processor.rs @@ -258,6 +258,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso move |el| { if let Some(mut href) = el.get_attribute("href") { let original_href = href.clone(); + let element_name = el.tag_name(); if let Some(rewritten) = patterns.rewrite_url_value(&href) { href = rewritten; } @@ -267,6 +268,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso &href, &IntegrationAttributeContext { attribute_name: "href", + element_name: &element_name, request_host: &patterns.request_host, request_scheme: &patterns.request_scheme, origin_host: &patterns.origin_host, @@ -296,6 +298,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso move |el| { if let Some(mut src) = el.get_attribute("src") { let original_src = src.clone(); + let element_name = el.tag_name(); if let Some(rewritten) = patterns.rewrite_url_value(&src) { src = rewritten; } @@ -304,6 +307,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso &src, &IntegrationAttributeContext { attribute_name: "src", + element_name: &element_name, request_host: &patterns.request_host, request_scheme: &patterns.request_scheme, origin_host: &patterns.origin_host, @@ -333,6 +337,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso move |el| { if let Some(mut action) = el.get_attribute("action") { let original_action = action.clone(); + let element_name = el.tag_name(); if let Some(rewritten) = patterns.rewrite_url_value(&action) { action = rewritten; } @@ -342,6 +347,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso &action, &IntegrationAttributeContext { attribute_name: "action", + 
element_name: &element_name, request_host: &patterns.request_host, request_scheme: &patterns.request_scheme, origin_host: &patterns.origin_host, @@ -371,6 +377,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso move |el| { if let Some(mut srcset) = el.get_attribute("srcset") { let original_srcset = srcset.clone(); + let element_name = el.tag_name(); let new_srcset = srcset .replace(&patterns.https_origin(), &patterns.replacement_url()) .replace(&patterns.http_origin(), &patterns.replacement_url()) @@ -388,6 +395,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso &srcset, &IntegrationAttributeContext { attribute_name: "srcset", + element_name: &element_name, request_host: &patterns.request_host, request_scheme: &patterns.request_scheme, origin_host: &patterns.origin_host, @@ -417,6 +425,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso move |el| { if let Some(mut imagesrcset) = el.get_attribute("imagesrcset") { let original_imagesrcset = imagesrcset.clone(); + let element_name = el.tag_name(); let new_imagesrcset = imagesrcset .replace(&patterns.https_origin(), &patterns.replacement_url()) .replace(&patterns.http_origin(), &patterns.replacement_url()) @@ -433,6 +442,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso &imagesrcset, &IntegrationAttributeContext { attribute_name: "imagesrcset", + element_name: &element_name, request_host: &patterns.request_host, request_scheme: &patterns.request_scheme, origin_host: &patterns.origin_host, diff --git a/crates/trusted-server-core/src/integrations/datadome.rs b/crates/trusted-server-core/src/integrations/datadome.rs index da3a64a2..a91d6181 100644 --- a/crates/trusted-server-core/src/integrations/datadome.rs +++ b/crates/trusted-server-core/src/integrations/datadome.rs @@ -737,6 +737,7 @@ mod tests { let ctx = IntegrationAttributeContext { attribute_name: "src", + element_name: 
"script", request_host: "publisher.com", request_scheme: "https", origin_host: "origin.publisher.com", @@ -777,6 +778,7 @@ mod tests { let ctx = IntegrationAttributeContext { attribute_name: "src", + element_name: "script", request_host: "publisher.com", request_scheme: "https", origin_host: "origin.publisher.com", diff --git a/crates/trusted-server-core/src/integrations/google_tag_manager.rs b/crates/trusted-server-core/src/integrations/google_tag_manager.rs index 64f27415..e58cca26 100644 --- a/crates/trusted-server-core/src/integrations/google_tag_manager.rs +++ b/crates/trusted-server-core/src/integrations/google_tag_manager.rs @@ -707,6 +707,7 @@ mod tests { let ctx = IntegrationAttributeContext { attribute_name: "src", + element_name: "script", request_host: "example.com", request_scheme: "https", origin_host: "origin.example.com", @@ -823,6 +824,7 @@ mod tests { let ctx = IntegrationAttributeContext { attribute_name: "href", + element_name: "a", request_host: "example.com", request_scheme: "https", origin_host: "origin.example.com", diff --git a/crates/trusted-server-core/src/integrations/gpt.rs b/crates/trusted-server-core/src/integrations/gpt.rs index 0affbe95..b6b8f3f0 100644 --- a/crates/trusted-server-core/src/integrations/gpt.rs +++ b/crates/trusted-server-core/src/integrations/gpt.rs @@ -480,6 +480,7 @@ mod tests { fn test_context() -> IntegrationAttributeContext<'static> { IntegrationAttributeContext { attribute_name: "src", + element_name: "script", request_host: "edge.example.com", request_scheme: "https", origin_host: "origin.example.com", diff --git a/crates/trusted-server-core/src/integrations/js_asset_proxy.rs b/crates/trusted-server-core/src/integrations/js_asset_proxy.rs new file mode 100644 index 00000000..263c61f6 --- /dev/null +++ b/crates/trusted-server-core/src/integrations/js_asset_proxy.rs @@ -0,0 +1,1053 @@ +//! JavaScript asset proxy integration. +//! +//! 
This integration serves explicitly configured third-party JavaScript assets +//! from first-party paths. Each asset maps one exact publisher-facing path to +//! one exact HTTPS upstream URL and can independently enable proxying, disable +//! proxying, or block matching script tags from publisher HTML. + +use std::collections::HashSet; +use std::sync::Arc; + +use async_trait::async_trait; +use error_stack::Report; +use fastly::http::{header, Method, StatusCode}; +use fastly::{Request, Response}; +use serde::{Deserialize, Serialize}; +use url::Url; +use validator::{Validate, ValidationError, ValidationErrors}; + +use crate::constants::{ + HEADER_ACCEPT, HEADER_ACCEPT_ENCODING, HEADER_ACCEPT_LANGUAGE, HEADER_USER_AGENT, +}; +use crate::error::TrustedServerError; +use crate::integrations::{ + AttributeRewriteAction, IntegrationAttributeContext, IntegrationAttributeRewriter, + IntegrationEndpoint, IntegrationProxy, IntegrationRegistration, +}; +use crate::proxy::{proxy_request, ProxyRequestConfig}; +use crate::settings::{IntegrationConfig, Settings}; + +const JS_ASSET_PROXY_INTEGRATION_ID: &str = "js_asset_proxy"; +const HEADER_X_TS_JS_ASSET_PROXY: &str = "X-TS-JS-Asset-Proxy"; +const HEADER_X_TS_ERROR: &str = "X-TS-Error"; +const ERROR_ORIGIN_UNREACHABLE: &str = "js-asset-origin-unreachable"; +const ERROR_ORIGIN_STATUS: &str = "js-asset-origin-status"; + +/// Configuration for the JavaScript asset proxy integration. +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct JsAssetProxyConfig { + /// Enables or disables the integration. + #[serde(default)] + pub enabled: bool, + /// Optional downstream cache TTL override for every asset. + #[serde(default)] + pub cache_ttl_seconds: Option, + /// JavaScript assets managed by this integration. + #[serde(default)] + pub assets: Vec, +} + +/// One configured JavaScript asset mapping. 
+#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct JsAssetProxyAsset { + /// Exact first-party request path handled by Trusted Server. + pub path: String, + /// Exact upstream JavaScript URL to fetch and to match during HTML rewriting. + pub origin_url: String, + /// Per-asset proxy behavior. + #[serde(default)] + pub proxy: JsAssetProxyMode, + /// Optional downstream cache TTL override for this asset. + #[serde(default)] + pub cache_ttl_seconds: Option, +} + +/// Per-asset proxy behavior. +#[derive(Debug, Clone, Copy, Default, Deserialize, Eq, PartialEq, Serialize)] +#[serde(rename_all = "lowercase")] +pub enum JsAssetProxyMode { + /// Rewrite matching script URLs and serve the configured route. + #[default] + Enabled, + /// Keep the asset in configuration without rewriting or route registration. + Disabled, + /// Remove matching script elements without route registration. + Blocked, +} + +impl IntegrationConfig for JsAssetProxyConfig { + fn is_enabled(&self) -> bool { + self.enabled + } +} + +impl Validate for JsAssetProxyConfig { + fn validate(&self) -> Result<(), ValidationErrors> { + let mut errors = ValidationErrors::new(); + errors.merge_self("assets", self.assets.validate()); + + if self.enabled && self.assets.is_empty() { + errors.add("assets", ValidationError::new("empty_assets")); + } + + let mut paths = HashSet::new(); + let mut origin_urls = HashSet::new(); + for asset in &self.assets { + if !paths.insert(asset.path.as_str()) { + errors.add("asset_path", ValidationError::new("duplicate_asset_path")); + } + if !origin_urls.insert(asset.origin_url.as_str()) { + errors.add( + "asset_origin_url", + ValidationError::new("duplicate_asset_origin_url"), + ); + } + } + + if errors.is_empty() { + Ok(()) + } else { + Err(errors) + } + } +} + +impl Validate for JsAssetProxyAsset { + fn validate(&self) -> Result<(), ValidationErrors> { + let mut errors = ValidationErrors::new(); + + if !self.path.starts_with('/') { + errors.add("path", 
ValidationError::new("path_must_start_with_slash")); + } + if self.path.starts_with("//") { + errors.add( + "path", + ValidationError::new("path_must_not_be_protocol_relative"), + ); + } + if self.path.contains('*') { + errors.add( + "path", + ValidationError::new("path_must_not_contain_wildcard"), + ); + } + if path_contains_parent_segment(&self.path) { + errors.add( + "path", + ValidationError::new("path_must_not_contain_parent_segment"), + ); + } + if self.path.contains(['{', '}']) { + errors.add("path", ValidationError::new("path_must_be_exact_route")); + } + if self.path.contains(['?', '#']) { + errors.add( + "path", + ValidationError::new("path_must_not_contain_query_or_fragment"), + ); + } + if self + .path + .chars() + .any(|ch| ch.is_whitespace() || ch.is_control()) + { + errors.add( + "path", + ValidationError::new("path_must_not_contain_whitespace_or_control"), + ); + } + + match Url::parse(&self.origin_url) { + Ok(url) => { + if url.scheme() != "https" { + errors.add( + "origin_url", + ValidationError::new("origin_url_must_be_https"), + ); + } + if url.host_str().is_none() { + errors.add( + "origin_url", + ValidationError::new("origin_url_must_have_host"), + ); + } + } + Err(_) => { + errors.add("origin_url", ValidationError::new("invalid_origin_url")); + } + } + + if errors.is_empty() { + Ok(()) + } else { + Err(errors) + } + } +} + +fn path_contains_parent_segment(path: &str) -> bool { + path.split('/').any(|segment| segment == "..") +} + +/// JavaScript asset proxy integration implementation. 
+pub struct JsAssetProxyIntegration { + config: JsAssetProxyConfig, +} + +impl JsAssetProxyIntegration { + fn new(config: JsAssetProxyConfig) -> Arc { + Arc::new(Self { config }) + } + + fn error(message: impl Into) -> TrustedServerError { + TrustedServerError::Integration { + integration: JS_ASSET_PROXY_INTEGRATION_ID.to_string(), + message: message.into(), + } + } + + fn enabled_asset_for_path(&self, path: &str) -> Option<&JsAssetProxyAsset> { + self.config + .assets + .iter() + .find(|asset| asset.proxy == JsAssetProxyMode::Enabled && asset.path == path) + } + + fn asset_for_origin_url(&self, origin_url: &str) -> Option<&JsAssetProxyAsset> { + self.config + .assets + .iter() + .find(|asset| asset.origin_url == origin_url) + } + + fn build_proxy_config<'a>(origin_url: &'a str, req: &Request) -> ProxyRequestConfig<'a> { + let mut config = ProxyRequestConfig::new(origin_url) + .with_streaming() + .without_forward_headers(); + config.follow_redirects = false; + config.forward_ec_id = false; + + for header_name in [ + &HEADER_ACCEPT, + &HEADER_ACCEPT_LANGUAGE, + &HEADER_ACCEPT_ENCODING, + ] { + if let Some(value) = req.get_header(header_name).cloned() { + config = config.with_header(header_name.clone(), value); + } + } + + config.with_header( + HEADER_USER_AGENT.clone(), + fastly::http::HeaderValue::from_static("TrustedServer/1.0"), + ) + } + + fn origin_host(origin_url: &str) -> String { + Url::parse(origin_url) + .ok() + .and_then(|url| url.host_str().map(str::to_string)) + .unwrap_or_else(|| "unknown".to_string()) + } + + fn origin_unreachable_response() -> Response { + let mut response = Response::from_status(StatusCode::BAD_GATEWAY); + response.set_header(HEADER_X_TS_ERROR, ERROR_ORIGIN_UNREACHABLE); + response + } + + fn origin_status_response() -> Response { + let mut response = Response::from_status(StatusCode::BAD_GATEWAY); + response.set_header(HEADER_X_TS_ERROR, ERROR_ORIGIN_STATUS); + response + } + + fn vary_with_accept_encoding(upstream_vary: 
Option<&str>) -> String { + match upstream_vary.map(str::trim) { + Some("*") => "*".to_string(), + Some(vary) if !vary.is_empty() => { + if vary + .split(',') + .any(|header_name| header_name.trim().eq_ignore_ascii_case("accept-encoding")) + { + vary.to_string() + } else { + format!("{vary}, Accept-Encoding") + } + } + _ => "Accept-Encoding".to_string(), + } + } + + fn resolved_cache_ttl_seconds(&self, asset: &JsAssetProxyAsset) -> Option { + asset.cache_ttl_seconds.or(self.config.cache_ttl_seconds) + } + + fn finalize_asset_response( + &self, + asset: &JsAssetProxyAsset, + mut response: Response, + ) -> Response { + let status = response.get_status(); + let content_type = response.get_header(header::CONTENT_TYPE).cloned(); + let content_encoding = response.get_header(header::CONTENT_ENCODING).cloned(); + let etag = response.get_header(header::ETAG).cloned(); + let last_modified = response.get_header(header::LAST_MODIFIED).cloned(); + let upstream_vary = response + .get_header(header::VARY) + .and_then(|value| value.to_str().ok()) + .map(str::to_owned); + let upstream_cache_control = response.get_header(header::CACHE_CONTROL).cloned(); + let body = response.take_body(); + + let mut finalized = Response::from_status(status).with_body(body); + finalized.set_header(HEADER_X_TS_JS_ASSET_PROXY, "true"); + + if let Some(content_type) = content_type { + finalized.set_header(header::CONTENT_TYPE, content_type); + } + if let Some(content_encoding) = content_encoding { + finalized.set_header(header::CONTENT_ENCODING, content_encoding); + finalized.set_header( + header::VARY, + Self::vary_with_accept_encoding(upstream_vary.as_deref()), + ); + } else if let Some(upstream_vary) = upstream_vary { + finalized.set_header(header::VARY, upstream_vary); + } + if let Some(etag) = etag { + finalized.set_header(header::ETAG, etag); + } + if let Some(last_modified) = last_modified { + finalized.set_header(header::LAST_MODIFIED, last_modified); + } + + if let Some(ttl) = 
self.resolved_cache_ttl_seconds(asset) { + finalized.set_header(header::CACHE_CONTROL, format!("public, max-age={ttl}")); + } else if let Some(cache_control) = upstream_cache_control { + finalized.set_header(header::CACHE_CONTROL, cache_control); + } + + finalized + } +} + +fn build( + settings: &Settings, +) -> Result>, Report> { + let Some(config) = + settings.integration_config::(JS_ASSET_PROXY_INTEGRATION_ID)? + else { + return Ok(None); + }; + + Ok(Some(JsAssetProxyIntegration::new(config))) +} + +/// Register the JavaScript asset proxy integration. +/// +/// # Errors +/// +/// Returns an error when the integration is enabled with invalid configuration. +pub fn register( + settings: &Settings, +) -> Result, Report> { + let Some(integration) = build(settings)? else { + return Ok(None); + }; + + Ok(Some( + IntegrationRegistration::builder(JS_ASSET_PROXY_INTEGRATION_ID) + .with_proxy(integration.clone()) + .with_attribute_rewriter(integration) + .build(), + )) +} + +#[async_trait(?Send)] +impl IntegrationProxy for JsAssetProxyIntegration { + fn integration_name(&self) -> &'static str { + JS_ASSET_PROXY_INTEGRATION_ID + } + + fn routes(&self) -> Vec { + self.config + .assets + .iter() + .filter(|asset| asset.proxy == JsAssetProxyMode::Enabled) + .map(|asset| IntegrationEndpoint::new(Method::GET, asset.path.clone())) + .collect() + } + + async fn handle( + &self, + settings: &Settings, + req: Request, + ) -> Result> { + let request_path = req.get_path().to_string(); + let asset = self.enabled_asset_for_path(&request_path).ok_or_else(|| { + Report::new(Self::error(format!( + "Unknown JavaScript asset proxy route: {request_path}" + ))) + })?; + + let origin_host = Self::origin_host(&asset.origin_url); + let proxy_config = Self::build_proxy_config(&asset.origin_url, &req); + let response = match proxy_request(settings, req, proxy_config).await { + Ok(response) => response, + Err(error) => { + log::warn!( + "JS asset origin unreachable for path {} host {}: {:?}", + 
request_path, + origin_host, + error + ); + return Ok(Self::origin_unreachable_response()); + } + }; + + if !response.get_status().is_success() { + log::warn!( + "JS asset origin returned status {} for path {} host {}", + response.get_status(), + request_path, + origin_host + ); + return Ok(Self::origin_status_response()); + } + + Ok(self.finalize_asset_response(asset, response)) + } +} + +impl IntegrationAttributeRewriter for JsAssetProxyIntegration { + fn integration_id(&self) -> &'static str { + JS_ASSET_PROXY_INTEGRATION_ID + } + + fn handles_attribute(&self, attribute: &str) -> bool { + attribute == "src" + } + + fn rewrite( + &self, + attr_name: &str, + attr_value: &str, + ctx: &IntegrationAttributeContext<'_>, + ) -> AttributeRewriteAction { + if attr_name != "src" || !ctx.element_name.eq_ignore_ascii_case("script") { + return AttributeRewriteAction::keep(); + } + + let Some(asset) = self.asset_for_origin_url(attr_value) else { + return AttributeRewriteAction::keep(); + }; + + match asset.proxy { + JsAssetProxyMode::Enabled => AttributeRewriteAction::replace(asset.path.clone()), + JsAssetProxyMode::Disabled => AttributeRewriteAction::keep(), + JsAssetProxyMode::Blocked => AttributeRewriteAction::remove_element(), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Cursor; + use std::sync::Arc; + + use crate::constants::{HEADER_REFERER, HEADER_X_FORWARDED_FOR, HEADER_X_TS_EC}; + use crate::html_processor::{create_html_processor, HtmlProcessorConfig}; + use crate::integrations::{ + AttributeRewriteAction, IntegrationAttributeRewriter, IntegrationRegistry, + }; + use crate::streaming_processor::{Compression, PipelineConfig, StreamingPipeline}; + use crate::test_support::tests::create_test_settings; + use fastly::http::header; + use serde_json::json; + + fn asset(path: &str, origin_url: &str, proxy: JsAssetProxyMode) -> JsAssetProxyAsset { + JsAssetProxyAsset { + path: path.to_string(), + origin_url: origin_url.to_string(), + proxy, + 
cache_ttl_seconds: None, + } + } + + fn config_with_assets(assets: Vec) -> JsAssetProxyConfig { + JsAssetProxyConfig { + enabled: true, + cache_ttl_seconds: None, + assets, + } + } + + fn rewrite_context() -> IntegrationAttributeContext<'static> { + IntegrationAttributeContext { + attribute_name: "src", + element_name: "script", + request_host: "publisher.example.com", + request_scheme: "https", + origin_host: "origin.example.com", + } + } + + fn process_html_with_integration( + html: &str, + integration: Arc, + ) -> String { + let rewriter: Arc = integration; + let processor = create_html_processor(HtmlProcessorConfig { + origin_host: "origin.example.com".to_string(), + request_host: "publisher.example.com".to_string(), + request_scheme: "https".to_string(), + integrations: IntegrationRegistry::from_rewriters(vec![rewriter], Vec::new()), + }); + let pipeline_config = PipelineConfig { + input_compression: Compression::None, + output_compression: Compression::None, + chunk_size: 8192, + }; + let mut pipeline = StreamingPipeline::new(pipeline_config, processor); + + let mut output = Vec::new(); + pipeline + .process(Cursor::new(html.as_bytes()), &mut output) + .expect("should process HTML"); + String::from_utf8(output).expect("should produce UTF-8 HTML") + } + + #[test] + fn disabled_config_does_not_register_routes() { + let mut settings = create_test_settings(); + settings + .integrations + .insert_config( + JS_ASSET_PROXY_INTEGRATION_ID, + &json!({ + "enabled": false, + "assets": [{ + "path": "/assets/vendor.js", + "origin_url": "https://cdn.example.com/vendor.js" + }] + }), + ) + .expect("should insert integration config"); + + let registry = IntegrationRegistry::new(&settings).expect("should build registry"); + + assert!( + !registry.has_route(&Method::GET, "/assets/vendor.js"), + "disabled integration should not register asset route" + ); + } + + #[test] + fn enabled_config_requires_at_least_one_asset() { + let config = JsAssetProxyConfig { + enabled: true, + 
cache_ttl_seconds: None, + assets: Vec::new(), + }; + + assert!( + config.validate().is_err(), + "enabled config should reject empty assets" + ); + } + + #[test] + fn proxy_modes_control_routes_and_rewriting() { + let integration = JsAssetProxyIntegration::new(config_with_assets(vec![ + asset( + "/assets/enabled.js", + "https://cdn.example.com/enabled.js", + JsAssetProxyMode::Enabled, + ), + asset( + "/assets/disabled.js", + "https://cdn.example.com/disabled.js", + JsAssetProxyMode::Disabled, + ), + asset( + "/assets/blocked.js", + "https://cdn.example.com/blocked.js", + JsAssetProxyMode::Blocked, + ), + ])); + + let routes = integration.routes(); + assert_eq!( + routes.len(), + 1, + "only enabled assets should register routes" + ); + assert_eq!(routes[0].method, Method::GET); + assert_eq!(routes[0].path, "/assets/enabled.js"); + + let ctx = rewrite_context(); + assert!(matches!( + integration.rewrite("src", "https://cdn.example.com/enabled.js", &ctx), + AttributeRewriteAction::Replace(ref value) if value == "/assets/enabled.js" + )); + assert!(matches!( + integration.rewrite("src", "https://cdn.example.com/disabled.js", &ctx), + AttributeRewriteAction::Keep + )); + assert!(matches!( + integration.rewrite("src", "https://cdn.example.com/blocked.js", &ctx), + AttributeRewriteAction::RemoveElement + )); + } + + #[test] + fn non_exact_origin_url_matches_are_not_rewritten_or_blocked() { + let integration = JsAssetProxyIntegration::new(config_with_assets(vec![asset( + "/assets/vendor.js", + "https://cdn.example.com/vendor.js", + JsAssetProxyMode::Enabled, + )])); + let ctx = rewrite_context(); + + assert!(matches!( + integration.rewrite("src", "https://cdn.example.com/vendor.js?v=1", &ctx), + AttributeRewriteAction::Keep + )); + assert!(matches!( + integration.rewrite("src", "https://cdn.example.com/other.js", &ctx), + AttributeRewriteAction::Keep + )); + } + + #[test] + fn non_script_src_matches_are_not_rewritten_or_blocked() { + let integration = 
JsAssetProxyIntegration::new(config_with_assets(vec![ + asset( + "/assets/enabled.js", + "https://cdn.example.com/enabled.js", + JsAssetProxyMode::Enabled, + ), + asset( + "/assets/blocked.js", + "https://cdn.example.com/blocked.js", + JsAssetProxyMode::Blocked, + ), + ])); + let ctx = IntegrationAttributeContext { + attribute_name: "src", + element_name: "img", + request_host: "publisher.example.com", + request_scheme: "https", + origin_host: "origin.example.com", + }; + + assert!(matches!( + integration.rewrite("src", "https://cdn.example.com/enabled.js", &ctx), + AttributeRewriteAction::Keep + )); + assert!(matches!( + integration.rewrite("src", "https://cdn.example.com/blocked.js", &ctx), + AttributeRewriteAction::Keep + )); + } + + #[test] + fn html_rewriting_only_applies_to_script_src_elements() { + let integration = JsAssetProxyIntegration::new(config_with_assets(vec![ + asset( + "/assets/enabled.js", + "https://cdn.example.com/enabled.js", + JsAssetProxyMode::Enabled, + ), + asset( + "/assets/blocked.js", + "https://cdn.example.com/blocked.js", + JsAssetProxyMode::Blocked, + ), + ])); + let html = r#" + + + + + "#; + + let processed = process_html_with_integration(html, integration); + + assert!(processed.contains(r#""#)); + assert!(processed.contains(r#""#)); + assert!(!processed.contains("blocked()")); + assert!(processed.contains(r#""#)); + } + + #[test] + fn rejects_duplicate_asset_paths() { + let config = config_with_assets(vec![ + asset( + "/assets/vendor.js", + "https://cdn.example.com/vendor-a.js", + JsAssetProxyMode::Enabled, + ), + asset( + "/assets/vendor.js", + "https://cdn.example.com/vendor-b.js", + JsAssetProxyMode::Enabled, + ), + ]); + + assert!( + config.validate().is_err(), + "duplicate asset paths should be rejected" + ); + } + + #[test] + fn rejects_duplicate_origin_urls() { + let config = config_with_assets(vec![ + asset( + "/assets/vendor-a.js", + "https://cdn.example.com/vendor.js", + JsAssetProxyMode::Enabled, + ), + asset( + 
"/assets/vendor-b.js", + "https://cdn.example.com/vendor.js", + JsAssetProxyMode::Enabled, + ), + ]); + + assert!( + config.validate().is_err(), + "duplicate origin URLs should be rejected" + ); + } + + #[test] + fn rejects_invalid_paths() { + for invalid_path in [ + "assets/vendor.js", + "//cdn.example.com/vendor.js", + "/assets/*.js", + "/assets/../vendor.js", + "/assets/{vendor}.js", + "/assets/vendor.js?v=1", + "/assets/vendor.js#v1", + "/assets/vendor js", + "/assets/vendor\n.js", + ] { + let config = config_with_assets(vec![asset( + invalid_path, + "https://cdn.example.com/vendor.js", + JsAssetProxyMode::Enabled, + )]); + + assert!( + config.validate().is_err(), + "path {invalid_path} should be rejected" + ); + } + } + + #[test] + fn rejects_non_https_origins() { + let config = config_with_assets(vec![asset( + "/assets/vendor.js", + "http://cdn.example.com/vendor.js", + JsAssetProxyMode::Enabled, + )]); + + assert!( + config.validate().is_err(), + "non-HTTPS origin should be rejected" + ); + } + + #[test] + fn rejects_unknown_proxy_mode() { + let toml = r#" + [[handlers]] + path = "^/secure" + username = "user" + password = "pass" + + [[handlers]] + path = "^/_ts/admin" + username = "admin" + password = "admin-pass" + + [publisher] + domain = "test-publisher.com" + cookie_domain = ".test-publisher.com" + origin_url = "https://origin.test-publisher.com" + proxy_secret = "unit-test-proxy-secret" + + [ec] + passphrase = "test-secret-key-32-bytes-minimum" + + [request_signing] + config_store_id = "test-config-store-id" + secret_store_id = "test-secret-store-id" + + [integrations.js_asset_proxy] + enabled = true + + [[integrations.js_asset_proxy.assets]] + path = "/assets/vendor.js" + origin_url = "https://cdn.example.com/vendor.js" + proxy = "passthrough" + "#; + let settings = Settings::from_toml(toml).expect("should parse settings TOML"); + + assert!( + settings + .integration_config::(JS_ASSET_PROXY_INTEGRATION_ID) + .is_err(), + "unknown proxy mode should 
fail deserialization" + ); + } + + #[test] + fn exact_configured_routes_are_registered() { + let mut settings = create_test_settings(); + settings + .integrations + .insert_config( + JS_ASSET_PROXY_INTEGRATION_ID, + &json!({ + "enabled": true, + "assets": [ + { + "path": "/assets/vendor.js", + "origin_url": "https://cdn.example.com/vendor.js" + }, + { + "path": "/assets/blocked.js", + "origin_url": "https://cdn.example.com/blocked.js", + "proxy": "blocked" + } + ] + }), + ) + .expect("should insert integration config"); + + let registry = IntegrationRegistry::new(&settings).expect("should build registry"); + + assert!(registry.has_route(&Method::GET, "/assets/vendor.js")); + assert!(!registry.has_route(&Method::GET, "/assets/vendor.js/extra")); + assert!(!registry.has_route(&Method::POST, "/assets/vendor.js")); + assert!(!registry.has_route(&Method::GET, "/assets/blocked.js")); + } + + #[test] + fn request_path_selects_the_correct_asset() { + let integration = JsAssetProxyIntegration::new(config_with_assets(vec![ + asset( + "/assets/a.js", + "https://cdn.example.com/a.js", + JsAssetProxyMode::Enabled, + ), + asset( + "/assets/b.js", + "https://cdn.example.com/b.js", + JsAssetProxyMode::Enabled, + ), + ])); + + let selected = integration + .enabled_asset_for_path("/assets/b.js") + .expect("should select configured asset"); + + assert_eq!(selected.origin_url, "https://cdn.example.com/b.js"); + } + + #[test] + fn successful_response_preserves_body_and_expected_headers() { + let mut configured_asset = asset( + "/assets/vendor.js", + "https://cdn.example.com/vendor.js", + JsAssetProxyMode::Enabled, + ); + configured_asset.cache_ttl_seconds = Some(900); + let integration = + JsAssetProxyIntegration::new(config_with_assets(vec![configured_asset.clone()])); + let mut upstream = Response::from_status(StatusCode::OK).with_body("console.log('ok');"); + upstream.set_header(header::CONTENT_TYPE, "application/javascript"); + upstream.set_header(header::CONTENT_ENCODING, "gzip"); 
+ upstream.set_header(header::ETAG, "\"asset-etag\""); + upstream.set_header(header::LAST_MODIFIED, "Tue, 10 Jun 2026 00:00:00 GMT"); + upstream.set_header(header::VARY, "Origin"); + upstream.set_header(header::CACHE_CONTROL, "private, max-age=1"); + upstream.set_header(header::SET_COOKIE, "session=1"); + + let mut response = integration.finalize_asset_response(&configured_asset, upstream); + + assert_eq!(response.get_status(), StatusCode::OK); + assert_eq!(response.take_body_str(), "console.log('ok');"); + assert_eq!( + response.get_header_str(HEADER_X_TS_JS_ASSET_PROXY), + Some("true") + ); + assert_eq!( + response.get_header_str(header::CONTENT_TYPE), + Some("application/javascript") + ); + assert_eq!( + response.get_header_str(header::CONTENT_ENCODING), + Some("gzip") + ); + assert_eq!( + response.get_header_str(header::ETAG), + Some("\"asset-etag\"") + ); + assert_eq!( + response.get_header_str(header::LAST_MODIFIED), + Some("Tue, 10 Jun 2026 00:00:00 GMT") + ); + assert_eq!( + response.get_header_str(header::VARY), + Some("Origin, Accept-Encoding") + ); + assert_eq!( + response.get_header_str(header::CACHE_CONTROL), + Some("public, max-age=900") + ); + assert!( + response.get_header(header::SET_COOKIE).is_none(), + "Set-Cookie should not be forwarded" + ); + } + + #[test] + fn preserves_upstream_cache_control_without_ttl_override() { + let configured_asset = asset( + "/assets/vendor.js", + "https://cdn.example.com/vendor.js", + JsAssetProxyMode::Enabled, + ); + let integration = + JsAssetProxyIntegration::new(config_with_assets(vec![configured_asset.clone()])); + let mut upstream = Response::from_status(StatusCode::OK).with_body("body"); + upstream.set_header(header::CACHE_CONTROL, "public, max-age=123"); + + let response = integration.finalize_asset_response(&configured_asset, upstream); + + assert_eq!( + response.get_header_str(header::CACHE_CONTROL), + Some("public, max-age=123") + ); + } + + #[test] + fn 
integration_cache_ttl_overrides_upstream_cache_control() { + let configured_asset = asset( + "/assets/vendor.js", + "https://cdn.example.com/vendor.js", + JsAssetProxyMode::Enabled, + ); + let mut config = config_with_assets(vec![configured_asset.clone()]); + config.cache_ttl_seconds = Some(300); + let integration = JsAssetProxyIntegration::new(config); + let mut upstream = Response::from_status(StatusCode::OK).with_body("body"); + upstream.set_header(header::CACHE_CONTROL, "private, max-age=1"); + + let response = integration.finalize_asset_response(&configured_asset, upstream); + + assert_eq!( + response.get_header_str(header::CACHE_CONTROL), + Some("public, max-age=300") + ); + } + + #[test] + fn upstream_error_responses_have_expected_headers() { + let unreachable = JsAssetProxyIntegration::origin_unreachable_response(); + assert_eq!(unreachable.get_status(), StatusCode::BAD_GATEWAY); + assert_eq!( + unreachable.get_header_str(HEADER_X_TS_ERROR), + Some(ERROR_ORIGIN_UNREACHABLE) + ); + + let origin_status = JsAssetProxyIntegration::origin_status_response(); + assert_eq!(origin_status.get_status(), StatusCode::BAD_GATEWAY); + assert_eq!( + origin_status.get_header_str(HEADER_X_TS_ERROR), + Some(ERROR_ORIGIN_STATUS) + ); + } + + #[test] + fn build_proxy_config_forwards_only_asset_header_allowlist() { + let mut req = Request::get("https://publisher.example.com/assets/vendor.js"); + req.set_header(HEADER_ACCEPT.clone(), "application/javascript"); + req.set_header(HEADER_ACCEPT_LANGUAGE.clone(), "en-US"); + req.set_header(HEADER_ACCEPT_ENCODING.clone(), "gzip, br"); + req.set_header(HEADER_REFERER.clone(), "https://publisher.example.com/page"); + req.set_header(HEADER_X_FORWARDED_FOR.clone(), "192.0.2.10"); + req.set_header(HEADER_X_TS_EC.clone(), "edge-cookie-id"); + req.set_header(header::COOKIE, "session=1"); + + let config = + JsAssetProxyIntegration::build_proxy_config("https://cdn.example.com/vendor.js", &req); + + assert!(!config.copy_request_headers); + 
assert!(!config.follow_redirects); + assert!(!config.forward_ec_id); + + let forwarded: Vec<(String, String)> = config + .headers + .iter() + .map(|(name, value)| { + ( + name.as_str().to_string(), + value + .to_str() + .expect("should expose header value in test") + .to_string(), + ) + }) + .collect(); + + assert_eq!( + forwarded, + vec![ + ("accept".to_string(), "application/javascript".to_string()), + ("accept-language".to_string(), "en-US".to_string()), + ("accept-encoding".to_string(), "gzip, br".to_string()), + ("user-agent".to_string(), "TrustedServer/1.0".to_string()), + ] + ); + } + + #[test] + fn vary_with_accept_encoding_preserves_wildcard_and_existing_value() { + assert_eq!( + JsAssetProxyIntegration::vary_with_accept_encoding(Some("*")), + "*" + ); + assert_eq!( + JsAssetProxyIntegration::vary_with_accept_encoding(Some("Accept-Encoding")), + "Accept-Encoding" + ); + assert_eq!( + JsAssetProxyIntegration::vary_with_accept_encoding(Some("Origin")), + "Origin, Accept-Encoding" + ); + assert_eq!( + JsAssetProxyIntegration::vary_with_accept_encoding(None), + "Accept-Encoding" + ); + } + + #[test] + fn proxy_mode_defaults_to_enabled() { + let parsed: JsAssetProxyAsset = serde_json::from_value(json!({ + "path": "/assets/vendor.js", + "origin_url": "https://cdn.example.com/vendor.js" + })) + .expect("should deserialize asset"); + + assert_eq!(parsed.proxy, JsAssetProxyMode::Enabled); + } +} diff --git a/crates/trusted-server-core/src/integrations/lockr.rs b/crates/trusted-server-core/src/integrations/lockr.rs index 9a480aec..d042ebd2 100644 --- a/crates/trusted-server-core/src/integrations/lockr.rs +++ b/crates/trusted-server-core/src/integrations/lockr.rs @@ -395,6 +395,7 @@ mod tests { fn test_context() -> IntegrationAttributeContext<'static> { IntegrationAttributeContext { attribute_name: "src", + element_name: "script", request_host: "edge.example.com", request_scheme: "https", origin_host: "origin.example.com", diff --git 
a/crates/trusted-server-core/src/integrations/mod.rs b/crates/trusted-server-core/src/integrations/mod.rs index e9438b32..66cf0537 100644 --- a/crates/trusted-server-core/src/integrations/mod.rs +++ b/crates/trusted-server-core/src/integrations/mod.rs @@ -11,6 +11,7 @@ pub mod datadome; pub mod didomi; pub mod google_tag_manager; pub mod gpt; +pub mod js_asset_proxy; pub mod lockr; pub mod nextjs; pub mod permutive; @@ -43,5 +44,6 @@ pub(crate) fn builders() -> &'static [IntegrationBuilder] { google_tag_manager::register, datadome::register, gpt::register, + js_asset_proxy::register, ] } diff --git a/crates/trusted-server-core/src/integrations/permutive.rs b/crates/trusted-server-core/src/integrations/permutive.rs index b3e59456..bf7cd08c 100644 --- a/crates/trusted-server-core/src/integrations/permutive.rs +++ b/crates/trusted-server-core/src/integrations/permutive.rs @@ -704,6 +704,7 @@ mod tests { let ctx = IntegrationAttributeContext { attribute_name: "src", + element_name: "script", request_host: "edge.example.com", request_scheme: "https", origin_host: "origin.example.com", @@ -737,6 +738,7 @@ mod tests { let ctx = IntegrationAttributeContext { attribute_name: "src", + element_name: "script", request_host: "edge.example.com", request_scheme: "https", origin_host: "origin.example.com", diff --git a/crates/trusted-server-core/src/integrations/prebid.rs b/crates/trusted-server-core/src/integrations/prebid.rs index 418b5b1a..e0fc2965 100644 --- a/crates/trusted-server-core/src/integrations/prebid.rs +++ b/crates/trusted-server-core/src/integrations/prebid.rs @@ -1745,6 +1745,7 @@ passphrase = "test-secret-key-32-bytes-minimum" let integration = PrebidIntegration::new(base_config()); let ctx = IntegrationAttributeContext { attribute_name: "src", + element_name: "script", request_host: "pub.example", request_scheme: "https", origin_host: "origin.example", @@ -1762,6 +1763,7 @@ passphrase = "test-secret-key-32-bytes-minimum" let integration = 
PrebidIntegration::new(base_config()); let ctx = IntegrationAttributeContext { attribute_name: "href", + element_name: "a", request_host: "pub.example", request_scheme: "https", origin_host: "origin.example", diff --git a/crates/trusted-server-core/src/integrations/registry.rs b/crates/trusted-server-core/src/integrations/registry.rs index 389dd50d..2599b859 100644 --- a/crates/trusted-server-core/src/integrations/registry.rs +++ b/crates/trusted-server-core/src/integrations/registry.rs @@ -81,6 +81,7 @@ impl ScriptRewriteAction { #[derive(Debug)] pub struct IntegrationAttributeContext<'a> { pub attribute_name: &'a str, + pub element_name: &'a str, pub request_host: &'a str, pub request_scheme: &'a str, pub origin_host: &'a str, @@ -821,7 +822,7 @@ impl IntegrationRegistry { #[must_use] pub fn js_module_ids(&self) -> Vec<&'static str> { // Rust-only integrations with no corresponding JS module - const JS_EXCLUDED: &[&str] = &["nextjs", "aps", "adserver_mock"]; + const JS_EXCLUDED: &[&str] = &["nextjs", "aps", "adserver_mock", "js_asset_proxy"]; // JS-only modules always included (no Rust-side registration). // Sourcepoint's JS guards cookie clearing with a Sourcepoint-owned marker. 
const JS_ALWAYS: &[&str] = &["creative", "sourcepoint"]; diff --git a/crates/trusted-server-core/src/integrations/sourcepoint.rs b/crates/trusted-server-core/src/integrations/sourcepoint.rs index 55fc4ee6..4f900bcd 100644 --- a/crates/trusted-server-core/src/integrations/sourcepoint.rs +++ b/crates/trusted-server-core/src/integrations/sourcepoint.rs @@ -904,6 +904,7 @@ mod tests { let integration = SourcepointIntegration::new(Arc::new(config(true))); let ctx = IntegrationAttributeContext { attribute_name: "src", + element_name: "script", request_host: "edge.example.com", request_scheme: "https", origin_host: "origin.example.com", @@ -928,6 +929,7 @@ mod tests { let integration = SourcepointIntegration::new(Arc::new(config(true))); let ctx = IntegrationAttributeContext { attribute_name: "src", + element_name: "script", request_host: "edge.example.com", request_scheme: "https", origin_host: "origin.example.com", diff --git a/crates/trusted-server-core/src/integrations/testlight.rs b/crates/trusted-server-core/src/integrations/testlight.rs index e63baea3..92ab365a 100644 --- a/crates/trusted-server-core/src/integrations/testlight.rs +++ b/crates/trusted-server-core/src/integrations/testlight.rs @@ -284,6 +284,7 @@ mod tests { let ctx = IntegrationAttributeContext { attribute_name: "src", + element_name: "script", request_host: "edge.example.com", request_scheme: "https", origin_host: "origin.example.com", @@ -313,6 +314,7 @@ mod tests { let integration = TestlightIntegration::new(config); let ctx = IntegrationAttributeContext { attribute_name: "src", + element_name: "script", request_host: "edge.example.com", request_scheme: "https", origin_host: "origin.example.com", diff --git a/docs/superpowers/specs/2026-04-01-js-asset-proxy-design.md b/docs/superpowers/specs/2026-04-01-js-asset-proxy-design.md new file mode 100644 index 00000000..1315a1b6 --- /dev/null +++ b/docs/superpowers/specs/2026-04-01-js-asset-proxy-design.md @@ -0,0 +1,347 @@ +# JS Asset Proxy — 
Engineering Spec + +**Date:** 2026-04-01 +**Updated:** 2026-05-28 +**Status:** Proposed + +--- + +## Context + +Publishers often need to load JavaScript from third-party ad tech or measurement vendors. Those scripts are usually referenced directly from vendor-controlled domains, which means the publisher page depends on external script hostnames at runtime. + +The JS Asset Proxy gives Trusted Server a small, explicit way to serve configured third-party JavaScript files from first-party paths. Each proxied asset is declared in `trusted-server.toml`; at request time Trusted Server fetches the configured upstream URL and streams the response back to the browser with controlled response headers. + +This spec intentionally follows existing integration proxy patterns already used in Trusted Server. The implementation should be a focused integration-level proxy, not a new storage, build, or asset management subsystem. + +--- + +## Goals + +- Serve allowlisted third-party JavaScript assets from configured first-party paths. +- Keep configuration in `trusted-server.toml` under the existing `[integrations.*]` configuration model. +- Fetch only explicitly configured upstream URLs. +- Stream upstream JavaScript responses without server-side body transformation. +- Apply predictable downstream cache headers controlled by Trusted Server configuration. +- Allow configured assets to be individually proxied, disabled, or blocked from publisher HTML. +- Reuse the existing integration registry and proxy request infrastructure. 
+ +--- + +## Configuration + +Add a new integration configuration block: + +```toml +[integrations.js_asset_proxy] +enabled = false + +[[integrations.js_asset_proxy.assets]] +path = "/assets/vendor-loader.js" +origin_url = "https://js.vendor.example.com/loader.js" +proxy = "enabled" + +[[integrations.js_asset_proxy.assets]] +path = "/assets/measurement-sdk.js" +origin_url = "https://cdn.vendor.example.com/sdk/measurement.js" +proxy = "enabled" +cache_ttl_seconds = 900 + +[[integrations.js_asset_proxy.assets]] +path = "/assets/blocked-sdk.js" +origin_url = "https://cdn.vendor.example.com/sdk/blocked.js" +proxy = "blocked" + +[[integrations.js_asset_proxy.assets]] +path = "/assets/inactive-sdk.js" +origin_url = "https://cdn.vendor.example.com/sdk/inactive.js" +proxy = "disabled" +``` + +### Fields + +| Field | Required | Description | +| ---------------------------- | -------: | ---------------------------------------------------------------- | +| `enabled` | Yes | Enables or disables the integration. | +| `cache_ttl_seconds` | No | Optional downstream cache TTL override for all assets. When unset, preserve the upstream cache policy. | +| `assets` | Yes | List of JavaScript assets the proxy may serve. | +| `assets[].path` | Yes | Stable identifier for logs, tests, and response diagnostics; exact first-party request path handled by Trusted Server. | +| `assets[].origin_url` | Yes | Exact upstream JavaScript URL to fetch or match for page rewriting. | +| `assets[].proxy` | No | Per-asset proxy behavior: `enabled`, `disabled`, or `blocked`. Defaults to `enabled`. | +| `assets[].cache_ttl_seconds` | No | Per-asset downstream cache TTL override. Takes precedence over the integration-level value. 
| + +### Validation + +Configuration validation must reject: + +- enabled integration with malformed configured assets; +- empty `assets` when the integration is enabled; +- duplicate asset paths; +- duplicate `origin_url` values; +- asset paths that do not start with `/`; +- asset paths containing `*`; +- asset paths containing `..` path segments; +- `proxy` values other than `enabled`, `disabled`, or `blocked`. + +The implementation may use stricter validation if it keeps the configuration contract simple and documented. + +--- + +## Asset Proxy Behavior + +Each asset has a `proxy` setting that controls both page rewriting and route registration: + +| Value | Behavior | +| ---------- | -------- | +| `enabled` | Rewrite exact matching `