From d72e506a1084aeeddefdc3951b371919827e7fec Mon Sep 17 00:00:00 2001 From: Jeff Larson Date: Sun, 7 Jun 2026 22:28:07 -0700 Subject: [PATCH] perf: GIN index for trace attribute filtering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The traces attribute filter (attributes @> '{...}') seq-scanned the spans table, so an unbounded or wide-window query could exceed statement_timeout and return an HTTP 500. - Add spans_attrs_gin (gin jsonb_path_ops), mirroring logs_attrs_gin. The migration sets statement_timeout=0 locally so the one-time build isn't aborted by the pool's 60s cap; the build briefly write-locks spans (span ingest pauses once — metrics/logs ingest is unaffected). - Restructure the attr filter from `HAVING bool_or(attributes @> $6)` (can't use an index) to `WHERE trace_id IN (SELECT trace_id FROM spans WHERE attributes @> $6)`, which the GIN serves — selecting whole traces that contain a matching span, so the per-trace aggregates stay correct. Spans are lower write-volume than metrics, so the index's ingest cost is modest. Co-Authored-By: Claude Opus 4.8 --- server/migrations/0011_spans_attrs_gin.sql | 10 ++++++++++ server/src/api.rs | 9 ++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 server/migrations/0011_spans_attrs_gin.sql diff --git a/server/migrations/0011_spans_attrs_gin.sql b/server/migrations/0011_spans_attrs_gin.sql new file mode 100644 index 0000000..4527ced --- /dev/null +++ b/server/migrations/0011_spans_attrs_gin.sql @@ -0,0 +1,10 @@ +-- GIN index on span attributes so the traces list can filter by attribute +-- (attributes @> '{"key":"value"}') without seq-scanning the spans table. +-- Mirrors logs_attrs_gin; jsonb_path_ops is compact and serves the @> operator. 
+-- +-- SET LOCAL statement_timeout = 0 for this migration only: the pool sets a 60s +-- statement_timeout, but building a GIN over the existing spans can take longer, +-- and we don't want the build aborted (which would crash-loop startup). The build +-- holds a write lock on spans, so ingest pauses briefly once while it runs. +SET LOCAL statement_timeout = 0; +CREATE INDEX IF NOT EXISTS spans_attrs_gin ON spans USING gin (attributes jsonb_path_ops); diff --git a/server/src/api.rs b/server/src/api.rs index 652dbd9..cb57bde 100644 --- a/server/src/api.rs +++ b/server/src/api.rs @@ -60,8 +60,10 @@ pub async fn list_traces( .and_then(|s| s.split_once('=')) .filter(|(k, _)| !k.is_empty()) .map(|(k, v)| serde_json::json!({ k: v })); - // service + time bound the spans scanned (index-friendly); the trace-level - // filters (name / attribute / errors / duration) are applied with HAVING so + // service + time bound the spans scanned (index-friendly). The attribute + // filter selects whole traces that contain a matching span via a subquery the + // spans_attrs_gin index can serve (a HAVING bool_or couldn't use the index). + // The remaining trace-level filters (name / errors / duration) are HAVING, so // the per-trace aggregates stay computed over the whole trace. let rows = sqlx::query_as::<_, TraceSummary>( "SELECT trace_id, @@ -77,10 +79,11 @@ pub async fn list_traces( WHERE ($1::text IS NULL OR service = $1) AND ($2::timestamptz IS NULL OR start_time >= $2) AND ($3::timestamptz IS NULL OR start_time <= $3) + AND ($6::jsonb IS NULL + OR trace_id IN (SELECT trace_id FROM spans WHERE attributes @> $6)) GROUP BY trace_id HAVING ($5::text IS NULL OR (array_agg(name ORDER BY start_time))[1] ILIKE '%' || $5 || '%') - AND ($6::jsonb IS NULL OR bool_or(attributes @> $6)) AND (NOT $7::bool OR count(*) FILTER (WHERE status_code = 2) > 0) AND ($8::float8 IS NULL OR extract(epoch FROM (max(end_time) - min(start_time))) * 1000.0 >= $8)