From 2b9dda109a497f9ec787b806796987f02c7e68d0 Mon Sep 17 00:00:00 2001 From: Nico Hinderling Date: Thu, 4 Jun 2026 17:57:24 -0700 Subject: [PATCH 1/5] feat(snapshots): Retry rate-limited image uploads with backoff Snapshot image uploads now retry 429/503 and transient failures instead of aborting the whole command on the first error. Each attempt rebuilds a batch of only the operations that have not yet succeeded, reusing the CLI's existing backoff convention (get_default_backoff + Config::max_retries). --- CHANGELOG.md | 6 + src/commands/snapshots/upload.rs | 285 +++++++++++++++++++++++++++++-- 2 files changed, 274 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 16e3e0832d..1ec5277c50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## Unreleased + +### Features + +- (snapshots) Retry rate-limited image uploads with backoff ([#3326](https://github.com/getsentry/sentry-cli/pull/3326)) + ## 3.5.0 ### Features diff --git a/src/commands/snapshots/upload.rs b/src/commands/snapshots/upload.rs index 98c32ce742..f957831d1f 100644 --- a/src/commands/snapshots/upload.rs +++ b/src/commands/snapshots/upload.rs @@ -3,15 +3,17 @@ use std::fs::File; use std::io::BufReader; use std::path::{Path, PathBuf}; use std::str::FromStr as _; +use std::thread; use std::time::Duration; use anyhow::{Context as _, Result}; +use backon::BackoffBuilder as _; use clap::{Arg, ArgMatches, Command}; use console::style; use futures_util::StreamExt as _; use itertools::Itertools as _; use log::{debug, warn}; -use objectstore_client::{ClientBuilder, ExpirationPolicy, OperationResult, Usecase}; +use objectstore_client::{ClientBuilder, Error, ExpirationPolicy, OperationResult, Usecase}; use rayon::prelude::*; use secrecy::ExposeSecret as _; use serde_json::Value; @@ -24,6 +26,7 @@ use crate::utils::args::ArgExt as _; use crate::utils::build_vcs::collect_git_metadata; use crate::utils::ci::is_ci; use crate::utils::fs::IMAGE_EXTENSIONS; +use crate::utils::retry::{get_default_backoff, DurationAsMilliseconds as _}; const EXPERIMENTAL_WARNING: &str = "[EXPERIMENTAL] The \"snapshots upload\" command is experimental. \ @@ -409,6 +412,105 @@ struct PreparedImage { key: String, } +fn is_retryable(err: &Error) -> bool { + match err { + Error::OperationFailure { status, .. } => *status == 429 || *status == 503, + Error::Reqwest(e) => match e.status() { + Some(status) => status.as_u16() == 429 || status.as_u16() == 503, + None => e.is_timeout() || e.is_connect(), + }, + Error::Batch(inner) => is_retryable(inner), + _ => false, + } +} + +#[derive(Default)] +struct ClassifiedResults { + succeeded: HashSet, + fatal: Vec<(String, Error)>, + retryable: Vec<(String, Error)>, + unattributed: Vec, +} + +fn classify_results(results: Vec) -> ClassifiedResults { + let mut classified = ClassifiedResults::default(); + for result in results { + match result { + OperationResult::Put(key, Ok(_)) => { + classified.succeeded.insert(key); + } + OperationResult::Put(key, Err(err)) if is_retryable(&err) => { + classified.retryable.push((key, err)); + } + OperationResult::Put(key, Err(err)) => { + classified.fatal.push((key, err)); + } + OperationResult::Error(err) => { + classified.unattributed.push(err); + } + _ => {} + } + } + classified +} + +fn upload_with_retry( + mut pending: Vec, + mut delays: impl Iterator, + mut send_batch: F, +) -> Vec +where + F: FnMut(&[PreparedImage]) -> Vec, +{ + let mut failures: Vec = Vec::new(); + let mut last_error: HashMap = HashMap::new(); + let mut last_unattributed: Option = None; + + loop { + let classified = classify_results(send_batch(&pending)); + + for (key, err) in classified.retryable { + last_error.insert(key, err); + } + if let Some(err) = classified.unattributed.into_iter().last() { + last_unattributed = Some(err); + } + let fatal_keys: HashSet = + classified.fatal.iter().map(|(key, _)| key.clone()).collect(); + for (key, err) in classified.fatal { + last_error.remove(&key); + failures.push(anyhow::Error::new(err).context(format!("failed to upload {key}"))); + } + + pending + .retain(|p| !classified.succeeded.contains(&p.key) && !fatal_keys.contains(&p.key)); + + if pending.is_empty() { + break; + } + let Some(delay) = delays.next() else { + break; + }; + debug!( + "{} snapshot image upload(s) pending, retrying in {} ms", + pending.len(), + delay.as_milliseconds() + ); + thread::sleep(delay); + } + + for prepared in pending { + let err = last_error + .remove(&prepared.key) + .map(anyhow::Error::new) + .or_else(|| last_unattributed.take().map(anyhow::Error::new)) + .unwrap_or_else(|| anyhow::anyhow!("operation failed after exhausting retries")); + failures.push(err.context(format!("failed to upload {}", prepared.key))); + } + + failures +} + fn upload_images( images: Vec, org: &str, @@ -531,24 +633,35 @@ fn upload_images( } if upload_count > 0 { - let mut many_builder = session.many(); - for prepared in missing_uploads { - many_builder = many_builder.push( - session - .put_path(prepared.path.clone()) - .key(&prepared.key) - .expiration_policy(expiration), - ); - } + let delays = get_default_backoff() + .with_max_times(Config::current().max_retries() as usize) + .build(); + + let failures = upload_with_retry(missing_uploads, delays, |pending| { + runtime.block_on(async { + let mut many_builder = session.many(); + for prepared in pending { + many_builder = many_builder.push( + session + .put_path(prepared.path.clone()) + .key(&prepared.key) + .expiration_policy(expiration), + ); + } + + let mut stream = many_builder.send().await; + let mut out = Vec::new(); + while let Some(result) = stream.next().await { + out.push(result); + } + out + }) + }); - let result = - runtime.block_on(async { many_builder.send().await.error_for_failures().await }); - if let Err(errors) = result { - let errors: Vec<_> = errors.collect(); - let error_count = errors.len(); + if !failures.is_empty() { + let error_count = failures.len(); eprintln!("There were errors uploading images:"); - for error in errors { - let error = anyhow::Error::new(error); + for error in failures { eprintln!(" {}", style(format!("{error:#}")).red()); } anyhow::bail!("Failed to upload {error_count} images"); @@ -577,6 +690,144 @@ mod tests { } } + #[test] + fn test_is_retryable_operation_failure_429() { + let err = Error::OperationFailure { + status: 429, + message: "rate limited".to_owned(), + }; + assert!(is_retryable(&err)); + } + + #[test] + fn test_is_retryable_operation_failure_503() { + let err = Error::OperationFailure { + status: 503, + message: "unavailable".to_owned(), + }; + assert!(is_retryable(&err)); + } + + #[test] + fn test_is_not_retryable_operation_failure_404() { + let err = Error::OperationFailure { + status: 404, + message: "not found".to_owned(), + }; + assert!(!is_retryable(&err)); + } + + #[test] + fn test_is_retryable_batch_wrapping_429() { + let err = Error::Batch(std::sync::Arc::new(Error::OperationFailure { + status: 429, + message: "rate limited".to_owned(), + })); + assert!(is_retryable(&err)); + } + + #[test] + fn test_is_not_retryable_malformed_response() { + let err = Error::MalformedResponse("bad".to_owned()); + assert!(!is_retryable(&err)); + } + + fn put_ok(key: &str) -> OperationResult { + OperationResult::Put( + key.to_owned(), + Ok(objectstore_client::PutResponse { + key: key.to_owned(), + }), + ) + } + + fn put_err(key: &str, status: u16) -> OperationResult { + OperationResult::Put( + key.to_owned(), + Err(Error::OperationFailure { + status, + message: format!("status {status}"), + }), + ) + } + + fn prepared(key: &str) -> PreparedImage { + PreparedImage { + path: PathBuf::from(format!("{key}.png")), + key: key.to_owned(), + } + } + + #[test] + fn test_classify_results_partitions_results() { + let results = vec![ + put_ok("ok-key"), + put_err("rl-key", 429), + put_err("fatal-key", 404), + OperationResult::Error(Error::MalformedResponse("bad".to_owned())), + ]; + + let classified = classify_results(results); + + assert_eq!(classified.succeeded.len(), 1); + assert!(classified.succeeded.contains("ok-key")); + assert_eq!(classified.fatal.len(), 1); + assert_eq!(classified.fatal[0].0, "fatal-key"); + assert_eq!(classified.retryable.len(), 1); + assert_eq!(classified.retryable[0].0, "rl-key"); + assert_eq!(classified.unattributed.len(), 1); + } + + #[test] + fn test_retry_all_succeed_first_attempt() { + let mut attempts = 0; + let failures = upload_with_retry(vec![prepared("a"), prepared("b")], std::iter::empty(), |p| { + attempts += 1; + p.iter().map(|img| put_ok(&img.key)).collect() + }); + assert!(failures.is_empty()); + assert_eq!(attempts, 1); + } + + #[test] + fn test_retry_recovers_after_rate_limit() { + let mut attempts = 0; + let failures = upload_with_retry(vec![prepared("a")], std::iter::repeat(Duration::ZERO), |p| { + attempts += 1; + if attempts == 1 { + p.iter().map(|img| put_err(&img.key, 429)).collect() + } else { + p.iter().map(|img| put_ok(&img.key)).collect() + } + }); + assert!(failures.is_empty()); + assert_eq!(attempts, 2); + } + + #[test] + fn test_retry_fatal_error_is_not_retried() { + let mut attempts = 0; + let failures = upload_with_retry(vec![prepared("a")], std::iter::repeat(Duration::ZERO), |p| { + attempts += 1; + p.iter().map(|img| put_err(&img.key, 404)).collect() + }); + assert_eq!(failures.len(), 1); + assert_eq!(attempts, 1); + } + + #[test] + fn test_retry_exhausts_and_reports_with_real_error() { + let mut attempts = 0; + let delays = std::iter::repeat_n(Duration::ZERO, 2); + let failures = upload_with_retry(vec![prepared("a")], delays, |p| { + attempts += 1; + p.iter().map(|img| put_err(&img.key, 429)).collect() + }); + assert_eq!(attempts, 3); + assert_eq!(failures.len(), 1); + assert!(format!("{:#}", failures[0]).contains("429")); + } + #[test] fn test_validate_image_sizes_at_limit_passes() { assert!(validate_image_sizes(&[make_image(8000, 5000)]).is_ok()); From ec140ec7f30b613b1008d7f58f7336f953997fe4 Mon Sep 17 00:00:00 2001 From: Nico Hinderling Date: Thu, 4 Jun 2026 18:20:34 -0700 Subject: [PATCH 2/5] fix(snapshots): Stop retrying on non-retryable batch stream errors An unattributable OperationResult::Error that is not retryable (e.g. a malformed batch response) now breaks the retry loop immediately instead of exhausting the full backoff, matching the behavior for attributed fatal errors. --- src/commands/snapshots/upload.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/commands/snapshots/upload.rs b/src/commands/snapshots/upload.rs index f957831d1f..29bab6cc72 100644 --- a/src/commands/snapshots/upload.rs +++ b/src/commands/snapshots/upload.rs @@ -472,6 +472,7 @@ where for (key, err) in classified.retryable { last_error.insert(key, err); } + let unattributed_is_fatal = classified.unattributed.iter().any(|err| !is_retryable(err)); if let Some(err) = classified.unattributed.into_iter().last() { last_unattributed = Some(err); } @@ -485,7 +486,7 @@ where pending .retain(|p| !classified.succeeded.contains(&p.key) && !fatal_keys.contains(&p.key)); - if pending.is_empty() { + if pending.is_empty() || unattributed_is_fatal { break; } let Some(delay) = delays.next() else { @@ -815,6 +816,18 @@ mod tests { assert_eq!(attempts, 1); } + #[test] + fn test_retry_stops_on_non_retryable_unattributed_error() { + let mut attempts = 0; + let delays = std::iter::repeat_n(Duration::ZERO, 5); + let failures = upload_with_retry(vec![prepared("a")], delays, |_| { + attempts += 1; + vec![OperationResult::Error(Error::MalformedResponse("bad".to_owned()))] + }); + assert_eq!(attempts, 1); + assert_eq!(failures.len(), 1); + } + #[test] fn test_retry_exhausts_and_reports_with_real_error() { let mut attempts = 0; From edda196f3b0bb01cf1b38997c324bde1c324a6f9 Mon Sep 17 00:00:00 2001 From: Nico Hinderling Date: Thu, 4 Jun 2026 18:25:37 -0700 Subject: [PATCH 3/5] style(snapshots): Format upload retry code with rustfmt --- src/commands/snapshots/upload.rs | 58 ++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/src/commands/snapshots/upload.rs b/src/commands/snapshots/upload.rs index 29bab6cc72..b77200e5b5 100644 --- a/src/commands/snapshots/upload.rs +++ b/src/commands/snapshots/upload.rs @@ -476,15 +476,17 @@ where if let Some(err) = classified.unattributed.into_iter().last() { last_unattributed = Some(err); } - let fatal_keys: HashSet = - classified.fatal.iter().map(|(key, _)| key.clone()).collect(); + let fatal_keys: HashSet = classified + .fatal + .iter() + .map(|(key, _)| key.clone()) + .collect(); for (key, err) in classified.fatal { last_error.remove(&key); failures.push(anyhow::Error::new(err).context(format!("failed to upload {key}"))); } - pending - .retain(|p| !classified.succeeded.contains(&p.key) && !fatal_keys.contains(&p.key)); + pending.retain(|p| !classified.succeeded.contains(&p.key) && !fatal_keys.contains(&p.key)); if pending.is_empty() || unattributed_is_fatal { break; @@ -782,10 +784,14 @@ mod tests { #[test] fn test_retry_all_succeed_first_attempt() { let mut attempts = 0; - let failures = upload_with_retry(vec![prepared("a"), prepared("b")], std::iter::empty(), |p| { - attempts += 1; - p.iter().map(|img| put_ok(&img.key)).collect() - }); + let failures = upload_with_retry( + vec![prepared("a"), prepared("b")], + std::iter::empty(), + |p| { + attempts += 1; + p.iter().map(|img| put_ok(&img.key)).collect() + }, + ); assert!(failures.is_empty()); assert_eq!(attempts, 1); } @@ -793,14 +799,18 @@ mod tests { #[test] fn test_retry_recovers_after_rate_limit() { let mut attempts = 0; - let failures = upload_with_retry(vec![prepared("a")], std::iter::repeat(Duration::ZERO), |p| { - attempts += 1; - if attempts == 1 { - p.iter().map(|img| put_err(&img.key, 429)).collect() - } else { - p.iter().map(|img| put_ok(&img.key)).collect() - } - }); + let failures = upload_with_retry( + vec![prepared("a")], + std::iter::repeat(Duration::ZERO), + |p| { + attempts += 1; + if attempts == 1 { + p.iter().map(|img| put_err(&img.key, 429)).collect() + } else { + p.iter().map(|img| put_ok(&img.key)).collect() + } + }, + ); assert!(failures.is_empty()); assert_eq!(attempts, 2); } @@ -808,10 +818,14 @@ mod tests { #[test] fn test_retry_fatal_error_is_not_retried() { let mut attempts = 0; - let failures = upload_with_retry(vec![prepared("a")], std::iter::repeat(Duration::ZERO), |p| { - attempts += 1; - p.iter().map(|img| put_err(&img.key, 404)).collect() - }); + let failures = upload_with_retry( + vec![prepared("a")], + std::iter::repeat(Duration::ZERO), + |p| { + attempts += 1; + p.iter().map(|img| put_err(&img.key, 404)).collect() + }, + ); assert_eq!(failures.len(), 1); assert_eq!(attempts, 1); } @@ -822,7 +836,9 @@ mod tests { let delays = std::iter::repeat_n(Duration::ZERO, 5); let failures = upload_with_retry(vec![prepared("a")], delays, |_| { attempts += 1; - vec![OperationResult::Error(Error::MalformedResponse("bad".to_owned()))] + vec![OperationResult::Error(Error::MalformedResponse( + "bad".to_owned(), + ))] }); assert_eq!(attempts, 1); assert_eq!(failures.len(), 1); From 931697beab0819c103ca553de7f169ffba90191b Mon Sep 17 00:00:00 2001 From: Nico Hinderling Date: Thu, 4 Jun 2026 18:32:59 -0700 Subject: [PATCH 4/5] fix(snapshots): Preserve unattributed batch error for all pending uploads Report the real objectstore error text for every still-pending image instead of attaching it to only the first, and prefer a non-retryable (fatal) batch error over a retryable one when choosing the message. --- src/commands/snapshots/upload.rs | 38 +++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/src/commands/snapshots/upload.rs b/src/commands/snapshots/upload.rs index b77200e5b5..3629337044 100644 --- a/src/commands/snapshots/upload.rs +++ b/src/commands/snapshots/upload.rs @@ -464,7 +464,7 @@ where { let mut failures: Vec = Vec::new(); let mut last_error: HashMap = HashMap::new(); - let mut last_unattributed: Option = None; + let mut unattributed_message: Option = None; loop { let classified = classify_results(send_batch(&pending)); @@ -473,8 +473,13 @@ where last_error.insert(key, err); } let unattributed_is_fatal = classified.unattributed.iter().any(|err| !is_retryable(err)); - if let Some(err) = classified.unattributed.into_iter().last() { - last_unattributed = Some(err); + if let Some(err) = classified + .unattributed + .iter() + .find(|err| !is_retryable(err)) + .or_else(|| classified.unattributed.last()) + { + unattributed_message = Some(format!("{err}")); } let fatal_keys: HashSet = classified .fatal @@ -503,11 +508,13 @@ where } for prepared in pending { - let err = last_error - .remove(&prepared.key) - .map(anyhow::Error::new) - .or_else(|| last_unattributed.take().map(anyhow::Error::new)) - .unwrap_or_else(|| anyhow::anyhow!("operation failed after exhausting retries")); + let err = match last_error.remove(&prepared.key) { + Some(err) => anyhow::Error::new(err), + None => match &unattributed_message { + Some(message) => anyhow::anyhow!("{message}"), + None => anyhow::anyhow!("operation failed after exhausting retries"), + }, + }; failures.push(err.context(format!("failed to upload {}", prepared.key))); } @@ -842,6 +849,21 @@ mod tests { }); assert_eq!(attempts, 1); assert_eq!(failures.len(), 1); + assert!(format!("{:#}", failures[0]).contains("bad")); + } + + #[test] + fn test_retry_unattributed_error_reported_for_all_pending() { + let delays = std::iter::repeat_n(Duration::ZERO, 3); + let failures = upload_with_retry(vec![prepared("a"), prepared("b")], delays, |_| { + vec![OperationResult::Error(Error::MalformedResponse( + "boom".to_owned(), + ))] + }); + assert_eq!(failures.len(), 2); + assert!(failures + .iter() + .all(|failure| format!("{failure:#}").contains("boom"))); } #[test] From d1a79829c699297325e63cefc21d622484dcd880 Mon Sep 17 00:00:00 2001 From: Nico Hinderling Date: Fri, 5 Jun 2026 10:11:52 -0700 Subject: [PATCH 5/5] fix(snapshots): Prefer fatal batch error over stale retryable error When the retry loop stopped on a non-retryable unattributed batch error, pending uploads still reported a stale per-key retryable error (such as a 429 from an earlier attempt) instead of the fatal error that actually ended the run. Skip the per-key last_error when the loop stops on a fatal unattributed error so all pending uploads surface the real cause. Co-Authored-By: Claude --- src/commands/snapshots/upload.rs | 33 ++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/src/commands/snapshots/upload.rs b/src/commands/snapshots/upload.rs index 3629337044..e36c9363de 100644 --- a/src/commands/snapshots/upload.rs +++ b/src/commands/snapshots/upload.rs @@ -465,6 +465,7 @@ where let mut failures: Vec = Vec::new(); let mut last_error: HashMap = HashMap::new(); let mut unattributed_message: Option = None; + let mut stopped_on_fatal_unattributed = false; loop { let classified = classify_results(send_batch(&pending)); @@ -493,7 +494,11 @@ where pending.retain(|p| !classified.succeeded.contains(&p.key) && !fatal_keys.contains(&p.key)); - if pending.is_empty() || unattributed_is_fatal { + if pending.is_empty() { + break; + } + if unattributed_is_fatal { + stopped_on_fatal_unattributed = true; break; } let Some(delay) = delays.next() else { @@ -508,7 +513,10 @@ where } for prepared in pending { - let err = match last_error.remove(&prepared.key) { + let attributed = (!stopped_on_fatal_unattributed) + .then(|| last_error.remove(&prepared.key)) + .flatten(); + let err = match attributed { Some(err) => anyhow::Error::new(err), None => match &unattributed_message { Some(message) => anyhow::anyhow!("{message}"), @@ -866,6 +874,27 @@ mod tests { .all(|failure| format!("{failure:#}").contains("boom"))); } + #[test] + fn test_retry_fatal_unattributed_error_masks_stale_retryable_error() { + let mut attempts = 0; + let delays = std::iter::repeat_n(Duration::ZERO, 3); + let failures = upload_with_retry(vec![prepared("a")], delays, |_| { + attempts += 1; + if attempts == 1 { + vec![put_err("a", 429)] + } else { + vec![OperationResult::Error(Error::MalformedResponse( + "fatal".to_owned(), + ))] + } + }); + assert_eq!(attempts, 2); + assert_eq!(failures.len(), 1); + let message = format!("{:#}", failures[0]); + assert!(message.contains("fatal")); + assert!(!message.contains("429")); + } + #[test] fn test_retry_exhausts_and_reports_with_real_error() { let mut attempts = 0;