bytecodealliance · fitzgen · Jun 25, 2026 · Jun 25, 2026 · Jun 25, 2026
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml
@@ -21,7 +21,7 @@ sightglass-data = { path = "../data" }
 sightglass-fingerprint = { path = "../fingerprint" }
 sightglass-recorder = { path = "../recorder" }
 sightglass-upload = { path = "../upload" }
-structopt = { version = "0.3", features = ["color", "suggestions"] }
+clap = { version = "4", features = ["derive"] }
 thiserror = "1.0"
 rand = { version = "0.7.3", features = ["small_rng"] }
 csv = "1.1.6"

diff --git a/crates/cli/src/benchmark.rs b/crates/cli/src/benchmark.rs
@@ -1,5 +1,6 @@
 use crate::suite::BenchmarkOrSuite;
 use anyhow::{anyhow, Context, Result};
+use clap::Parser;
 use rand::{rngs::SmallRng, Rng, SeedableRng};
 use sightglass_data::{Format, Measurement, Phase};
 use sightglass_recorder::bench_api::Engine;
@@ -13,7 +14,6 @@ use std::{
     path::{Path, PathBuf},
     process::{Command, Stdio},
 };
-use structopt::StructOpt;
 
 const DEFAULT_PROCESSES: usize = 10;
 const DEFAULT_ITERATIONS_PER_PROCESS: usize = 10;
@@ -204,7 +204,7 @@ mod callgrind {
 ///
 /// The total number of samples taken for each Wasm benchmark is `PROCESSES *
 /// NUMBER_OF_ITERATIONS_PER_PROCESS`.
-#[derive(StructOpt, Debug)]
+#[derive(Parser, Debug)]
 pub struct BenchmarkCommand {
     /// The path to the file(s) to benchmark. This accepts one or more:
     ///
@@ -217,25 +217,21 @@ pub struct BenchmarkCommand {
     ///   the `*.suite` file.
     ///
     /// By default, this will use `benchmarks/default.suite`.
-    #[structopt(
-        index = 1,
-        default_value = "benchmarks/default.suite",
-        value_name = "FILE"
-    )]
+    #[arg(default_value = "benchmarks/default.suite", value_name = "FILE")]
     benchmarks: Vec<BenchmarkOrSuite>,
 
     /// The benchmark engine(s) with which to run the benchmark.
     ///
     /// This is one or more paths to a shared library implementing the
     /// benchmarking engine specification. See `engines/wasmtime` for an example
     /// script to build an engine.
-    #[structopt(long("engine"), short("e"), value_name = "PATH", empty_values = false)]
+    #[arg(long = "engine", short = 'e', value_name = "PATH", value_parser = clap::builder::NonEmptyStringValueParser::new())]
     engines: Vec<String>,
 
     /// Configure an engine using engine-specific flags. (For the Wasmtime
     /// engine, these can be a subset of flags from `wasmtime run --help`).
-    #[structopt(
-        long("engine-flags"),
+    #[arg(
+        long = "engine-flags",
         value_name = "ENGINE_FLAGS",
         allow_hyphen_values = true
     )]
@@ -245,40 +241,40 @@ pub struct BenchmarkCommand {
     ///
     /// Defaults to `10`, unless using the `callgrind` measure, in which case the
     /// default is `3`.
-    #[structopt(long = "processes", value_name = "PROCESSES")]
+    #[arg(long = "processes", value_name = "PROCESSES")]
     processes: Option<usize>,
 
     /// Override the "engine" name; this is useful if running experiments that might
     /// not have a differentiating engine name (e.g. if customizing the flags).
     ///
     /// If multiple engines are provided, the order of names provided here should
     /// match the order of the engines specified.
-    #[structopt(long = "name", short = "n")]
+    #[arg(long = "name", short = 'n')]
     names: Option<Vec<String>>,
 
     /// How many times should we run a benchmark in a single process?
     ///
     /// Defaults to `10`, unless using the `callgrind` measure, in which case the
     /// default is `1`.
-    #[structopt(
+    #[arg(
         long = "iterations-per-process",
         value_name = "NUMBER_OF_ITERATIONS_PER_PROCESS"
     )]
     iterations_per_process: Option<usize>,
 
     /// Output raw data, rather than the summarized, human-readable analysis
     /// results.
-    #[structopt(long)]
+    #[arg(long)]
     raw: bool,
 
     /// The format of the raw output data when `--raw` is used. Either 'json' or
     /// 'csv'.
-    #[structopt(short = "f", long = "output-format", default_value = "json")]
+    #[arg(short = 'f', long = "output-format", default_value = "json")]
     output_format: Format,
 
     /// Path to a file which will contain the output data, or nothing to print
     /// to stdout (default).
-    #[structopt(short = "o", long = "output-file")]
+    #[arg(short = 'o', long = "output-file")]
     output_file: Option<String>,
 
     /// The type of measurement to use (cycles, insts-retired, perf-counters,
@@ -292,7 +288,7 @@ pub struct BenchmarkCommand {
     /// `callgrind` defaults to fewer processes and iterations per process
     /// because it runs the benchmarking processes under Valgrind, which is
     /// slower but also more deterministic and less noisy.
-    #[structopt(long = "measure", short = "m", multiple = true)]
+    #[arg(long = "measure", short = 'm', action = clap::ArgAction::Append)]
     measures: Vec<MeasureType>,
 
     /// Pass this flag to only run benchmarks over "small" workloads (rather
@@ -307,36 +303,36 @@ pub struct BenchmarkCommand {
     /// of truth, and any cases where results differ between the small and
     /// default workloads, the results from the small workloads should be
     /// ignored.
-    #[structopt(long, alias = "small-workload")]
+    #[arg(long, alias = "small-workload")]
     small_workloads: bool,
 
     /// The directory to preopen as the benchmark working directory. If the
     /// benchmark accesses files using WASI, it will see this directory as its
     /// current working directory (i.e. `.`). If the working directory is not
     /// specified, the Wasm file's parent directory is used instead.
-    #[structopt(short("d"), long("working-dir"), parse(from_os_str))]
+    #[arg(short = 'd', long = "working-dir")]
     working_dir: Option<PathBuf>,
 
     /// Benchmark only the given phase (compilation, instantiation, or
     /// execution). Benchmarks all phases if omitted.
-    #[structopt(long("benchmark-phase"))]
+    #[arg(long = "benchmark-phase")]
     benchmark_phase: Option<Phase>,
 
     /// The significance level for confidence intervals. Typical values are 0.01
     /// and 0.05, which correspond to 99% and 95% confidence respectively. This
     /// is ignored when using `--raw` or when there aren't exactly two engines
     /// supplied.
-    #[structopt(short, long, default_value = "0.01")]
+    #[arg(short, long, default_value = "0.01")]
     significance_level: f64,
 
     /// Pin all benchmark iterations in a process to a single core. See
     /// `cpu_affinity` in the `sightglass-recorder` crate for more information.
-    #[structopt(long)]
+    #[arg(long)]
     pin: bool,
 
     /// Keep log files after successful benchmark runs. By default, logs are
     /// only kept on failures.
-    #[structopt(short = "k", long = "keep-logs")]
+    #[arg(short = 'k', long = "keep-logs")]
     keep_logs: bool,
 }
 

diff --git a/crates/cli/src/clean.rs b/crates/cli/src/clean.rs
@@ -1,10 +1,10 @@
 use anyhow::{Context, Result};
+use clap::Parser;
 use regex::Regex;
-use structopt::StructOpt;
 
 /// Remove the log files emitted in Sightglass runs.
-#[derive(StructOpt, Debug)]
-#[structopt(name = "clean")]
+#[derive(Parser, Debug)]
+#[command(name = "clean")]
 pub struct CleanCommand {}
 
 impl CleanCommand {
@@ -51,9 +51,16 @@ impl CleanCommand {
 
                 // Okay! It's one of our log files!
                 log::info!("Removing log file: {}", path.display());
-                std::fs::remove_file(&path)
-                    .with_context(|| format!("failed to remove {}", path.display()))?;
-                removed_count += 1;
+                match std::fs::remove_file(&path) {
+                    Ok(()) => removed_count += 1,
+                    Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
+                        // File was already removed (e.g. by a parallel process); that's fine.
+                    }
+                    Err(e) => {
+                        return Err(e)
+                            .with_context(|| format!("failed to remove {}", path.display()));
+                    }
+                }
             }
         }
 

diff --git a/crates/cli/src/effect_size.rs b/crates/cli/src/effect_size.rs
@@ -1,33 +1,33 @@
 use anyhow::Result;
+use clap::Parser;
 use sightglass_analysis::{effect_size, summarize};
 use sightglass_data::Format;
 use std::{
     fs::File,
     io::{self, BufReader},
 };
-use structopt::StructOpt;
 
 /// Calculate the effect size (and associated confidence interval) between the
 /// results for two different engines.
-#[derive(Debug, StructOpt)]
-#[structopt(name = "effect-size")]
+#[derive(Debug, Parser)]
+#[command(name = "effect-size")]
 pub struct EffectSizeCommand {
     /// Path to the file(s) that will be read from, or none to indicate stdin (default).
-    #[structopt(short = "f")]
+    #[arg(short = 'f', num_args = 0..)]
     input_file: Option<Vec<String>>,
 
     /// The format of the input data. Either 'json' or 'csv'.
-    #[structopt(short = "i", long = "input-format", default_value = "json")]
+    #[arg(short = 'i', long = "input-format", default_value = "json")]
     input_format: Format,
 
     /// The format of the output data. Either 'json' or 'csv'; if unspecified, print the output in
     /// human-readable form.
-    #[structopt(short = "o", long = "output-format")]
+    #[arg(short = 'o', long = "output-format")]
     output_format: Option<Format>,
 
     /// The significance level for the confidence interval. Typical values are
     /// 0.01 and 0.05, which correspond to 99% and 95% confidence respectively.
-    #[structopt(short, long, default_value = "0.01")]
+    #[arg(short, long, default_value = "0.01")]
     significance_level: f64,
 }
 

diff --git a/crates/cli/src/fingerprint.rs b/crates/cli/src/fingerprint.rs
@@ -1,25 +1,25 @@
 use anyhow::Result;
+use clap::Parser;
 use sightglass_data::Format;
 use sightglass_fingerprint::{Benchmark, Engine, Machine};
 use std::{io, path::PathBuf};
-use structopt::StructOpt;
 
 /// Gather information about the current machine, a Wasm benchmark, or a Wasm
 /// engine and print the results to `stdout`.
-#[derive(Debug, StructOpt)]
-#[structopt(name = "fingerprint")]
+#[derive(Debug, Parser)]
+#[command(name = "fingerprint")]
 pub struct FingerprintCommand {
     /// The kind of item to fingerprint. One of: 'benchmark', 'engine', 'machine'.
-    #[structopt(short = "k", long = "kind")]
+    #[arg(short = 'k', long = "kind")]
     kind: Kind,
 
     /// The format of the output data. Either 'json' or 'csv'.
-    #[structopt(short = "o", long = "output-format", default_value = "json")]
+    #[arg(short = 'o', long = "output-format", default_value = "json")]
     output_format: Format,
 
     /// The optional path to the file to fingerprint; not all kinds
     /// fingerprinting require a file (e.g., `--kind machine`).
-    #[structopt(index = 1, value_name = "FILE", parse(from_os_str))]
+    #[arg(value_name = "FILE")]
     file: Option<PathBuf>,
 }
 

diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs
@@ -11,34 +11,28 @@ mod validate;
 
 use anyhow::Result;
 use benchmark::BenchmarkCommand;
+use clap::Parser;
 use clean::CleanCommand;
 use effect_size::EffectSizeCommand;
 use fingerprint::FingerprintCommand;
 use log::trace;
 use pca_metrics::PcaMetricsCommand;
 use report::ReportCommand;
-use structopt::{clap::AppSettings, StructOpt};
 use summarize::SummarizeCommand;
 use upload::UploadCommand;
 use validate::ValidateCommand;
 
 /// Main entry point for CLI.
 fn main() -> Result<()> {
     pretty_env_logger::init();
-    let command = SightglassCommand::from_args();
+    let command = SightglassCommand::parse();
     command.execute()?;
     Ok(())
 }
 
 /// The sightglass benchmark runner.
-#[derive(StructOpt, Debug)]
-#[structopt(
-    version = env!("CARGO_PKG_VERSION"),
-    global_settings = &[
-        AppSettings::VersionlessSubcommands,
-        AppSettings::ColoredHelp
-    ],
-)]
+#[derive(Parser, Debug)]
+#[command(version)]
 enum SightglassCommand {
     Benchmark(BenchmarkCommand),
     Clean(CleanCommand),

diff --git a/crates/cli/src/pca_metrics.rs b/crates/cli/src/pca_metrics.rs
@@ -7,20 +7,20 @@ mod static_metrics;
 use crate::suite::BenchmarkOrSuite;
 use anyhow::{Context, Result};
 use category::{Category, NUM_CATEGORIES};
+use clap::Parser;
 use dynamic_metrics::{dynamic_metrics, make_engine};
 use serde::Serialize;
 use sightglass_build::get_engine_filename;
 use static_metrics::static_metrics;
 use std::path::{Path, PathBuf};
-use structopt::StructOpt;
 
 /// Capture benchmark metrics for principal component analysis (PCA).
-#[derive(Debug, StructOpt)]
-#[structopt(name = "pca-metrics")]
+#[derive(Debug, Parser)]
+#[command(name = "pca-metrics")]
 pub struct PcaMetricsCommand {
     /// The optional file path to write output to. Writes output to stdout if
     /// omitted.
-    #[structopt(long, short, parse(from_os_str))]
+    #[arg(long, short)]
     output: Option<PathBuf>,
 
     /// Optionally bound each benchmark's execution to this many units of fuel.
@@ -31,13 +31,13 @@ pub struct PcaMetricsCommand {
     ///
     /// This primarily exists to make testing this command easier, and shouldn't
     /// be used when doing full PCA.
-    #[structopt(long)]
+    #[arg(long)]
     fuel: Option<u64>,
 
     /// The benchmark engine with which to run Callgrind measurements.
     ///
     /// Defaults to the Wasmtime engine library in this repository.
-    #[structopt(long, short, parse(from_os_str))]
+    #[arg(long, short)]
     engine: Option<PathBuf>,
 
     /// The Wasm benchmarks whose PCA metrics should be taken.