From 9d6c7fc0c314174273c14f13d40750da06038944 Mon Sep 17 00:00:00 2001 From: Robert Haist Date: Thu, 25 Jun 2026 14:07:15 +0200 Subject: [PATCH] Reject conditioned/gated configs in the A2 fast-path detector is_a2_shape() validated the layer array exhaustively but never inspected the top-level "condition_dsp" key. The generic WaveNet, given a non-null condition_dsp, builds a nested model and routes the conditioning signal through it before the layer stack (model.cpp); the A2 fast path has no such stage and feeds the raw input as the condition. Because the condition DSP carries its own weights, the parent WaveNet weight stream is byte-identical with or without it, so the loader's weight-count check cannot catch the mismatch. The dispatcher (model.cpp: create_config) consults is_a2_shape before it would ever read condition_dsp, so an A2-shaped config that also carries a condition_dsp was silently routed to the fast path and produced different audio than the generic WaveNet it is meant to replace -- with no error. This contradicts the A2 fast-path's stated invariant ("checks every knob against the A2 signature ... never silently routes a non-A2 model to the fast path"), and conditioning a WaveNet with a nested model is a real feature. Reject any config with a non-null top-level condition_dsp. Also reject the legacy boolean `gated: true` (pre-gating_mode schema), which maps to GATED layers the fast path doesn't implement -- previously caught only indirectly by a downstream weight-count throw; now rejected by the detector so the boundary is enforced in one place. Adds test_detector_rejects_condition_dsp and test_detector_rejects_legacy_gated, mirroring the existing detector-rejection tests. Verified both fail without the guards (the detector accepts the config) and pass with them. 
--- NAM/wavenet/a2_fast.cpp | 19 +++++++++++++++++++ tools/run_tests.cpp | 2 ++ tools/test/test_a2_fast.cpp | 21 +++++++++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/NAM/wavenet/a2_fast.cpp b/NAM/wavenet/a2_fast.cpp index 67093c88..512adc47 100644 --- a/NAM/wavenet/a2_fast.cpp +++ b/NAM/wavenet/a2_fast.cpp @@ -763,6 +763,17 @@ bool is_a2_shape(const nlohmann::json& config, int* channels) if (head_it != config.end() && !head_it->is_null()) return false; + // No conditioning DSP. When given a non-null condition_dsp the generic WaveNet + // builds a nested model and routes the conditioning signal through it before the + // layer stack; the fast path has no such stage and feeds the raw input as the + // condition. The condition DSP carries its own weights, so the parent weight + // stream is identical with or without it and the loader cannot detect the + // difference -- the detector must reject it here, or the fast path would silently + // produce different audio than the model it replaces. + auto cond_it = config.find("condition_dsp"); + if (cond_it != config.end() && !cond_it->is_null()) + return false; + // head_scale is loaded from the trailing weight, but require the field to // stay schema-compatible with the generic WaveNet parser. auto hs_it = config.find("head_scale"); @@ -827,6 +838,14 @@ bool is_a2_shape(const nlohmann::json& config, int* channels) return false; } + // Legacy boolean `gated` (the pre-gating_mode schema): the generic parser maps + // gated==true to GATED layers, which the fast path does not implement. A genuinely + // gated model has a larger weight stream and the loader would throw, but reject it + // here so the boundary is enforced by the detector rather than a downstream error. 
+ auto gated_it = la.find("gated"); + if (gated_it != la.end() && gated_it->is_boolean() && gated_it->get<bool>()) + return false; + // secondary_activation: all null (or field absent) auto sa_it = la.find("secondary_activation"); if (sa_it != la.end() && !sa_it->is_null()) diff --git a/tools/run_tests.cpp b/tools/run_tests.cpp index 61c7ba4a..0d8713d3 100644 --- a/tools/run_tests.cpp +++ b/tools/run_tests.cpp @@ -351,6 +351,8 @@ int main() test_a2_fast::test_detector_rejects_wrong_kernel_sizes(); test_a2_fast::test_detector_rejects_wrong_activation(); test_a2_fast::test_detector_rejects_gating(); + test_a2_fast::test_detector_rejects_condition_dsp(); + test_a2_fast::test_detector_rejects_legacy_gated(); test_a2_fast::test_matches_generic_nano(); test_a2_fast::test_matches_generic_standard(); test_a2_fast::test_process_realtime_safe_nano(); diff --git a/tools/test/test_a2_fast.cpp b/tools/test/test_a2_fast.cpp index 7761f7eb..89c96070 100644 --- a/tools/test/test_a2_fast.cpp +++ b/tools/test/test_a2_fast.cpp @@ -228,6 +228,27 @@ void test_detector_rejects_gating() assert(!nam::wavenet::a2_fast::is_a2_shape(cfg, nullptr)); } +// A condition DSP routes the conditioning signal through a nested model; the fast +// path has no such stage. The nested model holds its own weights, so the parent +// weight stream is unchanged and only the detector can catch this -- otherwise the +// fast path would silently produce different audio than the generic WaveNet. +void test_detector_rejects_condition_dsp() +{ + auto cfg = build_a2_config(8); + cfg["condition_dsp"] = {{"version", "0.5.0"}, {"architecture", "Linear"}, + {"config", nlohmann::json::object()}, {"weights", nlohmann::json::array()}}; + assert(!nam::wavenet::a2_fast::is_a2_shape(cfg, nullptr)); +} + +// Legacy boolean `gated` (pre-gating_mode schema) maps to GATED layers in the +// generic parser, which the fast path does not implement. 
+void test_detector_rejects_legacy_gated() +{ + auto cfg = build_a2_config(3); + cfg["layers"][0]["gated"] = true; + assert(!nam::wavenet::a2_fast::is_a2_shape(cfg, nullptr)); +} + void test_matches_generic(int channels) { const auto cfg = build_a2_config(channels);