diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fd06ea2d..03498309 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,6 +52,10 @@ jobs: run: cargo test --lib --features test-utils - name: Run e2e tests run: cargo test --test e2e --features test-utils -- --test-threads=1 + - name: Run v12 storage-bound audit attack PoCs + run: cargo test --test poc_commitment_audit_attacks --features test-utils + - name: Run v12 live audit-handler tests + run: cargo test --test poc_audit_handler_live --features test-utils doc: name: Documentation diff --git a/Cargo.toml b/Cargo.toml index 0ef01ea7..f7f0d4a2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -134,6 +134,22 @@ name = "e2e" path = "tests/e2e/mod.rs" required-features = ["test-utils"] +# v12 storage-bound audit attack PoCs. Uses the test-only one-shot +# commitment builder/verifier helpers, so it requires the test-utils +# feature. CI runs it via `cargo test --test poc_commitment_audit_attacks +# --features test-utils`. +[[test]] +name = "poc_commitment_audit_attacks" +path = "tests/poc_commitment_audit_attacks.rs" +required-features = ["test-utils"] + +# Live responder-handler tests for the v12 audit. Use +# LmdbStorageConfig::test_default(), gated on test-utils. +[[test]] +name = "poc_audit_handler_live" +path = "tests/poc_audit_handler_live.rs" +required-features = ["test-utils"] + [features] default = ["logging"] # Enable tracing/logging infrastructure. diff --git a/docs/adr/ADR-0002-gossip-triggered-contiguous-subtree-audit.md b/docs/adr/ADR-0002-gossip-triggered-contiguous-subtree-audit.md new file mode 100644 index 00000000..f05bee16 --- /dev/null +++ b/docs/adr/ADR-0002-gossip-triggered-contiguous-subtree-audit.md @@ -0,0 +1,233 @@ +# ADR-0002: Gossip-triggered contiguous-subtree storage audit + +- **Status:** Proposed +- **Date:** 2026-06-04 +- **Decision owners:** Anselme (@grumbach) +- **Reviewers:** +- **Supersedes:** none +- **Superseded by:** none +- **Related:** none + +## Context + +In this network, nodes are paid to store data chunks. To verify a node actually +holds what it is paid for, each node publishes a signed **storage commitment**: a +Merkle tree built over the chunks it claims to hold (one leaf per chunk, the leaf +being a hash of the chunk's content which incidentally also is its address on the network), reduced to a single root hash and signed by +the node's key. The commitment is spread to neighbouring nodes through the +network's normal periodic message exchange ("gossip"). Any neighbour can then choose to +**audit** the node: ask it to prove it still holds the committed chunks, sampled +probabilistically so that no single audit is expensive but cheating is caught over time. + +Triggered by gossip, the audits run as occasional surprise +exams, with no answer that escapes accounting, every failure is attributable to misbehaviour, including failure to respond in a reasonnable time. + +Terms used below: *root* = the single top hash of a node's storage-commitment +Merkle tree. *Leaf* = the hash of one stored chunk. *N* = the number of chunks a +node has committed to. *Subtree* = a contiguous branch of the tree (a node in the +tree plus everything beneath it). *Padding* = empty filler leaves added so the +tree is a clean binary shape when N is not a power of two. + +## Decision Drivers + +- Ensure all nodes actually store the data they claim they are storing +- Keep each proof small and keep steady-state audit traffic low. +- Catch the three real cheating strategies: storing nothing and fetching on demand; deleting some fraction of data; and keeping only chunk *addresses* (which are public) while never holding the actual bytes, then fabricating proofs. +- Reuse the existing cryptographic building blocks (the Merkle tree, the signed commitment, the freshness hash) without inventing new ones. +- Never wrongly penalise honest nodes, even in extreme cases like on small or dense networks where every node legitimately holds almost all of the data. + +## Considered Options + +1. **Keep the previous timer-driven schedule and just make the excusable answers + punishable.** Rejected: an audit answer like "I don't recognise that commitment" + was excusable *precisely because* the audited commitment was stale relative to + what the node had since published. Without fixing the schedule, punishing such + answers would also punish honest nodes whose latest commitment simply hadn't + propagated yet. + +2. **Keep naming individual chunks to audit, but trigger the audit from gossip.** + A better trigger, but it keeps the large, scattered proof (a separate inclusion + path per sampled chunk) and the "auditor names the chunks" model, which lets a + node honestly answer "that chunk isn't in my commitment" — another answer that + has to be excused. + +3. **Gossip-triggered, single contiguous-subtree proof (chosen).** Receiving a + node's commitment is what may launch an audit, checked against that freshly + published commitment. A random value chosen by the auditor deterministically + selects one contiguous branch of the audited node's *own* tree; the node returns + that whole branch plus a small summary of the rest; the auditor rebuilds the + root, spot-checks a few leaves against real chunk bytes, and requires a timely + response. Small proof, no excusable answers, surprises the node. + +4. **Select several branches per audit instead of one.** Rejected: against an + attacker who deletes data in large contiguous blocks, the per-audit chance of + catching them depends only on the *fraction* deleted, not on how many or how + large the branches are. Extra branches only add proof cost; a fresh random + selection each audit covers the tree over time anyway. + +## Decision + +We will make the audit **gossip-triggered** and replace its proof shape with a +**single contiguous-subtree storage proof**, reusing the existing tree, +commitment, and freshness-hash primitives. + +- **Trigger.** When a node ingests a neighbour's commitment during normal + (steady-state) operation, it may start an audit of that neighbour — not every + time, but with a fixed probability and a per-neighbour cooldown, so audits are + occasional surprise exams that keep traffic low. The decision is cooldown-first + then the probability lottery, so a burst of gossip from one peer yields at most + one audit attempt per cooldown window. The audit always checks the neighbour + against the commitment it *just published*, and a *stable* commitment is still + re-audited over time (the trigger fires on every steady-state gossip, not only + on a changed root). There is no separate periodic audit timer. + *Exception:* gossip received during the node's own bootstrap is cached but does + NOT trigger an audit — the node may itself still be bootstrapping (audits are + gated on that) and its routing-table view is not yet stable. Such a peer is + audited on the first steady-state gossip round after bootstrap drains (within + one sync cycle), so there is no coverage gap. + +- **Subtree selection.** The auditor sends a fresh random value. That value walks + the tree from the root downward (each bit picking left or right) and stops at + the smallest contiguous branch that still contains at least the square root of N + *real* (non-padding) leaves. Stopping on a real-leaf count — rather than at a + fixed depth — is deliberate: a fixed depth can, when the tree is mostly padding, + land on a branch that is entirely padding, so the audit checks nothing. The + real-leaf rule makes an empty selection impossible. The random value alone fixes + *which* branch is selected: the auditor and the audited node each walk the tree + from it independently and arrive at the same branch, so the audited node cannot + choose a convenient branch to present. The auditor then checks that the returned + branch is exactly the one the random value selects and that it contains at least + the square root of the claimed held chunks in real leaves. + +- **The proof.** The audited node returns every leaf of the selected subtree — + each given both as the plain content hash and as a freshness hash (the content + mixed with the auditor's random value) — plus one summary hash per level for the + unselected siblings along the path to the root. Everything outside the selected + branch costs a single hash; nothing there is touched. + +- **Verification, three independent checks.** + - *Structure:* rebuild the root from the returned subtree and the sibling + summaries; it must equal the freshly-published root the audit was started + against. This proves the subtree genuinely belongs to the committed tree. + - *Real bytes:* pick a small fixed number of leaves at random from within the + subtree and confirm both the plain hash and the freshness hash match against + the actual chunk bytes. The auditor prefers spot-check leaves it already holds + itself (common within a close group, so no fetch is needed); if it holds none + of the subtree's leaves it may fetch a few from the network to spot-check, but + a fetch that is slow or fails is never counted as the audited node's failure. + This defeats a node that + rebuilt the tree from public chunk addresses but never held the bytes: it + cannot produce a correct freshness hash without the actual data, so faking a + fraction of leaves survives only with probability (1 − fraction) raised to the + number of spot-checks. + - *Possession in time:* the whole response must arrive within a deadline sized + to hashing the subtree from local disk. A node that doesn't hold the data must + fetch it across the network first and misses the deadline. + +- **Retention — "you stay answerable for what you publish."** A node keeps the + chunk data behind its **last two published commitments**. Two, not one, absorbs + the normal race where an auditor is asking about the commitment a node published + just before its newest one. Because of this, an honest node can always answer an + audit about a commitment it published recently — so "I don't recognise that + commitment" about a recently-published root is now provably misbehaviour, not + lag. + +- **Accounting and Fasle Positives** "That chunk isn't in my commitment" + can never occur, because the auditor only ever challenges leaves of the node's + *own* committed tree, so every challenged leaf is in the commitment by + construction. Failures that are deterministic and cannot be caused by bad luck — a + rebuilt root that doesn't match, a content or freshness hash that doesn't match, + or repudiating a recently-published commitment — are acted on **the first time + they occur**, because re-asking cannot turn a genuine failure into a pass. + Failures that *can* be caused by transient bad luck — a missed response deadline + — keep a small grace allowance of consecutive misses (reset on any success) + before counting, so a momentarily slow but honest node is not punished. This + grace allowance is the *only* failure type that the adaptive scaling below + touches; deterministic failures are always acted on the first time, regardless + of network conditions. + +- **Closeness** A node should mostly hold chunks whose addresses are + near its own. We may flag a selected leaf as suspicious padding only when its + address is implausibly far from the node *relative to how much data overlap is + normal on this network*. On a small, dense network where every node holds nearly + everything, "far" chunks are normal and must never trigger a penalty. This check + is intentionally biased toward missing some padding rather than ever wrongly + penalising an honest node. + +- **Network Resilience** In the event of large churn or generalized network + disruption, to prevent a death spiral, the **timeout** grace allowance (and only + that allowance) scales with how widely *timeouts* are currently being seen: the + number of consecutive deadline misses tolerated is the median recent *timeout* + count across recently-audited peers plus a constant (in a healthy network this is + roughly 0 + 3). Crucially, the scaling is driven by missed-deadline / liveness + signals — never by deterministic failures (a bad root or a bad hash), which are + always acted on immediately and can therefore never be inflated by an attacker to + buy itself more grace. Genuine disruption makes *honest* nodes time out together, + lifting the median and relaxing the deadline tolerance just when the network is + struggling; once conditions normalise the median falls back toward zero and the + tolerance tightens again. Because most nodes are honest, the median sits near + zero in normal operation, so this never weakens detection of a node that is + actually deleting data. + +## Consequences + +### Positive + +- The deterministic nature of the 3 checks makes a faked proof detectable: a structurally wrong, byte-less, or stale answer fails outright, and repeated probabilistic sampling catches the cases that can only be hidden in one branch at a time. +- The probabilistic approach to verification ensures that verification is cheap but over time efficient. +- Each proof is small and contiguous (about the square root of N leaves plus a handful of summary hashes) instead of many scattered inclusion paths. +- Audits are surprise exams pinned to the *freshly published* commitment, so there is no stale-data ambiguity unlike in the previous audit design +- Three independent defences cover the three cheating strategies: structure (belongs to the committed tree), real bytes (actually held, not fabricated from public addresses), and timeliness (held locally, not fetched on demand). +- Acting on the first deterministic failure roughly cuts time-to-detection compared with requiring several strikes, with no added risk of false positives. + +### Negative / Trade-offs + +- **Big-block deletion is caught only proportionally.** An attacker who deletes data in large contiguous blocks is caught, per audit, with probability roughly equal to the fraction deleted — independent of N and of subtree size. We accept this: there is no economic reason to delete a *small* fraction (you save almost nothing and are still eventually caught), and a node that deletes a large fraction to actually save resources is caught within one or two audits. If ever needed, the lever is auditing *more often*, not bigger subtrees. +- **Inflating the claimed size is not fully prevented.** Only the selected subtree and the path summaries are verified each audit, so filler leaves elsewhere could inflate the claimed chunk count. Both the regular audits and the closeness check mitigates this over time. Fully auditing the entire claimed set would be too much effort. We accept this probabilistic approach in which over time cheaters are detected. +- **Retention has a storage cost.** A node must keep the chunk data behind its last two published commitments. This is an accepted cost. +- **The audit format change is breaking.** The whole network must upgrade before the new audit can be relied on and before eviction is enabled. + +### Neutral / Operational + +- Introduces a few tunable settings: the per-gossip audit probability, the per-neighbour cooldown, the number of real-byte spot-checks, and the retention count (two). The grace allowance for missed deadlines reuses the existing strike threshold and applies to deadline misses only. +- The old periodic audit timer and the related "node is capable but has no current commitment" special case become unnecessary and are removed. A silent node needs no special handling — it simply stops earning storage credit, so all nodes are naturally motivated to gossip. +- At the chosen settings, steady-state audit load is on the order of a handful of small audits per node per hour. + +## Validation + +How we will know this decision remains correct: + +- **Detection holds in simulation.** For deletions spread evenly across a node's + data, the per-audit chance of catching it rises quickly with the square root of + N; for deletions concentrated in large contiguous blocks (the worst case), it is + roughly the deleted fraction per audit. A simulation must confirm both rates and + that, at the chosen settings, a node deleting a meaningful fraction is caught + within one or two audits and a worst-case concentrated large deletion within + about an hour. Detection must not depend on ever sampling the whole tree. + +- **Tests required before this ADR is Accepted.** Branch selection is deterministic + and identical on the auditor and the audited node; selection never lands on an + all-padding branch across many awkward sizes (a regression test for the + fixed-depth flaw this ADR fixes); the root rebuilds correctly from a single-branch + proof; possession verifies both when the spot-checked chunk is held by the auditor + and when it is fetched; the real-byte spot-check catches a node that fabricated + freshness hashes, at the expected probability; deterministic failures are acted on + the first time while deadline misses honour the grace allowance; the adaptive + timeout grace responds to widespread timeouts but never to deterministic failures; + repudiating a recently-published commitment fails; the last two published + commitments stay answerable; the response deadline is sized correctly; and a flood + of gossip does not multiply audits. + +- **Operational signals and re-open triggers.** Audits per node per hour stay within + budget; false-positive penalties on a small, dense test network stay at zero + (confirming the closeness leniency and the adaptive grace hold); during induced + churn the network does not enter an eviction death spiral; revisit the + concentrated-deletion trade-off if a real attacker is ever observed deleting below + the economically-irrational threshold; revisit if the maximum supported committed + size is approached. + +## Notes for AI-assisted work + +AI tools may help draft this ADR, but **must not mark it Accepted without human +review**. Accepted ADRs are immutable: create a new superseding ADR rather than +editing an Accepted ADR. diff --git a/notes/audit-v13-contiguous-subtree-spec.md b/notes/audit-v13-contiguous-subtree-spec.md new file mode 100644 index 00000000..7a61addd --- /dev/null +++ b/notes/audit-v13-contiguous-subtree-spec.md @@ -0,0 +1,297 @@ +# v13 audit redesign — gossip-triggered contiguous-subtree storage proof + +Status: DRAFT SPEC for review (no code yet). Branch: `grumbach/audit-on-gossip`, +based on PR #113 head. This is a follow-up to #113, NOT folded into it — it is a +second breaking change to the audit challenge/response format and ships as its +own protocol revision once #113 is merged and the network has upgraded. + +Goal: make a node prove it actually holds the data it committed to, with a +*light* (small-proof) audit that is **triggered by gossip** and run as +**probabilistic random exams**, with **no silent no-penalty escape lane**. + +--- + +## 1. Why change the v12 (#113) audit + +v12 works (testnet-confirmed: relay + data-shedders caught), but has three +shapes we want to change: + +1. **Audit is decoupled from gossip.** It fires on a random 10–20 min tick and + pins whatever commitment it last cached, which routinely lags the peer's + real commitment. That lag is the *only* reason `unknown commitment hash` must + be treated as benign (no penalty) — a silent escape lane an upgraded + malicious node can ride once eviction is re-enabled. +2. **Per-key scattered sampling** sends `sqrt(N)` independent inclusion proofs + (`sqrt(N)·log N` hashes). +3. The auditor samples keys from *its own* store, which is why + `key not in commitment` exists and is benign. + +This spec replaces the audit *scheduling* and the *proof shape*, while reusing +v12's cryptographic primitives (BLAKE3 Merkle tree, ML-DSA-signed commitment, +`H(nonce‖peer‖key‖bytes)` possession digest, the 5 gossip-ingest gates). + +--- + +## 2. Model overview (what the network does) + +- **Gossip (UNCHANGED from v12):** a node periodically gossips its signed + `StorageCommitment` = { plain-tree root, key_count, sender_peer_id, pubkey, + signature }. Light: one root, no key list. +- **Trigger:** receiving a peer's *changed* commitment gossip is what may launch + an audit of that peer. Not every gossip → audit: fire with probability `p` + and a per-peer cooldown ("random exams", keeps load low, surprise to the + audited). The audit pins the **just-received** root. +- **Challenge:** auditor sends a fresh random nonce `N` (+ the pinned root). `N` + deterministically selects ONE contiguous subtree of the committed tree. +- **Response (subtree proof):** the audited node returns that one subtree + expanded to its ≈`sqrt(key_count)` leaves (each with its plain leaf hash and a + nonce-fresh hash), plus the `log` sibling cut-hashes on the path to the root. + Everything outside the selected subtree is a single cut-hash per sibling — no + data touched there. +- **Verify:** reconstruct the plain root from the proof and check it equals the + pinned (gossiped) root; for the selected leaves, confirm possession by + rehashing the bytes (locally held, else fetched) with and without `N`; check + leaf uniqueness; require the response within a time bound. +- **Accounting:** every failure (bad proof, wrong root, missing/forged bytes, + timeout past the strike threshold, or repudiating a recently-gossiped root) + is recorded. No `Idle` no-penalty lane for a node repudiating what it just + gossiped. (Trust *reporting* remains gated by the #113 + `TIMEOUT-EVICTION-DISABLED` rollout switch; accounting runs regardless.) + +--- + +## 3. Contiguous-subtree selection (deterministic from N + key_count) + +Both sides know `key_count` (in the commitment) and therefore the tree depth +`D = ceil(log2(key_count))` (v12 tree self-pairs odd nodes, so depth is fixed by +key_count). + +Target subtree leaf count ≈ `sqrt(key_count)`, i.e. select down to depth +`d_sel = max(0, D - ceil(log2(sqrt(key_count)))) = ceil(D/2)` levels from the +root (so the subtree spans `2^(D - d_sel) ≈ sqrt(key_count)` leaves). + +Walk from the root consuming `N`'s bits: bit = 1 → take the left child, bit = 0 +→ take the right child, for `d_sel` steps. The node reached is the **selected +subtree root**; its descendant leaves are the **selected leaves**. + +Notes / edge cases: +- `key_count == 1`: D = 0, subtree = the single leaf. Trivial proof. +- Small trees (`key_count` ≤ a floor, say 4): just challenge all leaves (subtree + = whole tree); `sqrt` rounding is meaningless there. +- The selection MUST be reproducible by the auditor to reconstruct the root, and + by the responder to know which leaves to expand. Both derive `d_sel` and the + bit-walk identically from `(N, key_count)`. Spec a single shared helper + `select_subtree_path(nonce, key_count) -> (depth, path_bits)` used by both. +- `N` is 32 bytes = 256 bits ≫ any realistic `D`, so we never run out of bits. + +--- + +## 4. Wire format (the breaking change) + +### Challenge (extends v12 `AuditChallenge`) +v12 sends an explicit `keys: Vec` + `expected_commitment_hash`. v13 +replaces the key list with subtree selection: +``` +AuditChallengeV13 { + challenge_id: u64, + nonce: [u8; 32], // selects subtree AND freshens leaf hashes + challenged_peer_id: [u8; 32], + expected_commitment_hash: [u8; 32], // the pinned (gossiped) root's commitment hash; REQUIRED in v13 +} +``` +No key list — the subtree is derived from `nonce + key_count`. (`key_count` is +known to the auditor from the gossiped commitment it pinned.) + +### Response (new `SubtreeProof` variant) +``` +AuditResponseV13::SubtreeProof { + challenge_id: u64, + commitment: StorageCommitment, // the pinned commitment, so the auditor re-derives key_count + verifies the sig/root binding (v12 gates 2a/2b/2c/3 reused) + selected_leaves: Vec, // the ~sqrt(N) leaves of the selected subtree, in tree order + sibling_cut_hashes: Vec<[u8;32]>, // one per level on the path root->subtree, the UNSELECTED sibling subtree roots (plain) +} + +SubtreeLeaf { + key: XorName, + bytes_hash: [u8;32], // H(bytes) — the plain leaf value (v12 leaf = BLAKE3(DOMAIN_LEAF || key || bytes_hash)) + nonced_hash: [u8;32], // H(N || bytes) — fresh possession proof for THIS audit +} +``` +Rejection variants retained for genuine cases (see §6): `Bootstrapping`, +`Rejected{reason}`. + +Size: `selected_leaves` ≈ `sqrt(N)` × ~96 B + `sibling_cut_hashes` ≈ `D/2` × 32 B. +For N=10k: ~100 leaves ≈ 9.6 KB + ~7 cut hashes. Small. + +--- + +## 5. Verification (auditor side) + +1. **Pin + signature gates (reuse v12):** `commitment.sender_peer_id == + challenged_peer`; `BLAKE3(pubkey)==peer_id`; ML-DSA sig valid; + `commitment_hash(commitment) == expected_commitment_hash` (the pinned root). + Any mismatch → fail (this is a confirmed misbehaviour, not staleness, because + the pin is the root the peer *just gossiped* — see retention §7). +2. **Derive** `(d_sel, path_bits) = select_subtree_path(nonce, commitment.key_count)`. +3. **Structural:** `selected_leaves.len() == expected subtree leaf count` for + that path; `sibling_cut_hashes.len() == d_sel`; leaves are unique and in + ascending key order (v12 sorts leaves by key for deterministic roots). +4. **Reconstruct root:** build the selected subtree root from + `leaf_hash(key_i, bytes_hash_i)` over `selected_leaves` (v12 leaf hashing + + node hashing, self-pair on odd). Then fold up through `sibling_cut_hashes` + using `path_bits` (selected child on the side dictated by the bit, sibling = + cut hash) to a candidate root. **Candidate root MUST equal + `commitment.root`.** This proves: the selected subtree genuinely belongs to + the committed tree, AND the cut hashes are consistent with the committed root + (the responder can't fake the unselected regions without breaking the root). +5. **Possession of selected leaves:** for each selected leaf: + - Obtain the chunk bytes: from local store if held (the common case among + close-group peers), else fetch from the network (anywhere — see §8 relay + note). + - Confirm `BLAKE3(bytes) == bytes_hash` (leaf consistency) AND + `H(N ‖ bytes) == nonced_hash`. Both must hold. The nonced check is the + fresh-possession proof: the responder could only produce `nonced_hash` + correctly by having the bytes at challenge time. +6. **Timing:** the whole response must arrive within `audit_response_timeout` + sized for hashing `sqrt(N)` chunks at local-disk speed × slack (reuse v12's + formula, scaled to the subtree leaf count). A relay/lazy node missing + selected leaves must fetch them over the network → blows the deadline. + +All-pass → `Passed`. Any structural/root/possession failure → confirmed audit +failure (`Rejected`-class), accounted + credit-revoked. Timeout → strike +(accounted; penalty gated by the rollout switch). + +--- + +## 6. Disposition of every outcome (no Idle escape) + +| Outcome | v12 today | v13 | +|---|---|---| +| Valid subtree proof, bytes verify | Passed | **Passed** | +| Root reconstruction ≠ pinned root | (n/a) | **Confirmed failure** (forged/inconsistent tree) | +| `bytes_hash`/`nonced_hash` mismatch on a selected leaf | DigestMismatch failure | **Confirmed failure** (byte loss / fake) | +| `unknown commitment hash` (peer can't answer the root it *just gossiped*) | benign `Idle`, no penalty | **Confirmed failure** — retention (§7) guarantees an honest node retains the last-2 gossiped trees, so repudiating one is misbehaviour, not lag | +| `key not in commitment` | benign `Idle` | **DOES NOT EXIST** — auditor no longer names keys; it challenges a subtree of the peer's *own* committed tree, so every challenged leaf is by construction in the commitment | +| Timeout | strike → (penalty disabled in #113) | same: strike, accounted, penalty gated by rollout switch | +| Peer not responsible for the key set anymore (topology churn) | `Idle` | n/a — challenge is over the peer's own committed tree; responsibility/closeness is checked separately (§9), not a per-key skip | +| §3 capable-but-no-current-commitment | `Idle` | **unreachable on the gossip-triggered path** (audit is triggered BY a fresh commitment, so one always exists); only relevant to an optional backstop tick | + +The two v12 benign-`Idle` escapes are eliminated: one becomes impossible +(`key not in commitment`), the other becomes a confirmed failure +(`unknown hash`, justified by retention). + +--- + +## 7. Retention: "commit to what you gossip, challengeable until next-next gossip" + +Responder keeps, with chunk data, the trees for the **last 2 GOSSIPED +commitments** (not last-2-rotations): the current gossiped one and the previous +gossiped one. Rationale for 2 (not 1): absorbs the race where an auditor pins +gossip Gₙ while the node has already gossiped Gₙ₊₁ — the auditor's in-flight +challenge for Gₙ is still answerable. A challenge pinned to anything older than +the last 2 gossiped roots may legitimately `Rejected{unknown}`; the auditor only +ever pins the freshly-received root (it audits on gossip), so in practice it +always pins Gₙ or Gₙ₊₁. + +Implementation: change `ResponderCommitmentState` retention from N-slots-by- +rotation to "retain the last 2 commitments that were emitted on the wire + +their referenced chunks." Mark-on-gossip. Memory bound: 2 trees + their chunks; +chunks are retained (not pruned) until they fall out of the last-2-gossiped +window. This is the storage cost the user accepted. + +Because of this, an honest node challenged on a root it gossiped within the last +2 gossip cycles can ALWAYS answer → `unknown commitment hash` for such a root is +provably misbehaviour → safe to treat as a confirmed failure (closes the v12 +escape). + +--- + +## 8. Threat model + accepted tradeoffs + +- **Relay (stores nothing, fetches on demand):** must fetch+hash `sqrt(N)` chunks + for the selected subtree under the response deadline. Fetch-from-anywhere is + fine — the defense is *time*: a relay can't fetch+hash its subtree as fast as + a storer reads local disk. Caught by timeout. (Same mechanism as v12, now over + a contiguous subtree.) +- **Data-shedder (deletes a fraction `f`):** caught only if a deleted chunk + falls in the nonce-selected subtree (a `~1/sqrt(N)` region). ACCEPTED + TRADEOFF: per-audit coverage is concentrated, not whole-keyspace. Convergence + comes from *frequent random-nonce audits* selecting different subtrees over + time. Quantify in the spec review: with audit probability `p` per gossip and + gossip interval `g`, expected audits/hour and expected time-to-detection for a + given `f` must be computed and deemed acceptable. (If too slow, raise `p`, + shrink cooldown, or select >1 subtree per audit.) +- **Tree-padding / size inflation:** v13 does NOT fully verify the whole key set + (only the selected subtree + cut hashes), so a node could still pad unselected + regions with junk leaves to inflate `key_count`. PARTIALLY mitigated: §9 + closeness check on *selected* leaves only. Full size/closeness/uniqueness + auditing over the whole key set is explicitly OUT OF SCOPE here (it needs the + whole leaf set; that's the quote-quantity-audit follow-up). State this limit. +- **Nonce grinding:** the responder cannot grind `N` (auditor picks it). The + auditor picking `N` adaptively gains nothing (it wants to catch cheating, not + cause false failures). +- **Replay:** `nonced_hash = H(N‖bytes)` with fresh `N` per challenge prevents + replay of a prior response. + +--- + +## 9. Closeness / responsibility + +For each selected leaf's `key`, optionally check XOR-closeness to +`challenged_peer_id` (a node should only commit to keys near its address). A +selected leaf whose key is implausibly far from the peer is evidence of padding +→ failure. Cheap (only on selected leaves). Decide in review whether to include +in v1 of v13 or defer with the full key-set audit. + +--- + +## 10. Scheduling, probability, cooldown, load + +- Trigger in `ingest_peer_commitment` on a *changed* commitment: with prob + `AUDIT_ON_GOSSIP_PROBABILITY` (start 0.1) and per-peer cooldown + `AUDIT_ON_GOSSIP_COOLDOWN` (start 5 min), spawn a detached audit (permit-gated + by the existing send semaphore) of the gossiper, pinned to the just-ingested + root. +- Backstop tick: OPEN DECISION (user leaning pure-gossip-triggered). If pure, + delete the periodic random tick + the §3 shield branch; a silent peer is + handled by holder-credit TTL (it stops being credited). If kept, run it slow + (hours) for GC + re-challenging long-silent peers. +- Flood safety: cooldown + semaphore bound audits-per-peer and global + concurrency; v12's 60s-per-peer sig-verify rate-limit throttles how often a + peer's gossip is even processed. + +--- + +## 11. Implementation surface (for the later impl plan) + +- `protocol.rs`: new `AuditChallenge` (drop key list, require pin) + + `AuditResponse::SubtreeProof`. Bump audit protocol/version marker. +- `commitment.rs`: `select_subtree_path(nonce, key_count)`; subtree-root + reconstruction from selected leaves + sibling cut-hashes; the `nonced_hash` + leaf helper. +- `commitment_state.rs`: last-2-gossiped retention + chunk retention; `mark_gossiped`. +- `audit.rs`: responder builds the pruned subtree proof (expand selected subtree, + collect sibling cut-hashes, compute plain+nonced leaf hashes from local bytes); + auditor verifier (§5); failure dispositions (§6). +- `mod.rs`: gossip-trigger plumbing (ingest → probabilistic spawn), retention + marking at the gossip-emit sites, remove/repurpose the random tick. +- `config.rs`: `AUDIT_ON_GOSSIP_PROBABILITY`, `AUDIT_ON_GOSSIP_COOLDOWN`, + subtree target-size policy, retention count (=2). +- Tests: selection determinism; root reconstruction from pruned proof; + possession (local + fetched); unknown-hash-now-fails; retention-keeps-last-2; + timeout sizing; flood doesn't amplify; coverage-convergence simulation for a + given `f`. + +--- + +## 12. OPEN QUESTIONS for review + +1. **Coverage math:** compute expected detection time for `f = 1%/5%/10%` given + `p` and gossip cadence; confirm acceptable or tune `p`/cooldown/#subtrees. +2. **Backstop tick:** keep slow or pure-gossip-only? +3. **Closeness check (§9):** in v13.0 or deferred? +4. **>1 subtree per audit?** Selecting k independent subtrees (k small) trades a + little proof size for much better per-audit coverage — cheap insurance + against the concentrated-coverage weakness. Worth considering. +5. **Interaction with #113 rollout:** v13 is a 3rd protocol id (`.v3`)? Or does + it supersede `.v2` before `.v2` ever ships? Sequencing decision. diff --git a/src/node.rs b/src/node.rs index e63ec272..8b7a16fd 100644 --- a/src/node.rs +++ b/src/node.rs @@ -46,6 +46,36 @@ impl NodeBuilder { Self { config } } + /// Reject startup in production mode without a usable rewards address. + /// + /// A node that cannot receive payment must not silently run on the + /// production network. The placeholder address shipped in the example + /// config and an empty string both count as "unconfigured". + /// + /// # Errors + /// + /// Returns [`Error::Config`] if `network_mode` is `Production` and + /// `payment.rewards_address` is unset, empty, or the example placeholder. + fn validate_production_rewards_address(config: &NodeConfig) -> Result<()> { + if config.network_mode != NetworkMode::Production { + return Ok(()); + } + let configured = config + .payment + .rewards_address + .as_deref() + .is_some_and(|addr| !addr.is_empty() && addr != "0xYOUR_ARBITRUM_ADDRESS_HERE"); + if configured { + Ok(()) + } else { + Err(Error::Config( + "CRITICAL: Rewards address is not configured. \ + Set payment.rewards_address in config to your Arbitrum wallet address." + .to_string(), + )) + } + } + /// Build and start the node. /// /// # Errors @@ -54,26 +84,7 @@ impl NodeBuilder { pub async fn build(mut self) -> Result { info!("Building ant-node with config: {:?}", self.config); - // Validate rewards address in production - if self.config.network_mode == NetworkMode::Production { - match self.config.payment.rewards_address { - None => { - return Err(Error::Config( - "CRITICAL: Rewards address is not configured. \ - Set payment.rewards_address in config to your Arbitrum wallet address." - .to_string(), - )); - } - Some(ref addr) if addr == "0xYOUR_ARBITRUM_ADDRESS_HERE" || addr.is_empty() => { - return Err(Error::Config( - "CRITICAL: Rewards address is not configured. \ - Set payment.rewards_address in config to your Arbitrum wallet address." - .to_string(), - )); - } - Some(_) => {} - } - } + Self::validate_production_rewards_address(&self.config)?; // Resolve identity and root_dir (may update self.config.root_dir) let identity = Arc::new(Self::resolve_identity(&mut self.config).await?); @@ -150,6 +161,7 @@ impl NodeBuilder { Arc::clone(&p2p_arc), storage_arc, payment_verifier_arc, + Arc::clone(&identity), &self.config.root_dir, fresh_rx, shutdown.clone(), diff --git a/src/payment/metrics.rs b/src/payment/metrics.rs index badd4f55..b59c19f5 100644 --- a/src/payment/metrics.rs +++ b/src/payment/metrics.rs @@ -33,6 +33,18 @@ impl QuotingMetricsTracker { self.close_records_stored.fetch_add(1, Ordering::SeqCst); } + /// Overwrite the counter with an authoritative count of held records. + /// + /// This is the deletion-aware path and the SINGLE source of truth for the + /// priced record count: the handler calls it at quote time with the live + /// LMDB entry count (`current_chunks()`), so any record removed from + /// storage — by delete, prune, or otherwise — is reflected on the next + /// quote with no per-delete bookkeeping to keep in sync. `record_store` + /// remains only an optimistic between-quote hint; the resync overwrites it. + pub fn set_records(&self, count: usize) { + self.close_records_stored.store(count, Ordering::SeqCst); + } + /// Get the number of records stored. #[must_use] pub fn records_stored(&self) -> usize { @@ -62,4 +74,22 @@ mod tests { tracker.record_store(); assert_eq!(tracker.records_stored(), 3); } + + #[test] + fn test_set_records_resyncs_to_authoritative_count() { + let tracker = QuotingMetricsTracker::new(100); + assert_eq!(tracker.records_stored(), 100); + + // Resync down (e.g. after deletions/pruning the store now holds fewer). + tracker.set_records(42); + assert_eq!(tracker.records_stored(), 42); + + // Resync up (e.g. after new stores). + tracker.set_records(57); + assert_eq!(tracker.records_stored(), 57); + + // Resync to zero (empty store). + tracker.set_records(0); + assert_eq!(tracker.records_stored(), 0); + } } diff --git a/src/payment/quote.rs b/src/payment/quote.rs index 6fd40251..a154a14c 100644 --- a/src/payment/quote.rs +++ b/src/payment/quote.rs @@ -180,6 +180,17 @@ impl QuoteGenerator { self.metrics_tracker.record_store(); } + /// Resync the quoting metric to an authoritative count of held records. + /// + /// The quote price is driven by `records_stored()`. A monotonic store + /// counter would let a node delete chunks it was paid to hold yet keep + /// quoting as if it still held everything. Callers pass the authoritative + /// count of records the node ACTUALLY HOLDS (from the storage layer) so the + /// price reflects current holdings, including deletions and pruning. + pub fn resync_records(&self, count: usize) { + self.metrics_tracker.set_records(count); + } + /// Create a merkle candidate quote for batch payment using ML-DSA-65. /// /// Returns a `MerklePaymentCandidateNode` constructed with the node's diff --git a/src/replication/audit.rs b/src/replication/audit.rs index af4584ff..e810de06 100644 --- a/src/replication/audit.rs +++ b/src/replication/audit.rs @@ -1,23 +1,33 @@ -//! Storage audit protocol (Section 15). +//! Gossip-triggered contiguous-subtree storage audit (ADR-0002). //! -//! Challenge-response for claimed holders. Anti-outsourcing protection. +//! A node commits to what it stores (a signed Merkle [`StorageCommitment`] +//! gossiped to neighbours). On receiving a peer's changed commitment, a +//! neighbour may audit it: pin the just-gossiped root, send a fresh nonce that +//! deterministically selects one contiguous subtree, and require the peer to +//! prove that subtree (structure + real bytes) within a deadline. This module +//! owns the auditor entry point [`run_subtree_audit`] and the responder handler +//! [`handle_subtree_challenge`]; the pure proof maths live in +//! [`crate::replication::subtree`]. -use std::collections::{HashMap, HashSet}; use std::sync::Arc; use crate::logging::{debug, info, warn}; -use rand::seq::SliceRandom; use rand::Rng; use crate::ant_protocol::XorName; +use crate::replication::commitment::{commitment_hash, StorageCommitment}; +use crate::replication::commitment_state::ResponderCommitmentState; use crate::replication::config::{ReplicationConfig, REPLICATION_PROTOCOL_ID}; use crate::replication::protocol::{ - compute_audit_digest, AuditChallenge, AuditResponse, ReplicationMessage, - ReplicationMessageBody, ABSENT_KEY_DIGEST, + ReplicationMessage, ReplicationMessageBody, SubtreeAuditChallenge, SubtreeAuditResponse, + SubtreeByteChallenge, SubtreeByteItem, SubtreeByteResponse, }; -use crate::replication::types::{ - AuditFailureReason, FailureEvidence, PeerSyncRecord, RepairProofs, +use crate::replication::recent_provers::RecentProvers; +use crate::replication::subtree::{ + select_spotcheck_indices, select_subtree_path, subtree_plan, verify_subtree_proof, + StructureVerdict, SubtreeProof, }; +use crate::replication::types::{AuditFailureReason, FailureEvidence}; use crate::storage::LmdbStorage; use saorsa_core::identity::PeerId; use saorsa_core::P2PNode; @@ -27,1615 +37,1135 @@ use tokio::sync::RwLock; // Audit tick result // --------------------------------------------------------------------------- -/// Result of an audit tick. +/// Outcome of a single gossip-triggered audit. #[derive(Debug)] pub enum AuditTickResult { - /// Audit completed successfully (all digests matched). + /// The subtree proof verified (structure + real-bytes spot-checks). Passed { /// The peer that was challenged. challenged_peer: PeerId, - /// Number of keys verified. + /// Number of subtree leaves whose bytes were spot-checked. keys_checked: usize, }, - /// Audit found failures (after responsibility confirmation). + /// A confirmed audit failure (forged/inconsistent proof, byte/nonce + /// mismatch, repudiation of a recently gossiped commitment, or timeout). Failed { - /// Evidence of the failure for trust engine. + /// Evidence of the failure for the trust engine. evidence: FailureEvidence, }, - /// Audit target claimed bootstrapping. + /// Audit target claimed it is still bootstrapping. BootstrapClaim { /// The peer claiming bootstrap status. peer: PeerId, }, - /// No eligible peers for audit this tick. + /// Nothing to do this round (e.g. auditor itself is bootstrapping, or the + /// pinned commitment is out of protocol range). No trust effect. Idle, - /// Audit skipped (not enough local keys). + /// Retained for the engine's exhaustive match; not produced by the + /// gossip-triggered auditor (which never samples local keys). InsufficientKeys, } // --------------------------------------------------------------------------- -// Main audit tick +// Auditor side // --------------------------------------------------------------------------- -/// Execute one audit tick (Section 15 steps 2-9). -/// -/// Returns the audit result. Caller is responsible for emitting trust events. +/// ADR-0002 round-2 byte challenge samples a SMALL surprise set of the proven +/// leaves (3..=5). Small enough that the responder's honest local-disk read of +/// the original chunks stays well inside the possession-in-time deadline, while +/// a relay forced to fetch them over the network blows it; large enough that +/// faking a fraction `x` of leaves survives only `(1 - x)^k`. +const BYTE_SPOTCHECK_MIN: u32 = 3; +const BYTE_SPOTCHECK_MAX: u32 = 5; + +/// Holder-eligibility cache the auditor credits on a passing audit. /// -/// **Invariant 19**: Returns [`AuditTickResult::Idle`] immediately if -/// `is_bootstrapping` is `true` — a node must not audit others while it -/// is still bootstrapping. -#[allow(clippy::implicit_hasher)] -pub async fn audit_tick( - p2p_node: &Arc, - storage: &Arc, - config: &ReplicationConfig, - sync_history: &HashMap, - is_bootstrapping: bool, -) -> AuditTickResult { - let repair_proofs = Arc::new(RwLock::new(RepairProofs::new())); - audit_tick_with_repair_proofs( - p2p_node, - storage, - config, - sync_history, - &repair_proofs, - 0, - is_bootstrapping, - ) - .await +/// Owned by [`crate::replication::ReplicationEngine`]; borrowed here so a +/// passing audit can record `(peer, commitment_hash)` as a proven holder for +/// downstream quorum / paid-list credit. +pub struct AuditCredit<'a> { + /// Holder-eligibility cache. + pub recent_provers: &'a Arc>, } -/// Execute one repair-proof-gated audit tick. +/// The cross-cutting context for verifying one audit response, bundled so the +/// response-dispatch and verification functions stay readable. +struct AuditCtx<'a> { + p2p_node: &'a Arc, + challenged_peer: &'a PeerId, + challenge_id: u64, + nonce: [u8; 32], + expected_commitment_hash: [u8; 32], + config: &'a ReplicationConfig, + credit: Option<&'a AuditCredit<'a>>, +} + +/// Run one gossip-triggered subtree audit against `challenged_peer`, pinned to +/// the commitment hash the peer just gossiped (`expected_commitment_hash`). +/// +/// ADR-0002 two-round audit. The auditor sends a fresh random nonce and runs: /// -/// This is the production path used by the replication engine. The -/// compatibility [`audit_tick`] wrapper passes an empty proof table, so direct -/// callers that have not adopted repair proofs remain conservative and do not -/// audit peers for unproven keys. -#[allow(clippy::implicit_hasher, clippy::too_many_lines)] -pub async fn audit_tick_with_repair_proofs( +/// 1. **Structure** (round 1) — the returned subtree rebuilds to the pinned +/// root, within a size-scaled deadline. +/// 2. **Real bytes** (round 2) — the auditor demands the ORIGINAL chunk content +/// for a 3..=5 nonce-selected sample of the proven leaves FROM the responder, +/// and recomputes both the content-address hash and the nonce freshness hash +/// from that served content. The auditor holds none of the peer's chunks. +/// 3. **Timing** — each round's deadline is sized to an honest local-disk read, +/// so a relay forced to fetch over the network blows it. +/// +/// A timeout (either round) is reported as [`AuditFailureReason::Timeout`] (the +/// caller applies the strike/grace policy). Any structural failure, served +/// content that fails a hash, an explicit `Absent` for a committed sampled key, +/// or a rejection of a recently gossiped commitment, is a confirmed failure +/// acted on immediately. On a full pass, records the peer as a proven holder. +pub async fn run_subtree_audit( p2p_node: &Arc, - storage: &Arc, config: &ReplicationConfig, - sync_history: &HashMap, - repair_proofs: &Arc>, - current_sync_epoch: u64, - is_bootstrapping: bool, + challenged_peer: &PeerId, + expected_commitment_hash: [u8; 32], + key_count: u32, + credit: Option<&AuditCredit<'_>>, ) -> AuditTickResult { - // Invariant 19: never audit while still bootstrapping. - if is_bootstrapping { - return AuditTickResult::Idle; - } - - let dht = p2p_node.dht_manager(); - - // Step 2: Select one eligible peer (has RepairOpportunity) at random. - // Peers with active bootstrap claims remain eligible. A follow-up audit is - // how we observe a continued claim and apply past-grace abuse handling. - let eligible_peers = eligible_audit_peers(sync_history); - - if eligible_peers.is_empty() { - return AuditTickResult::Idle; - } - - let (challenged_peer, nonce, challenge_id) = { + let (nonce, challenge_id) = { let mut rng = rand::thread_rng(); - let selected = match eligible_peers.choose(&mut rng) { - Some(p) => *p, - None => return AuditTickResult::Idle, - }; - let n: [u8; 32] = rng.gen(); - let c: u64 = rng.gen(); - (selected, n, c) + (rng.gen::<[u8; 32]>(), rng.gen::()) }; - // Step 3: Sample keys from local store and keep those the peer is - // responsible for (appears in the close group via local RT lookup). - let all_keys = match storage.all_keys().await { - Ok(keys) => keys, + let challenge = SubtreeAuditChallenge { + challenge_id, + nonce, + challenged_peer_id: *challenged_peer.as_bytes(), + expected_commitment_hash, + }; + let msg = ReplicationMessage { + request_id: challenge_id, + body: ReplicationMessageBody::SubtreeAuditChallenge(challenge), + }; + let encoded = match msg.encode() { + Ok(data) => data, Err(e) => { - warn!("Audit: failed to read local keys: {e}"); + warn!("Audit: failed to encode subtree challenge for {challenged_peer}: {e}"); return AuditTickResult::Idle; } }; - if all_keys.is_empty() { - return AuditTickResult::Idle; - } + // Size the proof deadline from the ACTUAL selected subtree (its real-leaf + // count for this nonce + key_count), not a fixed worst-case hint. This keeps + // the deadline tight to "responder hashes ~sqrt(N) chunks at local-disk + // speed", so a relay that must fetch the subtree over the network blows it. + // The auditor and responder derive the same selection, so we know the leaf + // count before the response arrives. + let subtree_leaves = select_subtree_path(&nonce, key_count).map_or_else( + || config.subtree_audit_timeout_leaf_hint(), + |p| p.real_leaf_count() as usize, + ); + let timeout = config.audit_response_timeout(subtree_leaves); - let sample_count = ReplicationConfig::audit_sample_count(all_keys.len()); - let sampled_keys: Vec = { - let mut rng = rand::thread_rng(); - all_keys - .choose_multiple(&mut rng, sample_count) - .copied() - .collect() + let response = match p2p_node + .send_request(challenged_peer, REPLICATION_PROTOCOL_ID, encoded, timeout) + .await + { + Ok(resp) => resp, + Err(e) => { + debug!("Audit: subtree challenge to {challenged_peer} timed out / failed: {e}"); + return failed(challenged_peer, challenge_id, AuditFailureReason::Timeout); + } }; - // Step 4: Filter to keys where the chosen peer is in the close group and - // this node has proof that it already sent the peer a repair hint for the - // specific key. - let mut sampled_key_groups = Vec::new(); - for key in &sampled_keys { - let closest = dht - .find_closest_nodes_local_with_self(key, config.close_group_size) - .await; - let close_peers: HashSet = closest.iter().map(|node| node.peer_id).collect(); - if close_peers.contains(&challenged_peer) { - sampled_key_groups.push((*key, close_peers)); + let resp_msg = match ReplicationMessage::decode(&response.data) { + Ok(m) => m, + Err(e) => { + warn!("Audit: failed to decode subtree response from {challenged_peer}: {e}"); + return failed( + challenged_peer, + challenge_id, + AuditFailureReason::MalformedResponse, + ); } - } - - let peer_keys = { - let mut proofs = repair_proofs.write().await; - mature_audit_keys_for_peer( - &challenged_peer, - sampled_key_groups, - &mut proofs, - current_sync_epoch, - ) }; - if peer_keys.is_empty() { - return AuditTickResult::Idle; - } - - // peer_keys is naturally bounded by audit_sample_count (sqrt-scaled), - // so no explicit truncation needed. - - // Step 6: Send challenge. - - let challenge = AuditChallenge { + let ctx = AuditCtx { + p2p_node, + challenged_peer, challenge_id, nonce, - challenged_peer_id: *challenged_peer.as_bytes(), - keys: peer_keys.clone(), + expected_commitment_hash, + config, + credit, }; + dispatch_subtree_response(resp_msg.body, &ctx).await +} + +/// Outcome of the round-2 byte challenge round-trip (auditor side). +enum ByteRound { + /// The responder returned per-key items (verified by the caller). + Served(Vec), + /// The responder rejected the byte challenge (confirmed failure for a + /// recently pinned commitment). + Rejected, + /// No response within the byte deadline, or a transport error (graced + /// timeout). + Timeout, + /// Malformed / unexpected round-2 response body. + Malformed, +} +/// Round 2: ask the responder for the ORIGINAL chunk content of the +/// auditor-selected spot-check `keys`, sized to a possession-in-time deadline +/// (honest local-disk read of `keys.len()` chunks). The responder cannot have +/// predicted which keys are sampled. +async fn request_byte_proof(ctx: &AuditCtx<'_>, keys: &[XorName]) -> ByteRound { + let challenge = SubtreeByteChallenge { + challenge_id: ctx.challenge_id, + nonce: ctx.nonce, + challenged_peer_id: *ctx.challenged_peer.as_bytes(), + expected_commitment_hash: ctx.expected_commitment_hash, + keys: keys.to_vec(), + }; let msg = ReplicationMessage { - request_id: challenge_id, - body: ReplicationMessageBody::AuditChallenge(challenge), + request_id: ctx.challenge_id, + body: ReplicationMessageBody::SubtreeByteChallenge(challenge), }; - let encoded = match msg.encode() { Ok(data) => data, Err(e) => { - warn!("Audit: failed to encode challenge: {e}"); - return AuditTickResult::Idle; + warn!("Audit: failed to encode byte challenge: {e}"); + return ByteRound::Malformed; } }; - let response = match p2p_node + // Deadline sized to "honest responder reads `keys.len()` local chunks": a + // relay forced to fetch them over the network blows it (graced timeout, + // never a confirmed failure — same possession-in-time principle as round 1). + let timeout = ctx.config.audit_response_timeout(keys.len()); + let response = match ctx + .p2p_node .send_request( - &challenged_peer, + ctx.challenged_peer, REPLICATION_PROTOCOL_ID, encoded, - config.audit_response_timeout(peer_keys.len()), + timeout, ) .await { Ok(resp) => resp, Err(e) => { - debug!("Audit: challenge to {challenged_peer} failed: {e}"); - // Timeout — need responsibility confirmation before penalty. - return handle_audit_timeout( - &challenged_peer, - challenge_id, - &peer_keys, - p2p_node, - config, - ) - .await; + debug!( + "Audit: byte challenge to {} timed out / failed: {e}", + ctx.challenged_peer + ); + return ByteRound::Timeout; } }; - // Step 7: Parse response. let resp_msg = match ReplicationMessage::decode(&response.data) { Ok(m) => m, Err(e) => { - warn!("Audit: failed to decode response from {challenged_peer}: {e}"); - return handle_audit_failure( - &challenged_peer, - challenge_id, - &peer_keys, - AuditFailureReason::MalformedResponse, - p2p_node, - config, - ) - .await; + warn!("Audit: failed to decode byte response: {e}"); + return ByteRound::Malformed; } }; match resp_msg.body { - ReplicationMessageBody::AuditResponse(AuditResponse::Bootstrapping { + ReplicationMessageBody::SubtreeByteResponse(SubtreeByteResponse::Items { + challenge_id, + items, + }) if challenge_id == ctx.challenge_id => ByteRound::Served(items), + ReplicationMessageBody::SubtreeByteResponse(SubtreeByteResponse::Rejected { + challenge_id, + reason, + }) if challenge_id == ctx.challenge_id => { + warn!( + "Audit: {} rejected byte challenge: {reason}", + ctx.challenged_peer + ); + ByteRound::Rejected + } + // A node claiming bootstrap MID-AUDIT (it answered round 1) is treated + // as a timeout: it didn't prove possession but the round-1 proof shows + // it isn't bootstrapping, so the bootstrap-claim-abuse detector (round 1) + // owns that lane; here we just don't credit it. + ReplicationMessageBody::SubtreeByteResponse(SubtreeByteResponse::Bootstrapping { + challenge_id, + }) if challenge_id == ctx.challenge_id => ByteRound::Timeout, + _ => ByteRound::Malformed, + } +} + +/// Map a decoded response body to an audit outcome (auditor side). A response +/// whose `challenge_id` doesn't match, or any non-subtree body, is malformed. +async fn dispatch_subtree_response( + body: ReplicationMessageBody, + ctx: &AuditCtx<'_>, +) -> AuditTickResult { + let challenged_peer = ctx.challenged_peer; + let challenge_id = ctx.challenge_id; + let malformed = || { + failed( + challenged_peer, + challenge_id, + AuditFailureReason::MalformedResponse, + ) + }; + match body { + ReplicationMessageBody::SubtreeAuditResponse(SubtreeAuditResponse::Bootstrapping { challenge_id: resp_id, }) => { if resp_id != challenge_id { - warn!("Audit: challenge ID mismatch on Bootstrapping from {challenged_peer}"); - return handle_audit_failure( - &challenged_peer, - challenge_id, - &peer_keys, - AuditFailureReason::MalformedResponse, - p2p_node, - config, - ) - .await; + return malformed(); } - // Step 7b: Bootstrapping claim. AuditTickResult::BootstrapClaim { - peer: challenged_peer, + peer: *challenged_peer, } } - ReplicationMessageBody::AuditResponse(AuditResponse::Digests { + ReplicationMessageBody::SubtreeAuditResponse(SubtreeAuditResponse::Rejected { challenge_id: resp_id, - digests, + reason, }) => { if resp_id != challenge_id { - warn!("Audit: challenge ID mismatch from {challenged_peer}"); - return handle_audit_failure( - &challenged_peer, - challenge_id, - &peer_keys, - AuditFailureReason::MalformedResponse, - p2p_node, - config, - ) - .await; + return malformed(); } - verify_digests( - &challenged_peer, - challenge_id, - &nonce, - &peer_keys, - &digests, - storage, - p2p_node, - config, - ) - .await + // ADR-0002: the auditor only ever pins a commitment the peer JUST + // gossiped, and an honest responder retains its last two gossiped + // commitments. So a rejection of a freshly pinned root is a + // confirmed failure (repudiating what you just published), not + // benign staleness. There is no no-penalty lane. + warn!("Audit: peer {challenged_peer} rejected subtree challenge: {reason}"); + failed(challenged_peer, challenge_id, AuditFailureReason::Rejected) } - ReplicationMessageBody::AuditResponse(AuditResponse::Rejected { + ReplicationMessageBody::SubtreeAuditResponse(SubtreeAuditResponse::Proof { challenge_id: resp_id, - reason, + commitment, + proof, }) => { if resp_id != challenge_id { - warn!("Audit: challenge ID mismatch on Rejected from {challenged_peer}"); - return handle_audit_failure( - &challenged_peer, - challenge_id, - &peer_keys, - AuditFailureReason::MalformedResponse, - p2p_node, - config, - ) - .await; + return malformed(); } - warn!("Audit: challenge rejected by {challenged_peer}: {reason}"); - handle_audit_failure( - &challenged_peer, - challenge_id, - &peer_keys, - AuditFailureReason::Rejected, - p2p_node, - config, - ) - .await + verify_subtree_response(ctx, &commitment, &proof).await } _ => { warn!("Audit: unexpected response type from {challenged_peer}"); - handle_audit_failure( - &challenged_peer, - challenge_id, - &peer_keys, - AuditFailureReason::MalformedResponse, - p2p_node, - config, - ) - .await + malformed() } } } -fn eligible_audit_peers(sync_history: &HashMap) -> Vec { - sync_history - .iter() - .filter(|(_, record)| record.has_repair_opportunity()) - .map(|(peer, _)| *peer) - .collect() +/// The pure verdict of evaluating a subtree-audit response, independent of +/// storage/network. Tests call this directly so the SHIPPED gate logic is what +/// gets exercised (no reimplementation that could drift). +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) enum AuditVerdict { + /// All gates passed and at least one leaf was byte-verified. + Pass { + /// Number of leaves whose real bytes were verified in round 2. + checked: usize, + }, + /// A confirmed failure with this reason (penalizable / acted upon). + Fail(AuditFailureReason), } -fn mature_audit_keys_for_peer( - challenged_peer: &PeerId, - sampled_key_groups: Vec<(XorName, HashSet)>, - repair_proofs: &mut RepairProofs, - current_sync_epoch: u64, -) -> Vec { - sampled_key_groups - .into_iter() - .filter_map(|(key, close_peers)| { - repair_proofs - .has_mature_replica_hint(challenged_peer, &key, &close_peers, current_sync_epoch) - .then_some(key) - }) - .collect() +/// Round-1 structural evaluation of a subtree-audit proof (ADR-0002). +/// +/// Runs the cheap gates in fail-fast order: pin / identity / signature → +/// structure (the returned subtree rebuilds to the pinned root). It does **not** +/// prove byte possession — the leaves carry only the public `bytes_hash` (the +/// chunk address) and a `nonced_hash` the responder computed itself. Possession +/// is proven in round 2 ([`verify_byte_response`]), where the auditor demands +/// the original chunk bytes for a nonce-selected sample and recomputes both +/// hashes from the SERVED content. This removes any dependency on the auditor +/// holding the peer's chunks. +/// +/// Returns [`StructureVerdict::Valid`] (proceed to round 2) or a confirmed +/// [`AuditFailureReason`] mapped from the failing gate. +pub(crate) fn evaluate_subtree_structure( + commitment: &StorageCommitment, + proof: &SubtreeProof, + nonce: &[u8; 32], + expected_commitment_hash: &[u8; 32], + challenged_peer_bytes: &[u8; 32], +) -> Result<(), AuditFailureReason> { + // -- Pin + identity + signature -- + if &commitment.sender_peer_id != challenged_peer_bytes { + return Err(AuditFailureReason::Rejected); + } + let derived_peer_id = *blake3::hash(&commitment.sender_public_key).as_bytes(); + if derived_peer_id != commitment.sender_peer_id { + return Err(AuditFailureReason::Rejected); + } + match commitment_hash(commitment) { + Some(h) if &h == expected_commitment_hash => {} + _ => return Err(AuditFailureReason::Rejected), + } + if !crate::replication::commitment::verify_commitment_signature(commitment) { + return Err(AuditFailureReason::Rejected); + } + + // -- Structure -- + if let StructureVerdict::Invalid(_) = verify_subtree_proof(proof, nonce, commitment) { + return Err(AuditFailureReason::DigestMismatch); + } + Ok(()) } -// --------------------------------------------------------------------------- -// Digest verification -// --------------------------------------------------------------------------- +/// The auditor's nonce-derived spot-check sample of the round-1 subtree: the +/// distinct leaves (in proof order) whose original bytes the auditor will demand +/// in round 2. Empty only if the proof is empty (cannot happen post-structure). +pub(crate) fn spotcheck_leaves<'a>( + proof: &'a SubtreeProof, + nonce: &[u8; 32], + key_count: u32, + spotcheck_count: u32, +) -> Vec<&'a crate::replication::subtree::SubtreeLeaf> { + let Some(path) = select_subtree_path(nonce, key_count) else { + return Vec::new(); + }; + let mut out = Vec::new(); + for idx in select_spotcheck_indices(nonce, &path, spotcheck_count) { + if let Some(leaf) = proof.leaves.get(idx as usize) { + out.push(leaf); + } + } + out +} -/// Verify per-key digests from audit response (Step 8). -#[allow(clippy::too_many_arguments)] -async fn verify_digests( - challenged_peer: &PeerId, - challenge_id: u64, +/// Round-2 verdict (ADR-0002): the responder served the original chunk content +/// for the auditor's spot-check sample; verify possession from THAT content. +/// +/// `served(key)` returns what the responder returned for a requested key: +/// `Some(Some(bytes))` for [`SubtreeByteItem::Present`], `Some(None)` for an +/// explicit [`SubtreeByteItem::Absent`], and `None` if the responder omitted the +/// key entirely (treated like `Absent` — a committed key it would not serve). +/// +/// For each sampled leaf the auditor recomputes, from the SERVED content: +/// - `BLAKE3(content) == leaf.bytes_hash` (the chunk's content address), AND +/// - `BLAKE3(nonce ‖ peer ‖ key ‖ content) == leaf.nonced_hash` (freshness), +/// i.e. `compute_audit_digest(nonce, peer, key, content)`. +/// +/// The freshness inputs are byte-identical to what the responder used to BUILD +/// the leaf in round 1 (`subtree_leaf` → `nonced_leaf_hash`): the SAME four +/// inputs, so an honest holder's served content reproduces `nonced_hash` +/// exactly. Round 1 commits over the data (the `nonced_hash` is uncomputable +/// without the bytes); round 2 reveals a random subset to prove the commitment +/// was not fabricated. +/// +/// Both checks are over the content the responder sent, so the auditor needs to +/// hold none of the peer's chunks. Any `Absent`/omitted committed key, or any +/// served content that fails a hash, is a provable lie → confirmed +/// [`AuditFailureReason::DigestMismatch`]. All sampled leaves verifying → +/// `Pass { checked }`. +pub(crate) fn verify_byte_response( + leaves: &[&crate::replication::subtree::SubtreeLeaf], nonce: &[u8; 32], - keys: &[XorName], - digests: &[[u8; 32]], - storage: &Arc, - p2p_node: &Arc, - config: &ReplicationConfig, -) -> AuditTickResult { - // Requirement: response must have exactly one digest per key. - if digests.len() != keys.len() { - warn!( - "Audit: malformed response from {challenged_peer}: {} digests for {} keys", - digests.len(), - keys.len() + challenged_peer_bytes: &[u8; 32], + served: impl Fn(&XorName) -> Option>>, +) -> AuditVerdict { + let mut checked = 0usize; + for leaf in leaves { + // Present{bytes} -> Some(Some(bytes)); Absent -> Some(None); omitted -> None. + // A committed key the responder cannot / will not serve is a provable lie. + let Some(Some(content)) = served(&leaf.key) else { + return AuditVerdict::Fail(AuditFailureReason::DigestMismatch); + }; + let plain = *blake3::hash(&content).as_bytes(); + let nonced = crate::replication::subtree::nonced_leaf_hash( + nonce, + challenged_peer_bytes, + &leaf.key, + &content, ); - return handle_audit_failure( - challenged_peer, - challenge_id, - keys, - AuditFailureReason::MalformedResponse, - p2p_node, - config, - ) - .await; + if leaf.bytes_hash != plain || leaf.nonced_hash != nonced { + // Served content does not hash to the committed address / freshness + // hash: cannot be the chunk it committed to. + return AuditVerdict::Fail(AuditFailureReason::DigestMismatch); + } + checked += 1; } + AuditVerdict::Pass { checked } +} - let challenged_peer_bytes = challenged_peer.as_bytes(); - let mut failed_keys = Vec::new(); - - for (i, key) in keys.iter().enumerate() { - let received_digest = &digests[i]; +/// Verify a subtree-proof response (auditor side), ADR-0002 two-round audit. +/// +/// **Round 1** (this proof): pin + identity + signature + structure. If the +/// proof structurally rebuilds to the pinned root, the tree SHAPE is committed — +/// but not yet that the bytes are held. **Round 2**: the auditor picks a small +/// nonce-selected sample of the just-proven leaves and sends a +/// [`SubtreeByteChallenge`] demanding their original chunk content FROM the +/// responder, then verifies that content against the committed `bytes_hash` +/// (content address) and `nonced_hash` (freshness). A responder that committed +/// to a chunk it no longer holds cannot serve content that hashes to the +/// committed address, so it fails — regardless of what the auditor holds. On a +/// full pass, credits the peer as a proven holder. +async fn verify_subtree_response( + ctx: &AuditCtx<'_>, + commitment: &StorageCommitment, + proof: &SubtreeProof, +) -> AuditTickResult { + let challenged_peer = ctx.challenged_peer; + let challenge_id = ctx.challenge_id; + + // -- Round 1: pin/identity/signature + structure (no bytes). -- + if let Err(reason) = evaluate_subtree_structure( + commitment, + proof, + &ctx.nonce, + &ctx.expected_commitment_hash, + challenged_peer.as_bytes(), + ) { + warn!("Audit: {challenged_peer} failed subtree structure ({reason:?})"); + return failed(challenged_peer, challenge_id, reason); + } - // Check for absent sentinel. - if *received_digest == ABSENT_KEY_DIGEST { - failed_keys.push(*key); - continue; + // -- Round 2: surprise byte challenge for a 3..=5 nonce-selected sample. -- + // The responder cannot predict which leaves are sampled, and must serve the + // ORIGINAL content for each. We cap the sample at the ADR's 3..=5 band + // (clamped to the subtree size) so the round-2 message and the responder's + // disk read stay cheap. + let sample_n = ctx + .config + .audit_spotcheck_count() + .clamp(BYTE_SPOTCHECK_MIN, BYTE_SPOTCHECK_MAX); + let sampled = spotcheck_leaves(proof, &ctx.nonce, commitment.key_count, sample_n); + if sampled.is_empty() { + // Cannot happen after a valid structure (subtree is never empty), but + // guard rather than credit an unproven peer. + warn!("Audit: {challenged_peer} produced an empty spot-check sample; rejecting"); + return failed( + challenged_peer, + challenge_id, + AuditFailureReason::DigestMismatch, + ); + } + let sampled_keys: Vec = sampled.iter().map(|l| l.key).collect(); + + let verdict = match request_byte_proof(ctx, &sampled_keys).await { + ByteRound::Served(items) => { + verify_byte_response(&sampled, &ctx.nonce, challenged_peer.as_bytes(), |key| { + items.iter().find_map(|it| match it { + SubtreeByteItem::Present { key: k, bytes } if k == key => { + Some(Some(bytes.clone())) + } + SubtreeByteItem::Absent { key: k } if k == key => Some(None), + _ => None, + }) + }) } + // The responder rejected the byte challenge for a recently pinned + // commitment → confirmed failure, same as a round-1 rejection. + ByteRound::Rejected => AuditVerdict::Fail(AuditFailureReason::Rejected), + // No response within the byte deadline (or transport error) → timeout + // (graced by the caller's strike policy — could be honest slowness). + ByteRound::Timeout => AuditVerdict::Fail(AuditFailureReason::Timeout), + // Malformed/unexpected round-2 body. + ByteRound::Malformed => AuditVerdict::Fail(AuditFailureReason::MalformedResponse), + }; - // Recompute expected digest from local copy. - let local_bytes = match storage.get_raw(key).await { - Ok(Some(bytes)) => bytes, - Ok(None) => { - // We should hold this key (we sampled it), but it's gone. - warn!( - "Audit: local key {} disappeared during audit", - hex::encode(key) - ); - continue; + match verdict { + AuditVerdict::Fail(reason) => { + warn!("Audit: {challenged_peer} failed subtree audit ({reason:?})"); + failed(challenged_peer, challenge_id, reason) + } + AuditVerdict::Pass { checked } => { + // Closeness (ADR-0002, soft/observe-only) — see observe_closeness. + observe_closeness(ctx.p2p_node, ctx.config, challenged_peer, proof).await; + // Credit the peer as a proven holder of its committed keys. + if let (Some(credit), Some(pin)) = (ctx.credit, commitment_hash(commitment)) { + let now = std::time::Instant::now(); + let mut provers = credit.recent_provers.write().await; + for leaf in &proof.leaves { + provers.record_proof(leaf.key, *challenged_peer, pin, now); + } } - Err(e) => { - warn!("Audit: failed to read local key {}: {e}", hex::encode(key)); - continue; + info!( + "Audit: peer {challenged_peer} passed subtree audit ({} leaves, {checked} \ + byte-checked)", + proof.leaves.len() + ); + AuditTickResult::Passed { + challenged_peer: *challenged_peer, + keys_checked: checked, } - }; - - let expected = compute_audit_digest(nonce, challenged_peer_bytes, key, &local_bytes); - if *received_digest != expected { - failed_keys.push(*key); } } - - if failed_keys.is_empty() { - info!( - "Audit: peer {challenged_peer} passed (all {} keys verified)", - keys.len() - ); - return AuditTickResult::Passed { - challenged_peer: *challenged_peer, - keys_checked: keys.len(), - }; - } - - // Step 9: Responsibility confirmation for failed keys. - handle_audit_failure( - challenged_peer, - challenge_id, - &failed_keys, - AuditFailureReason::DigestMismatch, - p2p_node, - config, - ) - .await } -// --------------------------------------------------------------------------- -// Failure handling with responsibility confirmation -// --------------------------------------------------------------------------- - -/// Handle audit failure: confirm responsibility before emitting evidence (Step 9). -async fn handle_audit_failure( - challenged_peer: &PeerId, - challenge_id: u64, - failed_keys: &[XorName], - reason: AuditFailureReason, +/// Soft, density-aware closeness observation (ADR-0002). Logs — never fails — +/// when a suspicious fraction of the proof's leaves are keys the auditor itself +/// is NOT responsible for (a proxy for "implausibly far from the peer"). +/// +/// Using the auditor's own `SelfInclusiveRT` responsibility as the yardstick +/// makes this density-aware for free: on a small/dense network the auditor is +/// close to nearly every key, so almost nothing reads as far and no honest peer +/// is ever flagged. Enforcement is intentionally deferred until a testnet +/// calibrates the density threshold. +async fn observe_closeness( p2p_node: &Arc, config: &ReplicationConfig, -) -> AuditTickResult { - let dht = p2p_node.dht_manager(); - let mut confirmed_failures = Vec::new(); - - // Step 9a-b: Fresh local RT lookup for each failed key. - for key in failed_keys { - let closest = dht - .find_closest_nodes_local_with_self(key, config.close_group_size) - .await; - if closest.iter().any(|n| n.peer_id == *challenged_peer) { - confirmed_failures.push(*key); - } else { - debug!( - "Audit: peer {challenged_peer} not responsible for {} (removed from failure set)", - hex::encode(key) - ); + challenged_peer: &PeerId, + proof: &SubtreeProof, +) { + let self_id = *p2p_node.peer_id(); + let mut far = 0usize; + for leaf in &proof.leaves { + if !crate::replication::admission::is_responsible( + &self_id, + &leaf.key, + p2p_node, + config.close_group_size, + ) + .await + { + far += 1; } } - - // Step 9c: Empty confirmed set -> peer is no longer responsible for any - // of the failed keys (topology churn). This is NOT a pass — the peer did - // not prove it stores the data. Return Idle to avoid granting unearned - // positive trust. - if confirmed_failures.is_empty() { - info!("Audit: all failures for {challenged_peer} cleared by responsibility confirmation"); - return AuditTickResult::Idle; + // Only worth a line when MOST of the proof is far — that's the padding + // shape. A normal proof on a sparse network has some far keys; that's fine. + let total = proof.leaves.len(); + if total > 0 && far * 2 > total { + debug!( + "Audit: closeness signal — {far}/{total} of {challenged_peer}'s proven leaves are \ + keys this auditor is not close to (observe-only; possible padding, not penalized)" + ); } - - // Step 9d: Non-empty confirmed set -> emit evidence. - let evidence = FailureEvidence::AuditFailure { - challenge_id, - challenged_peer: *challenged_peer, - confirmed_failed_keys: confirmed_failures, - reason, - }; - - AuditTickResult::Failed { evidence } } -/// Handle audit timeout (no response received). -async fn handle_audit_timeout( +/// Build a confirmed-failure result. The auditor pinned a commitment the peer +/// committed to itself, so there is no per-key responsibility to re-confirm: +/// the failure is about the peer's own committed tree. +fn failed( challenged_peer: &PeerId, challenge_id: u64, - keys: &[XorName], - p2p_node: &Arc, - config: &ReplicationConfig, + reason: AuditFailureReason, ) -> AuditTickResult { - handle_audit_failure( - challenged_peer, - challenge_id, - keys, - AuditFailureReason::Timeout, - p2p_node, - config, - ) - .await + AuditTickResult::Failed { + evidence: FailureEvidence::AuditFailure { + challenge_id, + challenged_peer: *challenged_peer, + confirmed_failed_keys: Vec::new(), + reason, + }, + } } // --------------------------------------------------------------------------- -// Responder-side handler +// Responder side // --------------------------------------------------------------------------- -/// Handle an incoming audit challenge (responder side). +/// Handle an incoming subtree audit challenge (responder side). /// -/// Validates that the challenge targets this node, computes per-key digests, -/// and returns the response. Rejects challenges where -/// `challenged_peer_id` does not match `self_peer_id` to prevent an oracle -/// attack where a malicious challenger forges digests for a different peer. -pub async fn handle_audit_challenge( - challenge: &AuditChallenge, +/// Validates the challenge targets this node, looks up the pinned commitment in +/// the retained (last-two-gossiped) set, and builds the subtree proof for the +/// nonce-selected branch. If this node is bootstrapping it says so; if it +/// genuinely does not retain the pinned commitment it rejects (which the +/// auditor treats as a confirmed failure for a recently gossiped root). +pub async fn handle_subtree_challenge( + challenge: &SubtreeAuditChallenge, storage: &LmdbStorage, self_peer_id: &PeerId, is_bootstrapping: bool, - stored_chunks: usize, -) -> AuditResponse { + commitment_state: Option<&Arc>, +) -> SubtreeAuditResponse { if is_bootstrapping { - return AuditResponse::Bootstrapping { + return SubtreeAuditResponse::Bootstrapping { challenge_id: challenge.challenge_id, }; } if challenge.challenged_peer_id != *self_peer_id.as_bytes() { warn!( - "Audit challenge targeted wrong peer: expected {}, got {}", + "Subtree audit challenge targeted wrong peer: expected {}, got {}", hex::encode(self_peer_id.as_bytes()), hex::encode(challenge.challenged_peer_id), ); - return AuditResponse::Rejected { + return SubtreeAuditResponse::Rejected { challenge_id: challenge.challenge_id, reason: "challenged_peer_id does not match this node".to_string(), }; } - let max_keys = ReplicationConfig::max_incoming_audit_keys(stored_chunks); - if challenge.keys.len() > max_keys { - warn!( - "Audit challenge rejected: {} keys exceeds dynamic limit of {max_keys} \ - (stored_chunks={stored_chunks})", - challenge.keys.len(), - ); - return AuditResponse::Rejected { + let Some(state) = commitment_state else { + return SubtreeAuditResponse::Rejected { + challenge_id: challenge.challenge_id, + reason: "no commitment state".to_string(), + }; + }; + + // Look up the pinned commitment among the last-two-gossiped retained set. + let Some(built) = state.lookup_by_hash(&challenge.expected_commitment_hash) else { + return SubtreeAuditResponse::Rejected { + challenge_id: challenge.challenge_id, + reason: "unknown commitment hash".to_string(), + }; + }; + + // Geometry first (no bytes touched): which leaves to prove + the sibling + // cut-hashes from the committed tree. + let plan = match subtree_plan(built.tree(), &challenge.nonce) { + Ok(p) => p, + Err(e) => { + warn!("Subtree audit: failed to plan proof: {e:?}"); + return SubtreeAuditResponse::Rejected { + challenge_id: challenge.challenge_id, + reason: "could not build subtree proof".to_string(), + }; + } + }; + + // Read chunk bytes one leaf at a time so peak memory is bounded regardless + // of subtree size, hashing each into its plain + nonced leaf. + let mut leaves = Vec::with_capacity(plan.leaf_keys.len()); + for key in &plan.leaf_keys { + let Ok(Some(bytes)) = storage.get_raw(key).await else { + // Key is in our committed tree but we cannot read its bytes — real + // storage loss / deliberate non-response. For a recently gossiped + // pin the auditor counts this rejection as a confirmed failure. + warn!( + "Subtree audit: missing bytes for committed key {}", + hex::encode(key) + ); + return SubtreeAuditResponse::Rejected { + challenge_id: challenge.challenge_id, + reason: format!("missing bytes for committed key: {}", hex::encode(key)), + }; + }; + leaves.push(crate::replication::subtree::subtree_leaf( + &challenge.nonce, + &challenge.challenged_peer_id, + key, + &bytes, + )); + // bytes drops here. + } + + SubtreeAuditResponse::Proof { + challenge_id: challenge.challenge_id, + commitment: built.commitment().clone(), + proof: SubtreeProof { + leaves, + sibling_cut_hashes: plan.sibling_cut_hashes, + }, + } +} + +/// Handle a round-2 byte challenge (responder side), ADR-0002. +/// +/// The auditor has already structurally verified this node's round-1 subtree +/// proof and now demands the ORIGINAL chunk bytes for a small nonce-selected +/// sample of those leaves. For each requested key the responder either returns +/// the bytes ([`SubtreeByteItem::Present`]) or — if it committed to the key but +/// can no longer produce it — an explicit [`SubtreeByteItem::Absent`], which the +/// auditor counts as a provable failure (committing to bytes you don't hold). +/// +/// A key the responder never committed to (not in the pinned tree) is also +/// returned `Absent`: the auditor only ever samples keys it saw in round 1, so +/// in practice this guards against a malformed/forged byte challenge rather than +/// an honest mismatch. +pub async fn handle_subtree_byte_challenge( + challenge: &SubtreeByteChallenge, + storage: &LmdbStorage, + self_peer_id: &PeerId, + is_bootstrapping: bool, + commitment_state: Option<&Arc>, +) -> SubtreeByteResponse { + if is_bootstrapping { + return SubtreeByteResponse::Bootstrapping { challenge_id: challenge.challenge_id, - reason: format!( - "challenge contains {} keys, limit is {max_keys}", - challenge.keys.len() - ), }; } - let mut digests = Vec::with_capacity(challenge.keys.len()); + if challenge.challenged_peer_id != *self_peer_id.as_bytes() { + return SubtreeByteResponse::Rejected { + challenge_id: challenge.challenge_id, + reason: "challenged_peer_id does not match this node".to_string(), + }; + } + let Some(state) = commitment_state else { + return SubtreeByteResponse::Rejected { + challenge_id: challenge.challenge_id, + reason: "no commitment state".to_string(), + }; + }; + // Resolve the SAME commitment the auditor pinned in round 1. If we no longer + // retain it (it aged out of the last-two-gossiped set), reject — for a + // recently gossiped pin the auditor treats this as a confirmed failure, like + // round 1. We serve bytes only for keys actually committed to under this pin. + let Some(built) = state.lookup_by_hash(&challenge.expected_commitment_hash) else { + return SubtreeByteResponse::Rejected { + challenge_id: challenge.challenge_id, + reason: "unknown commitment hash".to_string(), + }; + }; + let committed = |key: &XorName| -> bool { built.proof_for(key).is_some() }; + + let mut items = Vec::with_capacity(challenge.keys.len()); for key in &challenge.keys { - match storage.get_raw(key).await { - Ok(Some(data)) => { - let digest = compute_audit_digest( - &challenge.nonce, - &challenge.challenged_peer_id, - key, - &data, - ); - digests.push(digest); - } - Ok(None) => { - digests.push(ABSENT_KEY_DIGEST); - } - Err(e) => { + // Read the original bytes for the requested, committed key. + if let Ok(Some(bytes)) = storage.get_raw(key).await { + items.push(SubtreeByteItem::Present { key: *key, bytes }); + } else { + // Committed to the key but cannot read its bytes → provable failure. + if committed(key) { warn!( - "Audit responder: failed to read key {}: {e}", + "Subtree byte audit: committed key {} requested but bytes absent", hex::encode(key) ); - digests.push(ABSENT_KEY_DIGEST); } + items.push(SubtreeByteItem::Absent { key: *key }); } } - AuditResponse::Digests { + SubtreeByteResponse::Items { challenge_id: challenge.challenge_id, - digests, + items, } } -// --------------------------------------------------------------------------- -// Tests -// --------------------------------------------------------------------------- - #[cfg(test)] #[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)] mod tests { use super::*; - use crate::replication::protocol::compute_audit_digest; - use crate::replication::types::{BootstrapClaimObservation, NeighborSyncState}; - use crate::storage::LmdbStorageConfig; - use std::time::Instant; - use tempfile::TempDir; - - /// Simulated stored chunk count for tests. Large enough that the dynamic - /// incoming audit limit (`2 * sqrt(N)`) never rejects small test challenges. - const TEST_STORED_CHUNKS: usize = 1_000_000; - - /// Create a test `LmdbStorage` backed by a temp directory. - async fn create_test_storage() -> (LmdbStorage, TempDir) { - let temp_dir = TempDir::new().expect("create temp dir"); - let config = LmdbStorageConfig { - root_dir: temp_dir.path().to_path_buf(), - verify_on_read: false, - max_map_size: 0, - disk_reserve: 0, - }; - let storage = LmdbStorage::new(config).await.expect("create storage"); - (storage, temp_dir) - } - - /// Build a challenge with the given parameters. - fn make_challenge( - challenge_id: u64, - nonce: [u8; 32], - peer_id: [u8; 32], - keys: Vec, - ) -> AuditChallenge { - AuditChallenge { - challenge_id, - nonce, - challenged_peer_id: peer_id, - keys, - } + use crate::replication::commitment_state::BuiltCommitment; + use crate::replication::subtree::{ + build_subtree_proof, nonced_leaf_hash, select_subtree_path, SubtreeLeaf, + }; + use saorsa_pqc::api::sig::ml_dsa_65; + + // The two-round audit splits into SHIPPED pure functions exercised directly + // here (no reimplementation that could drift): + // - round 1: `evaluate_subtree_structure` (pin/identity/signature + + // structural root rebuild), + // - sampling: `spotcheck_leaves` (the 3..=5 nonce-selected leaves), and + // - round 2: `verify_byte_response` (recompute content-address + freshness + // from the bytes the RESPONDER served — the auditor holds nothing). + + fn key(i: u32) -> XorName { + let mut k = [0u8; 32]; + k[..4].copy_from_slice(&i.to_be_bytes()); + k } - - /// Build a `PeerId` matching the raw bytes used in a challenge. - fn peer_id_from_bytes(bytes: [u8; 32]) -> PeerId { - PeerId::from_bytes(bytes) + /// The "chunk content" for a key in these fixtures. The committed tree's leaf + /// `bytes_hash` is `BLAKE3(chunk_bytes(key))`, mirroring the general + /// `(key, BLAKE3(content))` commitment; round 2 serves exactly this content. + fn chunk_bytes(k: &XorName) -> Vec { + let mut v = k.to_vec(); + v.extend_from_slice(b"chunk-body"); + v } - // -- handle_audit_challenge: present keys --------------------------------- - - #[tokio::test] - async fn handle_challenge_present_keys_returns_correct_digests() { - let (storage, _temp) = create_test_storage().await; - - // Store two chunks. - let content_a = b"chunk alpha"; - let addr_a = LmdbStorage::compute_address(content_a); - storage.put(&addr_a, content_a).await.expect("put a"); - - let content_b = b"chunk beta"; - let addr_b = LmdbStorage::compute_address(content_b); - storage.put(&addr_b, content_b).await.expect("put b"); - - let nonce = [0xAA; 32]; - let peer_id = [0xBB; 32]; - let challenge = make_challenge(42, nonce, peer_id, vec![addr_a, addr_b]); - let self_id = peer_id_from_bytes(peer_id); - - let response = - handle_audit_challenge(&challenge, &storage, &self_id, false, TEST_STORED_CHUNKS).await; - - match response { - AuditResponse::Digests { - challenge_id, - digests, - } => { - assert_eq!(challenge_id, 42); - assert_eq!(digests.len(), 2); - - let expected_a = compute_audit_digest(&nonce, &peer_id, &addr_a, content_a); - let expected_b = compute_audit_digest(&nonce, &peer_id, &addr_b, content_b); - assert_eq!(digests[0], expected_a); - assert_eq!(digests[1], expected_b); - } - AuditResponse::Bootstrapping { .. } => { - panic!("expected Digests, got Bootstrapping"); - } - AuditResponse::Rejected { .. } => { - panic!("Unexpected Rejected response"); - } - } + /// Build an honest committed tree of `n` keys + a valid round-1 proof for + /// `nonce`. Returns `(built, proof, peer_id)`. The auditor pins `built.hash()`. + fn honest(n: u32, nonce: &[u8; 32]) -> (BuiltCommitment, SubtreeProof, [u8; 32]) { + let (pk, sk) = ml_dsa_65().generate_keypair().unwrap(); + let peer_id = *blake3::hash(&pk.to_bytes()).as_bytes(); + let pk_b = pk.to_bytes(); + let entries: Vec<_> = (0..n) + .map(|i| { + let k = key(i); + (k, *blake3::hash(&chunk_bytes(&k)).as_bytes()) + }) + .collect(); + let built = BuiltCommitment::build(entries, &peer_id, &sk, &pk_b).unwrap(); + let proof = + build_subtree_proof(built.tree(), nonce, &peer_id, |k| Some(chunk_bytes(k))).unwrap(); + (built, proof, peer_id) } - // -- handle_audit_challenge: absent keys ---------------------------------- - - #[tokio::test] - async fn handle_challenge_absent_keys_returns_sentinel() { - let (storage, _temp) = create_test_storage().await; - - let absent_key = [0xFF; 32]; - let nonce = [0x11; 32]; - let peer_id = [0x22; 32]; - let challenge = make_challenge(99, nonce, peer_id, vec![absent_key]); - let self_id = peer_id_from_bytes(peer_id); - - let response = - handle_audit_challenge(&challenge, &storage, &self_id, false, TEST_STORED_CHUNKS).await; - - match response { - AuditResponse::Digests { - challenge_id, - digests, - } => { - assert_eq!(challenge_id, 99); - assert_eq!(digests.len(), 1); - assert_eq!( - digests[0], ABSENT_KEY_DIGEST, - "absent key should produce sentinel digest" - ); - } - AuditResponse::Bootstrapping { .. } => { - panic!("expected Digests, got Bootstrapping"); - } - AuditResponse::Rejected { .. } => { - panic!("Unexpected Rejected response"); - } - } + /// Round-1 verdict against the pinned commitment. + fn structure( + built: &BuiltCommitment, + proof: &SubtreeProof, + nonce: &[u8; 32], + peer: &[u8; 32], + ) -> Result<(), AuditFailureReason> { + evaluate_subtree_structure(built.commitment(), proof, nonce, &built.hash(), peer) } - // -- handle_audit_challenge: mixed present and absent --------------------- - - #[tokio::test] - async fn handle_challenge_mixed_present_and_absent() { - let (storage, _temp) = create_test_storage().await; - - let content = b"present chunk"; - let addr_present = LmdbStorage::compute_address(content); - storage.put(&addr_present, content).await.expect("put"); - - let addr_absent = [0xDE; 32]; - let nonce = [0x33; 32]; - let peer_id = [0x44; 32]; - let challenge = make_challenge(7, nonce, peer_id, vec![addr_present, addr_absent]); - let self_id = peer_id_from_bytes(peer_id); - - let response = - handle_audit_challenge(&challenge, &storage, &self_id, false, TEST_STORED_CHUNKS).await; - - match response { - AuditResponse::Digests { digests, .. } => { - assert_eq!(digests.len(), 2); - - let expected_present = - compute_audit_digest(&nonce, &peer_id, &addr_present, content); - assert_eq!(digests[0], expected_present); - assert_eq!( - digests[1], ABSENT_KEY_DIGEST, - "absent key should be sentinel" - ); - } - AuditResponse::Bootstrapping { .. } => { - panic!("expected Digests, got Bootstrapping"); - } - AuditResponse::Rejected { .. } => { - panic!("Unexpected Rejected response"); - } - } + /// The 3..=5 spot-check leaves the auditor would demand bytes for in round 2. + fn sample<'a>(proof: &'a SubtreeProof, nonce: &[u8; 32], n: u32) -> Vec<&'a SubtreeLeaf> { + spotcheck_leaves( + proof, + nonce, + n, + 8u32.clamp(BYTE_SPOTCHECK_MIN, BYTE_SPOTCHECK_MAX), + ) } - // -- handle_audit_challenge: bootstrapping -------------------------------- - - #[tokio::test] - async fn handle_challenge_bootstrapping_returns_bootstrapping_response() { - let (storage, _temp) = create_test_storage().await; - - let challenge = make_challenge(55, [0x00; 32], [0x01; 32], vec![[0x02; 32]]); - let self_id = peer_id_from_bytes([0x01; 32]); - - let response = - handle_audit_challenge(&challenge, &storage, &self_id, true, TEST_STORED_CHUNKS).await; - - match response { - AuditResponse::Bootstrapping { challenge_id } => { - assert_eq!(challenge_id, 55); - } - AuditResponse::Digests { .. } => { - panic!("expected Bootstrapping, got Digests"); - } - AuditResponse::Rejected { .. } => { - panic!("Unexpected Rejected response"); - } - } + // A round-2 `served` closure that returns the HONEST content for every key. + fn served_honest(key: &XorName) -> Option>> { + Some(Some(chunk_bytes(key))) } - // -- handle_audit_challenge: empty key list ------------------------------- - - #[tokio::test] - async fn handle_challenge_empty_keys_returns_empty_digests() { - let (storage, _temp) = create_test_storage().await; - - let challenge = make_challenge(100, [0x10; 32], [0x20; 32], vec![]); - let self_id = peer_id_from_bytes([0x20; 32]); + // ---- round 1: structure -------------------------------------------------- - let response = - handle_audit_challenge(&challenge, &storage, &self_id, false, TEST_STORED_CHUNKS).await; - - match response { - AuditResponse::Digests { - challenge_id, - digests, - } => { - assert_eq!(challenge_id, 100); - assert!( - digests.is_empty(), - "empty key list should yield empty digests" - ); - } - AuditResponse::Bootstrapping { .. } => { - panic!("expected Digests, got Bootstrapping"); - } - AuditResponse::Rejected { .. } => { - panic!("Unexpected Rejected response"); - } + #[test] + fn honest_structure_then_bytes_passes() { + let nonce = [9u8; 32]; + let (built, proof, peer) = honest(400, &nonce); + // Round 1. + assert!(structure(&built, &proof, &nonce, &peer).is_ok()); + // Round 2: honest responder serves the real content for the sample. + let s = sample(&proof, &nonce, built.commitment().key_count); + assert!(!s.is_empty()); + match verify_byte_response(&s, &nonce, &peer, served_honest) { + AuditVerdict::Pass { checked } => assert!(checked >= 1, "must verify >=1 leaf"), + other => panic!("expected Pass, got {other:?}"), } } - // -- Digest verification: matching ---------------------------------------- - #[test] - fn digest_verification_matching() { - let nonce = [0x01; 32]; - let peer_id = [0x02; 32]; - let key: XorName = [0x03; 32]; - let data = b"correct data"; - - let expected = compute_audit_digest(&nonce, &peer_id, &key, data); - let recomputed = compute_audit_digest(&nonce, &peer_id, &key, data); - + fn commitment_bound_to_another_peer_rejected() { + let nonce = [3u8; 32]; + let (built, proof, _peer) = honest(200, &nonce); + let other = [0xAAu8; 32]; assert_eq!( - expected, recomputed, - "same inputs must produce identical digests" - ); - assert_ne!( - expected, ABSENT_KEY_DIGEST, - "real digest must not be sentinel" - ); - } - - // -- Digest verification: mismatching ------------------------------------- - - #[test] - fn digest_verification_mismatching_data() { - let nonce = [0x01; 32]; - let peer_id = [0x02; 32]; - let key: XorName = [0x03; 32]; - - let digest_a = compute_audit_digest(&nonce, &peer_id, &key, b"data version A"); - let digest_b = compute_audit_digest(&nonce, &peer_id, &key, b"data version B"); - - assert_ne!( - digest_a, digest_b, - "different data must produce different digests" + structure(&built, &proof, &nonce, &other), + Err(AuditFailureReason::Rejected) ); } #[test] - fn digest_verification_mismatching_nonce() { - let peer_id = [0x02; 32]; - let key: XorName = [0x03; 32]; - let data = b"same data"; - - let digest_a = compute_audit_digest(&[0x01; 32], &peer_id, &key, data); - let digest_b = compute_audit_digest(&[0xFF; 32], &peer_id, &key, data); - - assert_ne!( - digest_a, digest_b, - "different nonces must produce different digests" + fn wrong_pinned_commitment_rejected() { + let nonce = [3u8; 32]; + let (built, proof, peer) = honest(200, &nonce); + let mut wrong_pin = built.hash(); + wrong_pin[0] ^= 0x01; + assert_eq!( + evaluate_subtree_structure(built.commitment(), &proof, &nonce, &wrong_pin, &peer), + Err(AuditFailureReason::Rejected) ); } #[test] - fn digest_verification_mismatching_peer() { - let nonce = [0x01; 32]; - let key: XorName = [0x03; 32]; - let data = b"same data"; - - let digest_a = compute_audit_digest(&nonce, &[0x02; 32], &key, data); - let digest_b = compute_audit_digest(&nonce, &[0xFE; 32], &key, data); - - assert_ne!( - digest_a, digest_b, - "different peers must produce different digests" + fn tampered_leaf_structure_rejected() { + let nonce = [3u8; 32]; + let (built, mut proof, peer) = honest(200, &nonce); + if let Some(first) = proof.leaves.first_mut() { + first.bytes_hash[0] ^= 0x01; // breaks root reconstruction + } + assert_eq!( + structure(&built, &proof, &nonce, &peer), + Err(AuditFailureReason::DigestMismatch) ); } #[test] - fn digest_verification_mismatching_key() { - let nonce = [0x01; 32]; - let peer_id = [0x02; 32]; - let data = b"same data"; - - let digest_a = compute_audit_digest(&nonce, &peer_id, &[0x03; 32], data); - let digest_b = compute_audit_digest(&nonce, &peer_id, &[0xFC; 32], data); - - assert_ne!( - digest_a, digest_b, - "different keys must produce different digests" + fn wrong_leaf_count_structure_rejected() { + let nonce = [3u8; 32]; + let (built, mut proof, peer) = honest(200, &nonce); + proof.leaves.pop(); + assert_eq!( + structure(&built, &proof, &nonce, &peer), + Err(AuditFailureReason::DigestMismatch) ); } - // -- Absent sentinel is all zeros ----------------------------------------- + // ---- round 2: responder-served bytes ------------------------------------ #[test] - fn absent_sentinel_is_all_zeros() { - assert_eq!(ABSENT_KEY_DIGEST, [0u8; 32], "sentinel must be all zeros"); - } - - // -- Bootstrapping skips digest computation even with stored keys --------- - - #[tokio::test] - async fn bootstrapping_skips_digest_computation() { - let (storage, _temp) = create_test_storage().await; - - let content = b"stored but bootstrapping"; - let addr = LmdbStorage::compute_address(content); - storage.put(&addr, content).await.expect("put"); - - let challenge = make_challenge(200, [0xCC; 32], [0xDD; 32], vec![addr]); - let self_id = peer_id_from_bytes([0xDD; 32]); - - let response = - handle_audit_challenge(&challenge, &storage, &self_id, true, TEST_STORED_CHUNKS).await; - - assert!( - matches!(response, AuditResponse::Bootstrapping { challenge_id: 200 }), - "bootstrapping node must not compute digests" - ); - } - - // -- Scenario 19/53: Partial failure with mixed responsibility ---------------- - - #[tokio::test] - async fn scenario_19_partial_failure_mixed_responsibility() { - // Three keys challenged: K1 matches, K2 mismatches, K3 absent. - // After responsibility confirmation, only K2 is confirmed responsible. - // AuditFailure emitted for {K2} only. - // Test handle_audit_challenge with mixed results, then verify - // the digest logic manually. - - let (storage, _temp) = create_test_storage().await; - let nonce = [0x42u8; 32]; - let peer_id = [0xAA; 32]; - - // Store K1 and K2, but NOT K3 - let content_k1 = b"key one data"; - let addr_k1 = LmdbStorage::compute_address(content_k1); - storage.put(&addr_k1, content_k1).await.unwrap(); - - let content_k2 = b"key two data"; - let addr_k2 = LmdbStorage::compute_address(content_k2); - storage.put(&addr_k2, content_k2).await.unwrap(); - - let addr_k3 = [0xFF; 32]; // Not stored - - let challenge = AuditChallenge { - challenge_id: 100, - nonce, - challenged_peer_id: peer_id, - keys: vec![addr_k1, addr_k2, addr_k3], - }; - let self_id = peer_id_from_bytes(peer_id); - - let response = - handle_audit_challenge(&challenge, &storage, &self_id, false, TEST_STORED_CHUNKS).await; - - match response { - AuditResponse::Digests { digests, .. } => { - assert_eq!(digests.len(), 3); - - // K1 should have correct digest - let expected_k1 = compute_audit_digest(&nonce, &peer_id, &addr_k1, content_k1); - assert_eq!(digests[0], expected_k1); - - // K2 should have correct digest - let expected_k2 = compute_audit_digest(&nonce, &peer_id, &addr_k2, content_k2); - assert_eq!(digests[1], expected_k2); - - // K3 absent -> sentinel - assert_eq!(digests[2], ABSENT_KEY_DIGEST); + fn deleter_absent_bytes_is_confirmed_failure() { + // THE headline fix: a node whose round-1 proof is structurally perfect + // but which has DELETED a committed chunk cannot serve its bytes. It + // signals `Absent` for the sampled key → provable lie → confirmed + // failure. Crucially, the auditor holds NONE of the peer's chunks; the + // verdict depends only on what the responder serves. + let nonce = [9u8; 32]; + let (built, proof, peer) = honest(400, &nonce); + assert!(structure(&built, &proof, &nonce, &peer).is_ok()); + let s = sample(&proof, &nonce, built.commitment().key_count); + // Responder returns Absent for the FIRST sampled key, honest for the rest. + let victim = s.first().map(|l| l.key).unwrap(); + let v = verify_byte_response(&s, &nonce, &peer, |k| { + if *k == victim { + Some(None) // explicit Absent + } else { + Some(Some(chunk_bytes(k))) } - AuditResponse::Bootstrapping { .. } => panic!("Expected Digests response"), - AuditResponse::Rejected { .. } => panic!("Unexpected Rejected response"), - } + }); + assert_eq!(v, AuditVerdict::Fail(AuditFailureReason::DigestMismatch)); } - // -- Scenario 54: All digests pass ------------------------------------------- - - #[tokio::test] - async fn scenario_54_all_digests_pass() { - // All challenged keys present and digests match. - // Multiple keys to strengthen coverage beyond existing two-key tests. - let (storage, _temp) = create_test_storage().await; - let nonce = [0x10; 32]; - let peer_id = [0x20; 32]; - - let c1 = b"chunk alpha"; - let c2 = b"chunk beta"; - let c3 = b"chunk gamma"; - let a1 = LmdbStorage::compute_address(c1); - let a2 = LmdbStorage::compute_address(c2); - let a3 = LmdbStorage::compute_address(c3); - storage.put(&a1, c1).await.unwrap(); - storage.put(&a2, c2).await.unwrap(); - storage.put(&a3, c3).await.unwrap(); - - let challenge = AuditChallenge { - challenge_id: 200, - nonce, - challenged_peer_id: peer_id, - keys: vec![a1, a2, a3], - }; - let self_id = peer_id_from_bytes(peer_id); - - let response = - handle_audit_challenge(&challenge, &storage, &self_id, false, TEST_STORED_CHUNKS).await; - match response { - AuditResponse::Digests { digests, .. } => { - assert_eq!(digests.len(), 3); - for (i, (addr, content)) in [(a1, &c1[..]), (a2, &c2[..]), (a3, &c3[..])] - .iter() - .enumerate() - { - let expected = compute_audit_digest(&nonce, &peer_id, addr, content); - assert_eq!(digests[i], expected, "Key {i} digest should match"); - } + #[test] + fn omitted_committed_key_is_confirmed_failure() { + // A responder that simply omits a sampled committed key from its items + // (neither Present nor Absent) is treated identically to Absent: it + // committed to the key and won't serve it → confirmed failure. + let nonce = [9u8; 32]; + let (built, proof, peer) = honest(400, &nonce); + let s = sample(&proof, &nonce, built.commitment().key_count); + let victim = s.first().map(|l| l.key).unwrap(); + let v = verify_byte_response(&s, &nonce, &peer, |k| { + if *k == victim { + None // omitted entirely + } else { + Some(Some(chunk_bytes(k))) } - AuditResponse::Bootstrapping { .. } => panic!("Expected Digests"), - AuditResponse::Rejected { .. } => panic!("Unexpected Rejected response"), - } + }); + assert_eq!(v, AuditVerdict::Fail(AuditFailureReason::DigestMismatch)); } - // -- Scenario 55: Empty failure set means no evidence ------------------------- - - /// Scenario 55: Peer challenged on {K1, K2}. Both digests mismatch. - /// Responsibility confirmation shows the peer is NOT responsible for - /// either key. The confirmed failure set is empty — no `AuditFailure` - /// evidence is emitted. - /// - /// Full `verify_digests` requires a live `P2PNode` for network lookups. - /// This test exercises the deterministic sub-steps: - /// (1) Digest comparison identifies K1 and K2 as mismatches. - /// (2) Responsibility confirmation removes both keys. - /// (3) Empty confirmed failure set means no evidence. - #[tokio::test] - async fn scenario_55_no_confirmed_responsibility_no_evidence() { - let (storage, _temp) = create_test_storage().await; - let nonce = [0x55; 32]; - let peer_id = [0x55; 32]; - - // Store K1 and K2 on the challenger (for expected digest computation). - let c1 = b"scenario 55 key one"; - let c2 = b"scenario 55 key two"; - let k1 = LmdbStorage::compute_address(c1); - let k2 = LmdbStorage::compute_address(c2); - storage.put(&k1, c1).await.expect("put k1"); - storage.put(&k2, c2).await.expect("put k2"); - - // Challenger computes expected digests. - let expected_d1 = compute_audit_digest(&nonce, &peer_id, &k1, c1); - let expected_d2 = compute_audit_digest(&nonce, &peer_id, &k2, c2); - - // Simulate peer returning WRONG digests for both keys. - let wrong_d1 = compute_audit_digest(&nonce, &peer_id, &k1, b"corrupted k1"); - let wrong_d2 = compute_audit_digest(&nonce, &peer_id, &k2, b"corrupted k2"); - assert_ne!(wrong_d1, expected_d1, "K1 digest should mismatch"); - assert_ne!(wrong_d2, expected_d2, "K2 digest should mismatch"); - - // Step 1: Identify failed keys via digest comparison. - let keys = [k1, k2]; - let expected = [expected_d1, expected_d2]; - let received = [wrong_d1, wrong_d2]; - - let mut failed_keys = Vec::new(); - for i in 0..keys.len() { - if received[i] != expected[i] { - failed_keys.push(keys[i]); - } - } - assert_eq!( - failed_keys.len(), - 2, - "Both keys should be identified as digest mismatches" - ); + #[test] + fn fake_storage_garbage_bytes_is_confirmed_failure() { + // A "fake-storage" responder claims possession but serves garbage. The + // garbage does not hash to the committed content address (`bytes_hash`), + // so the round-2 content-address check fails → confirmed failure. No + // auditor holdings involved. + let nonce = [9u8; 32]; + let (built, proof, peer) = honest(400, &nonce); + let s = sample(&proof, &nonce, built.commitment().key_count); + let v = verify_byte_response(&s, &nonce, &peer, |k| { + let mut garbage = blake3::hash(k).as_bytes().to_vec(); + garbage.extend_from_slice(b"adversary-fake-storage"); + Some(Some(garbage)) + }); + assert_eq!(v, AuditVerdict::Fail(AuditFailureReason::DigestMismatch)); + } - // Step 2: Responsibility confirmation — peer is NOT responsible for - // either key (simulated by filtering them all out). - let confirmed_responsible_keys: Vec = Vec::new(); - let confirmed_failures: Vec = failed_keys - .into_iter() - .filter(|k| confirmed_responsible_keys.contains(k)) + #[test] + fn correct_content_address_but_stale_freshness_fails() { + // Suppose a responder could serve bytes that hash to the content address + // (it holds the chunk) — then BOTH checks pass; that is honest. But if + // it serves bytes whose freshness hash does not match (e.g. replaying a + // different nonce's digest is impossible since we recompute it here), the + // freshness check must catch any content that doesn't reproduce the + // committed `nonced_hash`. We model a leaf whose committed nonced_hash was + // built under a DIFFERENT nonce, so the audit nonce's recompute differs. + let nonce = [9u8; 32]; + let (built, mut proof, peer) = honest(400, &nonce); + // Rewrite the first leaf's nonced_hash to one bound to a different nonce + // but keep its bytes_hash correct (so structure for THAT leaf's content + // address is fine; only freshness is wrong). + let other_nonce = [0xEEu8; 32]; + let s_keys: Vec = sample(&proof, &nonce, built.commitment().key_count) + .iter() + .map(|l| l.key) .collect(); - - // Step 3: Empty confirmed failure set → no AuditFailure evidence. - assert!( - confirmed_failures.is_empty(), - "With no confirmed responsibility, failure set must be empty — \ - no AuditFailure evidence should be emitted" - ); - - // Verify that constructing evidence with empty keys results in a - // no-penalty outcome (the caller checks is_empty before emitting). - let peer = PeerId::from_bytes(peer_id); - let evidence = FailureEvidence::AuditFailure { - challenge_id: 5500, - challenged_peer: peer, - confirmed_failed_keys: confirmed_failures, - reason: AuditFailureReason::DigestMismatch, - }; - if let FailureEvidence::AuditFailure { - confirmed_failed_keys, - .. - } = evidence - { - assert!( - confirmed_failed_keys.is_empty(), - "Evidence with empty failure set should not trigger a trust penalty" - ); + let victim = s_keys.first().copied().unwrap(); + for leaf in &mut proof.leaves { + if leaf.key == victim { + leaf.nonced_hash = + nonced_leaf_hash(&other_nonce, &peer, &leaf.key, &chunk_bytes(&leaf.key)); + } } + // Re-sample against the (now tampered) proof; serve honest content. + let s = sample(&proof, &nonce, built.commitment().key_count); + let v = verify_byte_response(&s, &nonce, &peer, served_honest); + assert_eq!(v, AuditVerdict::Fail(AuditFailureReason::DigestMismatch)); } - // -- Scenario 56: RepairOpportunity filters never-synced peers ---------------- - #[test] - fn scenario_56_repair_opportunity_filters_never_synced() { - // PeerSyncRecord with last_sync=None should not pass - // has_repair_opportunity(). - - let never_synced = PeerSyncRecord { - last_sync: None, - cycles_since_sync: 5, - }; - assert!(!never_synced.has_repair_opportunity()); - - let synced_no_cycle = PeerSyncRecord { - last_sync: Some(Instant::now()), - cycles_since_sync: 0, - }; - assert!(!synced_no_cycle.has_repair_opportunity()); - - let synced_with_cycle = PeerSyncRecord { - last_sync: Some(Instant::now()), - cycles_since_sync: 1, - }; - assert!(synced_with_cycle.has_repair_opportunity()); + fn auditor_holds_nothing_still_catches_deleter() { + // Explicit contract: the auditor's own storage is irrelevant. A deleter + // is caught purely from its served (absent) response. (Compare the OLD + // design, where an auditor holding none of the chunks went Inconclusive + // and the deleter walked free.) + let nonce = [0x21u8; 32]; + let (built, proof, peer) = honest(256, &nonce); + assert!(structure(&built, &proof, &nonce, &peer).is_ok()); + let s = sample(&proof, &nonce, built.commitment().key_count); + // Responder is a total deleter: Absent for everything. + let v = verify_byte_response(&s, &nonce, &peer, |_| Some(None)); + assert_eq!(v, AuditVerdict::Fail(AuditFailureReason::DigestMismatch)); } #[test] - fn expired_bootstrap_claim_does_not_remove_peer_from_audit_eligibility() { - let peer = peer_id_from_bytes([0x57; 32]); - let mut sync_history = HashMap::new(); - sync_history.insert( - peer, - PeerSyncRecord { - last_sync: Some(Instant::now()), - cycles_since_sync: 1, - }, - ); - - let mut bootstrap_claims = HashMap::new(); - let first_seen = Instant::now() - .checked_sub( - crate::replication::config::BOOTSTRAP_CLAIM_GRACE_PERIOD - + std::time::Duration::from_secs(1), - ) - .unwrap_or_else(Instant::now); - bootstrap_claims.insert(peer, first_seen); - - let eligible = eligible_audit_peers(&sync_history); - - assert!(bootstrap_claims.contains_key(&peer)); + fn sample_size_is_in_3_to_5_band() { + // ADR-0002: round-2 samples a SMALL surprise set (3..=5) of the proven + // leaves. For a large subtree the sample is capped at 5. + let nonce = [7u8; 32]; + let (built, proof, _peer) = honest(1024, &nonce); + let s = sample(&proof, &nonce, built.commitment().key_count); assert!( - eligible.contains(&peer), - "continued bootstrap claims must remain auditable so past-grace abuse can be observed" + (BYTE_SPOTCHECK_MIN as usize..=BYTE_SPOTCHECK_MAX as usize).contains(&s.len()), + "sample {} must be within 3..=5", + s.len() ); } #[test] - fn audit_key_filter_retains_stable_proofs_and_rejects_evicted_peers() { - const HINT_EPOCH: u64 = 7; - const CURRENT_EPOCH: u64 = HINT_EPOCH + 1; - const CHALLENGED_PEER_BYTE: u8 = 0xA1; - const OTHER_PEER_BYTE: u8 = 0xA2; - const NEW_PEER_BYTE: u8 = 0xA3; - const MATURE_KEY_BYTE: u8 = 0xB1; - const SAME_EPOCH_KEY_BYTE: u8 = 0xB2; - const MISSING_PROOF_KEY_BYTE: u8 = 0xB3; - const STABLE_CHURN_KEY_BYTE: u8 = 0xB4; - const EVICTED_KEY_BYTE: u8 = 0xB5; - const XOR_NAME_LEN: usize = 32; - - let challenged_peer = peer_id_from_bytes([CHALLENGED_PEER_BYTE; XOR_NAME_LEN]); - let other_peer = peer_id_from_bytes([OTHER_PEER_BYTE; XOR_NAME_LEN]); - let new_peer = peer_id_from_bytes([NEW_PEER_BYTE; XOR_NAME_LEN]); - let mature_key = [MATURE_KEY_BYTE; XOR_NAME_LEN]; - let same_epoch_key = [SAME_EPOCH_KEY_BYTE; XOR_NAME_LEN]; - let missing_proof_key = [MISSING_PROOF_KEY_BYTE; XOR_NAME_LEN]; - let stable_churn_key = [STABLE_CHURN_KEY_BYTE; XOR_NAME_LEN]; - let evicted_key = [EVICTED_KEY_BYTE; XOR_NAME_LEN]; - let close_group = HashSet::from([challenged_peer, other_peer]); - let changed_close_group = HashSet::from([challenged_peer, new_peer]); - let evicted_close_group = HashSet::from([other_peer, new_peer]); - let mut repair_proofs = RepairProofs::new(); - - assert!(repair_proofs.record_replica_hint_sent( - challenged_peer, - mature_key, - &close_group, - HINT_EPOCH, - )); - assert!(repair_proofs.record_replica_hint_sent( - challenged_peer, - same_epoch_key, - &close_group, - CURRENT_EPOCH, - )); - assert!(repair_proofs.record_replica_hint_sent( - challenged_peer, - stable_churn_key, - &close_group, - HINT_EPOCH, - )); - assert!(repair_proofs.record_replica_hint_sent( - challenged_peer, - evicted_key, - &close_group, - HINT_EPOCH, - )); - - let sampled_key_groups = vec![ - (mature_key, close_group.clone()), - (same_epoch_key, close_group.clone()), - (missing_proof_key, close_group.clone()), - (stable_churn_key, changed_close_group), - (evicted_key, evicted_close_group), - ]; - let peer_keys = mature_audit_keys_for_peer( - &challenged_peer, - sampled_key_groups, - &mut repair_proofs, - CURRENT_EPOCH, - ); - - assert_eq!( - peer_keys, - vec![mature_key, stable_churn_key], - "mature proofs for stable close-group peers should become audit keys, while same-epoch, missing, and evicted-peer proofs should not" - ); - } - - // -- Audit response must match key count -------------------------------------- - - #[tokio::test] - async fn audit_response_must_match_key_count() { - // Section 15: "A response is invalid if it has fewer or more entries - // than challenged keys." - // Verify handle_audit_challenge always produces exactly N digests for - // N keys, including edge cases. - - let (storage, _temp) = create_test_storage().await; - let nonce = [0x50; 32]; - let peer_id = [0x60; 32]; - - // Store a single chunk - let content = b"single chunk"; - let addr = LmdbStorage::compute_address(content); - storage.put(&addr, content).await.unwrap(); - - // Challenge with 1 stored + 4 absent = 5 keys total - let absent_keys: Vec = (1..=4u8).map(|i| [i; 32]).collect(); - let mut keys = vec![addr]; - keys.extend_from_slice(&absent_keys); - - let key_count = keys.len(); - let challenge = make_challenge(300, nonce, peer_id, keys); - let self_id = peer_id_from_bytes(peer_id); - - let response = - handle_audit_challenge(&challenge, &storage, &self_id, false, TEST_STORED_CHUNKS).await; - match response { - AuditResponse::Digests { digests, .. } => { - assert_eq!( - digests.len(), - key_count, - "must produce exactly one digest per challenged key" - ); - } - AuditResponse::Bootstrapping { .. } => panic!("Expected Digests"), - AuditResponse::Rejected { .. } => panic!("Unexpected Rejected response"), + fn full_pass_requires_every_sampled_leaf() { + // checked must equal the number of sampled leaves on a pass (no leaf is + // silently skipped — every sampled, committed key must verify). + let nonce = [11u8; 32]; + let (built, proof, peer) = honest(400, &nonce); + let s = sample(&proof, &nonce, built.commitment().key_count); + match verify_byte_response(&s, &nonce, &peer, served_honest) { + AuditVerdict::Pass { checked } => assert_eq!(checked, s.len()), + other => panic!("expected Pass, got {other:?}"), } } - // -- Audit digest uses full record bytes -------------------------------------- + // ---- end-to-end gate composition ---------------------------------------- #[test] - fn audit_digest_uses_full_record_bytes() { - // Verify digest changes when record content changes. - let nonce = [1u8; 32]; - let peer = [2u8; 32]; - let key = [3u8; 32]; - - let d1 = compute_audit_digest(&nonce, &peer, &key, b"data version 1"); - let d2 = compute_audit_digest(&nonce, &peer, &key, b"data version 2"); - assert_ne!( - d1, d2, - "Different record bytes must produce different digests" - ); + fn structure_fail_short_circuits_before_round_2() { + // A structurally invalid proof is rejected in round 1; the byte challenge + // is never issued. We assert the round-1 gate returns Err so the auditor + // (verify_subtree_response) never reaches request_byte_proof. + let nonce = [5u8; 32]; + let (built, mut proof, peer) = honest(300, &nonce); + if let Some(first) = proof.leaves.first_mut() { + first.bytes_hash[0] ^= 0x01; + } + assert!(structure(&built, &proof, &nonce, &peer).is_err()); } - // -- Scenario 29: Audit start gate ------------------------------------------ - - /// Scenario 29: `handle_audit_challenge` returns `Bootstrapping` when the - /// node is still bootstrapping — audit digests are never computed, and no - /// `AuditFailure` evidence is emitted by the caller. - /// - /// This is the responder-side gate. The challenger-side gate is enforced - /// by `audit_tick`'s `is_bootstrapping` guard (Invariant 19) and by - /// `check_bootstrap_drained()` in the engine loop; this test confirms the - /// complementary responder behavior. - #[tokio::test] - async fn scenario_29_audit_start_gate_during_bootstrap() { - let (storage, _temp) = create_test_storage().await; - - // Store data so there *would* be work to audit. - let content = b"should not be audited during bootstrap"; - let addr = LmdbStorage::compute_address(content); - storage.put(&addr, content).await.expect("put"); - - let challenge = make_challenge(2900, [0x29; 32], [0x29; 32], vec![addr]); - let self_id = peer_id_from_bytes([0x29; 32]); - - // Responder is bootstrapping → Bootstrapping response, NOT Digests. - let response = - handle_audit_challenge(&challenge, &storage, &self_id, true, TEST_STORED_CHUNKS).await; - assert!( - matches!( - response, - AuditResponse::Bootstrapping { challenge_id: 2900 } - ), - "bootstrapping node must not compute digests — audit start gate" - ); - - // Responder is NOT bootstrapping → normal Digests. - let response = - handle_audit_challenge(&challenge, &storage, &self_id, false, TEST_STORED_CHUNKS).await; - assert!( - matches!(response, AuditResponse::Digests { .. }), - "drained node should compute digests normally" - ); + /// Build an honest committed tree whose keys are deliberately "FAR": their + /// addresses live at the high end of the XOR space (top bytes = 0xFF). On the + /// auditor side these are the leaves `observe_closeness` counts toward `far`. + fn honest_far(n: u32, nonce: &[u8; 32]) -> (BuiltCommitment, SubtreeProof, [u8; 32]) { + let (pk, sk) = ml_dsa_65().generate_keypair().unwrap(); + let peer_id = *blake3::hash(&pk.to_bytes()).as_bytes(); + let pk_b = pk.to_bytes(); + let entries: Vec<_> = (0..n) + .map(|i| { + let mut k = [0xFFu8; 32]; + k[28..].copy_from_slice(&i.to_be_bytes()); + (k, *blake3::hash(&chunk_bytes(&k)).as_bytes()) + }) + .collect(); + let built = BuiltCommitment::build(entries, &peer_id, &sk, &pk_b).unwrap(); + let proof = + build_subtree_proof(built.tree(), nonce, &peer_id, |k| Some(chunk_bytes(k))).unwrap(); + (built, proof, peer_id) } - // -- Scenario 30: Audit peer selection from sampled keys -------------------- - - /// Scenario 30: Key sampling uses dynamic sqrt-based batch sizing and - /// `RepairOpportunity` filtering excludes never-synced peers. - /// - /// Full `audit_tick` requires a live network. This test verifies the two - /// deterministic sub-steps the function relies on: - /// (a) `audit_sample_count` scales with `sqrt(total_keys)`. - /// (b) `PeerSyncRecord::has_repair_opportunity` gates peer eligibility. + /// ADR-0002 "Closeness" is OBSERVE-ONLY: far-keyed honest proofs verify + /// exactly like near-keyed ones. The verdict (structure + served bytes) is + /// closeness-blind, so a "far/padding" shape can never produce a Fail. #[test] - fn scenario_30_audit_peer_selection_from_sampled_keys() { - // (a) Dynamic sample count scales with sqrt(total_keys). - assert_eq!( - ReplicationConfig::audit_sample_count(100), - 10, - "sample count should scale with sqrt(total_keys)" - ); - - assert_eq!(ReplicationConfig::audit_sample_count(3), 1, "sqrt(3) = 1"); - - assert_eq!( - ReplicationConfig::audit_sample_count(10_000), - 100, - "sqrt(10000) = 100" - ); - - // (b) Peer eligibility via RepairOpportunity. - // Never synced → not eligible. - let never = PeerSyncRecord { - last_sync: None, - cycles_since_sync: 10, - }; - assert!(!never.has_repair_opportunity()); - - // Synced but zero subsequent cycles → not eligible. - let too_soon = PeerSyncRecord { - last_sync: Some(Instant::now()), - cycles_since_sync: 0, - }; - assert!(!too_soon.has_repair_opportunity()); - - // Synced with ≥1 cycle → eligible. - let eligible = PeerSyncRecord { - last_sync: Some(Instant::now()), - cycles_since_sync: 2, - }; - assert!(eligible.has_repair_opportunity()); - } - - // -- Scenario 32: Dynamic challenge size ------------------------------------ - - /// Scenario 32: Challenge key count equals `|PeerKeySet(challenged_peer)|`, - /// which is dynamic per round. If no eligible peer remains after filtering, - /// the tick is idle. - /// - /// Verified via `handle_audit_challenge`: the response digest count always - /// equals the number of keys in the challenge. - #[tokio::test] - async fn scenario_32_dynamic_challenge_size() { - let (storage, _temp) = create_test_storage().await; - - // Store varying numbers of chunks. - let mut addrs = Vec::new(); - for i in 0u8..5 { - let content = format!("dynamic challenge key {i}"); - let addr = LmdbStorage::compute_address(content.as_bytes()); - storage.put(&addr, content.as_bytes()).await.expect("put"); - addrs.push(addr); - } - - let nonce = [0x32; 32]; - let peer_id = [0x32; 32]; - let self_id = peer_id_from_bytes(peer_id); - - // Challenge with 1 key. - let challenge1 = make_challenge(3201, nonce, peer_id, vec![addrs[0]]); - let resp1 = - handle_audit_challenge(&challenge1, &storage, &self_id, false, TEST_STORED_CHUNKS) - .await; - if let AuditResponse::Digests { digests, .. } = resp1 { - assert_eq!(digests.len(), 1, "|PeerKeySet| = 1 → 1 digest"); - } - - // Challenge with 3 keys. - let challenge3 = make_challenge(3203, nonce, peer_id, addrs[0..3].to_vec()); - let resp3 = - handle_audit_challenge(&challenge3, &storage, &self_id, false, TEST_STORED_CHUNKS) - .await; - if let AuditResponse::Digests { digests, .. } = resp3 { - assert_eq!(digests.len(), 3, "|PeerKeySet| = 3 → 3 digests"); - } - - // Challenge with all 5 keys. - let challenge5 = make_challenge(3205, nonce, peer_id, addrs.clone()); - let resp5 = - handle_audit_challenge(&challenge5, &storage, &self_id, false, TEST_STORED_CHUNKS) - .await; - if let AuditResponse::Digests { digests, .. } = resp5 { - assert_eq!(digests.len(), 5, "|PeerKeySet| = 5 → 5 digests"); - } - - // Challenge with 0 keys (idle equivalent — no work). - let challenge0 = make_challenge(3200, nonce, peer_id, vec![]); - let resp0 = - handle_audit_challenge(&challenge0, &storage, &self_id, false, TEST_STORED_CHUNKS) - .await; - if let AuditResponse::Digests { digests, .. } = resp0 { - assert!(digests.is_empty(), "|PeerKeySet| = 0 → 0 digests (idle)"); - } - } - - // -- Scenario 47: Bootstrap claim grace period (audit) ---------------------- - - /// Scenario 47: Challenged peer responds with bootstrapping claim during - /// audit. `handle_audit_challenge` returns `Bootstrapping`; caller records - /// `BootstrapClaimFirstSeen`. No `AuditFailure` evidence is emitted. - #[tokio::test] - async fn scenario_47_bootstrap_claim_grace_period_audit() { - let (storage, _temp) = create_test_storage().await; - - // Store data so there is an auditable key. - let content = b"bootstrap grace test"; - let addr = LmdbStorage::compute_address(content); - storage.put(&addr, content).await.expect("put"); - - let challenge = make_challenge(4700, [0x47; 32], [0x47; 32], vec![addr]); - let self_id = peer_id_from_bytes([0x47; 32]); - - // Bootstrapping peer → Bootstrapping response (grace period start). - let response = - handle_audit_challenge(&challenge, &storage, &self_id, true, TEST_STORED_CHUNKS).await; - let challenge_id = match response { - AuditResponse::Bootstrapping { challenge_id } => challenge_id, - AuditResponse::Digests { .. } => { - panic!("Expected Bootstrapping response during grace period") - } - AuditResponse::Rejected { .. } => { - panic!("Unexpected Rejected response") + fn closeness_is_observe_only_far_keys_still_pass() { + let nonce = [9u8; 32]; + + let (built_far, proof_far, peer_far) = honest_far(400, &nonce); + assert!(structure(&built_far, &proof_far, &nonce, &peer_far).is_ok()); + let sf = sample(&proof_far, &nonce, built_far.commitment().key_count); + let v_far = verify_byte_response(&sf, &nonce, &peer_far, served_honest); + + let (built_near, proof_near, peer_near) = honest(400, &nonce); + assert!(structure(&built_near, &proof_near, &nonce, &peer_near).is_ok()); + let sn = sample(&proof_near, &nonce, built_near.commitment().key_count); + let v_near = verify_byte_response(&sn, &nonce, &peer_near, served_honest); + + match (&v_far, &v_near) { + (AuditVerdict::Pass { checked: cf }, AuditVerdict::Pass { checked: cn }) => { + assert!(*cf >= 1 && *cn >= 1); } - }; - assert_eq!(challenge_id, 4700); - - // Caller records BootstrapClaimFirstSeen — verify the types support it. - let peer = PeerId::from_bytes([0x47; 32]); - let mut state = NeighborSyncState::new_cycle(vec![peer]); - let now = Instant::now(); - let observed = state.observe_bootstrap_claim( - peer, - now, - crate::replication::config::BOOTSTRAP_CLAIM_GRACE_PERIOD, - ); - - assert_eq!( - observed, - BootstrapClaimObservation::WithinGrace { first_seen: now } - ); - assert!( - state.bootstrap_claims.contains_key(&peer), - "BootstrapClaimFirstSeen should be recorded after grace-period claim" - ); + other => panic!("both honest proofs must Pass regardless of closeness, got {other:?}"), + } assert!( - state.bootstrap_claim_history.contains_key(&peer), - "Bootstrap claim history should remember that the grace window was used" + !matches!(v_far, AuditVerdict::Fail(_)), + "far/padding-shaped honest proof must NEVER fail, got {v_far:?}" ); } - // -- Scenario 53: Audit partial per-key failure with mixed responsibility --- - - /// Scenario 53: P challenged on {K1, K2, K3}. K1 matches, K2 and K3 - /// mismatch. Responsibility confirmation: P is responsible for K2 but - /// not K3. `AuditFailure` emitted for {K2} only. - /// - /// Full `verify_digests` + `handle_audit_failure` requires a `P2PNode` for - /// network lookups. This test verifies the conceptual steps: - /// (1) Digest comparison correctly identifies K2 and K3 as failures. - /// (2) `FailureEvidence::AuditFailure` carries only confirmed keys. - #[tokio::test] - async fn scenario_53_partial_failure_mixed_responsibility() { - let (storage, _temp) = create_test_storage().await; - let nonce = [0x53; 32]; - let peer_id = [0x53; 32]; - - // Store K1, K2, K3. - let c1 = b"scenario 53 key one"; - let c2 = b"scenario 53 key two"; - let c3 = b"scenario 53 key three"; - let k1 = LmdbStorage::compute_address(c1); - let k2 = LmdbStorage::compute_address(c2); - let k3 = LmdbStorage::compute_address(c3); - storage.put(&k1, c1).await.expect("put k1"); - storage.put(&k2, c2).await.expect("put k2"); - storage.put(&k3, c3).await.expect("put k3"); - - // Correct digests from challenger's local store. - let d1_expected = compute_audit_digest(&nonce, &peer_id, &k1, c1); - let d2_expected = compute_audit_digest(&nonce, &peer_id, &k2, c2); - let d3_expected = compute_audit_digest(&nonce, &peer_id, &k3, c3); - - // Simulate peer response: K1 matches, K2 wrong data, K3 wrong data. - let d2_wrong = compute_audit_digest(&nonce, &peer_id, &k2, b"tampered k2"); - let d3_wrong = compute_audit_digest(&nonce, &peer_id, &k3, b"tampered k3"); - - assert_eq!(d1_expected, d1_expected, "K1 should match"); - assert_ne!(d2_wrong, d2_expected, "K2 should mismatch"); - assert_ne!(d3_wrong, d3_expected, "K3 should mismatch"); - - // Step 1: Identify failed keys (digest comparison). - let digests = [d1_expected, d2_wrong, d3_wrong]; - let keys = [k1, k2, k3]; - let contents: [&[u8]; 3] = [c1, c2, c3]; - - let mut failed_keys = Vec::new(); - for (i, key) in keys.iter().enumerate() { - if digests[i] == ABSENT_KEY_DIGEST { - failed_keys.push(*key); - continue; - } - let expected = compute_audit_digest(&nonce, &peer_id, key, contents[i]); - if digests[i] != expected { - failed_keys.push(*key); - } - } - - assert_eq!(failed_keys.len(), 2, "K2 and K3 should be in failure set"); - assert!(failed_keys.contains(&k2)); - assert!(failed_keys.contains(&k3)); - assert!(!failed_keys.contains(&k1), "K1 passed digest check"); - - // Step 2: Responsibility confirmation removes K3 (not responsible). - // Simulate: P is in closest peers for K2 but not K3. - let responsible_for_k2 = true; - let responsible_for_k3 = false; - let mut confirmed = Vec::new(); - for key in &failed_keys { - let is_responsible = if *key == k2 { - responsible_for_k2 - } else { - responsible_for_k3 - }; - if is_responsible { - confirmed.push(*key); - } - } - - assert_eq!(confirmed, vec![k2], "Only K2 should be in confirmed set"); - - // Step 3: Construct evidence for confirmed failures only. - let challenged_peer = PeerId::from_bytes(peer_id); - let evidence = FailureEvidence::AuditFailure { - challenge_id: 5300, - challenged_peer, - confirmed_failed_keys: confirmed, - reason: AuditFailureReason::DigestMismatch, + // Unused-leaf constructor guard: keep SubtreeLeaf import meaningful. + #[test] + fn subtree_leaf_is_constructible() { + let _l = SubtreeLeaf { + key: key(1), + bytes_hash: [0u8; 32], + nonced_hash: [0u8; 32], }; - - match evidence { - FailureEvidence::AuditFailure { - confirmed_failed_keys, - .. - } => { - assert_eq!( - confirmed_failed_keys.len(), - 1, - "Only K2 should generate evidence" - ); - assert_eq!(confirmed_failed_keys[0], k2); - } - _ => panic!("Expected AuditFailure evidence"), - } } } diff --git a/src/replication/commitment.rs b/src/replication/commitment.rs new file mode 100644 index 00000000..773d0739 --- /dev/null +++ b/src/replication/commitment.rs @@ -0,0 +1,905 @@ +//! Storage-bound audit via piggybacked commitments. +//! +//! Implements the v12 design (`notes/security-findings-2026-05-22/ +//! proposal-gossip-audit-v12.md`) for closing audit Findings 1 and 2. +//! +//! ## What this module provides +//! +//! - [`StorageCommitment`] — the wire type sent on neighbour-sync gossip +//! and embedded in commitment-bound audit responses. `ML-DSA-65` signed +//! over `(root, key_count, sender_peer_id)` with explicit domain separation. +//! - [`MerkleTree`] — an in-memory Merkle tree over `(key, BLAKE3(bytes))` +//! leaves. Rebuilt by the responder when its key set changes; produces +//! inclusion paths used in audit responses. +//! - [`commitment_hash`] — the auditor's pin: a `BLAKE3` digest over the +//! full signed commitment blob. Audit challenges carry this; audit +//! responses must include a commitment that hashes to the same value. +//! - [`verify_path`] — auditor's per-key check: rebuilds the leaf from +//! `(key, bytes_hash)` and verifies the inclusion path against the +//! committed root. +//! +//! Nothing else (responder gossip loop, auditor verify path, +//! reward-eligibility cache) lives here yet — that's the next phase. + +use blake3::Hasher; +use saorsa_pqc::api::sig::{ + ml_dsa_65, MlDsaPublicKey, MlDsaSecretKey, MlDsaSignature, MlDsaVariant, +}; +use serde::{Deserialize, Serialize}; + +use crate::ant_protocol::XorName; + +/// Domain-separation tag for the commitment signature. +/// +/// Signed payload is BLAKE3 over (this tag || canonical commitment fields). +pub const DOMAIN_COMMITMENT: &[u8] = b"autonomi.ant.replication.storage_commitment.v1"; + +/// Domain-separation tag for the auditor's pin: BLAKE3 over (this tag || +/// canonical commitment blob). +pub const DOMAIN_COMMITMENT_HASH: &[u8] = b"autonomi.ant.replication.commitment_hash.v1"; + +/// Domain-separation tag for Merkle leaves: `BLAKE3(this || key || H(bytes))`. +pub const DOMAIN_LEAF: &[u8] = b"autonomi.ant.replication.storage_leaf.v1"; + +/// Domain-separation tag for Merkle internal nodes: `BLAKE3(this || left || right)`. +pub const DOMAIN_NODE: &[u8] = b"autonomi.ant.replication.storage_node.v1"; + +/// Maximum number of keys a single commitment may cover. +/// +/// Bounds the Merkle path depth (audit responses carry `O(log2 key_count)` +/// hashes per key) and the responder-side tree memory. A node storing more +/// keys than this would need to split its claim — out of scope for v1. +pub const MAX_COMMITMENT_KEY_COUNT: u32 = 1_000_000; + +/// Signed storage commitment. +/// +/// Piggybacked on neighbour-sync gossip. The signature commits to the +/// Merkle root, key count, sender peer ID, **and the sender's ML-DSA-65 +/// public key** under [`DOMAIN_COMMITMENT`]. +/// +/// Embedding the public key lets any receiver verify the signature +/// without an external `PeerId → MlDsaPublicKey` lookup. Binding the +/// public key in the signed payload prevents a key-swap attack where an +/// adversary keeps the message body but re-signs it under a different key +/// to claim a different identity. The peer-id binding (gate 2a in +/// `verify_commitment_bound_response`) still ensures the embedded key +/// belongs to the gossiping peer. +/// +/// # Wire size +/// +/// One commitment is approximately 5.3 KiB: +/// - root: 32 B +/// - `key_count`: 4 B +/// - `sender_peer_id`: 32 B +/// - `sender_public_key`: 1952 B (ML-DSA-65 public key) +/// - signature: 3293 B (ML-DSA-65 signature) +/// +/// Piggybacked on every `NeighborSyncRequest`/`Response` (~1 h interval +/// per close-group peer with the round-11 rotation cadence). At a +/// realistic close-group size of 8 with bidirectional sync, that's +/// roughly 8 × 2 × 5.3 KiB / hour = ~85 KiB/h of additional gossip +/// per node. Negligible against typical chunk-transfer bandwidth. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct StorageCommitment { + /// Merkle root over the responder's claimed keys. + pub root: [u8; 32], + /// Number of leaves committed over. + pub key_count: u32, + /// Sender peer ID, bound to the signature. + pub sender_peer_id: [u8; 32], + /// Sender's ML-DSA-65 public key bytes (1952 bytes). Embedded so + /// receivers can verify the signature without a separate pubkey + /// directory. Bound by the signature. + pub sender_public_key: Vec, + /// ML-DSA-65 signature over canonical commitment fields. 3293 bytes. + pub signature: Vec, +} + +// --------------------------------------------------------------------------- +// Hashing helpers +// --------------------------------------------------------------------------- + +/// Compute the Merkle leaf hash for `(key, bytes_hash)`. +/// +/// `bytes_hash` is BLAKE3 over the record bytes; the leaf binds the key to +/// the content so an adversary cannot reuse a leaf for a different chunk. +#[must_use] +pub fn leaf_hash(key: &XorName, bytes_hash: &[u8; 32]) -> [u8; 32] { + let mut h = Hasher::new(); + h.update(DOMAIN_LEAF); + h.update(key); + h.update(bytes_hash); + *h.finalize().as_bytes() +} + +/// Combine two child hashes into a Merkle internal-node hash. +#[must_use] +pub fn node_hash(left: &[u8; 32], right: &[u8; 32]) -> [u8; 32] { + let mut h = Hasher::new(); + h.update(DOMAIN_NODE); + h.update(left); + h.update(right); + *h.finalize().as_bytes() +} + +/// The auditor's pin: `BLAKE3(DOMAIN_COMMITMENT_HASH || postcard(commitment))`. +/// +/// Equal commitments produce equal hashes; any change to `root`, `key_count`, +/// peer ID, or signature changes the hash because postcard's canonical +/// encoding includes a length prefix for `signature`. The audit challenge +/// carries this value; the audit response must include a commitment that +/// hashes to the same value, defeating fresh-commitment substitution. +/// +/// Postcard encoding is the same canonical wire form the rest of the +/// replication protocol uses (`MessageCodec::encode`), so an encoded +/// commitment from a `NeighborSyncRequest` produces the same hash as the +/// same commitment received in an `AuditResponse`. +/// +/// # Errors +/// +/// Returns `None` only if postcard fails to serialize the commitment, which +/// in practice means the signature is somehow `> isize::MAX` bytes — not +/// reachable for ML-DSA-65 (3293 bytes). Callers may safely treat `None` as +/// a malformed commitment and drop it. +#[must_use] +pub fn commitment_hash(c: &StorageCommitment) -> Option<[u8; 32]> { + let serialized = postcard::to_allocvec(c).ok()?; + let mut h = Hasher::new(); + h.update(DOMAIN_COMMITMENT_HASH); + h.update(&serialized); + Some(*h.finalize().as_bytes()) +} + +/// Canonical bytes the ML-DSA signature covers: the commitment fields +/// minus the signature itself. +/// +/// `sender_public_key` is included so an adversary cannot keep the body +/// and re-sign under a different key (the audit-time verifier would +/// otherwise accept the swap because verification uses the embedded key). +fn commitment_signed_payload( + root: &[u8; 32], + key_count: u32, + sender_peer_id: &[u8; 32], + sender_public_key: &[u8], +) -> Vec { + let mut v = Vec::with_capacity(32 + 4 + 32 + 4 + sender_public_key.len()); + v.extend_from_slice(root); + v.extend_from_slice(&key_count.to_le_bytes()); + v.extend_from_slice(sender_peer_id); + // Length-prefix the pubkey so two different (key, suffix) splits cannot + // produce the same byte stream (canonical encoding). + let pk_len = u32::try_from(sender_public_key.len()).unwrap_or(u32::MAX); + v.extend_from_slice(&pk_len.to_le_bytes()); + v.extend_from_slice(sender_public_key); + v +} + +// --------------------------------------------------------------------------- +// Merkle tree +// --------------------------------------------------------------------------- + +/// In-memory Merkle tree over the responder's claimed keys. +/// +/// Leaves are `BLAKE3(DOMAIN_LEAF || key || BLAKE3(bytes))`, sorted by +/// `key`. Internal nodes are `BLAKE3(DOMAIN_NODE || left || right)`. When +/// a level has an odd number of nodes, the last node is paired with +/// **itself** — i.e. `node_hash(x, x)` — so the level above has +/// `ceil(n/2)` nodes. This is a standard self-pair construction (NOT +/// node promotion) and deterministically maps any non-empty key set to +/// a single root. +/// +/// Rebuilt by the responder whenever its key set changes meaningfully +/// (debounced in the integration layer; not this module's concern). +pub struct MerkleTree { + /// Sorted leaves, indexed by their position in the sorted key set. + /// + /// `leaves[i] = (key_i, leaf_hash(key_i, bytes_hash_i))`. + leaves: Vec<(XorName, [u8; 32])>, + /// Tree levels, level 0 is the leaves and the last level is the root. + /// + /// `levels[0].len() == leaves.len()`; `levels[L].len() == 1` where L + /// is the root level. + levels: Vec>, +} + +impl MerkleTree { + /// Build a Merkle tree over `(key, bytes_hash)` pairs. + /// + /// `entries` does not need to be sorted; this method sorts internally + /// so the produced root is deterministic per key set. Duplicate keys + /// are an error: the responder must deduplicate before calling. + /// + /// # Errors + /// + /// Returns an error if `entries` is empty (no commitment to make), if + /// `entries.len() > MAX_COMMITMENT_KEY_COUNT`, or if it contains + /// duplicate keys. + pub fn build(mut entries: Vec<(XorName, [u8; 32])>) -> Result { + if entries.is_empty() { + return Err(CommitmentError::EmptyKeySet); + } + if entries.len() > MAX_COMMITMENT_KEY_COUNT as usize { + return Err(CommitmentError::TooManyKeys(entries.len())); + } + + entries.sort_by_key(|a| a.0); + for w in entries.windows(2) { + if let [a, b] = w { + if a.0 == b.0 { + return Err(CommitmentError::DuplicateKey(a.0)); + } + } + } + + let leaves: Vec<(XorName, [u8; 32])> = entries + .into_iter() + .map(|(k, bh)| { + let lh = leaf_hash(&k, &bh); + (k, lh) + }) + .collect(); + + let mut level: Vec<[u8; 32]> = leaves.iter().map(|(_, h)| *h).collect(); + let mut levels = vec![level.clone()]; + while level.len() > 1 { + level = build_next_level(&level); + levels.push(level.clone()); + } + + Ok(Self { leaves, levels }) + } + + /// The Merkle root of this tree. + /// + /// `unwrap`-free: `build` guarantees at least one level with at least + /// one entry, so `last().first()` is always `Some`. + #[must_use] + pub fn root(&self) -> [u8; 32] { + // SAFETY: build() enforces non-empty entries → non-empty leaves → + // non-empty levels → last level has exactly one hash. + self.levels + .last() + .and_then(|l| l.first()) + .copied() + .unwrap_or([0u8; 32]) + } + + /// The number of leaves (== claimed keys). + #[must_use] + pub fn key_count(&self) -> u32 { + // Cast is safe because build() rejects > MAX_COMMITMENT_KEY_COUNT. + u32::try_from(self.leaves.len()).unwrap_or(u32::MAX) + } + + /// Inclusion path for `key` from its leaf up to (but not including) + /// the root. + /// + /// Returns `None` if `key` is not in this tree. + #[must_use] + pub fn path_for(&self, key: &XorName) -> Option> { + let idx = self.leaves.binary_search_by(|(k, _)| k.cmp(key)).ok()?; + + let mut path = Vec::with_capacity(self.levels.len()); + let mut i = idx; + for level in &self.levels[..self.levels.len().saturating_sub(1)] { + // Sibling is the *other* half of the pair containing `i`. If + // `i` is the unpaired last node at this level, its sibling is + // itself (matches the self-pair construction in + // `build_next_level`). + let sibling_idx = if i % 2 == 0 { + if i + 1 < level.len() { + i + 1 + } else { + i + } + } else { + i - 1 + }; + path.push(level[sibling_idx]); + i /= 2; + } + Some(path) + } + + /// Iterate over `(key, leaf_hash)` pairs in sorted order. Test-only. + #[cfg(test)] + pub(crate) fn iter_leaves(&self) -> impl Iterator { + self.leaves.iter() + } + + /// The keys this tree commits to, in sorted order. + /// + /// `sorted_keys()[i]` is the key at leaf index `i`. Used by the + /// responder's audit-answer path to recover the `leaf_index` field + /// for a challenged key in `O(log n)` via binary search. + #[must_use] + pub fn sorted_keys(&self) -> Vec { + self.leaves.iter().map(|(k, _)| *k).collect() + } + + /// The key at sorted leaf index `idx`, if in range. + /// + /// Used by the subtree-proof builder to enumerate the keys of a + /// contiguous leaf range without cloning the whole key list. + #[must_use] + pub fn key_at(&self, idx: usize) -> Option { + self.leaves.get(idx).map(|(k, _)| *k) + } + + /// The node hash at `(level, index)`, where `level` counts up from the + /// leaves (`level == 0` is the leaf level, the last level is the root). + /// + /// Returns `None` if out of range. Used by the subtree-proof builder to + /// read sibling cut-hashes along the path from the root to the selected + /// subtree; honours the same left-packed self-pair construction as the + /// rest of the tree (a caller asking for an out-of-range sibling on an + /// odd-length level should substitute the node itself). + #[must_use] + pub fn node_at(&self, level: usize, index: u64) -> Option<[u8; 32]> { + let index = usize::try_from(index).ok()?; + self.levels.get(level).and_then(|l| l.get(index)).copied() + } + + /// The number of levels in the tree (`1` for a single-leaf tree; the + /// last index is the root level). `depth == levels_count() - 1`. + #[must_use] + pub fn levels_count(&self) -> usize { + self.levels.len() + } +} + +/// Build the next level up from `cur`. Odd-length levels pair the last +/// node with itself (`node_hash(x, x)`) so the level above has +/// `ceil(n/2)` nodes. Keeps the tree balanced without needing a dummy +/// leaf domain. +fn build_next_level(cur: &[[u8; 32]]) -> Vec<[u8; 32]> { + let mut next = Vec::with_capacity(cur.len().div_ceil(2)); + let mut i = 0; + while i < cur.len() { + let left = &cur[i]; + let right = if i + 1 < cur.len() { &cur[i + 1] } else { left }; + next.push(node_hash(left, right)); + i += 2; + } + next +} + +/// Verify an inclusion path against a commitment of size `key_count`. +/// +/// `leaf_index` is the responder's position of this leaf in the sorted +/// leaf set; the commitment's `key_count` comes from +/// `StorageCommitment.key_count`. +/// At each level of the path, if the current index is even, the current +/// hash is the left child and we compute `node_hash(self, sibling)`; +/// otherwise it is the right child and we compute `node_hash(sibling, self)`. +/// +/// Returns `true` iff: +/// - `leaf_index < key_count` (rejects out-of-range claims), AND +/// - `path.len() == ceil(log2(key_count))` for `key_count > 1`, or +/// `path.is_empty()` for `key_count == 1` (rejects wrong-shape paths +/// before doing any hashing), AND +/// - the recomputed root equals `expected_root`. +#[must_use] +pub fn verify_path( + leaf: &[u8; 32], + path: &[[u8; 32]], + leaf_index: usize, + key_count: u32, + expected_root: &[u8; 32], +) -> bool { + if key_count == 0 + || key_count > MAX_COMMITMENT_KEY_COUNT + || (leaf_index as u64) >= u64::from(key_count) + { + return false; + } + // Tree depth = ceil(log2(key_count)). For a power-of-two `n`, + // `n.next_power_of_two() == n` so trailing_zeros == log2(n). For non + // powers-of-two, next_power_of_two rounds up so trailing_zeros gives + // ceil(log2). Special case: key_count == 1 → next_power_of_two == 1 + // → trailing_zeros == 0 → empty path, which matches the single-leaf + // tree's root == leaf invariant. + // + // `checked_next_power_of_two` returns None on overflow; combined with + // the MAX_COMMITMENT_KEY_COUNT cap above it cannot fail in practice, + // but the explicit check is profile-independent (release vs debug + // would otherwise differ on overflow per Rust's primitive docs). + let Some(rounded) = key_count.checked_next_power_of_two() else { + return false; + }; + let expected_path_len = rounded.trailing_zeros() as usize; + if path.len() != expected_path_len { + return false; + } + + let mut cur = *leaf; + let mut i = leaf_index; + for sibling in path { + cur = if i % 2 == 0 { + node_hash(&cur, sibling) + } else { + node_hash(sibling, &cur) + }; + i /= 2; + } + cur == *expected_root +} + +// --------------------------------------------------------------------------- +// Sign + verify +// --------------------------------------------------------------------------- + +/// Sign a commitment's `(root, key_count, sender_peer_id, sender_public_key)` +/// with `secret_key`. +/// +/// The signature is over the canonical signed payload (see +/// `commitment_signed_payload`) under [`DOMAIN_COMMITMENT`]. +/// +/// # Errors +/// +/// Returns an error if the underlying ML-DSA-65 signer fails. +pub fn sign_commitment( + secret_key: &MlDsaSecretKey, + root: &[u8; 32], + key_count: u32, + sender_peer_id: &[u8; 32], + sender_public_key: &[u8], +) -> Result, CommitmentError> { + let payload = commitment_signed_payload(root, key_count, sender_peer_id, sender_public_key); + let dsa = ml_dsa_65(); + let sig = dsa + .sign_with_context(secret_key, &payload, DOMAIN_COMMITMENT) + .map_err(|e| CommitmentError::SignatureFailed(e.to_string()))?; + Ok(sig.to_bytes()) +} + +/// Verify a commitment's signature using the embedded `sender_public_key`. +/// +/// Returns `true` iff the signature is valid for `(root, key_count, +/// sender_peer_id, sender_public_key)` under `c.sender_public_key` and +/// [`DOMAIN_COMMITMENT`]. Returns `false` on key-format or signature-format +/// errors so the caller can simply drop the gossip. +/// +/// Verifying against the embedded key removes the need for an external +/// `PeerId → MlDsaPublicKey` lookup. The peer-id binding gate in +/// `ingest_peer_commitment` (and the auditor's `evaluate_subtree_structure`) +/// still ensures the embedded key belongs to the claimed peer. +#[must_use] +pub fn verify_commitment_signature(c: &StorageCommitment) -> bool { + let Ok(public_key) = MlDsaPublicKey::from_bytes(MlDsaVariant::MlDsa65, &c.sender_public_key) + else { + return false; + }; + verify_commitment_signature_with_key(c, &public_key) +} + +/// Verify a commitment's signature against an externally provided key. +/// +/// Test-helper variant. Production code should use [`verify_commitment_signature`] +/// since the key is embedded in the commitment. +#[must_use] +pub fn verify_commitment_signature_with_key( + c: &StorageCommitment, + public_key: &MlDsaPublicKey, +) -> bool { + let payload = commitment_signed_payload( + &c.root, + c.key_count, + &c.sender_peer_id, + &c.sender_public_key, + ); + let Ok(sig) = MlDsaSignature::from_bytes(MlDsaVariant::MlDsa65, &c.signature) else { + return false; + }; + let dsa = ml_dsa_65(); + dsa.verify_with_context(public_key, &payload, &sig, DOMAIN_COMMITMENT) + .unwrap_or(false) +} + +// --------------------------------------------------------------------------- +// Errors +// --------------------------------------------------------------------------- + +/// Errors from commitment construction or verification. +#[derive(Debug, Clone, thiserror::Error)] +pub enum CommitmentError { + /// `MerkleTree::build` was called with an empty key set. + #[error("cannot build commitment over empty key set")] + EmptyKeySet, + /// Key set exceeds [`MAX_COMMITMENT_KEY_COUNT`]. + #[error("commitment key count {0} exceeds MAX_COMMITMENT_KEY_COUNT")] + TooManyKeys(usize), + /// `MerkleTree::build` received the same key twice. + #[error("duplicate key in commitment: {}", hex::encode(.0))] + DuplicateKey(XorName), + /// Underlying ML-DSA-65 signer failed. + #[error("commitment signing failed: {0}")] + SignatureFailed(String), +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)] +mod tests { + use super::*; + + fn xn(byte: u8) -> XorName { + [byte; 32] + } + + fn bh(byte: u8) -> [u8; 32] { + [byte ^ 0x5A; 32] + } + + #[test] + fn empty_key_set_rejected() { + let result = MerkleTree::build(vec![]); + assert!(matches!(result, Err(CommitmentError::EmptyKeySet))); + } + + #[test] + fn duplicate_keys_rejected() { + let result = MerkleTree::build(vec![(xn(1), bh(1)), (xn(1), bh(2))]); + assert!(matches!(result, Err(CommitmentError::DuplicateKey(_)))); + } + + #[test] + fn single_leaf_tree_root_is_leaf_hash() { + let key = xn(1); + let bytes_hash = bh(1); + let tree = MerkleTree::build(vec![(key, bytes_hash)]).unwrap(); + assert_eq!(tree.root(), leaf_hash(&key, &bytes_hash)); + assert_eq!(tree.key_count(), 1); + assert_eq!(tree.path_for(&key), Some(vec![])); + // Empty path verifies trivially (root == leaf). + assert!(verify_path( + &leaf_hash(&key, &bytes_hash), + &[], + 0, + 1, + &tree.root() + )); + } + + #[test] + fn two_leaf_tree_root_combines_both_leaves() { + let entries = vec![(xn(1), bh(1)), (xn(2), bh(2))]; + let tree = MerkleTree::build(entries).unwrap(); + // Sorted order: xn(1), xn(2). + let l1 = leaf_hash(&xn(1), &bh(1)); + let l2 = leaf_hash(&xn(2), &bh(2)); + assert_eq!(tree.root(), node_hash(&l1, &l2)); + } + + #[test] + fn root_is_deterministic_regardless_of_input_order() { + let mut a = vec![(xn(3), bh(3)), (xn(1), bh(1)), (xn(2), bh(2))]; + let mut b = vec![(xn(2), bh(2)), (xn(3), bh(3)), (xn(1), bh(1))]; + let tree_a = MerkleTree::build(a.clone()).unwrap(); + let tree_b = MerkleTree::build(b.clone()).unwrap(); + a.sort_by_key(|x| x.0); + b.sort_by_key(|x| x.0); + assert_eq!(tree_a.root(), tree_b.root()); + } + + fn xn_u32(i: u32) -> XorName { + let mut k = [0u8; 32]; + k[..4].copy_from_slice(&i.to_le_bytes()); + k + } + + fn bh_u32(i: u32) -> [u8; 32] { + let mut h = [0u8; 32]; + h[..4].copy_from_slice(&i.to_le_bytes()); + h[4] = 0x5A; + h + } + + #[test] + fn paths_verify_for_every_key_at_various_sizes() { + for n in [1u32, 2, 3, 4, 5, 7, 8, 16, 17, 100, 333] { + let entries: Vec<_> = (0..n).map(|i| (xn_u32(i), bh_u32(i))).collect(); + let tree = MerkleTree::build(entries.clone()).unwrap(); + let root = tree.root(); + let key_count = tree.key_count(); + for (idx, (k, _)) in tree.iter_leaves().enumerate() { + let path = tree.path_for(k).expect("path for present key"); + let bytes_hash = entries.iter().find(|(kk, _)| kk == k).unwrap().1; + let lh = leaf_hash(k, &bytes_hash); + assert!( + verify_path(&lh, &path, idx, key_count, &root), + "path verify failed at n={n} idx={idx}", + ); + } + } + } + + #[test] + fn path_for_absent_key_is_none() { + let tree = MerkleTree::build(vec![(xn(1), bh(1)), (xn(2), bh(2))]).unwrap(); + assert!(tree.path_for(&xn(99)).is_none()); + } + + #[test] + fn tampered_bytes_hash_breaks_path_verify() { + // Use 8 distinct sorted keys so the index in `entries` matches the + // sorted leaf index in the tree. + let entries: Vec<_> = (1..=8u8).map(|i| (xn(i), bh(i))).collect(); + let tree = MerkleTree::build(entries.clone()).unwrap(); + let root = tree.root(); + let (k, _) = &entries[3]; + let path = tree.path_for(k).unwrap(); + + let wrong_bytes_hash = [0xFFu8; 32]; + let lh = leaf_hash(k, &wrong_bytes_hash); + assert!(!verify_path(&lh, &path, 3, 8, &root)); + } + + #[test] + fn tampered_path_node_breaks_verify() { + let entries: Vec<_> = (1..=8u8).map(|i| (xn(i), bh(i))).collect(); + let tree = MerkleTree::build(entries.clone()).unwrap(); + let root = tree.root(); + let (k, _) = &entries[3]; + let mut path = tree.path_for(k).unwrap(); + path[0][0] ^= 0x01; + let lh = leaf_hash(k, &bh(4)); + assert!(!verify_path(&lh, &path, 3, 8, &root)); + } + + #[test] + fn wrong_leaf_index_breaks_verify() { + let entries: Vec<_> = (1..=8u8).map(|i| (xn(i), bh(i))).collect(); + let tree = MerkleTree::build(entries.clone()).unwrap(); + let root = tree.root(); + let (k, _) = &entries[3]; + let path = tree.path_for(k).unwrap(); + let lh = leaf_hash(k, &bh(4)); + // Correct index is 3; using 2 should fail because the left/right + // child ordering swaps. + assert!(!verify_path(&lh, &path, 2, 8, &root)); + assert!(verify_path(&lh, &path, 3, 8, &root)); + } + + #[test] + fn out_of_range_leaf_index_rejected() { + let entries: Vec<_> = (1..=8u8).map(|i| (xn(i), bh(i))).collect(); + let tree = MerkleTree::build(entries.clone()).unwrap(); + let root = tree.root(); + let (k, _) = &entries[3]; + let path = tree.path_for(k).unwrap(); + let lh = leaf_hash(k, &bh(4)); + // leaf_index >= key_count must be rejected without even hashing. + assert!(!verify_path(&lh, &path, 8, 8, &root)); + assert!(!verify_path(&lh, &path, 99, 8, &root)); + // Valid baseline. + assert!(verify_path(&lh, &path, 3, 8, &root)); + } + + #[test] + fn wrong_path_length_rejected_pre_hashing() { + let entries: Vec<_> = (1..=8u8).map(|i| (xn(i), bh(i))).collect(); + let tree = MerkleTree::build(entries.clone()).unwrap(); + let root = tree.root(); + let (k, _) = &entries[3]; + let path = tree.path_for(k).unwrap(); + let lh = leaf_hash(k, &bh(4)); + // For key_count=8 the expected path length is 3 (ceil(log2(8))=3). + assert_eq!(path.len(), 3); + // Truncating breaks structural check. + let short: Vec<_> = path.iter().take(2).copied().collect(); + assert!(!verify_path(&lh, &short, 3, 8, &root)); + // Padding too long also breaks structural check. + let mut long = path; + long.push([0; 32]); + assert!(!verify_path(&lh, &long, 3, 8, &root)); + } + + #[test] + fn zero_key_count_rejected() { + // Defensive: even with an empty path and correct-shape root, a + // commitment claiming zero keys is nonsensical. + let lh = [0u8; 32]; + assert!(!verify_path(&lh, &[], 0, 0, &[0u8; 32])); + } + + #[test] + fn out_of_protocol_key_count_rejected() { + // Wire-supplied key_count exceeding MAX_COMMITMENT_KEY_COUNT is + // refused before any hashing. Defends against the round-3 BLOCKER: + // `next_power_of_two()` would otherwise panic in debug and wrap in + // release on key_count > 1 << 31. + let lh = [0u8; 32]; + assert!(!verify_path( + &lh, + &[], + 0, + MAX_COMMITMENT_KEY_COUNT + 1, + &[0u8; 32] + )); + assert!(!verify_path(&lh, &[], 0, u32::MAX, &[0u8; 32])); + } + + fn pk_bytes(pk: &MlDsaPublicKey) -> Vec { + pk.to_bytes() + } + + #[test] + fn sign_and_verify_roundtrip() { + let dsa = ml_dsa_65(); + let (pk, sk) = dsa.generate_keypair().unwrap(); + let entries: Vec<_> = (0..5u8).map(|i| (xn(i), bh(i))).collect(); + let tree = MerkleTree::build(entries).unwrap(); + let root = tree.root(); + let key_count = tree.key_count(); + let peer_id = [0xAB; 32]; + let pk_b = pk_bytes(&pk); + let signature = sign_commitment(&sk, &root, key_count, &peer_id, &pk_b).unwrap(); + let c = StorageCommitment { + root, + key_count, + sender_peer_id: peer_id, + sender_public_key: pk_b, + signature, + }; + // Verifies via embedded key, no external lookup needed. + assert!(verify_commitment_signature(&c)); + } + + #[test] + fn signature_fails_when_root_tampered() { + let dsa = ml_dsa_65(); + let (pk, sk) = dsa.generate_keypair().unwrap(); + let root = [0u8; 32]; + let pk_b = pk_bytes(&pk); + let signature = sign_commitment(&sk, &root, 1, &[0; 32], &pk_b).unwrap(); + let c = StorageCommitment { + root: [1u8; 32], // tampered + key_count: 1, + sender_peer_id: [0; 32], + sender_public_key: pk_b, + signature, + }; + assert!(!verify_commitment_signature(&c)); + } + + #[test] + fn signature_fails_under_swapped_public_key() { + let dsa = ml_dsa_65(); + let (pk1, sk1) = dsa.generate_keypair().unwrap(); + let (pk2, _sk2) = dsa.generate_keypair().unwrap(); + let pk1_b = pk_bytes(&pk1); + let pk2_b = pk_bytes(&pk2); + // Sign under pk1 but embed pk2 — verification (using embedded key) + // should fail because pk2 didn't sign this payload AND because the + // signed payload binds pk1, not pk2. + let signature = sign_commitment(&sk1, &[0u8; 32], 1, &[0; 32], &pk1_b).unwrap(); + let c = StorageCommitment { + root: [0u8; 32], + key_count: 1, + sender_peer_id: [0; 32], + sender_public_key: pk2_b, + signature, + }; + assert!(!verify_commitment_signature(&c)); + } + + #[test] + fn signature_fails_with_garbage_bytes() { + let dsa = ml_dsa_65(); + let (pk, _sk) = dsa.generate_keypair().unwrap(); + let c = StorageCommitment { + root: [0u8; 32], + key_count: 1, + sender_peer_id: [0; 32], + sender_public_key: pk_bytes(&pk), + signature: vec![0u8; 100], // too short and zero-filled + }; + assert!(!verify_commitment_signature(&c)); + } + + #[test] + fn signature_fails_with_garbage_public_key() { + // Embedded pubkey is wrong length / invalid → from_bytes fails → + // verify returns false. Defends against malformed gossip. + let c = StorageCommitment { + root: [0u8; 32], + key_count: 1, + sender_peer_id: [0; 32], + sender_public_key: vec![0u8; 100], // wrong length + signature: vec![0u8; 3293], + }; + assert!(!verify_commitment_signature(&c)); + } + + #[test] + fn commitment_hash_differs_on_any_field_change() { + let dsa = ml_dsa_65(); + let (pk, sk) = dsa.generate_keypair().unwrap(); + let pk_b = pk_bytes(&pk); + let sig = sign_commitment(&sk, &[0; 32], 1, &[0; 32], &pk_b).unwrap(); + let c1 = StorageCommitment { + root: [0; 32], + key_count: 1, + sender_peer_id: [0; 32], + sender_public_key: pk_b, + signature: sig, + }; + let h1 = commitment_hash(&c1).unwrap(); + + let mut c2 = c1.clone(); + c2.root = [1; 32]; + assert_ne!(h1, commitment_hash(&c2).unwrap()); + + let mut c3 = c1.clone(); + c3.key_count = 2; + assert_ne!(h1, commitment_hash(&c3).unwrap()); + + let mut c4 = c1.clone(); + c4.sender_peer_id = [1; 32]; + assert_ne!(h1, commitment_hash(&c4).unwrap()); + + let mut c5 = c1.clone(); + c5.signature[0] ^= 1; + assert_ne!(h1, commitment_hash(&c5).unwrap()); + + let (pk_other, _) = dsa.generate_keypair().unwrap(); + let mut c6 = c1; + c6.sender_public_key = pk_bytes(&pk_other); + assert_ne!(h1, commitment_hash(&c6).unwrap()); + } + + #[test] + fn commitment_hash_stable_for_identical_input() { + let dsa = ml_dsa_65(); + let (pk, sk) = dsa.generate_keypair().unwrap(); + let pk_b = pk_bytes(&pk); + let sig = sign_commitment(&sk, &[7; 32], 42, &[3; 32], &pk_b).unwrap(); + let c = StorageCommitment { + root: [7; 32], + key_count: 42, + sender_peer_id: [3; 32], + sender_public_key: pk_b, + signature: sig, + }; + assert_eq!(commitment_hash(&c), commitment_hash(&c)); + } + + #[test] + fn commitment_hash_signature_length_change_changes_hash() { + // Postcard's varint length prefix means hashing a 1-byte signature + // and a 2-byte signature whose first byte is the same produces + // different commitment hashes — defends against the codex round-1 + // BLOCKER "omits the serialized length prefix." + let c1 = StorageCommitment { + root: [0; 32], + key_count: 1, + sender_peer_id: [0; 32], + sender_public_key: vec![0u8; 1952], + signature: vec![0xAB], + }; + let c2 = StorageCommitment { + root: [0; 32], + key_count: 1, + sender_peer_id: [0; 32], + sender_public_key: vec![0u8; 1952], + signature: vec![0xAB, 0x00], + }; + assert_ne!(commitment_hash(&c1).unwrap(), commitment_hash(&c2).unwrap()); + } + + #[test] + fn too_many_keys_rejected() { + let mut entries = Vec::with_capacity(MAX_COMMITMENT_KEY_COUNT as usize + 1); + for i in 0..=MAX_COMMITMENT_KEY_COUNT { + let mut k = [0u8; 32]; + k[..4].copy_from_slice(&i.to_le_bytes()); + entries.push((k, [0; 32])); + } + let result = MerkleTree::build(entries); + assert!(matches!(result, Err(CommitmentError::TooManyKeys(_)))); + } +} diff --git a/src/replication/commitment_state.rs b/src/replication/commitment_state.rs new file mode 100644 index 00000000..5c7c357e --- /dev/null +++ b/src/replication/commitment_state.rs @@ -0,0 +1,610 @@ +//! Responder-side commitment builder + rotation state. +//! +//! Phase 2b of the v12 storage-bound audit design. Builds, signs, and +//! caches a [`StorageCommitment`] over the responder's currently-stored +//! key set; serves audit lookups by `expected_commitment_hash`; retains +//! the previous commitment across one rotation so an audit pinned to it +//! does not false-fail at the rotation boundary (v5/v12 §4 retention). +//! +//! Rotation strategy: +//! +//! - `rotate(new_built)` atomically replaces `current` with `new_built` +//! and demotes the prior `current` to `previous`. The prior +//! `previous` is dropped. +//! - `lookup(hash)` reads the in-memory map and returns an [`Arc`] to +//! the matching `BuiltCommitment`, keeping it alive for the audit +//! response regardless of subsequent rotation (mirrors the `ArcSwap` +//! semantics specified in v6 §2: an in-flight reader holding its +//! `Arc` is unaffected by a concurrent rotate). +//! +//! No persistent disk state. Trees are rebuilt from `LmdbStorage` at +//! the next rotation tick. Memory cost is bounded by +//! `2 × (key_count × ~64 bytes + signature_size)` — for 10k keys, ~1.3 MB. + +use std::sync::Arc; +use std::time::Instant; + +use parking_lot::RwLock; +use saorsa_pqc::api::sig::MlDsaSecretKey; + +use crate::ant_protocol::XorName; +use crate::replication::commitment::{ + commitment_hash, sign_commitment, CommitmentError, MerkleTree, StorageCommitment, +}; + +/// Auditor-side per-peer commitment state. +/// +/// Holds two things that together implement v10/v12 §2 step 5 and §6: +/// - `last_commitment`: the most recently received, verified, signed +/// commitment from this peer. `None` if we've evicted it (TTL, +/// sybil cap, peer-removed) or never received one. +/// - `commitment_capable`: a **sticky** boolean that flips to `true` +/// on the first successful gossip ingest and NEVER reverts. Used +/// by holder-eligibility (§6) and bootstrap-claim shield: a peer +/// that has at least once proven it speaks v12 is forever held to +/// that standard. Without stickiness, a peer could flip the flag +/// off by silencing its gossip and downgrade to the weaker legacy +/// audit path. +#[derive(Debug, Clone)] +pub struct PeerCommitmentRecord { + /// Last verified commitment, or `None` if evicted/expired. + pub last_commitment: Option, + /// Sticky: true once this peer has gossiped a valid commitment. + /// Set on ingest. Never set back to false except by full + /// `PeerRemoved` cleanup. + pub commitment_capable: bool, + /// When `last_commitment` was received. Used for TTL on the + /// commitment itself (independent of the `commitment_capable` + /// stickiness — losing the commitment via TTL doesn't make us + /// forget the peer ever spoke v12). + pub received_at: Instant, + /// Last time we performed an ML-DSA signature verify for this + /// peer's commitment. Used to enforce the §2 step 3 rate limit + /// (at most one sig verify per peer per 60s). + pub last_sig_verify_at: Instant, +} + +impl PeerCommitmentRecord { + /// Construct from a freshly-verified commitment. `commitment_capable` + /// is set to `true` here and must remain so for the lifetime of the + /// record. + #[must_use] + pub fn from_verified(commitment: StorageCommitment, now: Instant) -> Self { + Self { + last_commitment: Some(commitment), + commitment_capable: true, + received_at: now, + last_sig_verify_at: now, + } + } + + /// Mark commitment-capable without storing a commitment (used when + /// we've TTL-expired the commitment itself but want to remember the + /// peer has spoken v12 before). + #[must_use] + pub fn capable_but_no_commitment(now: Instant) -> Self { + Self { + last_commitment: None, + commitment_capable: true, + received_at: now, + last_sig_verify_at: now, + } + } +} + +/// A fully-built commitment: signed wire blob, cached hash, Merkle tree +/// for inclusion proofs, and a sorted leaf-index lookup for the auditor's +/// `leaf_index` field. +/// +/// Held inside an [`Arc`] so audit responders can grab a reference and +/// build a reply without holding the [`ResponderCommitmentState`] read +/// lock for the duration of the response. +pub struct BuiltCommitment { + /// The signed wire blob. + commitment: StorageCommitment, + /// `commitment_hash(commitment)` — cached so audit lookups don't + /// re-serialize on every match. + cached_hash: [u8; 32], + /// The Merkle tree behind the commitment. `path_for(key)` produces + /// the inclusion proof; the responder's leaf-index lookup is below. + tree: MerkleTree, + /// `sorted_keys[i]` is the key at leaf index `i`. Sorted ascending + /// so binary search reconstructs `leaf_index` for any key in + /// `O(log n)`. + sorted_keys: Vec, +} + +impl BuiltCommitment { + /// Build a commitment over `entries = [(key, bytes_hash), ...]` and + /// sign it with `secret_key`. + /// + /// `entries` does not need to be sorted (the inner [`MerkleTree`] + /// sorts internally); `sender_peer_id` is bound into the signature + /// and the commitment. + /// + /// # Errors + /// + /// Returns the wrapped [`CommitmentError`] on empty key sets, + /// over-cap key counts, duplicates, or signing failures. + pub fn build( + entries: Vec<(XorName, [u8; 32])>, + sender_peer_id: &[u8; 32], + secret_key: &MlDsaSecretKey, + sender_public_key: &[u8], + ) -> Result { + let tree = MerkleTree::build(entries)?; + let root = tree.root(); + let key_count = tree.key_count(); + let signature = sign_commitment( + secret_key, + &root, + key_count, + sender_peer_id, + sender_public_key, + )?; + let commitment = StorageCommitment { + root, + key_count, + sender_peer_id: *sender_peer_id, + sender_public_key: sender_public_key.to_vec(), + signature, + }; + // `commitment_hash` only returns None on a postcard serialization + // failure, which for our fixed-size commitment cannot occur in + // practice (ML-DSA-65 signature is 3293 bytes). If it ever + // somehow does, surface as a SignatureFailed so callers don't + // need a new error variant for an unreachable case. + let cached_hash = commitment_hash(&commitment).ok_or_else(|| { + CommitmentError::SignatureFailed("commitment serialization failed".to_string()) + })?; + // Recover the sorted key list from the tree (path_for uses + // binary search internally, but we need an explicit list for + // leaf_index lookup at audit time). + let sorted_keys: Vec = tree.sorted_keys(); + Ok(Self { + commitment, + cached_hash, + tree, + sorted_keys, + }) + } + + /// The signed wire blob. + #[must_use] + pub fn commitment(&self) -> &StorageCommitment { + &self.commitment + } + + /// The cached commitment hash. Equal to + /// [`crate::replication::commitment::commitment_hash`] + /// `(self.commitment())`. + #[must_use] + pub fn hash(&self) -> [u8; 32] { + self.cached_hash + } + + /// The Merkle tree behind this commitment. + /// + /// Used by the subtree-audit responder to plan a proof (select the + /// nonce-determined branch and read its sibling cut-hashes). + #[must_use] + pub fn tree(&self) -> &MerkleTree { + &self.tree + } + + /// Inclusion path + leaf index for `key`, if it is in this + /// commitment. Returns `None` if `key` is not committed. + #[must_use] + pub fn proof_for(&self, key: &XorName) -> Option<(Vec<[u8; 32]>, u32)> { + let idx = self.sorted_keys.binary_search(key).ok()?; + let path = self.tree.path_for(key)?; + // u32 cast safe because MerkleTree::build rejects > MAX_COMMITMENT_KEY_COUNT. + let leaf_index = u32::try_from(idx).unwrap_or(u32::MAX); + Some((path, leaf_index)) + } +} + +/// Number of recently-gossiped commitments a responder stays answerable for +/// (ADR-0002 "you stay answerable for what you publish"). +/// +/// The auditor only ever pins a commitment it received via gossip, so retaining +/// the last two **actually-gossiped** commitments (plus the current one) +/// guarantees an honest node can always answer a pin the auditor could have +/// formed. Two — not one — absorbs the race where the auditor pins the +/// commitment a node published just before its newest one. Retention is keyed on +/// gossip emission, NOT on the rotation timer: a node that rebuilds its tree +/// faster than it gossips never drops a commitment it actually put on the wire, +/// so it is never wrongly failed for "unknown commitment hash". +const RETAINED_GOSSIPED_COMMITMENTS: usize = 2; + +/// Responder retention state (ADR-0002). +/// +/// Keeps the current (latest-rotated) commitment plus every commitment whose +/// hash is among the last `RETAINED_GOSSIPED_COMMITMENTS` *gossiped* hashes. +/// A built-but-never-gossiped commitment is dropped on the next rotation unless +/// it gets gossiped. Rotation and gossip are the only paths that mutate this. +pub struct ResponderCommitmentState { + inner: RwLock, +} + +struct Inner { + /// Newest-first: `slots[0]` is the current commitment; the rest are + /// retained because their hash is still in `recently_gossiped`. + slots: Vec>, + /// Hashes of the last `RETAINED_GOSSIPED_COMMITMENTS` commitments actually + /// emitted on the wire, newest-first. A commitment is retained iff it is + /// the current one or its hash appears here. + recently_gossiped: Vec<[u8; 32]>, +} + +impl Default for ResponderCommitmentState { + fn default() -> Self { + Self::new() + } +} + +impl ResponderCommitmentState { + /// Empty state: no commitments yet. Audits before the first rotation + /// see `None` lookups and the auditor falls back to the legacy plain + /// digest path. + #[must_use] + pub fn new() -> Self { + Self { + inner: RwLock::new(Inner { + slots: Vec::with_capacity(RETAINED_GOSSIPED_COMMITMENTS + 1), + recently_gossiped: Vec::with_capacity(RETAINED_GOSSIPED_COMMITMENTS), + }), + } + } + + /// Rotate: the freshly-rebuilt commitment becomes `current`. Slots that are + /// neither the new current nor among the last gossiped hashes are dropped + /// (a built-but-never-gossiped commitment does not linger). + pub fn rotate(&self, new_current: BuiltCommitment) { + let new_current = Arc::new(new_current); + let mut guard = self.inner.write(); + guard.slots.insert(0, new_current); + prune_slots(&mut guard); + } + + /// Record that `hash` was emitted on the wire (gossiped). Keeps the last + /// `RETAINED_GOSSIPED_COMMITMENTS` gossiped hashes so the matching + /// commitments stay answerable (ADR-0002). Call at every gossip-emit site. + pub fn mark_gossiped(&self, hash: [u8; 32]) { + let mut guard = self.inner.write(); + // Move to front (newest), de-duplicating. + guard.recently_gossiped.retain(|h| h != &hash); + guard.recently_gossiped.insert(0, hash); + guard + .recently_gossiped + .truncate(RETAINED_GOSSIPED_COMMITMENTS); + prune_slots(&mut guard); + } + + /// Look up a commitment by its hash. Returns `Some(arc)` if `hash` + /// matches any retained slot. The returned `Arc` keeps the + /// [`BuiltCommitment`] alive for as long as the caller holds it, + /// even if a concurrent `rotate` ages it out of the retention buffer. + #[must_use] + pub fn lookup_by_hash(&self, hash: &[u8; 32]) -> Option> { + let guard = self.inner.read(); + for c in &guard.slots { + if &c.cached_hash == hash { + return Some(Arc::clone(c)); + } + } + None + } + + /// Snapshot the current commitment, if any. Used by the gossip + /// piggyback path: emit `state.current()` on the next outbound + /// `NeighborSyncRequest`/`Response`. + #[must_use] + pub fn current(&self) -> Option> { + self.inner.read().slots.first().map(Arc::clone) + } + + /// Number of commitment slots currently retained (the current commitment + /// plus any still-answerable recently-gossiped ones). Used only for the + /// v12 `commitment_rotated` event's `retained_slots` field; carries no + /// behavioural meaning. + #[must_use] + pub fn retained_slot_count(&self) -> usize { + self.inner.read().slots.len() + } + + /// Drop every retained slot. Called when the local store has + /// transitioned to empty: keeping the previously-advertised + /// commitment alive would invite audit failures (we can no longer + /// answer for any of the keys we committed to), and would leave + /// remote auditors pinning a hash this node will never satisfy + /// again. After clearing, the gossip piggyback path will emit + /// `commitment: None` until a fresh rotation occurs. + /// + /// This is the one sanctioned escape from the "callers MUST NOT + /// clear retention by any other mechanism" invariant — empty + /// storage means there is nothing to retain. + pub fn clear_all(&self) { + let mut guard = self.inner.write(); + guard.slots.clear(); + guard.recently_gossiped.clear(); + } +} + +/// Keep `slots[0]` (the current commitment) and any slot whose hash is among +/// the recently-gossiped hashes; drop the rest. Idempotent; preserves +/// newest-first order. This is the single place retention is enforced. +fn prune_slots(inner: &mut Inner) { + let gossiped = &inner.recently_gossiped; + let mut idx = 0usize; + inner.slots.retain(|c| { + let keep = idx == 0 || gossiped.contains(&c.cached_hash); + idx += 1; + keep + }); +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)] +mod tests { + use super::*; + use crate::replication::commitment::{commitment_hash, leaf_hash, verify_path}; + use saorsa_pqc::api::sig::ml_dsa_65; + + fn key(byte: u8) -> XorName { + let mut k = [0u8; 32]; + k[0] = byte; + k + } + + fn bh(byte: u8) -> [u8; 32] { + [byte ^ 0x5A; 32] + } + + fn keypair() -> (saorsa_pqc::api::sig::MlDsaPublicKey, MlDsaSecretKey) { + ml_dsa_65().generate_keypair().unwrap() + } + + #[test] + fn built_commitment_hash_matches_global_hash() { + let (pk, sk) = keypair(); + let pk_bytes = pk.to_bytes(); + let entries: Vec<_> = (1..=5u8).map(|i| (key(i), bh(i))).collect(); + let built = BuiltCommitment::build(entries, &[0xAB; 32], &sk, &pk_bytes).unwrap(); + let expected = commitment_hash(built.commitment()).unwrap(); + assert_eq!(built.hash(), expected); + } + + #[test] + fn built_commitment_proof_verifies_under_its_own_root() { + let (pk, sk) = keypair(); + let pk_bytes = pk.to_bytes(); + let entries: Vec<_> = (1..=8u8).map(|i| (key(i), bh(i))).collect(); + let built = BuiltCommitment::build(entries.clone(), &[1; 32], &sk, &pk_bytes).unwrap(); + let root = built.commitment().root; + let key_count = built.commitment().key_count; + + for (k, _) in &entries { + let (path, leaf_index) = built.proof_for(k).expect("present"); + // Find the bytes_hash for this key. + let bh_k = entries.iter().find(|(kk, _)| kk == k).unwrap().1; + let lh = leaf_hash(k, &bh_k); + assert!( + verify_path(&lh, &path, leaf_index as usize, key_count, &root), + "path verify failed for key {k:?}" + ); + } + } + + #[test] + fn proof_for_absent_key_is_none() { + let (pk, sk) = keypair(); + let pk_bytes = pk.to_bytes(); + let built = BuiltCommitment::build( + vec![(key(1), bh(1)), (key(2), bh(2))], + &[0; 32], + &sk, + &pk_bytes, + ) + .unwrap(); + assert!(built.proof_for(&key(99)).is_none()); + } + + #[test] + fn empty_state_returns_none() { + let state = ResponderCommitmentState::new(); + assert!(state.current().is_none()); + assert!(state.lookup_by_hash(&[0; 32]).is_none()); + } + + #[test] + fn clear_all_drops_every_slot() { + // Empty-storage transition: after clear_all, the gossip path + // must observe `current() == None` so it stops piggybacking a + // commitment the node can no longer answer audits against. + let (pk, sk) = keypair(); + let pk_bytes = pk.to_bytes(); + let state = ResponderCommitmentState::new(); + let peer_id = *blake3::hash(&pk.to_bytes()).as_bytes(); + + let c1 = BuiltCommitment::build(vec![(key(1), bh(1))], &peer_id, &sk, &pk_bytes).unwrap(); + let h1 = c1.hash(); + state.rotate(c1); + state.mark_gossiped(h1); // gossiped → retained across the next rotation + let c2 = BuiltCommitment::build(vec![(key(2), bh(2))], &peer_id, &sk, &pk_bytes).unwrap(); + let h2 = c2.hash(); + state.rotate(c2); + state.mark_gossiped(h2); + + assert!(state.current().is_some()); + assert!(state.lookup_by_hash(&h1).is_some()); + + state.clear_all(); + + assert!(state.current().is_none()); + assert!(state.lookup_by_hash(&h1).is_none()); + } + + #[test] + fn lookup_arc_outlives_subsequent_rotation() { + // INV-R2: an in-flight audit responder that grabbed an Arc must + // be able to finish building the response even after the state + // rotates that commitment out past the retention window. + let (pk, sk) = keypair(); + let pk_bytes = pk.to_bytes(); + let state = ResponderCommitmentState::new(); + + let c1 = BuiltCommitment::build(vec![(key(1), bh(1))], &[0; 32], &sk, &pk_bytes).unwrap(); + let h1 = c1.hash(); + state.rotate(c1); + + let in_flight = state.lookup_by_hash(&h1).unwrap(); + + // c1 was never gossiped, so the next rotation (a new current) drops it + // from the retention buffer. + let c2 = BuiltCommitment::build(vec![(key(2), bh(2))], &[0; 32], &sk, &pk_bytes).unwrap(); + state.rotate(c2); + assert!(state.lookup_by_hash(&h1).is_none()); + + // But the in-flight Arc still works (INV: Arc keeps it alive). + assert_eq!(in_flight.hash(), h1); + assert!(in_flight.proof_for(&key(1)).is_some()); + } + + #[test] + fn gossiped_commitment_stays_answerable_across_rotations() { + // ADR-0002: a commitment that was actually gossiped stays answerable + // even after rotation, until it falls out of the last-2-gossiped window. + let (pk, sk) = keypair(); + let pk_bytes = pk.to_bytes(); + let state = ResponderCommitmentState::new(); + + let c1 = BuiltCommitment::build(vec![(key(1), bh(1))], &[0; 32], &sk, &pk_bytes).unwrap(); + let h1 = c1.hash(); + state.rotate(c1); + state.mark_gossiped(h1); // we put c1 on the wire + + // Rotate to c2 and gossip it. c1 is still within the last-2-gossiped. + let c2 = BuiltCommitment::build(vec![(key(2), bh(2))], &[0; 32], &sk, &pk_bytes).unwrap(); + let h2 = c2.hash(); + state.rotate(c2); + state.mark_gossiped(h2); + assert!( + state.lookup_by_hash(&h1).is_some(), + "c1 must stay answerable" + ); + assert!(state.lookup_by_hash(&h2).is_some()); + + // Rotate to c3 and gossip it. Now the last-2-gossiped are {h3, h2}; + // h1 has fallen out of the window and is dropped. + let c3 = BuiltCommitment::build(vec![(key(3), bh(3))], &[0; 32], &sk, &pk_bytes).unwrap(); + let h3 = c3.hash(); + state.rotate(c3); + state.mark_gossiped(h3); + assert!( + state.lookup_by_hash(&h1).is_none(), + "c1 aged out of gossip window" + ); + assert!(state.lookup_by_hash(&h2).is_some()); + assert!(state.lookup_by_hash(&h3).is_some()); + } + + #[test] + fn current_plus_last_two_gossiped_are_simultaneously_answerable() { + // ADR-0002 "Two, not one": the retention depth must keep BOTH of the + // last two gossiped commitments answerable at the same time, alongside + // the current one. This is the property that "absorbs the race where an + // auditor asks about the commitment a node published just before its + // newest one". The existing across-rotations test only ever checks two + // hashes at once; this one proves three DISTINCT commitments are live + // simultaneously and that the third-oldest gossiped root is dropped — + // i.e. RETAINED_GOSSIPED_COMMITMENTS is exactly 2, not 1 and not 3. + let (pk, sk) = keypair(); + let pk_bytes = pk.to_bytes(); + let state = ResponderCommitmentState::new(); + + // Gossip three commitments in order: c1, c2, c3. After this the current + // slot is c3 and the last-two-gossiped are {h3, h2}. But c2 and c1 also + // need to be checked relative to the window: once c3 is gossiped, the + // window is {h3, h2}; c1 (the 3rd-oldest gossiped) must be gone. + let c1 = BuiltCommitment::build(vec![(key(1), bh(1))], &[0; 32], &sk, &pk_bytes).unwrap(); + let h1 = c1.hash(); + state.rotate(c1); + state.mark_gossiped(h1); + + let c2 = BuiltCommitment::build(vec![(key(2), bh(2))], &[0; 32], &sk, &pk_bytes).unwrap(); + let h2 = c2.hash(); + state.rotate(c2); + state.mark_gossiped(h2); + + // At this moment: current = c2, last-2-gossiped = {h2, h1}. Both the + // current AND the previously-gossiped c1 must be answerable — the "two, + // not one" race window. c1 is the commitment "published just before the + // newest one" and an auditor may still pin it. + assert!( + state.lookup_by_hash(&h1).is_some(), + "the commitment published just before the newest one must stay answerable" + ); + assert!( + state.lookup_by_hash(&h2).is_some(), + "current must be answerable" + ); + assert_ne!(h1, h2, "the two retained commitments must be distinct"); + + // Now gossip a third distinct commitment c3. Window becomes {h3, h2}. + // c3 (current) + c2 + c1: c1 must now be dropped (3rd-oldest gossiped), + // while c2 and c3 remain. This proves depth is exactly 2 beyond... no: + // depth is 2 gossiped TOTAL including current's hash once gossiped. + let c3 = BuiltCommitment::build(vec![(key(3), bh(3))], &[0; 32], &sk, &pk_bytes).unwrap(); + let h3 = c3.hash(); + state.rotate(c3); + state.mark_gossiped(h3); + + assert_ne!(h2, h3); + assert_ne!(h1, h3); + assert!( + state.lookup_by_hash(&h3).is_some(), + "current (c3) answerable" + ); + assert!( + state.lookup_by_hash(&h2).is_some(), + "c2 (published just before newest) answerable — the race-absorbing slot" + ); + assert!( + state.lookup_by_hash(&h1).is_none(), + "c1 is the 3rd-oldest gossiped root and MUST be dropped — depth is exactly 2" + ); + } + + #[test] + fn ungossiped_rebuild_does_not_evict_gossiped_commitment() { + // The rebuild-faster-than-gossip case: a node rebuilds (rotates) several + // times without gossiping. The last *gossiped* commitment must remain + // answerable so the node is not wrongly failed for "unknown hash". + let (pk, sk) = keypair(); + let pk_bytes = pk.to_bytes(); + let state = ResponderCommitmentState::new(); + + let c1 = BuiltCommitment::build(vec![(key(1), bh(1))], &[0; 32], &sk, &pk_bytes).unwrap(); + let h1 = c1.hash(); + state.rotate(c1); + state.mark_gossiped(h1); + + // Several ungossiped rebuilds. + for i in 2..=6u8 { + let c = + BuiltCommitment::build(vec![(key(i), bh(i))], &[0; 32], &sk, &pk_bytes).unwrap(); + state.rotate(c); + } + // h1 was gossiped and is still within the last-2-gossiped window + // (nothing else was gossiped), so it must still be answerable. + assert!( + state.lookup_by_hash(&h1).is_some(), + "gossiped commitment must survive ungossiped rebuilds" + ); + } +} diff --git a/src/replication/config.rs b/src/replication/config.rs index 1ca8b3db..b132b6fc 100644 --- a/src/replication/config.rs +++ b/src/replication/config.rs @@ -104,10 +104,57 @@ pub const AUDIT_TICK_INTERVAL_MIN: Duration = Duration::from_secs(AUDIT_TICK_INT /// Audit scheduler cadence range (max). pub const AUDIT_TICK_INTERVAL_MAX: Duration = Duration::from_secs(AUDIT_TICK_INTERVAL_MAX_SECS); -/// Base audit response deadline (independent of challenge size). -const AUDIT_RESPONSE_BASE_SECS: u64 = 10; -/// Per-key allowance added to the base audit response deadline. -const AUDIT_RESPONSE_PER_KEY_MS: u64 = 20; +/// Floor on the audit response deadline (independent of challenge size). +/// +/// Sized to absorb worst-case global RTT for the audit envelope +/// (the request + response messages are KB-scale, not chunk-scale) +/// plus scheduling jitter. Tokyo↔NY round-trip is ~150ms each way, +/// so 2 seconds comfortably covers cross-continent communication +/// for any audit. +const AUDIT_RESPONSE_FLOOR_SECS: u64 = 2; + +/// Conservative honest-responder read throughput, in bytes per second. +/// +/// Used to size the audit response deadline. An honest peer answers +/// a k-key challenge by reading k chunks from local disk, computing +/// BLAKE3 + path proofs, and signing the response. The bottleneck is +/// disk read; BLAKE3 at ~3 GB/s + ML-DSA signing at ~3 ms are +/// negligible. +/// +/// Set conservatively below any modern SSD (typical: 500 MB/s+). +/// At 50 MB/s, a k=10 sample at 4 MiB chunks reads in ~0.8s, well +/// inside even an aggressive timeout. A relay attacker who must +/// fetch the same 40 MB over the network at typical bandwidth +/// (100 Mbps = 12.5 MB/s) takes 3+ seconds for the data alone, plus +/// per-chunk network round-trips. At larger sample sizes the gap +/// is exponential in the relay's disadvantage. +const AUDIT_HONEST_READ_BPS: u64 = 50 * 1024 * 1024; + +/// Slack multiplier on the honest-read estimate. +/// +/// Set so an honest peer that's slower than `HONEST_READ_BPS` (e.g. an +/// HDD-backed node, or one under load) still answers within the +/// timeout. 5× is generous; a relay peer fetching the same data over a +/// residential link (~5-12 MB/s) sees ~10-100× higher latency than disk +/// and misses the budget. This is an economic deterrent calibrated for +/// residential bandwidth, NOT a hard cryptographic bound — a relay on a +/// datacenter cross-connect could still fetch fast enough to answer in +/// time (see the §7 note on `audit_response_timeout`). +const AUDIT_RESPONSE_HONEST_MULTIPLIER: u64 = 5; + +/// Single-key prune audit response deadline. +/// +/// Prune audits ask a peer whether they still hold one specific key +/// they previously claimed. The relay-defence rationale that motivates +/// the tight commitment-bound timeout does NOT apply here: the +/// auditor's own out-of-range hysteresis (`PRUNE_HYSTERESIS_DURATION`, +/// 3 days) already makes "fetch on demand" infeasible as a sustained +/// strategy. +/// +/// Sized to comfortably accommodate cold cross-continent QUIC +/// handshake plus scheduling jitter on a busy honest peer answering +/// a single-key challenge: 10 s. +const PRUNE_AUDIT_RESPONSE_SECS: u64 = 10; /// Maximum duration a peer may claim bootstrap status before penalties apply. const BOOTSTRAP_CLAIM_GRACE_PERIOD_SECS: u64 = 24 * 60 * 60; // 24 h @@ -121,7 +168,23 @@ const PRUNE_HYSTERESIS_DURATION_SECS: u64 = 3 * 24 * 60 * 60; // 3 days pub const PRUNE_HYSTERESIS_DURATION: Duration = Duration::from_secs(PRUNE_HYSTERESIS_DURATION_SECS); /// Protocol identifier for replication operations. -pub const REPLICATION_PROTOCOL_ID: &str = "autonomi.ant.replication.v1"; +/// +/// Bumped to `v2` for the v12 storage-bound audit. That change extends the +/// wire types (`NeighborSyncRequest`/`Response` carry an optional +/// `StorageCommitment`, `AuditChallenge` carries an optional pinned hash, and +/// `AuditResponse` gains a `CommitmentBound` variant). The encoding is NOT +/// backward/forward compatible: postcard is non-self-describing, so a v2 node +/// cannot decode a v1 node's shorter message (it hits end-of-buffer), and a +/// v1 node mis-handles the v2 trailer. Rather than risk mis-decode, we route +/// v12 replication on a distinct protocol id: a node only delivers messages +/// whose topic matches its own id (see the topic check in `mod.rs`), so v1 and +/// v2 nodes simply do not exchange replication traffic during a mixed-version +/// window — they ignore each other's replication messages instead of +/// corrupting state. This is the rollout-safe behaviour: no cross-version +/// decode, no spurious eviction. Replication between matched-version peers is +/// unaffected. (DHT routing/lookups are a separate protocol and continue to +/// span both versions.) +pub const REPLICATION_PROTOCOL_ID: &str = "autonomi.ant.replication.v2"; /// 10 MiB — maximum replication wire message size (accommodates hint batches). const REPLICATION_MESSAGE_SIZE_MIB: usize = 10; @@ -147,6 +210,47 @@ pub const PENDING_VERIFY_MAX_AGE: Duration = Duration::from_secs(PENDING_VERIFY_ /// Trust event weight for confirmed audit failures. pub const AUDIT_FAILURE_TRUST_WEIGHT: f64 = 5.0; +/// Consecutive audit *timeouts* a peer may accumulate before a timeout is +/// reported as an `ApplicationFailure` trust event. +/// +/// The audit response timeout is an economic deterrent calibrated for +/// residential bandwidth, not a hard cryptographic bound: a single slow +/// response is routine for an honest node under transient load (GC pause, +/// disk flush, a burst of concurrent requests). Penalizing on the first +/// timeout false-positives those nodes. +/// +/// Requiring `N` *consecutive* timeouts before penalizing removes that +/// false-positive while preserving the deterrent against a peer that does not +/// actually store the data and must fetch it at audit time: such a peer is +/// slow on *every* audit and accumulates a fresh strike each tick until it +/// crosses the threshold, whereas an honest node answers normally between rare +/// slow ticks and any success resets its strike counter to zero (see +/// `handle_audit_result`). The discriminator is *persistence* of slowness +/// versus *transience*. This deliberately does not widen the per-challenge +/// window. Applies ONLY to `AuditFailureReason::Timeout`; confirmed +/// storage-integrity failures (`DigestMismatch` / `KeyAbsent` / `Rejected` / +/// `MalformedResponse`) remain instantly punishable. +pub const AUDIT_TIMEOUT_STRIKE_THRESHOLD: u32 = 3; + +/// Probability of launching a subtree audit when a peer's *changed* commitment +/// is ingested via gossip (ADR-0002). Keeps audits occasional surprise exams. +pub const AUDIT_ON_GOSSIP_PROBABILITY: f64 = 0.2; + +/// Per-peer cooldown between gossip-triggered subtree audits (ADR-0002), in +/// seconds. Bounds how often any one peer is audited regardless of gossip rate. +pub const AUDIT_ON_GOSSIP_COOLDOWN_SECS: u64 = 30 * 60; + +/// Number of subtree leaves spot-checked against real chunk bytes per audit +/// (ADR-0002 real-bytes layer). +pub const AUDIT_SPOTCHECK_COUNT: u32 = 8; + +/// Conservative leaf-count hint for sizing the subtree-audit response deadline. +/// +/// The deadline is set before the proof arrives, so we size for the largest +/// legal store: `sqrt(MAX_COMMITMENT_KEY_COUNT) = 1000`. Honest small stores +/// finish well within it. +pub const SUBTREE_AUDIT_TIMEOUT_LEAF_HINT: usize = 1000; + /// Maximum number of prune-confirmation audit challenges sent per prune pass. pub const MAX_PRUNE_AUDIT_CHALLENGES_PER_PASS: usize = 64; @@ -187,10 +291,24 @@ pub struct ReplicationConfig { pub audit_tick_interval_min: Duration, /// Audit scheduler cadence range (max). pub audit_tick_interval_max: Duration, - /// Base audit response deadline (key-independent component). - pub audit_response_base: Duration, - /// Per-key allowance added to the base audit response deadline. - pub audit_response_per_key: Duration, + /// Floor on the audit response deadline. Covers global RTT for + /// the small request/response envelope plus scheduling jitter. + /// See `AUDIT_RESPONSE_FLOOR_SECS` for sizing. + pub audit_response_floor: Duration, + /// Conservative honest-responder read throughput (bytes/sec). + /// Used to scale the audit response deadline against the size of + /// the challenge. Slow enough that even an HDD-backed honest peer + /// fits inside the budget; fast enough that a relay attacker who + /// must fetch bytes over the network falls outside. + pub audit_honest_read_bps: u64, + /// Slack multiplier on the honest-read estimate before + /// declaring an audit timed out. + pub audit_response_honest_multiplier: u64, + /// Single-key prune-audit response deadline. Has its own constant + /// because the relay-defence rationale that motivates the tight + /// commitment-bound budget does not apply to a single-key prune + /// challenge. + pub prune_audit_response_timeout: Duration, /// Maximum duration a peer may claim bootstrap status. pub bootstrap_claim_grace_period: Duration, /// Minimum continuous out-of-range duration before pruning a key. @@ -219,8 +337,10 @@ impl Default for ReplicationConfig { self_lookup_interval_max: SELF_LOOKUP_INTERVAL_MAX, audit_tick_interval_min: AUDIT_TICK_INTERVAL_MIN, audit_tick_interval_max: AUDIT_TICK_INTERVAL_MAX, - audit_response_base: Duration::from_secs(AUDIT_RESPONSE_BASE_SECS), - audit_response_per_key: Duration::from_millis(AUDIT_RESPONSE_PER_KEY_MS), + audit_response_floor: Duration::from_secs(AUDIT_RESPONSE_FLOOR_SECS), + audit_honest_read_bps: AUDIT_HONEST_READ_BPS, + audit_response_honest_multiplier: AUDIT_RESPONSE_HONEST_MULTIPLIER, + prune_audit_response_timeout: Duration::from_secs(PRUNE_AUDIT_RESPONSE_SECS), bootstrap_claim_grace_period: BOOTSTRAP_CLAIM_GRACE_PERIOD, prune_hysteresis_duration: PRUNE_HYSTERESIS_DURATION, verification_request_timeout: VERIFICATION_REQUEST_TIMEOUT, @@ -343,11 +463,72 @@ impl ReplicationConfig { } /// Compute the audit response timeout for a challenge with - /// `challenged_key_count` keys: `base + per_key * challenged_key_count`. + /// `challenged_key_count` keys, **sized to be tight enough that a + /// relay attacker that must fetch the chunk bytes from elsewhere + /// falls outside the budget**. + /// + /// Formula: + /// `floor + (challenged_bytes / honest_read_bps) × multiplier` + /// + /// Where `challenged_bytes = k × MAX_CHUNK_SIZE`. An honest peer + /// reads `k × 4 MiB` from local disk at `honest_read_bps` (set + /// conservatively at 50 MB/s — well below modern SSDs); the + /// multiplier of 5 absorbs jitter, BLAKE3, ML-DSA, and slow disks. + /// + /// A relay attacker on a residential link (~5-12 MB/s) who must + /// fetch the same `k × 4 MiB` over the network sees ~10-100× higher + /// latency than disk for the data alone, plus per-chunk round-trips, + /// and misses the budget — recording a timeout strike (per + /// `handle_audit_timeout` → `handle_audit_failure`). After + /// [`AUDIT_TIMEOUT_STRIKE_THRESHOLD`] consecutive timeouts this would + /// fire an `application_failure` trust event — but note that report is + /// currently suppressed for the breaking rollout (grep + /// TIMEOUT-EVICTION-DISABLED); the strike accounting still runs. + /// + /// This is an economic deterrent for the §7 relay limit calibrated + /// for residential bandwidth, NOT a hard bound: a relay on a + /// datacenter cross-connect (≥1 Gbps) can fetch `k × 4 MiB` fast + /// enough to answer in time. It raises the relay's cost (bandwidth + /// per audit) without claiming to make relaying impossible. The + /// cryptographic guarantee remains commitment-binding (the relay + /// must still hold or fetch the exact committed bytes); the timeout + /// only attacks the economics. #[must_use] pub fn audit_response_timeout(&self, challenged_key_count: usize) -> Duration { - let keys = u32::try_from(challenged_key_count).unwrap_or(u32::MAX); - self.audit_response_base + self.audit_response_per_key * keys + let bytes_per_key = u64::try_from(crate::ant_protocol::MAX_CHUNK_SIZE).unwrap_or(u64::MAX); + let keys = u64::try_from(challenged_key_count).unwrap_or(u64::MAX); + let total_bytes = bytes_per_key.saturating_mul(keys); + let bps = self.audit_honest_read_bps.max(1); + // Apply the multiplier BEFORE integer-dividing by bps so each + // chunk contributes a fractional second rather than rounding + // down to zero. Otherwise k in 1..=12 would all collapse to the + // floor (~40 MiB / 50 MB/s = 0 secs in integer arithmetic), and + // an honest HDD-backed peer at sqrt(N)=10 stored chunks could + // miss the budget under load. + let multiplied = total_bytes.saturating_mul(self.audit_response_honest_multiplier); + let scaled_secs = multiplied / bps; + // saturating_add avoids a panic if `scaled_secs` (or the floor + // plus it) would overflow `Duration::MAX`. + self.audit_response_floor + .saturating_add(Duration::from_secs(scaled_secs)) + } + + /// Number of subtree leaves to spot-check against real chunk bytes per + /// audit (ADR-0002 real-bytes layer). Faking a fraction `x` of nonced + /// leaves survives only `(1 - x)^k`. + #[must_use] + pub fn audit_spotcheck_count(&self) -> u32 { + AUDIT_SPOTCHECK_COUNT + } + + /// Conservative leaf-count hint for sizing the subtree-audit response + /// deadline before the proof arrives. + /// + /// The selected subtree holds about `sqrt(key_count)` real leaves; sizing + /// for a large store keeps an honest peer with a big store from timing out. + #[must_use] + pub fn subtree_audit_timeout_leaf_hint(&self) -> usize { + SUBTREE_AUDIT_TIMEOUT_LEAF_HINT } /// Returns a random duration in `[audit_tick_interval_min, @@ -409,6 +590,109 @@ mod tests { assert!((AUDIT_FAILURE_TRUST_WEIGHT - 5.0).abs() <= f64::EPSILON); } + #[test] + fn audit_timeout_strike_threshold_is_three() { + // Smallest threshold that tolerates back-to-back transient slowness + // while still penalizing a persistently-slow non-storing peer within a + // few audit ticks. + assert_eq!(AUDIT_TIMEOUT_STRIKE_THRESHOLD, 3); + } + + #[test] + fn replication_protocol_id_is_v2() { + // The v12 storage-bound audit is a breaking wire change. The protocol + // id MUST advance past v1 so v1 and v2 nodes never attempt to decode + // each other's replication messages (rollout safety — see the const's + // doc). If this regresses to v1, mixed-version nodes would mis-decode. + assert_eq!(REPLICATION_PROTOCOL_ID, "autonomi.ant.replication.v2"); + assert!(REPLICATION_PROTOCOL_ID.ends_with(".v2")); + } + + #[test] + fn audit_response_timeout_floor_at_zero_keys() { + let config = ReplicationConfig::default(); + assert_eq!( + config.audit_response_timeout(0), + Duration::from_secs(AUDIT_RESPONSE_FLOOR_SECS), + "zero-key challenge should yield the floor exactly" + ); + } + + #[test] + fn audit_response_timeout_scales_with_key_count() { + let config = ReplicationConfig::default(); + let t1 = config.audit_response_timeout(1); + let t10 = config.audit_response_timeout(10); + let t100 = config.audit_response_timeout(100); + assert!(t1 <= t10 && t10 < t100, "timeout must not decrease with k"); + + // Multiplier is applied before the divide so each chunk + // contributes ~0.4 s rather than rounding to 0 at small k. + // For k=1: (4_194_304 × 5) / 52_428_800 = 0 (still below 1 s), + // + 2 s floor = 2 s. + assert_eq!(t1, Duration::from_secs(2)); + + // For k=10: (10 × 4_194_304 × 5) / 52_428_800 = 4 s scaled, + // + 2 s floor = 6 s. An HDD-backed honest peer at 20 MB/s reads + // 40 MiB in ~2 s, comfortably inside the budget; a relay + // attacker fetching the same 40 MiB at 5 MB/s residential + // bandwidth needs ~8 s for the data alone, outside. + assert_eq!(t10, Duration::from_secs(6)); + + // For k=100: (100 × 4_194_304 × 5) / 52_428_800 = 40 s scaled, + // + 2 s floor = 42 s. + assert_eq!(t100, Duration::from_secs(42)); + } + + #[test] + fn audit_response_timeout_fits_honest_hdd_at_typical_sample_size() { + // The canonical audit sample is sqrt(N) at N stored chunks. + // At N=100 stored chunks, sample is 10. An HDD-backed honest + // peer at the slowest realistic random-read throughput (20 MB/s, + // well below modern HDDs which sustain 80-150 MB/s sequential) + // reads 10 × 4 MiB = 40 MiB in ~2 s. Add 300 ms cross-continent + // RTT, ~10 ms scheduling, ~3 ms ML-DSA sign, and the honest + // envelope is ~2.3 s. The 6 s budget at k=10 leaves >3 s of + // slack. + let config = ReplicationConfig::default(); + let budget = config.audit_response_timeout(10); + let realistic_hdd_bps: u64 = 20 * 1024 * 1024; + let bytes: u64 = 10 * 4 * 1024 * 1024; + let honest_envelope_secs = bytes / realistic_hdd_bps + 1; // +1 s for network/scheduling/sign + assert!( + Duration::from_secs(honest_envelope_secs) < budget, + "honest HDD envelope ({honest_envelope_secs}s) must fit inside k=10 budget ({}s)", + budget.as_secs(), + ); + } + + #[test] + fn audit_response_timeout_relay_is_outside_envelope() { + // The intended invariant: an honest peer with the SSD-class + // read budget fits inside `audit_response_timeout(k)`, while a + // relay attacker fetching k*4MiB over residential bandwidth + // (≈ 5 MB/s realistic for sustained download) does NOT. Spot- + // check this at k=100: honest budget is 42s, relay needs at + // least 100 * 4 MiB / 5 MB/s = 80s for the data alone, which + // exceeds the budget. + let config = ReplicationConfig::default(); + let budget = config.audit_response_timeout(100); + let relay_data_only = Duration::from_secs(100 * 4 * 1024 * 1024 / (5 * 1024 * 1024)); + assert!( + relay_data_only > budget, + "relay fetch ({}s) must exceed honest audit budget ({}s)", + relay_data_only.as_secs(), + budget.as_secs(), + ); + } + + #[test] + fn audit_response_timeout_saturates_on_huge_k() { + let config = ReplicationConfig::default(); + // Should not panic or overflow at extreme k values. + let _ = config.audit_response_timeout(usize::MAX); + } + #[test] fn quorum_threshold_zero_rejected() { let config = ReplicationConfig { diff --git a/src/replication/mod.rs b/src/replication/mod.rs index 996de487..17571961 100644 --- a/src/replication/mod.rs +++ b/src/replication/mod.rs @@ -17,6 +17,8 @@ pub mod admission; pub mod audit; pub mod bootstrap; +pub mod commitment; +pub mod commitment_state; pub mod config; pub mod fresh; pub mod neighbor_sync; @@ -24,7 +26,9 @@ pub mod paid_list; pub mod protocol; pub mod pruning; pub mod quorum; +pub mod recent_provers; pub mod scheduling; +pub mod subtree; pub mod types; use std::collections::{HashMap, HashSet}; @@ -46,6 +50,8 @@ use crate::ant_protocol::XorName; use crate::error::{Error, Result}; use crate::payment::PaymentVerifier; use crate::replication::audit::AuditTickResult; +use crate::replication::commitment::{commitment_hash, StorageCommitment}; +use crate::replication::commitment_state::{PeerCommitmentRecord, ResponderCommitmentState}; use crate::replication::config::{ max_parallel_fetch, ReplicationConfig, MAX_CONCURRENT_REPLICATION_SENDS, REPLICATION_PROTOCOL_ID, @@ -56,13 +62,14 @@ use crate::replication::protocol::{ VerificationResponse, }; use crate::replication::quorum::KeyVerificationOutcome; +use crate::replication::recent_provers::RecentProvers; use crate::replication::scheduling::ReplicationQueues; use crate::replication::types::{ AuditFailureReason, BootstrapClaimObservation, BootstrapState, FailureEvidence, HintPipeline, NeighborSyncState, PeerSyncRecord, RepairProofs, VerificationEntry, VerificationState, }; use crate::storage::LmdbStorage; -use saorsa_core::identity::PeerId; +use saorsa_core::identity::{NodeIdentity, PeerId}; use saorsa_core::{DhtNetworkEvent, P2PEvent, P2PNode, TrustEvent}; // --------------------------------------------------------------------------- @@ -85,6 +92,13 @@ struct VerificationCycleContext<'a> { bootstrap_state: &'a Arc>, is_bootstrapping: &'a Arc>, bootstrap_complete_notify: &'a Arc, + /// v12 §6 holder-eligibility inputs. The verifier downgrades a + /// peer's Present claim to Unresolved unless they're a credited + /// holder of the key (i.e. they recently passed a commitment-bound + /// audit on it under their currently-credited commitment hash). + last_commitment_by_peer: &'a Arc>>, + ever_capable_peers: &'a Arc>>, + recent_provers: &'a Arc>, } /// Fetch worker polling interval in milliseconds. @@ -93,9 +107,6 @@ const FETCH_WORKER_POLL_MS: u64 = 100; /// Verification worker polling interval in milliseconds. const VERIFICATION_WORKER_POLL_MS: u64 = 250; -/// Bootstrap drain check interval in seconds. -const BOOTSTRAP_DRAIN_CHECK_SECS: u64 = 5; - /// Standard trust event weight for per-operation success/failure signals. /// /// Used for individual replication fetch outcomes, integrity check failures, @@ -103,6 +114,67 @@ const BOOTSTRAP_DRAIN_CHECK_SECS: u64 = 5; /// is reserved for confirmed audit failures. const REPLICATION_TRUST_WEIGHT: f64 = 1.0; +/// How often the responder rebuilds + rotates its storage commitment. +/// +/// Each rebuild scans LMDB to compute leaf hashes; for ~10k keys this is +/// sub-100ms (BLAKE3 + tree build). The four-slot retention +/// (`RETAINED_COMMITMENT_SLOTS = 4`: current + 3 previous) means a +/// rotation is also when a pinned audit may need an older commitment, +/// so don't rotate so often that we drop a commitment a peer might +/// still pin to. +/// +/// Default: 1 hour, aligned with the worst-case neighbor-sync cooldown +/// (`NEIGHBOR_SYNC_COOLDOWN_SECS = 3600`) so that with the four-slot +/// retention, any commitment we gossiped is still answerable for up to +/// ~4 hours after rotation. That covers the gap +/// between our rotation and the next gossip arrival at a remote peer, +/// preventing the "unknown commitment hash" -> Idle audit-skip pattern +/// from being the common case (codex round-10 MAJOR #1). +/// +/// Why not faster: the v12 pin is bound to a specific point-in-time +/// commitment, so rotation isn't security-critical for pin freshness — +/// only for keeping the committed key set current as the responder +/// writes new keys. 1 hour is plenty for that, and slow enough that +/// honest auditors mostly hit `current` or `previous` rather than the +/// "rotated past" case. +const COMMITMENT_ROTATION_INTERVAL_SECS: u64 = 3600; + +/// Minimum interval between commitment signature verifications for a +/// single peer (v10/v12 §2 step 3 + §11 `DoS`). +/// +/// A sybil that bypasses the routing-table gate (e.g. by transient +/// bucket pollution) could otherwise force one ML-DSA-65 verify (~1 ms) +/// per gossip message. This rate limit caps the verify-per-peer rate +/// at 1/min, which is comfortably above the legitimate gossip cadence +/// (the 10-20 min neighbor-sync round on each peer). +const COMMITMENT_SIG_VERIFY_MIN_INTERVAL: Duration = Duration::from_secs(60); + +/// Hard cap on the size of `last_commitment_by_peer`. +/// +/// Bounds the per-process memory cost of the auditor's per-peer +/// commitment cache. Each entry holds a `StorageCommitment` +/// (~5 KiB: 1952-byte pubkey + 3293-byte signature + small fields). +/// At 4096 entries the cache is ~20 MiB, which comfortably covers a +/// realistic close-group neighborhood. When the cap is hit, one +/// arbitrary existing entry is evicted on insert (`HashMap` iteration +/// order is unspecified; we do not track insertion order). The +/// `PeerRemoved` handler proactively drops entries as the DHT +/// detects departures, and `ingest_peer_commitment` only admits +/// commitments from peers currently in the routing table — together +/// the cap is the third line of defence against sybil/churn flooding +/// (codex round-6 MAJOR, refined in round-7). +const MAX_LAST_COMMITMENT_BY_PEER: usize = 4096; + +/// Cap on the sticky `ever_capable_peers` set. Bounds memory so a +/// long-running bootstrap node cannot have the set grow without limit +/// from peer-id churn. Sized at 4x `MAX_LAST_COMMITMENT_BY_PEER` so +/// the set comfortably outlives normal LRU churn but still caps the +/// blast radius of identity-rotation attacks. Once full we refuse new +/// inserts (no eviction) — keeps the historic set stable; new v12 +/// peers above the cap are treated as legacy on rejoin, which is the +/// pre-round-2 behaviour, not a security regression. +const MAX_EVER_CAPABLE_PEERS: usize = 4 * MAX_LAST_COMMITMENT_BY_PEER; + // --------------------------------------------------------------------------- // ReplicationEngine // --------------------------------------------------------------------------- @@ -129,6 +201,24 @@ pub struct ReplicationEngine { /// are lightweight (`PeerSyncRecord` is two fields) and peer IDs are /// naturally bounded by the routing table's k-bucket capacity. sync_history: Arc>>, + /// Per-peer consecutive audit-timeout strike counter. + /// + /// A timeout increments the peer's strike count; a successful audit + /// response resets it to zero. Only when a peer reaches + /// [`config::AUDIT_TIMEOUT_STRIKE_THRESHOLD`] consecutive timeouts is a + /// timeout reported as an `ApplicationFailure` trust event. This separates + /// honest transient slowness (resets on the next normal response) from a + /// peer that does not store the data and is slow on every audit. Lives + /// outside `NeighborSyncState` so it is never wiped by a neighbor-sync + /// cycle reset. Grows with peer churn like `sync_history`; entries are a + /// single `u32` and peer IDs are bounded by k-bucket capacity. + audit_timeout_strikes: Arc>>, + /// Per-peer cooldown for gossip-triggered subtree audits (ADR-0002). + /// + /// Records when each peer was last audited so a burst of gossiped + /// commitment changes cannot spawn back-to-back audits of the same peer. + /// Bounded by routing-table membership and cleaned on `PeerRemoved`. + audit_on_gossip_cooldown: Arc>>, /// Completed local neighbor-sync cycle epoch for proof maturity. sync_cycle_epoch: Arc>, /// Per-key repair proof tracking for audit eligibility. @@ -141,6 +231,51 @@ pub struct ReplicationEngine { sync_trigger: Arc, /// Notified when `is_bootstrapping` transitions from `true` to `false`. bootstrap_complete_notify: Arc, + /// Node identity (for signing storage commitments). + /// + /// Phase 3 of the v12 storage-bound audit design. The responder + /// uses this to sign its periodically-built `StorageCommitment`. + identity: Arc, + /// Responder-side commitment state (two-slot atomic rotation). + /// + /// Periodically rebuilt from the live LMDB key set; gossiped on + /// outbound `NeighborSyncRequest`/`Response`; consulted by the + /// commitment-bound audit handler. + commitment_state: Arc, + /// Auditor-side per-peer commitment record (last known commitment + + /// sticky `commitment_capable` flag). + /// + /// Populated whenever an inbound gossip carries a verified + /// commitment from the sender. Used by `audit_tick` to snapshot + /// `expected_commitment_hash` into outbound challenges, and by + /// holder-eligibility (§6) to decide whether a peer's `recent_provers` + /// proof should be honoured. The sticky `commitment_capable` flag + /// flips true on first successful ingest and never reverts (§2 + /// step 5). + last_commitment_by_peer: Arc>>, + /// Sticky set of peer IDs we have EVER seen carrying a v12 + /// commitment, independent of whether their commitment bytes are + /// still in `last_commitment_by_peer`. The §6 holder-eligibility + /// closure consults this set to keep treating churned-out + /// previously-v12 peers as v12-capable (rather than degrading them + /// to "legacy" credit-unconditionally) when they re-appear on the + /// network before their next gossip arrives. Bounded growth: even + /// at one million peers seen over the node's lifetime, the set is + /// 32 MB. + ever_capable_peers: Arc>>, + /// Auditor-side holder-eligibility cache (v12 §6). + /// + /// Recorded on successful commitment-bound audit; read by future + /// quorum / paid-list eligibility checks (phase-3 stretch). + recent_provers: Arc>, + /// Per-peer last sig-verify attempt timestamp for the §2 step 3 / + /// §11 `DoS` rate limit. Bumped on EVERY verify attempt (success or + /// failure) so a peer we've never successfully verified can't burn + /// CPU on a flood of structurally-plausible-but-invalid gossips. + /// Lives separately from `last_commitment_by_peer` because that + /// map's records only exist after a successful verify (codex + /// round-13 finding). + sig_verify_attempts: Arc>>, /// Limits concurrent outbound replication sends to prevent bandwidth /// saturation on home broadband connections. send_semaphore: Arc, @@ -162,11 +297,13 @@ impl ReplicationEngine { /// /// Returns an error if the `PaidList` LMDB environment cannot be opened /// or if the configuration fails validation. + #[allow(clippy::too_many_arguments)] pub async fn new( config: ReplicationConfig, p2p_node: Arc, storage: Arc, payment_verifier: Arc, + identity: Arc, root_dir: &Path, fresh_write_rx: mpsc::UnboundedReceiver, shutdown: CancellationToken, @@ -191,12 +328,20 @@ impl ReplicationEngine { queues: Arc::new(RwLock::new(ReplicationQueues::new())), sync_state: Arc::new(RwLock::new(initial_neighbors)), sync_history: Arc::new(RwLock::new(HashMap::new())), + audit_timeout_strikes: Arc::new(RwLock::new(HashMap::new())), + audit_on_gossip_cooldown: Arc::new(RwLock::new(HashMap::new())), sync_cycle_epoch: Arc::new(RwLock::new(0)), repair_proofs: Arc::new(RwLock::new(RepairProofs::new())), bootstrap_state: Arc::new(RwLock::new(BootstrapState::new())), is_bootstrapping: Arc::new(RwLock::new(true)), sync_trigger: Arc::new(Notify::new()), bootstrap_complete_notify: Arc::new(Notify::new()), + identity, + commitment_state: Arc::new(ResponderCommitmentState::new()), + last_commitment_by_peer: Arc::new(RwLock::new(HashMap::new())), + ever_capable_peers: Arc::new(RwLock::new(HashSet::new())), + recent_provers: Arc::new(RwLock::new(RecentProvers::new())), + sig_verify_attempts: Arc::new(RwLock::new(HashMap::new())), send_semaphore: Arc::new(Semaphore::new(MAX_CONCURRENT_REPLICATION_SENDS)), fresh_write_rx: Some(fresh_write_rx), shutdown, @@ -210,6 +355,96 @@ impl ReplicationEngine { &self.paid_list } + /// Get a reference to the responder's commitment state. Used by audit + /// handlers to look up commitments by hash; used by the rotation tick + /// to install fresh ones. + #[must_use] + pub fn commitment_state(&self) -> &Arc { + &self.commitment_state + } + + /// Get a reference to the auditor's last-commitment-by-peer table. + #[must_use] + pub fn last_commitment_by_peer(&self) -> &Arc>> { + &self.last_commitment_by_peer + } + + /// Get a reference to the holder-eligibility cache. Phase-3 stretch: + /// will be read by quorum / paid-list eligibility checks. + #[must_use] + pub fn recent_provers(&self) -> &Arc> { + &self.recent_provers + } + + /// Test-only: rebuild + rotate this node's storage commitment now over its + /// current key set (normally on a 1h timer). Lets a test commit to chunks it + /// just stored without waiting for the rotation cadence. + /// + /// # Errors + /// + /// Propagates any error from reading the local key set or building/signing + /// the commitment. + #[cfg(any(test, feature = "test-utils"))] + pub async fn rebuild_commitment_now(&self) -> Result<()> { + rebuild_and_rotate_commitment( + &self.storage, + &self.identity, + &self.commitment_state, + &self.p2p_node, + ) + .await + } + + /// Test-only: directly seed this node's cached commitment for `peer`, + /// simulating "we received `peer`'s gossiped commitment" without depending + /// on neighbor-sync propagation timing. Lets a two-node audit test pin the + /// peer's commitment deterministically. + #[cfg(any(feature = "test-utils", test))] + pub async fn inject_peer_commitment_for_test( + &self, + peer: &PeerId, + commitment: StorageCommitment, + ) { + let now = Instant::now(); + self.last_commitment_by_peer + .write() + .await + .insert(*peer, PeerCommitmentRecord::from_verified(commitment, now)); + self.ever_capable_peers.write().await.insert(*peer); + } + + /// Test-only: run ONE subtree audit against `peer` right now, pinned to the + /// commitment this node has cached for it (from gossip), over the live wire. + /// Returns the audit outcome so tests can assert honest-pass / adversary-fail + /// in a real two-node setting without waiting for the gossip cadence. + /// + /// Returns `AuditTickResult::Idle` if we have no cached commitment for the + /// peer yet (gossip hasn't reached us). Gated to test builds. + #[cfg(any(test, feature = "test-utils"))] + pub async fn audit_peer_now(&self, peer: &PeerId) -> audit::AuditTickResult { + let target = { + let map = self.last_commitment_by_peer.read().await; + map.get(peer) + .and_then(|r| r.last_commitment.as_ref()) + .and_then(|c| commitment_hash(c).map(|h| (h, c.key_count))) + }; + let Some((pin, key_count)) = target else { + return audit::AuditTickResult::Idle; + }; + let credit = audit::AuditCredit { + recent_provers: &self.recent_provers, + }; + audit::run_subtree_audit( + &self.p2p_node, + &self.config, + peer, + pin, + key_count, + Some(&credit), + ) + .await + } + /// Start all background tasks. /// /// `dht_events` must be subscribed **before** `P2PNode::start()` so that @@ -225,7 +460,9 @@ impl ReplicationEngine { self.start_message_handler(); self.start_neighbor_sync_loop(); self.start_self_lookup_loop(); - self.start_audit_loop(); + // ADR-0002: audits are gossip-triggered (in the message handler when a + // peer's changed commitment is ingested), not run on a periodic tick. + self.start_commitment_rotation_loop(); self.start_fetch_worker(); self.start_verification_worker(); self.start_bootstrap_sync(dht_events); @@ -367,6 +604,25 @@ impl ReplicationEngine { let sync_cycle_epoch = Arc::clone(&self.sync_cycle_epoch); let repair_proofs = Arc::clone(&self.repair_proofs); let sync_trigger = Arc::clone(&self.sync_trigger); + let my_commitment_state = Arc::clone(&self.commitment_state); + let last_commitment_by_peer = Arc::clone(&self.last_commitment_by_peer); + let ever_capable_peers = Arc::clone(&self.ever_capable_peers); + let recent_provers = Arc::clone(&self.recent_provers); + let sig_verify_attempts = Arc::clone(&self.sig_verify_attempts); + let audit_timeout_strikes = Arc::clone(&self.audit_timeout_strikes); + let audit_on_gossip_cooldown = Arc::clone(&self.audit_on_gossip_cooldown); + let sync_state = Arc::clone(&self.sync_state); + + // ADR-0002 gossip-audit trigger: bundled state so an ingested *changed* + // commitment can spawn a probabilistic, cooldown-gated subtree audit. + let gossip_audit = GossipAuditTrigger { + p2p_node: Arc::clone(&p2p), + config: Arc::clone(&config), + recent_provers: Arc::clone(&recent_provers), + sync_state: Arc::clone(&sync_state), + audit_timeout_strikes: Arc::clone(&audit_timeout_strikes), + cooldown: Arc::clone(&audit_on_gossip_cooldown), + }; let handle = tokio::spawn(async move { loop { @@ -409,6 +665,11 @@ impl ReplicationEngine { &sync_history, &sync_cycle_epoch, &repair_proofs, + &last_commitment_by_peer, + &ever_capable_peers, + &sig_verify_attempts, + &my_commitment_state, + &gossip_audit, rr_message_id.as_deref(), ).await { Ok(()) => {} @@ -439,6 +700,27 @@ impl ReplicationEngine { } DhtNetworkEvent::PeerRemoved { peer_id } => { repair_proofs.write().await.remove_peer(&peer_id); + // v12: drop the commitment bytes and the + // recent-prover credit so a churn / sybil + // attacker cannot leave behind one + // StorageCommitment per identity in + // `last_commitment_by_peer`. Also drop the + // sig-verify rate-limit timestamp. + last_commitment_by_peer.write().await.remove(&peer_id); + recent_provers.write().await.forget_peer(&peer_id); + sig_verify_attempts.write().await.remove(&peer_id); + // Drop the timeout-strike entry too, so a + // departed peer leaves no residual (keeps this + // map bounded under churn, like its siblings). + audit_timeout_strikes.write().await.remove(&peer_id); + // Same for the gossip-audit cooldown (ADR-0002). + audit_on_gossip_cooldown.write().await.remove(&peer_id); + // The sticky `commitment_capable` flag is + // preserved orthogonally via + // `ever_capable_peers` — even after this + // removal, a re-joining peer continues to + // be treated as v12-capable rather than + // legacy (§3 shield). } _ => {} } @@ -464,6 +746,22 @@ impl ReplicationEngine { let is_bootstrapping = Arc::clone(&self.is_bootstrapping); let bootstrap_state = Arc::clone(&self.bootstrap_state); let sync_trigger = Arc::clone(&self.sync_trigger); + let commitment_state = Arc::clone(&self.commitment_state); + let last_commitment_by_peer = Arc::clone(&self.last_commitment_by_peer); + let ever_capable_peers = Arc::clone(&self.ever_capable_peers); + let sig_verify_attempts = Arc::clone(&self.sig_verify_attempts); + // ADR-0002: a peer's commitment also arrives on the sync RESPONSE path + // (we initiated, they piggybacked theirs). Carry a gossip-audit trigger + // here too so a peer that only ever answers — never initiates sync — + // is still audited; otherwise it could fully evade auditing. + let gossip_audit = GossipAuditTrigger { + p2p_node: Arc::clone(&p2p), + config: Arc::clone(&config), + recent_provers: Arc::clone(&self.recent_provers), + sync_state: Arc::clone(&sync_state), + audit_timeout_strikes: Arc::clone(&self.audit_timeout_strikes), + cooldown: Arc::clone(&self.audit_on_gossip_cooldown), + }; let handle = tokio::spawn(async move { loop { @@ -492,6 +790,11 @@ impl ReplicationEngine { &repair_proofs, &is_bootstrapping, &bootstrap_state, + &commitment_state, + &last_commitment_by_peer, + &ever_capable_peers, + &sig_verify_attempts, + &gossip_audit, ) => {} } } @@ -522,79 +825,84 @@ impl ReplicationEngine { self.task_handles.push(handle); } - fn start_audit_loop(&mut self) { - let p2p = Arc::clone(&self.p2p_node); + /// Periodically rebuild + sign + rotate the responder's storage + /// commitment. + /// + /// Phase 3 of the v12 storage-bound audit. Once per + /// [`COMMITMENT_ROTATION_INTERVAL_SECS`], the responder reads the + /// current LMDB key set, builds a Merkle tree (for content-addressed + /// chunks `bytes_hash == key`, so no chunk re-read is needed), signs + /// the root with the node's `MlDsaSecretKey`, and rotates the result + /// into `commitment_state`. Old `previous` slot is dropped by the + /// rotate (per `ResponderCommitmentState::rotate`). + /// + /// Skips if the key set is empty (no commitment to make) — the + /// auditor side falls back to the legacy plain-digest path for + /// peers that have never gossiped a commitment. + fn start_commitment_rotation_loop(&mut self) { let storage = Arc::clone(&self.storage); - let config = Arc::clone(&self.config); + let identity = Arc::clone(&self.identity); + let commitment_state = Arc::clone(&self.commitment_state); let shutdown = self.shutdown.clone(); - let sync_history = Arc::clone(&self.sync_history); - let sync_cycle_epoch = Arc::clone(&self.sync_cycle_epoch); - let repair_proofs = Arc::clone(&self.repair_proofs); - let bootstrap_state = Arc::clone(&self.bootstrap_state); - let is_bootstrapping = Arc::clone(&self.is_bootstrapping); - let sync_state = Arc::clone(&self.sync_state); + let p2p = Arc::clone(&self.p2p_node); + let sync_trigger = Arc::clone(&self.sync_trigger); + let recent_provers = Arc::clone(&self.recent_provers); let handle = tokio::spawn(async move { - // Invariant 19: wait for bootstrap to drain before starting audits. - loop { - tokio::select! { - () = shutdown.cancelled() => return, - () = tokio::time::sleep( - std::time::Duration::from_secs(BOOTSTRAP_DRAIN_CHECK_SECS) - ) => { - if bootstrap_state.read().await.is_drained() { - break; - } - } - } - } - - // Run one audit tick immediately after bootstrap drain. + // Build the first commitment immediately on startup so a + // restarted node can answer commitment-bound audits right + // away — otherwise current() stays None for a full rotation + // interval and audits silently fall back to legacy + // (codex round-11 MAJOR #2a). + // + // After the first build, trigger an immediate neighbor-sync + // round so the new commitment gossips out within seconds. + // Without this, after a restart remote auditors keep pinning + // the pre-restart (rotated-away) hash until their normal + // sync cadence elapses — up to 1 h in the worst case, + // during which time commitment-bound audits hit "unknown + // commitment hash" -> Idle no-ops (codex round-12 MAJOR #2). + // ML-DSA signatures are randomized so we cannot reproduce + // the pre-restart hash; the only honest path to recovery + // is fast re-gossip. + if let Err(e) = + rebuild_and_rotate_commitment(&storage, &identity, &commitment_state, &p2p).await { - let bootstrapping = *is_bootstrapping.read().await; - let result = { - let history = sync_history.read().await; - let current_sync_epoch = *sync_cycle_epoch.read().await; - audit::audit_tick_with_repair_proofs( - &p2p, - &storage, - &config, - &history, - &repair_proofs, - current_sync_epoch, - bootstrapping, - ) - .await - }; - handle_audit_result(&result, &p2p, &sync_state, &config).await; + warn!("Initial commitment build failed: {e}"); + } else { + sync_trigger.notify_one(); } - - // Then run periodically. loop { - let interval = config.random_audit_tick_interval(); tokio::select! { () = shutdown.cancelled() => break, - () = tokio::time::sleep(interval) => { - let bootstrapping = *is_bootstrapping.read().await; - let result = { - let history = sync_history.read().await; - let current_sync_epoch = *sync_cycle_epoch.read().await; - audit::audit_tick_with_repair_proofs( - &p2p, - &storage, - &config, - &history, - &repair_proofs, - current_sync_epoch, - bootstrapping, - ) - .await - }; - handle_audit_result(&result, &p2p, &sync_state, &config).await; + () = tokio::time::sleep( + std::time::Duration::from_secs(COMMITMENT_ROTATION_INTERVAL_SECS) + ) => { + if let Err(e) = rebuild_and_rotate_commitment( + &storage, + &identity, + &commitment_state, + &p2p, + ).await { + warn!("Commitment rotation failed: {e}"); + } + // Piggyback a sweep of expired recent_provers + // entries on the rotation tick (same cadence, + // 1 h). David's PR review (round-12) flagged + // the lack of TTL eviction — is_credited_holder + // already honours the TTL on read, but the + // sweep reclaims memory for entries we'll + // never re-read. + let dropped = recent_provers.write().await.sweep_expired( + std::time::Instant::now() + ); + if dropped > 0 { + debug!("recent_provers: swept {dropped} expired entries"); + } } } } - debug!("Audit loop shut down"); + debug!("Commitment rotation loop shut down"); }); self.task_handles.push(handle); } @@ -774,6 +1082,9 @@ impl ReplicationEngine { let bootstrap_state = Arc::clone(&self.bootstrap_state); let is_bootstrapping = Arc::clone(&self.is_bootstrapping); let bootstrap_complete_notify = Arc::clone(&self.bootstrap_complete_notify); + let last_commitment_by_peer = Arc::clone(&self.last_commitment_by_peer); + let ever_capable_peers = Arc::clone(&self.ever_capable_peers); + let recent_provers = Arc::clone(&self.recent_provers); let handle = tokio::spawn(async move { loop { @@ -791,6 +1102,9 @@ impl ReplicationEngine { bootstrap_state: &bootstrap_state, is_bootstrapping: &is_bootstrapping, bootstrap_complete_notify: &bootstrap_complete_notify, + last_commitment_by_peer: &last_commitment_by_peer, + ever_capable_peers: &ever_capable_peers, + recent_provers: &recent_provers, }; run_verification_cycle(ctx).await; } @@ -828,6 +1142,10 @@ impl ReplicationEngine { let bootstrap_complete_notify = Arc::clone(&self.bootstrap_complete_notify); let sync_cycle_epoch = Arc::clone(&self.sync_cycle_epoch); let repair_proofs = Arc::clone(&self.repair_proofs); + let my_commitment_state = Arc::clone(&self.commitment_state); + let last_commitment_by_peer = Arc::clone(&self.last_commitment_by_peer); + let ever_capable_peers = Arc::clone(&self.ever_capable_peers); + let sig_verify_attempts = Arc::clone(&self.sig_verify_attempts); let handle = tokio::spawn(async move { // Wait for DHT bootstrap to complete before snapshotting @@ -882,12 +1200,42 @@ impl ReplicationEngine { &paid_list, &config, bootstrapping, + my_commitment_state.current().map(|b| { + // Mark gossiped: emitted in the bootstrap-sync + // request, so we stay answerable for it (ADR-0002). + my_commitment_state.mark_gossiped(b.hash()); + b.commitment().clone() + }), ) .await; bootstrap::decrement_pending_requests(&bootstrap_state, 1).await; if let Some(outcome) = outcome { + // Ingest the peer's piggybacked commitment from the + // response (same verification as the request path). + // Bootstrap is the FIRST gossip we receive from most + // peers, so this populates last_commitment_by_peer. + // + // We intentionally do NOT trigger a gossip-audit here: + // during bootstrap this node may itself still be + // bootstrapping (audits are gated on that), and the + // close-group/RT view is not yet stable. The peer is + // audited on the first STEADY-STATE neighbor-sync round + // after bootstrap drains (request + response paths both + // trigger), which is within one sync cycle — so caching + // the commitment here is sufficient and there is no + // coverage gap (ADR-0002). + ingest_peer_commitment( + peer, + outcome.response.commitment.as_ref(), + &p2p, + &last_commitment_by_peer, + &ever_capable_peers, + &sig_verify_attempts, + ) + .await; // sig_verify_attempts in scope from line ~1080 + if !outcome.response.bootstrapping { record_sent_replica_hints( peer, @@ -956,7 +1304,7 @@ impl ReplicationEngine { /// When `rr_message_id` is `Some`, the request arrived via the `/rr/` /// request-response path and the response must be sent via `send_response` /// so saorsa-core can route it back to the waiting `send_request` caller. -#[allow(clippy::too_many_arguments)] +#[allow(clippy::too_many_arguments, clippy::too_many_lines)] async fn handle_replication_message( source: &PeerId, data: &[u8], @@ -971,6 +1319,11 @@ async fn handle_replication_message( sync_history: &Arc>>, sync_cycle_epoch: &Arc>, repair_proofs: &Arc>, + last_commitment_by_peer: &Arc>>, + ever_capable_peers: &Arc>>, + sig_verify_attempts: &Arc>>, + my_commitment_state: &Arc, + gossip_audit: &GossipAuditTrigger, rr_message_id: Option<&str>, ) -> Result<()> { let msg = ReplicationMessage::decode(data) @@ -1004,6 +1357,22 @@ async fn handle_replication_message( } ReplicationMessageBody::NeighborSyncRequest(ref request) => { let bootstrapping = *is_bootstrapping.read().await; + // Phase-3 storage-bound audit: store the sender's + // commitment for use as `expected_commitment_hash` in + // future audits. Verify signature before storing so a peer + // cannot inject a forged commitment for someone else. + if let Some(target) = ingest_peer_commitment( + source, + request.commitment.as_ref(), + p2p_node, + last_commitment_by_peer, + ever_capable_peers, + sig_verify_attempts, + ) + .await + { + maybe_trigger_gossip_audit(gossip_audit, source, target).await; + } handle_neighbor_sync_request( source, request, @@ -1017,6 +1386,12 @@ async fn handle_replication_message( sync_history, sync_cycle_epoch, repair_proofs, + my_commitment_state.current().map(|b| { + // Mark gossiped: we emit this commitment in the sync + // response, so we must stay answerable for it (ADR-0002). + my_commitment_state.mark_gossiped(b.hash()); + b.commitment().clone() + }), msg.request_id, rr_message_id, ) @@ -1046,8 +1421,10 @@ async fn handle_replication_message( .await } ReplicationMessageBody::AuditChallenge(ref challenge) => { + // Single-key prune-confirmation audit (pre-existing): answer with + // per-key possession digests. let bootstrapping = *is_bootstrapping.read().await; - handle_audit_challenge_msg( + handle_prune_audit_challenge_msg( source, challenge, storage, @@ -1058,12 +1435,58 @@ async fn handle_replication_message( ) .await } + ReplicationMessageBody::SubtreeAuditChallenge(ref challenge) => { + // Gossip-triggered storage-bound subtree audit (ADR-0002). + let bootstrapping = *is_bootstrapping.read().await; + let response = audit::handle_subtree_challenge( + challenge, + storage, + p2p_node.peer_id(), + bootstrapping, + Some(my_commitment_state), + ) + .await; + send_replication_response( + source, + p2p_node, + msg.request_id, + ReplicationMessageBody::SubtreeAuditResponse(response), + rr_message_id, + ) + .await; + Ok(()) + } + ReplicationMessageBody::SubtreeByteChallenge(ref challenge) => { + // Round 2 of the storage audit (ADR-0002): serve the original bytes + // for the auditor's nonce-selected spot-check keys, or signal + // `Absent` for a committed key we can no longer produce. + let bootstrapping = *is_bootstrapping.read().await; + let response = audit::handle_subtree_byte_challenge( + challenge, + storage, + p2p_node.peer_id(), + bootstrapping, + Some(my_commitment_state), + ) + .await; + send_replication_response( + source, + p2p_node, + msg.request_id, + ReplicationMessageBody::SubtreeByteResponse(response), + rr_message_id, + ) + .await; + Ok(()) + } // Response messages are handled by their respective request initiators. ReplicationMessageBody::FreshReplicationResponse(_) | ReplicationMessageBody::NeighborSyncResponse(_) | ReplicationMessageBody::VerificationResponse(_) | ReplicationMessageBody::FetchResponse(_) - | ReplicationMessageBody::AuditResponse(_) => Ok(()), + | ReplicationMessageBody::AuditResponse(_) + | ReplicationMessageBody::SubtreeAuditResponse(_) + | ReplicationMessageBody::SubtreeByteResponse(_) => Ok(()), } } @@ -1314,6 +1737,7 @@ async fn handle_neighbor_sync_request( sync_history: &Arc>>, sync_cycle_epoch: &Arc>, repair_proofs: &Arc>, + my_commitment: Option, request_id: u64, rr_message_id: Option<&str>, ) -> Result<()> { @@ -1335,6 +1759,7 @@ async fn handle_neighbor_sync_request( paid_list, config, is_bootstrapping, + my_commitment.clone(), ) .await; @@ -1494,7 +1919,8 @@ async fn handle_fetch_request( Ok(()) } -async fn handle_audit_challenge_msg( +/// Responder for a single-key prune-confirmation audit challenge. +async fn handle_prune_audit_challenge_msg( source: &PeerId, challenge: &protocol::AuditChallenge, storage: &Arc, @@ -1503,14 +1929,10 @@ async fn handle_audit_challenge_msg( request_id: u64, rr_message_id: Option<&str>, ) -> Result<()> { - #[allow(clippy::cast_possible_truncation)] - let stored_chunks = storage.current_chunks().map_or(0, |c| c as usize); - let response = audit::handle_audit_challenge( + let response = crate::replication::pruning::handle_prune_audit_challenge( challenge, storage, - p2p_node.peer_id(), is_bootstrapping, - stored_chunks, ) .await; @@ -1626,6 +2048,11 @@ async fn run_neighbor_sync_round( repair_proofs: &Arc>, is_bootstrapping: &Arc>, bootstrap_state: &Arc>, + commitment_state: &Arc, + last_commitment_by_peer: &Arc>>, + ever_capable_peers: &Arc>>, + sig_verify_attempts: &Arc>>, + gossip_audit: &GossipAuditTrigger, ) { let self_id = *p2p_node.peer_id(); let bootstrapping = *is_bootstrapping.read().await; @@ -1705,6 +2132,15 @@ async fn run_neighbor_sync_round( debug!("Neighbor sync: syncing with {} peers", batch.len()); + // Snapshot our current commitment once per round so all peers in + // this batch see the same thing (gossip is the responder's attestation; + // same value across the batch is fine and reduces RwLock churn). Mark it + // gossiped so we stay answerable for it (ADR-0002 retention). + let my_commitment = commitment_state.current().map(|b| { + commitment_state.mark_gossiped(b.hash()); + b.commitment().clone() + }); + // Sync with each peer in the batch. for peer in &batch { let outcome = neighbor_sync::sync_with_peer_with_outcome( @@ -1714,6 +2150,7 @@ async fn run_neighbor_sync_round( paid_list, config, bootstrapping, + my_commitment.clone(), ) .await; @@ -1734,6 +2171,10 @@ async fn run_neighbor_sync_round( sync_history, sync_cycle_epoch, repair_proofs, + last_commitment_by_peer, + ever_capable_peers, + sig_verify_attempts, + gossip_audit, ) .await; } else { @@ -1752,6 +2193,7 @@ async fn run_neighbor_sync_round( paid_list, config, bootstrapping, + my_commitment.clone(), ) .await; @@ -1772,6 +2214,10 @@ async fn run_neighbor_sync_round( sync_history, sync_cycle_epoch, repair_proofs, + last_commitment_by_peer, + ever_capable_peers, + sig_verify_attempts, + gossip_audit, ) .await; } @@ -1799,7 +2245,29 @@ async fn handle_sync_response( sync_history: &Arc>>, sync_cycle_epoch: &Arc>, repair_proofs: &Arc>, + last_commitment_by_peer: &Arc>>, + ever_capable_peers: &Arc>>, + sig_verify_attempts: &Arc>>, + gossip_audit: &GossipAuditTrigger, ) { + // Ingest the peer's commitment if they piggybacked one on the response. + // Same verification as the request path (peer-id binding + signature); + // forged commitments are dropped at the edge. A *changed* commitment here + // is a gossip-audit trigger just like on the request path — so a peer that + // only ever answers sync (never initiates) is still audited (ADR-0002). + if let Some(target) = ingest_peer_commitment( + peer, + resp.commitment.as_ref(), + p2p_node, + last_commitment_by_peer, + ever_capable_peers, + sig_verify_attempts, + ) + .await + { + maybe_trigger_gossip_audit(gossip_audit, peer, target).await; + } + // Record successful sync. { let mut state = sync_state.write().await; @@ -2018,6 +2486,9 @@ async fn run_verification_cycle(ctx: VerificationCycleContext<'_>) { bootstrap_state, is_bootstrapping, bootstrap_complete_notify, + last_commitment_by_peer, + ever_capable_peers, + recent_provers, } = ctx; // Evict stale entries that have been pending too long (e.g. unreachable @@ -2156,6 +2627,83 @@ async fn run_verification_cycle(ctx: VerificationCycleContext<'_>) { // Step 3: Evaluate results — collect outcomes without holding the write // lock across paid-list I/O. + // + // v12 §6 holder-eligibility: snapshot the per-peer last-commitment + // table and recent_provers cache up front so the synchronous + // evaluate_key_evidence_with_holder_check predicate can consult + // them without awaiting. The predicate downgrades a Present + // claim to Unresolved unless the peer is credited for that key. + // Snapshot per-peer commitment data. We need two views: + // - `commitment_by_peer_snapshot`: peers that currently have + // a verified commitment record on file (used to look up + // their current hash). + // - `capable_peer_snapshot`: the sticky "ever v12-capable" + // set. Sourced from a separate set rather than the + // commitment map so eviction (PeerRemoved cleanup, sybil + // cap at `MAX_LAST_COMMITMENT_BY_PEER`) does NOT downgrade + // a previously-v12 peer to "legacy" credit-unconditionally. + // Legacy / pre-v12 peers that have never sent a commitment + // remain absent from the set and are credited via the + // legacy path so mixed-version networks stay live. + let commitment_by_peer_snapshot: HashMap = { + let map = last_commitment_by_peer.read().await; + map.iter() + .filter_map(|(p, rec)| { + rec.last_commitment.as_ref().and_then(|c| { + crate::replication::commitment::commitment_hash(c).map(|h| (*p, h)) + }) + }) + .collect() + }; + let capable_peer_snapshot: HashSet = ever_capable_peers.read().await.clone(); + // Take a full snapshot of recent_provers under the read lock, + // then release. The cache is bounded (16/key × keys), so the + // clone is cheap. + let provers_snapshot = recent_provers.read().await.clone(); + // For the replica-fetch path, we need to know whether THIS + // node already holds the key being verified. The v12 §6 + // holder-credit gate is meant to prevent uncredited Present + // claims from contributing to paid-list / reward quorum for + // keys we DO hold (and could audit ourselves). For keys we + // are trying to FETCH (i.e. not in local storage), there is + // no possible local audit credit, and gating the presence + // quorum on credit would deadlock replica-repair in a + // fully v12-capable close group. + let mut locally_held: HashSet = HashSet::new(); + for key in &keys_needing_network { + if storage.exists(key).unwrap_or(false) { + locally_held.insert(*key); + } + } + let holder_credit = |peer: &PeerId, key: &XorName| -> bool { + if !locally_held.contains(key) { + // Replica-fetch path: we don't hold this key, so we + // cannot have collected audit credit for it. Trust + // Present claims to drive fetch-source promotion; + // chunk-PUT payment_verifier is the security backstop + // when the bytes actually arrive. + return true; + } + if !capable_peer_snapshot.contains(peer) { + // Pre-v12 / legacy peer that has never gossiped a + // commitment. The v12 §6 holder-eligibility check + // doesn't apply: their Present evidence comes through + // the legacy path and we credit it unconditionally + // so a mixed-version network stays live during + // transition. + return true; + } + let Some(hash) = commitment_by_peer_snapshot.get(peer) else { + // Peer is commitment_capable (sticky) but currently + // has no live commitment record on file (e.g. their + // last gossip was evicted from the LRU cache, or it + // failed verification). Withhold credit until they + // re-prove storage under a fresh commitment. + return false; + }; + provers_snapshot.is_credited_holder(key, peer, hash) + }; + let mut evaluated: Vec<(XorName, KeyVerificationOutcome, HintPipeline)> = Vec::new(); { let q = queues.read().await; @@ -2166,7 +2714,13 @@ async fn run_verification_cycle(ctx: VerificationCycleContext<'_>) { let Some(entry) = q.get_pending(key) else { continue; }; - let outcome = quorum::evaluate_key_evidence(key, ev, &targets, config); + let outcome = quorum::evaluate_key_evidence_with_holder_check( + key, + ev, + &targets, + config, + holder_credit, + ); evaluated.push((*key, outcome, entry.pipeline)); } } // read lock released @@ -2524,11 +3078,101 @@ async fn execute_single_fetch( // Audit result handler // --------------------------------------------------------------------------- +/// Execute the side effects for a confirmed audit failure. +/// +/// [`plan_failed_audit`] is the pure decision INCLUDING the strike selection +/// (record-a-strike-for-`Timeout` vs leave-untouched for confirmed failures), +/// extracted so the whole glue — not just the verdict — is testable without a +/// live `P2PNode`. This function is only the resulting I/O. +async fn handle_failed_audit( + challenged_peer: &PeerId, + confirmed_failed_key_count: usize, + reason: &AuditFailureReason, + p2p_node: &Arc, + sync_state: &Arc>, + recent_provers: &Arc>, + audit_timeout_strikes: &Arc>>, +) { + let action = { + let mut strikes = audit_timeout_strikes.write().await; + plan_failed_audit(reason, &mut strikes, challenged_peer) + }; + match action { + AuditFailureAction::TimeoutGrace => { + // Honest transient slowness: no penalty, no credit loss, retain the + // bootstrap claim. Only *sustained* timeouts (a peer that always + // has to refetch) survive to the threshold — the per-challenge + // window is never widened. + debug!( + "Audit timeout for {challenged_peer} (under the {}-strike threshold); \ + within grace, retaining bootstrap claim, no penalty", + config::AUDIT_TIMEOUT_STRIKE_THRESHOLD + ); + } + AuditFailureAction::TimeoutPenalize => { + // TIMEOUT-EVICTION-DISABLED: re-enable once enough nodes have + // upgraded. This PR is a breaking wire change (StorageCommitment + // gossip old nodes cannot decode), so a pre-upgrade node times out + // on every new audit and looks exactly like a non-storing peer. + // Penalising timeouts now would make upgraded nodes evict every + // not-yet-upgraded node — a network death spiral during rollout. + // Strikes are still tracked/logged so the mechanism stays + // observable; we just don't report the trust event that drives + // eviction. Confirmed storage-integrity failures (ConfirmedPenalize + // below) are unaffected — those only come from a peer that actually + // answered with bad data, never an old node. Grep + // TIMEOUT-EVICTION-DISABLED to restore the report in a small + // follow-up release. + warn!( + "Audit timeout for {challenged_peer}: reached the {}-strike threshold of \ + consecutive timeouts (eviction disabled this release — not penalizing)", + config::AUDIT_TIMEOUT_STRIKE_THRESHOLD + ); + // p2p_node + // .report_trust_event( + // challenged_peer, + // TrustEvent::ApplicationFailure(config::AUDIT_FAILURE_TRUST_WEIGHT), + // ) + // .await; + } + AuditFailureAction::ConfirmedPenalize => { + error!( + "Audit failure for {challenged_peer}: {confirmed_failed_key_count} confirmed \ + failed keys" + ); + // Peer returned a non-bootstrap response — clear the active claim + // while retaining claim history. + { + let mut state = sync_state.write().await; + state.clear_active_bootstrap_claim(challenged_peer); + } + // Revoke holder credit on a CONFIRMED failure (DigestMismatch / + // KeyAbsent / Rejected / MalformedResponse): the peer no longer + // provably holds what it committed to, so it must not keep §6 + // holder credit for the proof TTL. The §5 `forget_commitment` path + // only fires on an "unknown commitment hash" reply; genuine byte + // loss surfaces here. + { + let mut provers_guard = recent_provers.write().await; + apply_audit_failure_credit_revocation(&mut provers_guard, challenged_peer, reason); + } + p2p_node + .report_trust_event( + challenged_peer, + TrustEvent::ApplicationFailure(config::AUDIT_FAILURE_TRUST_WEIGHT), + ) + .await; + } + } +} + /// Handle audit result: log findings and emit trust events. async fn handle_audit_result( result: &AuditTickResult, p2p_node: &Arc, sync_state: &Arc>, + recent_provers: &Arc>, + audit_timeout_strikes: &Arc>>, config: &ReplicationConfig, ) { match result { @@ -2543,6 +3187,14 @@ async fn handle_audit_result( let mut state = sync_state.write().await; state.clear_active_bootstrap_claim(challenged_peer); } + // A normal response proves the slowness (if any) was transient, so + // reset the timeout-strike counter. Only *sustained* timeouts (a + // peer that must refetch on every audit) survive this reset to + // accumulate toward the penalty threshold. + { + let mut strikes = audit_timeout_strikes.write().await; + strikes.remove(challenged_peer); + } p2p_node .report_trust_event( challenged_peer, @@ -2558,24 +3210,16 @@ async fn handle_audit_result( .. } = evidence { - error!( - "Audit failure for {challenged_peer}: {} confirmed failed keys", - confirmed_failed_keys.len() - ); - if audit_failure_clears_bootstrap_claim(reason) { - // Peer returned a non-bootstrap response — clear the active - // claim while retaining claim history. - let mut state = sync_state.write().await; - state.clear_active_bootstrap_claim(challenged_peer); - } else { - debug!("Audit timeout for {challenged_peer}; retaining active bootstrap claim"); - } - p2p_node - .report_trust_event( - challenged_peer, - TrustEvent::ApplicationFailure(config::AUDIT_FAILURE_TRUST_WEIGHT), - ) - .await; + handle_failed_audit( + challenged_peer, + confirmed_failed_keys.len(), + reason, + p2p_node, + sync_state, + recent_provers, + audit_timeout_strikes, + ) + .await; } } AuditTickResult::BootstrapClaim { peer } => { @@ -2623,17 +3267,673 @@ async fn handle_audit_result( } } +/// Whether a confirmed audit failure with this reason clears the peer's active +/// bootstrap claim. A `Timeout` does not (the peer may still be legitimately +/// bootstrapping); every confirmed storage-integrity reason does. The `Failed` +/// arm now special-cases `Timeout` directly (timeout → strike gate, retaining +/// the claim; confirmed → clear), so this predicate is retained as the +/// documented source of truth and is exercised by the regression tests; it is +/// not called on the production path. +#[cfg_attr(not(test), allow(dead_code))] fn audit_failure_clears_bootstrap_claim(reason: &AuditFailureReason) -> bool { !matches!(reason, AuditFailureReason::Timeout) } +/// What the audit-failure handler should do for a given failure, given the +/// peer's post-increment timeout-strike count. Pure (no I/O) so the whole +/// decision can be exercised end-to-end without a live `P2PNode`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum AuditFailureAction { + /// Timeout under the strike threshold: no trust penalty, no credit + /// revocation, retain the bootstrap claim (honest transient slowness). + TimeoutGrace, + /// Timeout at/over the threshold: report `ApplicationFailure`. Bootstrap + /// claim retained; holder credit NOT revoked (the peer never admitted byte + /// loss). The non-storing-peer case. + TimeoutPenalize, + /// Confirmed storage-integrity failure: penalize immediately, clear the + /// active bootstrap claim, and revoke holder credit. + ConfirmedPenalize, +} + +/// Upper bound on a peer's consecutive-timeout strike count. Must exceed the +/// largest reachable adaptive threshold (base + `MAX_ADAPTIVE_TIMEOUT_GRACE`) so +/// a genuinely non-responsive peer's count can always catch up to and cross an +/// inflated threshold — otherwise capping at the base would make timeout +/// penalties unreachable once the adaptive threshold rose (codex finding). +const AUDIT_TIMEOUT_STRIKE_MAX: u32 = 64; + +/// Maximum extra grace the adaptive mechanism may add on top of the base +/// threshold. Bounds how far a (possibly stale) set of timing-out peers can +/// widen the window, so a small persistent failing cohort cannot push the +/// threshold arbitrarily high and shield a bad node indefinitely. +const MAX_ADAPTIVE_TIMEOUT_GRACE: u32 = 2 * config::AUDIT_TIMEOUT_STRIKE_THRESHOLD; + +/// Record an audit timeout for `peer` and return its new consecutive-timeout +/// strike count, saturating at [`AUDIT_TIMEOUT_STRIKE_MAX`] (well above any +/// reachable adaptive threshold). A successful audit removes the peer's entry +/// (the `Passed` arm of [`handle_audit_result`]), so only *consecutive* +/// timeouts accumulate here. +fn record_audit_timeout_strike(strikes: &mut HashMap, peer: &PeerId) -> u32 { + let count = strikes.entry(*peer).or_insert(0); + *count = count.saturating_add(1).min(AUDIT_TIMEOUT_STRIKE_MAX); + *count +} + +/// The adaptive timeout-strike threshold for judging `peer` (ADR-0002 "Network +/// Resilience"): `min(median of the OTHER timing-out peers' counts, +/// MAX_ADAPTIVE_TIMEOUT_GRACE) + base threshold`. +/// +/// In a healthy network almost no peer carries timeout strikes, so the median +/// is 0 and the threshold is the base [`config::AUDIT_TIMEOUT_STRIKE_THRESHOLD`]. +/// During genuine disruption many *honest* peers time out together, lifting the +/// median and widening the grace so the audit system does not pile onto a +/// struggling network — but the widening is capped at `MAX_ADAPTIVE_TIMEOUT_GRACE` +/// so a stale failing cohort cannot inflate it without bound. +/// +/// `peer` is EXCLUDED from the median so a lone timing-out peer cannot raise its +/// own grace bar. Combined with the map being fed ONLY by timeouts (deterministic +/// failures never touch it), this closes self-inflation and bounds +/// attacker-inflation of the grace window. +fn adaptive_timeout_threshold(strikes: &HashMap, peer: &PeerId) -> u32 { + let grace = median_timeout_strikes_excluding(strikes, peer).min(MAX_ADAPTIVE_TIMEOUT_GRACE); + grace.saturating_add(config::AUDIT_TIMEOUT_STRIKE_THRESHOLD) +} + +/// Lower median of the current per-peer consecutive-timeout counts, excluding +/// `peer`. No other peers → 0. +fn median_timeout_strikes_excluding(strikes: &HashMap, peer: &PeerId) -> u32 { + let mut counts: Vec = strikes + .iter() + .filter(|(p, _)| *p != peer) + .map(|(_, c)| *c) + .collect(); + if counts.is_empty() { + return 0; + } + counts.sort_unstable(); + // Lower median: for even-sized inputs take the lower of the two middle + // values ((len-1)/2), so the grace is conservative rather than inflated. + counts.get((counts.len() - 1) / 2).copied().unwrap_or(0) +} + +/// Whether a peer's consecutive-timeout strike count reaches the (adaptive) +/// threshold for emitting an `ApplicationFailure` trust event. +fn timeout_strike_reaches_threshold(strikes: u32, threshold: u32) -> bool { + strikes >= threshold +} + +/// Decide what to do about a confirmed audit failure. `timeout_strikes_after` +/// is the peer's strike count after recording this event and `timeout_threshold` +/// the adaptive threshold to compare against (both only meaningful when +/// `reason == Timeout`). Pure, so the integration-level decision can be asserted +/// in tests with no networking. +fn decide_audit_failure_action( + reason: &AuditFailureReason, + timeout_strikes_after: u32, + timeout_threshold: u32, +) -> AuditFailureAction { + if matches!(reason, AuditFailureReason::Timeout) { + if timeout_strike_reaches_threshold(timeout_strikes_after, timeout_threshold) { + AuditFailureAction::TimeoutPenalize + } else { + AuditFailureAction::TimeoutGrace + } + } else { + AuditFailureAction::ConfirmedPenalize + } +} + +/// Plan the response to a confirmed audit failure, performing the +/// strike-selection glue in-process: a `Timeout` records a strike against +/// `peer` (so consecutive timeouts accumulate) and is judged against the +/// adaptive threshold; every other reason is a confirmed failure that does NOT +/// touch the strike map. The caller owns the lock and performs the resulting I/O. +fn plan_failed_audit( + reason: &AuditFailureReason, + strikes: &mut HashMap, + peer: &PeerId, +) -> AuditFailureAction { + // Snapshot the adaptive threshold from the *other* peers' counts (excluding + // this peer), so a single peer's own timeouts cannot raise its own grace bar. + let threshold = adaptive_timeout_threshold(strikes, peer); + let strikes_after = if matches!(reason, AuditFailureReason::Timeout) { + record_audit_timeout_strike(strikes, peer) + } else { + 0 + }; + decide_audit_failure_action(reason, strikes_after, threshold) +} + +/// Whether a confirmed audit failure with this reason should revoke the +/// peer's `recent_provers` holder credit immediately (v12 §6). +/// +/// `true` for any reason where the peer actually answered (or admitted +/// it cannot): `DigestMismatch`, `KeyAbsent`, `Rejected` ("missing +/// bytes for committed key"), `MalformedResponse` — these prove the +/// peer no longer holds what it committed to, so it must not keep +/// holder credit for the proof TTL. `false` for `Timeout`: a single +/// dropped packet must not strip an honest peer; the 40-min TTL is the +/// deliberate liveness cushion there. +fn audit_failure_revokes_holder_credit(reason: &AuditFailureReason) -> bool { + !matches!(reason, AuditFailureReason::Timeout) +} + +/// Apply the holder-credit revocation decision for a confirmed audit +/// failure. Pure over `RecentProvers` so the handler wiring is unit- +/// testable without a live `P2PNode`: the production `Failed` arm of +/// `handle_audit_result` calls exactly this. +fn apply_audit_failure_credit_revocation( + provers: &mut RecentProvers, + challenged_peer: &PeerId, + reason: &AuditFailureReason, +) { + if audit_failure_revokes_holder_credit(reason) { + provers.forget_peer(challenged_peer); + } +} + // `admit_bootstrap_hints` was consolidated into `admit_and_queue_hints`. +// --------------------------------------------------------------------------- +// Storage-bound audit (ADR-0002) — gossip trigger + auditor-side ingestion +// --------------------------------------------------------------------------- + +/// State the gossip-audit trigger needs to spawn an audit. Bundled so the +/// message handler passes one value instead of a long argument list; all +/// fields are cheap `Arc` clones. +#[derive(Clone)] +struct GossipAuditTrigger { + p2p_node: Arc, + config: Arc, + recent_provers: Arc>, + sync_state: Arc>, + audit_timeout_strikes: Arc>>, + cooldown: Arc>>, +} + +/// What a gossip ingest yields for the audit trigger: the commitment hash to +/// pin and the `key_count` needed to size the response deadline from the actual +/// `ceil(sqrt(N))` subtree (ADR-0002). Returned on every VALID gossip (changed +/// or not) so a stable-keyset node stays auditable — not just on its first +/// commitment. +#[derive(Debug, Clone, Copy)] +struct AuditTarget { + pin_hash: [u8; 32], + key_count: u32, +} + +/// Per-peer audit cooldown check-and-stamp (ADR-0002 "occasional surprise +/// exams, keeps load low"). Returns `true` if `peer` may be audited now (and +/// stamps `now`), `false` if it was audited within +/// `AUDIT_ON_GOSSIP_COOLDOWN_SECS`. Bounds the map under a flood of distinct +/// peers. Pure over the passed map so the flood/cooldown behaviour is testable +/// without a live node: a burst of gossips from one peer yields at most one +/// `true` per cooldown window. +fn cooldown_allows_audit(map: &mut HashMap, peer: &PeerId, now: Instant) -> bool { + let cooldown = Duration::from_secs(config::AUDIT_ON_GOSSIP_COOLDOWN_SECS); + let known = match map.get(peer) { + Some(&last) => { + if now.saturating_duration_since(last) < cooldown { + return false; + } + true + } + None => false, + }; + // Bound the map under churn like its siblings (drop the oldest stamp) before + // admitting a brand-new peer. + if !known && map.len() >= MAX_LAST_COMMITMENT_BY_PEER { + if let Some(victim) = map.iter().min_by_key(|(_, &ts)| ts).map(|(p, _)| *p) { + map.remove(&victim); + } + } + map.insert(*peer, now); + true +} + +/// The gossip-audit launch decision in ONE place so the ordering is shared +/// between production and its test (ADR-0002 "occasional surprise exams"). +/// +/// Order matters and is the security-relevant property: the per-peer cooldown is +/// checked-and-stamped FIRST, THEN the probability lottery (`lottery_wins`) is +/// applied. If the lottery were sampled first, a gossip flood would re-roll it on +/// every message until one won, multiplying audits. Because the cooldown is +/// stamped before the lottery is consulted, a LOSING ticket still consumes the +/// window — so each peer gets at most one audit lottery per cooldown window +/// regardless of how often it gossips. Production calls this with +/// `lottery_wins = gen_bool(AUDIT_ON_GOSSIP_PROBABILITY)`; the test calls it with +/// a deterministic `lottery_wins`, so a reorder regression here fails the test. +fn audit_launch_decision( + map: &mut HashMap, + peer: &PeerId, + now: Instant, + lottery_wins: bool, +) -> bool { + // Gate 1: cooldown check-and-stamp (consumes the window even on a loss). + if !cooldown_allows_audit(map, peer, now) { + return false; + } + // Gate 2: the probability lottery. + lottery_wins +} + +/// On a peer's *changed* gossiped commitment, maybe launch a subtree audit +/// (ADR-0002): fire with probability `AUDIT_ON_GOSSIP_PROBABILITY`, subject to a +/// per-peer cooldown, pinned to the just-ingested root. Detached so gossip +/// handling is never blocked on a network round-trip. +async fn maybe_trigger_gossip_audit( + trigger: &GossipAuditTrigger, + peer: &PeerId, + target: AuditTarget, +) { + // The launch decision (cooldown-then-lottery ordering) lives in the pure + // `audit_launch_decision` so the ordering is shared with its test. Sample + // the lottery here, then let the helper apply it AFTER the cooldown stamp. + let now = Instant::now(); + let lottery_wins = rand::thread_rng().gen_bool(config::AUDIT_ON_GOSSIP_PROBABILITY); + { + let mut map = trigger.cooldown.write().await; + if !audit_launch_decision(&mut map, peer, now, lottery_wins) { + return; + } + } + + let trigger = trigger.clone(); + let peer = *peer; + tokio::spawn(async move { + let credit = audit::AuditCredit { + recent_provers: &trigger.recent_provers, + }; + let result = audit::run_subtree_audit( + &trigger.p2p_node, + &trigger.config, + &peer, + target.pin_hash, + target.key_count, + Some(&credit), + ) + .await; + handle_audit_result( + &result, + &trigger.p2p_node, + &trigger.sync_state, + &trigger.recent_provers, + &trigger.audit_timeout_strikes, + &trigger.config, + ) + .await; + }); +} + +/// Atomic check-and-stamp of the per-peer commitment sig-verify rate limit. +/// +/// Returns `true` if a signature verify is allowed now (and stamps the attempt +/// time), `false` if the peer is within [`COMMITMENT_SIG_VERIFY_MIN_INTERVAL`] +/// of its last attempt. Holds one write lock across the decision so two +/// concurrent ingests from the same peer cannot both pass. Stamps BEFORE the +/// caller's expensive verify so a slow/failed verify still rate-limits the next +/// message. Bounds the map under a flood of distinct peer ids. +async fn sig_verify_rate_limit_ok( + sig_verify_attempts: &Arc>>, + source: &PeerId, + now: Instant, +) -> bool { + let mut attempts = sig_verify_attempts.write().await; + if let Some(&last) = attempts.get(source) { + if now.saturating_duration_since(last) < COMMITMENT_SIG_VERIFY_MIN_INTERVAL { + return false; + } + } + if attempts.len() >= MAX_LAST_COMMITMENT_BY_PEER && !attempts.contains_key(source) { + if let Some(victim) = attempts.iter().min_by_key(|(_, &ts)| ts).map(|(p, _)| *p) { + attempts.remove(&victim); + } + } + attempts.insert(*source, now); + true +} + +/// Verify + store an inbound commitment from a gossip peer. +/// +/// Called from the inbound `NeighborSyncRequest`/`Response` handlers and +/// the bootstrap-sync loop. Drops the commitment unless all five gates +/// pass: +/// 1. `source` is in our DHT routing table (sybil/churn cap). +/// 2. `commitment.sender_peer_id == source.as_bytes()` (peer-id +/// binding to the authenticated transport peer). +/// 3. `BLAKE3(commitment.sender_public_key) == commitment.sender_peer_id` +/// (the embedded pubkey actually belongs to the claimed identity — +/// saorsa-core derives `PeerId = BLAKE3(pubkey)`). +/// 4. `verify_commitment_signature(commitment)` succeeds against the +/// embedded public key. The signed payload binds the pubkey, so an +/// adversary cannot swap the key while keeping the body. +/// 5. The cache has room or this is an update for an existing entry +/// (sybil cap, `MAX_LAST_COMMITMENT_BY_PEER`). +/// +/// On all-pass, the commitment is stored as the auditor's per-peer +/// "last known commitment" for use as `expected_commitment_hash` in +/// future audits. +/// +/// Failures (no commitment / mismatched peer id / bad signature) are +/// silent drops — gossip is best-effort and a malformed commitment from +/// one peer should not affect anything else. +/// +/// Returns `Some(AuditTarget)` whenever a VALID commitment was stored (whether +/// or not its root changed), so the caller can run a probabilistic, +/// cooldown-gated subtree audit. Returning on *every* valid gossip — not only +/// changed ones — is deliberate (ADR-0002): a node with a stable key set keeps +/// being auditable, so it cannot pass one audit and then delete data while +/// re-gossiping the same root forever. The cooldown + probability bound the +/// audit frequency. Returns `None` only if the commitment was dropped (failed a +/// gate) or there is nothing to pin. +async fn ingest_peer_commitment( + source: &PeerId, + commitment: Option<&StorageCommitment>, + p2p_node: &Arc, + last_commitment_by_peer: &Arc>>, + ever_capable_peers: &Arc>>, + sig_verify_attempts: &Arc>>, +) -> Option { + let Some(c) = commitment else { + // Commitment-downgrade signal: a capable peer that previously gossiped a + // commitment but now gossips None is trying to drop off the audit path. + // We keep the cached commitment pinned AND return it as an audit target + // so this gossip still schedules a subtree audit against the peer's last + // known commitment. If it genuinely dropped the data, the audit fails + // (or it rejects the pin → confirmed failure). There is no periodic + // audit tick anymore, so the trigger MUST fire here or the downgrade + // would never be re-challenged. + if let Some(rec) = last_commitment_by_peer.read().await.get(source) { + if rec.commitment_capable { + if let Some(last) = rec.last_commitment.as_ref() { + if let Some(pin) = commitment_hash(last) { + warn!( + "ingest_peer_commitment: commitment-capable peer {source} sent None \ + (downgrade attempt); auditing against its last cached commitment" + ); + return Some(AuditTarget { + pin_hash: pin, + key_count: last.key_count, + }); + } + } + } + } + return None; + }; + // RT-membership gate: only accept commitments from peers in our + // routing table. Off-RT senders (sybils, drive-by relays) cannot + // populate the cache, which closes the round-7 MAJOR where a + // flood of off-RT identities could fill the cap and evict honest + // peers. The neighbor-sync request handler applies the same gate + // before admitting inbound replication hints (see neighbor_sync.rs + // `sender_in_rt`); we mirror that policy here for the commitment + // piggyback. + if !p2p_node.dht_manager().is_in_routing_table(source).await { + debug!("ingest_peer_commitment: source {source} not in routing table (dropped)"); + return None; + } + // Peer-id binding: the commitment's claimed sender must match the + // authenticated transport peer (`source`). Defeats relay/replay + // and also pins which embedded public key we are about to verify + // against — the verify itself trusts the embedded key, so the + // peer-id binding is the link to a real identity. + if &c.sender_peer_id != source.as_bytes() { + warn!( + "ingest_peer_commitment: sender_peer_id mismatch from {source} \ + (dropped, possible relay attempt)" + ); + return None; + } + // Peer-id to embedded-pubkey binding: saorsa-core derives PeerId as + // BLAKE3(pubkey_bytes). Without this check, a responder could sign + // with a throwaway key they own and lie about which identity it + // belongs to (the embedded-key signature would verify trivially). + let derived_peer_id = *blake3::hash(&c.sender_public_key).as_bytes(); + if derived_peer_id != c.sender_peer_id { + warn!( + "ingest_peer_commitment: embedded pubkey does not hash to claimed peer_id for \ + {source} (dropped, throwaway-key attack)" + ); + return None; + } + // §2 step 3 + §11 DoS: rate-limit per-peer to at most one ML-DSA + // signature verify per `COMMITMENT_SIG_VERIFY_MIN_INTERVAL`. A + // sybil/RT-membership-bypassing peer that flooded valid-looking + // gossip would otherwise burn CPU on every message. The rate + // limit is checked AFTER cheap structural gates (RT, peer-id + // binding, pubkey-binding) and BEFORE the expensive sig verify. + // + // Tracked in `sig_verify_attempts` (separate from + // last_commitment_by_peer) so EVERY attempt — successful or not — + // bumps the rate-limit clock. Reading only from PeerCommitmentRecord + // would skip the cap for peers we've never successfully verified, + // letting a flood of invalid-but-structurally-plausible gossips + // burn CPU (codex round-13 finding). + let now = Instant::now(); + if !sig_verify_rate_limit_ok(sig_verify_attempts, source, now).await { + debug!( + "ingest_peer_commitment: rate-limited sig verify from {source} \ + (< {COMMITMENT_SIG_VERIFY_MIN_INTERVAL:?} since last attempt); dropped" + ); + return None; + } + // Signature verify, using the public key embedded in the commitment + // itself. The pubkey is bound by the signature payload (see + // commitment_signed_payload) so an adversary cannot keep the body + // and swap the key to one they hold the secret for. + if !crate::replication::commitment::verify_commitment_signature(c) { + warn!( + "ingest_peer_commitment: signature did not verify under embedded key for {source} \ + (dropped, forged commitment)" + ); + return None; + } + // The new commitment's hash, used to store and to pin for the audit target. + let new_hash = commitment_hash(c); + let mut map = last_commitment_by_peer.write().await; + // Sybil/churn cap: if we're at the hard cap AND this is a new peer, + // evict an arbitrary existing entry to make room. Updates for peers + // already in the map are always accepted (they replace, not grow). + if map.len() >= MAX_LAST_COMMITMENT_BY_PEER && !map.contains_key(source) { + // Drop one arbitrary entry. HashMap iter order is random which + // is fine — over time PeerRemoved cleanup keeps the working set + // anchored on the real RT membership; this cap only fires under + // active flooding attempts. + if let Some(victim) = map.keys().next().copied() { + map.remove(&victim); + warn!( + "ingest_peer_commitment: cache full ({MAX_LAST_COMMITMENT_BY_PEER}); \ + evicted {victim} to admit {source}" + ); + } + } + // Preserve sticky commitment_capable across updates — once true, + // always true. New entries start with capable = true (we just + // verified a valid commitment from this peer). + map.entry(*source) + .and_modify(|r| { + r.last_commitment = Some(c.clone()); + r.received_at = now; + r.last_sig_verify_at = now; + r.commitment_capable = true; // sticky-redundant but explicit + }) + .or_insert_with(|| PeerCommitmentRecord::from_verified(c.clone(), now)); + drop(map); + // Record the sticky "ever v12-capable" bit in a set independent of + // `last_commitment_by_peer` (whose entries can be evicted by + // `PeerRemoved` and the sybil cap). This is what the §3 audit + // shield and the §6 holder-eligibility closure consult to decide + // whether the peer is expected to speak v12. + // + // Capped at `MAX_EVER_CAPABLE_PEERS` to bound memory under + // identity-rotation attacks: once full, new entries are refused. + // Refusal degrades to pre-round-2 behaviour for over-cap peers + // (treated as legacy on rejoin), which is not a security regression + // and preserves the historic set stable. + { + let mut set = ever_capable_peers.write().await; + if set.contains(source) || set.len() < MAX_EVER_CAPABLE_PEERS { + set.insert(*source); + } else { + warn!( + "ingest_peer_commitment: ever_capable_peers at cap \ + ({MAX_EVER_CAPABLE_PEERS}); refusing to record {source} as sticky-capable" + ); + } + } + // Return an audit target for EVERY valid stored commitment (changed or + // not), so the caller's cooldown+probability-gated trigger keeps a + // stable-keyset peer auditable over time (ADR-0002). Only a serialization + // failure (new_hash == None, unreachable for a real commitment) yields None. + new_hash.map(|pin_hash| AuditTarget { + pin_hash, + key_count: c.key_count, + }) +} + +// --------------------------------------------------------------------------- +// Storage-bound audit (v12) — responder commitment rotation +// --------------------------------------------------------------------------- + +/// Read the current LMDB key set, build + sign a fresh +/// `StorageCommitment`, and rotate it into `state` as the new `current`. +/// The prior `current` is demoted to `previous`; the prior `previous` is +/// dropped (per `ResponderCommitmentState::rotate`). +/// +/// For content-addressed chunks (Autonomi's chunk store), `address == +/// BLAKE3(content)`, so `bytes_hash := key` and we don't have to +/// re-read each chunk's bytes to compute the leaf hash. +/// +/// Skips (returns `Ok(())`) if the key set is empty — no commitment to +/// rotate. The auditor side handles "no commitment for this peer" by +/// falling back to the legacy plain-digest audit path. +async fn rebuild_and_rotate_commitment( + storage: &Arc, + identity: &Arc, + state: &Arc, + p2p: &Arc, +) -> Result<()> { + use saorsa_pqc::api::sig::{MlDsaSecretKey, MlDsaVariant}; + + let keys = storage + .all_keys() + .await + .map_err(|e| Error::Storage(format!("commitment build: read keys: {e}")))?; + if keys.is_empty() { + // Storage has emptied since the last rotation (pruning, manual + // cleanup, fresh start with stale state). Drop the previously + // advertised commitment so gossip stops piggybacking it; if we + // kept it, remote auditors would continue pinning a hash we + // can no longer answer (`missing bytes for committed key`) and + // accumulate trust failures against this node for nothing. + if state.current().is_some() { + debug!("Commitment rotation: storage empty, clearing retained slots"); + state.clear_all(); + } + return Ok(()); + } + + // Cap to MAX_COMMITMENT_KEY_COUNT for v12 (responder must not commit + // to more than the protocol limit; auditor would reject the + // commitment otherwise). + let cap = commitment::MAX_COMMITMENT_KEY_COUNT as usize; + if keys.len() > cap { + warn!( + "Commitment rotation: key set ({}) exceeds MAX_COMMITMENT_KEY_COUNT ({}); \ + truncating — investigate as this likely means a misconfiguration", + keys.len(), + cap + ); + } + + // INVARIANT: this module is only used with CONTENT-ADDRESSED chunks, + // where `key == BLAKE3(content)`, so `bytes_hash := key` and we skip a + // full chunk re-read per rotation. + // + // Consequence to be precise about: because the leaf is `(key, key)`, + // the Merkle root commits to the SET OF KEYS, not to the bytes. The + // commitment therefore binds "which keys I claim to hold"; it does NOT + // by itself prove byte possession. Byte possession is enforced by the + // audit-verify path, which recomputes `bytes_hash == BLAKE3(local_bytes)` + // and the per-key digest against the AUDITOR'S OWN local copy of the + // bytes — so a responder that holds the key list but dropped the bytes + // still fails (`missing bytes for committed key` / digest mismatch). + // This is sound ONLY while keys are content addresses. If this module + // is ever reused for non-content-addressed records (`bytes_hash != key`), + // the `(k, k)` shortcut would let a byte-less node forge a valid root and + // MUST be replaced with `(key, BLAKE3(bytes))` computed from real bytes. + let entries: Vec<_> = keys.into_iter().take(cap).map(|k| (k, k)).collect(); + + // No-op-rotation guard: compute just the Merkle root from `entries` + // and compare against the currently-advertised commitment's root. + // If they match, the key set is unchanged and a new rotation would + // only swap a randomized ML-DSA signature for a fresh one — same + // content, different commitment_hash. That invalidates every + // outstanding `recent_provers` credit on this node across the + // close group with no security benefit, breaking steady-state + // quorum liveness on large nodes that can't re-audit every key + // every rotation interval. Skip the rotation entirely when the + // tree is unchanged. + let candidate_tree = + commitment::MerkleTree::build(entries.iter().map(|(k, bh)| (*k, *bh)).collect::>()) + .map_err(|e| Error::Crypto(format!("commitment tree build: {e}")))?; + let candidate_root = candidate_tree.root(); + if let Some(current) = state.current() { + if current.commitment().root == candidate_root { + debug!( + "Commitment rotation: key set unchanged (root={}); skipping no-op re-sign", + hex::encode(candidate_root) + ); + return Ok(()); + } + } + + let sk_bytes = identity.secret_key_bytes().to_vec(); + let sk = MlDsaSecretKey::from_bytes(MlDsaVariant::MlDsa65, &sk_bytes) + .map_err(|e| Error::Crypto(format!("commitment build: load sk: {e}")))?; + let pk_bytes = identity.public_key().as_bytes().to_vec(); + let peer_id_bytes = *p2p.peer_id().as_bytes(); + + let built = commitment_state::BuiltCommitment::build(entries, &peer_id_bytes, &sk, &pk_bytes) + .map_err(|e| Error::Crypto(format!("commitment build: {e}")))?; + + let hash = hex::encode(built.hash()); + let key_count = built.commitment().key_count; + state.rotate(built); + info!("Storage commitment rotated: hash={hash} key_count={key_count}"); + Ok(()) +} + #[cfg(test)] #[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)] mod tests { - use super::audit_failure_clears_bootstrap_claim; + use super::{ + adaptive_timeout_threshold, apply_audit_failure_credit_revocation, + audit_failure_clears_bootstrap_claim, audit_failure_revokes_holder_credit, + audit_launch_decision, config, cooldown_allows_audit, decide_audit_failure_action, + median_timeout_strikes_excluding, plan_failed_audit, record_audit_timeout_strike, + timeout_strike_reaches_threshold, AuditFailureAction, AUDIT_TIMEOUT_STRIKE_MAX, + }; + use crate::replication::recent_provers::RecentProvers; use crate::replication::types::AuditFailureReason; + use saorsa_core::identity::PeerId; + use std::collections::HashMap; + use std::time::Duration; + use std::time::Instant; + + fn test_peer(b: u8) -> PeerId { + let mut bytes = [0u8; 32]; + bytes[0] = b; + PeerId::from_bytes(bytes) + } + + fn test_key(b: u8) -> crate::ant_protocol::XorName { + let mut k = [0u8; 32]; + k[0] = b; + k + } #[test] fn audit_timeout_preserves_active_bootstrap_claim() { @@ -2642,6 +3942,582 @@ mod tests { )); } + fn strike_peer(b: u8) -> PeerId { + let mut bytes = [0u8; 32]; + bytes[0] = b; + PeerId::from_bytes(bytes) + } + + // HELPER-LEVEL: counter arithmetic + threshold predicate. The reset is + // simulated by an in-test `strikes.remove`; the real reset path (the + // `Passed` arm) is covered at the glue level below. + #[test] + fn single_timeout_then_success_emits_no_failure_and_resets() { + let peer = strike_peer(1); + let mut strikes: HashMap = HashMap::new(); + let base = config::AUDIT_TIMEOUT_STRIKE_THRESHOLD; + let after_one = record_audit_timeout_strike(&mut strikes, &peer); + assert_eq!(after_one, 1); + assert!(!timeout_strike_reaches_threshold(after_one, base)); + strikes.remove(&peer); + assert!(!strikes.contains_key(&peer)); + } + + #[test] + fn consecutive_timeouts_cross_threshold_at_n() { + let peer = strike_peer(2); + let mut strikes: HashMap = HashMap::new(); + let n = config::AUDIT_TIMEOUT_STRIKE_THRESHOLD; + let mut last = 0; + for i in 1..=n { + last = record_audit_timeout_strike(&mut strikes, &peer); + if i < n { + assert!(!timeout_strike_reaches_threshold(last, n)); + } + } + assert!(timeout_strike_reaches_threshold(last, n)); + // The count keeps climbing past the base threshold (so it can also + // cross a higher *adaptive* threshold), but is bounded by the strike + // cap — no unbounded growth. + let mut c = last; + for _ in 0..200 { + c = record_audit_timeout_strike(&mut strikes, &peer); + } + assert_eq!( + c, + super::AUDIT_TIMEOUT_STRIKE_MAX, + "count saturates at the max cap" + ); + assert!(c > n, "count must be able to exceed the base threshold"); + } + + // ADR-0002 Network Resilience: adaptive timeout threshold. + + #[test] + fn median_timeout_strikes_basics() { + let target = strike_peer(99); + let mut strikes: HashMap = HashMap::new(); + // No other peers → 0 (healthy network, threshold == base). + assert_eq!(median_timeout_strikes_excluding(&strikes, &target), 0); + strikes.insert(strike_peer(1), 1); + strikes.insert(strike_peer(2), 3); + strikes.insert(strike_peer(3), 5); + // Sorted [1,3,5], lower-median index 1 → 3. + assert_eq!(median_timeout_strikes_excluding(&strikes, &target), 3); + } + + // ADVERSARIAL (ADR point e + sybil-inflation bound). Two invariants the + // existing suite leaves unpinned: + // 1. EVEN-count inputs must take the LOWER of the two middle values. The + // existing basics test only feeds an odd-length cohort, so an + // implementation that used `len/2` (upper median) would still pass it. + // Here [1,4] -> lower median 1 (not 4) and [2,4,6,8] -> 4 (not 6). + // 2. A sybil cohort pinned at the *strike cap* (the most an attacker could + // ever drive fabricated peers to) STILL cannot push the grace past + // MAX_ADAPTIVE_TIMEOUT_GRACE: the threshold saturates at base + max + // grace regardless of how high or how numerous the cohort is. + // FLIPS IF: median switches to the upper element on even input, or the + // grace clamp (`.min(MAX_ADAPTIVE_TIMEOUT_GRACE)`) is removed. + #[test] + fn even_count_takes_lower_median_and_sybil_cohort_cannot_exceed_grace_bound() { + let target = strike_peer(150); + + // Even count == 2: lower of [1, 4] is 1. + let mut two: HashMap = HashMap::new(); + two.insert(strike_peer(1), 1); + two.insert(strike_peer(2), 4); + assert_eq!( + median_timeout_strikes_excluding(&two, &target), + 1, + "even-count median must take the LOWER middle value (1), not the upper (4)" + ); + + // Even count == 4: sorted [2,4,6,8], lower median index (4-1)/2 = 1 → 4. + let mut four: HashMap = HashMap::new(); + for (i, v) in [2u32, 4, 6, 8].into_iter().enumerate() { + four.insert(strike_peer(10 + i as u8), v); + } + assert_eq!( + median_timeout_strikes_excluding(&four, &target), + 4, + "even-count median must be the lower middle (4), not the upper (6)" + ); + + // Sybil cohort pinned at the strike CAP — the strongest inflation an + // attacker could mount — must not lift the threshold past base + max + // grace. Try several cohort sizes (odd and even) to be sure. + for cohort in [2u8, 5, 8, 20] { + let mut strikes: HashMap = HashMap::new(); + for i in 0..cohort { + strikes.insert(strike_peer(50 + i), super::AUDIT_TIMEOUT_STRIKE_MAX); + } + let threshold = adaptive_timeout_threshold(&strikes, &target); + assert_eq!( + threshold, + config::AUDIT_TIMEOUT_STRIKE_THRESHOLD + super::MAX_ADAPTIVE_TIMEOUT_GRACE, + "a sybil cohort at the strike cap (size {cohort}) must saturate the grace at \ + the bound, never exceed it" + ); + } + + // And even at the bounded-but-inflated threshold, a genuinely + // non-responsive target can still cross it (cap > max reachable + // threshold), so the bound never shields a bad node forever. + let mut strikes: HashMap = HashMap::new(); + for i in 0..8u8 { + strikes.insert(strike_peer(80 + i), super::AUDIT_TIMEOUT_STRIKE_MAX); + } + let threshold = adaptive_timeout_threshold(&strikes, &target); + let mut c = 0; + for _ in 0..(threshold + 5) { + c = record_audit_timeout_strike(&mut strikes, &target); + } + assert!( + timeout_strike_reaches_threshold(c, threshold), + "target must still cross the bounded inflated threshold ({c} vs {threshold})" + ); + } + + #[test] + fn lone_timing_out_peer_does_not_inflate_its_own_grace() { + // The peer under judgement is excluded from the median, so a single bad + // peer (the common case) is judged against the base threshold and caught + // — it cannot raise its own bar as its strike count climbs. + let bad = strike_peer(7); + let mut strikes: HashMap = HashMap::new(); + strikes.insert(bad, 5); // its own large count must not count + assert_eq!( + adaptive_timeout_threshold(&strikes, &bad), + config::AUDIT_TIMEOUT_STRIKE_THRESHOLD + ); + } + + #[test] + fn widespread_timeouts_widen_the_grace() { + // Genuine disruption: many OTHER honest peers carry timeout strikes. The + // median rises, so the threshold for any given peer widens beyond the + // base — the audit system does not pile onto a struggling network. + let target = strike_peer(100); + let mut strikes: HashMap = HashMap::new(); + for i in 0..9u8 { + strikes.insert(strike_peer(i), 4); + } + assert_eq!( + adaptive_timeout_threshold(&strikes, &target), + 4 + config::AUDIT_TIMEOUT_STRIKE_THRESHOLD + ); + assert!( + adaptive_timeout_threshold(&strikes, &target) > config::AUDIT_TIMEOUT_STRIKE_THRESHOLD + ); + } + + #[test] + fn adaptive_grace_only_responds_to_timeouts_not_deterministic_failures() { + // The strike map is fed ONLY by timeouts (plan_failed_audit records a + // strike for Timeout and never for confirmed failures). So a flood of + // deterministic failures cannot inflate the median to buy grace. + let target = strike_peer(101); + let mut strikes: HashMap = HashMap::new(); + // Many confirmed (non-timeout) failures: these must NOT touch the map. + for i in 0..9u8 { + let action = plan_failed_audit( + &AuditFailureReason::DigestMismatch, + &mut strikes, + &strike_peer(i), + ); + assert_eq!(action, AuditFailureAction::ConfirmedPenalize); + } + assert!( + strikes.is_empty(), + "deterministic failures must not record strikes" + ); + // Threshold stays at the base — an attacker cannot widen grace by + // failing audits on purpose. + assert_eq!( + adaptive_timeout_threshold(&strikes, &target), + config::AUDIT_TIMEOUT_STRIKE_THRESHOLD + ); + } + + // ADR-0002: "occasional surprise exams, keeps load low" — the per-peer + // cooldown must collapse a gossip flood into at most one audit per window. + + #[test] + fn gossip_flood_yields_at_most_one_audit_per_cooldown_window() { + let peer = strike_peer(1); + let mut map: HashMap = HashMap::new(); + let t0 = Instant::now(); + // First gossip in the window passes; a burst of further gossips at the + // same instant are all suppressed. + assert!(cooldown_allows_audit(&mut map, &peer, t0)); + let mut passed = 1; + for _ in 0..100 { + if cooldown_allows_audit(&mut map, &peer, t0) { + passed += 1; + } + } + assert_eq!( + passed, 1, + "a flood at one instant must trigger exactly one audit" + ); + } + + // ADR-0002 ordering invariant: `maybe_trigger_gossip_audit` stamps the + // per-peer cooldown BEFORE the probability lottery, so a LOSING ticket still + // consumes the window. This is the property the isolated cooldown tests above + // cannot see: they never sample the lottery, so a regression that reordered + // the gates (sample probability first, only stamp the cooldown on a win) + // would still pass them while breaking flood-resistance: a flood would then + // re-roll the lottery on EVERY message until one won, multiplying audits. + // + // We model the exact production gate order (cooldown-then-lottery) with a + // lottery driven by a fixed outcome instead of `gen_bool(..)`. The first + // message LOSES the lottery; the remaining flood messages all WIN. With the + // production order, the losing first ticket burns the window and every later + // winner in the same window is blocked, so there are 0 audits this window. If + // the gates were flipped, the second message's winning ticket would slip + // through. The window only reopens after the cooldown elapses. + // + // FLIPS IF: the lottery is sampled before `cooldown_allows_audit` (a losing + // ticket no longer consumes the window), re-enabling a flood-amplified audit + // storm. + #[test] + fn losing_lottery_still_consumes_cooldown_window() { + // Faithful re-implementation of the two gates in + // `maybe_trigger_gossip_audit`, with the lottery outcome made + // deterministic instead of `rand::thread_rng().gen_bool(..)`. + // Calls the SHIPPED `audit_launch_decision` (the same function + // `maybe_trigger_gossip_audit` uses), so a reorder of the two gates in + // production fails this test — not a local reimplementation. + let peer = strike_peer(3); + let mut map: HashMap = HashMap::new(); + let t0 = Instant::now(); + + // First flooded message at t0 LOSES the lottery, but the cooldown is + // stamped BEFORE the lottery is consulted, so the window is now consumed. + assert!( + !audit_launch_decision(&mut map, &peer, t0, false), + "a losing ticket launches no audit" + ); + + // 99 more flooded messages at the same instant would all WIN the lottery, + // yet every one must be blocked by the cooldown the loser already stamped. + // (If production sampled the lottery FIRST, these would each get a fresh + // roll and audits would multiply — this assertion catches that reorder.) + let mut audits = 0; + for _ in 0..99 { + if audit_launch_decision(&mut map, &peer, t0, true) { + audits += 1; + } + } + assert_eq!( + audits, 0, + "a losing first ticket must consume the window so no later flooded \ + message in the same window can audit" + ); + + // The window only reopens after the cooldown elapses; the next winning + // ticket then launches exactly one audit. + let after = t0 + Duration::from_secs(config::AUDIT_ON_GOSSIP_COOLDOWN_SECS + 1); + assert!( + audit_launch_decision(&mut map, &peer, after, true), + "after the cooldown a winning ticket audits again" + ); + } + + #[test] + fn cooldown_lets_audit_through_after_the_window() { + let peer = strike_peer(2); + let mut map: HashMap = HashMap::new(); + let t0 = Instant::now(); + assert!(cooldown_allows_audit(&mut map, &peer, t0)); + // Within the window: suppressed. + let within = t0 + Duration::from_secs(config::AUDIT_ON_GOSSIP_COOLDOWN_SECS - 1); + assert!(!cooldown_allows_audit(&mut map, &peer, within)); + // Past the window: allowed again. + let after = t0 + Duration::from_secs(config::AUDIT_ON_GOSSIP_COOLDOWN_SECS + 1); + assert!(cooldown_allows_audit(&mut map, &peer, after)); + } + + #[test] + fn cooldown_is_per_peer_independent() { + let mut map: HashMap = HashMap::new(); + let t0 = Instant::now(); + // Different peers each get their own first-audit pass at the same instant. + for i in 0..20u8 { + assert!( + cooldown_allows_audit(&mut map, &strike_peer(i), t0), + "peer {i} should be auditable independently" + ); + } + } + + #[test] + fn inflated_adaptive_threshold_is_still_reachable_and_bounded() { + // codex finding: when the median lifts the threshold above the base, a + // genuinely non-responsive peer's strike count must still be able to + // reach it (the count is no longer capped at the base). And the grace + // widening itself is bounded so it can't shield a bad node forever. + let target = strike_peer(200); + let mut strikes: HashMap = HashMap::new(); + // A cohort of other peers each at a high strike count. + for i in 0..9u8 { + strikes.insert(strike_peer(i), 10); + } + let threshold = adaptive_timeout_threshold(&strikes, &target); + // Grace is capped, so the threshold cannot exceed base + max grace. + assert!( + threshold <= config::AUDIT_TIMEOUT_STRIKE_THRESHOLD + super::MAX_ADAPTIVE_TIMEOUT_GRACE + ); + assert!(threshold > config::AUDIT_TIMEOUT_STRIKE_THRESHOLD); + // The target peer can accumulate strikes past that inflated threshold. + let mut c = 0; + for _ in 0..threshold + 5 { + c = record_audit_timeout_strike(&mut strikes, &target); + } + assert!( + timeout_strike_reaches_threshold(c, threshold), + "a persistent peer must be able to cross the inflated threshold ({c} vs {threshold})" + ); + } + + #[test] + fn audit_on_gossip_constants_match_adr() { + // Tripwire on the ADR-locked tunables. + assert_eq!(config::AUDIT_SPOTCHECK_COUNT, 8); + assert!((config::AUDIT_ON_GOSSIP_PROBABILITY - 0.2).abs() < f64::EPSILON); + assert_eq!(config::AUDIT_ON_GOSSIP_COOLDOWN_SECS, 30 * 60); + } + + // (d) A confirmed storage-integrity failure penalizes immediately and + // revokes credit; it is not a timeout. + #[test] + fn digest_mismatch_is_not_a_timeout_and_penalizes_immediately() { + assert!(audit_failure_clears_bootstrap_claim( + &AuditFailureReason::DigestMismatch + )); + assert!(audit_failure_revokes_holder_credit( + &AuditFailureReason::DigestMismatch + )); + } + + // E2E (pure decision): an honest peer that times out once, recovers, + // repeatedly, never reaches a penalty because each success resets strikes. + // FLIPS IF: the strike threshold is removed or success stops resetting. + #[test] + fn e2e_honest_intermittent_timeouts_never_penalized() { + let peer = strike_peer(10); + let base = config::AUDIT_TIMEOUT_STRIKE_THRESHOLD; + let mut strikes: HashMap = HashMap::new(); + for _ in 0..10 { + let after = record_audit_timeout_strike(&mut strikes, &peer); + assert_eq!( + decide_audit_failure_action(&AuditFailureReason::Timeout, after, base), + AuditFailureAction::TimeoutGrace + ); + strikes.remove(&peer); + } + assert!(!strikes.contains_key(&peer)); + } + + // E2E: a peer that times out on EVERY audit (never reset) crosses the + // threshold and is penalized — the deterrent against non-storing peers. + // FLIPS IF: per-challenge window widened so it answers in time, or strikes + // reset without a success. + #[test] + fn e2e_persistent_timeouts_get_penalized() { + let peer = strike_peer(11); + let mut strikes: HashMap = HashMap::new(); + let threshold = config::AUDIT_TIMEOUT_STRIKE_THRESHOLD; + let mut penalized_at = None; + for tick in 1..=(threshold + 2) { + let after = record_audit_timeout_strike(&mut strikes, &peer); + if decide_audit_failure_action(&AuditFailureReason::Timeout, after, threshold) + == AuditFailureAction::TimeoutPenalize + && penalized_at.is_none() + { + penalized_at = Some(tick); + } + } + assert_eq!(penalized_at, Some(threshold)); + } + + // Glue: a Timeout through the real plan_failed_audit MUST record a strike on + // the map AND penalize once enough accumulate. + // FLIPS IF: the handler stops feeding Timeout through the strike counter + // (e.g. strikes_after hard-coded to 0). (Mutation-verified.) + #[test] + fn e2e_glue_timeout_records_strike_and_penalizes_at_threshold() { + let peer = strike_peer(20); + let mut strikes: HashMap = HashMap::new(); + let threshold = config::AUDIT_TIMEOUT_STRIKE_THRESHOLD; + let mut action = AuditFailureAction::TimeoutGrace; + for tick in 1..=threshold { + action = plan_failed_audit(&AuditFailureReason::Timeout, &mut strikes, &peer); + assert_eq!(strikes.get(&peer).copied(), Some(tick)); + } + assert_eq!(action, AuditFailureAction::TimeoutPenalize); + } + + // Glue: a confirmed failure through plan_failed_audit must NOT touch the + // strike map and must return ConfirmedPenalize. + #[test] + fn e2e_glue_confirmed_failure_leaves_strike_map_untouched() { + let peer = strike_peer(21); + let mut strikes: HashMap = HashMap::new(); + for reason in [ + AuditFailureReason::DigestMismatch, + AuditFailureReason::KeyAbsent, + AuditFailureReason::Rejected, + AuditFailureReason::MalformedResponse, + ] { + assert_eq!( + plan_failed_audit(&reason, &mut strikes, &peer), + AuditFailureAction::ConfirmedPenalize + ); + } + assert!(strikes.is_empty()); + } + + // ADR-0002 "Accounting and False Positives", adversarial: a DETERMINISTIC + // failure is acted on the FIRST time it occurs, "regardless of network + // conditions". Here the strike map is pre-loaded with many *other* peers + // timing out, which inflates the adaptive timeout grace to its cap — the + // most forgiving the network ever gets. Under that maximally-relaxed + // window: + // - a brand-new peer's FIRST deterministic failure (DigestMismatch / + // Rejected / MalformedResponse) STILL returns ConfirmedPenalize, never + // a grace lane, and never touches the strike map; while + // - that same peer's FIRST timeout is only TimeoutGrace. + // This proves the inflated grace is the timeout-only lane and can NEVER be + // weaponized to buy a deterministic failure even one round of delay. + // FLIPS IF: deterministic failures start consulting the strike threshold, + // or ConfirmedPenalize is collapsed into a timeout action. + #[test] + fn deterministic_failure_penalizes_first_time_under_inflated_grace() { + let mut strikes: HashMap = HashMap::new(); + // Saturate the adaptive grace: many other peers each carrying a high + // consecutive-timeout count, so the median (and thus the grace) is + // pushed to its MAX cap for any newly-judged peer. + for b in 100..150u8 { + let other = strike_peer(b); + for _ in 0..AUDIT_TIMEOUT_STRIKE_MAX { + record_audit_timeout_strike(&mut strikes, &other); + } + } + let victim = strike_peer(7); + // Sanity: the grace seen by the victim is genuinely inflated above base. + let inflated = adaptive_timeout_threshold(&strikes, &victim); + assert!( + inflated > config::AUDIT_TIMEOUT_STRIKE_THRESHOLD, + "test precondition: grace must be inflated, got {inflated}" + ); + + // First deterministic failure of each kind -> ConfirmedPenalize on + // occurrence #1, and the victim is never inserted into the strike map. + for reason in [ + AuditFailureReason::DigestMismatch, + AuditFailureReason::Rejected, + AuditFailureReason::MalformedResponse, + ] { + let action = plan_failed_audit(&reason, &mut strikes, &victim); + assert_eq!( + action, + AuditFailureAction::ConfirmedPenalize, + "{reason:?} must penalize on the first occurrence regardless of grace" + ); + assert_ne!( + action, + AuditFailureAction::TimeoutPenalize, + "a deterministic failure must NOT be routed through the (eviction-gated) \ + timeout-penalize lane" + ); + assert!( + !strikes.contains_key(&victim), + "deterministic failure must not touch the timeout strike map" + ); + // And it always revokes holder credit / clears the claim. + assert!(audit_failure_revokes_holder_credit(&reason)); + assert!(audit_failure_clears_bootstrap_claim(&reason)); + } + + // The SAME victim's first timeout, under the same inflated grace, is + // only TimeoutGrace (no penalty, no revocation, claim retained). + let timeout_action = plan_failed_audit(&AuditFailureReason::Timeout, &mut strikes, &victim); + assert_eq!(timeout_action, AuditFailureAction::TimeoutGrace); + assert_eq!(strikes.get(&victim).copied(), Some(1)); + assert!(!audit_failure_revokes_holder_credit( + &AuditFailureReason::Timeout + )); + assert!(!audit_failure_clears_bootstrap_claim( + &AuditFailureReason::Timeout + )); + } + + /// The exact decision the `Failed` arm of `handle_audit_result` + /// uses: confirmed failures revoke credit, `Timeout` does not. + #[test] + fn confirmed_failures_revoke_credit_timeout_does_not() { + for reason in [ + AuditFailureReason::MalformedResponse, + AuditFailureReason::DigestMismatch, + AuditFailureReason::KeyAbsent, + AuditFailureReason::Rejected, + ] { + assert!( + audit_failure_revokes_holder_credit(&reason), + "confirmed failure {reason:?} must revoke holder credit" + ); + } + assert!( + !audit_failure_revokes_holder_credit(&AuditFailureReason::Timeout), + "Timeout must NOT revoke credit (single dropped packet != storage loss)" + ); + } + + /// Wiring test for the security fix: the helper the handler calls + /// actually strips a credited peer on a confirmed failure + /// (`DigestMismatch`), and actually RETAINS credit on `Timeout`. + /// Records genuine credit first so neither assertion is vacuous; + /// this fails if `forget_peer` stops being called, or if the + /// `Timeout` exclusion is dropped (both verified by mutation). + #[test] + fn apply_revocation_strips_on_digest_mismatch_retains_on_timeout() { + let peer = test_peer(0xAB); + let key = test_key(1); + let hash = [0xCD; 32]; + + // Confirmed failure -> credit revoked. + let mut provers = RecentProvers::new(); + provers.record_proof(key, peer, hash, Instant::now()); + assert!( + provers.is_credited_holder(&key, &peer, &hash), + "precondition: peer credited before failure" + ); + apply_audit_failure_credit_revocation( + &mut provers, + &peer, + &AuditFailureReason::DigestMismatch, + ); + assert!( + !provers.is_credited_holder(&key, &peer, &hash), + "DigestMismatch must strip the peer's holder credit" + ); + + // Timeout -> credit retained. + let mut provers_timeout = RecentProvers::new(); + provers_timeout.record_proof(key, peer, hash, Instant::now()); + apply_audit_failure_credit_revocation( + &mut provers_timeout, + &peer, + &AuditFailureReason::Timeout, + ); + assert!( + provers_timeout.is_credited_holder(&key, &peer, &hash), + "Timeout must retain holder credit (deliberate liveness cushion)" + ); + } + #[test] fn decoded_audit_failures_clear_active_bootstrap_claim() { for reason in [ diff --git a/src/replication/neighbor_sync.rs b/src/replication/neighbor_sync.rs index 897d41ad..b84dab6a 100644 --- a/src/replication/neighbor_sync.rs +++ b/src/replication/neighbor_sync.rs @@ -182,11 +182,23 @@ pub async fn sync_with_peer( config: &ReplicationConfig, is_bootstrapping: bool, ) -> Option { - sync_with_peer_with_outcome(peer, p2p_node, storage, paid_list, config, is_bootstrapping) - .await - .map(|outcome| outcome.response) + sync_with_peer_with_outcome( + peer, + p2p_node, + storage, + paid_list, + config, + is_bootstrapping, + None, + ) + .await + .map(|outcome| outcome.response) } +/// `commitment`: sender's current commitment to piggyback on the request. +/// `None` if the responder hasn't rotated one yet (e.g. fresh boot, +/// empty storage) — receiver falls back to legacy path. +#[allow(clippy::too_many_arguments)] pub(crate) async fn sync_with_peer_with_outcome( peer: &PeerId, p2p_node: &Arc, @@ -194,6 +206,7 @@ pub(crate) async fn sync_with_peer_with_outcome( paid_list: &Arc, config: &ReplicationConfig, is_bootstrapping: bool, + commitment: Option, ) -> Option { // Build peer-targeted hint sets (Rule 7). let sent_replica_hints = build_replica_hints_for_peer_with_close_groups( @@ -215,6 +228,7 @@ pub(crate) async fn sync_with_peer_with_outcome( replica_hints, paid_hints, bootstrapping: is_bootstrapping, + commitment, }; let request_id = rand::thread_rng().gen::(); let msg = ReplicationMessage { @@ -335,11 +349,13 @@ pub async fn handle_sync_request( paid_list, config, is_bootstrapping, + None, ) .await; (response, sender_in_rt) } +#[allow(clippy::too_many_arguments)] pub(crate) async fn handle_sync_request_with_proofs( sender: &PeerId, _request: &NeighborSyncRequest, @@ -348,6 +364,7 @@ pub(crate) async fn handle_sync_request_with_proofs( paid_list: &Arc, config: &ReplicationConfig, is_bootstrapping: bool, + my_commitment: Option, ) -> (NeighborSyncResponse, Vec, bool) { let sender_in_rt = p2p_node.dht_manager().is_in_routing_table(sender).await; @@ -376,6 +393,7 @@ pub(crate) async fn handle_sync_request_with_proofs( paid_hints, bootstrapping: is_bootstrapping, rejected_keys: Vec::new(), + commitment: my_commitment, }; // Rule 4-6: accept inbound hints only if sender is in LocalRT. @@ -977,6 +995,7 @@ mod tests { paid_hints: outbound_paid_hints.clone(), bootstrapping: false, rejected_keys: Vec::new(), + commitment: None, }; // Inbound hints from the sender (would be in the request). diff --git a/src/replication/protocol.rs b/src/replication/protocol.rs index a5151a33..e6f74031 100644 --- a/src/replication/protocol.rs +++ b/src/replication/protocol.rs @@ -109,11 +109,21 @@ pub enum ReplicationMessageBody { /// Response with the record data. FetchResponse(FetchResponse), - // === Audit (Section 15) === - /// Storage audit challenge. + // === Single-key audit (prune-confirmation) === + /// Single-key audit challenge (used by prune confirmation). AuditChallenge(AuditChallenge), - /// Response to audit challenge. + /// Response to a single-key audit challenge. AuditResponse(AuditResponse), + + // === Storage-bound subtree audit (ADR-0002) === + /// Gossip-triggered contiguous-subtree storage audit challenge (round 1). + SubtreeAuditChallenge(SubtreeAuditChallenge), + /// Response to a contiguous-subtree storage audit challenge (round 1). + SubtreeAuditResponse(SubtreeAuditResponse), + /// Surprise byte challenge for the spot-checked leaves (round 2). + SubtreeByteChallenge(SubtreeByteChallenge), + /// Response carrying the requested chunks' original bytes (round 2). + SubtreeByteResponse(SubtreeByteResponse), } // --------------------------------------------------------------------------- @@ -177,6 +187,14 @@ pub struct NeighborSyncRequest { pub paid_hints: Vec, /// Whether sender is currently bootstrapping. pub bootstrapping: bool, + /// Sender's signed storage commitment (optional, see + /// [`crate::replication::commitment`]). `None` from old peers; from + /// new peers this carries the Merkle-root commitment over the + /// sender's claimed keys. Receivers that recognize it store it as + /// the per-peer "last known commitment" used to pin commitment-bound + /// audits. + #[serde(default)] + pub commitment: Option, } /// Neighbor sync response carrying own hint sets. @@ -190,6 +208,10 @@ pub struct NeighborSyncResponse { pub bootstrapping: bool, /// Keys that receiver rejected (optional feedback to sender). pub rejected_keys: Vec, + /// Receiver's signed storage commitment (optional, see + /// [`NeighborSyncRequest::commitment`]). + #[serde(default)] + pub commitment: Option, } // --------------------------------------------------------------------------- @@ -271,11 +293,12 @@ pub enum FetchResponse { // Audit Messages // --------------------------------------------------------------------------- -/// Storage audit challenge (Section 15). +/// Single-key audit challenge. /// /// The challenger picks a random nonce and a set of keys the challenged peer -/// should hold, then sends this challenge. The challenged peer must prove -/// storage by returning per-key BLAKE3 digests. +/// should hold, then sends this challenge. The challenged peer proves storage +/// by returning per-key BLAKE3 digests. Used by the prune-confirmation path +/// (a node checks a peer still holds a key before pruning its own copy). #[derive(Debug, Clone, Serialize, Deserialize)] pub struct AuditChallenge { /// Unique challenge identifier. @@ -288,7 +311,7 @@ pub struct AuditChallenge { pub keys: Vec, } -/// Response to audit challenge. +/// Response to a single-key audit challenge. #[derive(Debug, Clone, Serialize, Deserialize)] pub enum AuditResponse { /// Per-key digests proving storage. @@ -318,6 +341,158 @@ pub enum AuditResponse { }, } +/// Gossip-triggered contiguous-subtree storage audit challenge (ADR-0002). +/// +/// The auditor pins the commitment a peer just gossiped and sends a fresh +/// random nonce. The nonce alone deterministically selects one contiguous +/// subtree of the peer's committed Merkle tree (see +/// [`crate::replication::subtree::select_subtree_path`]); the auditor does +/// **not** name keys. The responder must reply with a +/// [`SubtreeAuditResponse::Proof`] for that selected subtree against the pinned +/// commitment, or a [`SubtreeAuditResponse::Rejected`] if it genuinely cannot +/// (for a recently gossiped pinned commitment a rejection is a confirmed +/// failure, since the responder retains its last two gossiped commitments). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SubtreeAuditChallenge { + /// Unique challenge identifier. + pub challenge_id: u64, + /// Random nonce. Selects the subtree AND freshens each leaf's possession + /// hash, so a stored answer cannot be replayed. + pub nonce: [u8; 32], + /// Challenged peer ID. Bound into each leaf's possession hash. + pub challenged_peer_id: [u8; 32], + /// The auditor's pin: the [`crate::replication::commitment::commitment_hash`] + /// of the commitment the peer just gossiped. The response's commitment must + /// hash to exactly this value. + pub expected_commitment_hash: [u8; 32], +} + +/// Response to a contiguous-subtree storage audit challenge (ADR-0002). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum SubtreeAuditResponse { + /// The single-contiguous-subtree proof. + /// + /// Carries the responder's signed commitment (so the auditor re-derives + /// `key_count` and confirms the pin and signature) and the + /// nonce-selected subtree expanded to its leaves plus the sibling + /// cut-hashes on the path to the root. This is **round 1** of the + /// two-round audit. The auditor: + /// 1. confirms `commitment_hash(commitment) == expected_commitment_hash` + /// and the signature is valid; + /// 2. re-derives the selected subtree from `(nonce, key_count)`, rebuilds + /// the root from the proof, and requires it to equal the commitment + /// root (structure). + /// + /// The leaves carry only hashes (`bytes_hash`, `nonced_hash`), so this round + /// proves the tree SHAPE is committed — not that the bytes are still held. + /// Real possession is proven in **round 2**: the auditor picks a few of the + /// just-verified leaves and sends a [`SubtreeByteChallenge`] requesting their + /// original chunk bytes FROM the responder (see that type). + Proof { + /// The challenge this response answers. + challenge_id: u64, + /// The signed commitment whose root the proof is against. + commitment: crate::replication::commitment::StorageCommitment, + /// The nonce-selected contiguous subtree proof. + proof: crate::replication::subtree::SubtreeProof, + }, + /// Peer is still bootstrapping (not ready for audit). + Bootstrapping { + /// The challenge this response answers. + challenge_id: u64, + }, + /// Challenge rejected. The `reason` is for logging only; for a recently + /// gossiped pinned commitment a rejection is a confirmed failure (the + /// responder retains its last two gossiped commitments and must be able to + /// answer either). + Rejected { + /// The challenge this response answers. + challenge_id: u64, + /// Human-readable rejection reason. + reason: String, + }, +} + +/// Round 2 of the storage audit (ADR-0002): the **surprise byte challenge**. +/// +/// After the auditor has structurally verified a [`SubtreeAuditResponse::Proof`] +/// it picks a small, nonce-derived random sample of that subtree's just-proven +/// leaves (the responder cannot predict which) and asks the responder to return +/// the ORIGINAL chunk bytes for exactly those keys. The auditor then checks each +/// returned chunk against the committed leaf: +/// - `BLAKE3(bytes) == leaf.bytes_hash` (the chunk's content address), AND +/// - `compute_audit_digest(nonce, peer, key, bytes) == leaf.nonced_hash`. +/// +/// This makes possession non-delegable to the auditor: the auditor needs to +/// hold NONE of the responder's chunks. A responder that committed to a chunk it +/// no longer holds cannot fabricate bytes that hash to the committed address (a +/// preimage break), so it is caught regardless of who audits it. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SubtreeByteChallenge { + /// The same `challenge_id` as the round-1 [`SubtreeAuditChallenge`], so the + /// responder/auditor correlate the two rounds. + pub challenge_id: u64, + /// The same nonce as round 1 — needed for the freshness (`nonced_hash`) + /// check and to bind these bytes to this audit. + pub nonce: [u8; 32], + /// The challenged peer ID (bound into each leaf's possession hash). + pub challenged_peer_id: [u8; 32], + /// The pinned commitment hash from round 1, so the responder resolves the + /// SAME tree it just proved and serves bytes only for keys it committed to. + pub expected_commitment_hash: [u8; 32], + /// The exact keys whose original bytes the responder must return. These are + /// the auditor's nonce-derived spot-check sample of the round-1 subtree. + pub keys: Vec, +} + +/// One requested chunk in a [`SubtreeByteResponse`]. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub enum SubtreeByteItem { + /// The responder holds this committed key and returns its original bytes. + Present { + /// The requested key. + key: XorName, + /// The original chunk bytes (the auditor re-hashes to verify). + bytes: Vec, + }, + /// The responder committed to this key but cannot serve its bytes. This is a + /// PROVABLE cheat (it published a commitment over a chunk it does not hold), + /// so the auditor counts it as a confirmed failure — NOT a graced timeout. + /// Distinguishing this explicit signal from silence is what separates a + /// deleter (instant fail) from a dropped packet (timeout). + Absent { + /// The committed key the responder could not serve. + key: XorName, + }, +} + +/// Response to a [`SubtreeByteChallenge`] (round 2). One item per requested key, +/// in the requested order. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum SubtreeByteResponse { + /// The responder's per-key answers (bytes or an explicit absent signal). + Items { + /// The challenge this response answers. + challenge_id: u64, + /// One entry per requested key. + items: Vec, + }, + /// Peer is still bootstrapping (should not happen mid-audit, but handled). + Bootstrapping { + /// The challenge this response answers. + challenge_id: u64, + }, + /// The responder rejects the byte challenge outright (e.g. it no longer + /// retains the pinned commitment). For a recently gossiped commitment the + /// auditor treats this as a confirmed failure, like round 1. + Rejected { + /// The challenge this response answers. + challenge_id: u64, + /// Human-readable rejection reason. + reason: String, + }, +} + // --------------------------------------------------------------------------- // Audit digest helper // --------------------------------------------------------------------------- @@ -490,6 +665,110 @@ mod tests { // === Neighbor Sync roundtrips === + // -- backwards compat across the wire-type extension -------------------- + + /// Backwards-compat: an old peer that has the v0 layout of + /// `NeighborSyncRequest` (no `commitment` field) can still decode a + /// message encoded by a new peer that emits `commitment: None`. This + /// is the realistic mixed-version case during rollout: new peers + /// gossip with the field; old peers must not crash. + /// + /// The check works because postcard's [`from_bytes`] is lenient on + /// trailing bytes — the old decoder reads what it knows about and + /// stops, the new fields are silently ignored. This test pins that + /// invariant so any future codec/library swap that breaks it is + /// caught immediately. + #[test] + fn old_decoder_tolerates_new_neighbor_sync_request() { + use serde::Deserialize; + #[derive(Deserialize)] + struct OldNeighborSyncRequest { + #[allow(dead_code)] + pub replica_hints: Vec, + #[allow(dead_code)] + pub paid_hints: Vec, + #[allow(dead_code)] + pub bootstrapping: bool, + } + + let new_req = NeighborSyncRequest { + replica_hints: vec![[0x01; 32], [0x02; 32]], + paid_hints: vec![[0x03; 32]], + bootstrapping: true, + commitment: None, + }; + let encoded = postcard::to_stdvec(&new_req).expect("encode"); + let old_decoded: OldNeighborSyncRequest = + postcard::from_bytes(&encoded).expect("old decoder accepts"); + // Field-by-field check would fail if old peer misaligned on the + // length prefix — passing decode is the structural check. + assert_eq!(old_decoded.replica_hints.len(), 2); + assert_eq!(old_decoded.paid_hints.len(), 1); + assert!(old_decoded.bootstrapping); + } + + /// Same property for `NeighborSyncResponse`. + #[test] + fn old_decoder_tolerates_new_neighbor_sync_response() { + use serde::Deserialize; + #[derive(Deserialize)] + struct OldNeighborSyncResponse { + #[allow(dead_code)] + pub replica_hints: Vec, + #[allow(dead_code)] + pub paid_hints: Vec, + #[allow(dead_code)] + pub bootstrapping: bool, + #[allow(dead_code)] + pub rejected_keys: Vec, + } + + let new_resp = NeighborSyncResponse { + replica_hints: vec![[0x04; 32]], + paid_hints: vec![], + bootstrapping: false, + rejected_keys: vec![[0x05; 32]], + commitment: None, + }; + let encoded = postcard::to_stdvec(&new_resp).expect("encode"); + let old_decoded: OldNeighborSyncResponse = + postcard::from_bytes(&encoded).expect("old decoder accepts"); + assert_eq!(old_decoded.replica_hints.len(), 1); + assert_eq!(old_decoded.rejected_keys.len(), 1); + } + + /// Roundtrip: a new peer can decode its own message including the + /// commitment field. Catches accidental serde annotation breakage + /// (e.g. forgetting `#[serde(default)]` on the new field). + #[test] + fn new_peer_roundtrips_with_commitment_some() { + use crate::replication::commitment::{sign_commitment, StorageCommitment}; + use saorsa_pqc::api::sig::ml_dsa_65; + + let (pk, sk) = ml_dsa_65().generate_keypair().expect("keygen"); + let root = [0x7Fu8; 32]; + let sender = [0xCCu8; 32]; + let pk_bytes = pk.to_bytes(); + let sig = sign_commitment(&sk, &root, 3, &sender, &pk_bytes).expect("sign"); + let commitment = StorageCommitment { + root, + key_count: 3, + sender_peer_id: sender, + sender_public_key: pk_bytes, + signature: sig, + }; + + let req = NeighborSyncRequest { + replica_hints: vec![[0x01; 32]], + paid_hints: vec![], + bootstrapping: false, + commitment: Some(commitment.clone()), + }; + let encoded = postcard::to_stdvec(&req).expect("encode"); + let decoded: NeighborSyncRequest = postcard::from_bytes(&encoded).expect("new decoder"); + assert_eq!(decoded.commitment, Some(commitment)); + } + #[test] fn neighbor_sync_request_roundtrip() { let msg = ReplicationMessage { @@ -498,6 +777,7 @@ mod tests { replica_hints: vec![[0x01; 32], [0x02; 32]], paid_hints: vec![[0x03; 32]], bootstrapping: true, + commitment: None, }), }; let encoded = msg.encode().expect("encode should succeed"); @@ -522,6 +802,7 @@ mod tests { paid_hints: vec![], bootstrapping: false, rejected_keys: vec![[0x05; 32], [0x06; 32]], + commitment: None, }), }; let encoded = msg.encode().expect("encode should succeed"); diff --git a/src/replication/pruning.rs b/src/replication/pruning.rs index 4618ab09..b0f84dc0 100644 --- a/src/replication/pruning.rs +++ b/src/replication/pruning.rs @@ -668,10 +668,21 @@ async fn peer_proves_record( let encoded = encode_prune_audit_challenge(&peer, key, challenge_id, nonce)?; let Some(decoded) = send_prune_audit_challenge(&peer, &key, encoded, p2p_node, config).await else { - // No decoded response means we did not observe the peer stop claiming - // bootstrap status. Preserve any active claim so a later claim is not - // misclassified as repeated abuse. - report_prune_audit_failure_once(&peer, &key, p2p_node, config, report_state).await; + // No decoded response means a timeout or an undecodable reply — the + // same "no response" case the main audit path treats as a timeout. + // TIMEOUT-EVICTION-DISABLED: do NOT penalise on a prune-audit timeout + // during the breaking rollout (a not-yet-upgraded peer, or a briefly + // slow one, must not be evicted by a no-response). This mirrors the + // suppressed timeout penalty in handle_failed_audit; only a DECODED + // PruneAuditStatus::Failed below (a peer that answered with bad/absent + // bytes) is penalised. Grep TIMEOUT-EVICTION-DISABLED to re-enable in + // the follow-up release once enough nodes have upgraded. + debug!( + "Prune audit for {peer} key {} got no decodable response \ + (eviction disabled this release — not penalising)", + hex::encode(key) + ); + // report_prune_audit_failure_once(&peer, &key, p2p_node, config, report_state).await; return None; }; @@ -699,6 +710,50 @@ fn prune_audit_response_clears_bootstrap_claim(status: PruneAuditStatus) -> bool matches!(status, PruneAuditStatus::Proven | PruneAuditStatus::Failed) } +/// Responder side of a single-key prune-confirmation audit. +/// +/// Answers with one per-key possession digest, an absent-sentinel for keys we +/// don't hold, or a bootstrapping signal. Pure single-key liveness check — no +/// commitment state involved. +pub async fn handle_prune_audit_challenge( + challenge: &AuditChallenge, + storage: &LmdbStorage, + is_bootstrapping: bool, +) -> AuditResponse { + if is_bootstrapping { + return AuditResponse::Bootstrapping { + challenge_id: challenge.challenge_id, + }; + } + + let mut digests = Vec::with_capacity(challenge.keys.len()); + for key in &challenge.keys { + match storage.get_raw(key).await { + Ok(Some(data)) => { + digests.push(compute_audit_digest( + &challenge.nonce, + &challenge.challenged_peer_id, + key, + &data, + )); + } + Ok(None) => digests.push(ABSENT_KEY_DIGEST), + Err(e) => { + warn!( + "Prune audit responder: failed to read key {}: {e}", + hex::encode(key) + ); + digests.push(ABSENT_KEY_DIGEST); + } + } + } + + AuditResponse::Digests { + challenge_id: challenge.challenge_id, + digests, + } +} + fn encode_prune_audit_challenge( peer: &PeerId, key: XorName, @@ -740,7 +795,7 @@ async fn send_prune_audit_challenge( peer, REPLICATION_PROTOCOL_ID, encoded, - config.audit_response_timeout(1), + config.prune_audit_response_timeout, ) .await { diff --git a/src/replication/quorum.rs b/src/replication/quorum.rs index 5f4d99af..19186639 100644 --- a/src/replication/quorum.rs +++ b/src/replication/quorum.rs @@ -202,19 +202,52 @@ pub fn evaluate_key_evidence( evidence: &KeyVerificationEvidence, targets: &VerificationTargets, config: &ReplicationConfig, +) -> KeyVerificationOutcome { + evaluate_key_evidence_with_holder_check(key, evidence, targets, config, |_, _| true) +} + +/// Variant of [`evaluate_key_evidence`] that consults a holder-credit +/// predicate before counting a peer's Present evidence (v12 §6). +/// +/// `holder_credit` is invoked as `(peer, key) -> bool`. Returning `false` +/// downgrades a Present claim to Unresolved (we don't trust this peer's +/// "I have it" without a recent commitment-bound audit proving it). +/// Returning `true` keeps today's behaviour. Paid-list evidence is +/// independent of holder credit (the paid-list lookup is a property of +/// the receiving peer's own data, not a claim about K being present). +/// +/// The non-`_with_holder_check` form preserves prior behaviour by +/// passing a predicate that always returns true. New call sites that +/// have a `RecentProvers` cache + commitment-by-peer table should pass +/// a real predicate. +#[must_use] +pub fn evaluate_key_evidence_with_holder_check( + key: &XorName, + evidence: &KeyVerificationEvidence, + targets: &VerificationTargets, + config: &ReplicationConfig, + holder_credit: impl Fn(&PeerId, &XorName) -> bool, ) -> KeyVerificationOutcome { let quorum_peers = targets .quorum_targets .get(key) .map_or(&[][..], Vec::as_slice); - // Count presence evidence from QuorumTargets. + // Count presence evidence from QuorumTargets. v12 §6: a peer that + // claims Present but is not commitment-credited for K is downgraded + // to Unresolved (we may have to retry once they re-prove storage). let mut presence_positive = 0usize; let mut presence_unresolved = 0usize; for peer in quorum_peers { match evidence.presence.get(peer) { - Some(PresenceEvidence::Present) => presence_positive += 1, + Some(PresenceEvidence::Present) => { + if holder_credit(peer, key) { + presence_positive += 1; + } else { + presence_unresolved += 1; + } + } Some(PresenceEvidence::Absent) => {} Some(PresenceEvidence::Unresolved) | None => { presence_unresolved += 1; @@ -662,6 +695,108 @@ mod tests { ); } + // ----------------------------------------------------------------------- + // v12 §6 holder-credit predicate downgrades uncredited peers + // ----------------------------------------------------------------------- + + #[test] + fn quorum_downgrades_uncredited_present_peers() { + // 7 quorum peers, threshold 4. 4 say Present, 3 say Absent — + // would normally pass. But with a holder-credit predicate that + // only credits 2 of them, presence_positive drops to 2 and the + // 2 uncredited Presents become Unresolved. Total = 2 positive + // + 2 unresolved + 3 absent = 5 valid → still possible → + // QuorumInconclusive (not yet failed, but not verified either). + let key = xor_name_from_byte(0x33); + let config = ReplicationConfig::default(); + let quorum_peers: Vec = (1..=7).map(peer_id_from_byte).collect(); + let targets = single_key_targets(&key, quorum_peers.clone(), vec![]); + + let evidence = build_evidence( + vec![ + (quorum_peers[0], PresenceEvidence::Present), + (quorum_peers[1], PresenceEvidence::Present), + (quorum_peers[2], PresenceEvidence::Present), + (quorum_peers[3], PresenceEvidence::Present), + (quorum_peers[4], PresenceEvidence::Absent), + (quorum_peers[5], PresenceEvidence::Absent), + (quorum_peers[6], PresenceEvidence::Absent), + ], + vec![], + ); + + // Credit only the first two peers (the other two Presents are + // uncredited and will be downgraded to Unresolved). + let credit = |peer: &PeerId, _: &XorName| -> bool { + *peer == quorum_peers[0] || *peer == quorum_peers[1] + }; + let outcome = + evaluate_key_evidence_with_holder_check(&key, &evidence, &targets, &config, credit); + assert!( + matches!(outcome, KeyVerificationOutcome::QuorumInconclusive), + "credit downgrade should drop presence_positive below threshold, got {outcome:?}" + ); + } + + #[test] + fn quorum_passes_when_all_present_peers_are_credited() { + let key = xor_name_from_byte(0x34); + let config = ReplicationConfig::default(); + let quorum_peers: Vec = (1..=7).map(peer_id_from_byte).collect(); + let targets = single_key_targets(&key, quorum_peers.clone(), vec![]); + + let evidence = build_evidence( + (0..4) + .map(|i| (quorum_peers[i], PresenceEvidence::Present)) + .chain((4..7).map(|i| (quorum_peers[i], PresenceEvidence::Absent))) + .collect(), + vec![], + ); + + let credit = |_: &PeerId, _: &XorName| -> bool { true }; + let outcome = + evaluate_key_evidence_with_holder_check(&key, &evidence, &targets, &config, credit); + assert!( + matches!(outcome, KeyVerificationOutcome::QuorumVerified { .. }), + "all-credited Present should pass quorum, got {outcome:?}" + ); + } + + #[test] + fn paid_list_path_unaffected_by_holder_credit() { + // v12 §6: holder-credit gates Present claims, NOT paid-list + // evidence (the paid-list lookup is the receiving peer's own + // data, not a claim about K). A peer with no credit at all + // can still contribute to paid-list majority. + let key = xor_name_from_byte(0x35); + let config = ReplicationConfig::default(); + let quorum_peers: Vec = (1..=3).map(peer_id_from_byte).collect(); + let paid_peers: Vec = (10..=14).map(peer_id_from_byte).collect(); + let targets = single_key_targets(&key, quorum_peers.clone(), paid_peers.clone()); + + let evidence = build_evidence( + quorum_peers + .iter() + .map(|p| (*p, PresenceEvidence::Absent)) + .collect(), + vec![ + (paid_peers[0], PaidListEvidence::Confirmed), + (paid_peers[1], PaidListEvidence::Confirmed), + (paid_peers[2], PaidListEvidence::Confirmed), + (paid_peers[3], PaidListEvidence::NotFound), + (paid_peers[4], PaidListEvidence::NotFound), + ], + ); + + let credit = |_: &PeerId, _: &XorName| -> bool { false }; + let outcome = + evaluate_key_evidence_with_holder_check(&key, &evidence, &targets, &config, credit); + assert!( + matches!(outcome, KeyVerificationOutcome::PaidListVerified { .. }), + "paid-list path must not be gated by holder-credit, got {outcome:?}" + ); + } + // ----------------------------------------------------------------------- // evaluate_key_evidence: PaidListVerified // ----------------------------------------------------------------------- diff --git a/src/replication/recent_provers.rs b/src/replication/recent_provers.rs new file mode 100644 index 00000000..b793c228 --- /dev/null +++ b/src/replication/recent_provers.rs @@ -0,0 +1,355 @@ +//! Holder-eligibility cache: which peers recently proved storage of +//! which key, against which commitment. +//! +//! Phase 2d of the v12 storage-bound audit design (`notes/security- +//! findings-2026-05-22/proposal-gossip-audit-v12.md`). +//! +//! When the auditor successfully verifies a commitment-bound audit for +//! peer P on key K (against P's currently-credited commitment hash H), +//! it inserts `(P, H, now)` into `recent_provers[K]`. Reward / quorum +//! eligibility for P-as-holder-of-K then checks that this cache entry +//! still matches P's *currently credited* commitment hash; if P rotates +//! the hash via fresh gossip, the cache entry becomes stale and credit +//! is denied until the next successful audit against the new hash. +//! +//! Invariants enforced here: +//! +//! - **Per-key cap**: at most [`MAX_PROVERS_PER_KEY`] entries per key, +//! LRU-evicted by `proved_at`. Bounds the per-key working set so a +//! well-replicated key cannot fill memory. +//! - **RT-only**: only peers in the caller's routing table populate +//! entries — the caller is responsible for filtering before +//! [`RecentProvers::record_proof`]; this module just stores what it's +//! told. +//! - **Hash-bound credit**: [`RecentProvers::is_credited_holder`] +//! requires the cache entry's `commitment_hash` to match the peer's +//! *current* `commitment_hash`. A peer who proves K under C1 then +//! rotates to C2 loses credit until re-proving K under C2. +//! +//! - **TTL**: entries older than [`PROVER_ENTRY_TTL`] are ignored by +//! [`RecentProvers::is_credited_holder`] on read, and +//! [`RecentProvers::sweep_expired`] reclaims their memory when a +//! caller invokes it (e.g. periodically from the engine). +//! - **`PeerRemoved` cleanup**: the caller should call +//! [`RecentProvers::forget_peer`] when a peer leaves the routing +//! table to drop their entries immediately (faster than waiting for +//! TTL). + +use std::collections::HashMap; +use std::time::{Duration, Instant}; + +use saorsa_core::identity::PeerId; + +use crate::ant_protocol::XorName; + +/// Maximum number of cached provers per key. +/// +/// Sized at 2× `CLOSE_GROUP_SIZE = 8`, giving 8 slack slots for churn +/// without unbounded growth. LRU-evicted within the cap. +pub const MAX_PROVERS_PER_KEY: usize = 16; + +/// Maximum age of a cached prover entry before it is considered stale. +/// +/// A proof older than this is treated as "no credit" by +/// [`RecentProvers::is_credited_holder`] even if the commitment hash +/// still matches. +/// +/// v10/v12 §6 spec: `RECENT_PROOF_TTL = 2 × max audit interval` (≈40 min +/// at the default 20 min max). Setting too low → peers fall out of +/// credit between audits. Setting too high → lazy node has more leeway +/// before re-audit is required. 40 min comfortably covers one audit +/// cycle on the average peer while still requiring re-proof inside the +/// rotation window. +pub const PROVER_ENTRY_TTL: Duration = Duration::from_secs(40 * 60); + +/// One cached prover entry: who proved the key, when, and against which +/// commitment. +#[derive(Debug, Clone, Copy)] +pub struct ProverEntry { + /// The peer that produced the audit proof. + pub peer_id: PeerId, + /// When the proof was recorded. Used for LRU eviction. + pub proved_at: Instant, + /// The peer's commitment hash at proof time. Holder-eligibility + /// requires this to match the peer's *currently credited* hash. + pub commitment_hash: [u8; 32], +} + +/// Per-key cache of recent provers, capped at [`MAX_PROVERS_PER_KEY`]. +#[derive(Debug, Default, Clone)] +pub struct RecentProvers { + /// `entries[K]` is the per-key bounded list. Entries are kept sorted + /// by `proved_at` ascending so eviction is `O(1)` (drop head). + entries: HashMap>, +} + +impl RecentProvers { + /// Empty cache. + #[must_use] + pub fn new() -> Self { + Self::default() + } + + /// Record that `peer_id` proved storage of `key` under commitment + /// `commitment_hash` at `proved_at`. + /// + /// If the same `(peer_id, commitment_hash)` is already cached for + /// this key, the entry is updated in place (refreshes `proved_at`). + /// Otherwise a new entry is appended, evicting the oldest entry if + /// the per-key cap would be exceeded. + pub fn record_proof( + &mut self, + key: XorName, + peer_id: PeerId, + commitment_hash: [u8; 32], + proved_at: Instant, + ) { + let bucket = self.entries.entry(key).or_default(); + + // Refresh-in-place if the (peer, hash) already exists. + for e in bucket.iter_mut() { + if e.peer_id == peer_id && e.commitment_hash == commitment_hash { + e.proved_at = proved_at; + bucket.sort_by_key(|e| e.proved_at); + return; + } + } + + // Evict the oldest entry if we're at the cap. + if bucket.len() >= MAX_PROVERS_PER_KEY { + // bucket is sorted ascending; oldest is index 0. + bucket.remove(0); + } + + bucket.push(ProverEntry { + peer_id, + proved_at, + commitment_hash, + }); + bucket.sort_by_key(|e| e.proved_at); + } + + /// Is `peer_id` currently credited as a holder of `key`? + /// + /// Returns `true` iff there is a non-stale cached entry with `peer_id` + /// and `commitment_hash == current_commitment_hash`. + /// + /// "Non-stale" means `now - proved_at < PROVER_ENTRY_TTL`. The hash + /// binding is the v12 §6 lever: a peer that rotates their commitment + /// must re-prove every key they want credit for. The TTL is a + /// secondary safety net that revokes credit even if the hash + /// happens to match (e.g. a peer who proved long ago but has been + /// silent or offline since). + #[must_use] + pub fn is_credited_holder( + &self, + key: &XorName, + peer_id: &PeerId, + current_commitment_hash: &[u8; 32], + ) -> bool { + let now = Instant::now(); + self.entries.get(key).is_some_and(|bucket| { + bucket.iter().any(|e| { + &e.peer_id == peer_id + && &e.commitment_hash == current_commitment_hash + && now.saturating_duration_since(e.proved_at) < PROVER_ENTRY_TTL + }) + }) + } + + /// Sweep entries older than [`PROVER_ENTRY_TTL`] across all keys. + /// + /// Returns the number of entries dropped. Intended for periodic + /// invocation by a background task; `is_credited_holder` already + /// honours the TTL on read, so the sweep only reclaims memory. + pub fn sweep_expired(&mut self, now: Instant) -> usize { + let mut dropped = 0; + for bucket in self.entries.values_mut() { + let before = bucket.len(); + bucket.retain(|e| now.saturating_duration_since(e.proved_at) < PROVER_ENTRY_TTL); + dropped += before - bucket.len(); + } + self.entries.retain(|_, b| !b.is_empty()); + dropped + } + + /// Drop every cached entry for `peer_id` across all keys. + /// + /// Called when a peer leaves the routing table (RT-only invariant) + /// or on explicit eviction. + pub fn forget_peer(&mut self, peer_id: &PeerId) { + for bucket in self.entries.values_mut() { + bucket.retain(|e| &e.peer_id != peer_id); + } + self.entries.retain(|_, b| !b.is_empty()); + } + + /// Drop every entry whose `commitment_hash` matches `stale_hash` + /// (used when the auditor invalidates a peer's `last_commitment` — + /// e.g. on `UnknownCommitmentHash` rejection — to remove the cached + /// proofs against that no-longer-valid commitment). + pub fn forget_commitment(&mut self, stale_hash: &[u8; 32]) { + for bucket in self.entries.values_mut() { + bucket.retain(|e| &e.commitment_hash != stale_hash); + } + self.entries.retain(|_, b| !b.is_empty()); + } + + /// Number of cached entries for `key`. Test/observability helper. + #[must_use] + pub fn provers_for(&self, key: &XorName) -> usize { + self.entries.get(key).map_or(0, Vec::len) + } + + /// Total number of cached entries across all keys. + #[must_use] + pub fn total_entries(&self) -> usize { + self.entries.values().map(Vec::len).sum() + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +#[allow(clippy::unwrap_used, clippy::expect_used)] +mod tests { + use super::*; + use std::time::Duration; + + fn peer(byte: u8) -> PeerId { + let mut bytes = [0u8; 32]; + bytes[0] = byte; + PeerId::from_bytes(bytes) + } + + fn key(byte: u8) -> XorName { + let mut k = [0u8; 32]; + k[0] = byte; + k + } + + fn hash(byte: u8) -> [u8; 32] { + [byte; 32] + } + + #[test] + fn empty_cache_credits_no_one() { + let cache = RecentProvers::new(); + assert!(!cache.is_credited_holder(&key(1), &peer(1), &hash(1))); + assert_eq!(cache.total_entries(), 0); + } + + #[test] + fn recorded_proof_credits_under_same_hash() { + let mut cache = RecentProvers::new(); + cache.record_proof(key(1), peer(7), hash(0xAB), Instant::now()); + assert!(cache.is_credited_holder(&key(1), &peer(7), &hash(0xAB))); + } + + #[test] + fn rotated_hash_loses_credit() { + // Core v12 §6 attack-bound property: a peer who proves K under + // C1 must re-prove under C2 to keep credit. The cache entry's + // hash binding enforces this. + let mut cache = RecentProvers::new(); + cache.record_proof(key(1), peer(7), hash(0xAB), Instant::now()); + // Same peer, same key, but the auditor's "current" hash for + // this peer is now different (peer gossiped a new commitment). + assert!(!cache.is_credited_holder(&key(1), &peer(7), &hash(0xCD))); + } + + #[test] + fn other_peer_under_same_hash_not_credited() { + let mut cache = RecentProvers::new(); + cache.record_proof(key(1), peer(7), hash(0xAB), Instant::now()); + assert!(!cache.is_credited_holder(&key(1), &peer(8), &hash(0xAB))); + } + + #[test] + fn per_key_cap_evicts_oldest() { + let mut cache = RecentProvers::new(); + let now = Instant::now(); + // MAX_PROVERS_PER_KEY is a small usize (16). Narrow to u8 once + // so the test loop can hand the peer-id byte directly to + // `peer(...)` without per-iteration casts. + let max_u8 = u8::try_from(MAX_PROVERS_PER_KEY).unwrap_or(u8::MAX); + // Fill the bucket with MAX_PROVERS_PER_KEY + 1 distinct peers. + for i in 0..=max_u8 { + let t = now + Duration::from_millis(u64::from(i)); + cache.record_proof(key(1), peer(i), hash(0xAB), t); + } + assert_eq!(cache.provers_for(&key(1)), MAX_PROVERS_PER_KEY); + // The oldest (peer 0) should be evicted; peer MAX should be present. + assert!(!cache.is_credited_holder(&key(1), &peer(0), &hash(0xAB))); + assert!(cache.is_credited_holder(&key(1), &peer(max_u8), &hash(0xAB))); + } + + #[test] + fn refresh_in_place_does_not_grow_bucket() { + let mut cache = RecentProvers::new(); + let now = Instant::now(); + // Same (peer, hash) repeated three times. Bucket should stay at 1. + cache.record_proof(key(1), peer(1), hash(0xAB), now); + cache.record_proof(key(1), peer(1), hash(0xAB), now + Duration::from_secs(1)); + cache.record_proof(key(1), peer(1), hash(0xAB), now + Duration::from_secs(2)); + assert_eq!(cache.provers_for(&key(1)), 1); + } + + #[test] + fn forget_peer_drops_all_entries() { + let mut cache = RecentProvers::new(); + let now = Instant::now(); + cache.record_proof(key(1), peer(1), hash(0xAB), now); + cache.record_proof(key(2), peer(1), hash(0xAB), now); + cache.record_proof(key(1), peer(2), hash(0xAB), now); + assert_eq!(cache.total_entries(), 3); + + cache.forget_peer(&peer(1)); + assert_eq!(cache.total_entries(), 1); + assert!(!cache.is_credited_holder(&key(1), &peer(1), &hash(0xAB))); + assert!(cache.is_credited_holder(&key(1), &peer(2), &hash(0xAB))); + } + + #[test] + fn forget_commitment_drops_only_matching_entries() { + let mut cache = RecentProvers::new(); + let now = Instant::now(); + cache.record_proof(key(1), peer(1), hash(0xAB), now); + cache.record_proof(key(1), peer(1), hash(0xCD), now); + cache.record_proof(key(2), peer(2), hash(0xAB), now); + assert_eq!(cache.total_entries(), 3); + + cache.forget_commitment(&hash(0xAB)); + assert_eq!(cache.total_entries(), 1); + // Only the (peer(1), hash 0xCD) entry remains. + assert!(cache.is_credited_holder(&key(1), &peer(1), &hash(0xCD))); + assert!(!cache.is_credited_holder(&key(1), &peer(1), &hash(0xAB))); + assert!(!cache.is_credited_holder(&key(2), &peer(2), &hash(0xAB))); + } + + #[test] + fn lazy_rotation_via_unknown_commitment_hash_drops_credit() { + // Scenario from v12 §5 (revised UnknownCommitmentHash handler): + // 1. Peer P proves K under C1 → cached. + // 2. Auditor pinned to C1 sends a new challenge. + // 3. P replies UnknownCommitmentHash (they rotated and + // dropped the bytes). + // 4. Auditor invalidates last_commitment[P] AND calls + // forget_commitment(C1) so credit doesn't linger. + // + // Property checked: after forget_commitment(C1), P is no longer + // credited as holder of K under C1. + let mut cache = RecentProvers::new(); + cache.record_proof(key(1), peer(7), hash(0xAB), Instant::now()); + assert!(cache.is_credited_holder(&key(1), &peer(7), &hash(0xAB))); + + // Auditor detects rotation/dodge, invalidates the C1 hash. + cache.forget_commitment(&hash(0xAB)); + + assert!(!cache.is_credited_holder(&key(1), &peer(7), &hash(0xAB))); + // And under any new hash too — the peer has to re-prove. + assert!(!cache.is_credited_holder(&key(1), &peer(7), &hash(0xCD))); + } +} diff --git a/src/replication/subtree.rs b/src/replication/subtree.rs new file mode 100644 index 00000000..75ef228c --- /dev/null +++ b/src/replication/subtree.rs @@ -0,0 +1,1034 @@ +//! Gossip-triggered contiguous-subtree storage proof (ADR-0002). +//! +//! Pure, network-free core of the audit redesign. Given a peer's signed +//! [`StorageCommitment`] and an auditor-chosen random nonce, both sides +//! deterministically select **one contiguous subtree** of the committed +//! Merkle tree; the responder expands that subtree to its leaves plus the +//! sibling cut-hashes on the path to the root; the auditor rebuilds the root +//! and spot-checks a few leaves against real chunk bytes. +//! +//! Three independent checks (ADR-0002 "Verification, three independent +//! checks"); this module owns the first two — the third (response deadline) +//! is enforced by the caller: +//! +//! 1. **Structure** — [`verify_subtree_proof`] re-derives the selected branch +//! from `(nonce, key_count)`, rebuilds the root from the returned leaves and +//! cut-hashes, and requires it to equal the pinned root. +//! 2. **Real bytes** — [`select_spotcheck_indices`] picks a few leaves within +//! the subtree; the caller fetches their bytes and checks both the plain +//! content hash and the nonce freshness hash. Faking a fraction `x` of +//! leaves survives only `(1 - x)^k`. +//! +//! ## Tree geometry (must match [`super::commitment::MerkleTree`]) +//! +//! Leaves are sorted by key and fill positions `0..N`. The tree is +//! left-packed: when a level has an odd number of nodes the last node is +//! paired with itself (`node_hash(x, x)`). There are no explicit padding +//! leaves; "padding" is the empty right side of a subtree slot that extends +//! past `N`. Depth `D = ceil(log2(N))`. A node identified by `(depth, slot)` +//! (depth measured from the root, slot in `0..2^depth`) covers the contiguous +//! leaf range `[slot * span, (slot + 1) * span)` where `span = 2^(D - depth)`, +//! intersected with `0..N`. + +use super::commitment::{leaf_hash, node_hash, StorageCommitment, MAX_COMMITMENT_KEY_COUNT}; +use super::protocol::compute_audit_digest; +use crate::ant_protocol::XorName; +use serde::{Deserialize, Serialize}; + +/// Below this key count the whole tree is challenged; `sqrt` rounding is +/// meaningless for tiny trees and a full proof is cheap. +pub const SMALL_TREE_FULL_AUDIT_FLOOR: u32 = 4; + +/// One leaf of the selected subtree, as returned by the responder. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct SubtreeLeaf { + /// The committed key (chunk address) at this leaf position. + pub key: XorName, + /// `BLAKE3(record_bytes)` — the plain content hash. This is also the + /// chunk's network address, so it is public; possessing it does NOT prove + /// possession of the bytes (that is what `nonced_hash` is for). + pub bytes_hash: [u8; 32], + /// `compute_audit_digest(nonce, peer_id, key, record_bytes)` — the + /// freshness hash. Only a holder of the actual bytes can produce it for a + /// fresh nonce, so a spot-check on it proves real possession. + pub nonced_hash: [u8; 32], +} + +/// A responder's single-contiguous-subtree proof (ADR-0002 "The proof"). +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct SubtreeProof { + /// Every leaf of the selected subtree, in ascending leaf-index order. + pub leaves: Vec, + /// One sibling cut-hash per level on the path from the root down to the + /// selected subtree root, ordered root-first. Each is the plain hash of + /// the unselected sibling node at that level. + pub sibling_cut_hashes: Vec<[u8; 32]>, +} + +/// The deterministically-selected contiguous subtree, derived from +/// `(nonce, key_count)` and agreed by both sides. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct SubtreePath { + /// Depth of the subtree root below the tree root (0 = whole tree). + pub depth: u32, + /// Slot index of the subtree root within its level, in `0..2^depth`. + pub slot: u32, + /// First real leaf index covered (inclusive). + pub leaf_start: u32, + /// One past the last real leaf index covered (exclusive). Always + /// `leaf_end > leaf_start`, so the selection never covers zero real + /// leaves — this is the ADR's dead-block fix. + pub leaf_end: u32, +} + +impl SubtreePath { + /// Number of real (non-padding) leaves in the selected subtree. + #[must_use] + pub fn real_leaf_count(&self) -> u32 { + self.leaf_end - self.leaf_start + } +} + +/// Tree depth `D = ceil(log2(key_count))`, matching `MerkleTree` / `verify_path`. +/// +/// `key_count == 1` → depth 0 (the single leaf is the root). Returns `None` +/// for an out-of-protocol `key_count` so callers reject it before any work. +#[must_use] +fn tree_depth(key_count: u32) -> Option { + if key_count == 0 || key_count > MAX_COMMITMENT_KEY_COUNT { + return None; + } + // checked_next_power_of_two cannot fail under the cap above, but the + // explicit check keeps behaviour identical across debug/release. + let rounded = key_count.checked_next_power_of_two()?; + Some(rounded.trailing_zeros()) +} + +/// Count real leaves under the node at `(depth, slot)` for a tree of `key_count` +/// leaves. Pure function of geometry — identical on auditor and responder. +/// +/// `span = 2^(total_depth - depth)`; the node covers `[slot*span, (slot+1)*span)` +/// clamped to `0..key_count`. +#[must_use] +fn real_leaves_under(depth: u32, slot: u64, key_count: u32, total_depth: u32) -> u32 { + let levels_below = total_depth - depth; + // span fits in u64: total_depth <= 20 for key_count <= 1e6. + let span = 1u64 << levels_below; + let start = slot.saturating_mul(span).min(u64::from(key_count)); + let end = slot + .saturating_add(1) + .saturating_mul(span) + .min(u64::from(key_count)); + // end >= start always; difference fits in u32 (<= key_count). + u32::try_from(end - start).unwrap_or(0) +} + +/// `ceil(sqrt(key_count))` — the real-leaf floor a selected subtree must meet. +#[must_use] +fn sqrt_floor(key_count: u32) -> u32 { + // Exact integer ceil(sqrt(n)), float-free and MSRV-safe (no u64::isqrt). + // Newton's method converges to floor(sqrt(n)); then round up unless n is a + // perfect square. Always at least 1. + let n = u64::from(key_count); + if n <= 1 { + return 1; + } + let mut x = n; + let mut y = x.div_ceil(2); + while y < x { + x = y; + y = (x + n / x) / 2; + } + // x == floor(sqrt(n)) here. + let ceil = if x.saturating_mul(x) == n { x } else { x + 1 }; + u32::try_from(ceil.max(1)).unwrap_or(u32::MAX) +} + +/// Read bit `index` of the nonce (bit 0 = MSB of byte 0), `index` 0-based. +/// +/// `1 → left child, 0 → right child` (ADR). With a 256-bit nonce and a tree +/// depth ≤ 20 we never run out of bits. +#[must_use] +fn nonce_bit(nonce: &[u8; 32], index: u32) -> bool { + let byte = (index / 8) as usize; + let bit = 7 - (index % 8); + // byte < 32 because index < 256 for any reachable depth; guard anyway. + nonce.get(byte).is_some_and(|b| (b >> bit) & 1 == 1) +} + +/// Deterministically select one contiguous subtree from `(nonce, key_count)`. +/// +/// Walks the nonce bits from the root, descending into the child the bit picks, +/// and **stops at the smallest branch whose real-leaf count is still ≥ +/// `ceil(sqrt(key_count))`**. Because an all-padding child has zero real leaves +/// (< the floor), the walk never descends into one — so the selection always +/// covers ≥ `sqrt` real leaves and is never empty (ADR dead-block fix). +/// +/// For `key_count <= SMALL_TREE_FULL_AUDIT_FLOOR` the whole tree is selected. +/// +/// Returns `None` only for an out-of-protocol `key_count` (caller rejects). +#[must_use] +pub fn select_subtree_path(nonce: &[u8; 32], key_count: u32) -> Option { + let total_depth = tree_depth(key_count)?; + + // Tiny trees: challenge everything. + if key_count <= SMALL_TREE_FULL_AUDIT_FLOOR { + return Some(SubtreePath { + depth: 0, + slot: 0, + leaf_start: 0, + leaf_end: key_count, + }); + } + + let floor = sqrt_floor(key_count); + let mut depth = 0u32; + let mut slot = 0u64; // slot within the current level + + // Descend while the chosen child still meets the floor. + while depth < total_depth { + let go_left = nonce_bit(nonce, depth); + // 1 = left child (bit set), 0 = right child. Right child is the odd slot. + let child_slot = slot * 2 + u64::from(!go_left); + let child_real = real_leaves_under(depth + 1, child_slot, key_count, total_depth); + if child_real < floor { + break; // descending would drop below the floor → stay here + } + depth += 1; + slot = child_slot; + } + + let span = 1u64 << (total_depth - depth); + let leaf_start = + u32::try_from(slot.saturating_mul(span).min(u64::from(key_count))).unwrap_or(key_count); + let leaf_end = u32::try_from( + slot.saturating_add(1) + .saturating_mul(span) + .min(u64::from(key_count)), + ) + .unwrap_or(key_count); + + Some(SubtreePath { + depth, + slot: u32::try_from(slot).unwrap_or(u32::MAX), + leaf_start, + leaf_end, + }) +} + +/// Pick `k` distinct nonce-random leaf positions within the selected subtree. +/// +/// Returned as indices into `path.real_leaf_count()` (0-based within the +/// subtree). Used for the real-bytes spot-check (ADR-0002). Deterministic from +/// the nonce so the auditor and any observer derive the same positions; the +/// responder cannot predict-and-fake only these because it must produce a +/// correct nonced hash for *every* returned leaf anyway — the spot-check just +/// bounds how many it can fake and still pass. +#[must_use] +pub fn select_spotcheck_indices(nonce: &[u8; 32], path: &SubtreePath, k: u32) -> Vec { + let n = path.real_leaf_count(); + if n == 0 { + return Vec::new(); + } + if n <= k { + return (0..n).collect(); + } + // Derive a stream of indices by hashing (nonce || counter) and reducing + // mod n; skip collisions. Bounded: k is small (default 8) and n > k. + let mut out: Vec = Vec::with_capacity(k as usize); + let mut counter: u32 = 0; + while u32::try_from(out.len()).unwrap_or(u32::MAX) < k { + let mut h = blake3::Hasher::new(); + h.update(b"autonomi.ant.replication.audit_spotcheck.v1"); + h.update(nonce); + h.update(&counter.to_le_bytes()); + let digest = *h.finalize().as_bytes(); + let mut word = [0u8; 4]; + word.copy_from_slice(&digest[..4]); + let idx = u32::from_le_bytes(word) % n; + if !out.contains(&idx) { + out.push(idx); + } + counter = counter.wrapping_add(1); + // Safety valve: with n > k this terminates quickly, but bound the loop. + if counter > k.saturating_mul(64) { + break; + } + } + out +} + +/// Verdict from [`verify_subtree_proof`]'s structural check. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum StructureVerdict { + /// Proof is well-formed and its root matches the pinned commitment. + Valid, + /// Proof is malformed or its root does not match. Carries a static reason + /// for logging; all variants are confirmed failures, not benign. + Invalid(&'static str), +} + +/// Structural verification (ADR-0002 check 1): the returned subtree genuinely +/// belongs to the committed tree. +/// +/// Re-derives the selected branch from `(nonce, commitment.key_count)`, +/// rebuilds the root from `proof.leaves` and `proof.sibling_cut_hashes`, and +/// requires it to equal `commitment.root`. Also checks leaf count and +/// ascending-key order (the committed tree sorts leaves by key). +/// +/// This does NOT verify possession of bytes — that is the caller's spot-check +/// using [`select_spotcheck_indices`]. It only proves the structure. +#[must_use] +pub fn verify_subtree_proof( + proof: &SubtreeProof, + nonce: &[u8; 32], + commitment: &StorageCommitment, +) -> StructureVerdict { + let Some(path) = select_subtree_path(nonce, commitment.key_count) else { + return StructureVerdict::Invalid("out-of-protocol key_count"); + }; + + // Leaf count must equal the agreed subtree's real-leaf count exactly. + let expected_leaves = path.real_leaf_count() as usize; + if proof.leaves.len() != expected_leaves { + return StructureVerdict::Invalid("wrong leaf count"); + } + // Sibling cut-hashes: one per level on the path to the subtree root. + if proof.sibling_cut_hashes.len() != path.depth as usize { + return StructureVerdict::Invalid("wrong cut-hash count"); + } + + // Leaves must be strictly ascending by key (matches MerkleTree sort), which + // also rejects duplicates. + for w in proof.leaves.windows(2) { + if let [a, b] = w { + if a.key >= b.key { + return StructureVerdict::Invalid("leaves not strictly ascending"); + } + } + } + + // Out-of-protocol key_count cannot happen here (select_subtree_path already + // returned Some), but recompute total_depth defensively for the climb maths. + let Some(total_depth) = tree_depth(commitment.key_count) else { + return StructureVerdict::Invalid("out-of-protocol key_count"); + }; + + // Phase A — reconstruct the selected subtree's root NODE exactly as the + // committed tree's level-by-level build produces it. The subtree root sits + // at `(level_from_leaves, slot)`, covering a left-packed block of leaves; + // folding that block up `level_from_leaves` levels with the same + // self-pair-the-last-node rule as `MerkleTree::build_next_level` yields the + // identical node (including the `node_hash(x, x)` self-pair when the block + // is the tree's odd tail at some level). `fold_to_root` stopped at a single + // hash and so skipped the self-pair when a truncated block reached length 1 + // before climbing all the way to the subtree-root level — the geometry bug. + let leaf_hashes: Vec<[u8; 32]> = proof + .leaves + .iter() + .map(|l| leaf_hash(&l.key, &l.bytes_hash)) + .collect(); + let levels_to_subtree_root = total_depth - path.depth; + let mut cur = fold_levels(leaf_hashes, levels_to_subtree_root); + + // Phase B — climb from the subtree root to the tree root using one sibling + // cut-hash per level, exactly like `verify_path`: the climb's left/right + // choice is the real node-index parity, NOT a nonce bit, and the self-pair + // of an odd level's last node falls out naturally when the builder supplied + // the chosen node itself as its own sibling. The cut-hashes are root-first, + // so we consume them in reverse (lowest climb step uses the last cut-hash). + // + // We recompute the node index of the subtree root the same way the builder + // walked the nonce bits, then halve it as we climb — mirroring `verify_path`. + let mut node_index = u64::from(path.slot); + for level_above in (0..path.depth).rev() { + let Some(sibling) = proof.sibling_cut_hashes.get(level_above as usize) else { + return StructureVerdict::Invalid("missing cut-hash"); + }; + cur = if node_index % 2 == 0 { + node_hash(&cur, sibling) + } else { + node_hash(sibling, &cur) + }; + node_index /= 2; + } + + if cur == commitment.root { + StructureVerdict::Valid + } else { + StructureVerdict::Invalid("root mismatch") + } +} + +/// Fold a contiguous, left-aligned block of node hashes up exactly `levels` +/// levels, applying the same left-packed self-pair rule as +/// `MerkleTree::build_next_level` (`node_hash(x, x)` for an unpaired last node). +/// +/// This is the generalisation of a single-leaf inclusion fold to a *range* of +/// leaves: a subtree root at `(levels, slot)` covers a block whose left edge is +/// pair-aligned at every sub-level, so the only odd run that can occur is the +/// tree's genuine odd tail — exactly when `build_next_level` self-pairs. Folding +/// the block `levels` times therefore reproduces the committed node bit-for-bit, +/// including the self-pair that `fold_to_root` used to skip by stopping at a +/// single hash too early. +/// +/// `levels == 0` returns the block's single element unchanged (the subtree IS +/// the tree, e.g. the small-tree full-audit case after its own folds, or a +/// single-leaf tree). An empty input is impossible here (callers guarantee ≥ 1 +/// leaf via the dead-block fix); returns a zero hash defensively. +#[must_use] +fn fold_levels(mut level: Vec<[u8; 32]>, levels: u32) -> [u8; 32] { + if level.is_empty() { + return [0u8; 32]; + } + for _ in 0..levels { + let mut next = Vec::with_capacity(level.len().div_ceil(2)); + let mut i = 0; + while i < level.len() { + let left = level[i]; + // Missing right sibling → self-pair the last node, identical to + // `build_next_level`. Within a selected block this happens only at + // the tree's odd tail, so it matches the committed build exactly. + let right = level.get(i + 1).copied().unwrap_or(left); + next.push(node_hash(&left, &right)); + i += 2; + } + level = next; + } + // After `levels` folds of a `2^levels`-span left-aligned block, exactly one + // node remains; defensively fall back if the block was shorter. + level.first().copied().unwrap_or([0u8; 32]) +} + +/// Build the per-leaf nonced freshness hash for a subtree leaf (responder +/// side), reusing the existing audit digest. +#[must_use] +pub fn nonced_leaf_hash( + nonce: &[u8; 32], + challenged_peer_id: &[u8; 32], + key: &XorName, + record_bytes: &[u8], +) -> [u8; 32] { + compute_audit_digest(nonce, challenged_peer_id, key, record_bytes) +} + +/// Why a responder could not build a subtree proof for a challenge. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum BuildProofError { + /// The challenge's `key_count` (from the pinned commitment) is out of + /// protocol range. Should never happen for a commitment we built. + BadKeyCount, + /// A selected leaf's key could not be resolved from the tree (internal + /// inconsistency; should never happen). + MissingKey { + /// The leaf index that could not be resolved. + leaf_index: u32, + }, + /// The responder no longer holds the bytes for a selected, committed key. + /// This is real storage loss / deliberate non-response — the caller turns + /// it into a confirmed audit failure, NOT a benign rejection. + MissingBytes { + /// The committed key whose bytes are gone. + key: XorName, + }, +} + +/// Build the single-contiguous-subtree proof for `(nonce, tree)` (responder). +/// +/// `bytes_for(&key)` returns the chunk bytes the responder holds for a key, or +/// `None` if it cannot read them. Walks the same nonce-selected path the +/// auditor will re-derive, reads the unselected sibling cut-hashes directly +/// from the committed tree (so they are provably consistent with the gossiped +/// root), and builds each selected leaf's plain and nonced hashes from the real +/// bytes. +/// +/// # Errors +/// +/// See [`BuildProofError`]. `MissingBytes` is the one the caller penalises; +/// the others indicate an internal inconsistency. +pub fn build_subtree_proof( + tree: &super::commitment::MerkleTree, + nonce: &[u8; 32], + challenged_peer_id: &[u8; 32], + bytes_for: impl Fn(&XorName) -> Option>, +) -> Result { + let plan = subtree_plan(tree, nonce)?; + let mut leaves = Vec::with_capacity(plan.leaf_keys.len()); + for key in &plan.leaf_keys { + let bytes = bytes_for(key).ok_or(BuildProofError::MissingBytes { key: *key })?; + leaves.push(subtree_leaf(nonce, challenged_peer_id, key, &bytes)); + } + Ok(SubtreeProof { + leaves, + sibling_cut_hashes: plan.sibling_cut_hashes, + }) +} + +/// The pure (no-bytes) geometry of a subtree proof. +/// +/// Holds the ordered keys whose bytes the responder must hash and the sibling +/// cut-hashes read from the tree. Splitting this out lets an async responder +/// read chunk bytes per leaf without forcing the tree-walking maths to be async. +#[derive(Debug, Clone)] +pub struct SubtreePlan { + /// The selected leaves' keys, in ascending leaf-index order. + pub leaf_keys: Vec, + /// One sibling cut-hash per level on the path to the subtree root, + /// root-first. + pub sibling_cut_hashes: Vec<[u8; 32]>, +} + +/// Compute the [`SubtreePlan`] for `(nonce, tree)` — selection geometry only, +/// no chunk bytes touched. +/// +/// # Errors +/// +/// [`BuildProofError::BadKeyCount`] for an out-of-protocol tree; +/// [`BuildProofError::MissingKey`] if a selected leaf index is not in the tree +/// (internal inconsistency). +pub fn subtree_plan( + tree: &super::commitment::MerkleTree, + nonce: &[u8; 32], +) -> Result { + let key_count = tree.key_count(); + let path = select_subtree_path(nonce, key_count).ok_or(BuildProofError::BadKeyCount)?; + + let mut leaf_keys = Vec::with_capacity(path.real_leaf_count() as usize); + for idx in path.leaf_start..path.leaf_end { + let key = tree + .key_at(idx as usize) + .ok_or(BuildProofError::MissingKey { leaf_index: idx })?; + leaf_keys.push(key); + } + + // Sibling cut-hashes, root-first. At descent step `d` (0-based from the + // root), the chosen child is on the side the nonce bit picks; the sibling + // is the other child at level `total_depth - (d + 1)` (counting up from + // leaves). On an odd-length level the missing sibling self-pairs, i.e. the + // sibling hash is the chosen node itself. + let total_depth = u32::try_from(tree.levels_count().saturating_sub(1)).unwrap_or(0); + let mut sibling_cut_hashes = Vec::with_capacity(path.depth as usize); + let mut slot = 0u64; + for d in 0..path.depth { + let go_left = nonce_bit(nonce, d); + let child = slot * 2 + u64::from(!go_left); + let sibling = child ^ 1; + let level_from_leaves = (total_depth - (d + 1)) as usize; + let chosen_hash = tree.node_at(level_from_leaves, child); + let sib_hash = tree + .node_at(level_from_leaves, sibling) + .or(chosen_hash) + .ok_or(BuildProofError::BadKeyCount)?; + sibling_cut_hashes.push(sib_hash); + slot = child; + } + + Ok(SubtreePlan { + leaf_keys, + sibling_cut_hashes, + }) +} + +/// Build one subtree leaf from its key and the chunk bytes the responder holds. +#[must_use] +pub fn subtree_leaf( + nonce: &[u8; 32], + challenged_peer_id: &[u8; 32], + key: &XorName, + bytes: &[u8], +) -> SubtreeLeaf { + SubtreeLeaf { + key: *key, + bytes_hash: *blake3::hash(bytes).as_bytes(), + nonced_hash: nonced_leaf_hash(nonce, challenged_peer_id, key, bytes), + } +} + +#[cfg(test)] +#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)] +mod tests { + use super::*; + use crate::replication::commitment::MerkleTree; + + fn xn_u32(i: u32) -> XorName { + let mut k = [0u8; 32]; + k[..4].copy_from_slice(&i.to_be_bytes()); // big-endian so numeric order == sort order + k + } + fn nonce_of(seed: u8) -> [u8; 32] { + [seed; 32] + } + + // ---- sqrt_floor ------------------------------------------------------- + + #[test] + fn sqrt_floor_is_exact_ceil() { + assert_eq!(sqrt_floor(1), 1); + assert_eq!(sqrt_floor(4), 2); + assert_eq!(sqrt_floor(5), 3); // ceil(sqrt(5)) = 3 + assert_eq!(sqrt_floor(9), 3); + assert_eq!(sqrt_floor(10), 4); + assert_eq!(sqrt_floor(100), 10); + assert_eq!(sqrt_floor(101), 11); + assert_eq!(sqrt_floor(1_000_000), 1000); + } + + // ---- real_leaves_under ------------------------------------------------ + + #[test] + fn real_leaves_under_root_is_all() { + let d = tree_depth(100).unwrap(); + assert_eq!(real_leaves_under(0, 0, 100, d), 100); + } + + #[test] + fn real_leaves_under_padding_slot_is_zero() { + // key_count = 5, total_depth = 3 (next_pow2(5)=8). Leaf slots 5,6,7 + // at the bottom are padding. The right half at depth 1 (slot 1) covers + // leaves [4,8) → only leaf 4 is real. + let d = tree_depth(5).unwrap(); + assert_eq!(d, 3); + assert_eq!(real_leaves_under(1, 0, 5, d), 4); // [0,4) + assert_eq!(real_leaves_under(1, 1, 5, d), 1); // [4,8) ∩ [0,5) = {4} + assert_eq!(real_leaves_under(3, 7, 5, d), 0); // pure padding leaf + assert_eq!(real_leaves_under(2, 3, 5, d), 0); // [6,8) pure padding + } + + // ---- select_subtree_path: dead-block regression ----------------------- + + #[test] + fn selection_never_empty_across_many_sizes_and_nonces() { + for n in [ + 5u32, 6, 7, 9, 13, 17, 33, 65, 100, 129, 333, 1000, 1024, 1025, + ] { + let floor = sqrt_floor(n); + for seed in 0u8..=255 { + let path = select_subtree_path(&nonce_of(seed), n).unwrap(); + assert!( + path.real_leaf_count() >= floor.min(n), + "n={n} seed={seed}: real={} < floor={floor}", + path.real_leaf_count() + ); + assert!( + path.real_leaf_count() >= 1, + "n={n} seed={seed}: empty selection" + ); + assert!(path.leaf_end <= n); + assert!(path.leaf_start < path.leaf_end); + } + } + } + + #[test] + fn small_trees_select_whole_tree() { + for n in 1..=SMALL_TREE_FULL_AUDIT_FLOOR { + let path = select_subtree_path(&nonce_of(7), n).unwrap(); + assert_eq!(path.depth, 0); + assert_eq!(path.leaf_start, 0); + assert_eq!(path.leaf_end, n); + } + } + + #[test] + fn selection_is_deterministic() { + let n = 500; + let a = select_subtree_path(&nonce_of(42), n).unwrap(); + let b = select_subtree_path(&nonce_of(42), n).unwrap(); + assert_eq!(a, b); + } + + #[test] + fn different_nonces_cover_different_branches_over_time() { + // Not every nonce differs, but the set of selected ranges must be > 1. + let n = 1024; + let mut starts = std::collections::HashSet::new(); + for seed in 0u8..=255 { + let p = select_subtree_path(&nonce_of(seed), n).unwrap(); + starts.insert(p.leaf_start); + } + assert!( + starts.len() > 4, + "nonce should spread selection: {}", + starts.len() + ); + } + + /// Deterministic per-trial nonce (no RNG): hash a counter. + fn nonce_for_trial(i: u32) -> [u8; 32] { + let mut h = blake3::Hasher::new(); + h.update(b"detection-sim-trial"); + h.update(&i.to_le_bytes()); + *h.finalize().as_bytes() + } + + /// Catch rate over `trials` audits: fraction whose nonce-selected subtree + /// overlaps at least one deleted leaf index. + fn catch_rate(n: u32, deleted: &std::collections::HashSet, trials: u32) -> f64 { + let mut caught = 0u32; + for t in 0..trials { + let path = select_subtree_path(&nonce_for_trial(t), n).unwrap(); + if (path.leaf_start..path.leaf_end).any(|i| deleted.contains(&i)) { + caught += 1; + } + } + f64::from(caught) / f64::from(trials) + } + + #[test] + fn detection_uniform_fast_clustered_floor() { + // ADR-0002 Validation: uniform deletions are caught fast; clustered + // (contiguous-block) deletions are caught at roughly the deleted + // fraction per audit (a floor), much slower. This encodes the core + // security claim that the audit RATE (not per-audit cleverness) is the + // lever against a clustered deleter. + let n = 1024u32; // sqrt = 32 + let del_count = n / 10; // delete 10% ≈ 102 + + // Uniform: spread deletions evenly across the keyspace. + let uniform: std::collections::HashSet = + (0..del_count).map(|i| (i * n / del_count) % n).collect(); + let uniform_rate = catch_rate(n, &uniform, 256); + + // Clustered: one contiguous block of the same size. + let clustered: std::collections::HashSet = (0..del_count).collect(); + let clustered_rate = catch_rate(n, &clustered, 256); + + // Uniform should be caught on essentially every audit (spread across the + // whole tree; any selected subtree overlaps some deletion). + assert!( + uniform_rate > 0.95, + "uniform deletions should be caught almost every audit, got {uniform_rate}" + ); + // Clustered (one contiguous f-block) is a floor NEAR the deleted + // fraction f=0.1 — the quantitative ADR claim. The exact rate depends on + // selection geometry (a block of ~102 leaves is hit when the selected + // ~sqrt(N) subtree overlaps it), but it must sit in a tight band around + // f, well below the uniform rate. We bound it to [0.04, 0.30]. + assert!( + (0.04..=0.30).contains(&clustered_rate), + "clustered catch-rate should be near f=0.1, got {clustered_rate}" + ); + assert!( + uniform_rate > clustered_rate * 2.0, + "uniform ({uniform_rate}) must be far easier to catch than clustered ({clustered_rate})" + ); + } + + #[test] + fn subtree_size_near_sqrt_for_balanced_tree() { + // For a power-of-two tree the selection should land near sqrt(N). + let n = 1024; // sqrt = 32, floor = 32 + let path = select_subtree_path(&nonce_of(3), n).unwrap(); + // It stops as soon as a child would drop below floor; the subtree size + // is between floor and 2*floor for a balanced tree. + assert!(path.real_leaf_count() >= 32); + assert!( + path.real_leaf_count() <= 64, + "got {}", + path.real_leaf_count() + ); + } + + // ---- end-to-end proof build + verify ---------------------------------- + + /// Deterministic chunk bytes for a key (test fixture). The tree is built + /// from `BLAKE3` of exactly these bytes, so the proof and the committed + /// root agree — mirroring how a real responder hashes the chunk it holds. + fn chunk_bytes(key: &XorName) -> Vec { + // Distinct, non-trivial bytes derived from the key. + let mut v = key.to_vec(); + v.extend_from_slice(b"chunk-body"); + v + } + + /// Build tree entries `(key, BLAKE3(chunk_bytes(key)))` for `n` keys. + fn entries_for(n: u32) -> Vec<(XorName, [u8; 32])> { + (0..n) + .map(|i| { + let key = xn_u32(i); + let bytes_hash = *blake3::hash(&chunk_bytes(&key)).as_bytes(); + (key, bytes_hash) + }) + .collect() + } + + /// Reference responder: build a real subtree proof via the production + /// [`build_subtree_proof`] from a `MerkleTree` over `entries`. Leaves are + /// hashed from `chunk_bytes(key)` — the same bytes whose hash built the + /// tree — so an honest proof verifies. This makes the tests exercise the + /// exact builder the responder runs. + fn build_proof( + entries: &[(XorName, [u8; 32])], + nonce: &[u8; 32], + peer_id: &[u8; 32], + ) -> (SubtreeProof, StorageCommitment) { + let tree = MerkleTree::build(entries.to_vec()).unwrap(); + let key_count = tree.key_count(); + let proof = build_subtree_proof(&tree, nonce, peer_id, |k| Some(chunk_bytes(k))).unwrap(); + let commitment = fake_commitment(tree.root(), key_count, *peer_id); + (proof, commitment) + } + + fn fake_commitment(root: [u8; 32], key_count: u32, peer: [u8; 32]) -> StorageCommitment { + StorageCommitment { + root, + key_count, + sender_peer_id: peer, + sender_public_key: vec![0u8; 1952], + signature: vec![0u8; 3293], + } + } + + #[test] + fn honest_proof_verifies_at_many_sizes() { + let peer = [0xABu8; 32]; + for n in [5u32, 8, 13, 17, 64, 100, 256, 1000] { + let entries = entries_for(n); + for seed in [1u8, 2, 7, 42, 200] { + let nonce = nonce_of(seed); + let (proof, commitment) = build_proof(&entries, &nonce, &peer); + assert_eq!( + verify_subtree_proof(&proof, &nonce, &commitment), + StructureVerdict::Valid, + "n={n} seed={seed}" + ); + } + } + } + + #[test] + fn honest_proof_verifies_for_every_size_and_nonce() { + // Regression for the left-packed self-pairing geometry bug: the proof + // reconstruction must match the committed root for EVERY key count + // (not just powers of two / cherry-picked sizes) and every nonce. An + // earlier perfect-tree model false-failed honest nodes for ~70% of + // sizes; this guards against any reintroduction. + let peer = [7u8; 32]; + for n in 5u32..=600 { + let entries = entries_for(n); + for seed in 0u8..32 { + let nonce = nonce_of(seed.wrapping_mul(17).wrapping_add(3)); + let (proof, commitment) = build_proof(&entries, &nonce, &peer); + assert_eq!( + verify_subtree_proof(&proof, &nonce, &commitment), + StructureVerdict::Valid, + "honest proof must verify at n={n} seed={seed}" + ); + } + } + } + + #[test] + fn tampered_leaf_breaks_root() { + let peer = [9u8; 32]; + let entries = entries_for(100); + let nonce = nonce_of(5); + let (mut proof, commitment) = build_proof(&entries, &nonce, &peer); + proof.leaves[0].bytes_hash[0] ^= 0x01; + assert!(matches!( + verify_subtree_proof(&proof, &nonce, &commitment), + StructureVerdict::Invalid(_) + )); + } + + #[test] + fn tampered_cut_hash_breaks_root() { + let peer = [9u8; 32]; + let entries = entries_for(256); + let nonce = nonce_of(11); + let (mut proof, commitment) = build_proof(&entries, &nonce, &peer); + if let Some(c) = proof.sibling_cut_hashes.first_mut() { + c[0] ^= 0x01; + } + assert!(matches!( + verify_subtree_proof(&proof, &nonce, &commitment), + StructureVerdict::Invalid(_) + )); + } + + #[test] + fn wrong_leaf_count_rejected() { + let peer = [9u8; 32]; + let entries = entries_for(100); + let nonce = nonce_of(5); + let (mut proof, commitment) = build_proof(&entries, &nonce, &peer); + proof.leaves.pop(); + assert_eq!( + verify_subtree_proof(&proof, &nonce, &commitment), + StructureVerdict::Invalid("wrong leaf count") + ); + } + + #[test] + fn non_ascending_leaves_rejected() { + let peer = [9u8; 32]; + let entries = entries_for(100); + let nonce = nonce_of(5); + let (mut proof, commitment) = build_proof(&entries, &nonce, &peer); + if proof.leaves.len() >= 2 { + proof.leaves.swap(0, 1); + } + assert!(matches!( + verify_subtree_proof(&proof, &nonce, &commitment), + StructureVerdict::Invalid(_) + )); + } + + // ---- spot-check selection --------------------------------------------- + + #[test] + fn spotcheck_indices_in_range_and_distinct() { + let n = 1024; + let nonce = nonce_of(3); + let path = select_subtree_path(&nonce, n).unwrap(); + let k = 8; + let idxs = select_spotcheck_indices(&nonce, &path, k); + assert_eq!( + u32::try_from(idxs.len()).unwrap(), + k.min(path.real_leaf_count()) + ); + let mut seen = std::collections::HashSet::new(); + for i in &idxs { + assert!(*i < path.real_leaf_count()); + assert!(seen.insert(*i), "duplicate spot-check index {i}"); + } + } + + #[test] + fn build_proof_reports_missing_bytes() { + // A responder that no longer holds a selected, committed key's bytes + // must surface MissingBytes (the caller turns this into a confirmed + // failure, not a benign rejection). + let entries = entries_for(100); + let tree = MerkleTree::build(entries).unwrap(); + let nonce = nonce_of(5); + let path = select_subtree_path(&nonce, tree.key_count()).unwrap(); + let victim = tree.key_at(path.leaf_start as usize).unwrap(); + let err = build_subtree_proof(&tree, &nonce, &[1u8; 32], |k| { + if *k == victim { + None + } else { + Some(chunk_bytes(k)) + } + }) + .unwrap_err(); + assert_eq!(err, BuildProofError::MissingBytes { key: victim }); + } + + #[test] + fn spotcheck_returns_all_when_subtree_small() { + // Construct a path with few real leaves. + let path = SubtreePath { + depth: 0, + slot: 0, + leaf_start: 0, + leaf_end: 3, + }; + let idxs = select_spotcheck_indices(&nonce_of(1), &path, 8); + assert_eq!(idxs, vec![0, 1, 2]); + } + + #[test] + fn fabricated_nonced_hash_caught_by_spotcheck_probability() { + // Simulate the realness check: a responder fabricates a fraction x of + // nonced hashes. The auditor spot-checks k leaves; probability all k + // land on honest leaves is (1-x)^k. Here we just assert the auditor + // *would* catch a fabricated leaf when it samples that position. + let peer = [1u8; 32]; + let entries = entries_for(400); + let nonce = nonce_of(9); + let (mut proof, _commitment) = build_proof(&entries, &nonce, &peer); + // Fabricate the nonced hash on the first subtree leaf (wrong bytes). + proof.leaves[0].nonced_hash[0] ^= 0xFF; + // The realness check the caller runs: recompute from the real chunk + // bytes (the same fixture the honest tree was built from). + let leaf = &proof.leaves[0]; + let real_bytes = chunk_bytes(&leaf.key); + let expected = nonced_leaf_hash(&nonce, &peer, &leaf.key, &real_bytes); + assert_ne!( + leaf.nonced_hash, expected, + "fabricated nonced hash must differ from real" + ); + } + + // ---- branch-substitution attack --------------------------------------- + + #[test] + fn responder_cannot_substitute_a_different_branch() { + // ADR-0002 "Subtree selection": the random value alone fixes WHICH + // branch is selected, so "the audited node cannot choose a convenient + // branch to present." This is the load-bearing anti-substitution claim + // and no existing test exercises it — the tamper tests only mangle a + // hash within the *correct* branch. + // + // Attack: the responder builds a fully valid, internally-consistent + // subtree proof for a DIFFERENT nonce (which the selection maps to a + // different branch of the same committed tree), then presents it as the + // answer to the auditor's nonce. Every leaf hash and every cut-hash is + // genuine, the leaves are strictly ascending, and we deliberately pick + // a decoy whose branch has the SAME leaf count and SAME depth as the + // honest branch — so the cheap "wrong leaf count" / "wrong cut-hash + // count" gates do NOT fire. The ONLY thing that can reject it is the + // structural root re-derivation, which climbs using the auditor's + // nonce-derived slot parity and position. It must reject. + let peer = [0x5Au8; 32]; + let n = 1024u32; // balanced tree; sqrt floor = 32 + let entries = entries_for(n); + + let audit_nonce = nonce_of(7); + let audit_path = select_subtree_path(&audit_nonce, n).unwrap(); + + // Find a decoy nonce whose selected branch is a DIFFERENT slot but the + // SAME depth (hence same real-leaf count for this balanced tree). This + // forces rejection via the root check rather than a count mismatch. + let mut decoy: Option<([u8; 32], SubtreePath)> = None; + for seed in 0u8..=255 { + let cand_nonce = nonce_of(seed); + let cand = select_subtree_path(&cand_nonce, n).unwrap(); + if cand.depth == audit_path.depth + && cand.slot != audit_path.slot + && cand.real_leaf_count() == audit_path.real_leaf_count() + { + decoy = Some((cand_nonce, cand)); + break; + } + } + let (decoy_nonce, decoy_path) = + decoy.expect("a same-depth, different-slot decoy branch must exist for n=1024"); + + // Sanity: the decoy really is a different, equally-shaped branch. + assert_ne!(decoy_path.slot, audit_path.slot); + assert_eq!(decoy_path.depth, audit_path.depth); + assert_eq!(decoy_path.real_leaf_count(), audit_path.real_leaf_count()); + + // The responder builds a genuine proof for the DECOY branch. Note the + // nonced hashes are built with the decoy nonce too — but that does not + // matter: the structural check below never inspects nonced hashes, and + // the attack must already die on structure. + let tree = MerkleTree::build(entries.clone()).unwrap(); + let decoy_proof = + build_subtree_proof(&tree, &decoy_nonce, &peer, |k| Some(chunk_bytes(k))).unwrap(); + + // Pin the auditor's commitment to the genuine root of the same tree. + let commitment = fake_commitment(tree.root(), n, peer); + + // The honest answer to the SAME commitment + decoy nonce verifies, so + // the proof itself is well-formed — it is only "wrong" relative to the + // auditor's nonce. + assert_eq!( + verify_subtree_proof(&decoy_proof, &decoy_nonce, &commitment), + StructureVerdict::Valid, + "the decoy proof must be a genuinely valid proof for its own nonce" + ); + + // The attack: present the decoy-branch proof against the AUDIT nonce. + // The count gates cannot fire (same depth + leaf count by construction), + // so this is the root re-derivation rejecting a substituted branch. + let verdict = verify_subtree_proof(&decoy_proof, &audit_nonce, &commitment); + assert_eq!( + verdict, + StructureVerdict::Invalid("root mismatch"), + "substituting a different valid branch must be rejected by the root check, got {verdict:?}" + ); + } +} diff --git a/src/storage/handler.rs b/src/storage/handler.rs index d269aea8..fa440435 100644 --- a/src/storage/handler.rs +++ b/src/storage/handler.rs @@ -108,6 +108,14 @@ impl AntProtocol { Arc::clone(&self.storage) } + /// Test-only: the record count the quote generator currently prices on. + /// Used to assert that quote-time resync tracks records actually held. + #[cfg(test)] + #[must_use] + pub(crate) fn priced_records_stored(&self) -> usize { + self.quote_generator.records_stored() + } + /// Get a shared reference to the payment verifier. #[must_use] pub fn payment_verifier_arc(&self) -> Arc { @@ -263,10 +271,13 @@ impl AntProtocol { Ok(_) => { let content_len = request.content.len(); info!("Stored chunk {addr_hex} ({content_len} bytes)"); - // Increment the close-records counter consumed by calculate_price. - // The PaymentVerifier reads its current record count directly - // from LmdbStorage::current_chunks(), so we no longer need to - // push the value through a side counter here. + // Optimistically bump the close-records counter consumed by + // calculate_price. This is only a fast hint: the authoritative + // value is resynced from LmdbStorage::current_chunks() at quote + // time (see resync_quote_metric), which also accounts for + // deletions and pruning. (The PaymentVerifier separately reads + // its own record count from current_chunks() for payment + // verification.) self.quote_generator.record_store(); // 6. Notify replication engine for fresh fan-out. @@ -346,12 +357,38 @@ impl AntProtocol { } } + /// Resync the quoting metric to the authoritative count of records the node + /// actually holds. + /// + /// The quote price is driven by `QuoteGenerator::records_stored()`. Reading + /// the live LMDB entry count (an O(1) B-tree page-header read) right before + /// pricing makes the metric deletion-aware: any chunk removed by + /// [`LmdbStorage::delete`] or by the replication prune pass is reflected + /// immediately, with no risk of missing a delete path. + /// + /// On a storage read error the previous metric value is left untouched so a + /// transient LMDB error never disrupts quote generation. + fn resync_quote_metric(&self) { + match self.storage.current_chunks() { + Ok(count) => { + self.quote_generator + .resync_records(usize::try_from(count).unwrap_or(usize::MAX)); + } + Err(e) => { + warn!("Failed to read current_chunks() for quote metric resync: {e}"); + } + } + } + /// Handle a quote request. fn handle_quote(&self, request: &ChunkQuoteRequest) -> ChunkQuoteResponse { let addr_hex = hex::encode(request.address); let data_size = request.data_size; debug!("Handling quote request for {addr_hex} (size: {data_size})"); + // Price on records ACTUALLY HELD, not a monotonic store counter. + self.resync_quote_metric(); + // Check if the chunk is already stored so we can tell the client // to skip payment (already_stored = true). // The match intentionally logs the error when the `logging` feature is @@ -416,6 +453,9 @@ impl AntProtocol { request.merkle_payment_timestamp ); + // Price on records ACTUALLY HELD, not a monotonic store counter. + self.resync_quote_metric(); + let Ok(data_size_usize) = usize::try_from(request.data_size) else { return MerkleCandidateQuoteResponse::Error(ProtocolError::QuoteFailed(format!( "data_size {} overflows usize", @@ -1054,4 +1094,90 @@ mod tests { other => panic!("expected Success with already_stored=false, got: {other:?}"), } } + + /// Drive the real quote handler, then read the record count it priced on. + /// The handler calls `resync_quote_metric` first, so this reflects records + /// ACTUALLY HELD. + fn priced_records_after_quote(protocol: &AntProtocol) -> usize { + let quote_request = ChunkQuoteRequest { + address: [0xAAu8; 32], // a quote-only probe, not one of the stored chunks + data_size: 100, + data_type: DATA_TYPE_CHUNK, + }; + let _ = protocol.handle_quote("e_request); + protocol.priced_records_stored() + } + + /// The quote price must track records ACTUALLY HELD: deleting stored chunks + /// must lower the priced record count, not keep quoting as if the data were + /// still held. Exercises the storage-driven resync in `resync_quote_metric`. + #[tokio::test] + async fn test_quote_metric_reflects_deletions() { + let (protocol, _temp) = create_test_protocol().await; + + // Distinct content -> distinct content-addressed keys. + let contents: Vec> = (0u8..5).map(|i| vec![i; 64]).collect(); + let mut addresses = Vec::new(); + for content in &contents { + let addr = LmdbStorage::compute_address(content); + protocol.put_local(&addr, content).await.expect("put_local"); + addresses.push(addr); + } + + // 5 records held -> priced count 5. + assert_eq!(priced_records_after_quote(&protocol), 5); + + // Delete 2 chunks the node was holding. + for addr in addresses.iter().take(2) { + assert!(protocol.storage().delete(addr).await.expect("delete")); + } + assert_eq!(priced_records_after_quote(&protocol), 3); + + // Delete the rest; priced count floors at 0, never underflows. + for addr in addresses.iter().skip(2) { + assert!(protocol.storage().delete(addr).await.expect("delete")); + } + assert_eq!(priced_records_after_quote(&protocol), 0); + } + + /// Stronger, externally-observable proof: the actual quote PRICE returned + /// to a client must drop after the node deletes data it held. A monotonic + /// store counter would keep the price elevated; the resync ties price to + /// records actually held. + /// FLIPS IF: `resync_quote_metric` is removed — the price would stay at the + /// 10-record level even after deletions (`record_store` only ever increments). + #[tokio::test] + async fn test_quote_price_drops_after_deletion() { + use crate::payment::pricing::calculate_price; + + let (protocol, _temp) = create_test_protocol().await; + let contents: Vec> = (0u8..10).map(|i| vec![i; 64]).collect(); + let mut addresses = Vec::new(); + for content in &contents { + let addr = LmdbStorage::compute_address(content); + protocol.put_local(&addr, content).await.expect("put_local"); + addresses.push(addr); + } + + // Drive a real quote; the priced count must equal records held (10), + // and the price must equal calculate_price(10) — the externally + // observable contract. + assert_eq!(priced_records_after_quote(&protocol), 10); + let price_full = calculate_price(10); + + // Delete 8 of 10 held chunks. + for addr in addresses.iter().take(8) { + assert!(protocol.storage().delete(addr).await.expect("delete")); + } + // The next quote must price on 2 records, and the price must be the + // calculate_price(2) value — strictly different from the 10-record + // price (price is monotonic non-decreasing in records_stored). + assert_eq!(priced_records_after_quote(&protocol), 2); + let price_after = calculate_price(2); + assert!( + price_after < price_full, + "deleting data must lower the observable quote price \ + (full={price_full:?}, after={price_after:?})" + ); + } } diff --git a/tests/e2e/mod.rs b/tests/e2e/mod.rs index 87e63e21..994dc31f 100644 --- a/tests/e2e/mod.rs +++ b/tests/e2e/mod.rs @@ -63,6 +63,9 @@ mod replication; #[cfg(test)] mod security_attacks; +#[cfg(test)] +mod subtree_audit_testnet; + pub use anvil::TestAnvil; pub use harness::TestHarness; pub use testnet::{NetworkState, NodeState, TestNetwork, TestNetworkConfig, TestNode}; diff --git a/tests/e2e/replication.rs b/tests/e2e/replication.rs index 83fc792f..448ba545 100644 --- a/tests/e2e/replication.rs +++ b/tests/e2e/replication.rs @@ -389,6 +389,10 @@ async fn test_audit_challenge_returns_correct_digest() { let nonce = [0x42u8; 32]; // Send audit challenge from B to A + // Prune-confirmation single-key audit: the on-wire `AuditChallenge` is now + // handled by `handle_prune_audit_challenge`, which still answers with + // per-key `Digests`. (The storage audit moved to the separate + // `SubtreeAuditChallenge`/`SubtreeAuditResponse` path.) let challenge = AuditChallenge { challenge_id: 1234, nonce, @@ -805,6 +809,7 @@ async fn test_neighbor_sync_request_returns_hints() { replica_hints: vec![], paid_hints: vec![], bootstrapping: false, + commitment: None, }; let msg = ReplicationMessage { request_id: 2000, @@ -1254,6 +1259,7 @@ async fn scenario_14_sync_hints_cover_all_local_keys() { replica_hints: vec![], paid_hints: vec![], bootstrapping: false, + commitment: None, }; let msg = ReplicationMessage { request_id: 1400, @@ -1401,6 +1407,7 @@ async fn scenario_17_bidirectional_sync_when_sender_in_rt() { replica_hints: vec![inbound_hint], paid_hints: vec![], bootstrapping: false, + commitment: None, }; let msg = ReplicationMessage { request_id: 1700, diff --git a/tests/e2e/subtree_audit_testnet.rs b/tests/e2e/subtree_audit_testnet.rs new file mode 100644 index 00000000..773b8b2a --- /dev/null +++ b/tests/e2e/subtree_audit_testnet.rs @@ -0,0 +1,196 @@ +//! Local-testnet end-to-end tests for the gossip-triggered contiguous-subtree +//! storage audit (ADR-0002). +//! +//! These spin a real multi-node testnet and drive the SHIPPED audit over the +//! live wire (real `handle_subtree_challenge` responder + `run_subtree_audit` +//! auditor + real LMDB storage), via the test-only `audit_peer_now` / +//! `rebuild_commitment_now` engine hooks. They prove the two outcomes that +//! matter for a testnet: +//! +//! 1. HONEST: an honest node that holds its committed data passes the audit +//! (no false-positive eviction). +//! 2. ADVERSARY: a node that deletes the bytes it committed to fails the audit +//! (a confirmed failure that, once eviction is re-enabled, evicts it) while +//! honest nodes are unaffected. + +#![allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)] + +use super::TestHarness; +use ant_node::replication::audit::AuditTickResult; +use serial_test::serial; + +/// Store the same `n` chunks on both `a` (the audited holder) and `b` (the +/// auditor, so it holds the bytes it will spot-check), make `a` commit to them, +/// then deterministically seed `b`'s cache with `a`'s commitment (simulating +/// "b received a's gossip" without depending on neighbor-sync timing — that +/// propagation is covered by the dedicated neighbor-sync tests). After this, +/// `b.audit_peer_now(a)` pins `a`'s real commitment and runs the audit over the +/// live wire against `a`'s real responder. +async fn commit_and_seed( + harness: &TestHarness, + a_idx: usize, + b_idx: usize, + n: usize, +) -> Vec<[u8; 32]> { + let a = harness.test_node(a_idx).expect("node a"); + let b = harness.test_node(b_idx).expect("node b"); + let a_store = a.ant_protocol.as_ref().expect("a protocol").storage(); + let b_store = b.ant_protocol.as_ref().expect("b protocol").storage(); + + // Store identical chunks on A and B. Content-addressed: addr == BLAKE3(bytes). + let mut addrs = Vec::with_capacity(n); + for i in 0..n { + let content = format!("subtree-audit-testnet-chunk-{i}").into_bytes(); + let address = *blake3::hash(&content).as_bytes(); + a_store.put(&address, &content).await.expect("put on a"); + b_store.put(&address, &content).await.expect("put on b"); + addrs.push(address); + } + + // A commits to its current key set. + let a_engine = a.replication_engine.as_ref().expect("a engine"); + a_engine + .rebuild_commitment_now() + .await + .expect("a rebuild commitment"); + + // Grab A's freshly built commitment and seed it into B's cache so B can pin + // it (deterministic; no gossip-timing flake). + let a_peer = *a.p2p_node.as_ref().expect("a p2p").peer_id(); + let a_commitment = a_engine + .commitment_state() + .current() + .expect("a has a current commitment") + .commitment() + .clone(); + let b_engine = b.replication_engine.as_ref().expect("b engine"); + b_engine + .inject_peer_commitment_for_test(&a_peer, a_commitment) + .await; + addrs +} + +/// HONEST: a node holding its committed data passes the subtree audit. +#[tokio::test] +#[serial] +async fn honest_node_passes_subtree_audit() { + let harness = TestHarness::setup_small().await.expect("setup"); + harness.warmup_dht().await.expect("warmup"); + + let (a_idx, b_idx) = (3, 4); + commit_and_seed(&harness, a_idx, b_idx, 64).await; + + let a_peer = *harness + .test_node(a_idx) + .expect("a") + .p2p_node + .as_ref() + .expect("a p2p") + .peer_id(); + let b_engine = harness + .test_node(b_idx) + .expect("b") + .replication_engine + .as_ref() + .expect("b engine"); + + // Honest holder: B holds the chunks so it byte-verifies the proof → Passed. + let result = b_engine.audit_peer_now(&a_peer).await; + assert!( + matches!(result, AuditTickResult::Passed { keys_checked, .. } if keys_checked >= 1), + "honest node must pass with at least one byte-verified leaf, got {result:?}" + ); + + harness.teardown().await.expect("teardown"); +} + +/// ADVERSARY: a node that deletes the bytes it committed to FAILS the audit, +/// while honest peers are unaffected. +#[tokio::test] +#[serial] +async fn data_deleting_node_fails_subtree_audit() { + let harness = TestHarness::setup_small().await.expect("setup"); + harness.warmup_dht().await.expect("warmup"); + + let (a_idx, b_idx) = (5, 6); + let addrs = commit_and_seed(&harness, a_idx, b_idx, 64).await; + + // A is now committed-and-gossiped. The adversary deletes ALL the bytes it + // committed to (keeps the gossiped commitment — the classic "claim storage, + // hold nothing" attack). It does NOT rebuild its commitment, so it still + // advertises the now-unbacked root. + let a_store = harness + .test_node(a_idx) + .expect("a") + .ant_protocol + .as_ref() + .expect("a protocol") + .storage(); + for addr in &addrs { + a_store.delete(addr).await.expect("delete on adversary"); + } + + let a_peer = *harness + .test_node(a_idx) + .expect("a") + .p2p_node + .as_ref() + .expect("a p2p") + .peer_id(); + let b_engine = harness + .test_node(b_idx) + .expect("b") + .replication_engine + .as_ref() + .expect("b engine"); + + let result = b_engine.audit_peer_now(&a_peer).await; + // The adversary can no longer produce the subtree's bytes, so its responder + // rejects ("missing bytes for committed key") → a confirmed Failed. (It must + // NOT be Passed; Idle would mean B couldn't reach the audit, also a failure + // of the test setup.) + assert!( + matches!(result, AuditTickResult::Failed { .. }), + "a node that deleted its committed data must FAIL the audit, got {result:?}" + ); + + harness.teardown().await.expect("teardown"); +} + +/// NO FALSE POSITIVE: auditing an honest node repeatedly (different nonces) +/// never produces a confirmed failure. +#[tokio::test] +#[serial] +async fn honest_node_never_false_fails_across_repeated_audits() { + let harness = TestHarness::setup_small().await.expect("setup"); + harness.warmup_dht().await.expect("warmup"); + + let (a_idx, b_idx) = (7, 8); + commit_and_seed(&harness, a_idx, b_idx, 100).await; + + let a_peer = *harness + .test_node(a_idx) + .expect("a") + .p2p_node + .as_ref() + .expect("a p2p") + .peer_id(); + let b_engine = harness + .test_node(b_idx) + .expect("b") + .replication_engine + .as_ref() + .expect("b engine"); + + // Each audit uses a fresh random nonce (different selected subtree). None may + // ever be a confirmed Failed for an honest holder. + for round in 0..8 { + let result = b_engine.audit_peer_now(&a_peer).await; + assert!( + !matches!(result, AuditTickResult::Failed { .. }), + "honest node false-failed on round {round}: {result:?}" + ); + } + + harness.teardown().await.expect("teardown"); +} diff --git a/tests/e2e/testnet.rs b/tests/e2e/testnet.rs index 14216be0..d276789b 100644 --- a/tests/e2e/testnet.rs +++ b/tests/e2e/testnet.rs @@ -1244,11 +1244,21 @@ impl TestNetwork { let shutdown = CancellationToken::new(); let repl_config = ReplicationConfig::default(); let (_fresh_tx, fresh_rx) = tokio::sync::mpsc::unbounded_channel(); + let node_identity = if let Some(ref id) = node.node_identity { + Arc::clone(id) + } else { + warn!( + "Node {} has no identity; skipping replication engine", + node.index + ); + return Ok(()); + }; match ReplicationEngine::new( repl_config, Arc::clone(p2p), protocol.storage(), protocol.payment_verifier_arc(), + node_identity, &node.data_dir, fresh_rx, shutdown.clone(), diff --git a/tests/poc_audit_handler_live.rs b/tests/poc_audit_handler_live.rs new file mode 100644 index 00000000..84989c88 --- /dev/null +++ b/tests/poc_audit_handler_live.rs @@ -0,0 +1,328 @@ +//! Live responder-handler integration tests for the gossip-triggered +//! contiguous-subtree storage audit (ADR-0002). +//! +//! The pure proof maths are covered by the unit tests in +//! `src/replication/subtree.rs`, and the end-to-end attack composition by +//! `poc_commitment_audit_attacks`. This file fills the remaining gap: the +//! *live* responder control-flow branches in +//! [`ant_node::replication::audit::handle_subtree_challenge`] — the function the +//! network actually calls — driven against a real `LmdbStorage` and a real +//! `ResponderCommitmentState`, asserting on the exact `SubtreeAuditResponse` +//! variant produced. +//! +//! Each test is written to FAIL if the defence it covers is removed — see the +//! `// FLIPS IF:` note on each. They are not tautologies: the responder under +//! test is the production code path, not a reimplementation. + +#![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::missing_panics_doc, + clippy::cast_possible_truncation +)] + +use std::sync::Arc; + +use ant_node::replication::audit::handle_subtree_challenge; +use ant_node::replication::commitment_state::{BuiltCommitment, ResponderCommitmentState}; +use ant_node::replication::protocol::{SubtreeAuditChallenge, SubtreeAuditResponse}; +use ant_node::replication::subtree::{verify_subtree_proof, StructureVerdict}; +use ant_node::storage::{LmdbStorage, LmdbStorageConfig}; +use saorsa_core::identity::PeerId; +use saorsa_pqc::api::sig::{ml_dsa_65, MlDsaPublicKey, MlDsaSecretKey}; +use tempfile::TempDir; + +// --------------------------------------------------------------------------- +// Fixtures +// --------------------------------------------------------------------------- + +async fn test_storage() -> (LmdbStorage, TempDir) { + let temp_dir = TempDir::new().expect("create temp dir"); + let config = LmdbStorageConfig { + root_dir: temp_dir.path().to_path_buf(), + ..LmdbStorageConfig::test_default() + }; + let storage = LmdbStorage::new(config).await.expect("create storage"); + (storage, temp_dir) +} + +fn keypair() -> (MlDsaPublicKey, MlDsaSecretKey) { + ml_dsa_65().generate_keypair().unwrap() +} + +/// Deterministic chunk content for index `i` (>= store MIN size). Distinct per +/// index so each address is distinct. +fn chunk_content(i: u8) -> Vec { + (0..1024u32).map(|n| (n as u8) ^ i).collect() +} + +/// A responder identity bound to a freshly-built commitment over the given +/// chunk indices, with those chunks actually stored in `storage`. +struct Responder { + peer_id: PeerId, + peer_id_bytes: [u8; 32], + state: Arc, +} + +impl Responder { + /// Build a responder that has stored `indices` and committed to them. + /// The committed leaf binds `(address, BLAKE3(content))`; the responder + /// reads bytes by address at audit time and rehashes them. + async fn new(storage: &LmdbStorage, indices: &[u8]) -> Self { + let (pk, sk) = keypair(); + // Production identity derivation: peer_id == BLAKE3(pubkey_bytes). + let peer_id_bytes = *blake3::hash(&pk.to_bytes()).as_bytes(); + let peer_id = PeerId::from_bytes(peer_id_bytes); + + let mut entries = Vec::new(); + for &i in indices { + let content = chunk_content(i); + let addr = LmdbStorage::compute_address(&content); + storage.put(&addr, &content).await.expect("put chunk"); + let bytes_hash = *blake3::hash(&content).as_bytes(); + entries.push((addr, bytes_hash)); + } + let built = + BuiltCommitment::build(entries, &peer_id_bytes, &sk, &pk.to_bytes()).expect("build"); + let state = Arc::new(ResponderCommitmentState::new()); + state.rotate(built); + + Self { + peer_id, + peer_id_bytes, + state, + } + } + + fn current_hash(&self) -> [u8; 32] { + self.state.current().unwrap().hash() + } + + fn address(i: u8) -> [u8; 32] { + LmdbStorage::compute_address(&chunk_content(i)) + } +} + +fn challenge_for(responder: &Responder, pin: [u8; 32], nonce: [u8; 32]) -> SubtreeAuditChallenge { + SubtreeAuditChallenge { + challenge_id: 42, + nonce, + challenged_peer_id: responder.peer_id_bytes, + expected_commitment_hash: pin, + } +} + +// --------------------------------------------------------------------------- +// 1. Honest responder, pinned to its gossiped commitment -> Proof +// --------------------------------------------------------------------------- + +/// Baseline: a challenge pinned to the responder's retained commitment, with +/// all committed bytes present, yields a `Proof` whose commitment matches the +/// pin and whose subtree proof passes `verify_subtree_proof`. Anchors the +/// failure-path tests — it proves the happy path is reachable, so a Rejected in +/// another test is the defence firing, not an unrelated error. +#[tokio::test] +async fn honest_responder_answers_with_valid_proof() { + let (storage, _t) = test_storage().await; + // Enough leaves to exercise a real (non-whole-tree) subtree selection. + let indices: Vec = (1..=64u8).collect(); + let r = Responder::new(&storage, &indices).await; + let pin = r.current_hash(); + let nonce = [0x11u8; 32]; + let challenge = challenge_for(&r, pin, nonce); + + let resp = + handle_subtree_challenge(&challenge, &storage, &r.peer_id, false, Some(&r.state)).await; + + match resp { + SubtreeAuditResponse::Proof { + challenge_id, + commitment, + proof, + } => { + assert_eq!(challenge_id, 42); + // The answered commitment is the pinned one. + assert_eq!( + ant_node::replication::commitment::commitment_hash(&commitment), + Some(pin), + ); + // And the proof structurally verifies under the nonce + commitment. + assert_eq!( + verify_subtree_proof(&proof, &nonce, &commitment), + StructureVerdict::Valid, + "honest responder's proof must verify" + ); + } + other => panic!("expected Proof, got {other:?}"), + } +} + +// --------------------------------------------------------------------------- +// 2. Bootstrapping responder -> Bootstrapping (never penalised) +// --------------------------------------------------------------------------- + +/// A responder still bootstrapping answers `Bootstrapping`, not a proof — it +/// must not be penalised for not yet holding data. +/// +/// FLIPS IF: the bootstrap shortcut were removed and a bootstrapping node tried +/// (and failed) to build a proof, exposing fresh nodes to audit penalties. +#[tokio::test] +async fn bootstrapping_responder_reports_bootstrapping() { + let (storage, _t) = test_storage().await; + let r = Responder::new(&storage, &[1, 2, 3, 4]).await; + let pin = r.current_hash(); + let challenge = challenge_for(&r, pin, [0x11u8; 32]); + + let resp = handle_subtree_challenge( + &challenge, + &storage, + &r.peer_id, + /* is_bootstrapping */ true, + Some(&r.state), + ) + .await; + + assert!( + matches!( + resp, + SubtreeAuditResponse::Bootstrapping { challenge_id: 42 } + ), + "expected Bootstrapping, got {resp:?}" + ); +} + +// --------------------------------------------------------------------------- +// 3. Challenge targeting the wrong peer -> Rejected +// --------------------------------------------------------------------------- + +/// A challenge whose `challenged_peer_id` is not this node is rejected — a node +/// must only answer audits addressed to it (so an attacker can't make node A +/// answer for node B's committed tree). +/// +/// FLIPS IF: the target-peer check were dropped and a node answered challenges +/// addressed to anyone. +#[tokio::test] +async fn wrong_target_peer_is_rejected() { + let (storage, _t) = test_storage().await; + let r = Responder::new(&storage, &[1, 2, 3, 4]).await; + let pin = r.current_hash(); + let mut challenge = challenge_for(&r, pin, [0x11u8; 32]); + // Address the challenge to a different peer. + challenge.challenged_peer_id = [0x99u8; 32]; + + let resp = + handle_subtree_challenge(&challenge, &storage, &r.peer_id, false, Some(&r.state)).await; + + match resp { + SubtreeAuditResponse::Rejected { + challenge_id, + reason, + } => { + assert_eq!(challenge_id, 42); + assert!( + reason.contains("does not match this node"), + "expected wrong-peer rejection, got: {reason}" + ); + } + other => panic!("expected Rejected(wrong peer), got {other:?}"), + } +} + +// --------------------------------------------------------------------------- +// 4. Pinned hash the responder does not retain -> Rejected "unknown commitment" +// --------------------------------------------------------------------------- + +/// A challenge pinned to a commitment hash the responder's state does not +/// contain is rejected with "unknown commitment hash", NOT silently answered +/// against the current commitment. Since the auditor only pins a hash the peer +/// just gossiped, this rejection is the auditor's confirmed-failure signal. +/// +/// FLIPS IF: the responder ignored the pin and answered against its current +/// commitment regardless — the pin contract would be void and a lazy node could +/// answer any challenge with any tree. +#[tokio::test] +async fn unknown_pinned_hash_is_rejected() { + let (storage, _t) = test_storage().await; + let r = Responder::new(&storage, &[1, 2, 3, 4]).await; + // A hash the responder never built/retained. + let bogus_pin = [0x99u8; 32]; + let challenge = challenge_for(&r, bogus_pin, [0x11u8; 32]); + + let resp = + handle_subtree_challenge(&challenge, &storage, &r.peer_id, false, Some(&r.state)).await; + + match resp { + SubtreeAuditResponse::Rejected { reason, .. } => { + assert!( + reason.contains("unknown commitment hash"), + "expected unknown-commitment-hash rejection, got: {reason}" + ); + } + other => panic!("expected Rejected(unknown commitment hash), got {other:?}"), + } +} + +/// No commitment state at all (e.g. before the first rotation during rollout) +/// is likewise rejected — there is nothing to answer the pin against. +#[tokio::test] +async fn missing_commitment_state_is_rejected() { + let (storage, _t) = test_storage().await; + let r = Responder::new(&storage, &[1, 2, 3, 4]).await; + let pin = r.current_hash(); + let challenge = challenge_for(&r, pin, [0x11u8; 32]); + + // Pass None for commitment_state. + let resp = handle_subtree_challenge(&challenge, &storage, &r.peer_id, false, None).await; + + assert!( + matches!(resp, SubtreeAuditResponse::Rejected { .. }), + "expected Rejected when no commitment state, got {resp:?}" + ); +} + +// --------------------------------------------------------------------------- +// 5. Committed key whose bytes were deleted -> Rejected "missing bytes..." +// --------------------------------------------------------------------------- + +/// The chunk-deleter case: the responder committed to a key, the auditor pins +/// that commitment, but the responder has since dropped the actual bytes for a +/// key the nonce-selected subtree covers. It cannot fabricate the leaf (the +/// nonced hash is bound to the bytes), so it rejects with the distinct "missing +/// bytes for committed key" reason — which the auditor treats as real storage +/// loss and penalises. +/// +/// To guarantee the deleted key falls inside the selected subtree, we delete +/// EVERY committed chunk's bytes, so whichever leaves the nonce selects, at +/// least one is missing. +/// +/// FLIPS IF: the responder could answer a committed key without holding the +/// bytes — exactly the Finding-1 storage-binding hole the subtree audit closes. +#[tokio::test] +async fn committed_key_with_missing_bytes_is_rejected() { + let (storage, _t) = test_storage().await; + let indices: Vec = (1..=32u8).collect(); + let r = Responder::new(&storage, &indices).await; + let pin = r.current_hash(); + + // Drop the bytes for every committed chunk AFTER committing, so any selected + // subtree contains at least one key whose bytes are gone. + for &i in &indices { + let addr = Responder::address(i); + storage.delete(&addr).await.expect("delete chunk"); + } + + let challenge = challenge_for(&r, pin, [0x11u8; 32]); + let resp = + handle_subtree_challenge(&challenge, &storage, &r.peer_id, false, Some(&r.state)).await; + + match resp { + SubtreeAuditResponse::Rejected { reason, .. } => { + assert!( + reason.contains("missing bytes for committed key"), + "expected missing-bytes rejection, got: {reason}" + ); + } + other => panic!("expected Rejected(missing bytes), got {other:?}"), + } +} diff --git a/tests/poc_bootstrap_stall.rs b/tests/poc_bootstrap_stall.rs new file mode 100644 index 00000000..6364f717 --- /dev/null +++ b/tests/poc_bootstrap_stall.rs @@ -0,0 +1,265 @@ +//! Proof-of-concept regression test for the **bootstrap stall** attack +//! against the neighbour-sync admission / drain detector. +//! +//! ## The attack (no fix yet) +//! +//! While a node is bootstrapping, every inbound `NeighborSyncRequest` +//! whose admission overflows `MAX_PENDING_VERIFY_PER_PEER` (the per-peer +//! cap is the first to bite for any single peer) calls +//! `bootstrap::note_capacity_rejected(source)`. The drain check in +//! `bootstrap::check_bootstrap_drained` then refuses to complete +//! bootstrap while the set is non-empty: +//! +//! ```ignore +//! if !state.capacity_rejected_sources.is_empty() { +//! return false; // "not yet drained" +//! } +//! ``` +//! +//! The set entry for `source` is cleared only when **the same source** +//! later completes an admission cycle with zero rejections. A single +//! peer that keeps sending over-cap hints faster than the verification +//! queue drains never has a "clean cycle" — so it is **permanently** +//! in `capacity_rejected_sources`, and bootstrap **never completes**. +//! +//! ## Why this matters +//! +//! While `is_bootstrapping == true`: +//! - **Audits are paused** (`replication::audit::audit_tick` returns +//! `Idle` if `is_bootstrapping`, see `audit.rs` Invariant 19). A +//! victim stuck in bootstrap mode is effectively a node that does no +//! auditing — bad nodes around it accrue no trust penalties. +//! - Other replication invariants gated on `bootstrap_drained` (paid +//! list repair flow, prune confirmation paths) also stay off. +//! +//! A single Byzantine peer in the victim's routing table can therefore +//! disable the entire reputation system on that victim, for free, +//! using nothing but well-formed `NeighborSyncRequest` messages that +//! the victim's admission path accepts as legitimate. +//! +//! ## What this test proves +//! +//! Drives the in-process pieces (`ReplicationQueues`, `BootstrapState`, +//! `bootstrap::note_capacity_rejected` / +//! `bootstrap::check_bootstrap_drained`) end-to-end through the same +//! call sequence that the live replication loop runs when handling an +//! over-cap `NeighborSyncRequest`. With no fix this test passes — i.e. +//! it documents the buggy behaviour by asserting the victim never +//! drains. The fix (whatever shape it takes — per-source rate limits, +//! capacity-reject decay, trust-event escalation, ...) will need a +//! follow-up test asserting drain happens within a bounded number of +//! over-cap cycles. + +#![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::missing_panics_doc, + clippy::significant_drop_tightening +)] + +use std::collections::HashSet; +use std::sync::Arc; +use std::time::Instant; + +use tokio::sync::RwLock; + +use ant_node::replication::bootstrap::{ + check_bootstrap_drained, clear_capacity_rejected, note_capacity_rejected, +}; +use ant_node::replication::scheduling::{ + AdmissionResult, ReplicationQueues, MAX_PENDING_VERIFY_PER_PEER, +}; +use ant_node::replication::types::{ + BootstrapState, HintPipeline, VerificationEntry, VerificationState, +}; +use saorsa_core::identity::PeerId; + +fn peer(b: u8) -> PeerId { + let mut bytes = [0u8; 32]; + bytes[0] = b; + PeerId::from_bytes(bytes) +} + +fn entry(sender: PeerId) -> VerificationEntry { + VerificationEntry { + state: VerificationState::PendingVerify, + pipeline: HintPipeline::Replica, + verified_sources: Vec::new(), + tried_sources: HashSet::new(), + created_at: Instant::now(), + hint_sender: sender, + } +} + +fn unique_key(i: u32) -> [u8; 32] { + let mut k = [0u8; 32]; + k[..4].copy_from_slice(&i.to_le_bytes()); + k +} + +/// Simulates one inbound `NeighborSyncRequest` from `source` carrying +/// `hint_count` hints — returns the number of admissions that capacity- +/// rejected (i.e. what `AdmissionOutcome::capacity_rejected_count` would +/// be in the live loop), and as a side effect mutates `queues` and the +/// bootstrap-state in exactly the same way the live `admit_and_queue_hints` +/// followed by the bootstrap-drain accounting do. +async fn simulate_inbound_sync( + queues: &Arc>, + bootstrap_state: &Arc>, + source: PeerId, + key_offset: u32, + hint_count: u32, +) -> usize { + let mut capacity_rejected_count: usize = 0; + + { + let mut q = queues.write().await; + for i in 0..hint_count { + let result = q.add_pending_verify(unique_key(key_offset + i), entry(source)); + match result { + AdmissionResult::Admitted | AdmissionResult::AlreadyPresent => {} + AdmissionResult::CapacityRejected => { + capacity_rejected_count += 1; + } + } + } + } + + // Mirror replication/mod.rs:1391-1400: while bootstrapping, note or + // clear capacity rejection for this source based on the outcome. + if capacity_rejected_count > 0 { + note_capacity_rejected(bootstrap_state, source).await; + } else { + clear_capacity_rejected(bootstrap_state, &source).await; + } + + capacity_rejected_count +} + +/// **The attack.** A single peer keeps the victim's bootstrap permanently +/// undrained by always sending one more hint than the per-peer pending +/// quota can accept. The victim's `capacity_rejected_sources` set stays +/// non-empty forever, so `check_bootstrap_drained` never returns `true`. +/// +/// Pre-fix behaviour: this test passes (the attack succeeds — drain never +/// completes). The presence of this test is the regression marker. +/// +/// Post-fix behaviour: the fix MUST cause `check_bootstrap_drained` to +/// return `true` within a bounded number of cycles regardless of attacker +/// flood pattern. A follow-up test should assert that bound. +#[tokio::test] +async fn poc_bootstrap_stall_via_persistent_per_peer_overflow() { + let queues = Arc::new(RwLock::new(ReplicationQueues::new())); + let bootstrap_state = Arc::new(RwLock::new(BootstrapState::new())); + + let attacker = peer(0xAA); + + // Round 1: attacker sends per-peer-cap + 1 hints. The first + // MAX_PENDING_VERIFY_PER_PEER admit; the last over-cap one rejects. + // After this round, `capacity_rejected_sources` contains the attacker. + let mut next_key: u32 = 0; + #[allow(clippy::cast_possible_truncation)] + let flood = MAX_PENDING_VERIFY_PER_PEER as u32 + 1; + let rejected = + simulate_inbound_sync(&queues, &bootstrap_state, attacker, next_key, flood).await; + next_key += flood; + assert!( + rejected >= 1, + "round 1 must over-cap (got {rejected} rejections); test is mis-sized" + ); + + // Victim has nothing else outstanding: no other pending peer requests, + // no other pending keys discovered. The ONLY thing preventing drain + // is `capacity_rejected_sources` containing the attacker. + let drained_before_attack_continues = { + let q = queues.read().await; + check_bootstrap_drained(&bootstrap_state, &q).await + }; + assert!( + !drained_before_attack_continues, + "bootstrap must NOT drain while attacker has outstanding capacity-rejected hints" + ); + + // Round 2..N: attacker keeps sending one more over-cap hint each + // round. In the live loop, the victim's verification cycle would + // drain a few entries between rounds, but the attacker just sends + // more hints than fit. Here we simulate that pattern by NEVER + // draining queues between attacker rounds: this is the worst-case + // for the victim and matches an attacker who paces hints to keep + // pending_per_sender[attacker] always at the cap. + for round in 0..32 { + let r = simulate_inbound_sync(&queues, &bootstrap_state, attacker, next_key, 1).await; + next_key += 1; + // Each round must keep capacity-rejecting (per-peer cap still hit + // because we never freed slots for this sender). + assert!( + r >= 1, + "round {round}: attacker hint must continue to capacity-reject \ + (per-peer cap still full); got {r}" + ); + + let drained = { + let q = queues.read().await; + check_bootstrap_drained(&bootstrap_state, &q).await + }; + assert!( + !drained, + "round {round}: bootstrap drained despite attacker still capacity-rejecting" + ); + } + + // After 32 rounds (could be 32 million) the attacker is STILL in + // `capacity_rejected_sources`. The victim is permanently in + // bootstrap mode. This is the bug. + let state = bootstrap_state.read().await; + assert!( + state.capacity_rejected_sources.contains(&attacker), + "attacker peer is still in capacity_rejected_sources after the flood — \ + this is the documented stall: the victim has no mechanism to retire \ + the attacker without the attacker's cooperation (a 'clean' admission \ + cycle), so a hostile peer can stall bootstrap indefinitely" + ); + assert_eq!( + state.capacity_rejected_sources.len(), + 1, + "only the attacker is outstanding; honest peers are unaffected — \ + which is exactly what makes this a single-peer DoS" + ); +} + +/// Honest peers are unaffected: the per-source quota means a flood from +/// the attacker cannot starve an honest peer's hints. The honest peer's +/// "clean" cycle correctly clears its bootstrap entry. This test +/// confirms the per-source isolation that D1 already established — +/// included so a future fix doesn't accidentally break it. +#[tokio::test] +async fn honest_peer_drains_normally_alongside_attacker() { + let queues = Arc::new(RwLock::new(ReplicationQueues::new())); + let bootstrap_state = Arc::new(RwLock::new(BootstrapState::new())); + + let attacker = peer(0xAA); + let honest = peer(0x01); + + // Attacker over-caps. + #[allow(clippy::cast_possible_truncation)] + let flood = MAX_PENDING_VERIFY_PER_PEER as u32 + 1; + let r_atk = simulate_inbound_sync(&queues, &bootstrap_state, attacker, 0, flood).await; + assert!(r_atk >= 1); + + // Honest peer sends a small clean batch. + let r_honest = simulate_inbound_sync(&queues, &bootstrap_state, honest, flood + 100, 16).await; + assert_eq!( + r_honest, 0, + "honest peer's small batch must NOT capacity-reject — per-source quota isolates them" + ); + + let state = bootstrap_state.read().await; + assert!( + state.capacity_rejected_sources.contains(&attacker), + "attacker is outstanding" + ); + assert!( + !state.capacity_rejected_sources.contains(&honest), + "honest peer is NOT outstanding; its clean cycle cleared (or never created) its entry" + ); +} diff --git a/tests/poc_commitment_audit_attacks.rs b/tests/poc_commitment_audit_attacks.rs new file mode 100644 index 00000000..f517fd50 --- /dev/null +++ b/tests/poc_commitment_audit_attacks.rs @@ -0,0 +1,865 @@ +//! Threat-model proof-of-concept tests for the gossip-triggered +//! contiguous-subtree storage audit (ADR-0002, +//! `docs/.../v13-gossip-subtree-audit`). +//! +//! Each test models a specific storage-binding attack from the original +//! Finding-1 / Finding-2 reports +//! (`notes/security-findings-2026-05-22/{01,02}-*.md`) and asserts that the +//! subtree-audit mechanisms reject it. This file is the single canonical place +//! to look for "does the subtree audit actually close the storage-binding +//! findings?" — each `#[test]` docstring links the attack back to its finding. +//! +//! ## How the auditor is modelled here +//! +//! The production auditor's `verify_subtree_response` (in +//! `src/replication/audit.rs`) is private, so this file reproduces the exact +//! ordered gates it runs — pin, peer-id binding, signature, structural +//! [`verify_subtree_proof`], then a real-bytes spot-check on a few subtree +//! leaves — via the public primitives. The helper [`auditor_accepts`] runs them +//! in the same order with the same failure semantics, so a reviewer can see +//! each attack is caught at the same gate the network code would catch it. +//! +//! ## What changed from the old per-key audit (and why) +//! +//! The OLD audit named individual keys and sampled a per-key Merkle inclusion +//! proof + digest. The subtree audit names NO keys: the nonce alone selects one +//! contiguous subtree, the responder must expand it in full, and a few leaves +//! are byte-checked. Consequently these per-key-only attacks were DROPPED — they +//! have no analogue under subtree sampling: +//! +//! * "key not in commitment" / overclaim-via-partial-commitment — the auditor +//! never names a key, so a responder can't be asked to prove an uncommitted +//! key; it proves whatever the nonce selects from its own committed tree. +//! * per-key digest order / per-key path tamper — replaced by the subtree +//! structural checks (leaf count, ascending order, cut-hash count, root +//! rebuild) and the per-leaf real-bytes spot-check. +//! * `RecentProvers` holder-credit revocation/rotation tests — those exercised +//! the cache binding, not the audit proof, and now live with the cache; the +//! subtree auditor credits per proven leaf (`AuditCredit`) but the credit +//! binding itself is unchanged and tested elsewhere. +//! +//! Attacks PRESERVED in spirit, ported to the subtree model: fresh-commitment +//! substitution, cross-peer commitment substitution, throwaway-key +//! substitution, wrong-signer, replay-under-fresh-nonce, repudiation of a +//! recently gossiped pin, and the lazy/relay "holds addresses not bytes" +//! fabricated-possession attack. Plus subtree-native structural attacks: +//! tampered cut-hash, wrong leaf count, reordered leaves. + +#![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::missing_panics_doc, + clippy::redundant_clone, + clippy::cast_possible_truncation, + clippy::doc_markdown, + clippy::needless_borrows_for_generic_args +)] + +use ant_node::replication::commitment::{ + commitment_hash, leaf_hash, sign_commitment, verify_commitment_signature, MerkleTree, + StorageCommitment, +}; +use ant_node::replication::commitment_state::{BuiltCommitment, ResponderCommitmentState}; +use ant_node::replication::subtree::{ + build_subtree_proof, nonced_leaf_hash, select_spotcheck_indices, select_subtree_path, + verify_subtree_proof, StructureVerdict, SubtreeProof, +}; +use saorsa_pqc::api::sig::{ml_dsa_65, MlDsaPublicKey, MlDsaSecretKey}; + +// --------------------------------------------------------------------------- +// Fixtures +// --------------------------------------------------------------------------- + +fn keypair() -> (MlDsaPublicKey, MlDsaSecretKey) { + ml_dsa_65().generate_keypair().unwrap() +} + +/// Deterministic chunk bytes for key index `i`. The committed tree is built +/// from `BLAKE3(content(i))`, so an honest proof — which hashes the same bytes — +/// reconstructs the committed root and passes the real-bytes spot-check. +fn content(i: u32) -> Vec { + let mut v = key(i).to_vec(); + v.extend_from_slice(b"subtree-audit-chunk-body"); + v.extend_from_slice(&i.to_le_bytes()); + v +} + +fn content_hash(i: u32) -> [u8; 32] { + *blake3::hash(&content(i)).as_bytes() +} + +/// Big-endian key so numeric order matches the MerkleTree sort order; this lets +/// us reason about leaf positions when we tamper with them. +fn key(i: u32) -> [u8; 32] { + let mut k = [0u8; 32]; + k[..4].copy_from_slice(&i.to_be_bytes()); + k +} + +/// A responder identity (real ML-DSA keypair) plus its retention state. Peer +/// identity is derived from the public key exactly as in production +/// (saorsa-core `peer_id_from_public_key` = `BLAKE3(pubkey_bytes)`). +struct Responder { + state: ResponderCommitmentState, + public_key: MlDsaPublicKey, + secret_key: MlDsaSecretKey, + peer_id_bytes: [u8; 32], +} + +impl Responder { + fn new() -> Self { + let (public_key, secret_key) = keypair(); + let peer_id_bytes = *blake3::hash(&public_key.to_bytes()).as_bytes(); + Self { + state: ResponderCommitmentState::new(), + public_key, + secret_key, + peer_id_bytes, + } + } + + /// Commit to keys `[0, n)` and rotate that commitment into `current`. + /// Returns the new commitment hash. + fn commit_to_range(&self, n: u32) -> [u8; 32] { + let entries: Vec<_> = (0..n).map(|i| (key(i), content_hash(i))).collect(); + let built = BuiltCommitment::build( + entries, + &self.peer_id_bytes, + &self.secret_key, + &self.public_key.to_bytes(), + ) + .unwrap(); + let h = built.hash(); + self.state.rotate(built); + h + } +} + +/// Bytes source for an HONEST responder: it really holds every chunk it +/// committed to, so it can always produce a correct `nonced_hash`. +fn honest_bytes(k: &[u8; 32]) -> Option> { + for i in 0..4096u32 { + if &key(i) == k { + return Some(content(i)); + } + } + None +} + +/// The auditor's full ordered verification, mirroring the production +/// `verify_subtree_response` gates. Returns `Ok(byte_checked_count)` on accept. +/// +/// `auditor_local_bytes(k)` is the auditor's OWN copy of a chunk (used for the +/// real-bytes spot-check); a leaf the auditor cannot byte-check is skipped, and +/// if it could check none the audit is inconclusive (`AuditError::Inconclusive`, +/// the production "Idle, no credit, no penalty" outcome) — never a free pass. +fn auditor_accepts( + challenged_peer_id: &[u8; 32], + expected_commitment_hash: &[u8; 32], + nonce: &[u8; 32], + commitment: &StorageCommitment, + proof: &SubtreeProof, + auditor_local_bytes: impl Fn(&[u8; 32]) -> Option>, +) -> Result { + // -- Gate: pin + peer-id binding + signature ---------------------------- + if commitment.sender_peer_id != *challenged_peer_id { + return Err(AuditError::SenderPeerIdMismatch); + } + let derived = *blake3::hash(&commitment.sender_public_key).as_bytes(); + if derived != commitment.sender_peer_id { + return Err(AuditError::PeerIdKeyMismatch); + } + match commitment_hash(commitment) { + Some(h) if &h == expected_commitment_hash => {} + _ => return Err(AuditError::CommitmentHashMismatch), + } + if !verify_commitment_signature(commitment) { + return Err(AuditError::SignatureInvalid); + } + + // -- Gate: structure ---------------------------------------------------- + if let StructureVerdict::Invalid(why) = verify_subtree_proof(proof, nonce, commitment) { + return Err(AuditError::StructureInvalid(why)); + } + + // -- Gate: real bytes (per-leaf possession) ----------------------------- + let path = select_subtree_path(nonce, commitment.key_count) + .ok_or(AuditError::StructureInvalid("out-of-protocol key_count"))?; + let spot = select_spotcheck_indices(nonce, &path, 8); + let mut checked = 0usize; + for idx in spot { + let leaf = proof + .leaves + .get(idx as usize) + .ok_or(AuditError::StructureInvalid("spot index out of range"))?; + let Some(bytes) = auditor_local_bytes(&leaf.key) else { + continue; // auditor lacks this chunk; not the responder's fault + }; + let plain = *blake3::hash(&bytes).as_bytes(); + let nonced = nonced_leaf_hash(nonce, &commitment.sender_peer_id, &leaf.key, &bytes); + if leaf.bytes_hash != plain || leaf.nonced_hash != nonced { + return Err(AuditError::RealBytesMismatch); + } + checked += 1; + } + if checked == 0 { + // The structurally-valid proof binds only PUBLIC data (the leaf + // bytes_hash IS the chunk address). With no byte-verified leaf the + // audit proves nothing about possession — inconclusive, not a pass. + return Err(AuditError::Inconclusive); + } + Ok(checked) +} + +#[derive(Debug, PartialEq, Eq)] +enum AuditError { + SenderPeerIdMismatch, + PeerIdKeyMismatch, + CommitmentHashMismatch, + SignatureInvalid, + StructureInvalid(&'static str), + RealBytesMismatch, + Inconclusive, +} + +/// Build an honest subtree proof for `nonce` against the responder's current +/// committed tree, returning `(proof, commitment)` as the auditor would receive +/// them in a `SubtreeAuditResponse::Proof`. +fn honest_proof_and_commitment( + r: &Responder, + nonce: &[u8; 32], +) -> (SubtreeProof, StorageCommitment) { + let built = r.state.current().unwrap(); + let proof = build_subtree_proof(built.tree(), nonce, &r.peer_id_bytes, honest_bytes).unwrap(); + (proof, built.commitment().clone()) +} + +// --------------------------------------------------------------------------- +// Sanity: the honest path the attack tests are measured against actually passes +// --------------------------------------------------------------------------- + +/// Anchor: an honest responder that committed to its keys and still holds the +/// bytes produces a proof the (modelled) auditor accepts. Without this, the +/// rejection assertions below could pass vacuously. +#[test] +fn honest_responder_passes_audit() { + let nonce = [0xCD; 32]; + let honest = Responder::new(); + let pin = honest.commit_to_range(64); + let (proof, commitment) = honest_proof_and_commitment(&honest, &nonce); + + let res = auditor_accepts( + &honest.peer_id_bytes, + &pin, + &nonce, + &commitment, + &proof, + honest_bytes, + ); + assert!(res.is_ok(), "honest path must pass, got {res:?}"); + assert!(res.unwrap() >= 1, "must byte-check at least one leaf"); +} + +// --------------------------------------------------------------------------- +// Finding 1, Path A: lazy/relay node holds chunk ADDRESSES, not bytes +// --------------------------------------------------------------------------- + +/// Attack 1a (Finding 1, Path A) — the storage-binding heart of the subtree +/// audit. A lazy/relay node retained the gossiped commitment and knows every +/// leaf's `bytes_hash` (that value IS the chunk's network address, which is +/// public), but it DROPPED the actual bytes. It fabricates a proof: correct +/// `key` and correct `bytes_hash` for every selected leaf (so the structural +/// root rebuild passes), but it cannot compute the `nonced_hash`, which requires +/// the real bytes under a fresh nonce. It fills in a forged `nonced_hash`. +/// +/// The structural gate PASSES (addresses alone rebuild the root), proving that +/// structure is NOT sufficient — exactly the Finding-1 hole. The real-bytes +/// spot-check is what catches it: the auditor recomputes the nonced hash from +/// its own copy of the chunk and finds the forged one wrong. +#[test] +fn relay_holding_only_addresses_caught_by_real_bytes_check() { + let nonce = [0x77; 32]; + let honest_keyset = Responder::new(); + let pin = honest_keyset.commit_to_range(100); + let built = honest_keyset.state.current().unwrap(); + + // The lazy node fabricates the proof from PUBLIC data only: it knows each + // leaf key and its bytes_hash (== address), but NOT the bytes, so it forges + // every nonced_hash. + let path = select_subtree_path(&nonce, built.commitment().key_count).unwrap(); + let mut leaves = Vec::new(); + for idx in path.leaf_start..path.leaf_end { + let k = built.tree().key_at(idx as usize).unwrap(); + // bytes_hash is public (== the chunk address); the responder fakes the + // possession hash because it lacks the bytes. + let forged_nonced = *blake3::hash(b"i-do-not-have-the-bytes").as_bytes(); + leaves.push(ant_node::replication::subtree::SubtreeLeaf { + key: k, + bytes_hash: content_hash(idx), + nonced_hash: forged_nonced, + }); + } + // Real sibling cut-hashes from the committed tree (public, derivable). + let plan = ant_node::replication::subtree::subtree_plan(built.tree(), &nonce).unwrap(); + let forged = SubtreeProof { + leaves, + sibling_cut_hashes: plan.sibling_cut_hashes, + }; + + // Structure alone PASSES — addresses are enough to rebuild the root. This + // is the precise reason structure is insufficient on its own. + assert_eq!( + verify_subtree_proof(&forged, &nonce, built.commitment()), + StructureVerdict::Valid, + "address-only proof rebuilds the root (structure cannot bind possession)" + ); + + // The full auditor (with the real-bytes spot-check) rejects: the auditor + // holds the real chunks and recomputes the nonced hash. + let res = auditor_accepts( + &honest_keyset.peer_id_bytes, + &pin, + &nonce, + built.commitment(), + &forged, + honest_bytes, + ); + assert_eq!( + res, + Err(AuditError::RealBytesMismatch), + "forged nonced_hash must be caught by the real-bytes spot-check, got {res:?}" + ); +} + +/// Attack 1a, detection-probability framing: a responder that fabricates a +/// FRACTION of leaves (holds some bytes, forged the rest) survives one audit +/// only with probability `(1 - x)^k` over `k` spot-checked leaves. This pins +/// that any spot-check landing on a forged leaf is fatal — the responder cannot +/// predict which leaves are sampled, because the spot-check indices are derived +/// from the same nonce that fixes the whole proof. +#[test] +fn fabricated_fraction_is_caught_when_a_forged_leaf_is_sampled() { + let nonce = [0x31; 32]; + let r = Responder::new(); + let pin = r.commit_to_range(400); + let (mut proof, commitment) = honest_proof_and_commitment(&r, &nonce); + + // Forge the nonced hash on every spot-checked position (worst case for the + // attacker: all sampled leaves are fabricated → guaranteed catch). + let path = select_subtree_path(&nonce, commitment.key_count).unwrap(); + for idx in select_spotcheck_indices(&nonce, &path, 8) { + if let Some(leaf) = proof.leaves.get_mut(idx as usize) { + leaf.nonced_hash[0] ^= 0xFF; + } + } + + let res = auditor_accepts( + &r.peer_id_bytes, + &pin, + &nonce, + &commitment, + &proof, + honest_bytes, + ); + assert_eq!( + res, + Err(AuditError::RealBytesMismatch), + "a forged leaf landing under the spot-check must fail, got {res:?}" + ); +} + +/// Attack 1a, inconclusive lane (NOT a free pass): a relay returns a +/// structurally-valid, address-only proof to an auditor that happens to hold +/// NONE of the spot-checked chunks. The auditor cannot byte-verify anything, so +/// it must treat the audit as INCONCLUSIVE — no credit, no penalty — rather than +/// passing the relay for free. This closes the "structure-only pass" hole even +/// when the auditor lacks the bytes. +#[test] +fn relay_with_no_auditor_overlap_is_inconclusive_not_passed() { + let nonce = [0x19; 32]; + let r = Responder::new(); + let pin = r.commit_to_range(100); + // Honest structure (real bytes), so structure passes; the point is the + // auditor holds none of the chunks. + let (proof, commitment) = honest_proof_and_commitment(&r, &nonce); + + let auditor_holds_nothing = |_k: &[u8; 32]| -> Option> { None }; + let res = auditor_accepts( + &r.peer_id_bytes, + &pin, + &nonce, + &commitment, + &proof, + auditor_holds_nothing, + ); + assert_eq!( + res, + Err(AuditError::Inconclusive), + "no byte-verifiable leaf ⇒ inconclusive, never a free pass, got {res:?}" + ); +} + +// --------------------------------------------------------------------------- +// Finding 1, Path B: fresh-commitment substitution +// --------------------------------------------------------------------------- + +/// Attack 1b (Finding 1, Path B): a responder builds a FRESH commitment over a +/// different key set and answers with a valid proof against THAT commitment, +/// while the auditor pinned the hash of the commitment the peer actually +/// gossiped. The auditor's pin (`commitment_hash == expected_commitment_hash`) +/// rejects the substitution before any structural work. +#[test] +fn fresh_commitment_substitution_rejected_by_pin() { + let nonce = [0xCD; 32]; + + let original = Responder::new(); + let pinned_hash = original.commit_to_range(64); + + // Same peer rotates to a fresh commitment over a different range; it can + // build a perfectly valid proof against the NEW commitment. + let fresh_hash = original.commit_to_range(32); + assert_ne!(pinned_hash, fresh_hash); + let (proof, fresh_commitment) = honest_proof_and_commitment(&original, &nonce); + + // Auditor still pins the ORIGINAL hash. + let res = auditor_accepts( + &original.peer_id_bytes, + &pinned_hash, // <- original pin, not fresh_hash + &nonce, + &fresh_commitment, + &proof, + honest_bytes, + ); + assert_eq!( + res, + Err(AuditError::CommitmentHashMismatch), + "fresh-commitment substitution must trip the pin, got {res:?}" + ); +} + +// --------------------------------------------------------------------------- +// Finding 1, Path C: cross-peer commitment substitution +// --------------------------------------------------------------------------- + +/// Attack 1c (Finding 1 — peer impersonation): peer Q lifts peer P's signed +/// commitment from gossip and embeds it in its own response, hoping the auditor +/// verifies P's signature by mistake. The auditor binds the commitment's +/// `sender_peer_id` to the challenged peer; the stolen commitment names P, not +/// Q, so it is rejected before any signature/structure work. +#[test] +fn cross_peer_commitment_substitution_rejected_by_sender_id() { + let nonce = [0xCD; 32]; + + let real_p = Responder::new(); + let p_hash = real_p.commit_to_range(64); + let (p_proof, p_commitment) = honest_proof_and_commitment(&real_p, &nonce); + + // Auditor is challenging Q (a different peer id) but somehow holds p_hash in + // its pin (modelling a mis-binding); Q replays P's commitment + proof. + let q_peer_id = [0xCC; 32]; + let res = auditor_accepts( + &q_peer_id, // challenged peer is Q + &p_hash, + &nonce, + &p_commitment, // sender_peer_id == P, not Q + &p_proof, + honest_bytes, + ); + assert_eq!( + res, + Err(AuditError::SenderPeerIdMismatch), + "cross-peer substitution must trip the sender-id binding, got {res:?}" + ); +} + +/// Attack 1c': throwaway-key substitution. An adversary wants to answer as peer +/// P (whose pubkey it does NOT control). It builds a commitment naming P's +/// peer_id but embedding a throwaway pubkey it can sign with — the signature +/// verifies under the embedded key. The peer-id↔key binding +/// (`peer_id == BLAKE3(embedded_pubkey)`) rejects it: the embedded throwaway key +/// does not hash to P's peer_id. +#[test] +#[allow(clippy::similar_names)] +fn throwaway_key_substitution_rejected_by_pubkey_binding() { + let nonce = [0xCD; 32]; + + // P's real identity (adversary does not hold P's secret key). + let (p_pubkey, _p_secret) = keypair(); + let p_peer_id = *blake3::hash(&p_pubkey.to_bytes()).as_bytes(); + + // Adversary's throwaway keypair. + let (throwaway_pk, throwaway_sk) = keypair(); + let throwaway_pk_bytes = throwaway_pk.to_bytes(); + + // Build a commitment naming P's peer_id but embedding+signing with the + // throwaway key. + let entries: Vec<_> = (0..8u32).map(|i| (key(i), content_hash(i))).collect(); + let tree = MerkleTree::build(entries).unwrap(); + let root = tree.root(); + let key_count = tree.key_count(); + let sig = sign_commitment( + &throwaway_sk, + &root, + key_count, + &p_peer_id, // claims P (the lie) + &throwaway_pk_bytes, + ) + .unwrap(); + let bad_commit = StorageCommitment { + root, + key_count, + sender_peer_id: p_peer_id, + sender_public_key: throwaway_pk_bytes, + signature: sig, + }; + let pin = commitment_hash(&bad_commit).unwrap(); + + // A perfectly valid proof against the bad commitment's own tree. + let proof = build_subtree_proof(&tree, &nonce, &p_peer_id, honest_bytes).unwrap(); + + let res = auditor_accepts(&p_peer_id, &pin, &nonce, &bad_commit, &proof, honest_bytes); + assert_eq!( + res, + Err(AuditError::PeerIdKeyMismatch), + "throwaway-key attack must trip the peer-id↔key binding, got {res:?}" + ); +} + +/// Attack 1c'' — wrong signer at the signature gate. To isolate the signature +/// gate from the bindings above, the adversary swaps BOTH the embedded pubkey +/// and the sender_peer_id to a consistent (wrong) identity, and re-pins the +/// auditor to the mutated commitment. Now the peer-id binding and pin pass, but +/// the signature was produced under the ORIGINAL secret key over the ORIGINAL +/// payload — it cannot verify under the swapped key. +#[test] +fn wrong_signer_rejected_at_signature_gate() { + let nonce = [0xCD; 32]; + + let responder = Responder::new(); + responder.commit_to_range(16); + let (proof, commitment) = honest_proof_and_commitment(&responder, &nonce); + + let (wrong_pk, _wrong_sk) = keypair(); + let wrong_pk_bytes = wrong_pk.to_bytes(); + let wrong_peer_id = *blake3::hash(&wrong_pk_bytes).as_bytes(); + + let mut bad_commit = commitment.clone(); + bad_commit.sender_public_key = wrong_pk_bytes; + bad_commit.sender_peer_id = wrong_peer_id; + let new_pin = commitment_hash(&bad_commit).unwrap(); + + // The proof's leaves bind the ORIGINAL peer_id in their nonced hashes, but + // the signature gate fires BEFORE the structural/real-bytes gates, so it is + // the first (and asserted) failure. + let res = auditor_accepts( + &wrong_peer_id, + &new_pin, + &nonce, + &bad_commit, + &proof, + honest_bytes, + ); + assert_eq!( + res, + Err(AuditError::SignatureInvalid), + "swapped embedded key must trip the signature gate, got {res:?}" + ); +} + +// --------------------------------------------------------------------------- +// Finding 1, Path D: replay an old response under a fresh nonce +// --------------------------------------------------------------------------- + +/// Attack 1d (Finding 1 — replay): the auditor issues a fresh nonce each audit. +/// The nonce both selects the subtree AND freshens every leaf's possession hash, +/// so a response captured under an old nonce cannot be replayed: the new nonce +/// selects a different subtree (wrong leaf set / cut-hash count) and the stale +/// nonced hashes no longer match. Asserts the structural gate alone already +/// rejects the stale proof under the new nonce. +#[test] +fn audit_response_replay_blocked_by_fresh_nonce() { + let old_nonce = [0xCD; 32]; + let fresh_nonce = [0xEF; 32]; + + let r = Responder::new(); + let pin = r.commit_to_range(256); + let (stale_proof, commitment) = honest_proof_and_commitment(&r, &old_nonce); + + // Sanity: the stale proof was valid under its own (old) nonce. + assert_eq!( + verify_subtree_proof(&stale_proof, &old_nonce, &commitment), + StructureVerdict::Valid + ); + + // Replayed verbatim under the fresh nonce, it fails — the new nonce selects + // a different subtree, so even the structure no longer reconstructs. + let res = auditor_accepts( + &r.peer_id_bytes, + &pin, + &fresh_nonce, // <- different nonce + &commitment, + &stale_proof, + honest_bytes, + ); + assert!( + matches!(res, Err(AuditError::StructureInvalid(_))), + "replay under a fresh nonce must fail the structural gate, got {res:?}" + ); +} + +// --------------------------------------------------------------------------- +// Subtree-native structural attacks (replace the old per-key path/order tamper) +// --------------------------------------------------------------------------- + +/// Tampering a sibling cut-hash breaks the root rebuild. (Subtree analogue of +/// the old per-key "tamper the inclusion path" attack.) +#[test] +fn tampered_cut_hash_rejected() { + let nonce = [0x0B; 32]; + let r = Responder::new(); + let pin = r.commit_to_range(256); + let (mut proof, commitment) = honest_proof_and_commitment(&r, &nonce); + assert!( + !proof.sibling_cut_hashes.is_empty(), + "a 256-leaf tree selects a deep subtree with cut-hashes" + ); + if let Some(c) = proof.sibling_cut_hashes.first_mut() { + c[0] ^= 0x01; + } + let res = auditor_accepts( + &r.peer_id_bytes, + &pin, + &nonce, + &commitment, + &proof, + honest_bytes, + ); + assert!( + matches!(res, Err(AuditError::StructureInvalid(_))), + "tampered cut-hash must fail structure, got {res:?}" + ); +} + +/// Dropping a leaf yields the wrong leaf count for the agreed subtree. The +/// auditor re-derives the exact expected count from `(nonce, key_count)` and +/// rejects. +#[test] +fn wrong_leaf_count_rejected() { + let nonce = [0x0C; 32]; + let r = Responder::new(); + let pin = r.commit_to_range(100); + let (mut proof, commitment) = honest_proof_and_commitment(&r, &nonce); + proof.leaves.pop(); + let res = auditor_accepts( + &r.peer_id_bytes, + &pin, + &nonce, + &commitment, + &proof, + honest_bytes, + ); + assert_eq!( + res, + Err(AuditError::StructureInvalid("wrong leaf count")), + "dropped leaf must fail the leaf-count check, got {res:?}" + ); +} + +/// Reordering leaves violates the strict ascending-key order the committed tree +/// enforces (and would otherwise let a responder shuffle leaves to dodge the +/// spot-check). Rejected structurally. +#[test] +fn reordered_leaves_rejected() { + let nonce = [0x0D; 32]; + let r = Responder::new(); + let pin = r.commit_to_range(100); + let (mut proof, commitment) = honest_proof_and_commitment(&r, &nonce); + assert!(proof.leaves.len() >= 2); + proof.leaves.swap(0, 1); + let res = auditor_accepts( + &r.peer_id_bytes, + &pin, + &nonce, + &commitment, + &proof, + honest_bytes, + ); + assert!( + matches!(res, Err(AuditError::StructureInvalid(_))), + "reordered leaves must fail structure, got {res:?}" + ); +} + +/// Tampering a leaf's `bytes_hash` (claiming a different chunk at a committed +/// position) breaks the root rebuild — the leaf hash binds (key, bytes_hash). +#[test] +fn tampered_leaf_bytes_hash_rejected() { + let nonce = [0x0E; 32]; + let r = Responder::new(); + let pin = r.commit_to_range(100); + let (mut proof, commitment) = honest_proof_and_commitment(&r, &nonce); + proof.leaves[0].bytes_hash[0] ^= 0x01; + let res = auditor_accepts( + &r.peer_id_bytes, + &pin, + &nonce, + &commitment, + &proof, + honest_bytes, + ); + assert!( + matches!(res, Err(AuditError::StructureInvalid(_))), + "tampered bytes_hash must fail structure, got {res:?}" + ); +} + +// --------------------------------------------------------------------------- +// Repudiation: rejecting a recently-gossiped pinned commitment +// --------------------------------------------------------------------------- + +/// Attack: a responder repudiates a commitment it just gossiped — it answers a +/// pin for a commitment it no longer retains. Because the auditor only ever pins +/// a commitment the peer JUST gossiped, and an honest responder retains its last +/// two GOSSIPED commitments, a `lookup_by_hash` miss for a gossiped pin is a +/// confirmed failure. This test pins the retention contract: a gossiped pin +/// stays answerable across the next rotation, but a NEVER-gossiped commitment is +/// dropped on the next rotation (so the responder rightly cannot answer a pin it +/// never put on the wire). +#[test] +fn repudiating_a_gossiped_pin_is_detectable_via_lookup_miss() { + let r = Responder::new(); + let state = &r.state; + + // c1 is gossiped → must stay answerable across one rotation. + let h1 = r.commit_to_range(8); + state.mark_gossiped(h1); + assert!( + state.lookup_by_hash(&h1).is_some(), + "gossiped pin must be answerable immediately" + ); + + // Rotate + gossip c2. c1 is within the last-2-gossiped window → still here. + let h2 = r.commit_to_range(16); + state.mark_gossiped(h2); + assert!( + state.lookup_by_hash(&h1).is_some(), + "a gossiped commitment must survive one rotation (no false repudiation)" + ); + + // Rotate + gossip c3. Now the last-2-gossiped are {h3, h2}; h1 has aged out + // and is legitimately dropped (the auditor would no longer pin it). + let h3 = r.commit_to_range(24); + state.mark_gossiped(h3); + assert!( + state.lookup_by_hash(&h1).is_none(), + "h1 aged out of the gossip window" + ); + assert!(state.lookup_by_hash(&h2).is_some()); + assert!(state.lookup_by_hash(&h3).is_some()); + + // The detection edge: a commitment that was NEVER gossiped is dropped on the + // very next rotation, so a responder asked to answer a pin for an + // ungossiped-then-rotated commitment returns a lookup MISS — which the + // auditor (since it only pins gossiped roots) reads as repudiation. + let r2 = Responder::new(); + let ungossiped = r2.commit_to_range(8); + assert!(r2.state.lookup_by_hash(&ungossiped).is_some()); + let _next = r2.commit_to_range(16); // rotate without gossiping `ungossiped` + assert!( + r2.state.lookup_by_hash(&ungossiped).is_none(), + "an ungossiped commitment is dropped on the next rotation" + ); +} + +// --------------------------------------------------------------------------- +// Cross-check lemmas: the primitives the rejection tests rest on +// --------------------------------------------------------------------------- + +/// The commitment-hash pin is sensitive to every field. This underwrites every +/// "pin doesn't match" assertion above. +#[test] +fn commitment_hash_is_field_sensitive() { + let (pk, sk) = keypair(); + let pk_bytes = pk.to_bytes(); + let sig = sign_commitment(&sk, &[0; 32], 1, &[0; 32], &pk_bytes).unwrap(); + let c1 = StorageCommitment { + root: [0; 32], + key_count: 1, + sender_peer_id: [0; 32], + sender_public_key: pk_bytes, + signature: sig, + }; + let h1 = commitment_hash(&c1).unwrap(); + + for mutate in 0..5u8 { + let mut c = c1.clone(); + match mutate { + 0 => c.root[0] ^= 1, + 1 => c.key_count += 1, + 2 => c.sender_peer_id[0] ^= 1, + 3 => c.signature[0] ^= 1, + 4 => c.sender_public_key[0] ^= 1, + _ => unreachable!(), + } + let h = commitment_hash(&c).unwrap(); + assert_ne!(h, h1, "mutation {mutate} should change commitment_hash"); + } +} + +/// The leaf hash binds (key, bytes_hash): same key + different bytes → different +/// leaf → different root. Underwrites the structural rejections. +#[test] +fn leaf_hash_binds_key_and_bytes() { + let h1 = leaf_hash(&key(1), &content_hash(1)); + let h2 = leaf_hash(&key(1), &content_hash(2)); + let h3 = leaf_hash(&key(2), &content_hash(1)); + assert_ne!(h1, h2); + assert_ne!(h1, h3); + assert_ne!(h2, h3); +} + +/// The signature verifies under the embedded key and only that key. +#[test] +fn signature_round_trips_correctly() { + let (pk1, sk1) = keypair(); + let (pk2, _sk2) = keypair(); + let pk1_bytes = pk1.to_bytes(); + let pk2_bytes = pk2.to_bytes(); + let sig = sign_commitment(&sk1, &[7; 32], 42, &[3; 32], &pk1_bytes).unwrap(); + let c = StorageCommitment { + root: [7; 32], + key_count: 42, + sender_peer_id: [3; 32], + sender_public_key: pk1_bytes, + signature: sig, + }; + assert!(verify_commitment_signature(&c)); + let mut c2 = c.clone(); + c2.sender_public_key = pk2_bytes; + assert!(!verify_commitment_signature(&c2)); +} + +/// The per-leaf possession hash binds nonce, peer, key, and bytes — the +/// foundation of the real-bytes spot-check. Changing any input changes it, so a +/// responder cannot reuse a possession hash across nonces/peers/keys/chunks. +#[test] +fn nonced_leaf_hash_binds_all_inputs() { + let base = nonced_leaf_hash(&[1; 32], &[2; 32], &key(3), b"chunk"); + assert_ne!( + base, + nonced_leaf_hash(&[9; 32], &[2; 32], &key(3), b"chunk") + ); + assert_ne!( + base, + nonced_leaf_hash(&[1; 32], &[9; 32], &key(3), b"chunk") + ); + assert_ne!( + base, + nonced_leaf_hash(&[1; 32], &[2; 32], &key(9), b"chunk") + ); + assert_ne!( + base, + nonced_leaf_hash(&[1; 32], &[2; 32], &key(3), b"other") + ); +}