From d327747bf37bd16d9910f3a3ae1794599e29c3d4 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Jul 2026 15:26:22 +0000 Subject: [PATCH 01/14] post-merge: #632/#148 board hygiene + ogar-vocab lock bump clears COUNT_FUSE (68 == 68) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The compile-time fuse fired in the between-merges window (mirror 68 vs locked ogar_vocab 65) exactly as designed, and cleared with the lock bump (lance-graph-ogar's own lockfile is gitignored — CI resolves the merged OGAR main fresh). lance-graph-ogar 81 tests green locally. Co-Authored-By: Claude Fable 5 Claude-Session: https://claude.ai/code/session_01MLBnPuScZy6w9di2QEjsXM --- .claude/board/LATEST_STATE.md | 1 + .claude/board/PR_ARC_INVENTORY.md | 14 ++++++++++++++ Cargo.lock | 10 +++++----- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/.claude/board/LATEST_STATE.md b/.claude/board/LATEST_STATE.md index 48dbb6be..8341854f 100644 --- a/.claude/board/LATEST_STATE.md +++ b/.claude/board/LATEST_STATE.md @@ -100,6 +100,7 @@ Membrane consumers can now pull BOTH halves of a render `classid` BBB-safely fro | PR | Merged | Title | What it added | |---|---|---|---| +| **#632** | 2026-07-02 | Cross-session intake: RouteBucketTyped (C6) + emission_scan + OCR codebook mirror + GraphRAG-rs inventory + operator rulings | Three sibling wishlists dispositioned; C6 merged verbatim (nexgen retires vendor diff); emission_scan = 2nd scan-family instance (pattern NAMED); OCR 0x08XX mirror of OGAR #148 (fuses arc: flip fuse + two-sided COUNT_FUSE — fuse FIRED 65v68 in the merge window, cleared by lock bump, 68==68); rulings: ownership+tripwires, R-1 naming phantom (domain:appid:classview), R-2 closed (512B row frozen, strided edges), L3 schema design killed; codex P2 x2 resolved (precedence global; collision documented). Contract 792/792. Merge `df367471`. 
| | **#631** | 2026-07-02 | W1b LIVE: WAL batch writer (4 probes green) + M15 rename + temporal synthesis + live oracle numbers | batch_writer implemented: BTreeMap WAL board, ack(cast, LanceVersion) join, delegation cache, never-refuses stacking (probe 4); M15 MulGateDecision rename (W2 unblocked; collapse_gate confirmed 3rd distinct type); operator rulings pinned (zero-copy descriptor casts + eager drain + mutual masking; melden macht frei — freeze retracted; temporal.rs = the read side, replay = QueryReference::at + deinterlace, M24=M25=time-travel ONE mechanism). Measured live: W3c oracle 1-2 ms framework overhead vs 8.4-8.7 s LLM round trip (rig->xAI grok-4 via FlowRunner); JITSON serve.rs = local CI oracle delta. Planner lib 204 + probes 4/4. Merge `c7149eab`. | | **#630** | 2026-07-02 | V3 W1 START: preflight deltas + WAL writer probes + adoption scan + D-PERT-1 + temporal synthesis | Fable-5 ten-point preflight (M24 board=WAL, W6a baseline inversion, W3 oracle ratchet, W2 probe-first reorder) + operator rulings folded live: zero-copy sink (cast = descriptor never bytes, flush via NodeRowPacket::as_le_bytes), "melden macht frei" (stacked casts never refused — 4 ignored probes define W1b green), temporal.rs deinterlace = the READ side (replay = QueryReference::at + deinterlace; M24/M25/time-travel are ONE mechanism; ack carries LanceVersion). Landed code: batch_writer skeleton + 4 probes; contract::classid_scan (771 green); D-PERT-1 rename (462 green). Audits: planner-SoA type-real/wiring-dormant (M15 GateDecision rename BLOCKING before W2); M7 corrected (NodeRowPacket IS production SoaEnvelope, codex P2); graph-flow benched ~0.4-0.5us/step (two-speed confirmed); M25 KanbanSessionStorage design (graph-flow-kanban envelope exists — wire don't invent). Merge `9a6df2a1`. 
| | **#629** | 2026-07-02 | V3 SUBSTRATE consolidated entry point (`.claude/v3/`) + ractor ownership attestation | `.claude/v3/` tree shipped: README (orientation), INTEGRATION-PLAN (W0–W6), COMPONENT-MAP (reuse/repurpose/retire), ENTROPY-MILESTONES (N→1 ledger), MODULE-TABLE (per-file census core/contract/planner), soa_layout/ (LE contract, tenant lanes, consumer map, routing), knowledge/ (substrate primer, mailbox-kanban model, sonnet-worker-guardrails), agents/BOOT.md (4 V3 cards); `/v3` skill + `/v3-audit` command; CLAUDE.md/BOOT.md ★ entrypoint. Review sharpenings folded: LE byte-order range-scan caveat, 3-shape legacy corpus scanner (incl. `0xAAAA_DDCC`), ractor helper-scope ruling (NOT messaging — slow; helper only: spawn/supervision/occasional control RPC). Ownership compile attestation: `KanbanActor` `type State = O`, owner MOVES in at pre_start; 22 supervisor tests green on the AdaWorldAPI ractor fork. Merge `28f17cd7`. | diff --git a/.claude/board/PR_ARC_INVENTORY.md b/.claude/board/PR_ARC_INVENTORY.md index 133f8481..90bd11b3 100644 --- a/.claude/board/PR_ARC_INVENTORY.md +++ b/.claude/board/PR_ARC_INVENTORY.md @@ -35,6 +35,20 @@ --- +## #632 lance-graph: Cross-session intake — RouteBucketTyped (C6) merged, emission_scan minted, OCR codebook mirror, GraphRAG-rs inventory + operator rulings + +**Status:** MERGED 2026-07-02 (merge commit `df367471`), branch `claude/v3-substrate-migration-review-o0yoxv`. Companion: OGAR #148 (merge `75d955b`) — the cross-repo fuses + serialized allocation batch; merged FIRST per the lockstep note; this post-merge commit carries the ogar-vocab lock bumps that cleared COUNT_FUSE (fired 65 vs 68 in the in-between state — first live demonstration of the fuse mechanism, then green 68 == 68, lance-graph-ogar 81 tests). 
+ +**Added:** `codegen_spine::RouteBucketTyped` (kind-generic sibling + `?Sized` blanket bridge, merged verbatim from op-nexgen's vendored C6 diff); `contract::emission_scan` (TypedForm + classify_ddl_type + EmissionCounts; the scan family named as a contract pattern); `ogar_codebook` 0x08XX OCR rows (unicharset/recoder/charset — mirror of OGAR #148's mint; CODEBOOK 68); `.claude/knowledge/graphrag-rs-inventory.md`; the cross-session intake handover + appendices. + +**Locked:** per-consumer classid ownership + tripwires (operator ruling — fuses ARE the coordination mechanism; flip fuse + two-sided COUNT_FUSE are the pattern instances); R-1 naming CLOSED as phantom (`domain:appid:classview`; "concept" names the whole hi u16; the "app" homonym across halves caused the thread; both ledgers carry the line); R-2 CLOSED (512-byte SoA row frozen — kv-lance + batch-writer tested; edges read via the NODE_ROW_COLUMNS strided 16-of-512 view; no storage change); L3 new-Arrow-schema design KILLED ("we already have a working SoA schema" — extraction lands as SoA rows via the W1b cast path; survivors: minter@sha provenance + ndjson-as-golden); scan-family pattern (third counter mirrors, never a bespoke grep); emission_scan precedence global-not-first-token (codex P2 fix); RouteBucketTyped name collision deliberate + UFCS rule (codex P2 documented). + +**Deferred:** L4 DAG-materialization flag (M19/W5); OGAR fields_for(u32) ClassView routing; F17 body triage (most-agreed fleet next move); corpus proof vs a real bake (no classid-keyed corpora in-container); R-3 per-entry board files + R-4 probe-ledger Wave A (operator council queue); q2 APP_PREFIX row (mint when q2 renders classviews — not blocked). + +**Docs:** intake handover (5 appendices incl. rulings + vart mirror recipe), Addendum-10/11, E-V3-XSESSION-INTAKE-1(+RULINGS), E-V3-GRAPHRAG-INV-1, CROSS_SESSION_BROADCAST merge-event entry, OGAR D-CLASSID-HI-U16-SPELLING + D-TRUNCATION-DISALLOWED-SOC-REROUTE. 
+ +**Confidence (2026-07-02):** HIGH — contract 792/792 at merge; lance-graph-ogar 81 green post lock-bump; OGAR vocab 96/96 + class-view 12/12 (flip fuse vs live main). + ## #631 lance-graph: W1b LIVE — WAL batch writer implemented, M15 rename, temporal synthesis, live oracle measurements **Status:** MERGED 2026-07-02 (merge commit `c7149eab`), branch `claude/v3-substrate-migration-review-o0yoxv`. diff --git a/Cargo.lock b/Cargo.lock index 45e4b488..65d86866 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4932,7 +4932,7 @@ dependencies = [ [[package]] name = "lance-graph-contract" version = "0.1.0" -source = "git+https://github.com/AdaWorldAPI/lance-graph?branch=main#c8e1ec42afb25bf6abcdf598bcaa59aa2707322b" +source = "git+https://github.com/AdaWorldAPI/lance-graph?branch=main#df367471df8cc74464733554b4c2338f90eae7d4" dependencies = [ "glob", "serde", @@ -6042,7 +6042,7 @@ dependencies = [ [[package]] name = "ogar-adapter-surrealql" version = "0.1.0" -source = "git+https://github.com/AdaWorldAPI/OGAR?branch=main#a0c7936bc39a14dc3472a2723a21c1b10ab66e3e" +source = "git+https://github.com/AdaWorldAPI/OGAR?branch=main#75d955bd3bfd1b97987ab9e4c2c1cedfcda1adfe" dependencies = [ "ogar-vocab", ] @@ -6050,7 +6050,7 @@ dependencies = [ [[package]] name = "ogar-class-view" version = "0.1.0" -source = "git+https://github.com/AdaWorldAPI/OGAR?branch=main#a0c7936bc39a14dc3472a2723a21c1b10ab66e3e" +source = "git+https://github.com/AdaWorldAPI/OGAR?branch=main#75d955bd3bfd1b97987ab9e4c2c1cedfcda1adfe" dependencies = [ "lance-graph-contract 0.1.0 (git+https://github.com/AdaWorldAPI/lance-graph?branch=main)", "ogar-vocab", @@ -6059,12 +6059,12 @@ dependencies = [ [[package]] name = "ogar-ontology" version = "0.1.0" -source = "git+https://github.com/AdaWorldAPI/OGAR?branch=main#a0c7936bc39a14dc3472a2723a21c1b10ab66e3e" +source = "git+https://github.com/AdaWorldAPI/OGAR?branch=main#75d955bd3bfd1b97987ab9e4c2c1cedfcda1adfe" [[package]] name = "ogar-vocab" version = "0.1.0" -source = 
"git+https://github.com/AdaWorldAPI/OGAR?branch=main#a0c7936bc39a14dc3472a2723a21c1b10ab66e3e" +source = "git+https://github.com/AdaWorldAPI/OGAR?branch=main#75d955bd3bfd1b97987ab9e4c2c1cedfcda1adfe" [[package]] name = "once_cell" From a97c5deae6afbe9e3bf62561de55d039a3aa3c72 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Jul 2026 15:58:37 +0000 Subject: [PATCH 02/14] =?UTF-8?q?docs(v3):=20Addendum-12=20=E2=80=94=20the?= =?UTF-8?q?=20W2+W3b=20arc=20plan=20(probes=20in=20flight,=20W2a=20board-r?= =?UTF-8?q?ow=20spec=20sketch,=20execution=20order)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Fable 5 Claude-Session: https://claude.ai/code/session_01MLBnPuScZy6w9di2QEjsXM --- .claude/v3/INTEGRATION-PLAN.md | 44 ++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/.claude/v3/INTEGRATION-PLAN.md b/.claude/v3/INTEGRATION-PLAN.md index 67120fcb..7fa1f544 100644 --- a/.claude/v3/INTEGRATION-PLAN.md +++ b/.claude/v3/INTEGRATION-PLAN.md @@ -438,3 +438,47 @@ tasks but manageable if done properly"):** 5. **Per-consumer accommodation** — ruff + OGAR adaptations, lance-graph UnifiedBridge ↔ OGAR, AST contract evolution (the W5 consumer wave, now with the fuse doctrine as its safety rail). + +### Addendum-12 2026-07-02 — the W2+W3b arc (operator: "can you handle those 2 lanes?" — yes; this session owns them) + +**Lanes confirmed:** (1) thinking ↔ substrate / V3 migration (W2), (2) +orchestration rs-graph-llm (W3b). The parallel OGAR F17/DO-arm arc is +fenced to another session (prompt issued; see the intake handover). + +**Execution order (probes first, W1 pattern):** + +1. **P-W2b** (in flight): KanbanActor spawned over the REAL + `cognitive_shader_driver::mailbox_soa::MailboxSoA` via + `MailboxSoaOwner::try_advance_phase` — dev-dep only (the structural- + owner proof gains no runtime dep). 
Gates: legal advances visible + through MailboxSoaView on real rows; illegal transition surfaces the + lifecycle-DAG error with the row unchanged. +2. **P-W3b** (in flight): `KanbanSessionStorage` in graph-flow behind a + new optional `kanban` feature (graph-flow is the composer; the + envelope crate stays contract-only). Snapshot upsert + append-only + real-KanbanMove log; **V1 Rubicon mapping** (orchestrator-decided, + revisable, tests pin it): first-save→Planning, task-change→ + CognitiveWork, waiting→Evaluation, completed→Commit, error→Prune. + Gate = the M25 kill-mid-graph replay test (resume identically; no + repeated/skipped tasks; move-log column sequence pinned). Storage + carries the mailbox (acts on behalf of one mailbox) — never the + Session (DTO purity). Live-oracle validation follows (keys present; + same spend discipline: few calls, small max_tokens). +3. **W2a board-as-tenant SPEC** (envelope-auditor-gated before any + byte lands): per the frozen-row ruling the board mints NO new byte + lane — the mailbox's board is a **dedicated board ROW** (a NodeRow + whose classid marks board-of-mailbox; classid → ClassView resolves + the value-slab interpretation; per-row `KanbanTenant` stays + per-work-item; the board row's slab carries board-level aggregates). + "One mailbox = one kanban board as tenant" = the board is addressed + AS a row of the mailbox. Requires: field-isolation matrix, a board + classid allocation (BATCHED mint per the mint rule — goes through + the next allocation batch, never a solo edit), zero + ENVELOPE_LAYOUT_VERSION change (classid routing only — RESERVE, + DON'T RECLAIM). Auditor reviews the spec before implementation. +4. **W2c symbiont arm**: one dependency-uncomment + ~10 min cold build + (BlockedColdBuild is deliberate); attempt in-container after 1–3, + disk permitting. +5. **W2d** 550 ms budget via elevation/ (extend, don't shadow) — M12. +6. 
**R-2 residual**: edges-only strided-read test over NODE_ROW_COLUMNS + (read-side; 16-of-512; no storage change) — rides with W2a's PR. From ad5555b2bb85df6efe1ac689bf2cd5f23f98b47f Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Jul 2026 16:02:49 +0000 Subject: [PATCH 03/14] =?UTF-8?q?probe(w2b):=20KanbanActor=20spawned=20ove?= =?UTF-8?q?r=20the=20REAL=20MailboxSoA=20=E2=80=94=20first=20wiring=20of?= =?UTF-8?q?=20the=20actor=20to=20the=20production=20owner?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the D-V3-W2b gap (actor was only ever exercised against its own TestBoard): dev-dependency-only on cognitive-shader-driver (the structural-owner proof gains no runtime dep); three probes green — two legal advances persist on the real SoA via KanbanMsg only, Planning->Commit rejected with RubiconTransitionError and the row unchanged, and sole-mutator shown structurally (the SoA is moved into Actor::spawn; no handle survives). Co-Authored-By: Claude Fable 5 Claude-Session: https://claude.ai/code/session_01MLBnPuScZy6w9di2QEjsXM --- Cargo.lock | 1 + crates/lance-graph-supervisor/Cargo.toml | 5 + .../tests/w2b_real_owner_probe.rs | 159 ++++++++++++++++++ 3 files changed, 165 insertions(+) create mode 100644 crates/lance-graph-supervisor/tests/w2b_real_owner_probe.rs diff --git a/Cargo.lock b/Cargo.lock index 65d86866..3ecf60db 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5003,6 +5003,7 @@ dependencies = [ name = "lance-graph-supervisor" version = "0.1.0" dependencies = [ + "cognitive-shader-driver", "lance-graph-callcenter", "lance-graph-contract 0.1.0", "ractor", diff --git a/crates/lance-graph-supervisor/Cargo.toml b/crates/lance-graph-supervisor/Cargo.toml index 7380f965..012a053d 100644 --- a/crates/lance-graph-supervisor/Cargo.toml +++ b/crates/lance-graph-supervisor/Cargo.toml @@ -45,3 +45,8 @@ supervisor-lifecycle-audit = ["supervisor"] [dev-dependencies] tokio = { version = "1", features = 
["rt-multi-thread", "macros"] } static_assertions = "1" +# D-V3-W2b integration probe ONLY: spawns KanbanActor over the REAL production +# MailboxSoaOwner impl instead of the in-file TestBoard fake. dev-dependencies +# never ship in the compiled crate/runtime dep graph — this proves the +# structural-owner contract against real code without adding a runtime dep. +cognitive-shader-driver = { path = "../cognitive-shader-driver" } diff --git a/crates/lance-graph-supervisor/tests/w2b_real_owner_probe.rs b/crates/lance-graph-supervisor/tests/w2b_real_owner_probe.rs new file mode 100644 index 00000000..ade43c6b --- /dev/null +++ b/crates/lance-graph-supervisor/tests/w2b_real_owner_probe.rs @@ -0,0 +1,159 @@ +//! D-V3-W2b integration probe: KanbanActor spawned over the REAL production +//! `MailboxSoaOwner` (`cognitive_shader_driver::mailbox_soa::MailboxSoA`), not +//! the in-file `TestBoard` fake that `kanban_actor.rs`'s own unit tests use. +//! +//! Closes the gap named in D-V3-W2b: until this probe, `KanbanActor` was +//! only ever exercised against `kanban_actor::tests::TestBoard` — a minimal +//! in-RAM stand-in with no SoA columns. This probe proves the SAME actor +//! message surface (`KanbanMsg::Advance` / `KanbanMsg::Phase`) drives the +//! REAL owner's `try_advance_phase` (via the contract's `MailboxSoaOwner` +//! trait), that illegal transitions are rejected with no mutation on the +//! real SoA, and that the actor is the ONLY path this probe ever uses to +//! mutate the row (no direct `advance_phase`/`try_advance_phase` call from +//! the probe itself — only through `KanbanMsg`). +//! +//! Spec: `.claude/board/*` D-V3-W2b (KanbanActor never spawned over real +//! MailboxSoA — this file closes that gap). 
+ +#[cfg(feature = "supervisor")] +mod w2b_real_owner_probe { + use cognitive_shader_driver::mailbox_soa::MailboxSoA; + use lance_graph_contract::kanban::KanbanColumn; + use lance_graph_contract::soa_view::MailboxSoaView; + use lance_graph_supervisor::kanban_actor::{KanbanActor, KanbanMsg}; + use ractor::Actor; + + /// Small capacity — the probe only needs the owner's phase column, not a + /// realistic row count. Mirrors `mailbox_soa.rs`'s own unit tests + /// (`MailboxSoA<8>` in `test_mailbox_soa_new_zero`). + type ProbeMailbox = MailboxSoA<8>; + + /// Construct a real `MailboxSoA` the same way `mailbox_soa.rs`'s own + /// tests do: `MailboxSoA::<N>::new(mailbox_id, w_slot, threshold)` + /// followed by `set_populated` (W1c discipline). This mirrors the + /// crate's own construction idiom, not an invented shape. + fn real_mailbox() -> ProbeMailbox { + let mut mb = MailboxSoA::new(/* mailbox_id */ 77, /* w_slot */ 3, /* threshold */ 1.0); + // Declare 1 populated row so MailboxSoaView::n_rows() is non-zero, + // matching how a real spawn would declare its logical size + // (`MailboxSoA::set_populated` docs: "mirrors fixing BindSpace::len + // at construction"). `phase()` itself is a mailbox-level field, not + // per-row, so this is not required for the phase assertions below — + // it is here so the probe's owner is representative of a real spawn + // rather than a zero-row empty shell. + mb.set_populated(1); + mb + } + + #[tokio::test] + async fn w2b_real_owner_two_legal_advances_persist_on_the_real_soa() { + let mb = real_mailbox(); + assert_eq!( + mb.phase(), + KanbanColumn::Planning, + "MailboxSoA::new starts in Planning (mirrors TestBoard's board(Planning) helper \ + in kanban_actor.rs's own unit tests)" + ); + + let (actor, handle) = Actor::spawn(None, KanbanActor::<ProbeMailbox>::default(), mb) + .await + .expect("spawn kanban actor over the REAL MailboxSoA"); + + // Legal edge #1: Planning -> CognitiveWork, driven ONLY through the actor. 
+ let mv1 = ractor::call!(actor, |reply| KanbanMsg::Advance { + to: KanbanColumn::CognitiveWork, + reply + }) + .expect("rpc") + .expect("Planning -> CognitiveWork is a legal Rubicon edge"); + assert_eq!(mv1.from, KanbanColumn::Planning); + assert_eq!(mv1.to, KanbanColumn::CognitiveWork); + + // Read back through MailboxSoaView::phase() (via KanbanMsg::Phase) — + // the real SoA row reflects the advance. + let phase1 = ractor::call!(actor, |reply| KanbanMsg::Phase { reply }).expect("rpc"); + assert_eq!(phase1, KanbanColumn::CognitiveWork); + + // Legal edge #2: CognitiveWork -> Evaluation. + let mv2 = ractor::call!(actor, |reply| KanbanMsg::Advance { + to: KanbanColumn::Evaluation, + reply + }) + .expect("rpc") + .expect("CognitiveWork -> Evaluation is a legal Rubicon edge"); + assert_eq!(mv2.from, KanbanColumn::CognitiveWork); + assert_eq!(mv2.to, KanbanColumn::Evaluation); + + let phase2 = ractor::call!(actor, |reply| KanbanMsg::Phase { reply }).expect("rpc"); + assert_eq!( + phase2, + KanbanColumn::Evaluation, + "the real MailboxSoA row reflects both advances, read back via MailboxSoaView" + ); + + actor.stop(None); + handle.await.expect("actor join"); + } + + #[tokio::test] + async fn w2b_real_owner_illegal_edge_rejected_no_mutation_on_the_real_soa() { + let mb = real_mailbox(); + let (actor, handle) = Actor::spawn(None, KanbanActor::<ProbeMailbox>::default(), mb) + .await + .expect("spawn kanban actor over the REAL MailboxSoA"); + + // Planning -> Commit is NOT a legal Rubicon edge (same DAG the + // in-file TestBoard tests exercise) — must surface the typed + // RubiconTransitionError from MailboxSoaOwner::try_advance_phase, + // relayed through the actor's Advance message, with NO mutation on + // the real row. 
+ let err = ractor::call!(actor, |reply| KanbanMsg::Advance { + to: KanbanColumn::Commit, + reply + }) + .expect("rpc") + .expect_err("Planning -> Commit must be rejected by the real owner's lifecycle DAG"); + assert_eq!(err.from, KanbanColumn::Planning); + assert_eq!(err.to, KanbanColumn::Commit); + + // The real SoA's phase column is UNCHANGED after the rejected edge. + let phase = ractor::call!(actor, |reply| KanbanMsg::Phase { reply }).expect("rpc"); + assert_eq!( + phase, + KanbanColumn::Planning, + "rejected transition must not mutate the real MailboxSoA row" + ); + + actor.stop(None); + handle.await.expect("actor join"); + } + + #[tokio::test] + async fn w2b_real_owner_actor_is_the_sole_mutator_structural_check() { + // Structural proof (mailbox-as-owner, E-CE64-MB-4): the probe never + // calls `MailboxSoaOwner::advance_phase` / `try_advance_phase` + // directly on a `MailboxSoA` value it holds after spawn — the real + // `MailboxSoA` is MOVED into `Actor::spawn` (ownership transfer), + // and the only handle this test touches from that point on is the + // `ActorRef`. Any mutation not routed through + // `KanbanMsg::Advance` would be a compile error here (`mb` is no + // longer in scope), not a runtime bug this test could silently miss. + let mb = real_mailbox(); + let (actor, handle) = Actor::spawn(None, KanbanActor::<ProbeMailbox>::default(), mb) + .await + .expect("spawn kanban actor over the REAL MailboxSoA"); + // `mb` was moved into `Actor::spawn` above and is not usable here — + // the only remaining handle to the owner is `actor`. 
+ + let mv = ractor::call!(actor, |reply| KanbanMsg::Advance { + to: KanbanColumn::CognitiveWork, + reply + }) + .expect("rpc") + .expect("legal edge applied via the actor, the only mutation surface reachable here"); + assert_eq!(mv.to, KanbanColumn::CognitiveWork); + + actor.stop(None); + handle.await.expect("actor join"); + } +} From 6fff454b95b6fc7e5d75b7fa2d8634132c16fde4 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Jul 2026 16:05:01 +0000 Subject: [PATCH 04/14] =?UTF-8?q?test(contract):=20edges-only=20strided=20?= =?UTF-8?q?read=20via=20NODE=5FROW=5FCOLUMNS=20descriptors=20=E2=80=94=20R?= =?UTF-8?q?-2=20closure=20residual?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The read-side proof of the operator's edges-cheap ruling: every row's EdgeBlock recovered touching exactly 16 bytes/row, driven purely by the Edges descriptor's (row_offset, elems_per_row) + NODE_ROW_STRIDE; the 512-byte storage unit untouched. Co-Authored-By: Claude Fable 5 Claude-Session: https://claude.ai/code/session_01MLBnPuScZy6w9di2QEjsXM --- .../src/canonical_node.rs | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/crates/lance-graph-contract/src/canonical_node.rs b/crates/lance-graph-contract/src/canonical_node.rs index 852ec1a7..349d4088 100644 --- a/crates/lance-graph-contract/src/canonical_node.rs +++ b/crates/lance-graph-contract/src/canonical_node.rs @@ -1582,6 +1582,57 @@ mod tests { assert_eq!(view.as_ptr().cast::(), rows.as_ptr().cast::()); } + #[test] + fn edges_only_strided_read_via_descriptors_r2_residual() { + // R-2 closure residual (operator ruling 2026-07-02): "edges cheap + // without having to load the whole values" is served by the layout + // AS IT IS — an edge sweep is a strided 16-of-512 slice read over + // the existing backing store via the NODE_ROW_COLUMNS descriptor, + // never a storage change. 
This test IS the read-side proof: it + // recovers every row's EdgeBlock touching exactly + // `elems_per_row = 16` bytes per row, driven purely by the + // descriptor's (row_offset, elems_per_row) + NODE_ROW_STRIDE — + // no value-slab byte is ever indexed. + let rows = vec![ + NodeRow { + key: NodeGuid::new(NodeGuid::CLASSID_OSINT, 1, 2, 3, 0xAB, 0xCD), + edges: EdgeBlock { + in_family: [0xA1; 12], + out_family: [0xB2; 4], + }, + value: [7u8; 480], + }, + NodeRow { + key: NodeGuid::new(NodeGuid::CLASSID_PROJECT, 4, 5, 6, 0x11, 0x22), + edges: EdgeBlock { + in_family: [0xC3; 12], + out_family: [0xD4; 4], + }, + value: [9u8; 480], + }, + ]; + let packet = NodeRowPacket::new(&rows, 0); + let bytes = packet.as_le_bytes(); + + let edges_col = NODE_ROW_COLUMNS + .iter() + .find(|c| c.name_id == NodeRowColumn::Edges as u16) + .expect("Edges descriptor is canonical"); + let elems = usize::try_from(edges_col.elems_per_row).expect("fits usize"); + let offset = usize::try_from(edges_col.row_offset).expect("fits usize"); + assert_eq!(elems, 16); + assert_eq!(offset, 16); + + for (i, row) in rows.iter().enumerate() { + let start = i * NODE_ROW_STRIDE + offset; + let slice = &bytes[start..start + elems]; + // 16 bytes/row, straight out of the store: 12 in-family then + // 4 out-of-family (repr(C) field order within the block). 
+ assert_eq!(&slice[..12], &row.edges.in_family); + assert_eq!(&slice[12..], &row.edges.out_family); + } + } + #[test] fn node_rows_from_le_bytes_rejects_bad_inputs() { let rows = vec![ From a757644c5bf278222f15293b51f965f5fc4cf94f Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Jul 2026 16:06:41 +0000 Subject: [PATCH 05/14] =?UTF-8?q?board:=20E-SEMANTIC-OS-CONVERGENCE-1=20?= =?UTF-8?q?=E2=80=94=20operator=20capstone=20(semantic=20OS,=20membranes,?= =?UTF-8?q?=20'do=20not=20copy=20meaning')=20pinned=20with=20grounding=20t?= =?UTF-8?q?able=20+=20broadcast?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Fable 5 Claude-Session: https://claude.ai/code/session_01MLBnPuScZy6w9di2QEjsXM --- .claude/board/CROSS_SESSION_BROADCAST.md | 12 ++++++++++++ .claude/board/EPIPHANIES.md | 9 +++++++++ 2 files changed, 21 insertions(+) diff --git a/.claude/board/CROSS_SESSION_BROADCAST.md b/.claude/board/CROSS_SESSION_BROADCAST.md index b18c89d0..02cf2783 100644 --- a/.claude/board/CROSS_SESSION_BROADCAST.md +++ b/.claude/board/CROSS_SESSION_BROADCAST.md @@ -175,3 +175,15 @@ can synthesize via existing `unpack_lo/hi_epi8` + I32 ops. `.claude/handovers/2026-07-02-cross-session-wishlist-intake.md`. - **Citation rule adopted:** cross-session references carry board `E-` keys or file paths, never per-session ordinals. + +## 2026-07-02 — E-SEMANTIC-OS-CONVERGENCE-1: the operator's capstone framing is on the board — cite it, don't re-derive it + +**For:** every parallel session. The operator synthesized the fleet-wide +convergence: the center is a verified semantic substrate, the recurring +epiphany is MEMBRANES, and the one law is **"Do not copy meaning. 
+Reference it, mask it, materialize it, trace it."** Full canonical text + +the [G] grounding table (every semantic-OS analogy row mapped to its +shipped artifact) + two sharpenings (the third membrane failure mode is +DUPLICATION; a membrane without a build-failing tripwire is prose): +`.claude/board/EPIPHANIES.md` E-SEMANTIC-OS-CONVERGENCE-1. New design +docs should cite this entry as the frame instead of restating fragments. diff --git a/.claude/board/EPIPHANIES.md b/.claude/board/EPIPHANIES.md index 4babdd57..7a90f576 100644 --- a/.claude/board/EPIPHANIES.md +++ b/.claude/board/EPIPHANIES.md @@ -1,3 +1,12 @@ +## 2026-07-02 — E-SEMANTIC-OS-CONVERGENCE-1: the operator's capstone — "code is no longer the center; the center is a verified semantic substrate" +**Status:** FINDING (operator synthesis across 7+ parallel sessions, recorded verbatim below; grounding table + two sharpenings appended by this session) + +**Operator's text (canonical, preserved):** The fleet keeps rediscovering ONE shape — `semantic facts → classid/codebook → mask/ClassView → deterministic materialization → adapter → execution → trace/falsifier/replay` — and the repeated epiphany is MEMBRANES (classid opacity; public/private evidence; data/behaviour; source/materialization; task/state/WAL; frontend/backend; human-session/claim-board). Every failure came from a membrane missing or leaky (proprietary leak, classid bit math, stale toolchain assumptions, duplicate session work, unacked graph steps, hand-census drift, template/accessor drift, floating pins, green CI that didn't test the real fuse); every success from making the membrane explicit (opaque facet_classid → the flip was free; FieldMask → UI/RBAC/render convergence; predicate registry → reassemble inverse mint; WAL ack CastId↔LanceVersion → replayable task graph; claim files/board entries → fewer collisions; generated scans → no census rot). 
The convergence: **business logic becomes addressable semantic topology; execution becomes a reversible, audited projection; coordination becomes the same substrate applied to humans and agents.** The one law: **"Do not copy meaning. Reference it, mask it, materialize it, trace it."** Sharper: ISA compresses meaning · ClassView projects meaning · adapters incarnate meaning · falsifiers protect meaning · boards coordinate meaning. The meta-result: not "Odoo in Rust" or "graph-flow orchestration" — a **semantic operating system**: ontology = filesystem · classid = inode · ClassView = permissioned view · adapter = device driver · kanban/WAL = process journal · falsifier = fsck · **LLM = oracle interrupt**. + +**Grounding table (every analogy row has a shipped artifact — this is [G], not aspiration):** classid=inode → OGAR-CONSUMER-BEST-PRACTICES' literal one-liner "the classid is pure address; the magic is what it resolves to" + the opacity accessors (`concept_of`/`app_of`, bit math banned); ClassView=permissioned view → FieldMask + the RBAC keystone + `value_schema`/`edge_codec_flavor` lens selection; adapter=device driver → `ogar-adapter-{surrealql,ttl,clickhouse-ddl}` emitting from the Core, never carrying state; kanban/WAL=process journal → M24 board-as-WAL + the CastId↔LanceVersion ack join + temporal deinterlace (replay = a read at a pinned QueryReference); falsifier=fsck → the probe queues, the flip fuse + COUNT_FUSE (which demonstrably FIRED in the #148/#632 merge window and cleared — fsck catching a real inconsistency on day one), the scan family (classid_scan/emission_scan); LLM=oracle interrupt → MEASURED, not metaphor: 1–2 ms framework vs 8.4–8.7 s oracle round trip (E-V3-ORACLE-LIVE-1) — the oracle is invoked on FailureTicket exactly like a page-fault handler, never polled. 
"Do not copy meaning" is the one law the whole ENTROPY ledger already enforces case-by-case: every M1–M25 row is a "meaning was copied N times → reference it once" collapse, and wire-don't-invent / the consumer preflight (never copy the codebook) / zero-copy-to-tombstone / REUSE-AS-REFERENCE verdicts are its instances. + +**Two sharpenings (this session's additions):** (1) the failure list has a THIRD membrane failure mode beyond missing/leaky — **duplicated**: two membranes for one boundary (two GateDecisions, two ResonanceDtos, three coordination-channel proposals) — and the entropy ledger is the fsck FOR membranes themselves; (2) a membrane is only real when crossing it wrongly **fails a build** — "membrane" without a tripwire is prose; the per-consumer-ownership + fuses ruling is the enforcement half of the law, so the doctrine's operational test is: *name the fuse or it isn't a membrane yet*. + ## 2026-07-02 — E-V3-XSESSION-INTAKE-1-RULINGS: operator closed the intake's escalations — R-1 was a PHANTOM, R-2 is an edges-projection requirement, L3 schema design is KILLED **Status:** CORRECTION + FINDING (three operator rulings on E-V3-XSESSION-INTAKE-1; full text in the intake handover appendices) From 61494b797203183e56e63fd7bd8a15045dddb33e Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Jul 2026 16:11:58 +0000 Subject: [PATCH 06/14] =?UTF-8?q?docs(v3):=20Addendum-12a=20=E2=80=94=20W2?= =?UTF-8?q?a=20envelope-audit=20ruling=20(LAYOUT-GATED;=20BoardAggregates?= =?UTF-8?q?=2010th=20tenant=20decided;=20T1-T6=20+=20sweeper=20exposure)?= =?UTF-8?q?=20+=20guardrails=20vocab=20reconciled?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Fable 5 Claude-Session: https://claude.ai/code/session_01MLBnPuScZy6w9di2QEjsXM --- .claude/v3/INTEGRATION-PLAN.md | 26 +++++++++++++++++++ .../v3/knowledge/sonnet-worker-guardrails.md | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git 
a/.claude/v3/INTEGRATION-PLAN.md b/.claude/v3/INTEGRATION-PLAN.md index 7fa1f544..d8d1e9d5 100644 --- a/.claude/v3/INTEGRATION-PLAN.md +++ b/.claude/v3/INTEGRATION-PLAN.md @@ -482,3 +482,29 @@ fenced to another session (prompt issued; see the intake handover). 5. **W2d** 550 ms budget via elevation/ (extend, don't shadow) — M12. 6. **R-2 residual**: edges-only strided-read test over NODE_ROW_COLUMNS (read-side; 16-of-512; no storage change) — rides with W2a's PR. + +### Addendum-12a 2026-07-02 — W2a envelope-audit ruling: LAYOUT-GATED (spec-stage) + +Verdict on the Addendum-12 §3 board-row sketch: **byte-sound, zero +ENVELOPE_LAYOUT_VERSION change, NOT LAYOUT-BREAK — but the textbook +I-LEGACY-API-FEATURE-GATED shape** (same stored bytes, meaning selected +by routing). It lands only with: + +- **Crux resolved (orchestrator decision per the ruling):** board + aggregates take the **NEW append-only 10th `ValueTenant` + (`BoardAggregates`, row_offset 152)** + a board preset + a + `BUILTIN_READ_MODES` entry — NOT a reuse-reinterpretation of existing + tenant bytes (focus-lens reading inexpressible pre-P4). The sketch's + "no new lane" is CORRECTED: additive-at-the-end IS a lane and IS still + layout-clean/version-free; guardrails §2's original wording was right. +- **Mandatory tests T1–T6:** field-isolation matrix; cross-classid + reinterpretation guard (paired observability); board-classid + registration — fall-through to ReadMode::DEFAULT=Full is FORBIDDEN; + mixed-batch zero-copy round-trip; fixed-offset-sweeper safety + (`nan_projection` Energy [134,138) + symbiont bridge/domino are the + two EXPOSED readers — gate via `schema.has()` or prove the board's + Energy slot dormant-zeroed); ENVELOPE_LAYOUT_VERSION==2 regression. +- **Board classid via the next BATCHED allocation mint** (never solo); + implementation WAITS on the mint, tenant + tests prepared behind it. 
+- STOP conditions 1–6 bind every implementer; ocr.rs's per-row + `schema.has()` reader is the SAFE pattern to copy. diff --git a/.claude/v3/knowledge/sonnet-worker-guardrails.md b/.claude/v3/knowledge/sonnet-worker-guardrails.md index 31ba1fc6..65f57a68 100644 --- a/.claude/v3/knowledge/sonnet-worker-guardrails.md +++ b/.claude/v3/knowledge/sonnet-worker-guardrails.md @@ -59,7 +59,7 @@ exactly the row below, never a neighbor. |---|---|---|---| | **tenant (value tenant)** | a lane in the 480-byte value slab, selected by `classid_read_mode(c).value_schema` | a customer/org (that's consumer-app tenancy) | invent a lane; new lanes are envelope-auditor-gated | | **KanbanTenant (per-row)** | the existing per-row kanban state type | the per-mailbox board | extend it to carry board state | -| **kanban board (per-mailbox)** | W2a deliverable: the mailbox's OWN board, a NEW tenant SIBLING of KanbanTenant | a global/singleton board | implement it without the W2a spec + field-isolation matrix | +| **kanban board (per-mailbox)** | W2a deliverable: a dedicated board ROW (classid-routed) whose aggregates live in the NEW append-only 10th ValueTenant `BoardAggregates` (row_offset 152) — 2026-07-02 envelope-audit ruling, plan Addendum-12a: LAYOUT-GATED, tests T1-T6 + batched classid mint mandatory | a global/singleton board; a reuse-reinterpretation of existing tenant bytes (pre-P4 inexpressible) | implement without Addendum-12a's T1-T6 + STOP list; let the board classid fall through to ReadMode::DEFAULT | | **cascade** | HEEL/HIP/TWIG key tiers (GUID canon; 256×256 centroid tiles) | the perturbation field | rename anything containing "cascade" | | **PerturbationDto** (was dto.rs `ResonanceDto`) | the MECHANICAL Morton-tile inverse-pyramid field (Ψ) | awareness/perspective | touch awareness_dto.rs during D-PERT-1 | | **ResonanceDto (awareness_dto.rs)** | the PERSPECTIVAL (Piaget Three-Mountains) resonance — KEEPS its name | a duplicate to dedup | rename/merge it with the Ψ DTO | 
From cda3b9c1617c46aceb0a93f512435fbc509d8c64 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Jul 2026 16:15:36 +0000 Subject: [PATCH 07/14] board: M25 SHIPPED v1 (kill-mid-graph replay green) + W2-arc broadcast Co-Authored-By: Claude Fable 5 Claude-Session: https://claude.ai/code/session_01MLBnPuScZy6w9di2QEjsXM --- .claude/board/CROSS_SESSION_BROADCAST.md | 15 +++++++++++++++ .claude/v3/ENTROPY-MILESTONES.md | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/.claude/board/CROSS_SESSION_BROADCAST.md b/.claude/board/CROSS_SESSION_BROADCAST.md index 02cf2783..ec9450e3 100644 --- a/.claude/board/CROSS_SESSION_BROADCAST.md +++ b/.claude/board/CROSS_SESSION_BROADCAST.md @@ -187,3 +187,18 @@ shipped artifact) + two sharpenings (the third membrane failure mode is DUPLICATION; a membrane without a build-failing tripwire is prose): `.claude/board/EPIPHANIES.md` E-SEMANTIC-OS-CONVERGENCE-1. New design docs should cite this entry as the frame instead of restating fragments. + +## 2026-07-02 — W2 arc: W2b real-owner probe + W3b KanbanSessionStorage (M25) SHIPPED; W2a ruled LAYOUT-GATED + +**For:** all sessions. On the shared branch: (1) **W2b** — KanbanActor +spawned over the REAL `MailboxSoA` for the first time (3 probes green; +dev-dep only). (2) **W3b/M25** — `graph-flow` gains a `kanban` feature: +`KanbanSessionStorage` (snapshot + append-only real-KanbanMove log, V1 +Rubicon mapping) with the kill-mid-graph replay gate GREEN — every +graph-flow execution is now replayable from the board (rs-graph-llm +branch). (3) **W2a** envelope-audit: LAYOUT-GATED — BoardAggregates = +NEW append-only 10th ValueTenant @152; T1–T6 mandatory; the board +classid MUST come through the next batched mint (queue yours); +`nan_projection` + symbiont fixed-offset readers are the two EXPOSED +sweepers to gate. (4) R-2 residual closed (edges-only strided read +proof). Capstone frame for all of it: E-SEMANTIC-OS-CONVERGENCE-1. 
diff --git a/.claude/v3/ENTROPY-MILESTONES.md b/.claude/v3/ENTROPY-MILESTONES.md index bbb822c6..e2146802 100644 --- a/.claude/v3/ENTROPY-MILESTONES.md +++ b/.claude/v3/ENTROPY-MILESTONES.md @@ -47,7 +47,7 @@ | M22 | 2 divergent q2 OSINT V3 bakes (crates/osint-bake canon-high 0x0700_0000 vs data/osint-v3 STALE pre-flip 0x1000_0700 dual-GUID scheme) | one canon-high bake against osint_classview.rs's 0x0700/0x0701 reservation | re-bake; grep pre-flip forms in q2 data/ = zero (or dual-alias-read only) | QUEUED (W5; latent until a reader assumes canon-high) | | M23 | 2 write-path doctrines coexisting (owner-stamped V3 writes vs smb-office-rs `LanceConnector::upsert` — the ONE live online consumer write, no stamp/classid/envelope) | all online consumer writes routed through the batch-writer cast | consumer-map §2 table shows zero ORPHAN-WRITE rows; warden green fleet-wide | QUEUED (W5 first live migration; medcare-soa writer BORN stamped as the prevention half) | | M24 | 2 write-intent bookkeepers (batch-writer internal state vs kanban board AHEAD update) | ONE: the kanban board IS the write-ahead log — cast = move = intent record; ack = confirmation | kill-after-cast-before-ack replay test green; grep writer-internal intent queues = zero; cast carries descriptor never bytes (zero-copy sink through NodeRowPacket); stacked-casts-never-refused test (melden macht frei — Addendum-7; sink coalesces naturally via live-store reads) | QUEUED (shapes W1b from first line; operator ruling Addendum-6: eager drain + mutual masking via phase machine) | -| M25 | 2 persistence surfaces for orchestration state (graph-flow SessionStorage backend vs mailbox kanban board) | ONE: KanbanSessionStorage — Session transitions ARE KanbanMoves via the W1b writer; replay = rebuild from board | kill-mid-graph replay test: session resumes identically from board-recorded moves; grep non-kanban SessionStorage impls in lance-graph = zero | QUEUED (W3b; feeds off M24) | +| M25 | 2 persistence surfaces 
for orchestration state (graph-flow SessionStorage backend vs mailbox kanban board) | ONE: KanbanSessionStorage — Session transitions ARE KanbanMoves via the W1b writer; replay = rebuild from board | kill-mid-graph replay test: session resumes identically from board-recorded moves; grep non-kanban SessionStorage impls in lance-graph = zero | SHIPPED v1 2026-07-02 (rs-graph-llm graph-flow `kanban` feature: KanbanSessionStorage, snapshot+move-log; kill-mid-graph replay test GREEN — no repeats/gaps, pinned Rubicon column sequence; grep gate holds: no non-kanban SessionStorage impl in lance-graph) | ## The meta-rule (why this ledger exists) From 049a613482cae35663fd3e40b49d5b5dc036de65 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Jul 2026 16:31:40 +0000 Subject: [PATCH 08/14] fix(contract): usize::from for the infallible u16 conversion (clippy -D warnings); row_offset (u32) keeps try_from Co-Authored-By: Claude Fable 5 Claude-Session: https://claude.ai/code/session_01MLBnPuScZy6w9di2QEjsXM --- crates/lance-graph-contract/src/canonical_node.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/lance-graph-contract/src/canonical_node.rs b/crates/lance-graph-contract/src/canonical_node.rs index 349d4088..ff399ba1 100644 --- a/crates/lance-graph-contract/src/canonical_node.rs +++ b/crates/lance-graph-contract/src/canonical_node.rs @@ -1618,7 +1618,9 @@ mod tests { .iter() .find(|c| c.name_id == NodeRowColumn::Edges as u16) .expect("Edges descriptor is canonical"); - let elems = usize::try_from(edges_col.elems_per_row).expect("fits usize"); + let elems = usize::from(edges_col.elems_per_row); + // row_offset is u32: no infallible From for usize in std, so + // try_from stays (unlike the u16 elems_per_row above). 
let offset = usize::try_from(edges_col.row_offset).expect("fits usize"); assert_eq!(elems, 16); assert_eq!(offset, 16); From d59484cbd58be1d671127b050b9b3eab22c8c507 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Jul 2026 16:34:29 +0000 Subject: [PATCH 09/14] ci: run lance-graph-supervisor tests with --features supervisor (W2b probes were never executed in CI) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The whole crate is feature-gated, so no existing step reached it — the 'green CI that didn't test the real fuse' failure mode. Flagged by coderabbit on #634. Co-Authored-By: Claude Fable 5 Claude-Session: https://claude.ai/code/session_01MLBnPuScZy6w9di2QEjsXM --- .github/workflows/rust-test.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/rust-test.yml b/.github/workflows/rust-test.yml index f2da8693..bf9e68be 100644 --- a/.github/workflows/rust-test.yml +++ b/.github/workflows/rust-test.yml @@ -113,6 +113,15 @@ jobs: # doctests, fast (no lance/datafusion/ndarray deps). Gating. - name: Run deepnsm tests run: cargo test --manifest-path crates/deepnsm/Cargo.toml + # lance-graph-supervisor: every test in the crate (including the W2b + # real-owner probes over the production MailboxSoA, PR #634) is gated + # behind the `supervisor` feature (ractor dep), so no step above ever + # executes them — the exact "green CI that didn't test the real fuse" + # membrane failure (E-SEMANTIC-OS-CONVERGENCE-1). Flagged by + # coderabbit on #634. Cheap: contract + ractor + the shader-driver + # dev-dep, no lance/datafusion. 
+ - name: Run supervisor tests (W2b real-owner probes) + run: cargo test --manifest-path crates/lance-graph-supervisor/Cargo.toml --features supervisor test-with-coverage: runs-on: ubuntu-24.04 From b4f75e4078e559582cd4dce660bfa5ec799ac673 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Jul 2026 16:36:53 +0000 Subject: [PATCH 10/14] =?UTF-8?q?docs(v3):=20Addendum-13=20=E2=80=94=201BR?= =?UTF-8?q?C=20substrate=20probe,=20lanes=20A-F=20incl.=20the=20substrate-?= =?UTF-8?q?native=20Morton-tile=20cascaded=20shader=20lane?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Fable 5 Claude-Session: https://claude.ai/code/session_01MLBnPuScZy6w9di2QEjsXM --- .claude/v3/INTEGRATION-PLAN.md | 41 ++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/.claude/v3/INTEGRATION-PLAN.md b/.claude/v3/INTEGRATION-PLAN.md index d8d1e9d5..d486985f 100644 --- a/.claude/v3/INTEGRATION-PLAN.md +++ b/.claude/v3/INTEGRATION-PLAN.md @@ -508,3 +508,44 @@ by routing). It lands only with: implementation WAITS on the mint, tenant + tests prepared behind it. - STOP conditions 1–6 bind every implementer; ocr.rs's per-row `schema.has()` reader is the SAFE pattern to copy. + +### Addendum-13 2026-07-02 — the 1BRC substrate probe (operator-requested; W2d/W2e load instrument) + +**Why:** the dispatch bench measured scheduler OVERHEAD on no-op tasks; +kanban-concurrency tuning (W2d 550 ms budget, lane sizing) needs +THROUGHPUT UNDER REAL WORK. The One Billion Row Challenge +(automataIA/1brc-rs as reference baseline — REUSE-AS-REFERENCE, never a +dep) stresses exactly the substrate's claims: SIMD scanning, zero-copy +slicing (data-flow rule 1), owned-microcopy + commutative-merge +aggregation (the borrow-strategy doc IS the 1BRC merge shape), and +scheduling. 
Container scale: 100M rows (~1.4 GB; 1B = 13 GB does not +fit); every contender runs the SAME seeded corpus, recipe+hash archived +with every number (the archival convention). truth-architect reviews all +numbers; baselines land before any tuned lane. + +**Crate:** `crates/onebrc-probe` (standalone, workspace-EXCLUDED, +std-only for the baselines). Scaffold + lanes A/C in flight. + +**Lanes:** +- **A** scalar single-thread (the honest floor). +- **B** ndarray-SIMD scan (delimiter find + parse via the simd dispatch). +- **C** threaded chunks + borrow-strategy merge (owned maps, commutative + merge at the end — never raw `=` on shared state). +- **D** ractor actor-per-worker — QUANTIFIES the "ractor is a helper, + not a messaging path" ruling as a measured ratio vs C. +- **E** kanban-scheduled chunks (casts through the W1b writer shape; + KanbanActor lanes) — the scheduling tax on real work; feeds W2d. +- **F — the substrate-native lane (operator: "process it as cognitive + shader in a morton tile cascaded batch"):** station identity → key → + Morton-tile cascade position (HHTL prefix route); records bucketed + tile-cascaded so accumulation is prefix-local (cache-coherent SIMD + sweeps); aggregation = gated write-back into SoA-shaped accumulators + (bundle merge — the write masks the thinking); kanban lanes schedule + the tile batches. THE thesis test: group-by-identity as a prefix + ROUTE, aggregation as a gated write — the semantic OS doing raw OLAP. + **Honest framing:** the Morton route is radix bucketing wearing our + address; the fastest known 1BRC entries are radix/perfect-hash — so F + vs the classic map (A/C) vs a plain radix bucket isolates the + ADDRESSING TAX exactly. F winning or tying validates + addressing-is-aggregation; F losing prices the address layer — either + result tunes W2d and gives W2e its "winner owns the hot path" number. 
From cc14f899bcc57f794ab7b930a85ab99b4ce6bdb7 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Jul 2026 16:44:33 +0000 Subject: [PATCH 11/14] =?UTF-8?q?probe(1brc):=20onebrc-probe=20scaffold=20?= =?UTF-8?q?=E2=80=94=20lanes=20A/C=20baselines=20on=20the=20seeded=201BRC?= =?UTF-8?q?=20workload=20(Addendum-13)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Standalone workspace-excluded crate, zero deps for the baselines. Deterministic SplitMix64 corpus generator (procedural station names, no external dataset) with recipe+hash emission per the archival convention; in-crate SHA-256 verified against system sha256sum on the real corpus. Lane A scalar single-pass (integer-tenths parse, no float in the hot loop); lane C newline-aligned chunks + owned per-worker maps + commutative merge (the borrow-strategy shape). 11/11 tests incl. A==C aggregate equality and generator determinism. t0 @ 10M rows, 4 cores (container): A 7.11 Mrows/s; C 26.41 Mrows/s (3.71x). recipe rows=10000000 seed=42 sha256=f1853caa...5691. Lanes B (ndarray SIMD) / D (ractor ratio) / E (kanban scheduling tax) / F (Morton-tile cascaded shader — the addressing-is-aggregation thesis test) are README-stubbed follow-ups. 
Co-Authored-By: Claude Fable 5 Claude-Session: https://claude.ai/code/session_01MLBnPuScZy6w9di2QEjsXM --- Cargo.toml | 5 + crates/onebrc-probe/Cargo.lock | 7 + crates/onebrc-probe/Cargo.toml | 22 ++ crates/onebrc-probe/README.md | 157 ++++++++++++++ crates/onebrc-probe/src/gen.rs | 171 +++++++++++++++ crates/onebrc-probe/src/lib.rs | 335 ++++++++++++++++++++++++++++++ crates/onebrc-probe/src/main.rs | 106 ++++++++++ crates/onebrc-probe/src/sha256.rs | 208 +++++++++++++++++++ 8 files changed, 1011 insertions(+) create mode 100644 crates/onebrc-probe/Cargo.lock create mode 100644 crates/onebrc-probe/Cargo.toml create mode 100644 crates/onebrc-probe/README.md create mode 100644 crates/onebrc-probe/src/gen.rs create mode 100644 crates/onebrc-probe/src/lib.rs create mode 100644 crates/onebrc-probe/src/main.rs create mode 100644 crates/onebrc-probe/src/sha256.rs diff --git a/Cargo.toml b/Cargo.toml index a4e84d4f..abe6dfcb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -103,6 +103,11 @@ exclude = [ # binary. Own [workspace] + git-deps (sibling of crates/symbiont). Verify via # `cargo build --manifest-path crates/cognitive-stack/Cargo.toml` or the Dockerfile. "crates/cognitive-stack", + # 1BRC substrate probe — standalone zero-dep groupby-aggregate throughput + # baseline (Lane A scalar / Lane C std::thread) at container scale. See + # crates/onebrc-probe/README.md. Verify via + # `cargo test --manifest-path crates/onebrc-probe/Cargo.toml`. + "crates/onebrc-probe", ] resolver = "2" diff --git a/crates/onebrc-probe/Cargo.lock b/crates/onebrc-probe/Cargo.lock new file mode 100644 index 00000000..f14eef8b --- /dev/null +++ b/crates/onebrc-probe/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "onebrc-probe" +version = "0.1.0" diff --git a/crates/onebrc-probe/Cargo.toml b/crates/onebrc-probe/Cargo.toml new file mode 100644 index 00000000..8b12a723 --- /dev/null +++ b/crates/onebrc-probe/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "onebrc-probe" +version = "0.1.0" +edition = "2021" +license = "Apache-2.0" +publish = false +description = "1BRC (One Billion Row Challenge) substrate probe: measures the V3 SoA/mailbox substrate's groupby-aggregate throughput at container scale (100M rows). Lane A (single-thread scalar baseline) and Lane C (std::thread parallel baseline) ship here; Lane B (ndarray SIMD), Lane D (ractor actors), Lane E (kanban) are follow-up work — see README.md." + +# Standalone, workspace-EXCLUDED crate (see root Cargo.toml `exclude`, same +# precedent as crates/bgz17 and crates/deepnsm). Zero dependencies for lanes +# A/C (std only). Verify with: +# cargo test --manifest-path crates/onebrc-probe/Cargo.toml + +# Empty [workspace] table: this crate is `exclude`d from the parent +# lance-graph workspace, but cargo's manifest auto-discovery would otherwise +# walk up from a nested worktree and pick up the outer workspace root +# (identical precedent/comment to crates/deepnsm/Cargo.toml). +[workspace] + +[dependencies] + +[dev-dependencies] diff --git a/crates/onebrc-probe/README.md b/crates/onebrc-probe/README.md new file mode 100644 index 00000000..6058a553 --- /dev/null +++ b/crates/onebrc-probe/README.md @@ -0,0 +1,157 @@ +# onebrc-probe + +A standalone, workspace-**excluded** crate (see root `Cargo.toml` `exclude`, +same precedent as `crates/bgz17` / `crates/deepnsm`) that measures the V3 +substrate's throughput on the classic **1BRC** (One Billion Row Challenge) +groupby-aggregate workload, at **container scale (100M rows)**. + +Verify standalone: + +```bash +cargo test --manifest-path crates/onebrc-probe/Cargo.toml +``` + +Zero external dependencies for lanes A/C (std only) — see `Cargo.toml`. 
+ +--- + +## §1 — Reference inventory + +`automataIA/1brc-rs` (fetched via +`https://raw.githubusercontent.com/automataIA/1brc-rs/main/...`, per the +worker-agent environment preamble — `api.github.com` / `codeload` / +`github.com` HTML are session-denied for unscoped repos, `raw.githubusercontent.com` +is not) is a **reference to study, never a dependency**. Every technique +below is **reimplemented** in this crate's own words, not vendored. + +### What the reference does + +| Technique | Reference location | Reimplemented here? | +|---|---|---| +| `memmap2::Mmap` zero-copy file access ("the only path to break the 2-second barrier": avoids ~13 GB explicit allocation vs `fs::read`) | `README.md` § Memory Access | **No** — see "mmap note" below | +| `hashbrown::HashMap::raw_entry_mut` + precomputed `FxHash`, inlined on the hot loop | `README.md` § Hashing Strategy (`v15_raw_hash`) | **No** — `lane_a_scalar` uses `std::collections::BTreeMap` | +| merykitty SWAR parser: reads 8 bytes as `u64`, finds `.` via `(!w & 0x10101010).trailing_zeros()`, branchlessly selects `X.Y` vs `XY.Z` layout | `README.md` § Temperature Parsing | **No** — `parse_temp_tenths` in `lib.rs` is a plain byte-scan integer parser (still float-free, just not branchless/SWAR) | +| `chunk_bounds(data, n) -> Vec<(usize, usize)>`, newline-aligned parallel work distribution | `src/lib.rs` (per repo tree) | **Yes** — `chunk_bounds` in this crate's `lib.rs`, same signature shape, own implementation | +| Threading: `std::thread::scope` (early, v3) then `rayon` work-stealing (later variants) | `README.md` § Threading Model | **Partially** — `lane_c_threads` uses `std::thread::scope` (the v3-era approach); no `rayon` (would break the zero-dep contract for lanes A/C) | +| Safe SIMD: `pulp::Arch::dispatch` (v11) / `wide::u8x32::cmp_eq` (v12) to find semicolons; manual AVX-512 rejected (loses on Zen 4 — AMD implements AVX-512 with 256-bit execution units) | `README.md` § SIMD Techniques | **No** — deferred to 
**Lane B** (see below); this workspace's SIMD rule is "all SIMD from `ndarray::simd`" (`simd-savant` agent), so Lane B routes through `ndarray::simd`, not `pulp`/`wide` | +| `Stats { min: i32, max: i32, sum: i64, count: u32 }` + `merge` | `src/lib.rs` (per repo tree) | **Yes**, same field shape — `Stats` in this crate's `lib.rs`, own implementation, with a doc-comment tying `merge` to this workspace's borrow-strategy rule | +| LCG-seeded generator (`0xDEADBEEFCAFEBABE`), fixed 413-station list sourced from the original Java 1BRC, Gaussian temps via Box-Muller, clamped to `[-99.9, 99.9]` | `src/bin/gen.rs` | **No, deliberately different** — this crate's `gen()` uses SplitMix64 (not an LCG) and procedurally INVENTS ~400 station names from synthetic syllables (no external Java-1BRC city list), because the archival-recipe contract (§2 below) wants the corpus reconstructible from `(rows, seed)` with **zero external dataset dependency** | +| `tests/equivalence.rs` — byte-for-byte output validation across variants | `tests/` | **Yes, in spirit** — `lane_a_and_lane_c_agree_on_generated_corpus` in `src/lib.rs` | + +Repository layout (from the GitHub tree page, `main` branch): +`Cargo.toml`, `.cargo/config.toml` (`target-cpu=native`), `src/lib.rs` +(`Stats`, `chunk_bounds`, parser, formatter), `src/hash.rs` (custom hash +table), `src/bin/{gen.rs, v0_naive.rs .. v15_raw_hash.rs}` (16 progressive +solver variants), `tests/equivalence.rs`, `scripts/{gen_data,run_all,build_pgo}.sh`, +`benches/bench_variants.rs`, `data/` (gitignored generated corpora). + +### mmap note + +This crate deliberately uses `std::fs::read` (a single owned `Vec`), +**not** `mmap`, in `main.rs`'s `run` subcommand. 
The reference's own +documentation states mmap is "the only path" past the 2-second barrier at +the full 1B-row scale — that tradeoff is real and left for a follow-up +(Lane B or later) that's allowed to add a dependency (`memmap2` has no +AdaWorldAPI fork requirement bearing on it, per `CLAUDE.md`'s fork policy, +but adding ANY dependency changes this crate's "zero deps for lanes A/C" +contract, so it's out of scope for this brief). + +--- + +## §2 — Archival convention + +Every generated corpus travels with its **recipe**: `(rows, seed)` fully +determine the corpus bytes, and `gen()` streams a SHA-256 digest while +writing so the recipe line — + +```text +rows=<N> seed=<S> sha256=<hex> +``` + +— is printed without a second read pass. Reproduce any measurement by +regenerating with the same `(rows, seed)` and diffing the printed +`sha256=` line. + +--- + +## §3 — Lanes + +| Lane | What it measures | Status | +|---|---|---| +| **A** — `lane_a_scalar` | Single-thread scalar baseline: one pass, byte-wise `;`/`\n` scan, integer temp parse, `BTreeMap` accumulation | **Shipped** | +| **C** — `lane_c_threads` | `std::thread` parallel baseline: newline-aligned `chunk_bounds` split, per-worker owned `BTreeMap`, commutative `Stats::merge` combine | **Shipped** | +| **B** — ndarray SIMD | Vectorized semicolon/newline scanning and/or batched parse via `ndarray::simd` (per the workspace's SIMD rule — never raw `pulp`/`wide`/hand intrinsics in a consumer crate; see `.claude/knowledge/ndarray-vertical-simd-alien-magic.md`). Would also evaluate whether an `ndarray`-backed SIMD hash or SWAR-style parse closes the gap to the reference's `v15_raw_hash`.
| **Not implemented** — orchestrator follow-up | +| **D** — `ractor` actors | Same groupby-aggregate workload, but the aggregation runs as `ractor`-supervised actors (per this workspace's `lance-graph-supervisor` precedent) instead of bare `std::thread::scope`, to measure actor-model overhead/benefit vs Lane C's raw threads at this workload's arrival rate. | **Not implemented** — orchestrator follow-up | +| **E** — kanban | Routes the aggregation through the V3 kanban execution machinery (`v3-kanban-executor-engineer` domain — `KanbanPhase` lifecycle, ahead-firing batch writer) to measure the substrate's own scheduling/dispatch overhead against the bare-metal Lane A/C numbers as a ceiling reference. | **Not implemented** — orchestrator follow-up | + +--- + +## §4 — CLI + +```text +onebrc-probe gen <path> <rows> <seed> +onebrc-probe run <path> <lane> [workers] +``` + +`run` prints: + +```text +lane=<a|c> rows=<N> workers=<W> elapsed_ms=<ms> throughput_mrows_s=<x> +-- first 3 stations -- + ... +-- last 3 stations -- + ... +``` + +The first/last-3-stations dump (map is a `BTreeMap`, so this is +sorted-by-name order) is the correctness spot-check surface — a cheap +sanity signal that the aggregate isn't obviously garbage without diffing +the full ~400-station map. + +--- + +## §5 — t0 baselines (10M rows, container) + +Smoke-scale measurement (10M rows, not the full 100M-row container-scale +target — disk-cheap: ~140 MB, generated to `/tmp` and deleted after). +Machine: `nproc` = 4 (container).
+ +Commands run (release build): + +```bash +cargo run --release --manifest-path crates/onebrc-probe/Cargo.toml -- \ + gen /tmp/onebrc_10m.txt 10000000 42 +cargo run --release --manifest-path crates/onebrc-probe/Cargo.toml -- \ + run /tmp/onebrc_10m.txt a +cargo run --release --manifest-path crates/onebrc-probe/Cargo.toml -- \ + run /tmp/onebrc_10m.txt c 4 +rm /tmp/onebrc_10m.txt +``` + +Recipe line (corpus is reconstructible from this alone): + +```text +rows=10000000 seed=42 sha256=f1853caa30a765883aa655be1c304d956ad8b03e19b3557df2af431d9a955691 +``` + +File size: 142 MB (10,000,000 rows × `station;temp\n`). + +| Lane | workers | elapsed_ms | throughput (Mrows/s) | speedup vs Lane A | +|---|---|---|---|---| +| A (scalar) | 1 | 1405.850 | 7.113 | 1.0x | +| C (threads) | 4 | 378.705 | 26.406 | 3.71x | + +Both lanes agreed on every station's `Stats` (spot-checked via the +first/last-3-stations dump — identical `min`/`max`/`sum`/`count` for +`Belgoryoltuv`, `Belhumo`, `Belhuzephri`, `Zephven`, `Zephtuvhuhu`, +`Zephsaeshikra` across both runs), consistent with the +`lane_a_and_lane_c_agree_on_generated_corpus` unit test. + +3.71x on 4 cores (not a clean 4.0x) is expected at this scale: Lane C pays +`std::fs::read`'s single-threaded I/O + the newline-scan in `chunk_bounds` +before any worker starts, and `BTreeMap`'s per-insert `O(log n)` cost means +the aggregation itself isn't perfectly parallel-friendly (~400 stations +keeps the tree shallow, but each of the 4 workers still walks its own tree +independently rather than sharing one flat hash table). Lane B (SIMD +scan/parse) and a hash-map swap are the natural next levers — deferred to +the orchestrator's follow-up per §3. diff --git a/crates/onebrc-probe/src/gen.rs b/crates/onebrc-probe/src/gen.rs new file mode 100644 index 00000000..4efa80e2 --- /dev/null +++ b/crates/onebrc-probe/src/gen.rs @@ -0,0 +1,171 @@ +//! Deterministic 1BRC-format corpus generator. +//! +//! 
Produces `station;temp\n` lines with ~400 procedurally-generated station +//! names (invented syllables, NOT the upstream Java-1BRC city list — see +//! README "Reference inventory" for why: the archival-recipe contract wants +//! the corpus fully reconstructible from `(rows, seed)` alone, with no +//! external dataset dependency). Streams a SHA-256 digest while writing so +//! the recipe line (`rows=<N> seed=<S> sha256=<hex>`) never needs a second +//! read pass over the file. + +use crate::sha256::Sha256; +use std::fs::File; +use std::io::{self, BufWriter, Write}; +use std::path::Path; + +/// Number of distinct stations in every generated corpus. +pub const STATION_COUNT: usize = 400; + +/// Result of a `gen()` call — the archival recipe: everything needed to +/// reproduce the exact same corpus bytes again. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct GenResult { + pub rows: u64, + pub seed: u64, + pub sha256_hex: String, +} + +/// SplitMix64 — a small, fast, well-distributed deterministic PRNG. Chosen +/// over `std`'s (nonexistent) RNG or a crates.io dependency because this +/// crate is dependency-free by design; SplitMix64 is a widely published +/// public-domain algorithm (Vigna 2015), reimplemented here in ~10 lines. +struct SplitMix64(u64); + +impl SplitMix64 { + fn new(seed: u64) -> Self { + Self(seed) + } + + fn next_u64(&mut self) -> u64 { + self.0 = self.0.wrapping_add(0x9E3779B97F4A7C15); + let mut z = self.0; + z = (z ^ (z >> 30)).wrapping_mul(0xBF58476D1CE4E5B9); + z = (z ^ (z >> 27)).wrapping_mul(0x94D049BB133111EB); + z ^ (z >> 31) + } +} + +/// Invented syllable pool used to build station names — purely synthetic, +/// no relation to any real-world gazetteer or the upstream 1BRC city list.
+const SYLLABLES: &[&str] = &[ + "ka", "ri", "mo", "ta", "lu", "ven", "dor", "shi", "zan", "qui", "bel", "fen", "gor", "hu", + "ith", "jol", "kra", "myn", "non", "oru", "pex", "ryn", "sae", "tuv", "uli", "vex", "wren", + "xan", "yol", "zeph", +]; + +/// Deterministic, seed-derived station names. Same `(seed, count)` always +/// produces the same `Vec<String>` in the same order (the RNG stream and +/// insertion order are both deterministic; the `HashSet` below is used only +/// for a membership check, never iterated, so its unordered nature never +/// leaks into the output). +pub fn station_names(seed: u64, count: usize) -> Vec<String> { + let mut rng = SplitMix64::new(seed ^ 0xA11CE_5EED); + let mut seen = std::collections::HashSet::with_capacity(count * 2); + let mut names = Vec::with_capacity(count); + while names.len() < count { + let syl_count = 2 + (rng.next_u64() % 3) as usize; // 2..=4 syllables + let mut raw = String::new(); + for _ in 0..syl_count { + let idx = (rng.next_u64() as usize) % SYLLABLES.len(); + raw.push_str(SYLLABLES[idx]); + } + let mut chars = raw.chars(); + let name = match chars.next() { + Some(c) => c.to_uppercase().collect::<String>() + chars.as_str(), + None => continue, + }; + if seen.insert(name.clone()) { + names.push(name); + } + } + names +} + +/// Format tenths-of-a-degree as a `[-]D+.D` string (one decimal place), +/// matching the exact 1BRC wire format the parser in `lib.rs` expects. +fn format_tenths(t: i32) -> String { + let neg = t < 0; + let abs = t.unsigned_abs(); + let whole = abs / 10; + let frac = abs % 10; + if neg { + format!("-{whole}.{frac}") + } else { + format!("{whole}.{frac}") + } +} + +/// Generate a deterministic 1BRC-format corpus at `path`: `rows` lines of +/// `station;temp\n`, fully reproducible from `(rows, seed)`.
+/// +/// Per-station means are spread across `[-20.0, 20.0)` degrees, derived +/// from station INDEX (not the RNG stream) so the mean table is stable +/// regardless of how many random draws the row-generation loop performs. +/// Each row's temperature is `mean + variate` (variate uniform in +/// `[-10.0, 10.0]`), clamped to `[-99.9, 99.9]`. +pub fn gen(path: &Path, rows: u64, seed: u64) -> io::Result { + let stations = station_names(seed, STATION_COUNT); + let means: Vec = (0..stations.len()) + .map(|i| ((i * 131 + 37) % 400) as i32 - 200) + .collect(); + + let file = File::create(path)?; + let mut writer = BufWriter::with_capacity(1 << 20, file); + let mut hasher = Sha256::new(); + let mut rng = SplitMix64::new(seed); + let mut line_buf = Vec::with_capacity(32); + + for _ in 0..rows { + let idx = (rng.next_u64() as usize) % stations.len(); + let variate = (rng.next_u64() % 201) as i32 - 100; // +-10.0 degrees, in tenths + let tenths = (means[idx] + variate).clamp(-999, 999); + + line_buf.clear(); + line_buf.extend_from_slice(stations[idx].as_bytes()); + line_buf.push(b';'); + line_buf.extend_from_slice(format_tenths(tenths).as_bytes()); + line_buf.push(b'\n'); + + writer.write_all(&line_buf)?; + hasher.update(&line_buf); + } + writer.flush()?; + + let digest = hasher.finalize(); + Ok(GenResult { + rows, + seed, + sha256_hex: Sha256::hex(&digest), + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn station_names_are_deterministic_and_unique() { + let a = station_names(42, STATION_COUNT); + let b = station_names(42, STATION_COUNT); + assert_eq!(a, b); + assert_eq!(a.len(), STATION_COUNT); + let unique: std::collections::HashSet<_> = a.iter().collect(); + assert_eq!(unique.len(), STATION_COUNT, "station names must be unique"); + } + + #[test] + fn different_seeds_diverge() { + let a = station_names(1, 50); + let b = station_names(2, 50); + assert_ne!(a, b); + } + + #[test] + fn format_tenths_examples() { + assert_eq!(format_tenths(0), "0.0"); + 
assert_eq!(format_tenths(53), "5.3"); + assert_eq!(format_tenths(-53), "-5.3"); + assert_eq!(format_tenths(999), "99.9"); + assert_eq!(format_tenths(-999), "-99.9"); + } +} diff --git a/crates/onebrc-probe/src/lib.rs b/crates/onebrc-probe/src/lib.rs new file mode 100644 index 00000000..3aeb7c3a --- /dev/null +++ b/crates/onebrc-probe/src/lib.rs @@ -0,0 +1,335 @@ +//! `onebrc-probe` — 1BRC (One Billion Row Challenge) substrate probe. +//! +//! Measures groupby-aggregate throughput (the classic 1BRC workload: parse +//! `station;temp` lines, aggregate min/max/sum/count per station) as a +//! stand-in for the V3 substrate's own aggregation paths, at container +//! scale (100M rows). This crate is standalone and workspace-excluded (see +//! root `Cargo.toml`); it ships two baseline lanes: +//! +//! - **Lane A** (`lane_a_scalar`) — single-thread scalar baseline. +//! - **Lane C** (`lane_c_threads`) — `std::thread` parallel baseline, +//! newline-aligned chunk split + commutative merge. +//! +//! Lane B (ndarray SIMD), Lane D (ractor actors), Lane E (kanban) are +//! follow-up work — see `README.md` for the stub sections describing what +//! each will measure. +//! +//! ## Reference inventory +//! +//! Techniques below are REIMPLEMENTED from reading automataIA/1brc-rs (a +//! reference to study, never a dependency — see README §1 for the full +//! inventory + file pointers): +//! +//! - Newline-aligned chunk splitting for parallel work distribution +//! (`chunk_bounds`, mirrors the reference's function of the same name). +//! - Integer (never float) temperature parsing in the hot loop. +//! - `min`/`max`/`sum`/`count` per-station `Stats` aggregate with a +//! commutative `merge`. +//! +//! NOT reimplemented here (left to Lane B / follow-up): mmap zero-copy +//! file access, `FxHash` + `raw_entry_mut` hashing, the merykitty SWAR +//! branchless parser, and SIMD semicolon-finding (`pulp` / `wide`). 
+ +pub mod gen; +pub mod sha256; + +use std::collections::BTreeMap; + +/// Per-station aggregate: min/max/sum/count, in tenths-of-a-degree. +/// +/// `merge` is commutative and associative — the workspace's owned-microcopy +/// / gated-commutative-merge borrow-strategy rule (see +/// `.claude/rules/borrow-strategy.md`, "Multiple writers -> BUNDLE (majority +/// vote, commutative)"): each Lane-C worker computes an OWNED `Stats` value +/// independently (never a shared `&mut` reference across threads), and +/// `merge` combines two owned values into one via min/max/sum/count — never +/// a raw `=` assignment onto shared state. Merge order never matters. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Stats { + pub min: i32, + pub max: i32, + pub sum: i64, + pub count: u32, +} + +impl Stats { + /// A fresh aggregate seeded with one observation. + pub fn single(tenths: i32) -> Self { + Self { + min: tenths, + max: tenths, + sum: tenths as i64, + count: 1, + } + } + + /// Fold one more observation into this (owned) aggregate. + pub fn observe(&mut self, tenths: i32) { + if tenths < self.min { + self.min = tenths; + } + if tenths > self.max { + self.max = tenths; + } + self.sum += tenths as i64; + self.count += 1; + } + + /// Commutative, associative merge of two owned aggregates. See the + /// struct-level doc comment for why this is the only legal write-back + /// shape for multi-writer (Lane C) accumulation. + pub fn merge(&mut self, other: &Stats) { + if other.min < self.min { + self.min = other.min; + } + if other.max > self.max { + self.max = other.max; + } + self.sum += other.sum; + self.count += other.count; + } + + /// Mean, in whole degrees (tenths / 10). + pub fn mean_tenths(&self) -> f64 { + if self.count == 0 { + 0.0 + } else { + self.sum as f64 / self.count as f64 + } + } +} + +/// Manual integer parse of a `[-]D[D].D` temperature field into tenths of a +/// degree — no float in the hot loop. 
`bytes` must be exactly the temp +/// field (no leading/trailing `;`/`\n`), matching the `gen::gen` output +/// format and the merykitty-SWAR-parser family's "parse to an integer, +/// never to f64" shape (see module doc "Reference inventory"). +fn parse_temp_tenths(bytes: &[u8]) -> i32 { + let mut i = 0usize; + let neg = bytes[0] == b'-'; + if neg { + i += 1; + } + let mut val: i32 = 0; + while bytes[i] != b'.' { + val = val * 10 + (bytes[i] - b'0') as i32; + i += 1; + } + i += 1; // skip '.' + val = val * 10 + (bytes[i] - b'0') as i32; + if neg { + -val + } else { + val + } +} + +/// Lane A — single-thread scalar baseline. One pass over `data`, byte-wise +/// scan for `;` and `\n`, integer temp parse, `BTreeMap` +/// accumulation (owned per-station microcopies — see `Stats::merge` doc). +pub fn lane_a_scalar(data: &[u8]) -> BTreeMap { + let mut map: BTreeMap = BTreeMap::new(); + let len = data.len(); + let mut i = 0usize; + while i < len { + let name_start = i; + while data[i] != b';' { + i += 1; + } + let name = + std::str::from_utf8(&data[name_start..i]).expect("station name is valid utf8"); + i += 1; // skip ';' + let temp_start = i; + while data[i] != b'\n' { + i += 1; + } + let tenths = parse_temp_tenths(&data[temp_start..i]); + i += 1; // skip '\n' + + match map.get_mut(name) { + Some(stats) => stats.observe(tenths), + None => { + map.insert(name.to_string(), Stats::single(tenths)); + } + } + } + map +} + +/// Split `data` into `n` byte ranges aligned on `\n` boundaries — each +/// `(start, end)` is a `[start, end)` half-open range that always ends +/// immediately after a newline (or at `data.len()`), so no record straddles +/// a chunk boundary. Mirrors automataIA/1brc-rs's `chunk_bounds` function +/// (reimplemented, not vendored — see README §1). 
+pub fn chunk_bounds(data: &[u8], n: usize) -> Vec<(usize, usize)> { + let len = data.len(); + if n <= 1 || len == 0 { + return vec![(0, len)]; + } + let mut bounds = Vec::with_capacity(n); + let mut start = 0usize; + for i in 0..n { + if i == n - 1 { + bounds.push((start, len)); + break; + } + let target = (len / n) * (i + 1); + let mut end = target.min(len); + while end < len && data[end] != b'\n' { + end += 1; + } + if end < len { + end += 1; // include the newline itself in this chunk + } + bounds.push((start, end)); + start = end; + } + bounds +} + +/// Commutative merge of N owned per-worker maps into one — the multi-writer +/// BUNDLE step (see `Stats::merge` doc); order of the input `Vec` never +/// affects the result. +fn merge_maps(maps: Vec>) -> BTreeMap { + let mut out: BTreeMap = BTreeMap::new(); + for m in maps { + for (name, stats) in m { + match out.get_mut(&name) { + Some(existing) => existing.merge(&stats), + None => { + out.insert(name, stats); + } + } + } + } + out +} + +/// Lane C — `std::thread` parallel baseline. Splits `data` into `workers` +/// newline-aligned chunks (`chunk_bounds`), each worker runs `lane_a_scalar` +/// on its OWN slice producing an owned `BTreeMap` (no shared +/// `&mut` state across threads — per-worker microcopies), then merges all +/// worker maps via the commutative `Stats::merge` (order-independent +/// BUNDLE; see struct-level doc on `Stats`). 
+pub fn lane_c_threads(data: &[u8], workers: usize) -> BTreeMap { + let workers = workers.max(1); + let bounds = chunk_bounds(data, workers); + let results: Vec> = std::thread::scope(|scope| { + let handles: Vec<_> = bounds + .iter() + .map(|&(start, end)| { + let slice = &data[start..end]; + scope.spawn(move || lane_a_scalar(slice)) + }) + .collect(); + handles + .into_iter() + .map(|h| h.join().expect("lane C worker panicked")) + .collect() + }); + merge_maps(results) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn merge_is_commutative_and_associative() { + let a = Stats { + min: -50, + max: 100, + sum: 500, + count: 10, + }; + let b = Stats { + min: -80, + max: 60, + sum: -200, + count: 5, + }; + let c = Stats { + min: 0, + max: 40, + sum: 120, + count: 3, + }; + + let mut ab = a; + ab.merge(&b); + let mut ba = b; + ba.merge(&a); + assert_eq!(ab, ba, "merge must be commutative"); + + let mut ab_c = ab; + ab_c.merge(&c); + let mut a_bc = a; + let mut bc = b; + bc.merge(&c); + a_bc.merge(&bc); + assert_eq!(ab_c, a_bc, "merge must be associative"); + } + + #[test] + fn parse_temp_tenths_examples() { + assert_eq!(parse_temp_tenths(b"0.0"), 0); + assert_eq!(parse_temp_tenths(b"5.3"), 53); + assert_eq!(parse_temp_tenths(b"-5.3"), -53); + assert_eq!(parse_temp_tenths(b"99.9"), 999); + assert_eq!(parse_temp_tenths(b"-99.9"), -999); + assert_eq!(parse_temp_tenths(b"12.0"), 120); + } + + #[test] + fn chunk_bounds_covers_data_exactly_and_ends_on_newlines() { + let data = b"aa;1.0\nbb;2.0\ncc;3.0\ndd;4.0\nee;5.0\n".to_vec(); + let bounds = chunk_bounds(&data, 3); + assert_eq!(bounds.first().unwrap().0, 0); + assert_eq!(bounds.last().unwrap().1, data.len()); + // Every boundary (except the very end) must land right after a '\n'. + for &(_, end) in &bounds { + if end < data.len() { + assert_eq!(data[end - 1], b'\n'); + } + } + // Ranges must be contiguous, non-overlapping. 
+ for w in bounds.windows(2) { + assert_eq!(w[0].1, w[1].0); + } + } + + /// Lane A and Lane C must agree byte-for-byte on aggregate output — + /// the correctness spot check for the parallel split + merge path. + #[test] + fn lane_a_and_lane_c_agree_on_generated_corpus() { + let dir = std::env::temp_dir(); + let path = dir.join(format!("onebrc_probe_test_{}.txt", std::process::id())); + let result = gen::gen(&path, 100_000, 42).expect("gen"); + assert_eq!(result.rows, 100_000); + + let data = std::fs::read(&path).expect("read generated corpus"); + std::fs::remove_file(&path).ok(); + + let a = lane_a_scalar(&data); + let c = lane_c_threads(&data, 4); + assert_eq!(a, c, "lane A and lane C must produce identical aggregates"); + assert!(!a.is_empty()); + } + + /// Same seed => same corpus bytes (checksum equality) — the recipe + /// contract (`rows`, `seed`, `sha256`) must be reproducible. + #[test] + fn generator_is_deterministic() { + let dir = std::env::temp_dir(); + let p1 = dir.join(format!("onebrc_probe_det_1_{}.txt", std::process::id())); + let p2 = dir.join(format!("onebrc_probe_det_2_{}.txt", std::process::id())); + let r1 = gen::gen(&p1, 10_000, 42).expect("gen 1"); + let r2 = gen::gen(&p2, 10_000, 42).expect("gen 2"); + std::fs::remove_file(&p1).ok(); + std::fs::remove_file(&p2).ok(); + assert_eq!( + r1.sha256_hex, r2.sha256_hex, + "same seed must produce same sha256" + ); + } +} diff --git a/crates/onebrc-probe/src/main.rs b/crates/onebrc-probe/src/main.rs new file mode 100644 index 00000000..a9c9711d --- /dev/null +++ b/crates/onebrc-probe/src/main.rs @@ -0,0 +1,106 @@ +//! `onebrc-probe` CLI — generate a deterministic corpus, or run a lane +//! against one, printing throughput + a correctness spot-check. +//! +//! ```text +//! onebrc-probe gen +//! onebrc-probe run [workers] +//! 
``` + +use onebrc_probe::{gen::gen, lane_a_scalar, lane_c_threads}; +use std::env; +use std::fs; +use std::path::PathBuf; +use std::time::Instant; + +fn main() { + let args: Vec = env::args().collect(); + match args.get(1).map(String::as_str) { + Some("gen") => cmd_gen(&args[2..]), + Some("run") => cmd_run(&args[2..]), + _ => { + eprintln!( + "usage:\n onebrc-probe gen \n onebrc-probe run [workers]" + ); + std::process::exit(2); + } + } +} + +fn cmd_gen(args: &[String]) { + let path = PathBuf::from( + args.first() + .expect("usage: gen "), + ); + let rows: u64 = args + .get(1) + .expect("usage: gen ") + .parse() + .expect("rows must be a u64"); + let seed: u64 = args + .get(2) + .expect("usage: gen ") + .parse() + .expect("seed must be a u64"); + + let result = gen(&path, rows, seed).expect("corpus generation failed"); + // The archival recipe line: input + recipe + hash travel together, per + // the workspace's archival convention (see README §2, gen.rs doc). + println!( + "rows={} seed={} sha256={}", + result.rows, result.seed, result.sha256_hex + ); +} + +fn cmd_run(args: &[String]) { + let path = PathBuf::from( + args.first() + .expect("usage: run [workers]"), + ); + let lane = args.get(1).map(String::as_str).unwrap_or("a"); + let workers: usize = args + .get(2) + .map(|s| s.parse().expect("workers must be a usize")) + .unwrap_or_else(|| { + std::thread::available_parallelism() + .map(|n| n.get()) + .unwrap_or(1) + }); + + // NOTE (mmap note): plain `std::fs::read`, NOT mmap. automataIA/1brc-rs + // treats `memmap2::Mmap` as "the only path to break the 2-second + // barrier" for the full 1B-row file (avoiding ~13 GB of explicit + // allocation). This probe deliberately trades that peak-throughput + // headroom for staying at zero external dependencies for lanes A/C + // (see Cargo.toml + README §1). Revisit if/when a Lane B or later adds + // an mmap-capable dependency. 
+ let data = fs::read(&path).expect("read corpus file"); + let rows = data.iter().filter(|&&b| b == b'\n').count(); + + let start = Instant::now(); + let map = match lane { + "a" => lane_a_scalar(&data), + "c" => lane_c_threads(&data, workers), + other => { + eprintln!("unknown lane '{other}' (expected 'a' or 'c')"); + std::process::exit(2); + } + }; + let elapsed = start.elapsed(); + let elapsed_ms = elapsed.as_secs_f64() * 1000.0; + let throughput_mrows_s = (rows as f64 / 1_000_000.0) / elapsed.as_secs_f64(); + + println!( + "lane={lane} rows={rows} workers={workers} elapsed_ms={elapsed_ms:.3} throughput_mrows_s={throughput_mrows_s:.3}" + ); + + // Correctness spot-check surface — first/last 3 stations by name (map + // is a BTreeMap, so iteration order is the sorted station-name order). + println!("-- first 3 stations --"); + for (name, stats) in map.iter().take(3) { + println!(" {name}: {stats:?}"); + } + println!("-- last 3 stations --"); + for (name, stats) in map.iter().rev().take(3) { + println!(" {name}: {stats:?}"); + } +} diff --git a/crates/onebrc-probe/src/sha256.rs b/crates/onebrc-probe/src/sha256.rs new file mode 100644 index 00000000..169daefd --- /dev/null +++ b/crates/onebrc-probe/src/sha256.rs @@ -0,0 +1,208 @@ +//! Minimal, dependency-free, streaming SHA-256 (RFC 6234 / FIPS 180-4). +//! +//! Written in-crate rather than pulled from crates.io so `onebrc-probe` +//! stays at zero external dependencies for lanes A/C (see `Cargo.toml`). +//! Not constant-time, not audited for cryptographic use — this crate uses +//! it purely as a corpus-integrity checksum for the archival recipe line +//! (`rows= seed= sha256=`), never for anything security-bearing. +//! Correctness is pinned by the standard `""` / `"abc"` test vectors below. 
+ +const K: [u32; 64] = [ + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, +]; + +const H0: [u32; 8] = [ + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19, +]; + +/// Streaming SHA-256 hasher. `update()` any number of times, then `finalize()`. +pub struct Sha256 { + h: [u32; 8], + buffer: [u8; 64], + buffer_len: usize, + total_len: u64, +} + +impl Default for Sha256 { + fn default() -> Self { + Self::new() + } +} + +impl Sha256 { + pub fn new() -> Self { + Self { + h: H0, + buffer: [0u8; 64], + buffer_len: 0, + total_len: 0, + } + } + + /// Feed more bytes into the running digest. + pub fn update(&mut self, data: &[u8]) { + self.total_len = self.total_len.wrapping_add(data.len() as u64); + self.absorb(data); + } + + /// Absorb bytes into the block buffer WITHOUT touching `total_len` — + /// used both by `update` (which tracks length separately) and by + /// `finalize`'s padding (whose 0x80/zero/length-suffix bytes must + /// never be counted as message length). 
+ fn absorb(&mut self, mut data: &[u8]) { + if self.buffer_len > 0 { + let need = 64 - self.buffer_len; + let take = need.min(data.len()); + self.buffer[self.buffer_len..self.buffer_len + take].copy_from_slice(&data[..take]); + self.buffer_len += take; + data = &data[take..]; + if self.buffer_len == 64 { + let block = self.buffer; + self.process_block(&block); + self.buffer_len = 0; + } + } + while data.len() >= 64 { + let mut block = [0u8; 64]; + block.copy_from_slice(&data[..64]); + self.process_block(&block); + data = &data[64..]; + } + if !data.is_empty() { + self.buffer[..data.len()].copy_from_slice(data); + self.buffer_len = data.len(); + } + } + + /// Consume the hasher, returning the 32-byte digest. + pub fn finalize(mut self) -> [u8; 32] { + let bit_len = self.total_len.wrapping_mul(8); + self.absorb(&[0x80]); + while self.buffer_len != 56 { + self.absorb(&[0x00]); + } + self.absorb(&bit_len.to_be_bytes()); + + let mut out = [0u8; 32]; + for (i, word) in self.h.iter().enumerate() { + out[i * 4..i * 4 + 4].copy_from_slice(&word.to_be_bytes()); + } + out + } + + /// Lowercase hex encoding of a digest — the form used in the crate's + /// `sha256=` recipe line. 
+ pub fn hex(digest: &[u8; 32]) -> String { + let mut s = String::with_capacity(64); + for b in digest { + s.push_str(&format!("{b:02x}")); + } + s + } + + fn process_block(&mut self, block: &[u8; 64]) { + let mut w = [0u32; 64]; + for i in 0..16 { + w[i] = u32::from_be_bytes([block[i * 4], block[i * 4 + 1], block[i * 4 + 2], block[i * 4 + 3]]); + } + for i in 16..64 { + let s0 = w[i - 15].rotate_right(7) ^ w[i - 15].rotate_right(18) ^ (w[i - 15] >> 3); + let s1 = w[i - 2].rotate_right(17) ^ w[i - 2].rotate_right(19) ^ (w[i - 2] >> 10); + w[i] = w[i - 16] + .wrapping_add(s0) + .wrapping_add(w[i - 7]) + .wrapping_add(s1); + } + + let [mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut hh] = self.h; + + for i in 0..64 { + let s1 = e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25); + let ch = (e & f) ^ ((!e) & g); + let temp1 = hh + .wrapping_add(s1) + .wrapping_add(ch) + .wrapping_add(K[i]) + .wrapping_add(w[i]); + let s0 = a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22); + let maj = (a & b) ^ (a & c) ^ (b & c); + let temp2 = s0.wrapping_add(maj); + + hh = g; + g = f; + f = e; + e = d.wrapping_add(temp1); + d = c; + c = b; + b = a; + a = temp1.wrapping_add(temp2); + } + + self.h[0] = self.h[0].wrapping_add(a); + self.h[1] = self.h[1].wrapping_add(b); + self.h[2] = self.h[2].wrapping_add(c); + self.h[3] = self.h[3].wrapping_add(d); + self.h[4] = self.h[4].wrapping_add(e); + self.h[5] = self.h[5].wrapping_add(f); + self.h[6] = self.h[6].wrapping_add(g); + self.h[7] = self.h[7].wrapping_add(hh); + } +} + +/// Convenience one-shot hash + hex. +pub fn hash_hex(data: &[u8]) -> String { + let mut h = Sha256::new(); + h.update(data); + Sha256::hex(&h.finalize()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn empty_string_vector() { + // Standard SHA-256("") test vector. 
+ assert_eq!( + hash_hex(b""), + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + ); + } + + #[test] + fn abc_vector() { + // Standard SHA-256("abc") test vector. + assert_eq!( + hash_hex(b"abc"), + "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" + ); + } + + #[test] + fn multi_block_matches_single_update() { + // 1000 repetitions of "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq" + // exercises the multi-block path (block boundary crossing via + // several small `update()` calls vs one large call). + let chunk = b"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"; + let mut whole = Vec::new(); + for _ in 0..1000 { + whole.extend_from_slice(chunk); + } + + let mut streamed = Sha256::new(); + for c in whole.chunks(7) { + streamed.update(c); + } + let streamed_hex = Sha256::hex(&streamed.finalize()); + + let one_shot_hex = hash_hex(&whole); + assert_eq!(streamed_hex, one_shot_hex); + } +} From 70684c628c36a84a860ced7046da354ce1bdf2b6 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Jul 2026 17:04:22 +0000 Subject: [PATCH 12/14] =?UTF-8?q?probe(1brc):=20lanes=20B=20(ndarray-SIMD?= =?UTF-8?q?=20scan)=20+=20D=20(ractor=20actor-per-worker)=20=E2=80=94=20t1?= =?UTF-8?q?=20measured?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lane B: 32-byte-stride ;/\n scan via ndarray::simd::U8x32::cmpeq_mask (all SIMD from ndarray::simd per the workspace rule; probe pinned to x86-64-v3 so the ops are real AVX2 intrinsics), cross-block state carry, scalar parse kept. Lane D: actor-per-worker over Arc> via the AdaWorldAPI ractor fork (supervisor coordinates), identical chunking and commutative merge as lane C — only the worker primitive changes. Feature-gated (lane-b/lane-d); lanes A/C stay zero-dep (11/13/12/14 tests green across the four feature combos; clippy -D warnings clean, incl. the pre-existing gen.rs byte-grouping lint fixed here). 
t1 (recipe corpus rows=10000000 seed=42 sha256=f1853caa…5691 re-verified, 4 cores, best-of-2): A 7.012 / B 7.455 (1.06x vs A — delimiter find is not the bottleneck) / C 27.586 / D 22.078 Mrows/s (0.80x vs C — the 'ractor is a helper, not a messaging path' ruling as a measured ~20% actor tax incl. the forced corpus copy). README §5.1 carries the tables. Co-Authored-By: Claude Fable 5 Claude-Session: https://claude.ai/code/session_01MLBnPuScZy6w9di2QEjsXM --- .claude/v3/INTEGRATION-PLAN.md | 13 + crates/onebrc-probe/.cargo/config.toml | 7 + crates/onebrc-probe/Cargo.lock | 695 +++++++++++++++++++++++++ crates/onebrc-probe/Cargo.toml | 24 +- crates/onebrc-probe/README.md | 53 +- crates/onebrc-probe/src/gen.rs | 2 +- crates/onebrc-probe/src/lane_b.rs | 138 +++++ crates/onebrc-probe/src/lane_d.rs | 138 +++++ crates/onebrc-probe/src/lib.rs | 128 ++++- crates/onebrc-probe/src/main.rs | 38 +- crates/onebrc-probe/src/sha256.rs | 7 +- 11 files changed, 1222 insertions(+), 21 deletions(-) create mode 100644 crates/onebrc-probe/.cargo/config.toml create mode 100644 crates/onebrc-probe/src/lane_b.rs create mode 100644 crates/onebrc-probe/src/lane_d.rs diff --git a/.claude/v3/INTEGRATION-PLAN.md b/.claude/v3/INTEGRATION-PLAN.md index d486985f..817c3b05 100644 --- a/.claude/v3/INTEGRATION-PLAN.md +++ b/.claude/v3/INTEGRATION-PLAN.md @@ -549,3 +549,16 @@ std-only for the baselines). Scaffold + lanes A/C in flight. ADDRESSING TAX exactly. F winning or tying validates addressing-is-aggregation; F losing prices the address layer — either result tunes W2d and gives W2e its "winner owns the hot path" number. + +#### Addendum-13 status update (2026-07-02, t1) + +Lanes B + D SHIPPED (feature-gated `lane-b`/`lane-d`; A/C stay zero-dep — +proven by the no-feature test run). 
t1 best-of-2 on the archived recipe +corpus (hash re-verified): A 7.012 / B 7.455 (**1.06× vs A** — delimiter +find alone is not the bottleneck; SWAR parse + hash swap are the next +levers) / C 27.586 / D 22.078 Mrows/s (**0.80× vs C** — the "ractor is a +helper, not a messaging path" ruling measured: ~20% actor tax incl. the +forced one-time Arc corpus copy). Full tables + readings: +`crates/onebrc-probe/README.md` §5.1. Remaining: lane E (E−D isolates the +kanban journaling tax; feeds W2d), lane F (Morton-tile shader vs plain +radix control — the addressing-tax isolator). diff --git a/crates/onebrc-probe/.cargo/config.toml b/crates/onebrc-probe/.cargo/config.toml new file mode 100644 index 00000000..278b4c56 --- /dev/null +++ b/crates/onebrc-probe/.cargo/config.toml @@ -0,0 +1,7 @@ +# Pin the probe to the workspace's CI baseline so lane-B numbers are +# comparable with lance-graph CI builds (rust-test.yml uses x86-64-v3). +# ndarray's simd module dispatches at COMPILE time on target_feature: +# at v3 the U8x32 ops are real AVX2 intrinsics (U8x64 falls back to +# scalar loops — that's why lane B uses U8x32). +[build] +rustflags = ["-C", "target-cpu=x86-64-v3"] diff --git a/crates/onebrc-probe/Cargo.lock b/crates/onebrc-probe/Cargo.lock index f14eef8b..eaf46169 100644 --- a/crates/onebrc-probe/Cargo.lock +++ b/crates/onebrc-probe/Cargo.lock @@ -2,6 +2,701 @@ # It is not intended for manual editing. 
version = 4 +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + +[[package]] +name = "arrayvec" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3fb67a6e08acf24fdeccbac2cb6ac4305825bd1f117462e0e6f2f193345ad56" + +[[package]] +name = "autocfg" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" + +[[package]] +name = "bitflags" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8" + +[[package]] +name = "blake3" +version = "1.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0aa83c34e62843d924f905e0f5c866eb1dd6545fc4d719e803d9ba6030371fce" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "cpufeatures", +] + +[[package]] +name = "bon" +version = "3.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a602c73c7b0148ec6d12af6fd5cc7a46e2eacc8878271a999abac56eed12f561" +dependencies = [ + "bon-macros", + "rustversion", +] + +[[package]] +name = "bon-macros" +version = "3.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dee98b0db6a962de883bf5d20362dee4d7ca0d12fe39a7c6c73c844e1cd7c1f" +dependencies = [ + "darling", + "ident_case", + "prettyplease", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + +[[package]] +name = "bumpalo" +version = "3.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649" + +[[package]] +name = "cc" +version = "1.2.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e228eec9be7c17ccb640b59b36a5cd805ea2a564a4c5e162c2f659fea30d3b96" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "constant_time_eq" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" + +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "darling" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" +dependencies = [ + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" +dependencies = [ + "darling_core", + "quote", + "syn", +] + +[[package]] +name = "dashmap" +version = "6.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6361d5c062261c78a176addb82d4c821ae42bed6089de0e12603cd25de2059c" +dependencies = [ + "cfg-if", + 
"crossbeam-utils", + "hashbrown", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "futures" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-executor" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" + +[[package]] +name = "futures-macro" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "slab", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "js-sys" +version = "0.3.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53b44bfcdb3f8d5837a46dae1ca9660a837176eee74a28b229bc626816589102" +dependencies = [ + "cfg-if", + "futures-util", + "wasm-bindgen", +] + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "matrixmultiply" +version = 
"0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08" +dependencies = [ + "autocfg", + "rawpointer", +] + +[[package]] +name = "memchr" +version = "2.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4" + +[[package]] +name = "ndarray" +version = "0.17.2" +dependencies = [ + "blake3", + "matrixmultiply", + "num-complex", + "num-integer", + "num-traits", + "paste", + "portable-atomic", + "portable-atomic-util", + "rawpointer", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + [[package]] name = "onebrc-probe" version = "0.1.0" +dependencies = [ + "ndarray", + "ractor", + "tokio", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "paste" +version = 
"1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + +[[package]] +name = "portable-atomic-util" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a106d1259c23fac8e543272398ae0e3c0b8d33c88ed73d0cc71b0f1d902618" +dependencies = [ + "portable-atomic", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbc457d0c7a0759a614551b11a6409e5951f6c7537be1f1b7682b9ae9230368" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "ractor" +version = "0.15.13" +source = "git+https://github.com/AdaWorldAPI/ractor#f4c474f4e4fc205a5059230a5ae36d4c06b346c5" +dependencies = [ + "bon", + "dashmap", + "futures", + "js-sys", + "once_cell", + "strum", + "tokio", + "tokio_with_wasm", + "tracing", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-time", +] + +[[package]] +name = "rawpointer" +version = "0.2.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "shlex" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba" + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "smallvec" +version = "1.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ed6a63f02c8539c91a8685a86f4099661ba3da017932f6ebbea6de3f0fa7c90" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "strum" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9628de9b8791db39ceda2b119bbe13134770b56c138ec1d3af810d045c04f9bd" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab85eea0270ee17587ed4156089e10b9e6880ee688791d45a905f5b1ca36f664" 
+dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "syn" +version = "2.0.118" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9ae57f904213ebb649ce6895b8a66c66f0203b9319718f69a5612a065b1422" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tokio" +version = "1.52.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc7f01b389ac15039e4dc9531aa973a135d7a4135281b12d7c1bc79fd57fffe" +dependencies = [ + "pin-project-lite", + "tokio-macros", + "tracing", +] + +[[package]] +name = "tokio-macros" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio_with_wasm" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef3ce6a8f5b5190dfe4851db6c969e8360a262759e16a0b75dfc43af19d97a86" +dependencies = [ + "js-sys", + "tokio", + "tokio_with_wasm_proc", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + +[[package]] +name = "tokio_with_wasm_proc" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d8aa1d26c1550eef93cfb2dafadc145b3220432dae8d156b5ba485880594ffe" +dependencies = [ + "quote", + "syn", +] + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + 
+[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "wasm-bindgen" +version = "0.2.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b067c0c11094aef6b7a801c1e34a26affafdf3d051dba08456b868789aaf9a4" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c62df1340f32221cb9c54d6a27b030e3dba64361d4a95bed55f9aacb44da291d" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "167ce5e579f6bcf889c4f7175a8a5a585de84e8ff93976ce393efa5f2837aab1" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3997c7839262f4ef12cf90b818d6340c18e80f263f1a94bf157d0ec4420380e" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc1b4cb0cc549fcf58d7dfc081778139b3d283a081644e833e84682ad71cea24" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "web-sys" +version = "0.3.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8622dcb61c0bcc9fffa6938bed81210af2da9a7e4a1a834b2e37a59b6dfb6141" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" diff --git a/crates/onebrc-probe/Cargo.toml b/crates/onebrc-probe/Cargo.toml index 8b12a723..7c5e23a7 100644 --- a/crates/onebrc-probe/Cargo.toml +++ b/crates/onebrc-probe/Cargo.toml @@ -4,12 +4,15 @@ version = "0.1.0" edition = "2021" license = "Apache-2.0" publish = false -description = "1BRC (One Billion Row Challenge) substrate probe: measures the V3 SoA/mailbox substrate's groupby-aggregate throughput at container scale (100M rows). Lane A (single-thread scalar baseline) and Lane C (std::thread parallel baseline) ship here; Lane B (ndarray SIMD), Lane D (ractor actors), Lane E (kanban) are follow-up work — see README.md." +description = "1BRC (One Billion Row Challenge) substrate probe: measures the V3 SoA/mailbox substrate's groupby-aggregate throughput at container scale (100M rows). Lane A (single-thread scalar baseline), Lane C (std::thread parallel baseline), Lane B (ndarray SIMD, feature lane-b) and Lane D (ractor actors, feature lane-d) ship here; Lane E (kanban) is follow-up work — see README.md." # Standalone, workspace-EXCLUDED crate (see root Cargo.toml `exclude`, same -# precedent as crates/bgz17 and crates/deepnsm). Zero dependencies for lanes -# A/C (std only). Verify with: +# precedent as crates/bgz17 and crates/deepnsm). Lanes A/C are zero-dependency +# (std only); lanes B/D are opt-in via feature flags so the zero-dep contract +# for A/C is never silently broken. 
Verify with: # cargo test --manifest-path crates/onebrc-probe/Cargo.toml +# cargo test --manifest-path crates/onebrc-probe/Cargo.toml --features lane-b +# cargo test --manifest-path crates/onebrc-probe/Cargo.toml --features lane-d # Empty [workspace] table: this crate is `exclude`d from the parent # lance-graph workspace, but cargo's manifest auto-discovery would otherwise @@ -17,6 +20,21 @@ description = "1BRC (One Billion Row Challenge) substrate probe: measures the V3 # (identical precedent/comment to crates/deepnsm/Cargo.toml). [workspace] +[features] +# Lanes A/C stay dependency-free; B and D are opt-in. +lane-b = ["dep:ndarray"] +lane-d = ["dep:ractor", "dep:tokio"] + [dependencies] +# AdaWorldAPI fork (workspace P0 rule: fork over crates.io, always — see +# CLAUDE.md "P0 — AdaWorldAPI forks ONLY, NEVER crates.io upstream"). Same +# path convention as crates/lance-graph/Cargo.toml's ndarray dep. +# default-features = false + "std" exposes ndarray::simd (U8x32 etc.) +# without dragging in hpc-extras. +ndarray = { path = "../../../ndarray", optional = true, default-features = false, features = ["std"] } +# AdaWorldAPI fork of ractor (same coordinates as lance-graph-supervisor; +# fork carries the MessagingErr::Saturated non-exhaustive-match fix). 
+ractor = { git = "https://github.com/AdaWorldAPI/ractor", optional = true, default-features = false, features = ["tokio_runtime"] } +tokio = { version = "1", optional = true, default-features = false, features = ["rt-multi-thread", "macros"] } [dev-dependencies] diff --git a/crates/onebrc-probe/README.md b/crates/onebrc-probe/README.md index 6058a553..9bda6e1b 100644 --- a/crates/onebrc-probe/README.md +++ b/crates/onebrc-probe/README.md @@ -80,8 +80,8 @@ regenerating with the same `(rows, seed)` and diffing the printed |---|---|---| | **A** — `lane_a_scalar` | Single-thread scalar baseline: one pass, byte-wise `;`/`\n` scan, integer temp parse, `BTreeMap` accumulation | **Shipped** | | **C** — `lane_c_threads` | `std::thread` parallel baseline: newline-aligned `chunk_bounds` split, per-worker owned `BTreeMap`, commutative `Stats::merge` combine | **Shipped** | -| **B** — ndarray SIMD | Vectorized semicolon/newline scanning and/or batched parse via `ndarray::simd` (per the workspace's SIMD rule — never raw `pulp`/`wide`/hand intrinsics in a consumer crate; see `.claude/knowledge/ndarray-vertical-simd-alien-magic.md`). Would also evaluate whether an `ndarray`-backed SIMD hash or SWAR-style parse closes the gap to the reference's `v15_raw_hash`. | **Not implemented** — orchestrator follow-up | -| **D** — `ractor` actors | Same groupby-aggregate workload, but the aggregation runs as `ractor`-supervised actors (per this workspace's `lance-graph-supervisor` precedent) instead of bare `std::thread::scope`, to measure actor-model overhead/benefit vs Lane C's raw threads at this workload's arrival rate. 
| **Not implemented** — orchestrator follow-up | +| **B** — `lane_b::lane_b_simd` (feature `lane-b`) | Vectorized `;`/`\n` scanning via `ndarray::simd::U8x32::cmpeq_mask` (per the workspace's SIMD rule — never raw `pulp`/`wide`/hand intrinsics in a consumer crate; see `.claude/knowledge/ndarray-vertical-simd-alien-magic.md`), 32-byte-stride scan with cross-block `line_start`/`pending_semi` carry, scalar temp parse (SWAR/branchless parse deliberately still deferred). | **Shipped** | +| **D** — `lane_d::lane_d_ractor` (feature `lane-d`) | Same groupby-aggregate workload as Lane C, but each `chunk_bounds` chunk is aggregated by a stateless `ractor` actor (actor-per-worker, ask-pattern reply, `lance-graph-supervisor`-style `Actor`/`RpcReplyPort` shape) instead of a bare `std::thread::scope` closure — identical chunking + commutative merge, only the worker primitive changes. | **Shipped** | | **E** — kanban | Routes the aggregation through the V3 kanban execution machinery (`v3-kanban-executor-engineer` domain — `KanbanPhase` lifecycle, ahead-firing batch writer) to measure the substrate's own scheduling/dispatch overhead against the bare-metal Lane A/C numbers as a ceiling reference. | **Not implemented** — orchestrator follow-up | --- @@ -90,7 +90,17 @@ regenerating with the same `(rows, seed)` and diffing the printed ```text onebrc-probe gen -onebrc-probe run [workers] +onebrc-probe run [workers] +``` + +Lane `b` requires `--features lane-b`; lane `d` requires `--features lane-d` +(see `Cargo.toml` `[features]`). 
Lanes A/C stay dependency-free either way: + +```bash +cargo run --release --manifest-path crates/onebrc-probe/Cargo.toml \ + --features lane-b -- run /tmp/onebrc_10m.txt b +cargo run --release --manifest-path crates/onebrc-probe/Cargo.toml \ + --features lane-d -- run /tmp/onebrc_10m.txt d 4 ``` `run` prints: @@ -155,3 +165,40 @@ keeps the tree shallow, but each of the 4 workers still walks its own tree independently rather than sharing one flat hash table). Lane B (SIMD scan/parse) and a hash-map swap are the natural next levers — deferred to the orchestrator's follow-up per §3. + +### §5.1 — t1 (lanes B/D) — measured 2026-07-02 + +Same recipe corpus (`rows=10000000 seed=42 sha256=f1853caa…5691`, hash +re-verified byte-identical at regeneration), same 4-core container, +release build `--features lane-b,lane-d` (probe pinned to +`target-cpu=x86-64-v3` via `.cargo/config.toml`, so `U8x32` ops are real +AVX2 intrinsics). Two passes per lane, best-of-2 reported (both passes +listed for honesty): + +| Lane | workers | elapsed_ms (best) | throughput (Mrows/s) | ratio | +|---|---|---|---|---| +| A (scalar) | 1 | 1426.066 | 7.012 (7.0, 7.0) | 1.00× vs A | +| B (SIMD scan) | 1 | 1341.374 | 7.455 (7.1, 7.5) | **1.06× vs A** | +| C (threads) | 4 | 362.508 | 27.586 (27.3, 27.6) | 3.93× vs A | +| D (ractor) | 4 | 452.936 | 22.078 (22.1, 19.7) | **0.80× vs C** | + +Readings: + +- **B vs A (1.06×):** vectorizing ONLY the delimiter find barely moves + the needle — the hot cost at this corpus is the scalar temp parse + + `BTreeMap` accumulation, exactly as §5's t0 analysis predicted. The + SIMD scan is not wasted (it is the prerequisite structure for lane F's + batched tile sweeps); it is just not the bottleneck by itself. Next + levers remain SWAR parse + hash-map swap (§1 inventory rows 2–3). 
+- **D vs C (0.80×):** the operator's "ractor is a helper, not a + messaging path" ruling, as a number — routing the identical chunked + workload through actor-per-worker costs ~20% at this arrival rate, + and that figure INCLUDES the one-time `Arc<Vec<u8>>` corpus copy + (142 MB) that the actor boundary forces and `std::thread::scope`'s + borrow does not (see `lane_d.rs` module doc). Actors buy supervision + and single-writer ownership, not raw throughput; on the V3 substrate + they own SoA mailboxes (W2b) rather than carry bulk data — this lane + is the measured cost of doing it the wrong way, kept as the fence. +- Lane E (kanban scheduling tax, E−D isolates the journaling cost) is + the next lane; lane F (Morton-tile cascaded shader vs plain radix + control) closes the set per Addendum-13. diff --git a/crates/onebrc-probe/src/gen.rs b/crates/onebrc-probe/src/gen.rs index 4efa80e2..3dece1c2 100644 --- a/crates/onebrc-probe/src/gen.rs +++ b/crates/onebrc-probe/src/gen.rs @@ -59,7 +59,7 @@ const SYLLABLES: &[&str] = &[ /// for a membership check, never iterated, so its unordered nature never /// leaks into the output). pub fn station_names(seed: u64, count: usize) -> Vec { - let mut rng = SplitMix64::new(seed ^ 0xA11CE_5EED); + let mut rng = SplitMix64::new(seed ^ 0xA_11CE_5EED); let mut seen = std::collections::HashSet::with_capacity(count * 2); let mut names = Vec::with_capacity(count); while names.len() < count { diff --git a/crates/onebrc-probe/src/lane_b.rs b/crates/onebrc-probe/src/lane_b.rs new file mode 100644 index 00000000..401d92e2 --- /dev/null +++ b/crates/onebrc-probe/src/lane_b.rs @@ -0,0 +1,138 @@ +//! Lane B — SIMD delimiter scan via `ndarray::simd`. +//! +//! Per Addendum-13 lane B (see `README.md` §3), this lane measures vectorized +//! `;`/`\n` scanning against Lane A's byte-wise scalar scan, on identical +//! parse + accumulate logic. The workspace's SIMD iron rule is "all SIMD from +//! `ndarray::simd`" (`simd-savant` agent, +//! 
`.claude/knowledge/ndarray-vertical-simd-alien-magic.md`) — this module +//! uses `ndarray::simd::U8x32::cmpeq_mask` exclusively, never a raw +//! `core::arch` intrinsic, `pulp`, `wide`, or `memchr`. +//! +//! `U8x32` is the AVX2-native byte width (one `__m256i` = 32 bytes; see +//! `ndarray/src/simd_avx2.rs` module doc). The corpus is scanned in 32-byte +//! strides: for each block, `cmpeq_mask(U8x32::splat(b'\n'))` and +//! `cmpeq_mask(U8x32::splat(b';'))` produce 32-bit masks with bit `i` set +//! iff `block[i]` matches. The set bits of the combined mask (newline and +//! semicolon bytes never coincide, so `nl_mask | semi_mask` has no lost +//! information) are walked in ascending order via the classic +//! `mask & (mask - 1)` "clear lowest set bit" trick, recovering the ordered +//! sequence of delimiter events for the block. +//! +//! **Parse remains scalar** (SWAR/branchless parse deliberately deferred — +//! see `README.md` §1 "NOT reimplemented here"): `parse_temp_tenths` is the +//! same byte-scan integer parser Lane A uses. This lane's speedup, if any, +//! comes purely from vectorized delimiter-finding, not from vectorized +//! parsing. +//! +//! ## Cross-block record state +//! +//! A record's `;` and its `\n` are not guaranteed to land in the same +//! 32-byte block (short station names put the `;` near a block's end and +//! the `\n` in the next block, or vice versa). Two scalars carry state +//! across block boundaries: +//! +//! - `line_start: usize` — the byte offset where the current (in-progress) +//! station name begins. +//! - `pending_semi: Option<usize>` — `Some(offset)` once this record's `;` +//! has been seen but its `\n` has not yet arrived; `None` while still +//! scanning for the `;`. +//! +//! The tail (fewer than 32 bytes remaining after the last full block) is +//! finished with a plain byte-wise scalar loop — the same station-name / +//! temp-field extraction shape as `lane_a_scalar`, continuing from whatever +//! 
`line_start` / `pending_semi` state the SIMD pass left behind. + +use crate::{parse_temp_tenths, Stats}; +use ndarray::simd::U8x32; +use std::collections::BTreeMap; + +/// Lane B — SIMD delimiter scan. One pass over `data` in 32-byte strides +/// using `ndarray::simd::U8x32::cmpeq_mask` to locate `;` and `\n` bytes; +/// scalar integer temp parse (see module doc); `BTreeMap` +/// accumulation identical in shape to `lane_a_scalar`. +pub fn lane_b_simd(data: &[u8]) -> BTreeMap<String, Stats> { + let mut map: BTreeMap<String, Stats> = BTreeMap::new(); + let len = data.len(); + + // `line_start` — offset where the in-progress station name begins. + // `pending_semi` — `Some(offset)` once `;` has been seen for the + // in-progress record but its `\n` has not yet arrived. + let mut line_start = 0usize; + let mut pending_semi: Option<usize> = None; + + let nl_needle = U8x32::splat(b'\n'); + let semi_needle = U8x32::splat(b';'); + + let aligned_end = (len / U8x32::LANES) * U8x32::LANES; + let mut pos = 0usize; + while pos < aligned_end { + let block = U8x32::from_slice(&data[pos..pos + U8x32::LANES]); + let nl_mask = block.cmpeq_mask(nl_needle); + let semi_mask = block.cmpeq_mask(semi_needle); + // `;` and `\n` never occupy the same byte, so OR-ing loses no + // information and gives one ascending walk over both event kinds. + let mut combined = nl_mask | semi_mask; + while combined != 0 { + let bit = combined.trailing_zeros() as usize; + let abs = pos + bit; + if (nl_mask >> bit) & 1 == 1 { + // Newline event: closes the temp field started at the + // most recent pending semicolon. 
+ let semi = pending_semi + .take() + .expect("newline event must be preceded by a pending semicolon"); + let name = std::str::from_utf8(&data[line_start..semi]) + .expect("station name is valid utf8"); + let tenths = parse_temp_tenths(&data[semi + 1..abs]); + match map.get_mut(name) { + Some(stats) => stats.observe(tenths), + None => { + map.insert(name.to_string(), Stats::single(tenths)); + } + } + line_start = abs + 1; + } else { + // Semicolon event: closes the station name, opens the temp + // field. + pending_semi = Some(abs); + } + combined &= combined - 1; // clear the lowest set bit + } + pos += U8x32::LANES; + } + + // Tail — fewer than 32 bytes remain. Finish with a plain scalar scan, + // continuing from whatever `line_start` / `pending_semi` state the SIMD + // pass left behind (mirrors `lane_a_scalar`'s per-record shape). + let mut i = aligned_end; + while i < len { + match pending_semi { + None => { + while data[i] != b';' { + i += 1; + } + pending_semi = Some(i); + i += 1; + } + Some(semi) => { + while data[i] != b'\n' { + i += 1; + } + let name = std::str::from_utf8(&data[line_start..semi]) + .expect("station name is valid utf8"); + let tenths = parse_temp_tenths(&data[semi + 1..i]); + match map.get_mut(name) { + Some(stats) => stats.observe(tenths), + None => { + map.insert(name.to_string(), Stats::single(tenths)); + } + } + i += 1; + line_start = i; + pending_semi = None; + } + } + } + + map +} diff --git a/crates/onebrc-probe/src/lane_d.rs b/crates/onebrc-probe/src/lane_d.rs new file mode 100644 index 00000000..d004fc8f --- /dev/null +++ b/crates/onebrc-probe/src/lane_d.rs @@ -0,0 +1,138 @@ +//! Lane D — `ractor` actor-per-worker aggregation. +//! +//! Purpose: quantifies the "ractor is a helper, not a messaging path" ruling +//! (see `.claude/v3/knowledge/v3-substrate-primer.md` §6) as a measured +//! ratio against Lane C's bare `std::thread::scope`, on **identical** +//! chunking (`chunk_bounds`) and merge shape (`Stats::merge` via +//! 
`merge_maps`) — the only variable this lane changes is the worker +//! primitive: a `ractor` `Actor` instead of a raw OS thread. +//! +//! Mirrors `lance-graph-supervisor`'s `KanbanActor` idioms (`kanban_actor.rs` +//! — `Actor::spawn`, the ask-pattern `ractor::call!`, `RpcReplyPort` in the +//! message variant) for a single, stateless worker actor. +//! +//! ## Actor-model boundary cost +//! +//! Lane C's workers borrow `&data[start..end]` directly (zero-copy) because +//! `std::thread::scope` proves the borrow outlives every spawned thread. +//! `ractor` actors run as independent tokio tasks that outlive any single +//! call site, so they cannot borrow the caller's stack slice — the corpus +//! is copied ONCE into an `Arc<Vec<u8>>` and each actor is handed a clone of +//! the `Arc` (refcount bump, not a byte copy) plus its `(start, end)` chunk +//! bounds. That one upfront `data.to_vec()` is itself part of what this +//! lane measures: it is the actor-model tax Lane C does not pay. +//! +//! ## Per-record logic +//! +//! Each actor computes its own chunk with `lane_a_scalar` — the SAME +//! shared per-record helper Lane C's worker closures already call (Lane C +//! already factored this out; there is nothing to duplicate here). The +//! coordinator folds the per-actor maps with `merge_maps` (the same +//! commutative, order-independent BUNDLE step Lane C uses). + +use crate::{chunk_bounds, lane_a_scalar, merge_maps, Stats}; +use ractor::{Actor, ActorProcessingErr, ActorRef, RpcReplyPort}; +use std::collections::BTreeMap; +use std::sync::Arc; + +/// Messages `ChunkWorker` accepts — a single ask-pattern variant per the +/// `KanbanActor` idiom (`kanban_actor.rs`'s `RpcReplyPort`-in-variant shape). +pub enum ChunkMsg { + /// Aggregate `data[start..end]` (newline-aligned, per `chunk_bounds`) + /// via `lane_a_scalar` and reply with the owned per-chunk map. 
+ Aggregate { + data: Arc<Vec<u8>>, + start: usize, + end: usize, + reply: RpcReplyPort<BTreeMap<String, Stats>>, + }, +} + +/// A stateless per-chunk aggregation actor. One is spawned per worker chunk +/// and stopped after its single reply — the actor-per-worker shape this +/// lane measures against Lane C's `std::thread::scope` worker closures. +pub struct ChunkWorker; + +impl Actor for ChunkWorker { + type Msg = ChunkMsg; + type State = (); + type Arguments = (); + + async fn pre_start( + &self, + _myself: ActorRef<Self::Msg>, + _args: Self::Arguments, + ) -> Result<Self::State, ActorProcessingErr> { + Ok(()) + } + + async fn handle( + &self, + _myself: ActorRef<Self::Msg>, + msg: Self::Msg, + _state: &mut Self::State, + ) -> Result<(), ActorProcessingErr> { + match msg { + ChunkMsg::Aggregate { + data, + start, + end, + reply, + } => { + // Same per-record logic as Lane C's worker closure — see + // module doc "Per-record logic". + let map = lane_a_scalar(&data[start..end]); + let _ = reply.send(map); + } + } + Ok(()) + } +} + +/// Lane D — actor-per-worker baseline. Builds a `workers`-thread tokio +/// runtime, splits `data` into `workers` newline-aligned chunks +/// (`chunk_bounds`, identical to Lane C), spawns one `ChunkWorker` per +/// chunk, asks each for its aggregate via `ractor::call!`, stops the actor, +/// then folds all per-chunk maps with the same commutative `merge_maps` +/// Lane C uses. +pub fn lane_d_ractor(data: &[u8], workers: usize) -> BTreeMap<String, Stats> { + let workers = workers.max(1); + let bounds = chunk_bounds(data, workers); + + let runtime = tokio::runtime::Builder::new_multi_thread() + .worker_threads(workers) + .build() + .expect("build tokio runtime for lane D"); + + runtime.block_on(async move { + // Actor-model boundary cost: one upfront copy into a shared Arc — + // see module doc "Actor-model boundary cost". 
+ let shared = Arc::new(data.to_vec()); + + let mut join_handles = Vec::with_capacity(bounds.len()); + for &(start, end) in &bounds { + let shared = Arc::clone(&shared); + join_handles.push(tokio::spawn(async move { + let (actor, handle) = Actor::spawn(None, ChunkWorker, ()) + .await + .expect("spawn lane D chunk worker"); + let map = ractor::call!(actor, |reply| ChunkMsg::Aggregate { + data: shared, + start, + end, + reply, + }) + .expect("lane D actor rpc"); + actor.stop(None); + handle.await.expect("lane D actor join"); + map + })); + } + + let mut results = Vec::with_capacity(join_handles.len()); + for jh in join_handles { + results.push(jh.await.expect("lane D worker task join")); + } + merge_maps(results) + }) +} diff --git a/crates/onebrc-probe/src/lib.rs b/crates/onebrc-probe/src/lib.rs index 3aeb7c3a..84af576e 100644 --- a/crates/onebrc-probe/src/lib.rs +++ b/crates/onebrc-probe/src/lib.rs @@ -10,9 +10,13 @@ //! - **Lane C** (`lane_c_threads`) — `std::thread` parallel baseline, //! newline-aligned chunk split + commutative merge. //! -//! Lane B (ndarray SIMD), Lane D (ractor actors), Lane E (kanban) are -//! follow-up work — see `README.md` for the stub sections describing what -//! each will measure. +//! - **Lane B** (`lane_b::lane_b_simd`, feature `lane-b`) — `ndarray::simd` +//! vectorized `;`/`\n` scan, scalar parse. +//! - **Lane D** (`lane_d::lane_d_ractor`, feature `lane-d`) — `ractor` +//! actor-per-worker over the same `chunk_bounds` split as Lane C. +//! +//! Lane E (kanban) is follow-up work — see `README.md` for the stub section +//! describing what it will measure. //! //! ## Reference inventory //! @@ -31,8 +35,17 @@ //! branchless parser, and SIMD semicolon-finding (`pulp` / `wide`). 
pub mod gen; +#[cfg(feature = "lane-b")] +pub mod lane_b; +#[cfg(feature = "lane-d")] +pub mod lane_d; pub mod sha256; +#[cfg(feature = "lane-b")] +pub use lane_b::lane_b_simd; +#[cfg(feature = "lane-d")] +pub use lane_d::lane_d_ractor; + use std::collections::BTreeMap; /// Per-station aggregate: min/max/sum/count, in tenths-of-a-degree. @@ -136,8 +149,7 @@ pub fn lane_a_scalar(data: &[u8]) -> BTreeMap { while data[i] != b';' { i += 1; } - let name = - std::str::from_utf8(&data[name_start..i]).expect("station name is valid utf8"); + let name = std::str::from_utf8(&data[name_start..i]).expect("station name is valid utf8"); i += 1; // skip ';' let temp_start = i; while data[i] != b'\n' { @@ -332,4 +344,110 @@ mod tests { "same seed must produce same sha256" ); } + + /// Lane A and Lane B must agree byte-for-byte on aggregate output over + /// a generated corpus — the correctness spot check for the SIMD + /// delimiter scan. + #[cfg(feature = "lane-b")] + #[test] + fn lane_b_agrees_with_lane_a_on_generated_corpus() { + let dir = std::env::temp_dir(); + let path = dir.join(format!("onebrc_probe_test_b_{}.txt", std::process::id())); + let result = gen::gen(&path, 50_000, 7).expect("gen"); + assert_eq!(result.rows, 50_000); + + let data = std::fs::read(&path).expect("read generated corpus"); + std::fs::remove_file(&path).ok(); + + let a = lane_a_scalar(&data); + let b = lane_b_simd(&data); + assert_eq!(a, b, "lane A and lane B must produce identical aggregates"); + assert!(!a.is_empty()); + } + + /// Hand-built corpus, crafted so at least one record's `;`/`\n` land in + /// DIFFERENT 32-byte SIMD blocks (block0=[0,32), block1=[32,64), + /// tail=[64,..)) — exercises the cross-iteration `line_start` / + /// `pending_semi` carry in `lane_b_simd`, not just the common in-block + /// case a random generated corpus would mostly hit. 
+ #[cfg(feature = "lane-b")] + #[test] + fn lane_b_handles_records_that_straddle_32_byte_block_boundaries() { + let long_name = "N".repeat(22); + let mut corpus = String::new(); + corpus.push_str("Ab;1.0\n"); // fully inside block0 + corpus.push_str(&format!("{long_name};9.9\n")); // straddles block0/block1 + corpus.push_str("Zz;3.3\n"); // fully inside block1 + corpus.push_str("QqRrSsTt;2.2\n"); // fully inside block1 + corpus.push_str("Uu;4.4\n"); // fully inside block1 + corpus.push_str("Vv;5.5\n"); // straddles block1/tail + + let data = corpus.as_bytes(); + assert!( + data.len() > 64, + "test corpus must span block0, block1, AND a tail region" + ); + + // Confirm (rather than assume) that at least one record's `;` and + // `\n` land in different 32-byte blocks — otherwise this test + // would silently degrade into testing only the non-crossing case. + let find_all = |needle: u8| -> Vec { + data.iter() + .enumerate() + .filter(|&(_, &b)| b == needle) + .map(|(i, _)| i) + .collect() + }; + let semis = find_all(b';'); + let newlines = find_all(b'\n'); + assert_eq!(semis.len(), newlines.len()); + let crosses_a_block = semis + .iter() + .zip(newlines.iter()) + .any(|(&s, &n)| s / 32 != n / 32); + assert!( + crosses_a_block, + "test corpus must contain a record whose `;`/`\\n` land in different 32-byte blocks" + ); + + let a = lane_a_scalar(data); + let b = lane_b_simd(data); + assert_eq!( + a, b, + "lane B must agree with lane A across straddled block boundaries" + ); + + let mut expected: BTreeMap = BTreeMap::new(); + expected.insert("Ab".to_string(), Stats::single(10)); + expected.insert(long_name, Stats::single(99)); + expected.insert("Zz".to_string(), Stats::single(33)); + expected.insert("QqRrSsTt".to_string(), Stats::single(22)); + expected.insert("Uu".to_string(), Stats::single(44)); + expected.insert("Vv".to_string(), Stats::single(55)); + assert_eq!( + b, expected, + "lane B stats must match the hand-computed expectation" + ); + } + + /// Lane A and Lane D 
must agree byte-for-byte on aggregate output — + /// the correctness spot check for the ractor actor-per-worker path. + /// Uses an odd worker count (3) on purpose, mirroring `chunk_bounds`'s + /// own odd-split test coverage. + #[cfg(feature = "lane-d")] + #[test] + fn lane_d_agrees_with_lane_a_on_generated_corpus() { + let dir = std::env::temp_dir(); + let path = dir.join(format!("onebrc_probe_test_d_{}.txt", std::process::id())); + let result = gen::gen(&path, 50_000, 13).expect("gen"); + assert_eq!(result.rows, 50_000); + + let data = std::fs::read(&path).expect("read generated corpus"); + std::fs::remove_file(&path).ok(); + + let a = lane_a_scalar(&data); + let d = lane_d_ractor(&data, 3); + assert_eq!(a, d, "lane A and lane D must produce identical aggregates"); + assert!(!a.is_empty()); + } } diff --git a/crates/onebrc-probe/src/main.rs b/crates/onebrc-probe/src/main.rs index a9c9711d..b81b7475 100644 --- a/crates/onebrc-probe/src/main.rs +++ b/crates/onebrc-probe/src/main.rs @@ -3,8 +3,11 @@ //! //! ```text //! onebrc-probe gen -//! onebrc-probe run [workers] +//! onebrc-probe run [workers] //! ``` +//! +//! Lane `b` requires `--features lane-b`; lane `d` requires +//! `--features lane-d` (see `README.md` §3/§4). 
use onebrc_probe::{gen::gen, lane_a_scalar, lane_c_threads}; use std::env; @@ -19,7 +22,7 @@ fn main() { Some("run") => cmd_run(&args[2..]), _ => { eprintln!( - "usage:\n onebrc-probe gen \n onebrc-probe run [workers]" + "usage:\n onebrc-probe gen \n onebrc-probe run [workers]" ); std::process::exit(2); } @@ -27,10 +30,7 @@ fn main() { } fn cmd_gen(args: &[String]) { - let path = PathBuf::from( - args.first() - .expect("usage: gen "), - ); + let path = PathBuf::from(args.first().expect("usage: gen ")); let rows: u64 = args .get(1) .expect("usage: gen ") @@ -54,7 +54,7 @@ fn cmd_gen(args: &[String]) { fn cmd_run(args: &[String]) { let path = PathBuf::from( args.first() - .expect("usage: run [workers]"), + .expect("usage: run [workers]"), ); let lane = args.get(1).map(String::as_str).unwrap_or("a"); let workers: usize = args @@ -80,8 +80,30 @@ fn cmd_run(args: &[String]) { let map = match lane { "a" => lane_a_scalar(&data), "c" => lane_c_threads(&data, workers), + "b" => { + #[cfg(feature = "lane-b")] + { + onebrc_probe::lane_b_simd(&data) + } + #[cfg(not(feature = "lane-b"))] + { + eprintln!("lane b requires --features lane-b"); + std::process::exit(1); + } + } + "d" => { + #[cfg(feature = "lane-d")] + { + onebrc_probe::lane_d_ractor(&data, workers) + } + #[cfg(not(feature = "lane-d"))] + { + eprintln!("lane d requires --features lane-d"); + std::process::exit(1); + } + } other => { - eprintln!("unknown lane '{other}' (expected 'a' or 'c')"); + eprintln!("unknown lane '{other}' (expected 'a', 'b', 'c', or 'd')"); std::process::exit(2); } }; diff --git a/crates/onebrc-probe/src/sha256.rs b/crates/onebrc-probe/src/sha256.rs index 169daefd..1d7e2a16 100644 --- a/crates/onebrc-probe/src/sha256.rs +++ b/crates/onebrc-probe/src/sha256.rs @@ -110,7 +110,12 @@ impl Sha256 { fn process_block(&mut self, block: &[u8; 64]) { let mut w = [0u32; 64]; for i in 0..16 { - w[i] = u32::from_be_bytes([block[i * 4], block[i * 4 + 1], block[i * 4 + 2], block[i * 4 + 3]]); + w[i] = 
u32::from_be_bytes([ + block[i * 4], + block[i * 4 + 1], + block[i * 4 + 2], + block[i * 4 + 3], + ]); } for i in 16..64 { let s0 = w[i - 15].rotate_right(7) ^ w[i - 15].rotate_right(18) ^ (w[i - 15] >> 3); From 79b3b3fb29c123f59ae08f6bdcd099347e589ce8 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Jul 2026 17:14:39 +0000 Subject: [PATCH 13/14] =?UTF-8?q?probe(1brc):=20lane=20E=20(kanban-schedul?= =?UTF-8?q?ed=20cards)=20=E2=80=94=20t2:=20journaling=20floor=20within=20n?= =?UTF-8?q?oise,=20~66=20us/card?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One kanban card per batch: fresh KanbanActor (real lance-graph-supervisor machinery, feature lane-e) driven through the full Rubicon forward arc (3x drive_version_tick) around each aggregation batch; AtomicUsize batch queue, workers pull; combined journal asserted 3*batches legal KanbanMoves. ProbeBoard mirrors the supervisor's own TestBoard shape (stand-in board; the real SoA board is lane F's business). t2 (recipe corpus re-verified, 4 cores, best-of-2): C 28.310 / D 22.381 / E(4) 22.963 / E(64) 22.477 / E(256) 22.118 Mrows/s. E-D ~ 0 at chunk granularity — the per-card journal (spawn + 3 ticks + join) is invisible next to the shared actor-boundary corpus copy; 256 cards cost ~4% total, ~66 us per card = ~0.01% of the W2d 550 ms Libet budget. The board is not a scheduling threat; the actor boundary remains the only material tax. Gates: 11/12/15 tests green (no-feature / lane-e / all-lanes), clippy -D warnings clean, fmt clean. README §5.2 carries tables + readings; plan Addendum-13 updated. 
Co-Authored-By: Claude Fable 5 Claude-Session: https://claude.ai/code/session_01MLBnPuScZy6w9di2QEjsXM --- .claude/v3/INTEGRATION-PLAN.md | 14 + crates/onebrc-probe/Cargo.lock | 831 +++++++++++++++++++++++++++++- crates/onebrc-probe/Cargo.toml | 19 +- crates/onebrc-probe/README.md | 58 ++- crates/onebrc-probe/src/lane_e.rs | 252 +++++++++ crates/onebrc-probe/src/lib.rs | 36 +- crates/onebrc-probe/src/main.rs | 46 +- 7 files changed, 1235 insertions(+), 21 deletions(-) create mode 100644 crates/onebrc-probe/src/lane_e.rs diff --git a/.claude/v3/INTEGRATION-PLAN.md b/.claude/v3/INTEGRATION-PLAN.md index 817c3b05..53573b58 100644 --- a/.claude/v3/INTEGRATION-PLAN.md +++ b/.claude/v3/INTEGRATION-PLAN.md @@ -562,3 +562,17 @@ forced one-time Arc corpus copy). Full tables + readings: `crates/onebrc-probe/README.md` §5.1. Remaining: lane E (E−D isolates the kanban journaling tax; feeds W2d), lane F (Morton-tile shader vs plain radix control — the addressing-tax isolator). + +#### Addendum-13 status update (2026-07-02, t2) + +Lane E SHIPPED (feature `lane-e`; one kanban card per batch — fresh +`KanbanActor` driven through the full Rubicon arc via 3× +`drive_version_tick` around each real aggregation batch; journal asserted +3 legal moves/batch). t2 (same recipe corpus, best-of-2): C 28.310 / +D 22.381 / E(4 cards) 22.963 / E(64) 22.477 / E(256) 22.118 Mrows/s. +**The W2d number lane E was sent to fetch: E−D ≈ 0 at chunk granularity +(journaling floor within noise) and ≈ 66 µs per card at fine granularity +(spawn + 3 ticks + join) — ~0.01% of the 550 ms Libet budget.** The board +is not a scheduling threat; the actor-boundary copy remains the only real +tax (unchanged ~20% vs C). Full tables: `crates/onebrc-probe/README.md` +§5.2. Remaining: lane F (Morton-tile shader vs plain radix control). 
diff --git a/crates/onebrc-probe/Cargo.lock b/crates/onebrc-probe/Cargo.lock index eaf46169..2911c1ca 100644 --- a/crates/onebrc-probe/Cargo.lock +++ b/crates/onebrc-probe/Cargo.lock @@ -2,6 +2,29 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "const-random", + "getrandom 0.3.4", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "arrayref" version = "0.3.9" @@ -14,12 +37,89 @@ version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3fb67a6e08acf24fdeccbac2cb6ac4305825bd1f117462e0e6f2f193345ad56" +[[package]] +name = "arrow-array" +version = "58.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfd33d3e92f207444098c75b42de99d329562be0cf686b307b097cc52b4e999e" +dependencies = [ + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "hashbrown 0.17.1", + "num-complex", + "num-integer", + "num-traits", +] + +[[package]] +name = "arrow-buffer" +version = "58.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c6cd424c2693bcdbc150d843dc9d4d137dd2de4782ce6df491ad11a3a0416c0" +dependencies = [ + "bytes", + "half", + "num-bigint", + "num-traits", +] + +[[package]] +name = "arrow-data" +version = "58.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c88210023a2bfee1896af366309a3028fc3bcbd6515fa29a7990ee1baa08ee0" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num-integer", + "num-traits", +] 
+ +[[package]] +name = "arrow-schema" +version = "58.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f633dbfdf39c039ada1bf9e34c694816eb71fbb7dc78f613993b7245e078a1ed" + +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "autocfg" version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" +[[package]] +name = "bgz-tensor" +version = "0.1.0" +dependencies = [ + "holograph", + "ndarray", +] + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + [[package]] name = "bitflags" version = "2.13.0" @@ -37,7 +137,16 @@ dependencies = [ "cc", "cfg-if", "constant_time_eq", - "cpufeatures", + "cpufeatures 0.3.0", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", ] [[package]] @@ -71,6 +180,16 @@ version = "3.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649" +[[package]] +name = "bytes" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ae3f5d315924270530207e2a68396c3cc547f6dca3fbdca317cfb1a51edb593" + +[[package]] +name = "causal-edge" +version = "0.2.0" + [[package]] name = "cc" version = "1.2.65" @@ -87,12 +206,58 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "chrono" +version = "0.4.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aa79e62e7697b8e29b513a68abacf485adcd1fe8284a4316c5ae868e6633327" +dependencies = [ + "iana-time-zone", + "num-traits", + "windows-link", +] + +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom 0.2.17", + "once_cell", + "tiny-keccak", +] + [[package]] name = "constant_time_eq" version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + [[package]] name = "cpufeatures" version = "0.3.0" @@ -108,6 +273,22 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "crypto-common" +version = "0.1.7" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "darling" version = "0.23.0" @@ -150,18 +331,41 @@ checksum = "e6361d5c062261c78a176addb82d4c821ae42bed6089de0e12603cd25de2059c" dependencies = [ "cfg-if", "crossbeam-utils", - "hashbrown", + "hashbrown 0.14.5", "lock_api", "once_cell", "parking_lot_core", ] +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + [[package]] name = "find-msvc-tools" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" +[[package]] +name = "fractal" +version = "0.1.0" +dependencies = [ + "libm", +] + [[package]] name = "futures" version = "0.3.32" @@ -250,24 +454,139 @@ dependencies = [ "slab", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", + "zerocopy", +] + [[package]] name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "highheelbgz" +version = "0.1.0" + +[[package]] +name = "holograph" +version = "0.1.0" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-schema", + "bincode", + "futures", + "log", + "serde", + "thiserror 2.0.18", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "ident_case" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown 0.17.1", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + [[package]] name = "js-sys" version = "0.3.103" @@ -279,12 +598,76 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lance-graph-callcenter" +version = "0.1.0" +dependencies = [ + "async-trait", + "lance-graph-contract", + "lance-graph-ontology", + "lance-graph-rbac", + "log", + "syn", + "thinking-engine", + "thiserror 1.0.69", +] + +[[package]] +name = "lance-graph-contract" +version = "0.1.0" +dependencies = [ + "glob", + "serde", + "serde_yaml", +] + +[[package]] +name = "lance-graph-ontology" +version = "0.1.0" +dependencies = [ + "lance-graph-contract", + "once_cell", + "oxrdf", + "oxrdfxml", + "oxttl", + "quick-xml", + "sha2", + "thiserror 2.0.18", + "toml", +] + +[[package]] +name = "lance-graph-rbac" +version = "0.1.0" +dependencies = [ + "lance-graph-contract", +] + +[[package]] +name = "lance-graph-supervisor" +version = "0.1.0" +dependencies = [ + "lance-graph-callcenter", + "lance-graph-contract", + "ractor", + "static_assertions", + "thiserror 1.0.69", + "tokio", + "tracing", +] + [[package]] name = "libc" version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" +[[package]] +name = "libm" +version = "0.2.16" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + [[package]] name = "lock_api" version = "0.4.14" @@ -294,6 +677,12 @@ dependencies = [ "scopeguard", ] +[[package]] +name = "log" +version = "0.4.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ceec5bc11778974d1bcb055b18002eba7f4b3518b6a0081b3af5f21666da9ad" + [[package]] name = "matrixmultiply" version = "0.3.10" @@ -315,16 +704,28 @@ name = "ndarray" version = "0.17.2" dependencies = [ "blake3", + "fractal", "matrixmultiply", "num-complex", "num-integer", "num-traits", + "p64", "paste", "portable-atomic", "portable-atomic-util", "rawpointer", ] +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + [[package]] name = "num-complex" version = "0.4.6" @@ -350,6 +751,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -362,11 +764,73 @@ checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" name = "onebrc-probe" version = "0.1.0" dependencies = [ + "lance-graph-contract", + "lance-graph-supervisor", "ndarray", "ractor", "tokio", ] +[[package]] +name = "oxilangtag" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d3b4eb570abd4a1dcb062c31fd37b832264d9dc7292c3e69acfe926c87b063f" +dependencies = [ + "serde", +] + +[[package]] +name = "oxiri" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54b4ed3a7192fa19f5f48f99871f2755047fabefd7f222f12a1df1773796a102" + +[[package]] +name = "oxrdf" +version = "0.3.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0afd5c28e4a399c57ee2bc3accd40c7b671fdc7b6537499f14e95b265af7d7e0" +dependencies = [ + "oxilangtag", + "oxiri", + "rand", + "thiserror 2.0.18", +] + +[[package]] +name = "oxrdfxml" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd5516ae083d09bc57ec65ed5ee97701481725de6ffaa83d968ab42a96157ba1" +dependencies = [ + "oxilangtag", + "oxiri", + "oxrdf", + "quick-xml", + "thiserror 2.0.18", +] + +[[package]] +name = "oxttl" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f03fd471bd54c23d76631c0a2677aa4bb308d905f6e491ee35dcb0732b7c5c6c" +dependencies = [ + "memchr", + "oxilangtag", + "oxiri", + "oxrdf", + "thiserror 2.0.18", +] + +[[package]] +name = "p64" +version = "0.1.0" +dependencies = [ + "fractal", +] + [[package]] name = "parking_lot_core" version = "0.9.12" @@ -407,6 +871,15 @@ dependencies = [ "portable-atomic", ] +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + [[package]] name = "prettyplease" version = "0.2.37" @@ -426,6 +899,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "quick-xml" +version = "0.37.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" +dependencies = [ + "memchr", +] + [[package]] name = "quote" version = "1.0.46" @@ -435,6 +917,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + [[package]] name = "ractor" version = "0.15.13" @@ -454,6 +942,35 @@ dependencies = [ "web-time", ] +[[package]] +name = "rand" 
+version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + [[package]] name = "rawpointer" version = "0.2.1" @@ -475,12 +992,94 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + [[package]] name = "scopeguard" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.150" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "serde_spanned" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" +dependencies = [ + "serde", +] + +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures 0.2.17", + "digest", +] + [[package]] name = "shlex" version = "2.0.1" @@ -499,6 +1098,12 @@ version = "1.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ed6a63f02c8539c91a8685a86f4099661ba3da017932f6ebbea6de3f0fa7c90" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.11.1" @@ -537,6 +1142,67 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "thinking-engine" +version = "0.1.0" +dependencies = [ + "bgz-tensor", + "causal-edge", + "highheelbgz", + "ndarray", + "serde", + "serde_json", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl 2.0.18", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "tokio" version = "1.52.3" @@ -583,6 +1249,40 @@ dependencies = [ "syn", ] +[[package]] +name = "toml" +version = "0.8.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "winnow", +] + [[package]] name = "tracing" version = "0.1.44" @@ -614,12 +1314,45 @@ dependencies = [ "once_cell", ] +[[package]] +name = "typenum" +version = "1.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20" + [[package]] name = "unicode-ident" version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.4+wasi-0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b67efb37e106e55ce722a510d6b5f9c17f083e5fc79afc2badeb12cc313d9487" +dependencies = [ + "wit-bindgen", +] + [[package]] name = "wasm-bindgen" version = "0.2.126" @@ -695,8 +1428,102 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = 
"windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "windows-link" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "winnow" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" +dependencies = [ + "memchr", +] + +[[package]] +name = "wit-bindgen" +version = "0.57.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + +[[package]] +name = "zerocopy" +version = "0.8.52" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce1022995ff5ff5d841ad7d994facc23098cd40152f2c1d11cd607c6f530653f" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.52" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "1ae7f38b72ec2a254e2b87ef277cf2cd4fb97cbebf944faa6f33354da0867930" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/crates/onebrc-probe/Cargo.toml b/crates/onebrc-probe/Cargo.toml index 7c5e23a7..324b5d93 100644 --- a/crates/onebrc-probe/Cargo.toml +++ b/crates/onebrc-probe/Cargo.toml @@ -4,15 +4,16 @@ version = "0.1.0" edition = "2021" license = "Apache-2.0" publish = false -description = "1BRC (One Billion Row Challenge) substrate probe: measures the V3 SoA/mailbox substrate's groupby-aggregate throughput at container scale (100M rows). Lane A (single-thread scalar baseline), Lane C (std::thread parallel baseline), Lane B (ndarray SIMD, feature lane-b) and Lane D (ractor actors, feature lane-d) ship here; Lane E (kanban) is follow-up work — see README.md." +description = "1BRC (One Billion Row Challenge) substrate probe: measures the V3 SoA/mailbox substrate's groupby-aggregate throughput at container scale (100M rows). Lane A (single-thread scalar baseline), Lane C (std::thread parallel baseline), Lane B (ndarray SIMD, feature lane-b), Lane D (ractor actors, feature lane-d) and Lane E (kanban-scheduled batches, feature lane-e) ship here — see README.md." # Standalone, workspace-EXCLUDED crate (see root Cargo.toml `exclude`, same # precedent as crates/bgz17 and crates/deepnsm). Lanes A/C are zero-dependency -# (std only); lanes B/D are opt-in via feature flags so the zero-dep contract +# (std only); lanes B/D/E are opt-in via feature flags so the zero-dep contract # for A/C is never silently broken. 
Verify with: # cargo test --manifest-path crates/onebrc-probe/Cargo.toml # cargo test --manifest-path crates/onebrc-probe/Cargo.toml --features lane-b # cargo test --manifest-path crates/onebrc-probe/Cargo.toml --features lane-d +# cargo test --manifest-path crates/onebrc-probe/Cargo.toml --features lane-e # Empty [workspace] table: this crate is `exclude`d from the parent # lance-graph workspace, but cargo's manifest auto-discovery would otherwise @@ -21,9 +22,13 @@ description = "1BRC (One Billion Row Challenge) substrate probe: measures the V3 [workspace] [features] -# Lanes A/C stay dependency-free; B and D are opt-in. +# Lanes A/C stay dependency-free; B, D, E are opt-in. lane-b = ["dep:ndarray"] lane-d = ["dep:ractor", "dep:tokio"] +# Lane E (kanban-scheduled batches): reuses the lane-d ractor/tokio deps plus +# the V3 kanban execution machinery (lance-graph-supervisor's KanbanActor + +# drivers, lance-graph-contract's kanban/scheduler/soa_view contract types). +lane-e = ["dep:lance-graph-supervisor", "dep:lance-graph-contract", "dep:ractor", "dep:tokio"] [dependencies] # AdaWorldAPI fork (workspace P0 rule: fork over crates.io, always — see @@ -36,5 +41,13 @@ ndarray = { path = "../../../ndarray", optional = true, default-features = false # fork carries the MessagingErr::Saturated non-exhaustive-match fix). ractor = { git = "https://github.com/AdaWorldAPI/ractor", optional = true, default-features = false, features = ["tokio_runtime"] } tokio = { version = "1", optional = true, default-features = false, features = ["rt-multi-thread", "macros"] } +# Sibling in-tree crates (path deps, not the outer workspace — this crate is +# workspace-excluded per the `[workspace]` table above). lance-graph-contract +# is zero-dep; lance-graph-supervisor needs its own `supervisor` feature for +# the KanbanActor + drive_version_tick surface lane E drives. 
+lance-graph-contract = { path = "../lance-graph-contract", optional = true } +lance-graph-supervisor = { path = "../lance-graph-supervisor", optional = true, features = [ + "supervisor", +] } [dev-dependencies] diff --git a/crates/onebrc-probe/README.md b/crates/onebrc-probe/README.md index 9bda6e1b..89934b2d 100644 --- a/crates/onebrc-probe/README.md +++ b/crates/onebrc-probe/README.md @@ -82,7 +82,7 @@ regenerating with the same `(rows, seed)` and diffing the printed | **C** — `lane_c_threads` | `std::thread` parallel baseline: newline-aligned `chunk_bounds` split, per-worker owned `BTreeMap`, commutative `Stats::merge` combine | **Shipped** | | **B** — `lane_b::lane_b_simd` (feature `lane-b`) | Vectorized `;`/`\n` scanning via `ndarray::simd::U8x32::cmpeq_mask` (per the workspace's SIMD rule — never raw `pulp`/`wide`/hand intrinsics in a consumer crate; see `.claude/knowledge/ndarray-vertical-simd-alien-magic.md`), 32-byte-stride scan with cross-block `line_start`/`pending_semi` carry, scalar temp parse (SWAR/branchless parse deliberately still deferred). | **Shipped** | | **D** — `lane_d::lane_d_ractor` (feature `lane-d`) | Same groupby-aggregate workload as Lane C, but each `chunk_bounds` chunk is aggregated by a stateless `ractor` actor (actor-per-worker, ask-pattern reply, `lance-graph-supervisor`-style `Actor`/`RpcReplyPort` shape) instead of a bare `std::thread::scope` closure — identical chunking + commutative merge, only the worker primitive changes. | **Shipped** | -| **E** — kanban | Routes the aggregation through the V3 kanban execution machinery (`v3-kanban-executor-engineer` domain — `KanbanPhase` lifecycle, ahead-firing batch writer) to measure the substrate's own scheduling/dispatch overhead against the bare-metal Lane A/C numbers as a ceiling reference. 
| **Not implemented** — orchestrator follow-up | +| **E** — `lane_e::lane_e_kanban` (feature `lane-e`) | One kanban card per batch: the corpus splits into `batches` newline-aligned chunks (`batches >= workers`) pulled from a shared `AtomicUsize` queue by `workers` puller tasks; every batch is journaled by a fresh `lance-graph-supervisor::KanbanActor` driven through the full Rubicon forward arc (`Planning->CognitiveWork->Evaluation->Commit`, `drive_version_tick` × 3) around the actual `lane_a_scalar` work. The combined journal is asserted to carry exactly `3 * batches` legal `KanbanMove`s. `batches == workers` vs Lane D isolates the kanban journaling cost (identical chunking + actor-model tax, only the actor type differs); fine-grained batching (`batches >> workers`) prices per-card scheduling overhead (feeds W2d, the 550 ms Libet budget question). | **Shipped** | --- @@ -90,17 +90,25 @@ regenerating with the same `(rows, seed)` and diffing the printed ```text onebrc-probe gen -onebrc-probe run [workers] +onebrc-probe run [workers] [batches] ``` -Lane `b` requires `--features lane-b`; lane `d` requires `--features lane-d` -(see `Cargo.toml` `[features]`). Lanes A/C stay dependency-free either way: +Lane `b` requires `--features lane-b`; lane `d` requires `--features lane-d`; +lane `e` requires `--features lane-e` (see `Cargo.toml` `[features]`). Lanes +A/C stay dependency-free either way. 
`batches` is **lane-`e`-only** (ignored +by every other lane): the number of newline-aligned batches the corpus +splits into, each journaled as one kanban card (`batches >= workers`, +default `workers * 16`): ```bash cargo run --release --manifest-path crates/onebrc-probe/Cargo.toml \ --features lane-b -- run /tmp/onebrc_10m.txt b cargo run --release --manifest-path crates/onebrc-probe/Cargo.toml \ --features lane-d -- run /tmp/onebrc_10m.txt d 4 +cargo run --release --manifest-path crates/onebrc-probe/Cargo.toml \ + --features lane-e -- run /tmp/onebrc_10m.txt e 4 4 +cargo run --release --manifest-path crates/onebrc-probe/Cargo.toml \ + --features lane-e -- run /tmp/onebrc_10m.txt e 4 64 ``` `run` prints: @@ -113,6 +121,9 @@ lane= rows= workers= elapsed_ms= throughput_mrows_s= ... ``` +For lane `e` the line carries an extra `batches=` field between +`workers=` and `elapsed_ms=` — every other lane's line is unchanged. + The first/last-3-stations dump (map is a `BTreeMap`, so this is sorted-by-name order) is the correctness spot-check surface — a cheap sanity signal that the aggregate isn't obviously garbage without diffing @@ -202,3 +213,42 @@ Readings: - Lane E (kanban scheduling tax, E−D isolates the journaling cost) is the next lane; lane F (Morton-tile cascaded shader vs plain radix control) closes the set per Addendum-13. + +### §5.2 — t2 (lane E) — measured 2026-07-02 + +Same recipe corpus (hash re-verified byte-identical at regeneration), +same 4-core container, release build `--features lane-b,lane-d,lane-e`. +Two passes per configuration, best-of-2 (both listed). 
Lane C and D +re-run in the same session as live comparators: + +| Lane | workers | batches | elapsed_ms (best) | throughput (Mrows/s) | +|---|---|---|---|---| +| C (threads) | 4 | — | 353.235 | 28.310 (28.3, 27.5) | +| D (ractor) | 4 | — | 446.805 | 22.381 (21.6, 22.4) | +| E (kanban) | 4 | 4 | 435.489 | 22.963 (22.8, 23.0) | +| E (kanban) | 4 | 64 | 444.903 | 22.477 (22.0, 22.5) | +| E (kanban) | 4 | 256 | 452.126 | 22.118 (21.8, 22.1) | + +Readings: + +- **The kanban journaling floor is within noise.** E at + `batches == workers` (one card per worker — lane D's chunking plus a + full Rubicon journal per chunk) measured *at or slightly above* lane D + (22.963 vs 22.381 best-of; the two interleave across passes). The + fresh-`KanbanActor`-per-card spawn + 3 `drive_version_tick` RPCs + + join are invisible next to the shared actor-boundary corpus copy both + lanes pay. E−D ≈ 0: journaling real work through the board costs + nothing measurable at chunk granularity. +- **Fine-grained cards stay cheap.** 4 → 256 cards costs ~4% + (22.963 → 22.118). Per-card overhead from the E(256)−E(4) elapsed + delta: ≈ 16.6 ms / 252 extra cards ≈ **66 µs per card** (actor spawn + + 3 Rubicon ticks + join + queue pull). Against W2d's 550 ms Libet + budget, a card's scheduling overhead is ~0.01% — the budget is spent + on thinking, not on the board. This is the number Addendum-13 sent + lane E to fetch. +- The dominant tax in D and E alike remains the actor-model boundary + (one-time `Arc` corpus copy + task-vs-scoped-thread overhead) — the + ~20% D-vs-C gap carries over unchanged; the kanban layer adds nothing + material on top. +- Lane F (Morton-tile cascaded shader vs a plain radix control — the + addressing-tax isolator) is the remaining lane. diff --git a/crates/onebrc-probe/src/lane_e.rs b/crates/onebrc-probe/src/lane_e.rs new file mode 100644 index 00000000..ffde5ee6 --- /dev/null +++ b/crates/onebrc-probe/src/lane_e.rs @@ -0,0 +1,252 @@ +//! Lane E — kanban-scheduled batches. 
+//! +//! Per Addendum-13 lane E (see `README.md` §3), this lane measures the V3 +//! kanban scheduling/journaling tax on top of the SAME groupby-aggregate +//! workload lanes A/C/D already measure. The corpus is split into `batches` +//! newline-aligned chunks (`batches >= workers`, `chunk_bounds`), pulled by +//! `workers` puller tasks from a shared lock-free queue (`AtomicUsize` +//! index into the batch list), and EVERY batch is journaled as one kanban +//! card: a fresh [`KanbanActor`] (from `lance-graph-supervisor`, feature +//! `supervisor`) whose owned [`ProbeBoard`] is driven through the full +//! Rubicon **forward arc** (`Planning -> CognitiveWork -> Evaluation -> +//! Commit`) around the actual per-batch work +//! ([`crate::lane_a_scalar`](super::lane_a_scalar)). +//! +//! Two readings this lane is built to support: +//! +//! - **E at `batches == workers`** vs Lane D: identical `chunk_bounds` +//! split, identical `Arc>` corpus-copy tax (see `lane_d.rs` +//! module doc "Actor-model boundary cost") — the only variable is +//! swapping Lane D's stateless `ChunkWorker` ask-pattern actor for a +//! `KanbanActor` driven through 3 Rubicon ticks per batch. +//! E-D isolates the **journaling cost** in isolation from the actor-model +//! tax Lane D already prices. +//! - **E at fine granularity** (`batches >> workers`, e.g. +//! `batches = workers * 16`): each puller spawns, ticks 3×, and stops +//! many short-lived actors instead of one long-lived one per worker — +//! prices the **per-card scheduling overhead** the V3 substrate pays when +//! work is journaled at kanban-card granularity rather than +//! worker-chunk granularity. This feeds W2d (the 550 ms Libet budget +//! question — how many kanban cards per wall-clock second the substrate +//! can actually journal). +//! +//! ## Journal invariant +//! +//! Each batch drives exactly 3 [`KanbanMove`]s (`Planning->CognitiveWork`, +//! `CognitiveWork->Evaluation`, `Evaluation->Commit` — the pure forward arc +//! 
to the absorbing `Commit` column, mirroring `kanban_actor.rs`'s +//! `run_to_absorbing` test). Every worker collects its own moves into a +//! local `Vec`; at the end of [`lane_e_kanban`] the combined +//! journal is asserted to have exactly `3 * batches` moves, and every move +//! is asserted legal via [`KanbanColumn::can_transition_to`] — a violated +//! assert here is a probe bug, not a measurement. + +use crate::{chunk_bounds, lane_a_scalar, merge_maps, Stats}; +use lance_graph_contract::collapse_gate::MailboxId; +use lance_graph_contract::kanban::{ExecTarget, KanbanColumn, KanbanMove}; +use lance_graph_contract::scheduler::DatasetVersion; +use lance_graph_contract::soa_view::{MailboxSoaOwner, MailboxSoaView}; +use lance_graph_supervisor::{drive_version_tick, KanbanActor}; +use ractor::Actor; +use std::collections::BTreeMap; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; + +/// The probe's stand-in kanban-owned board — mirrors the shape of +/// `lance-graph-supervisor`'s own `TestBoard` (`kanban_actor.rs`'s test +/// module): a minimal in-RAM [`MailboxSoaView`] + [`MailboxSoaOwner`] with +/// empty column slices (`n_rows() == 0`, no energy/edges/meta/entity_type +/// data). This lane measures the KANBAN JOURNALING overhead only, not SoA +/// storage — a real SoA board wired to actual rows is lane F's business +/// (Morton-tile cascaded shader, per README §5.1's closing note). +struct ProbeBoard { + id: MailboxId, + phase: KanbanColumn, + cycle: u32, +} + +impl ProbeBoard { + /// A fresh board for kanban card `id`, starting at the spawn state + /// ([`KanbanColumn::Planning`], the `#[default]` variant). 
+ fn new(id: MailboxId) -> Self { + Self { + id, + phase: KanbanColumn::default(), + cycle: 0, + } + } +} + +impl MailboxSoaView for ProbeBoard { + fn mailbox_id(&self) -> MailboxId { + self.id + } + fn n_rows(&self) -> usize { + 0 + } + fn w_slot(&self) -> u8 { + // `id` here is a probe-local kanban-card counter, not a composed + // classid — this is the same bit-op `TestBoard::w_slot` uses over + // `MailboxId` (a plain `u32`), not classid discrimination. + (self.id & 0x3F) as u8 + } + fn current_cycle(&self) -> u32 { + self.cycle + } + fn phase(&self) -> KanbanColumn { + self.phase + } + fn energy(&self) -> &[f32] { + &[] + } + fn edges_raw(&self) -> &[u64] { + &[] + } + fn meta_raw(&self) -> &[u32] { + &[] + } + fn entity_type(&self) -> &[u16] { + &[] + } +} + +impl MailboxSoaOwner for ProbeBoard { + fn advance_phase(&mut self, to: KanbanColumn) -> KanbanMove { + let from = self.phase; + self.phase = to; + self.cycle = self.cycle.wrapping_add(1); + KanbanMove { + mailbox: self.id, + from, + to, + witness_chain_position: self.cycle, + libet_offset_us: 0, + exec: ExecTarget::Native, + } + } +} + +/// Lane E — kanban-scheduled batches. See module doc for the full design +/// and the two readings (E vs D at `batches == workers`; E at fine +/// granularity for per-card scheduling cost). +/// +/// `batches` is clamped to `>= workers.max(1)` — a batch queue thinner than +/// the worker pool would leave pullers idle and defeat the point of the +/// shared-queue design. +pub fn lane_e_kanban(data: &[u8], workers: usize, batches: usize) -> BTreeMap { + let workers = workers.max(1); + let batches = batches.max(workers.max(1)); + let bounds = chunk_bounds(data, batches); + + let runtime = tokio::runtime::Builder::new_multi_thread() + .worker_threads(workers) + .build() + .expect("build tokio runtime for lane E"); + + runtime.block_on(async move { + // One-time corpus copy into a shared Arc — the same actor-model + // boundary cost Lane D pays (see `lane_d.rs` module doc). 
+ let shared = Arc::new(data.to_vec()); + let bounds = Arc::new(bounds); + // Lock-free shared batch queue: each puller atomically claims the + // next batch index until the queue is exhausted. + let next = Arc::new(AtomicUsize::new(0)); + + let mut join_handles = Vec::with_capacity(workers); + for _ in 0..workers { + let shared = Arc::clone(&shared); + let bounds = Arc::clone(&bounds); + let next = Arc::clone(&next); + join_handles.push(tokio::spawn(async move { + let mut local_map: BTreeMap = BTreeMap::new(); + let mut journal: Vec = Vec::new(); + + loop { + let idx = next.fetch_add(1, Ordering::Relaxed); + if idx >= bounds.len() { + break; + } + let (start, end) = bounds[idx]; + + // One kanban card per batch: a fresh KanbanActor whose + // owned board starts at Planning. + let (actor, handle) = Actor::spawn( + None, + KanbanActor::::default(), + ProbeBoard::new(idx as MailboxId), + ) + .await + .expect("spawn lane E kanban actor"); + + // Tick 1: Planning -> CognitiveWork. + let mv1 = drive_version_tick(&actor, DatasetVersion(1)) + .await + .expect("lane E tick 1 rpc") + .expect("Planning -> CognitiveWork must advance"); + journal.push(mv1); + + // The actual work — same per-record helper every lane + // shares (see `lib.rs` module doc "Reference inventory"). + let batch_map = lane_a_scalar(&shared[start..end]); + + // Tick 2: CognitiveWork -> Evaluation. Merge the batch's + // map into the worker-local accumulator here — mirrors + // the commutative BUNDLE step `merge_maps` uses, applied + // per-batch instead of per-worker (see `Stats::merge` + // struct-level doc). + let mv2 = drive_version_tick(&actor, DatasetVersion(2)) + .await + .expect("lane E tick 2 rpc") + .expect("CognitiveWork -> Evaluation must advance"); + journal.push(mv2); + for (name, stats) in batch_map { + match local_map.get_mut(&name) { + Some(existing) => existing.merge(&stats), + None => { + local_map.insert(name, stats); + } + } + } + + // Tick 3: Evaluation -> Commit (absorbing). 
+ let mv3 = drive_version_tick(&actor, DatasetVersion(3)) + .await + .expect("lane E tick 3 rpc") + .expect("Evaluation -> Commit must advance"); + journal.push(mv3); + + actor.stop(None); + handle.await.expect("lane E actor join"); + } + + (local_map, journal) + })); + } + + let mut worker_maps = Vec::with_capacity(join_handles.len()); + let mut all_moves: Vec = Vec::new(); + for jh in join_handles { + let (map, journal) = jh.await.expect("lane E worker task join"); + worker_maps.push(map); + all_moves.extend(journal); + } + + // Journal invariant (see module doc "Journal invariant"): exactly 3 + // moves per batch, every move a legal Rubicon edge. + assert_eq!( + all_moves.len(), + 3 * batches, + "lane E journal must record exactly 3 kanban moves per batch" + ); + for mv in &all_moves { + assert!( + mv.from.can_transition_to(mv.to), + "lane E journal move {:?} -> {:?} must be a legal Rubicon edge", + mv.from, + mv.to + ); + } + + merge_maps(worker_maps) + }) +} diff --git a/crates/onebrc-probe/src/lib.rs b/crates/onebrc-probe/src/lib.rs index 84af576e..83ac7925 100644 --- a/crates/onebrc-probe/src/lib.rs +++ b/crates/onebrc-probe/src/lib.rs @@ -14,9 +14,12 @@ //! vectorized `;`/`\n` scan, scalar parse. //! - **Lane D** (`lane_d::lane_d_ractor`, feature `lane-d`) — `ractor` //! actor-per-worker over the same `chunk_bounds` split as Lane C. -//! -//! Lane E (kanban) is follow-up work — see `README.md` for the stub section -//! describing what it will measure. +//! - **Lane E** (`lane_e::lane_e_kanban`, feature `lane-e`) — kanban-scheduled +//! batches: a shared `AtomicUsize` batch queue, one fresh `KanbanActor` per +//! batch driven through the full Rubicon forward arc +//! (Planning->CognitiveWork->Evaluation->Commit) around the actual work. +//! Measures the V3 kanban scheduling/journaling tax (E-D isolates the +//! journaling cost; fine-grained batching prices per-card scheduling). //! //! ## Reference inventory //! 
@@ -39,12 +42,16 @@ pub mod gen; pub mod lane_b; #[cfg(feature = "lane-d")] pub mod lane_d; +#[cfg(feature = "lane-e")] +pub mod lane_e; pub mod sha256; #[cfg(feature = "lane-b")] pub use lane_b::lane_b_simd; #[cfg(feature = "lane-d")] pub use lane_d::lane_d_ractor; +#[cfg(feature = "lane-e")] +pub use lane_e::lane_e_kanban; use std::collections::BTreeMap; @@ -450,4 +457,27 @@ mod tests { assert_eq!(a, d, "lane A and lane D must produce identical aggregates"); assert!(!a.is_empty()); } + + /// Lane A and Lane E must agree byte-for-byte on aggregate output — the + /// correctness spot check for the kanban-scheduled-batches path. Uses + /// `workers=3, batches=7` (odd, greater than workers) so the shared + /// `AtomicUsize` batch queue actually round-robins multiple batches per + /// puller, not just one batch per worker (the `batches == workers` + /// degenerate case lane D already covers via its own test). + #[cfg(feature = "lane-e")] + #[test] + fn lane_e_agrees_with_lane_a_on_generated_corpus() { + let dir = std::env::temp_dir(); + let path = dir.join(format!("onebrc_probe_test_e_{}.txt", std::process::id())); + let result = gen::gen(&path, 50_000, 21).expect("gen"); + assert_eq!(result.rows, 50_000); + + let data = std::fs::read(&path).expect("read generated corpus"); + std::fs::remove_file(&path).ok(); + + let a = lane_a_scalar(&data); + let e = lane_e_kanban(&data, 3, 7); + assert_eq!(a, e, "lane A and lane E must produce identical aggregates"); + assert!(!a.is_empty()); + } } diff --git a/crates/onebrc-probe/src/main.rs b/crates/onebrc-probe/src/main.rs index b81b7475..adc0f1e0 100644 --- a/crates/onebrc-probe/src/main.rs +++ b/crates/onebrc-probe/src/main.rs @@ -3,11 +3,12 @@ //! //! ```text //! onebrc-probe gen -//! onebrc-probe run [workers] +//! onebrc-probe run [workers] [batches] //! ``` //! -//! Lane `b` requires `--features lane-b`; lane `d` requires -//! `--features lane-d` (see `README.md` §3/§4). +//! 
Lane `b` requires `--features lane-b`; lane `d` and lane `e` require +//! `--features lane-d` / `--features lane-e` respectively (see `README.md` +//! §3/§4). `batches` is lane-`e`-only (optional, default `workers * 16`). use onebrc_probe::{gen::gen, lane_a_scalar, lane_c_threads}; use std::env; @@ -22,7 +23,7 @@ fn main() { Some("run") => cmd_run(&args[2..]), _ => { eprintln!( - "usage:\n onebrc-probe gen \n onebrc-probe run [workers]" + "usage:\n onebrc-probe gen \n onebrc-probe run [workers] [batches]" ); std::process::exit(2); } @@ -54,7 +55,7 @@ fn cmd_gen(args: &[String]) { fn cmd_run(args: &[String]) { let path = PathBuf::from( args.first() - .expect("usage: run [workers]"), + .expect("usage: run [workers] [batches]"), ); let lane = args.get(1).map(String::as_str).unwrap_or("a"); let workers: usize = args @@ -65,6 +66,13 @@ fn cmd_run(args: &[String]) { .map(|n| n.get()) .unwrap_or(1) }); + // Lane-`e`-only: number of kanban-journaled batches the corpus splits + // into (default `workers * 16` — fine-grained batching prices per-card + // scheduling; see `lane_e.rs` module doc). + let batches: usize = args + .get(3) + .map(|s| s.parse().expect("batches must be a usize")) + .unwrap_or(workers * 16); // NOTE (mmap note): plain `std::fs::read`, NOT mmap. 
automataIA/1brc-rs // treats `memmap2::Mmap` as "the only path to break the 2-second @@ -102,8 +110,19 @@ fn cmd_run(args: &[String]) { std::process::exit(1); } } + "e" => { + #[cfg(feature = "lane-e")] + { + onebrc_probe::lane_e_kanban(&data, workers, batches) + } + #[cfg(not(feature = "lane-e"))] + { + eprintln!("lane e requires --features lane-e"); + std::process::exit(1); + } + } other => { - eprintln!("unknown lane '{other}' (expected 'a', 'b', 'c', or 'd')"); + eprintln!("unknown lane '{other}' (expected 'a', 'b', 'c', 'd', or 'e')"); std::process::exit(2); } }; @@ -111,9 +130,18 @@ fn cmd_run(args: &[String]) { let elapsed_ms = elapsed.as_secs_f64() * 1000.0; let throughput_mrows_s = (rows as f64 / 1_000_000.0) / elapsed.as_secs_f64(); - println!( - "lane={lane} rows={rows} workers={workers} elapsed_ms={elapsed_ms:.3} throughput_mrows_s={throughput_mrows_s:.3}" - ); + // `batches=` is appended ONLY for lane e (see README §4) — the other + // lanes don't have a batch concept, so the printed line stays identical + // for them. + if lane == "e" { + println!( + "lane={lane} rows={rows} workers={workers} batches={batches} elapsed_ms={elapsed_ms:.3} throughput_mrows_s={throughput_mrows_s:.3}" + ); + } else { + println!( + "lane={lane} rows={rows} workers={workers} elapsed_ms={elapsed_ms:.3} throughput_mrows_s={throughput_mrows_s:.3}" + ); + } // Correctness spot-check surface — first/last 3 stations by name (map // is a BTreeMap, so iteration order is the sorted station-name order). 
From 63243bc73af4539f07bd4a39565cb706a235fdc2 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Jul 2026 17:20:23 +0000 Subject: [PATCH 14/14] =?UTF-8?q?probe(1brc):=20lanes=20F=20(Morton-tile?= =?UTF-8?q?=20SoA)=20+=20R=20(radix=20control)=20=E2=80=94=20t3:=20route-a?= =?UTF-8?q?nd-write=203x,=20addressing=20tax=20~10%;=20probe=20complete?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lane F: station identity -> FNV-1a 64 -> two axis bytes nibble-interleaved into the 16-bit Morton tile position (the GUID canon's 256x256 centroid-tile read) -> flat SoA accumulators (min[]/max[]/sum[]/count[], open-addressed, name-verified on tag hit), gated indexed writes, per-worker owned tables BUNDLE-merged. Lane R: byte-identical pipeline, slot = hash & 0xFFFF — F-R isolates the Morton addressing tax exactly, R-C prices flat-SoA-vs-BTreeMap. Both std-only; A/C zero-dep contract holds. Collision-forcing probe test (constant slot fn) included. t3 (recipe corpus re-verified, 4 cores, 5 passes, medians): C 28.3 / F 77.4 / R 86.3 Mrows/s; F/R single-thread 21.5/23.3 vs A 7.16. Readings: route-and-write beats look-up-and-compare 3x; the semantic address costs ~10% over plain radix at ~400-group cardinality (high-cardinality prefix-local payoff untested, unclaimed); the accumulator, not the scan, is where the win lives. Board: E-1BRC-ADDRESSING-1. Addendum-13 probe COMPLETE — all lanes A-F + R measured on one regenerable recipe corpus. Gates: 14 (std) / 18 (all features) tests green, clippy -D warnings clean, fmt clean. 
Co-Authored-By: Claude Fable 5 Claude-Session: https://claude.ai/code/session_01MLBnPuScZy6w9di2QEjsXM --- .claude/board/EPIPHANIES.md | 4 + .claude/v3/INTEGRATION-PLAN.md | 15 ++ crates/onebrc-probe/README.md | 46 ++++- crates/onebrc-probe/src/lane_f.rs | 323 ++++++++++++++++++++++++++++++ crates/onebrc-probe/src/lib.rs | 7 + crates/onebrc-probe/src/main.rs | 12 +- 6 files changed, 402 insertions(+), 5 deletions(-) create mode 100644 crates/onebrc-probe/src/lane_f.rs diff --git a/.claude/board/EPIPHANIES.md b/.claude/board/EPIPHANIES.md index 7a90f576..9e0c436c 100644 --- a/.claude/board/EPIPHANIES.md +++ b/.claude/board/EPIPHANIES.md @@ -1,3 +1,7 @@ +## 2026-07-02 — E-1BRC-ADDRESSING-1: addressing-is-aggregation measured — route-and-write is 3× the classic map; the Morton dress costs ~10% +**Status:** FINDING (measured, onebrc-probe t0–t3, recipe corpus rows=10000000 seed=42 sha256=f1853caa…5691, 4-core container; tables in `crates/onebrc-probe/README.md` §5–5.3) + +The 1BRC probe (operator-requested, Addendum-13) closed all six lanes. The ladder, medians at 4 workers: BTreeMap map (C) 28.3 Mrows/s → ractor actor-per-worker (D) 22.4 (the "helper, not a messaging path" ruling measured: ~20% actor tax, dominated by the forced Arc corpus copy) → kanban-carded (E) ≈ D (journaling floor WITHIN NOISE; ~66 µs/card at 256-card granularity = ~0.01% of the W2d 550 ms Libet budget — the board is not a scheduling threat) → **flat SoA route-and-write: radix control (R) 86.3, Morton tile address (F) 77.4**. 
Three load-bearing numbers: (1) group-by-as-address-route into SoA accumulators with gated indexed writes beats look-up-and-compare by **3×** — the substrate's aggregation shape is right; (2) the semantic (Morton tile) address costs **~10% over plain radix** at ~400-group cardinality — directionally validating addressing-is-aggregation while honestly pricing the address layer (the canon's prefix-local-tile payoff is a HIGH-cardinality claim this corpus cannot test); (3) SIMD delimiter-scan alone (B, 1.06×) is not where the win lives — the accumulator is. Fence kept: every number travels with its regenerable corpus recipe. ## 2026-07-02 — E-SEMANTIC-OS-CONVERGENCE-1: the operator's capstone — "code is no longer the center; the center is a verified semantic substrate" **Status:** FINDING (operator synthesis across 7+ parallel sessions, recorded verbatim below; grounding table + two sharpenings appended by this session) diff --git a/.claude/v3/INTEGRATION-PLAN.md b/.claude/v3/INTEGRATION-PLAN.md index 53573b58..ed659ec2 100644 --- a/.claude/v3/INTEGRATION-PLAN.md +++ b/.claude/v3/INTEGRATION-PLAN.md @@ -576,3 +576,18 @@ D 22.381 / E(4 cards) 22.963 / E(64) 22.477 / E(256) 22.118 Mrows/s. is not a scheduling threat; the actor-boundary copy remains the only real tax (unchanged ~20% vs C). Full tables: `crates/onebrc-probe/README.md` §5.2. Remaining: lane F (Morton-tile shader vs plain radix control). + +#### Addendum-13 status update (2026-07-02, t3 — PROBE COMPLETE) + +Lanes F (Morton-tile SoA) + R (plain-radix control) SHIPPED (std-only, no +feature gate — the A/C zero-dep contract holds for all four std lanes). +t3 medians @4 workers: C 28.3 / F 77.4 / R 86.3 Mrows/s. 
The three +numbers the probe was sent to fetch: route-and-write beats the classic +map **3×** (R−C, address-agnostic); the Morton address costs **~10%** +over plain radix at ~400-group cardinality (F−R — the addressing tax, +isolated exactly; high-cardinality prefix-local payoff remains untested +by this corpus and unclaimed); the accumulator, not the SIMD scan, is +where the win lives (B was 1.06×). All six lanes A–F + R now measured +on one regenerable recipe corpus. Board: E-1BRC-ADDRESSING-1. The probe +is COMPLETE; follow-ups (100M container-scale run, high-cardinality +corpus, SWAR parse, mmap) are priced and parked in README §1/§5.3. diff --git a/crates/onebrc-probe/README.md b/crates/onebrc-probe/README.md index 89934b2d..ebc66733 100644 --- a/crates/onebrc-probe/README.md +++ b/crates/onebrc-probe/README.md @@ -82,6 +82,8 @@ regenerating with the same `(rows, seed)` and diffing the printed | **C** — `lane_c_threads` | `std::thread` parallel baseline: newline-aligned `chunk_bounds` split, per-worker owned `BTreeMap`, commutative `Stats::merge` combine | **Shipped** | | **B** — `lane_b::lane_b_simd` (feature `lane-b`) | Vectorized `;`/`\n` scanning via `ndarray::simd::U8x32::cmpeq_mask` (per the workspace's SIMD rule — never raw `pulp`/`wide`/hand intrinsics in a consumer crate; see `.claude/knowledge/ndarray-vertical-simd-alien-magic.md`), 32-byte-stride scan with cross-block `line_start`/`pending_semi` carry, scalar temp parse (SWAR/branchless parse deliberately still deferred). | **Shipped** | | **D** — `lane_d::lane_d_ractor` (feature `lane-d`) | Same groupby-aggregate workload as Lane C, but each `chunk_bounds` chunk is aggregated by a stateless `ractor` actor (actor-per-worker, ask-pattern reply, `lance-graph-supervisor`-style `Actor`/`RpcReplyPort` shape) instead of a bare `std::thread::scope` closure — identical chunking + commutative merge, only the worker primitive changes. 
| **Shipped** | +| **F** — `lane_f::lane_f_morton` (std-only, no feature) | The substrate-native lane: station identity → FNV-1a 64 → two axis bytes **nibble-interleaved** into a 16-bit Morton tile position (the GUID canon's 256×256 centroid-tile read) → slot into flat **SoA accumulators** (`min[]/max[]/sum[]/count[]`, open-addressed linear probe, name-verified on tag hit) — group-by as a prefix ROUTE, aggregation as a gated indexed write, per-worker owned tables BUNDLE-merged. Same scalar scan + `chunk_bounds` as lane C: the only variable vs C is the accumulator. | **Shipped** | +| **R** — `lane_f::lane_r_radix` (std-only, no feature) | The honest control for F: byte-identical pipeline, slot = plain `hash & 0xFFFF` (no interleave). **F−R isolates the Morton addressing tax exactly; R−C prices flat-SoA-table-vs-BTreeMap.** | **Shipped** | | **E** — `lane_e::lane_e_kanban` (feature `lane-e`) | One kanban card per batch: the corpus splits into `batches` newline-aligned chunks (`batches >= workers`) pulled from a shared `AtomicUsize` queue by `workers` puller tasks; every batch is journaled by a fresh `lance-graph-supervisor::KanbanActor` driven through the full Rubicon forward arc (`Planning->CognitiveWork->Evaluation->Commit`, `drive_version_tick` × 3) around the actual `lane_a_scalar` work. The combined journal is asserted to carry exactly `3 * batches` legal `KanbanMove`s. `batches == workers` vs Lane D isolates the kanban journaling cost (identical chunking + actor-model tax, only the actor type differs); fine-grained batching (`batches >> workers`) prices per-card scheduling overhead (feeds W2d, the 550 ms Libet budget question). | **Shipped** | --- @@ -90,7 +92,7 @@ regenerating with the same `(rows, seed)` and diffing the printed ```text onebrc-probe gen -onebrc-probe run [workers] [batches] +onebrc-probe run [workers] [batches] ``` Lane `b` requires `--features lane-b`; lane `d` requires `--features lane-d`; @@ -252,3 +254,45 @@ Readings: material on top. 
- Lane F (Morton-tile cascaded shader vs a plain radix control — the addressing-tax isolator) is the remaining lane. + +### §5.3 — t3 (lanes F/R) — measured 2026-07-02 + +Same recipe corpus (hash re-verified), same 4-core container, release +build (F/R are std-only — no feature flags involved). Five passes per +lane at 4 workers after an initial warm-up round showed high first-pass +variance; all five listed: + +| Lane | workers | throughput passes (Mrows/s) | median | +|---|---|---|---| +| C (BTreeMap) | 4 | 28.3, 27.8, 28.9, 28.1, 28.8 | **28.3** | +| F (Morton SoA) | 4 | 76.2, 80.8, 56.9, 77.4, 81.2 | **77.4** | +| R (radix control) | 4 | 86.2, 86.4, 86.3, 86.8, 85.1 | **86.3** | +| F | 1 | 21.5 | — | +| R | 1 | 23.3 | — | + +(single-thread lane A same session: 7.16 — F/R are 3.0×/3.3× lane A on +one core, so the win is not a parallelism artifact.) + +Readings — the numbers Addendum-13 sent this lane to fetch: + +- **Route-and-write beats look-up-and-compare by ~3×.** Both F and R + (~77–86 Mrows/s) demolish lane C's BTreeMap accumulation (~28) on + identical scan, chunking, and merge — group-by as an address route + into flat SoA accumulators with gated indexed writes IS the right + shape for the substrate's aggregation paths. R−C = the + data-structure win (3.05×), address-agnostic. +- **The Morton addressing tax is single-digit-to-~10%.** F medians + ~10% under R (77.4 vs 86.3), with higher run-to-run variance (one + 56.9 outlier vs R's ±1%). The interleave ALU chain sits in the + address-generation dependency path before the table load, and the + tile-scattered slot distribution is less cache-regular than the + plain low-bits radix at this tiny (~400-group) cardinality. So: + addressing-is-aggregation holds directionally — the semantic + address layer costs ~10%, NOT 3× — but it is not free, and at this + group count the plain radix bucket is the faster dress. 
The canon's + bet (prefix-local tile batches paying off) is a HIGH-cardinality + claim; this corpus can't test it and this table doesn't claim it. +- Deliberately absent (see `lane_f.rs` module doc): per-tile + bucketing + cascade-ordered sweeps (earns its keep only at high + cardinality), kanban tile-batch scheduling (lane E priced it: + ~66 µs/card), SIMD scan (lane B's variable — composable later). diff --git a/crates/onebrc-probe/src/lane_f.rs b/crates/onebrc-probe/src/lane_f.rs new file mode 100644 index 00000000..bb8c20c7 --- /dev/null +++ b/crates/onebrc-probe/src/lane_f.rs @@ -0,0 +1,323 @@ +//! Lane F — the substrate-native lane: group-by-identity as a prefix +//! ROUTE, aggregation as a gated indexed write into SoA-shaped +//! accumulators. Plus Lane R, its honest control. +//! +//! Per Addendum-13 lane F (operator: "process it as cognitive shader in a +//! morton tile cascaded batch"): a station's identity is hashed to a key, +//! the key is read as **two axis bytes**, and the axes are +//! **nibble-interleaved** into a 16-bit Morton tile position — the +//! 256×256 centroid-tile read of the GUID canon (OGAR `CLAUDE.md` § "Tier +//! interpretation": each tier's 16 bits = two 256-entry axes, +//! nibble-interleaved, coarse→fine = alternating-axis refinement, "Morton +//! in centroid space"). That tile position IS the accumulator address: +//! records route into flat parallel arrays (`min[]/max[]/sum[]/count[]` — +//! SoA-shaped, one slot per tile) and aggregation is an **indexed gated +//! write** into the worker's OWN arrays (single-writer by ownership), +//! with a commutative BUNDLE merge across workers at the end — the +//! borrow-strategy write-back discipline, applied to raw OLAP. +//! +//! ## The honest control — Lane R +//! +//! The Morton route is radix bucketing wearing our address. The fastest +//! known 1BRC entries are radix/perfect-hash designs, so lane F alone +//! proves nothing about the ADDRESS — only about flat-table-vs-BTreeMap. +//! 
Lane R (`lane_r_radix`) is byte-identical to lane F except for ONE +//! line: the slot function is the plain low 16 bits of the same hash +//! (`h & 0xFFFF`), no interleave. Therefore: +//! +//! - **R vs C** prices the open-addressed SoA flat table against the +//! `BTreeMap` accumulation (the data-structure win, address-agnostic). +//! - **F vs R** prices the Morton addressing itself (the interleave ALU +//! cost + any cache-distribution difference) — the ADDRESSING TAX +//! Addendum-13 sends this lane to isolate. F ≈ R validates +//! addressing-is-aggregation (the semantic address costs nothing over +//! a plain radix bucket); F < R prices the address layer. +//! +//! ## What is deliberately NOT here +//! +//! - No per-tile record bucketing + cascade-ordered sweep: with ~400 +//! groups the scatter/gather of a two-pass tile batch costs more than +//! it buys; the direct-indexed write already exercises the +//! route-then-accumulate shape. (A 100M-row, high-cardinality corpus is +//! where tile-batched sweeps would earn their keep — noted for a +//! follow-up, not smuggled into this measurement.) +//! - No kanban scheduling of tile batches: lane E measured that tax +//! (~66 µs/card, within noise at chunk granularity) — re-adding it +//! here would only blur F−R. +//! - No SIMD scan: that is lane B's variable. F/R use the same scalar +//! byte scan as lanes A/C so the ONLY variable vs lane C is the +//! accumulator (and, between F and R, the slot function). std-only — +//! lanes A/C/F/R keep the crate's zero-dep contract. +//! +//! ## Hash +//! +//! FNV-1a 64 over the station-name bytes (own impl, no dep). Both slot +//! functions consume the SAME hash value, so the slot fn is the only +//! difference between F and R. Name-byte equality is verified on every +//! tag hit (not just the 64-bit tag), so a hash collision degrades to a +//! linear probe, never to a wrong merge — both lanes pay the identical +//! verification cost, keeping F−R clean. 
+ +use crate::{parse_temp_tenths, Stats}; +use std::collections::BTreeMap; + +/// Number of accumulator slots — the full 16-bit tile space (256×256). +const SLOTS: usize = 1 << 16; + +/// FNV-1a 64-bit over the station name bytes. +#[inline(always)] +fn fnv1a64(bytes: &[u8]) -> u64 { + let mut h: u64 = 0xcbf2_9ce4_8422_2325; + for &b in bytes { + h ^= b as u64; + h = h.wrapping_mul(0x0000_0100_0000_01b3); + } + h +} + +/// Morton tile position: the hash's two low axis bytes, nibble-interleaved +/// coarse→fine (`x_hi y_hi x_lo y_lo`) — the 256×256 centroid-tile read of +/// the GUID canon (alternating-axis refinement; each byte's nibbles are the +/// axis's coarse→fine ancestry). +#[inline(always)] +fn morton_slot(h: u64) -> u16 { + let x = (h & 0xFF) as u16; + let y = ((h >> 8) & 0xFF) as u16; + ((x & 0xF0) << 8) | ((y & 0xF0) << 4) | ((x & 0x0F) << 4) | (y & 0x0F) +} + +/// Radix control slot: the plain low 16 bits of the same hash — identical +/// pipeline, no interleave. The one-line difference that isolates the +/// addressing tax. +#[inline(always)] +fn radix_slot(h: u64) -> u16 { + (h & 0xFFFF) as u16 +} + +/// One worker's owned accumulator: SoA parallel arrays indexed by tile +/// slot, open-addressed (linear probe) on collision. Single-writer by +/// ownership — each worker builds its own table; cross-worker combination +/// is the commutative BUNDLE merge in [`table_to_map`] + `merge_maps`. +struct SoaTable { + /// Full 64-bit hash tag per slot; emptiness is carried by `counts` (a zero count marks an unclaimed slot), not by the tag. + tags: Vec<u64>, + /// Station name owned per occupied slot (left empty until the slot is claimed — occupancy itself is tested via `counts`). + /// Verified byte-for-byte on every tag hit — see module doc "Hash". + names: Vec<Vec<u8>>, + // SoA value arrays — the "SoA-shaped accumulators" of Addendum-13: + // one field per column, indexed by slot, updated by gated indexed + // writes (min/max/sum/count — each write is a fold, never a blind + // overwrite of foreign state).
+ mins: Vec, + maxs: Vec, + sums: Vec, + counts: Vec, +} + +impl SoaTable { + fn new() -> Self { + Self { + tags: vec![0; SLOTS], + names: vec![Vec::new(); SLOTS], + mins: vec![i32::MAX; SLOTS], + maxs: vec![i32::MIN; SLOTS], + sums: vec![0; SLOTS], + counts: vec![0; SLOTS], + } + } + + /// Route `name` to its slot (slot fn + linear probe) and fold one + /// observation into the SoA columns at that address. + #[inline(always)] + fn observe(&mut self, slot0: u16, h: u64, name: &[u8], tenths: i32) { + let mut s = slot0 as usize; + loop { + if self.counts[s] == 0 { + // First occupancy of this slot: claim it for `name`. + self.tags[s] = h; + self.names[s] = name.to_vec(); + break; + } + if self.tags[s] == h && self.names[s] == name { + break; + } + // Collision (different station hashed/probed here): linear + // probe to the next slot, wrapping. ~400 stations in 65536 + // slots keeps probe chains ≈ 1. + s = (s + 1) & (SLOTS - 1); + } + // The gated indexed write — folds, never blind assignment. + if tenths < self.mins[s] { + self.mins[s] = tenths; + } + if tenths > self.maxs[s] { + self.maxs[s] = tenths; + } + self.sums[s] += tenths as i64; + self.counts[s] += 1; + } +} + +/// Scan `data` (the same scalar `;`/`\n` byte scan as lane A) routing every +/// record through `slot_of` into an owned [`SoaTable`]. Generic over the +/// slot function so F and R monomorphize separately (zero-cost, no fn-ptr +/// indirection in the hot loop). 
+fn accumulate_table(data: &[u8], slot_of: impl Fn(u64) -> u16 + Copy) -> SoaTable { + let mut table = SoaTable::new(); + let len = data.len(); + let mut i = 0usize; + while i < len { + let name_start = i; + while data[i] != b';' { + i += 1; + } + let name = &data[name_start..i]; + i += 1; // skip ';' + let temp_start = i; + while data[i] != b'\n' { + i += 1; + } + let tenths = parse_temp_tenths(&data[temp_start..i]); + i += 1; // skip '\n' + + let h = fnv1a64(name); + table.observe(slot_of(h), h, name, tenths); + } + table +} + +/// Sweep a worker's table into the common `BTreeMap<String, Stats>` output +/// shape (occupied slots only) so cross-worker combination reuses the same +/// commutative `merge_maps` BUNDLE step every other lane uses — and so the +/// parity tests compare like with like. +fn table_to_map(table: SoaTable) -> BTreeMap<String, Stats> { + let mut out = BTreeMap::new(); + for s in 0..SLOTS { + if table.counts[s] > 0 { + let name = String::from_utf8(table.names[s].clone()).expect("station name utf8"); + out.insert( + name, + Stats { + min: table.mins[s], + max: table.maxs[s], + sum: table.sums[s], + count: table.counts[s], + }, + ); + } + } + out +} + +/// Shared threaded driver for F/R: `chunk_bounds` split (identical to lane +/// C), each worker accumulates its OWN [`SoaTable`] over its slice, tables +/// sweep to maps, maps BUNDLE-merge. Only `slot_of` differs between lanes.
+fn lane_table_threads( + data: &[u8], + workers: usize, + slot_of: impl Fn(u64) -> u16 + Copy + Send, +) -> BTreeMap<String, Stats> { + let workers = workers.max(1); + let bounds = crate::chunk_bounds(data, workers); + let results: Vec<BTreeMap<String, Stats>> = std::thread::scope(|scope| { + let handles: Vec<_> = bounds + .iter() + .map(|&(start, end)| { + let slice = &data[start..end]; + scope.spawn(move || table_to_map(accumulate_table(slice, slot_of))) + }) + .collect(); + handles + .into_iter() + .map(|h| h.join().expect("lane F/R worker panicked")) + .collect() + }); + crate::merge_maps(results) +} + +/// Lane F — Morton-tile routed SoA accumulation (see module doc). The +/// station key's two axis bytes, nibble-interleaved into the 256×256 tile +/// space, ARE the accumulator address. +pub fn lane_f_morton(data: &[u8], workers: usize) -> BTreeMap<String, Stats> { + lane_table_threads(data, workers, morton_slot) +} + +/// Lane R — the plain-radix control: identical pipeline, slot = low 16 +/// hash bits, no interleave. F−R isolates the addressing tax. +pub fn lane_r_radix(data: &[u8], workers: usize) -> BTreeMap<String, Stats> { + lane_table_threads(data, workers, radix_slot) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn morton_slot_nibble_interleaves_coarse_to_fine() { + // x = 0xAB (h low byte), y = 0xCD (h next byte) → + // x_hi y_hi x_lo y_lo = 0xA C B D. + let h = 0xCDAB_u64; // low byte 0xAB → x; next byte 0xCD → y + assert_eq!(morton_slot(h), 0xACBD); + // Degenerate corners. + assert_eq!(morton_slot(0x0000), 0x0000); + assert_eq!(morton_slot(0xFFFF), 0xFFFF); + // Only the low 16 bits of the hash participate. + assert_eq!(morton_slot(0xDEAD_BEEF_0000_CDAB), 0xACBD); + } + + #[test] + fn forced_collisions_probe_correctly() { + // A constant slot function forces EVERY station into slot 0 — + // the probe chain must still keep stations separate (tag + full + // name-byte verification), never merge two stations' stats.
+ let corpus = b"aa;1.0\nbb;2.0\naa;3.0\ncc;-4.0\nbb;0.5\n"; + let table = accumulate_table(corpus, |_| 0u16); + let map = table_to_map(table); + assert_eq!(map.len(), 3, "three stations despite total slot collision"); + assert_eq!( + map["aa"], + Stats { + min: 10, + max: 30, + sum: 40, + count: 2 + } + ); + assert_eq!( + map["bb"], + Stats { + min: 5, + max: 20, + sum: 25, + count: 2 + } + ); + assert_eq!( + map["cc"], + Stats { + min: -40, + max: -40, + sum: -40, + count: 1 + } + ); + } + + #[test] + fn lane_f_and_lane_r_agree_with_lane_a_on_generated_corpus() { + let dir = std::env::temp_dir(); + let path = dir.join(format!("onebrc_probe_test_f_{}.txt", std::process::id())); + let result = crate::gen::gen(&path, 50_000, 33).expect("gen"); + assert_eq!(result.rows, 50_000); + + let data = std::fs::read(&path).expect("read generated corpus"); + std::fs::remove_file(&path).ok(); + + let a = crate::lane_a_scalar(&data); + let f = lane_f_morton(&data, 3); + let r = lane_r_radix(&data, 3); + assert_eq!(a, f, "lane F must produce identical aggregates to lane A"); + assert_eq!(a, r, "lane R must produce identical aggregates to lane A"); + assert!(!a.is_empty()); + } +} diff --git a/crates/onebrc-probe/src/lib.rs b/crates/onebrc-probe/src/lib.rs index 83ac7925..ff140f42 100644 --- a/crates/onebrc-probe/src/lib.rs +++ b/crates/onebrc-probe/src/lib.rs @@ -20,6 +20,11 @@ //! (Planning->CognitiveWork->Evaluation->Commit) around the actual work. //! Measures the V3 kanban scheduling/journaling tax (E-D isolates the //! journaling cost; fine-grained batching prices per-card scheduling). +//! - **Lanes F/R** (`lane_f::{lane_f_morton, lane_r_radix}`, std-only, no +//! feature) — the substrate-native lane and its honest control: station +//! identity → Morton tile address → SoA-shaped flat accumulators (F); +//! identical pipeline with a plain-radix slot (R). F−R isolates the +//! addressing tax; R−C prices flat-table-vs-BTreeMap. //! //! ## Reference inventory //! 
@@ -44,6 +49,7 @@ pub mod lane_b; pub mod lane_d; #[cfg(feature = "lane-e")] pub mod lane_e; +pub mod lane_f; pub mod sha256; #[cfg(feature = "lane-b")] @@ -52,6 +58,7 @@ pub use lane_b::lane_b_simd; pub use lane_d::lane_d_ractor; #[cfg(feature = "lane-e")] pub use lane_e::lane_e_kanban; +pub use lane_f::{lane_f_morton, lane_r_radix}; use std::collections::BTreeMap; diff --git a/crates/onebrc-probe/src/main.rs b/crates/onebrc-probe/src/main.rs index adc0f1e0..31c7a639 100644 --- a/crates/onebrc-probe/src/main.rs +++ b/crates/onebrc-probe/src/main.rs @@ -3,7 +3,7 @@ //! //! ```text //! onebrc-probe gen -//! onebrc-probe run [workers] [batches] +//! onebrc-probe run [workers] [batches] //! ``` //! //! Lane `b` requires `--features lane-b`; lane `d` and lane `e` require @@ -23,7 +23,7 @@ fn main() { Some("run") => cmd_run(&args[2..]), _ => { eprintln!( - "usage:\n onebrc-probe gen \n onebrc-probe run [workers] [batches]" + "usage:\n onebrc-probe gen \n onebrc-probe run [workers] [batches]" ); std::process::exit(2); } @@ -55,7 +55,7 @@ fn cmd_gen(args: &[String]) { fn cmd_run(args: &[String]) { let path = PathBuf::from( args.first() - .expect("usage: run [workers] [batches]"), + .expect("usage: run [workers] [batches]"), ); let lane = args.get(1).map(String::as_str).unwrap_or("a"); let workers: usize = args @@ -121,8 +121,12 @@ fn cmd_run(args: &[String]) { std::process::exit(1); } } + // Lanes F/R are std-only (no feature gate): the substrate-native + // Morton-tile SoA lane and its plain-radix control (lane_f.rs). + "f" => onebrc_probe::lane_f_morton(&data, workers), + "r" => onebrc_probe::lane_r_radix(&data, workers), other => { - eprintln!("unknown lane '{other}' (expected 'a', 'b', 'c', 'd', or 'e')"); + eprintln!("unknown lane '{other}' (expected 'a', 'b', 'c', 'd', 'e', 'f', or 'r')"); std::process::exit(2); } };