From 8aee2ac2cc4d51787bd80f5af6581b47903179af Mon Sep 17 00:00:00 2001 From: Fredrik Ahlgren Date: Sat, 13 Jun 2026 13:07:30 +0200 Subject: [PATCH 1/8] docs(spec): C1+C2 LAN-direct (Locator seam + QUIC + mDNS) design + plan Approved 2026-06-13. CLI-only, Go-only. A Locator seam (in package client) turns a Machine into a live peer.MsgConn; Attach composes [LAN, Relay]. LAN uses QUIC (self-signed + skip-verify; real auth = Noise-KK + binding) and mDNS discovery. Relay/WAN path unchanged. QUIC-over-WAN (DCUtR) is a stated future locator, not built now. Stacked on the B1.4+B1.5 branch (reuses ownerPubFromBinding, the cached binding, and wallet owner_id). Co-Authored-By: Claude Opus 4.8 --- .../plans/2026-06-13-c1-c2-lan-direct-plan.md | 291 ++++++++++++++++++ ...6-06-13-c1-c2-lan-direct-locator-design.md | 179 +++++++++++ 2 files changed, 470 insertions(+) create mode 100644 docs/superpowers/plans/2026-06-13-c1-c2-lan-direct-plan.md create mode 100644 docs/superpowers/specs/2026-06-13-c1-c2-lan-direct-locator-design.md diff --git a/docs/superpowers/plans/2026-06-13-c1-c2-lan-direct-plan.md b/docs/superpowers/plans/2026-06-13-c1-c2-lan-direct-plan.md new file mode 100644 index 0000000..ab2a6c7 --- /dev/null +++ b/docs/superpowers/plans/2026-06-13-c1-c2-lan-direct-plan.md @@ -0,0 +1,291 @@ +# C1 + C2 — Locator seam + LAN-direct (QUIC + mDNS) — Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: superpowers:subagent-driven-development. +> Steps use checkbox (`- [ ]`) syntax. + +**Goal:** `mir attach` reaches a `mir up` node on the same LAN with no relay — mDNS +discovery + a direct QUIC transport — via a `Locator` seam under the unchanged Noise-KK +session. Relay/WAN path unchanged. CLI-only, Go-only. + +**Architecture:** a `Locator` (in package `client`) turns a `Machine` into a live +`peer.MsgConn`; `Attach` composes `[lanLocator, relayLocator]` and runs Noise-KK over the +first that connects. 
LAN uses QUIC (self-signed + skip-verify; real auth = Noise-KK + +binding) and discovers addresses through a pluggable `resolver` (mDNS in prod, injected in +tests). Spec: `docs/superpowers/specs/2026-06-13-c1-c2-lan-direct-locator-design.md`. + +**Tech stack:** Go; `github.com/quic-go/quic-go`, `github.com/grandcat/zeroconf`. + +**Cross-cutting facts:** +- The transport seam is `peer.MsgConn` (`Send([]byte) error`, `Recv(ctx) ([]byte, error)`). + Noise (`peer.RunInitiator/RunResponder`) and `agent.RunAgentSession` are unchanged. +- The agent's binding verify+pin helper `ownerPubFromBinding(bindingJSON, owner string) + ([]byte, error)` already exists (B1.4.1, `agent/runtime.go`) — reuse it for LAN frame 0. +- `Machine` = `{name, machine_id, host_pub, signal_url}` (`client/store.go`); `Identity` + has `OwnerPriv()`, `WalletAddress`, `BindingJSON`, `HasWallet()`. +- Discovery yields only an address; trust stays Noise-KK pin + wallet binding. + +--- + +## Task 1 — C1: Locator seam + RelayLocator (pure refactor) + +**Files:** +- Create: `go/internal/client/locator.go` +- Modify: `go/internal/client/attach.go` +- Test: `go/internal/client/locator_test.go` (+ existing e2e must stay green) + +- [ ] **Step 1: Define the seam.** `locator.go`: + ```go + package client + + import ( + "context" + "errors" + "github.com/srcful/terminal-relay/go/internal/peer" + ) + + // ErrUnreachable means this locator can't reach the machine; Attach falls through + // to the next locator. Any other error aborts (a real failure on a reachable path). + var ErrUnreachable = errors.New("locator: machine not reachable by this path") + + // Locator turns a Machine into a live MsgConn (post-transport, pre-Noise). 
+ type Locator interface { + Dial(ctx context.Context, m Machine, id *Identity, ice []peer.ICEServer) (peer.MsgConn, func(), error) + } + ``` + +- [ ] **Step 2: Extract RelayLocator from attach.go.** Move the WS-dial + offer/answer + ICE + wait (today's `attach.go` body up to the opened `DataChannel`) into: + ```go + type relayLocator struct{} + + func (relayLocator) Dial(ctx context.Context, m Machine, id *Identity, ice []peer.ICEServer) (peer.MsgConn, func(), error) { + // ... today's attach.go: dial /attach WS, send offer {SDP, Binding: id.BindingJSON}, + // accept answer, wait for the DataChannel; on timeout return ErrUnreachable. + // Return (mc, cleanup, nil). + } + ``` + Keep the offer's `Binding: id.BindingJSON` exactly as today. + +- [ ] **Step 3: Recompose Attach.** `Attach` builds `locators := []Locator{relayLocator{}}` + (LAN prepended in Task 5), iterates: `mc, cleanup, err := loc.Dial(...)`; on + `ErrUnreachable` continue, on other error return it, on success break. Then the **lifted, + unchanged** Noise + return: + ```go + hostPub, err := hex.DecodeString(m.HostPubHex) // as today + sess, err = peer.RunInitiator(ctx, mc, id.OwnerPriv(), hostPub) + // ... return mc, sess, cleanup, nil (same signature as today) + ``` + Preserve `Attach`'s current exported signature. + +- [ ] **Step 4: Run.** `cd go && go test ./internal/client/` — the existing e2e + (`TestEndToEnd*`) must stay green (behavior-preserving). Add a small `locator_test.go` + asserting `ErrUnreachable` fall-through with a stub locator. `go vet`, `gofmt`. + +- [ ] **Step 5: Commit.** `refactor(client): C1 Locator seam + RelayLocator (no behavior change)` + +--- + +## Task 2 — C2.0: QUIC MsgConn + +**Files:** +- Create: `go/internal/client/quicconn.go` + `quicconn_test.go` +- Modify: `go/go.mod` (add `github.com/quic-go/quic-go`) + +- [ ] **Step 1: Add the dep.** `cd go && go get github.com/quic-go/quic-go@latest`. 
+ +- [ ] **Step 2: Test first (round-trip framing).** `quicconn_test.go`: stand up a QUIC + listener on `127.0.0.1:0` (helper makes a self-signed `tls.Config` with ALPN + `miranda/lan/v1`), dial it, wrap both ends' streams in `quicConn`, and assert several + `Send`/`Recv` frames (incl. an empty frame and a 70 KB frame) round-trip exactly and in + order. + +- [ ] **Step 3: Implement.** `quicconn.go`: + ```go + // quicConn adapts one QUIC bidi stream to peer.MsgConn with 4-byte big-endian + // length-prefixed frames (a QUIC stream is a byte stream; framing preserves the + // message boundaries Noise/agent code expects). + type quicConn struct { + stream quic.Stream + conn quic.Connection + } + func (q *quicConn) Send(b []byte) error { + var hdr [4]byte + binary.BigEndian.PutUint32(hdr[:], uint32(len(b))) + if _, err := q.stream.Write(hdr[:]); err != nil { return err } + _, err := q.stream.Write(b) + return err + } + func (q *quicConn) Recv(ctx context.Context) ([]byte, error) { + // honor ctx via stream.SetReadDeadline on ctx.Done (or a goroutine); read 4-byte + // length then exactly that many bytes (io.ReadFull). Reject absurd lengths (> maxFrame). + } + ``` + Add `const maxFrame = 1 << 20`. Helpers: `selfSignedTLS() *tls.Config` (server, ALPN) and + `clientTLS() *tls.Config` (`InsecureSkipVerify: true`, same ALPN) — shared by Tasks 3/4. + +- [ ] **Step 4: Run.** `cd go && go test ./internal/client/ -run QUIC -v`; `go vet`, `gofmt`. + +- [ ] **Step 5: Commit.** `feat(client): C2.0 QUIC MsgConn (length-framed stream)` + +--- + +## Task 3 — C2.1: LANLocator (resolver + QUIC dial + frame0) + +**Files:** +- Create: `go/internal/client/lan_locator.go` + `lan_locator_test.go` +- Modify: `go/go.mod` (add `github.com/grandcat/zeroconf`) + +- [ ] **Step 1: Resolver seam (for deterministic tests).** + ```go + // resolver maps a machine_id to a dialable "host:port" on the LAN. 
mdnsResolver is + // the prod impl; tests inject a static one so the QUIC/Noise path is exercised without + // multicast (flaky in CI). + type resolver interface { + resolve(ctx context.Context, machineID string) (addr string, err error) // ErrUnreachable on miss + } + ``` + +- [ ] **Step 2: Test first.** `lan_locator_test.go`: an in-process QUIC echo/agent stub on + loopback + a static `resolver` returning its address. Assert `lanLocator.Dial` (a) sends + frame 0 = `id.BindingJSON` (the stub reads and checks it), (b) returns a usable + `quicConn`, and (c) returns `ErrUnreachable` when the resolver misses. + +- [ ] **Step 3: Implement Dial.** + ```go + type lanLocator struct{ res resolver } + func (l lanLocator) Dial(ctx context.Context, m Machine, id *Identity, _ []peer.ICEServer) (peer.MsgConn, func(), error) { + if !id.HasWallet() { return nil, nil, ErrUnreachable } // LAN needs a binding + addr, err := l.res.resolve(ctx, m.MachineID) + if err != nil { return nil, nil, ErrUnreachable } + conn, err := quic.DialAddr(ctx, addr, clientTLS(), nil) + if err != nil { return nil, nil, ErrUnreachable } + stream, err := conn.OpenStreamSync(ctx) + if err != nil { _ = conn.CloseWithError(0, ""); return nil, nil, ErrUnreachable } + mc := &quicConn{stream: stream, conn: conn} + if err := mc.Send([]byte(id.BindingJSON)); err != nil { /* close */ return nil, nil, ErrUnreachable } + cleanup := func() { _ = conn.CloseWithError(0, "") } + return mc, cleanup, nil + } + ``` + +- [ ] **Step 4: mdnsResolver (prod).** Using `grandcat/zeroconf`, browse `_miranda._udp` for + ~`resolveTimeout` (1.5 s), match an entry whose TXT `mid=` (or instance) equals + `machineID`, return `net.JoinHostPort(entry.AddrIPv4[0].String(), strconv.Itoa(entry.Port))`; `ErrUnreachable` on + timeout/miss. (No unit test for live multicast here — covered by the skippable mDNS + integration test in Task 6.) + +- [ ] **Step 5: Run.** `cd go && go test ./internal/client/ -run LAN -v`; `go vet`, `gofmt`. 
+ +- [ ] **Step 6: Commit.** `feat(client): C2.1 LANLocator (mDNS resolve + QUIC + frame0 binding)` + +--- + +## Task 4 — C2.2: agent QUIC listener + mDNS advertise + accept + +**Files:** +- Create: `go/internal/agent/lan.go` + `lan_test.go` +- Modify: `go/internal/agent/runtime.go` (start LAN alongside the relay loop) + +- [ ] **Step 1: Factor the post-pin path.** Extract the relay `handleOffer`'s + post-DataChannel logic into a shared helper so LAN reuses it verbatim: + ```go + // serveAuthenticated runs RunResponder(host_priv, ownerPub) then the PTY session over mc. + func (rt *Runtime) serveAuthenticated(ctx context.Context, mc peer.MsgConn, ownerPub []byte) error + ``` + `handleOffer` calls it after `ownerPubFromBinding`; LAN calls it too. + +- [ ] **Step 2: Test first.** `lan_test.go`: start `rt.startLAN` on `127.0.0.1:0` (expose the + chosen addr), then from a test client QUIC-dial + send a **valid** binding frame0 for a + pinned owner + run `peer.RunInitiator` and assert the Noise session establishes and a PTY + echo round-trips. Negative: an **unpinned/forged** binding closes the stream pre-Noise. 
+ +- [ ] **Step 3: Implement lan.go.** + ```go + func (rt *Runtime) startLAN(ctx context.Context) (stop func(), err error) { + ln, err := quic.ListenAddr("0.0.0.0:0", selfSignedTLS(), nil) // ephemeral port + // zeroconf.Register("", "_miranda._udp", "local.", port, + // []string{"mid=" + rt.cfg.MachineID}, nil) + // accept loop: for { conn := ln.Accept(ctx); go rt.lanAccept(ctx, conn) } + } + + func (rt *Runtime) lanAccept(ctx context.Context, conn quic.Connection) { + if !rt.admit() { _ = conn.CloseWithError(0, "busy"); return } // reuse DoS bound + defer rt.release() + stream, err := conn.AcceptStream(ctx); if err != nil { return } + mc := &quicConn{stream: stream, conn: conn} + bindingJSON, err := mc.Recv(ctx); if err != nil { return } // frame 0 + sb, err := identity.ParseSignedBinding(bindingJSON); if err != nil { return } + if !rt.cfg.IsOwnerPinned(sb.Wallet) { return } // pinned? + ownerPub, err := ownerPubFromBinding(string(bindingJSON), sb.Wallet); if err != nil { return } + _ = rt.serveAuthenticated(ctx, mc, ownerPub) // shared path + } + ``` + (`quicConn`/`selfSignedTLS` live in package `client`; move the shared QUIC helpers to a + small internal package, e.g. `go/internal/quicmsg`, imported by both `client` and `agent`, + to avoid duplication and any client↔agent import. Adjust Task 2/3 imports accordingly.) + +- [ ] **Step 4: Run.** `cd go && go test ./internal/agent/ -run LAN -v`; full + `go test ./internal/agent/`; `go vet`, `gofmt`. 
+ +- [ ] **Step 5: Commit.** `feat(agent): C2.2 QUIC LAN listener + mDNS advertise + binding-gated accept` + +--- + +## Task 5 — C2.3: wiring (attach order + flags) + mir up start + +**Files:** +- Modify: `go/internal/client/attach.go` (prepend lanLocator) +- Modify: `go/internal/agent/runtime.go` or the `mir up` command (`go/internal/cli/*`) — start + LAN unless `--no-lan` +- Modify: `go/internal/cli/*` — `mir up --no-lan`, `mir attach --relay-only` +- Test: `go/internal/cli/*_test.go` (flag parsing) + +- [ ] **Step 1:** `Attach` builds `[]Locator{lanLocator{res: newMDNSResolver()}, relayLocator{}}` + unless `--relay-only`, then `[]Locator{relayLocator{}}`. Thread the flag through Attach's + call site. +- [ ] **Step 2:** `mir up` starts `rt.startLAN` unless `--no-lan`; ensure clean shutdown + (call `stop()` on ctx cancel; unregister mDNS). +- [ ] **Step 3:** Flag-parse tests for `--no-lan` / `--relay-only`. Run + `cd go && go test ./internal/cli/`. +- [ ] **Step 4: Commit.** `feat(cli): C2.3 LAN-first attach, mir up --no-lan, mir attach --relay-only` + +--- + +## Task 6 — C2.4: e2e (no relay) + netsim + docs + +**Files:** +- Create: `go/internal/client/lan_e2e_test.go` (or under `agent/`) +- Create/modify: `deploy/netsim/*` (LAN-within-a-Docker-network path) +- Modify: `SECURITY.md` (LAN-direct paragraph), `README.md` (mention LAN-direct + `--no-lan`) +- Test (skippable): real mDNS register+browse on loopback (build tag or `testing.Short` skip) + +- [ ] **Step 1: e2e — full path, no relay.** Start `rt.startLAN` (real agent runtime, real + PTY echo), construct a client with a **static resolver** pointed at the listener, and run + `Attach` → assert a shell command round-trips end-to-end with **no `mir-signal` process**. + Negative: with a bad binding, attach fails and falls through (here, to a non-existent relay + → clean error). 
+- [ ] **Step 2: mDNS integration (skippable).** A `TestMDNSResolveLoopback` that registers + via zeroconf and browses for it; `t.Skip` under `-short` or when multicast is unavailable, + so CI stays deterministic. +- [ ] **Step 3: netsim.** Extend `deploy/netsim` with two nodes on one Docker network doing + LAN-direct (mDNS resolves, relay container absent/blocked); document the make target. +- [ ] **Step 4: docs.** `SECURITY.md` LAN-direct threat note (listener surface, mitigations); + `README.md` one line + `--no-lan`. +- [ ] **Step 5: full gates.** `cd go && go test ./... && go vet ./... && gofmt -l .` clean; + `cd web && npm test` (must be unaffected — sanity). +- [ ] **Step 6: Commit.** `test(lan): C2.4 relay-less e2e + netsim + SECURITY/README` + +--- + +## Self-review notes +- **Spec coverage:** Tasks 1–6 cover C1 (seam+relay refactor), C2.0 (QUIC MsgConn), C2.1 + (LANLocator), C2.2 (agent listener/advertise/accept), C2.3 (wiring/flags), C2.4 + (e2e/netsim/docs). QUIC-WAN/DCUtR is a stated non-goal. +- **Import hygiene:** shared QUIC helpers (`quicConn`, TLS configs) live in a small + `internal/quicmsg` package imported by both `client` and `agent` (no client↔agent cycle); + the `Locator` interface lives in `client` (Attach composes it). +- **Type consistency:** `Locator.Dial(ctx, m Machine, id *Identity, ice)`, `ErrUnreachable`, + `resolver.resolve`, `quicConn{stream, conn}`, `serveAuthenticated(ctx, mc, ownerPub)`, + reuse of `ownerPubFromBinding` are referenced consistently across tasks. +- **Determinism:** mDNS multicast is isolated behind `resolver`; all core tests use loopback + QUIC + injected addresses. Live mDNS is a single skippable test. 
diff --git a/docs/superpowers/specs/2026-06-13-c1-c2-lan-direct-locator-design.md b/docs/superpowers/specs/2026-06-13-c1-c2-lan-direct-locator-design.md new file mode 100644 index 0000000..50058c4 --- /dev/null +++ b/docs/superpowers/specs/2026-06-13-c1-c2-lan-direct-locator-design.md @@ -0,0 +1,179 @@ +# C1 + C2 — Locator seam + LAN-direct (QUIC + mDNS) + +**Status:** Approved (2026-06-13). Implements **C1 + C2** of the mesh track in +`2026-06-10-north-star-mesh-wallet-identity-design.md`. CLI-only (the browser cannot do +mDNS or raw QUIC; it keeps using the relay). Entirely Go-side — **no web, no byte-identical +crypto gate touched.** + +**Goal:** a `mir` client reaches a `mir up` node **on the same LAN with no relay** — +zero-config discovery (mDNS) + a direct QUIC transport — by inserting a `Locator` seam +under the existing Noise-KK session. The relay/WAN path is **unchanged**. + +**Decisions (review, 2026-06-13):** +1. **Scope:** C1 + C2 together. +2. **LAN transport:** QUIC (self-signed + skip-verify; real auth is Noise-KK + binding + inside). Not WebRTC, not raw TCP. +3. **Discovery:** mDNS/DNS-SD via a small zeroconf dependency. +4. **QUIC-everywhere is the destination, not this step.** WAN's hard part is NAT traversal + (ICE), which QUIC alone doesn't solve; WebRTC already does it and ships. A future + `QUICHolePunchLocator` (DCUtR + circuit-relay, north-star C4) drops into the same seam — + captured here as a stated future, **not built now.** + +--- + +## How a client reaches an agent today (precise) + +- **Discovery = none.** The client knows a machine only from its stored `Machine` record + (`client/store.go`): `{name, machine_id, host_pub, signal_url}` — no IP/host address. +- **Connect = relay brokers SDP, then P2P.** `client/attach.go` dials the relay + `/attach` WebSocket, exchanges an SDP offer/answer, opens a WebRTC `DataChannel`, then + runs Noise-KK over it. 
**Terminal traffic never touches the relay** — it is P2P + Noise + once the DataChannel is live. +- **Agent = outbound only.** `agent/runtime.go` only *dials out* to the relay's + `/agent/signal`; it has **no listener**, no mDNS, no LAN presence. +- **The seam already exists.** Noise-KK (`peer.RunInitiator/RunResponder`) and the PTY mux + (`agent.RunAgentSession`) are transport-agnostic — they speak only `peer.MsgConn` + (`Send([]byte)`, `Recv(ctx) ([]byte, error)`). The WebRTC `DataChannel` is one + implementation. **A QUIC stream is another** — the crypto and session code need zero + changes. + +--- + +## C1 — the `Locator` seam (pure refactor, no behavior change) + +A locator turns a `Machine` into a live, pre-Noise transport. It lives **in the `client` +package** (`go/internal/client/locator.go`), not a separate package — `Attach` composes +locators, so a separate `locator` package importing `client` for `Machine`/`Identity` would +be an import cycle. Keeping it in `client` is cycle-free and the implementations already +need `client`'s types. + +```go +// go/internal/client/locator.go (package client) +type Locator interface { + // Dial reaches m and returns a live MsgConn (post-transport, pre-Noise) plus a + // cleanup. ErrUnreachable signals "I can't reach it" so Attach falls through to + // the next locator; any other error aborts (it's a real failure on a reachable path). + Dial(ctx context.Context, m Machine, id *Identity, ice []peer.ICEServer) (peer.MsgConn, func(), error) +} +var ErrUnreachable = errors.New("locator: machine not reachable by this path") +``` + +- **`RelayLocator`** wraps today's `attach.go` body (WS + offer/answer + ICE) and returns + the opened `DataChannel` as the `MsgConn`. The binding still rides the SDP offer + (B1.4.2). **Byte-identical behavior.** +- **`Attach`** becomes: try locators in order, take the first `MsgConn`, run + `peer.RunInitiator(ctx, mc, id.OwnerPriv(), hostPub)`. 
The Noise + session loop is lifted +out of `attach.go` unchanged and runs over whichever `MsgConn` a locator returned. + +This is the whole of C1: no new transport yet, relay path preserved, but `Attach` now +composes locators. Unit-test: `RelayLocator` against the in-memory relay test harness still +drives a full session. + +--- + +## C2 — LAN-direct (`LANLocator`: mDNS + QUIC) + +### Discovery (mDNS / DNS-SD) +- **Agent (`mir up`)** registers `_miranda._udp.local`, instance name = `machine_id`, the + QUIC listen port, and TXT `mid=<machine_id>`. (Ephemeral port, advertised — no fixed-port + config.) +- **Client** browses `_miranda._udp`, matches each instance's `mid` against its known + `Machine` records, resolves A/AAAA + port. Discovery yields only an **address** — never + trust. + +### Transport (QUIC) +- **TLS is QUIC's requirement, not our trust.** The agent generates an **ephemeral + self-signed cert** at startup; the client dials with `InsecureSkipVerify: true` + ALPN + `miranda/lan/v1`. Real authentication is Noise-KK (the pinned `host_pub`) + the wallet + binding **inside** the QUIC stream — a TLS/QUIC MITM cannot complete Noise-KK without + `host_priv`. (Redundant QUIC-TLS encryption under Noise is accepted: Noise is the real + layer; QUIC-TLS is dumb transport.) +- **`quicConn` implements `peer.MsgConn`** over one bidirectional stream with 4-byte + big-endian length-prefixed frames (a QUIC stream is a byte stream, so message boundaries + are framed). ~40 lines. + +### LAN wire (replaces the SDP offer's job of delivering the binding + pin) +1. Client QUIC-dials the mDNS-resolved `IP:port`, opens a bidi stream. +2. **Frame 0 = the binding record** (`id.BindingJSON`, the same B1.4 `{v,wallet,device, + x25519,ts,sig}`), sent as the first `MsgConn.Send`. +3. Agent's accept handler does `Recv()` for frame 0, then **reuses + `ownerPubFromBinding(bindingJSON, binding.wallet)` from B1.4.1**: `IsOwnerPinned(wallet)` + → `VerifyBinding` → pin `binding.x25519`. 
A bad/unpinned/forged binding closes the + stream with a clear error. +4. Noise-KK over the **same** stream: client `RunInitiator(id.OwnerPriv(), host_pub)` + (the X25519 transport key + the pinned agent `host_pub` from the `Machine` record), + agent `RunResponder(host_priv, pinnedX25519)`. Byte-for-byte the current code. +5. `RunAgentSession` / client session loop — unchanged. + +Because the binding is just the first framed message and Noise messages are the subsequent +ones on the same ordered stream, `RunInitiator/RunResponder` are byte-for-byte the current +code. + +### Agent presence (`mir up`) +Alongside the existing relay serve loop, `mir up` starts: a QUIC listener + the zeroconf +registration. Each accepted connection runs the frame0-verify → pin → `RunResponder` → +`RunAgentSession` path above (a shared helper with the relay path's post-pin logic). +**LAN is on by default** (`mir up --no-lan` opts out); the relay path always runs too. + +### Attach ordering (client) +`Attach` composes `[LANLocator, RelayLocator]`. `LANLocator.Dial` does an mDNS lookup with a +short timeout (~1.5 s); on a hit it QUIC-dials + sends frame0 and returns the `quicConn`; on +no hit / dial failure it returns `ErrUnreachable` and `Attach` falls through to the relay +(today's path). A `mir attach --relay-only` flag forces the relay path (skipping the LAN locator). + +--- + +## Trust model (unchanged) + new surface + +- **Same invariant: locate but never impersonate.** Noise-KK pins `host_pub` (from the + `Machine` record) and the agent pins the owner via the wallet binding. mDNS spoofing or a + rogue LAN host yields at worst a **failed handshake (DoS)** — never impersonation or + plaintext. A wrong `host_pub` fails the client's Noise-KK; an unpinned/forged wallet fails + the agent's verify. 
+- **New attack surface: the agent now listens on the LAN.** Mitigations: reject non-pinned + owners immediately (cheap, pre-Noise, on frame 0), bound concurrent LAN handshakes (reuse + the agent's `admit()` semaphore), and `--no-lan` to disable. The QUIC listener binds to + all interfaces but only LAN peers can route to it on a typical home/office network. +- **mDNS info leak:** advertises that a miranda node with `machine_id` exists on the LAN. + `machine_id` is already opaque (random hex); acceptable for a personal LAN. `--no-lan` + removes the advertisement entirely. +- **`SECURITY.md`** gains a short "LAN-direct" paragraph when this lands. + +--- + +## Dependencies (non-crypto, Go-only) +- `github.com/quic-go/quic-go` — QUIC transport. +- `github.com/grandcat/zeroconf` — mDNS/DNS-SD register + browse. + +Neither touches the byte-identical crypto path (base58/SLIP-0010/BIP39/Noise vectors). The +browser is unaffected (no web changes). + +--- + +## Implementation order (TDD, small commits) +- **C1** `Locator` interface + `ErrUnreachable` (in package `client`); `RelayLocator` wrapping + today's attach; refactor `Attach` to compose locators + run the lifted Noise/session loop. + Relay e2e tests stay green (behavior-preserving). +- **C2.0** `quicConn` implementing `peer.MsgConn` (length-framed) + a round-trip frame test. +- **C2.1** `LANLocator.Dial`: mDNS browse → match `machine_id` → QUIC-dial → send frame0 + (binding) → return `quicConn`; `ErrUnreachable` on miss. Unit test with an in-process + advertiser. +- **C2.2** agent: QUIC listener + zeroconf register in `mir up`; accept handler + (frame0-verify → pin → `RunResponder` → `RunAgentSession`), sharing the post-pin helper + with the relay path; `admit()` bound. +- **C2.3** wiring: `Attach` order `[LAN, Relay]`; `mir up --no-lan`, `mir attach + --relay-only`. 
+- **C2.4** e2e: `mir up` + `mir attach` over QUIC on loopback with **no relay running**; + mDNS resolve test; bad/missing binding rejected; relay fallback when LAN is absent. Extend + `deploy/netsim` with an mDNS-within-a-Docker-network LAN path. + +Each step is independently shippable; C1 alone is a pure refactor. + +--- + +## Non-goals (now) +- **QUIC over WAN / NAT hole-punching (DCUtR).** Stated future locator; not built. The seam + is shaped for it. +- **Browser LAN-direct.** Browsers can't do mDNS/raw QUIC; the browser keeps the relay. +- **Cross-wallet LAN sharing** (Track D seam only). +- **Dropping WebRTC/pion.** The WAN path stays on WebRTC until DCUtR is built and proven. From 47060a8fb045bb718ff14fc26a98fe4a1dad29e7 Mon Sep 17 00:00:00 2001 From: Fredrik Ahlgren Date: Sat, 13 Jun 2026 13:11:14 +0200 Subject: [PATCH 2/8] refactor(client): C1 Locator seam + RelayLocator (no behavior change) Introduce a Locator interface (package client) that turns a Machine into a live peer.MsgConn; Attach composes [relayLocator] and runs Noise-KK over whatever connects. Today's relay path moves verbatim into relayLocator. Attach now returns peer.MsgConn (was *peer.DataChannel; consumers use only Send/Recv). Behavior-preserving: the relay e2e tests stay green. 
Co-Authored-By: Claude Opus 4.8 --- go/internal/client/attach.go | 91 +++++++++------------------- go/internal/client/locator.go | 19 ++++++ go/internal/client/locator_test.go | 94 +++++++++++++++++++++++++++++ go/internal/client/relay_locator.go | 82 +++++++++++++++++++++++++ go/internal/client/term.go | 2 +- 5 files changed, 225 insertions(+), 63 deletions(-) create mode 100644 go/internal/client/locator.go create mode 100644 go/internal/client/locator_test.go create mode 100644 go/internal/client/relay_locator.go diff --git a/go/internal/client/attach.go b/go/internal/client/attach.go index 69a63f6..1ffa41d 100644 --- a/go/internal/client/attach.go +++ b/go/internal/client/attach.go @@ -4,82 +4,26 @@ package client import ( "context" "encoding/hex" - "encoding/json" + "errors" "fmt" - "net/url" - "strings" - "time" - - "github.com/coder/websocket" "github.com/srcful/terminal-relay/go/internal/noise" "github.com/srcful/terminal-relay/go/internal/peer" - "github.com/srcful/terminal-relay/go/internal/signal" ) -// Attach connects to the signaling server as the owner, negotiates a P2P -// DataChannel with the named machine's agent, runs the Noise KK initiator, and -// returns the established session. Call cleanup when done. -func Attach(ctx context.Context, m Machine, id *Identity, ice []peer.ICEServer) (mc *peer.DataChannel, sess *noise.Session, cleanup func(), err error) { +// Attach connects to the named machine's agent over the first locator that can +// reach it, runs the Noise KK initiator over that MsgConn, and returns the +// established session. Call cleanup when done. 
+func Attach(ctx context.Context, m Machine, id *Identity, ice []peer.ICEServer) (mc peer.MsgConn, sess *noise.Session, cleanup func(), err error) { if !id.HasWallet() { return nil, nil, nil, fmt.Errorf("this identity has no wallet; run `mir keygen --wallet`") } - ownerID := id.WalletAddress - wsURL := "ws" + strings.TrimPrefix(m.SignalURL, "http") + - "/attach?owner_id=" + url.QueryEscape(ownerID) + - "&machine_id=" + url.QueryEscape(m.MachineID) - - c, _, err := websocket.Dial(ctx, wsURL, nil) - if err != nil { - return nil, nil, nil, fmt.Errorf("dial signaling: %w", err) - } - closeWS := func() { _ = c.CloseNow() } - - off, opened, err := peer.NewOfferer(ice) - if err != nil { - closeWS() - return nil, nil, nil, err - } - cleanup = func() { _ = off.Close(); closeWS() } - - offerSDP, err := peer.CreateOffer(off) - if err != nil { - cleanup() - return nil, nil, nil, err - } - offerMsg, _ := json.Marshal(signal.SignalMsg{Type: signal.TypeOffer, SDP: offerSDP, Binding: id.BindingJSON}) - if err := c.Write(ctx, websocket.MessageText, offerMsg); err != nil { - cleanup() - return nil, nil, nil, err - } - _, data, err := c.Read(ctx) + mc, cleanup, err = dialFirst([]Locator{relayLocator{}}, ctx, m, id, ice) if err != nil { - cleanup() - return nil, nil, nil, err - } - var ans signal.SignalMsg - if json.Unmarshal(data, &ans) != nil || ans.Type != signal.TypeAnswer { - cleanup() - if ans.Type == signal.TypeError { - return nil, nil, nil, fmt.Errorf("signaling: %s", ans.Reason) - } - return nil, nil, nil, fmt.Errorf("unexpected signaling reply: %s", string(data)) - } - if err := peer.AcceptAnswer(off, ans.SDP); err != nil { - cleanup() return nil, nil, nil, err } - octx, ocancel := context.WithTimeout(ctx, 20*time.Second) - defer ocancel() - select { - case mc = <-opened: - case <-octx.Done(): - cleanup() - return nil, nil, nil, fmt.Errorf("no direct P2P path to %q (strict P2P, no relay fallback)", m.Name) - } - hostPub, err := hex.DecodeString(m.HostPubHex) if err != nil { 
cleanup() @@ -92,3 +36,26 @@ func Attach(ctx context.Context, m Machine, id *Identity, ice []peer.ICEServer) } return mc, sess, cleanup, nil } + +// dialFirst tries each locator in order, falling through on ErrUnreachable and +// aborting on any other (real) error. It returns the MsgConn from the first +// locator that connects, or the last ErrUnreachable (or a generic "unreachable" +// error) if none did. +func dialFirst(locators []Locator, ctx context.Context, m Machine, id *Identity, ice []peer.ICEServer) (peer.MsgConn, func(), error) { + var lastErr error + for _, loc := range locators { + mc, cleanup, err := loc.Dial(ctx, m, id, ice) + if errors.Is(err, ErrUnreachable) { + lastErr = err + continue + } + if err != nil { + return nil, nil, err + } + return mc, cleanup, nil + } + if lastErr == nil { + lastErr = fmt.Errorf("machine %q unreachable", m.Name) + } + return nil, nil, lastErr +} diff --git a/go/internal/client/locator.go b/go/internal/client/locator.go new file mode 100644 index 0000000..f03e8d6 --- /dev/null +++ b/go/internal/client/locator.go @@ -0,0 +1,19 @@ +// go/internal/client/locator.go +package client + +import ( + "context" + "errors" + + "github.com/srcful/terminal-relay/go/internal/peer" +) + +// ErrUnreachable means this locator can't reach the machine; Attach falls through +// to the next locator. Any other error aborts (a real failure on a reachable path). +var ErrUnreachable = errors.New("locator: machine not reachable by this path") + +// Locator turns a Machine into a live MsgConn (post-transport, pre-Noise) plus a +// cleanup. Attach composes locators and runs Noise-KK over the first that connects. 
+type Locator interface { + Dial(ctx context.Context, m Machine, id *Identity, ice []peer.ICEServer) (peer.MsgConn, func(), error) +} diff --git a/go/internal/client/locator_test.go b/go/internal/client/locator_test.go new file mode 100644 index 0000000..6120fc3 --- /dev/null +++ b/go/internal/client/locator_test.go @@ -0,0 +1,94 @@ +// go/internal/client/locator_test.go +package client + +import ( + "context" + "errors" + "testing" + + "github.com/srcful/terminal-relay/go/internal/peer" +) + +// fakeConn is a no-op MsgConn used to prove dialFirst returns the conn from the +// first locator that connects. +type fakeConn struct{} + +func (fakeConn) Send(b []byte) error { return nil } +func (fakeConn) Recv(ctx context.Context) ([]byte, error) { return nil, nil } + +// stubLocator returns a canned (conn, cleanup, err) regardless of inputs and +// records whether it was invoked. +type stubLocator struct { + conn peer.MsgConn + cleanup func() + err error + called *bool +} + +func (s stubLocator) Dial(ctx context.Context, m Machine, id *Identity, ice []peer.ICEServer) (peer.MsgConn, func(), error) { + if s.called != nil { + *s.called = true + } + return s.conn, s.cleanup, s.err +} + +func TestDialFirstFallsThroughOnUnreachable(t *testing.T) { + want := fakeConn{} + secondCalled := false + cleaned := false + locators := []Locator{ + stubLocator{err: ErrUnreachable}, + stubLocator{conn: want, cleanup: func() { cleaned = true }, called: &secondCalled}, + } + + mc, cleanup, err := dialFirst(locators, context.Background(), Machine{Name: "box"}, &Identity{}, nil) + if err != nil { + t.Fatalf("dialFirst: unexpected error: %v", err) + } + if mc != want { + t.Fatalf("dialFirst returned wrong conn: got %#v want %#v", mc, want) + } + if !secondCalled { + t.Fatal("expected fall-through to the second locator") + } + // The returned cleanup must be the second locator's, not the first's. 
+ cleanup() + if !cleaned { + t.Fatal("expected the second locator's cleanup to be returned") + } +} + +func TestDialFirstAbortsOnRealError(t *testing.T) { + boom := errors.New("boom: reachable path failed") + secondCalled := false + locators := []Locator{ + stubLocator{err: boom}, + stubLocator{conn: fakeConn{}, called: &secondCalled}, + } + + mc, _, err := dialFirst(locators, context.Background(), Machine{Name: "box"}, &Identity{}, nil) + if !errors.Is(err, boom) { + t.Fatalf("expected the real error to abort, got: %v", err) + } + if mc != nil { + t.Fatal("expected no conn on abort") + } + if secondCalled { + t.Fatal("a real (non-unreachable) error must NOT fall through to the next locator") + } +} + +func TestDialFirstAllUnreachable(t *testing.T) { + locators := []Locator{ + stubLocator{err: ErrUnreachable}, + stubLocator{err: ErrUnreachable}, + } + + mc, _, err := dialFirst(locators, context.Background(), Machine{Name: "box"}, &Identity{}, nil) + if mc != nil { + t.Fatal("expected no conn when all locators are unreachable") + } + if !errors.Is(err, ErrUnreachable) { + t.Fatalf("expected ErrUnreachable when every locator falls through, got: %v", err) + } +} diff --git a/go/internal/client/relay_locator.go b/go/internal/client/relay_locator.go new file mode 100644 index 0000000..479fbc0 --- /dev/null +++ b/go/internal/client/relay_locator.go @@ -0,0 +1,82 @@ +// go/internal/client/relay_locator.go +package client + +import ( + "context" + "encoding/json" + "fmt" + "net/url" + "strings" + "time" + + "github.com/coder/websocket" + + "github.com/srcful/terminal-relay/go/internal/peer" + "github.com/srcful/terminal-relay/go/internal/signal" +) + +// relayLocator reaches a machine through the mir-signal relay: it dials the +// /attach WebSocket, exchanges SDP offer/answer, and waits for the WebRTC +// DataChannel to open. This is today's relay path, moved out of Attach verbatim. 
+type relayLocator struct{} + +func (relayLocator) Dial(ctx context.Context, m Machine, id *Identity, ice []peer.ICEServer) (peer.MsgConn, func(), error) { + ownerID := id.WalletAddress + wsURL := "ws" + strings.TrimPrefix(m.SignalURL, "http") + + "/attach?owner_id=" + url.QueryEscape(ownerID) + + "&machine_id=" + url.QueryEscape(m.MachineID) + + c, _, err := websocket.Dial(ctx, wsURL, nil) + if err != nil { + return nil, nil, fmt.Errorf("dial signaling: %w", err) + } + closeWS := func() { _ = c.CloseNow() } + + off, opened, err := peer.NewOfferer(ice) + if err != nil { + closeWS() + return nil, nil, err + } + cleanup := func() { _ = off.Close(); closeWS() } + + offerSDP, err := peer.CreateOffer(off) + if err != nil { + cleanup() + return nil, nil, err + } + offerMsg, _ := json.Marshal(signal.SignalMsg{Type: signal.TypeOffer, SDP: offerSDP, Binding: id.BindingJSON}) + if err := c.Write(ctx, websocket.MessageText, offerMsg); err != nil { + cleanup() + return nil, nil, err + } + + _, data, err := c.Read(ctx) + if err != nil { + cleanup() + return nil, nil, err + } + var ans signal.SignalMsg + if json.Unmarshal(data, &ans) != nil || ans.Type != signal.TypeAnswer { + cleanup() + if ans.Type == signal.TypeError { + return nil, nil, fmt.Errorf("signaling: %s", ans.Reason) + } + return nil, nil, fmt.Errorf("unexpected signaling reply: %s", string(data)) + } + if err := peer.AcceptAnswer(off, ans.SDP); err != nil { + cleanup() + return nil, nil, err + } + + octx, ocancel := context.WithTimeout(ctx, 20*time.Second) + defer ocancel() + var dc *peer.DataChannel + select { + case dc = <-opened: + case <-octx.Done(): + cleanup() + return nil, nil, fmt.Errorf("no direct P2P path to %q (strict P2P, no relay fallback)", m.Name) + } + + return dc, cleanup, nil +} diff --git a/go/internal/client/term.go b/go/internal/client/term.go index 4ac49e8..bb43588 100644 --- a/go/internal/client/term.go +++ b/go/internal/client/term.go @@ -16,7 +16,7 @@ import ( // RunInteractive puts the 
real terminal into raw mode, wires SIGWINCH to RESIZE, // and runs the bridge against stdin/stdout. Restores the terminal on exit. -func RunInteractive(ctx context.Context, mc *peer.DataChannel, sess *noise.Session, machineName string) error { +func RunInteractive(ctx context.Context, mc peer.MsgConn, sess *noise.Session, machineName string) error { fd := int(os.Stdin.Fd()) if !term.IsTerminal(fd) { return fmt.Errorf("mir attach requires a TTY (stdin is not a terminal)") From ce1e052db23ccc5bafa863cef9a54d33d400d1aa Mon Sep 17 00:00:00 2001 From: Fredrik Ahlgren Date: Sat, 13 Jun 2026 13:15:12 +0200 Subject: [PATCH 3/8] feat(quicmsg): C2.0 QUIC transport implementing peer.MsgConn A length-framed QUIC bidi stream as a peer.MsgConn, shared by client (LAN dial) and agent (LAN listen). QUIC TLS is dumb transport: the real auth is Noise-KK + the wallet binding that runs inside, so ClientTLS skips verification. ServerTLS uses an ephemeral self-signed cert. ALPN miranda/lan/v1. Co-Authored-By: Claude Opus 4.8 --- go/go.mod | 3 +- go/go.sum | 15 +- go/internal/quicmsg/quicmsg.go | 251 ++++++++++++++++++++++++++++ go/internal/quicmsg/quicmsg_test.go | 172 +++++++++++++++++++ 4 files changed, 437 insertions(+), 4 deletions(-) create mode 100644 go/internal/quicmsg/quicmsg.go create mode 100644 go/internal/quicmsg/quicmsg_test.go diff --git a/go/go.mod b/go/go.mod index 22d9ee3..b4c5671 100644 --- a/go/go.mod +++ b/go/go.mod @@ -14,6 +14,7 @@ require ( github.com/flynn/noise v1.1.0 github.com/mdp/qrterminal/v3 v3.2.1 github.com/pion/webrtc/v4 v4.2.14 + github.com/quic-go/quic-go v0.60.0 golang.org/x/crypto v0.52.0 golang.org/x/mod v0.37.0 golang.org/x/term v0.43.0 @@ -37,7 +38,7 @@ require ( github.com/pion/transport/v4 v4.0.2 // indirect github.com/pion/turn/v5 v5.0.7 // indirect github.com/wlynxg/anet v0.0.5 // indirect - golang.org/x/net v0.54.0 // indirect + golang.org/x/net v0.55.0 // indirect golang.org/x/sys v0.45.0 // indirect golang.org/x/time v0.14.0 // indirect 
rsc.io/qr v0.2.0 // indirect diff --git a/go/go.sum b/go/go.sum index 86abd08..e30a8b4 100644 --- a/go/go.sum +++ b/go/go.sum @@ -8,8 +8,9 @@ github.com/flynn/noise v1.1.0 h1:KjPQoQCEFdZDiP03phOvGi11+SVVhBG2wOWAorLsstg= github.com/flynn/noise v1.1.0/go.mod h1:xbMo+0i6+IGbYdJhF31t2eR1BIU0CYc12+BNAKwUTag= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= @@ -52,18 +53,26 @@ github.com/pion/webrtc/v4 v4.2.14 h1:Q6zMs+fSDsYuhZcNlvFGBxCOMHVV9oYcDa6O9/HIGTc github.com/pion/webrtc/v4 v4.2.14/go.mod h1:87NVKP86+g4OMrRxWhjWfUjeXP4JrV6RTlUrIW+/Jak= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/quic-go/go-ossfuzz-seeds v0.1.0 h1:APacT+iIaNF6fd8AGEiN3bT/Jtkd2jz4v4TzM7MFjy0= +github.com/quic-go/go-ossfuzz-seeds v0.1.0/go.mod h1:3IOHRbJIc+L6YKMwfDtJAM9Vj9k0YY4muhuyUYk5tbk= +github.com/quic-go/quic-go v0.60.0 h1:xcQioE8OM66UQLeUMHltK1CCcOu3JbVB4JAQdDQSB+0= +github.com/quic-go/quic-go v0.60.0/go.mod h1:wpKpjmPpftl30sL6pFh7REVpjbcCVy4zt2vDyK1TuJk= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/stretchr/testify v1.11.1 
h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/wlynxg/anet v0.0.5 h1:J3VJGi1gvo0JwZ/P1/Yc/8p63SoW98B5dHkYDmpgvvU= github.com/wlynxg/anet v0.0.5/go.mod h1:eay5PRQr7fIVAMbTbchTnO9gG65Hg/uYGdc7mguHxoA= +go.uber.org/mock v0.5.2 h1:LbtPTcP8A5k9WPXj54PPPbjcI4Y6lhyOZXn+VS7wNko= +go.uber.org/mock v0.5.2/go.mod h1:wLlUxC2vVTPTaE3UD51E0BGOAElKrILxhVSDYQLld5o= golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= golang.org/x/crypto v0.52.0 h1:RMs7fP2rXdep0CftQlK8Uf+kibLm7qkCcradZWYz988= golang.org/x/crypto v0.52.0/go.mod h1:1QgfPxDqh0T2M/elOJtp9RvuR95kVjir0e6/BvEmGbc= golang.org/x/mod v0.37.0 h1:vF1DjpVEshcIqoEaauuHebaLk1O1forxjxBaVn884JQ= golang.org/x/mod v0.37.0/go.mod h1:m8S8VeM9r4dzDwjrKO0a1sZP3YjeMamRRlD+fmR2Q/0= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.54.0 h1:2zJIZAxAHV/OHCDTCOHAYehQzLfSXuf/5SoL/Dv6w/w= -golang.org/x/net v0.54.0/go.mod h1:Sj4oj8jK6XmHpBZU/zWHw3BV3abl4Kvi+Ut7cQcY+cQ= +golang.org/x/net v0.55.0 h1:bcvxaJn3e1U6InsFWt1JUq1aSjnRxLzT2rtD2KfkDF8= +golang.org/x/net v0.55.0/go.mod h1:L5U2KuzuOe1lY7Z+aWVIKK6qEeJXnXV9yzGA+WCHJww= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.45.0 h1:dO4czNzziLiiXplLQgBCEpCvXQ3dnkn0SdaZSYdQ+FY= golang.org/x/sys v0.45.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= diff --git a/go/internal/quicmsg/quicmsg.go b/go/internal/quicmsg/quicmsg.go new file mode 100644 index 0000000..9c02d01 --- /dev/null +++ b/go/internal/quicmsg/quicmsg.go @@ -0,0 +1,251 @@ +// Package quicmsg is a QUIC-backed transport that satisfies peer.MsgConn, +// shared by the client (LAN dial) and the agent (LAN listen). Messages are +// length-framed over a single bidirectional QUIC stream. 
+// +// QUIC's TLS is treated as *dumb transport* here: it gives us an encrypted, +// reliable, ordered byte stream and nothing more. The real authentication — +// proving the peer is the right owner/agent — is the Noise-KK handshake plus +// the wallet binding that run *inside* this MsgConn. Because the transport TLS +// identity carries no trust, the client deliberately skips TLS verification +// (ClientTLS sets InsecureSkipVerify) and the server presents an ephemeral +// self-signed certificate (ServerTLS). Trust is established one layer up, not +// here. +package quicmsg + +import ( + "context" + "crypto/ecdsa" + "crypto/elliptic" + "crypto/rand" + "crypto/tls" + "crypto/x509" + "crypto/x509/pkix" + "encoding/binary" + "fmt" + "io" + "math/big" + "net" + "time" + + quic "github.com/quic-go/quic-go" + + "github.com/srcful/terminal-relay/go/internal/peer" +) + +// Conn implements peer.MsgConn — the seam shared with the WebRTC DataChannel. +var _ peer.MsgConn = (*Conn)(nil) + +// ALPN identifies the Miranda LAN-direct QUIC protocol. Both peers must agree +// on it during the TLS handshake. +const ALPN = "miranda/lan/v1" + +// maxFrame bounds a single message to 1 MiB so a malicious or buggy peer can't +// make us allocate an absurd buffer from an attacker-controlled length prefix. +const maxFrame = 1 << 20 + +// Conn wraps a QUIC connection plus its single bidirectional stream and +// implements peer.MsgConn (Send/Recv). Messages are framed with a 4-byte +// big-endian length prefix. +type Conn struct { + conn *quic.Conn + stream *quic.Stream +} + +// Send writes a single length-prefixed frame: a 4-byte big-endian length +// followed by the payload. An empty payload is a valid frame (length 0). 
+func (c *Conn) Send(b []byte) error { + if len(b) > maxFrame { + return fmt.Errorf("quicmsg: frame too large: %d > %d", len(b), maxFrame) + } + buf := make([]byte, 4+len(b)) + binary.BigEndian.PutUint32(buf[:4], uint32(len(b))) + copy(buf[4:], b) + if _, err := c.stream.Write(buf); err != nil { + return err + } + return nil +} + +// Recv reads exactly one length-prefixed frame. It honors ctx: if ctx is +// cancelled (or already cancelled) while blocked on the stream read, Recv +// returns promptly with ctx.Err() instead of parking forever. +// +// ctx is honored via the stream read deadline, mirroring how peer.DataChannel +// unblocks Recv. A watcher goroutine sets a past read deadline on ctx.Done, +// which makes any in-flight Read return a timeout error; on the normal path the +// watcher is torn down (and the deadline cleared) before returning, so no +// goroutine leaks. +func (c *Conn) Recv(ctx context.Context) (_ []byte, err error) { + // Fast path: already-cancelled ctx shouldn't even touch the stream. + if cerr := ctx.Err(); cerr != nil { + return nil, cerr + } + + stop := make(chan struct{}) + done := make(chan struct{}) + go func() { + defer close(done) + select { + case <-ctx.Done(): + // Force any blocked Read to return with a deadline-exceeded error. + _ = c.stream.SetReadDeadline(time.Now().Add(-time.Second)) + case <-stop: + } + }() + defer func() { + close(stop) + <-done + // Clear the deadline so the next Recv isn't poisoned. Only meaningful + // when the read completed normally; harmless otherwise. + _ = c.stream.SetReadDeadline(time.Time{}) + // If the read failed because ctx fired, surface ctx.Err() rather than + // the opaque deadline error. 
+ if err != nil && ctx.Err() != nil { + err = ctx.Err() + } + }() + + var lenBuf [4]byte + if _, err = io.ReadFull(c.stream, lenBuf[:]); err != nil { + return nil, err + } + n := binary.BigEndian.Uint32(lenBuf[:]) + if n > maxFrame { + return nil, fmt.Errorf("quicmsg: incoming frame too large: %d > %d", n, maxFrame) + } + if n == 0 { + return []byte{}, nil + } + buf := make([]byte, n) + if _, err = io.ReadFull(c.stream, buf); err != nil { + return nil, err + } + return buf, nil +} + +// Close closes the stream and the underlying QUIC connection. +func (c *Conn) Close() error { + if c.stream != nil { + _ = c.stream.Close() + } + if c.conn != nil { + return c.conn.CloseWithError(0, "") + } + return nil +} + +// ServerTLS returns a TLS config for the listener using a freshly generated, +// ephemeral self-signed certificate. The cert identity is meaningless on +// purpose: trust comes from Noise-KK + the wallet binding inside the stream, +// not from this certificate. +func ServerTLS() (*tls.Config, error) { + key, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + if err != nil { + return nil, err + } + serial, err := rand.Int(rand.Reader, new(big.Int).Lsh(big.NewInt(1), 128)) + if err != nil { + return nil, err + } + tmpl := &x509.Certificate{ + SerialNumber: serial, + Subject: pkix.Name{CommonName: "miranda-lan"}, + NotBefore: time.Now().Add(-time.Minute), + NotAfter: time.Now().Add(24 * time.Hour), + KeyUsage: x509.KeyUsageDigitalSignature, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, + } + der, err := x509.CreateCertificate(rand.Reader, tmpl, tmpl, &key.PublicKey, key) + if err != nil { + return nil, err + } + cert := tls.Certificate{ + Certificate: [][]byte{der}, + PrivateKey: key, + } + return &tls.Config{ + Certificates: []tls.Certificate{cert}, + MinVersion: tls.VersionTLS13, + NextProtos: []string{ALPN}, + }, nil +} + +// ClientTLS returns a TLS config for dialing. 
It skips verification on purpose: +// the QUIC TLS identity carries no trust in Miranda — the real authentication +// is the Noise-KK handshake and wallet binding that run inside the stream. +func ClientTLS() *tls.Config { + return &tls.Config{ + InsecureSkipVerify: true, // trust is established by Noise-KK inside, not by TLS + MinVersion: tls.VersionTLS13, + NextProtos: []string{ALPN}, + } +} + +// Dial connects to a quicmsg listener at addr, opens the single bidirectional +// stream, and wraps it in a *Conn. On any error the QUIC connection is closed. +func Dial(ctx context.Context, addr string) (*Conn, error) { + conn, err := quic.DialAddr(ctx, addr, ClientTLS(), nil) + if err != nil { + return nil, err + } + stream, err := conn.OpenStreamSync(ctx) + if err != nil { + _ = conn.CloseWithError(0, "") + return nil, err + } + // Nudge the stream open so the listener's AcceptStream returns without + // waiting for the first payload. Writing the empty frame keeps both sides + // in lockstep on the framing protocol. + if _, err := stream.Write([]byte{0, 0, 0, 0}); err != nil { + _ = conn.CloseWithError(0, "") + return nil, err + } + return &Conn{conn: conn, stream: stream}, nil +} + +// Listener wraps a QUIC listener and yields *Conn for each accepted connection. +type Listener struct { + ln *quic.Listener +} + +// Listen binds a quicmsg listener at addr (e.g. "127.0.0.1:0" for an ephemeral +// port). Use Addr to discover the bound address. +func Listen(addr string) (*Listener, error) { + tlsConf, err := ServerTLS() + if err != nil { + return nil, err + } + ln, err := quic.ListenAddr(addr, tlsConf, nil) + if err != nil { + return nil, err + } + return &Listener{ln: ln}, nil +} + +// Addr returns the address the listener is bound to (so callers/mDNS can learn +// the ephemeral port). 
+func (l *Listener) Addr() net.Addr { return l.ln.Addr() } + +// Accept waits for the next QUIC connection, accepts its first bidirectional +// stream, consumes the open-nudge frame Dial sent, and wraps it in a *Conn. +func (l *Listener) Accept(ctx context.Context) (*Conn, error) { + conn, err := l.ln.Accept(ctx) + if err != nil { + return nil, err + } + stream, err := conn.AcceptStream(ctx) + if err != nil { + _ = conn.CloseWithError(0, "") + return nil, err + } + c := &Conn{conn: conn, stream: stream} + // Consume the empty open-nudge frame Dial wrote to flush the stream open. + if _, err := c.Recv(ctx); err != nil { + _ = c.Close() + return nil, err + } + return c, nil +} + +// Close closes the listener (does not close already-accepted connections). +func (l *Listener) Close() error { return l.ln.Close() } diff --git a/go/internal/quicmsg/quicmsg_test.go b/go/internal/quicmsg/quicmsg_test.go new file mode 100644 index 0000000..3447d0f --- /dev/null +++ b/go/internal/quicmsg/quicmsg_test.go @@ -0,0 +1,172 @@ +package quicmsg + +import ( + "bytes" + "context" + "sync" + "testing" + "time" +) + +// TestConnRoundTripFrames stands up a quicmsg listener on an ephemeral port, +// dials it, and asserts that several frames round-trip exactly and in order in +// both directions: an empty frame, a small frame, and a 70_000-byte frame +// (which exercises the length prefix beyond 64 KiB). +func TestConnRoundTripFrames(t *testing.T) { + ln, err := Listen("127.0.0.1:0") + if err != nil { + t.Fatalf("Listen: %v", err) + } + defer ln.Close() + + frames := [][]byte{ + {}, // empty frame + []byte("hello, miranda"), // small frame + bytes.Repeat([]byte{0xAB}, 70_000), // > 64 KiB + } + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + // Accept side runs concurrently with the dial. 
+ type acceptResult struct { + c *Conn + err error + } + accepted := make(chan acceptResult, 1) + go func() { + c, err := ln.Accept(ctx) + accepted <- acceptResult{c, err} + }() + + client, err := Dial(ctx, ln.Addr().String()) + if err != nil { + t.Fatalf("Dial: %v", err) + } + defer client.Close() + + ar := <-accepted + if ar.err != nil { + t.Fatalf("Accept: %v", ar.err) + } + server := ar.c + defer server.Close() + + // Exercise both directions: client->server and server->client. + dirs := []struct { + name string + sender *Conn + receiver *Conn + }{ + {"client->server", client, server}, + {"server->client", server, client}, + } + + for _, d := range dirs { + d := d + t.Run(d.name, func(t *testing.T) { + var wg sync.WaitGroup + wg.Add(1) + var recvErr error + got := make([][]byte, len(frames)) + go func() { + defer wg.Done() + for i := range frames { + b, err := d.receiver.Recv(ctx) + if err != nil { + recvErr = err + return + } + got[i] = b + } + }() + + for _, f := range frames { + if err := d.sender.Send(f); err != nil { + t.Fatalf("Send: %v", err) + } + } + + wg.Wait() + if recvErr != nil { + t.Fatalf("Recv: %v", recvErr) + } + for i, want := range frames { + if !bytes.Equal(got[i], want) { + t.Fatalf("frame %d: got %d bytes, want %d bytes (in-order mismatch)", i, len(got[i]), len(want)) + } + } + }) + } +} + +// TestRecvRespectsContext asserts that Recv returns promptly with an error when +// its ctx is already cancelled, rather than blocking forever waiting for data. 
+func TestRecvRespectsContext(t *testing.T) { + ln, err := Listen("127.0.0.1:0") + if err != nil { + t.Fatalf("Listen: %v", err) + } + defer ln.Close() + + dialCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + accepted := make(chan *Conn, 1) + go func() { + c, err := ln.Accept(dialCtx) + if err != nil { + accepted <- nil + return + } + accepted <- c + }() + + client, err := Dial(dialCtx, ln.Addr().String()) + if err != nil { + t.Fatalf("Dial: %v", err) + } + defer client.Close() + + server := <-accepted + if server == nil { + t.Fatal("Accept failed") + } + defer server.Close() + + // Already-cancelled ctx: Recv must return promptly with an error and not + // block (no peer is sending anything). + cctx, ccancel := context.WithCancel(context.Background()) + ccancel() + + done := make(chan error, 1) + go func() { + _, err := client.Recv(cctx) + done <- err + }() + + select { + case err := <-done: + if err == nil { + t.Fatal("Recv with cancelled ctx returned nil error, want non-nil") + } + case <-time.After(3 * time.Second): + t.Fatal("Recv blocked despite cancelled ctx") + } + + // The connection must still be usable afterwards: a subsequent Send/Recv + // with a fresh ctx should round-trip, proving the cancel didn't corrupt the + // stream. 
+ okCtx, okCancel := context.WithTimeout(context.Background(), 5*time.Second) + defer okCancel() + if err := server.Send([]byte("ping")); err != nil { + t.Fatalf("post-cancel Send: %v", err) + } + b, err := client.Recv(okCtx) + if err != nil { + t.Fatalf("post-cancel Recv: %v", err) + } + if !bytes.Equal(b, []byte("ping")) { + t.Fatalf("post-cancel Recv: got %q, want %q", b, "ping") + } +} From 88046d67b5934d313c309eb08d5dc6ab782c2157 Mon Sep 17 00:00:00 2001 From: Fredrik Ahlgren Date: Sat, 13 Jun 2026 13:18:37 +0200 Subject: [PATCH 4/8] feat(client): C2.1 LANLocator (mDNS resolve + QUIC + frame0 binding) A lanLocator that resolves a machine_id to a LAN address (mDNS in prod, injectable resolver in tests), QUIC-dials it, and sends the wallet binding as frame 0 before Noise-KK. Returns ErrUnreachable on miss / no-wallet so Attach falls through to the relay. Co-Authored-By: Claude Opus 4.8 --- go/go.mod | 3 + go/go.sum | 24 +++++ go/internal/client/lan_locator.go | 116 ++++++++++++++++++++ go/internal/client/lan_locator_test.go | 144 +++++++++++++++++++++++++ 4 files changed, 287 insertions(+) create mode 100644 go/internal/client/lan_locator.go create mode 100644 go/internal/client/lan_locator_test.go diff --git a/go/go.mod b/go/go.mod index b4c5671..9c82fd2 100644 --- a/go/go.mod +++ b/go/go.mod @@ -12,6 +12,7 @@ require ( github.com/coder/websocket v1.8.14 github.com/creack/pty v1.1.24 github.com/flynn/noise v1.1.0 + github.com/grandcat/zeroconf v1.0.0 github.com/mdp/qrterminal/v3 v3.2.1 github.com/pion/webrtc/v4 v4.2.14 github.com/quic-go/quic-go v0.60.0 @@ -21,7 +22,9 @@ require ( ) require ( + github.com/cenkalti/backoff v2.2.1+incompatible // indirect github.com/google/uuid v1.6.0 // indirect + github.com/miekg/dns v1.1.27 // indirect github.com/pion/datachannel v1.6.0 // indirect github.com/pion/dtls/v3 v3.1.3 // indirect github.com/pion/ice/v4 v4.2.7 // indirect diff --git a/go/go.sum b/go/go.sum index e30a8b4..b611fde 100644 --- a/go/go.sum +++ b/go/go.sum 
@@ -1,3 +1,5 @@ +github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4= +github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= github.com/coder/websocket v1.8.14 h1:9L0p0iKiNOibykf283eHkKUHHrpG7f65OE3BhhO7v9g= github.com/coder/websocket v1.8.14/go.mod h1:NX3SzP+inril6yawo5CQXx8+fk145lPDC6pumgx0mVg= github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s= @@ -8,6 +10,8 @@ github.com/flynn/noise v1.1.0 h1:KjPQoQCEFdZDiP03phOvGi11+SVVhBG2wOWAorLsstg= github.com/flynn/noise v1.1.0/go.mod h1:xbMo+0i6+IGbYdJhF31t2eR1BIU0CYc12+BNAKwUTag= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grandcat/zeroconf v1.0.0 h1:uHhahLBKqwWBV6WZUDAT71044vwOTL+McW0mBJvo6kE= +github.com/grandcat/zeroconf v1.0.0/go.mod h1:lTKmG1zh86XyCoUeIHSA4FJMBwCJiQmGfcP2PdzytEs= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -17,6 +21,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/mdp/qrterminal/v3 v3.2.1 h1:6+yQjiiOsSuXT5n9/m60E54vdgFsw0zhADHhHLrFet4= github.com/mdp/qrterminal/v3 v3.2.1/go.mod h1:jOTmXvnBsMy5xqLniO0R++Jmjs2sTm9dFSuQ5kpz/SU= +github.com/miekg/dns v1.1.27 h1:aEH/kqUzUxGJ/UHcEKdJY+ugH6WEzsEBBSPa8zuy1aM= +github.com/miekg/dns v1.1.27/go.mod h1:KNUDUusw/aVsxyTYZM1oqvCicbwhgbNgztCETuNZ7xM= github.com/pion/datachannel v1.6.0 h1:XecBlj+cvsxhAMZWFfFcPyUaDZtd7IJvrXqlXD/53i0= github.com/pion/datachannel v1.6.0/go.mod h1:ur+wzYF8mWdC+Mkis5Thosk+u/VOL287apDNEbFpsIk= github.com/pion/dtls/v3 v3.1.3 h1:OA6J5UCeA8DvRXD8ofaMnlNPXN3ISBLHHJ9P8SWL09E= @@ 
-51,6 +57,8 @@ github.com/pion/turn/v5 v5.0.7 h1:cA4zPYZR/tS1qZqOi5myHSQ+cwPENCvY8T/wMloP8Tg= github.com/pion/turn/v5 v5.0.7/go.mod h1:1VwvxElZaOdJU0liJ/WUSm/Tsh+n2OxS5ISSDxgOWxU= github.com/pion/webrtc/v4 v4.2.14 h1:Q6zMs+fSDsYuhZcNlvFGBxCOMHVV9oYcDa6O9/HIGTc= github.com/pion/webrtc/v4 v4.2.14/go.mod h1:87NVKP86+g4OMrRxWhjWfUjeXP4JrV6RTlUrIW+/Jak= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/quic-go/go-ossfuzz-seeds v0.1.0 h1:APacT+iIaNF6fd8AGEiN3bT/Jtkd2jz4v4TzM7MFjy0= @@ -65,24 +73,40 @@ github.com/wlynxg/anet v0.0.5 h1:J3VJGi1gvo0JwZ/P1/Yc/8p63SoW98B5dHkYDmpgvvU= github.com/wlynxg/anet v0.0.5/go.mod h1:eay5PRQr7fIVAMbTbchTnO9gG65Hg/uYGdc7mguHxoA= go.uber.org/mock v0.5.2 h1:LbtPTcP8A5k9WPXj54PPPbjcI4Y6lhyOZXn+VS7wNko= go.uber.org/mock v0.5.2/go.mod h1:wLlUxC2vVTPTaE3UD51E0BGOAElKrILxhVSDYQLld5o= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= golang.org/x/crypto v0.52.0 h1:RMs7fP2rXdep0CftQlK8Uf+kibLm7qkCcradZWYz988= golang.org/x/crypto v0.52.0/go.mod h1:1QgfPxDqh0T2M/elOJtp9RvuR95kVjir0e6/BvEmGbc= +golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.37.0 h1:vF1DjpVEshcIqoEaauuHebaLk1O1forxjxBaVn884JQ= golang.org/x/mod v0.37.0/go.mod h1:m8S8VeM9r4dzDwjrKO0a1sZP3YjeMamRRlD+fmR2Q/0= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net 
v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.55.0 h1:bcvxaJn3e1U6InsFWt1JUq1aSjnRxLzT2rtD2KfkDF8= golang.org/x/net v0.55.0/go.mod h1:L5U2KuzuOe1lY7Z+aWVIKK6qEeJXnXV9yzGA+WCHJww= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= +golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.45.0 h1:dO4czNzziLiiXplLQgBCEpCvXQ3dnkn0SdaZSYdQ+FY= golang.org/x/sys v0.45.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.43.0 h1:S4RLU2sB31O/NCl+zFN9Aru9A/Cq2aqKpTZJ6B+DwT4= golang.org/x/term v0.43.0/go.mod h1:lrhlHNdQJHO+1qVYiHfFKVuVioJIheAc3fBSMFYEIsk= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= 
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191216052735-49a3e744a425/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/go/internal/client/lan_locator.go b/go/internal/client/lan_locator.go new file mode 100644 index 0000000..ac82f13 --- /dev/null +++ b/go/internal/client/lan_locator.go @@ -0,0 +1,116 @@ +// go/internal/client/lan_locator.go +package client + +import ( + "context" + "net" + "strconv" + "strings" + "time" + + "github.com/grandcat/zeroconf" + + "github.com/srcful/terminal-relay/go/internal/peer" + "github.com/srcful/terminal-relay/go/internal/quicmsg" +) + +// resolver maps a machine_id to a dialable host:port on the LAN. mdnsResolver is the +// prod impl; tests inject a static resolver so the QUIC/Noise path runs without +// multicast (flaky in CI). resolve returns ErrUnreachable when the machine isn't found. +type resolver interface { + resolve(ctx context.Context, machineID string) (addr string, err error) +} + +// lanLocator reaches an agent on the local network: it resolves the machine_id +// to a host:port (mDNS in prod, injectable in tests), QUIC-dials it, and sends +// the wallet binding as the first application frame before Noise-KK runs inside +// the MsgConn. It returns ErrUnreachable on any miss (no wallet, resolve miss, +// dial/send failure) so Attach falls through to the relay path. 
+type lanLocator struct{ res resolver } + +func (l lanLocator) Dial(ctx context.Context, m Machine, id *Identity, _ []peer.ICEServer) (peer.MsgConn, func(), error) { + if !id.HasWallet() { + return nil, nil, ErrUnreachable // LAN attach requires a wallet binding + } + addr, err := l.res.resolve(ctx, m.MachineID) + if err != nil { + return nil, nil, ErrUnreachable + } + conn, err := quicmsg.Dial(ctx, addr) + if err != nil { + return nil, nil, ErrUnreachable + } + if err := conn.Send([]byte(id.BindingJSON)); err != nil { // frame 0: the wallet binding + _ = conn.Close() + return nil, nil, ErrUnreachable + } + return conn, func() { _ = conn.Close() }, nil +} + +// mDNS service/domain the agent advertises on. The agent registers under +// _miranda._udp.local. with a "mid=" TXT entry (and/or the +// machine_id as the instance name) so the client can resolve it by machine_id. +const mdnsService = "_miranda._udp" +const mdnsDomain = "local." + +// resolveTimeout bounds the mDNS browse so a miss fails fast (ErrUnreachable) +// rather than blocking Attach's first locator indefinitely. +var resolveTimeout = 1500 * time.Millisecond + +// mdnsResolver is the production resolver: it browses the LAN over mDNS for the +// Miranda service and matches the requested machine_id. +type mdnsResolver struct{} + +func (mdnsResolver) resolve(ctx context.Context, machineID string) (string, error) { + r, err := zeroconf.NewResolver() + if err != nil { + return "", ErrUnreachable + } + + // Bound the browse so a miss fails fast. Derive from the caller's ctx so + // cancellation still propagates. + bctx, cancel := context.WithTimeout(ctx, resolveTimeout) + defer cancel() + + entries := make(chan *zeroconf.ServiceEntry, 8) + if err := r.Browse(bctx, mdnsService, mdnsDomain, entries); err != nil { + return "", ErrUnreachable + } + + for { + select { + case entry, ok := <-entries: + if !ok { + // Channel closed without a match (browse ended). 
+ return "", ErrUnreachable + } + if entry == nil || !matchesMachine(entry, machineID) { + continue + } + if len(entry.AddrIPv4) == 0 || entry.Port == 0 { + continue + } + return net.JoinHostPort(entry.AddrIPv4[0].String(), strconv.Itoa(entry.Port)), nil + case <-bctx.Done(): + return "", ErrUnreachable + } + } +} + +// matchesMachine reports whether a browsed service entry belongs to machineID, +// either via a "mid=" TXT record or by the instance name. +func matchesMachine(entry *zeroconf.ServiceEntry, machineID string) bool { + if entry.Instance == machineID { + return true + } + want := "mid=" + machineID + for _, txt := range entry.Text { + if strings.TrimSpace(txt) == want { + return true + } + } + return false +} + +// newMDNSResolver returns the production mDNS-backed resolver. +func newMDNSResolver() resolver { return mdnsResolver{} } diff --git a/go/internal/client/lan_locator_test.go b/go/internal/client/lan_locator_test.go new file mode 100644 index 0000000..a7b439d --- /dev/null +++ b/go/internal/client/lan_locator_test.go @@ -0,0 +1,144 @@ +// go/internal/client/lan_locator_test.go +package client + +import ( + "bytes" + "context" + "errors" + "testing" + "time" + + "github.com/srcful/terminal-relay/go/internal/quicmsg" +) + +// staticResolver returns a fixed address (or a fixed error), so the QUIC/Noise +// path runs in tests without touching mDNS multicast (flaky in CI). +type staticResolver struct { + addr string + err error +} + +func (s staticResolver) resolve(ctx context.Context, machineID string) (string, error) { + if s.err != nil { + return "", s.err + } + return s.addr, nil +} + +// TestLANLocatorDialSendsBinding stands up a real quicmsg listener, dials it via +// lanLocator, and asserts the wallet binding is delivered as frame 0. 
+func TestLANLocatorDialSendsBinding(t *testing.T) { + ln, err := quicmsg.Listen("127.0.0.1:0") + if err != nil { + t.Fatalf("listen: %v", err) + } + defer ln.Close() + + gotFrame := make(chan []byte, 1) + gotErr := make(chan error, 1) + go func() { + actx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + conn, err := ln.Accept(actx) + if err != nil { + gotErr <- err + return + } + frame, err := conn.Recv(actx) + if err != nil { + gotErr <- err + return + } + gotFrame <- frame + }() + + id := &Identity{} + secret := make([]byte, 32) + for i := range secret { + secret[i] = byte(i + 1) + } + if err := id.SetFromSecret(secret); err != nil { + t.Fatalf("set from secret: %v", err) + } + if !id.HasWallet() { + t.Fatal("expected identity to have a wallet") + } + + m := Machine{Name: "box", MachineID: "machine-xyz"} + res := staticResolver{addr: ln.Addr().String()} + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + conn, cleanup, err := lanLocator{res: res}.Dial(ctx, m, id, nil) + if err != nil { + t.Fatalf("Dial: unexpected error: %v", err) + } + if conn == nil { + t.Fatal("Dial: nil MsgConn") + } + if cleanup == nil { + t.Fatal("Dial: nil cleanup") + } + defer cleanup() + + select { + case err := <-gotErr: + t.Fatalf("stub agent error: %v", err) + case frame := <-gotFrame: + if !bytes.Equal(frame, []byte(id.BindingJSON)) { + t.Fatalf("frame 0 mismatch:\n got=%q\nwant=%q", frame, id.BindingJSON) + } + case <-time.After(5 * time.Second): + t.Fatal("timed out waiting for stub agent to receive frame 0") + } +} + +// TestLANLocatorUnreachableOnResolveMiss: a resolver that can't find the machine +// makes Dial return ErrUnreachable so Attach falls through to the relay. 
+func TestLANLocatorUnreachableOnResolveMiss(t *testing.T) { + id := &Identity{} + secret := make([]byte, 32) + for i := range secret { + secret[i] = byte(i + 7) + } + if err := id.SetFromSecret(secret); err != nil { + t.Fatalf("set from secret: %v", err) + } + + res := staticResolver{err: ErrUnreachable} + m := Machine{Name: "box", MachineID: "machine-missing"} + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + conn, cleanup, err := lanLocator{res: res}.Dial(ctx, m, id, nil) + if !errors.Is(err, ErrUnreachable) { + t.Fatalf("expected ErrUnreachable, got %v", err) + } + if conn != nil || cleanup != nil { + t.Fatal("expected nil conn/cleanup on miss") + } +} + +// TestLANLocatorUnreachableWithoutWallet: a legacy (no-wallet) identity can't do +// LAN attach (which requires a binding), so Dial returns ErrUnreachable before +// even touching the resolver. +func TestLANLocatorUnreachableWithoutWallet(t *testing.T) { + id := &Identity{} // no SecretHex/BindingJSON => HasWallet()==false + if id.HasWallet() { + t.Fatal("expected identity to have no wallet") + } + + // Resolver hands back an undialable address (it does not panic); this pins the ErrUnreachable result but does not by itself prove the resolver was skipped. + res := staticResolver{addr: "should.not.dial:1"} + m := Machine{Name: "box", MachineID: "machine-legacy"} + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + conn, cleanup, err := lanLocator{res: res}.Dial(ctx, m, id, nil) + if !errors.Is(err, ErrUnreachable) { + t.Fatalf("expected ErrUnreachable, got %v", err) + } + if conn != nil || cleanup != nil { + t.Fatal("expected nil conn/cleanup without a wallet") + } +} From 6a820eb7b45059086acaf76aff09bb6c0a0e46cf Mon Sep 17 00:00:00 2001 From: Fredrik Ahlgren Date: Sat, 13 Jun 2026 13:22:41 +0200 Subject: [PATCH 5/8] feat(agent): C2.2 QUIC LAN listener + mDNS advertise + binding-gated accept mir up can accept LAN-direct QUIC connections (advertised via mDNS as _miranda._udp).
The wallet binding arrives as frame 0; the agent checks IsOwnerPinned + verifies the binding (reusing ownerPubFromBinding) and pins binding.x25519, then runs the SAME authenticated PTY session as the relay path via the extracted serveAuthenticated helper. admit() bounds pre-auth handshakes. Not yet auto-started (wiring + --no-lan is next). Co-Authored-By: Claude Opus 4.8 --- go/internal/agent/lan.go | 105 ++++++++++++++++++++++++++ go/internal/agent/lan_test.go | 135 ++++++++++++++++++++++++++++++++++ go/internal/agent/runtime.go | 27 ++++--- 3 files changed, 255 insertions(+), 12 deletions(-) create mode 100644 go/internal/agent/lan.go create mode 100644 go/internal/agent/lan_test.go diff --git a/go/internal/agent/lan.go b/go/internal/agent/lan.go new file mode 100644 index 0000000..cb4017b --- /dev/null +++ b/go/internal/agent/lan.go @@ -0,0 +1,105 @@ +// go/internal/agent/lan.go +package agent + +import ( + "context" + "net" + "strconv" + + "github.com/grandcat/zeroconf" + + "github.com/srcful/terminal-relay/go/internal/identity" + "github.com/srcful/terminal-relay/go/internal/quicmsg" +) + +// lanService is the mDNS service type advertised for LAN-direct attach. Clients +// browse for this to discover an agent's ephemeral QUIC port on the local network. +const lanService = "_miranda._udp" + +// startLAN opens a QUIC listener for LAN-direct attach and advertises it over mDNS. +// Returns the bound address (for callers/tests) and a stop func. Each connection runs +// the same binding-gated authenticated session as the relay path. +// +// The QUIC TLS identity carries no trust (see quicmsg): authentication is the wallet +// binding (frame 0) plus the Noise-KK handshake that run inside the stream. 
+func (rt *Runtime) startLAN(ctx context.Context) (addr string, stop func(), err error) { + ln, err := quicmsg.Listen("0.0.0.0:0") // ephemeral; advertised via mDNS + if err != nil { + return "", nil, err + } + port, err := portOf(ln.Addr()) + if err != nil { + _ = ln.Close() + return "", nil, err + } + srv, err := zeroconf.Register(rt.cfg.MachineID, lanService, "local.", port, []string{"mid=" + rt.cfg.MachineID}, nil) + if err != nil { + _ = ln.Close() + return "", nil, err + } + go rt.acceptLAN(ctx, ln) + stop = func() { + srv.Shutdown() + _ = ln.Close() + } + // The listener binds 0.0.0.0 (all interfaces) so real LAN peers reach it via + // the mDNS-advertised host IP. The returned addr is for local callers/tests, so + // hand back a dialable loopback form rather than the unspecified 0.0.0.0/[::]. + return net.JoinHostPort("127.0.0.1", strconv.Itoa(port)), stop, nil +} + +// portOf extracts the UDP port from a listener address. quic's listener returns a +// *net.UDPAddr, but we fall back to parsing the string form so we don't depend on +// the concrete type. +func portOf(a net.Addr) (int, error) { + if ua, ok := a.(*net.UDPAddr); ok { + return ua.Port, nil + } + _, portStr, err := net.SplitHostPort(a.String()) + if err != nil { + return 0, err + } + return strconv.Atoi(portStr) +} + +// acceptLAN loops accepting LAN-direct connections until the listener closes (stop) +// or ctx is done. Each connection is handled on its own goroutine. +func (rt *Runtime) acceptLAN(ctx context.Context, ln *quicmsg.Listener) { + for { + conn, err := ln.Accept(ctx) + if err != nil { + return // listener closed or ctx done + } + go rt.lanAccept(ctx, conn) + } +} + +// lanAccept gates a single LAN-direct connection: it reads the wallet binding as +// frame 0, refuses any unpinned wallet *before* the Noise handshake, recovers the +// X25519 pin from the binding, then runs the same authenticated PTY session as the +// relay path. 
admit() bounds concurrent pre-auth handshakes (a DoS bound shared with +// the relay path). +func (rt *Runtime) lanAccept(ctx context.Context, conn *quicmsg.Conn) { + defer conn.Close() + if !rt.admit() { + return // too many pre-auth handshakes in flight + } + defer rt.release() + + bindingJSON, err := conn.Recv(ctx) // frame 0 + if err != nil { + return + } + sb, err := identity.ParseSignedBinding(bindingJSON) + if err != nil { + return + } + if !rt.cfg.IsOwnerPinned(sb.Wallet) { + return // unpinned wallet: refuse pre-Noise, no session starts + } + ownerPub, err := ownerPubFromBinding(string(bindingJSON), sb.Wallet) + if err != nil { + return + } + _ = rt.serveAuthenticated(ctx, conn, ownerPub) +} diff --git a/go/internal/agent/lan_test.go b/go/internal/agent/lan_test.go new file mode 100644 index 0000000..7c58d2b --- /dev/null +++ b/go/internal/agent/lan_test.go @@ -0,0 +1,135 @@ +// go/internal/agent/lan_test.go +package agent + +import ( + "bytes" + "context" + "encoding/json" + "testing" + "time" + + "github.com/srcful/terminal-relay/go/internal/noise" + "github.com/srcful/terminal-relay/go/internal/peer" + "github.com/srcful/terminal-relay/go/internal/quicmsg" +) + +// TestLANAttachRealShell drives the LAN-direct path end to end: a QUIC client +// dials the agent's listener, sends a wallet-signed binding as frame 0 (in place +// of the relay offer's binding), runs the Noise-KK initiator handshake, and +// round-trips a real shell command. It is the LAN twin of +// TestEndToEndRealShellOverP2P, swapping the WebRTC DataChannel for quicmsg. +func TestLANAttachRealShell(t *testing.T) { + // Owner is a real wallet: owner_id is the base58 address, the Noise pin is + // recovered from a wallet-signed binding carried as frame 0 (B1.4.1). 
+ ownerPriv, _, ownerID, bindingJSON := ownerBinding(t, bytes.Repeat([]byte{0x22}, 32), "owner-device-lan") + dir := t.TempDir() + cfg, err := LoadOrInit(dir, "lan-machine", "http://unused") + if err != nil { + t.Fatal(err) + } + if err := PinOwner(dir, ownerID); err != nil { + t.Fatal(err) + } + cfg, _ = LoadOrInit(dir, "lan-machine", "http://unused") // reload with the pinned owner + + ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) + defer cancel() + + rt := NewRuntime(cfg, []string{"sh"}, nil) + addr, stop, err := rt.startLAN(ctx) + if err != nil { + t.Fatalf("startLAN: %v", err) + } + defer stop() + + // Browser-stand-in over QUIC: dial, send binding frame 0, Noise init. + conn, err := quicmsg.Dial(ctx, addr) + if err != nil { + t.Fatalf("dial: %v", err) + } + defer conn.Close() + if err := conn.Send([]byte(bindingJSON)); err != nil { + t.Fatalf("send binding: %v", err) + } + + sess, err := peer.RunInitiator(ctx, conn, ownerPriv, cfg.HostPub()) + if err != nil { + t.Fatalf("initiator handshake: %v", err) + } + + // First frame must be HELLO with the machine name. + hello := recvFrame(t, ctx, conn, sess) + htype, hpayload, _ := noise.DecodeFrame(hello) + if htype != noise.FrameHello { + t.Fatalf("expected HELLO, got %d", htype) + } + var meta map[string]string + _ = json.Unmarshal(hpayload, &meta) + if meta["name"] != "lan-machine" { + t.Fatalf("HELLO name = %q", meta["name"]) + } + + // Run a real command over the encrypted LAN-direct channel. 
+ sendData(t, ctx, conn, sess, []byte("echo LAN_OK\n")) + deadline := time.Now().Add(10 * time.Second) + var acc bytes.Buffer + for time.Now().Before(deadline) { + frame := recvFrame(t, ctx, conn, sess) + typ, payload, derr := noise.DecodeFrame(frame) + if derr != nil || typ != noise.FrameData { + continue + } + acc.Write(payload) + if bytes.Contains(acc.Bytes(), []byte("LAN_OK")) { + return // SUCCESS: real shell, over LAN-direct QUIC + } + } + t.Fatalf("never saw command output; got:\n%s", acc.String()) +} + +// TestLANRejectsUnpinnedBinding proves the agent refuses a binding for a wallet it +// has not pinned: it closes the stream before the Noise handshake, so the client's +// initiator handshake fails and no session starts. +func TestLANRejectsUnpinnedBinding(t *testing.T) { + // The agent pins owner A. + _, _, ownerID, _ := ownerBinding(t, bytes.Repeat([]byte{0x33}, 32), "owner-device-pinned") + dir := t.TempDir() + cfg, err := LoadOrInit(dir, "lan-machine", "http://unused") + if err != nil { + t.Fatal(err) + } + if err := PinOwner(dir, ownerID); err != nil { + t.Fatal(err) + } + cfg, _ = LoadOrInit(dir, "lan-machine", "http://unused") + + // The attacker is owner B (a valid wallet, validly self-signed binding) that the + // agent has NOT pinned. + attackerPriv, _, _, attackerBinding := ownerBinding(t, bytes.Repeat([]byte{0x99}, 32), "owner-device-attacker") + + ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) + defer cancel() + + rt := NewRuntime(cfg, []string{"sh"}, nil) + addr, stop, err := rt.startLAN(ctx) + if err != nil { + t.Fatalf("startLAN: %v", err) + } + defer stop() + + conn, err := quicmsg.Dial(ctx, addr) + if err != nil { + t.Fatalf("dial: %v", err) + } + defer conn.Close() + if err := conn.Send([]byte(attackerBinding)); err != nil { + t.Fatalf("send binding: %v", err) + } + + // The agent closes the stream pre-Noise; the initiator handshake must fail. 
+ hctx, hcancel := context.WithTimeout(ctx, 5*time.Second) + defer hcancel() + if _, err := peer.RunInitiator(hctx, conn, attackerPriv, cfg.HostPub()); err == nil { + t.Fatal("expected handshake failure for unpinned binding, got success") + } +} diff --git a/go/internal/agent/runtime.go b/go/internal/agent/runtime.go index 513748b..cc1c45d 100644 --- a/go/internal/agent/runtime.go +++ b/go/internal/agent/runtime.go @@ -357,24 +357,27 @@ func (rt *Runtime) handleOffer(ctx context.Context, c *websocket.Conn, m signal. if err != nil { return } - sess, err := peer.RunResponder(attachCtx, dc, rt.cfg.HostPriv(), ownerPub) + _ = rt.serveAuthenticated(attachCtx, dc, ownerPub) +} + +// serveAuthenticated runs the Noise-KK responder against the pinned owner X25519 key +// and then the PTY session over mc. Shared by the relay offer path and LAN-direct. +// +// The active-session bracket lives HERE — after auth — not at the transport accept: +// pre-auth handshakes (already bounded by admit()) must not inflate the active count +// and starve opt-in auto-update, which defers binary swaps until the agent is idle. +func (rt *Runtime) serveAuthenticated(ctx context.Context, mc peer.MsgConn, ownerPub []byte) error { + sess, err := peer.RunResponder(ctx, mc, rt.cfg.HostPriv(), ownerPub) if err != nil { - return + return err } - // Authenticated session established (Noise KK passed). Count it as active so - // opt-in auto-update defers any binary swap until the agent is idle. Bracketed - // HERE — after auth — not at handleOffer's top: pre-auth attach handshakes - // (already bounded by admit()) must not inflate the active count and starve - // auto-update. 
rt.sessionStarted() defer rt.sessionEnded() - - pty, err := StartPTY(attachCtx, rt.launch) + pty, err := StartPTY(ctx, rt.launch) if err != nil { - return + return err } defer pty.Close() - // For a tmux launch, push whole-server session/window snapshots so clients // render an overview, and accept window+session control commands (select/new/ // rename/kill, switch-session). Targeting OUR client for cross-session switches @@ -387,7 +390,7 @@ func (rt *Runtime) handleOffer(ctx context.Context, c *websocket.Conn, m signal. if pid > 0 { windows = func() []byte { return tmuxSessionsJSON(pid) } } - _ = RunAgentSession(attachCtx, dc, sess, pty, rt.cfg.MachineName, windows, pid) + return RunAgentSession(ctx, mc, sess, pty, rt.cfg.MachineName, windows, pid) } // agentSignalURL builds ws(s)://host/agent/signal?owner_id=..&machine_id=.. From dfc63356b6bd63c43a4bdae61dbbba80d5089db7 Mon Sep 17 00:00:00 2001 From: Fredrik Ahlgren Date: Sat, 13 Jun 2026 13:30:21 +0200 Subject: [PATCH 6/8] feat(cli): C2.3 LAN-first attach, mir up --no-lan, mir attach --relay-only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mir attach now tries LAN-direct (mDNS+QUIC) first with a ~600ms budget, then falls back to the relay; --relay-only skips LAN. mir up starts the QUIC listener + mDNS advertisement (in Up, after the paired-owner guard) unless --no-lan; LAN start failure is non-fatal — the relay path always serves. attachLocators() makes the LAN-first-vs-relay-only decision unit- testable. 
Co-Authored-By: Claude Opus 4.8 --- go/internal/agent/runtime.go | 14 +++++++++++ go/internal/cli/agent_cmds.go | 2 ++ go/internal/cli/client_cmds.go | 7 +++--- go/internal/client/attach.go | 17 ++++++++++++-- go/internal/client/attach_locators_test.go | 27 ++++++++++++++++++++++ go/internal/client/e2e_mux_test.go | 4 ++-- go/internal/client/e2e_test.go | 2 +- go/internal/client/lan_locator.go | 16 +++++++++++-- go/internal/client/muxterm.go | 7 +++--- 9 files changed, 83 insertions(+), 13 deletions(-) create mode 100644 go/internal/client/attach_locators_test.go diff --git a/go/internal/agent/runtime.go b/go/internal/agent/runtime.go index cc1c45d..90a8468 100644 --- a/go/internal/agent/runtime.go +++ b/go/internal/agent/runtime.go @@ -74,6 +74,8 @@ type Runtime struct { maxBackoff time.Duration // cap reloadInterval time.Duration // how often to re-read config for newly-paired owners Logf func(string, ...any) // optional reconnect/status log (set by the CLI) + + DisableLAN bool // when set, mir up serves the relay only (no QUIC listener / mDNS advertise) } // admit reserves a slot for a new attach handshake, returning false immediately @@ -109,6 +111,18 @@ func (rt *Runtime) Up(ctx context.Context) error { if len(rt.cfg.PairedOwners) == 0 { return errNoOwner } + // LAN-direct: advertise + listen for relay-less attach on the local network. + // Start failure is NON-FATAL — the relay path below always serves. 
+ if !rt.DisableLAN { + if addr, stop, err := rt.startLAN(ctx); err == nil { + defer stop() + if rt.Logf != nil { + rt.Logf("LAN-direct listening (mDNS _miranda._udp) at %s", addr) + } + } else if rt.Logf != nil { + rt.Logf("LAN-direct disabled: %v", err) + } + } var mu sync.Mutex served := map[string]bool{} start := func(owner string) { diff --git a/go/internal/cli/agent_cmds.go b/go/internal/cli/agent_cmds.go index 9dd98fb..70c3bb3 100644 --- a/go/internal/cli/agent_cmds.go +++ b/go/internal/cli/agent_cmds.go @@ -62,6 +62,7 @@ func (a *app) cmdUp(args []string) error { shell := fs.String("shell", "tmux:new:-A:-s:main", "launch command, ':'-separated") ice := iceFlags(fs) autoUpdate := fs.Bool("auto-update", os.Getenv("MIR_AUTO_UPDATE") == "1", "opt-in: automatically self-update when idle") + noLAN := fs.Bool("no-lan", false, "disable LAN-direct (no QUIC listener, no mDNS advertise); serve the relay only") _ = fs.Parse(args) cfg, err := agent.LoadOrInit(*dir, *name, *signalURL) @@ -77,6 +78,7 @@ func (a *app) cmdUp(args []string) error { defer stop() rt := agent.NewRuntime(cfg, launch, ice()) + rt.DisableLAN = *noLAN // Structured, timestamped agent log. 
RFC3339-ish date+time in UTC plus the // binary prefix turns a bare "owner … disconnected" line into something you // can correlate against relay logs and tell a flap (low uptime) from a normal diff --git a/go/internal/cli/client_cmds.go b/go/internal/cli/client_cmds.go index 30f9380..a69f798 100644 --- a/go/internal/cli/client_cmds.go +++ b/go/internal/cli/client_cmds.go @@ -75,7 +75,7 @@ func (a *app) cmdRun(args []string) error { ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) defer stop() - mc, sess, cleanup, err := client.Attach(ctx, *m, idn, ice()) + mc, sess, cleanup, err := client.Attach(ctx, *m, idn, ice(), false) if err != nil { return err } @@ -160,6 +160,7 @@ func (a *app) cmdAttach(args []string) error { fs := flag.NewFlagSet("attach", flag.ExitOnError) dir := fs.String("dir", defaultDir(), "config directory") prefixFlag := fs.String("prefix", "ctrl-o", "multiplexer switch key (e.g. ctrl-o, ctrl-a, ctrl-space)") + relayOnly := fs.Bool("relay-only", false, "skip LAN-direct discovery; use the relay") ice := iceFlags(fs) _ = fs.Parse(args) names := fs.Args() @@ -187,7 +188,7 @@ func (a *app) cmdAttach(args []string) error { if err != nil { return err } - mc, sess, cleanup, err := client.Attach(ctx, *m, idn, servers) + mc, sess, cleanup, err := client.Attach(ctx, *m, idn, servers, *relayOnly) if err != nil { return err } @@ -198,7 +199,7 @@ func (a *app) cmdAttach(args []string) error { return nil } - sessions, cleanup, err := client.AttachAll(ctx, *dir, names, idn, servers) + sessions, cleanup, err := client.AttachAll(ctx, *dir, names, idn, servers, *relayOnly) if err != nil { return err } diff --git a/go/internal/client/attach.go b/go/internal/client/attach.go index 1ffa41d..a674c0a 100644 --- a/go/internal/client/attach.go +++ b/go/internal/client/attach.go @@ -14,12 +14,16 @@ import ( // Attach connects to the named machine's agent over the first locator that can // reach it, runs the Noise KK initiator over that 
MsgConn, and returns the // established session. Call cleanup when done. -func Attach(ctx context.Context, m Machine, id *Identity, ice []peer.ICEServer) (mc peer.MsgConn, sess *noise.Session, cleanup func(), err error) { +// +// By default it tries LAN-direct (mDNS + QUIC) first, then falls back to the +// relay; the LAN attempt is bounded (see lanLocator.Dial) so a remote attach +// drops to the relay fast. relayOnly skips LAN entirely. +func Attach(ctx context.Context, m Machine, id *Identity, ice []peer.ICEServer, relayOnly bool) (mc peer.MsgConn, sess *noise.Session, cleanup func(), err error) { if !id.HasWallet() { return nil, nil, nil, fmt.Errorf("this identity has no wallet; run `mir keygen --wallet`") } - mc, cleanup, err = dialFirst([]Locator{relayLocator{}}, ctx, m, id, ice) + mc, cleanup, err = dialFirst(attachLocators(relayOnly), ctx, m, id, ice) if err != nil { return nil, nil, nil, err } @@ -37,6 +41,15 @@ func Attach(ctx context.Context, m Machine, id *Identity, ice []peer.ICEServer) return mc, sess, cleanup, nil } +// attachLocators is the ordered locator list Attach tries: LAN-direct first (a +// bounded mDNS+QUIC attempt) then the relay, unless relayOnly skips LAN. +func attachLocators(relayOnly bool) []Locator { + if relayOnly { + return []Locator{relayLocator{}} + } + return []Locator{lanLocator{res: newMDNSResolver()}, relayLocator{}} +} + // dialFirst tries each locator in order, falling through on ErrUnreachable and // aborting on any other (real) error. It returns the MsgConn from the first // locator that connects, or the last ErrUnreachable (or a generic "unreachable" diff --git a/go/internal/client/attach_locators_test.go b/go/internal/client/attach_locators_test.go new file mode 100644 index 0000000..aa6473c --- /dev/null +++ b/go/internal/client/attach_locators_test.go @@ -0,0 +1,27 @@ +package client + +import "testing" + +// attachLocators encodes the LAN-first-then-relay default and the --relay-only +// escape. 
The transport paths themselves are covered by the locator/quicmsg/agent +// tests; this pins the composition decision. +func TestAttachLocators(t *testing.T) { + def := attachLocators(false) + if len(def) != 2 { + t.Fatalf("default: want 2 locators (LAN, relay), got %d", len(def)) + } + if _, ok := def[0].(lanLocator); !ok { + t.Errorf("default: locator[0] = %T, want lanLocator (LAN tried first)", def[0]) + } + if _, ok := def[1].(relayLocator); !ok { + t.Errorf("default: locator[1] = %T, want relayLocator", def[1]) + } + + only := attachLocators(true) + if len(only) != 1 { + t.Fatalf("relayOnly: want 1 locator, got %d", len(only)) + } + if _, ok := only[0].(relayLocator); !ok { + t.Errorf("relayOnly: locator[0] = %T, want relayLocator (LAN skipped)", only[0]) + } +} diff --git a/go/internal/client/e2e_mux_test.go b/go/internal/client/e2e_mux_test.go index 34ba0cd..b954b25 100644 --- a/go/internal/client/e2e_mux_test.go +++ b/go/internal/client/e2e_mux_test.go @@ -47,12 +47,12 @@ func TestEndToEndMuxSwitchesBetweenTwoMachines(t *testing.T) { m1 := startAgent(t, ctx, srv.URL, "box1", id) time.Sleep(400 * time.Millisecond) - s0mc, s0sess, c0, err := Attach(ctx, m0, id, nil) + s0mc, s0sess, c0, err := Attach(ctx, m0, id, nil, false) if err != nil { t.Fatalf("attach box0: %v", err) } defer c0() - s1mc, s1sess, c1, err := Attach(ctx, m1, id, nil) + s1mc, s1sess, c1, err := Attach(ctx, m1, id, nil, false) if err != nil { t.Fatalf("attach box1: %v", err) } diff --git a/go/internal/client/e2e_test.go b/go/internal/client/e2e_test.go index 73a9c45..97e4ef6 100644 --- a/go/internal/client/e2e_test.go +++ b/go/internal/client/e2e_test.go @@ -43,7 +43,7 @@ func TestEndToEndTrClientDrivesRealShell(t *testing.T) { // Register the machine in the client (as `tr add-machine` would). 
m := Machine{Name: "box", MachineID: acfg.MachineID, HostPubHex: acfg.HostPubHex, SignalURL: srv.URL} - mc, sess, cleanup, err := Attach(ctx, m, id, nil) + mc, sess, cleanup, err := Attach(ctx, m, id, nil, false) if err != nil { t.Fatalf("attach: %v", err) } diff --git a/go/internal/client/lan_locator.go b/go/internal/client/lan_locator.go index ac82f13..b89ae82 100644 --- a/go/internal/client/lan_locator.go +++ b/go/internal/client/lan_locator.go @@ -32,11 +32,16 @@ func (l lanLocator) Dial(ctx context.Context, m Machine, id *Identity, _ []peer. if !id.HasWallet() { return nil, nil, ErrUnreachable // LAN attach requires a wallet binding } - addr, err := l.res.resolve(ctx, m.MachineID) + // Bound the whole LAN attempt (resolve + dial) so a remote attach — where no + // LAN peer answers — falls through to the relay fast instead of waiting out + // the resolver's own cap. resolveTimeout still bounds the browse itself. + dctx, cancel := context.WithTimeout(ctx, lanAttachBudget) + defer cancel() + addr, err := l.res.resolve(dctx, m.MachineID) if err != nil { return nil, nil, ErrUnreachable } - conn, err := quicmsg.Dial(ctx, addr) + conn, err := quicmsg.Dial(dctx, addr) if err != nil { return nil, nil, ErrUnreachable } @@ -57,6 +62,13 @@ const mdnsDomain = "local." // rather than blocking Attach's first locator indefinitely. var resolveTimeout = 1500 * time.Millisecond +// lanAttachBudget is the ceiling for the entire LAN attach attempt (resolve + +// QUIC dial). It keeps the remote-attach penalty small: when no LAN peer answers, +// Attach falls back to the relay within this window rather than blocking on the +// resolver's full cap. The LAN tests inject a static resolver, so this budget +// doesn't affect them. +const lanAttachBudget = 600 * time.Millisecond + // mdnsResolver is the production resolver: it browses the LAN over mDNS for the // Miranda service and matches the requested machine_id. 
type mdnsResolver struct{} diff --git a/go/internal/client/muxterm.go b/go/internal/client/muxterm.go index 44474c2..3d0d520 100644 --- a/go/internal/client/muxterm.go +++ b/go/internal/client/muxterm.go @@ -14,8 +14,9 @@ import ( ) // AttachAll attaches every named machine and returns their sessions + a cleanup. -// On any failure it cleans up the ones already attached. -func AttachAll(ctx context.Context, dir string, names []string, id *Identity, ice []peer.ICEServer) ([]*MuxSession, func(), error) { +// On any failure it cleans up the ones already attached. relayOnly is threaded to +// each Attach to skip LAN-direct discovery. +func AttachAll(ctx context.Context, dir string, names []string, id *Identity, ice []peer.ICEServer, relayOnly bool) ([]*MuxSession, func(), error) { var sessions []*MuxSession var cleanups []func() cleanupAll := func() { @@ -29,7 +30,7 @@ func AttachAll(ctx context.Context, dir string, names []string, id *Identity, ic cleanupAll() return nil, nil, err } - mc, sess, cleanup, err := Attach(ctx, *m, id, ice) + mc, sess, cleanup, err := Attach(ctx, *m, id, ice, relayOnly) if err != nil { cleanupAll() return nil, nil, fmt.Errorf("attach %s: %w", name, err) From 3d4866663074bb6f45f3634d6fce7109c1556198 Mon Sep 17 00:00:00 2001 From: Fredrik Ahlgren Date: Sat, 13 Jun 2026 13:33:19 +0200 Subject: [PATCH 7/8] docs+test(lan): C2.4 SECURITY/README LAN-direct + skippable mDNS resolve test SECURITY.md gains an honest LAN-direct residual-exposure note (new inbound listener surface + mDNS leak; same Noise-KK + binding trust, --no-lan to disable). README documents LAN-direct + the flags. A skippable live-mDNS test validates the prod resolver where multicast is available. Spec notes happy-eyeballs as the future latency refinement and netsim as a follow-up. 
Co-Authored-By: Claude Opus 4.8 --- README.md | 7 ++++ SECURITY.md | 15 ++++++++ ...6-06-13-c1-c2-lan-direct-locator-design.md | 24 +++++++++---- go/internal/client/lan_mdns_test.go | 36 +++++++++++++++++++ 4 files changed, 75 insertions(+), 7 deletions(-) create mode 100644 go/internal/client/lan_mdns_test.go diff --git a/README.md b/README.md index a87c13e..e988d23 100644 --- a/README.md +++ b/README.md @@ -78,6 +78,13 @@ mir attach laptop macmini linux Everything defaults to the hosted relay + STUN, so no flags are needed. Point at your own infrastructure with `--signal` / `MIR_SIGNAL` and `--stun` / `MIR_STUN`. +**LAN-direct (no relay on the same network).** When the client and the machine are on +the same LAN, `mir attach` finds it over mDNS and connects straight over QUIC — no relay +round-trip. It's automatic and falls back to the relay within ~0.6 s if there's no local +answer. Same trust as ever: Noise-KK + the wallet binding run inside, so the LAN only +supplies an address. Turn it off with `mir up --no-lan` (agent) or +`mir attach --relay-only` (client). + ## Updating `mir` checks GitHub for a newer release at most once a day and prints a one-line diff --git a/SECURITY.md b/SECURITY.md index 681fb75..5cf2ea7 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -92,6 +92,21 @@ the network is hostile — provided the trust roots below are intact. operator may enable a TURN relay. Even then the relay forwards only ciphertext — Noise keeps it blind — but it does carry (encrypted) bytes and learns more timing/volume. It is **off by default**. +- **LAN-direct (mDNS + QUIC).** `mir up` advertises itself on the local network + (mDNS `_miranda._udp`, instance = your opaque `machine_id`) and listens for direct + QUIC connections, so a `mir attach` on the same LAN reaches it **without the relay**. 
+ This changes nothing about trust: the QUIC layer uses a throwaway self-signed cert + (the client skips TLS verification), and the **real** authentication is the unchanged + Noise-KK handshake + the wallet binding that runs *inside* the QUIC stream — exactly + as over the relay. A rogue LAN host that spoofs the mDNS record or connects to the + listener can at worst cause a **failed handshake (DoS)**: it cannot impersonate your + agent (Noise-KK pins `host_pub`), cannot attach as you (the agent rejects any + unpinned wallet *before* Noise, and a binding requires your wallet key), and cannot + read traffic (Noise). The new exposure is (a) the agent now accepts inbound LAN + connections — bounded by the same pre-auth handshake limiter as relay attaches — and + (b) the mDNS advertisement reveals that a Miranda node with a given `machine_id` + exists on the LAN. Disable both with `mir up --no-lan`; skip LAN discovery on a + client with `mir attach --relay-only`. - **Compromised endpoints / Keychain.** Out of scope — the same trust you already place in your own devices. diff --git a/docs/superpowers/specs/2026-06-13-c1-c2-lan-direct-locator-design.md b/docs/superpowers/specs/2026-06-13-c1-c2-lan-direct-locator-design.md index 50058c4..4e4817a 100644 --- a/docs/superpowers/specs/2026-06-13-c1-c2-lan-direct-locator-design.md +++ b/docs/superpowers/specs/2026-06-13-c1-c2-lan-direct-locator-design.md @@ -116,10 +116,17 @@ registration. Each accepted connection runs the frame0-verify → pin → `RunRe **LAN is on by default** (`mir up --no-lan` opts out); the relay path always runs too. ### Attach ordering (client) -`Attach` composes `[LANLocator, RelayLocator]`. `LANLocator.Dial` does an mDNS lookup with a -short timeout (~1.5 s); on a hit it QUIC-dials + sends frame0 and returns the `quicConn`; on -no hit / dial failure it returns `ErrUnreachable` and `Attach` falls through to the relay -(today's path). A `mir attach --no-lan` / `--relay-only` flag forces the relay path. 
+`Attach` composes `[LANLocator, RelayLocator]`. `LANLocator.Dial` does an mDNS lookup + +QUIC dial bounded by a **~600 ms budget** (`lanAttachBudget`); on a hit it sends frame0 and +returns the `quicConn`; on no hit / dial failure / timeout it returns `ErrUnreachable` and +`Attach` falls through to the relay (today's path). The budget keeps the remote-attach +penalty small (a remote machine has no LAN answer, so it drops to the relay within ~600 ms). +`mir attach --relay-only` skips LAN entirely (zero penalty when you know you're remote). + +> **Future refinement (not built now): happy-eyeballs.** The ~600 ms sequential budget can be +> removed by racing the LAN and relay locators concurrently (start both, take the first +> `MsgConn`, cancel the loser) — zero added latency for remote attaches. Deferred to keep +> this slice from adding concurrency to the working relay path ("Robust Over Feature-Rich"). --- @@ -163,9 +170,12 @@ browser is unaffected (no web changes). with the relay path; `admit()` bound. - **C2.3** wiring: `Attach` order `[LAN, Relay]`; `mir up --no-lan`, `mir attach --relay-only`. -- **C2.4** e2e: `mir up` + `mir attach` over QUIC on loopback with **no relay running**; - mDNS resolve test; bad/missing binding rejected; relay fallback when LAN is absent. Extend - `deploy/netsim` with an mDNS-within-a-Docker-network LAN path. +- **C2.4** tests + docs: real-shell echo over QUIC with **no relay** (agent-side + `startLAN` ↔ a QUIC client); bad/unpinned binding rejected pre-Noise; `attachLocators` + composition; a **skippable** live-mDNS resolve test (multicast-dependent); `SECURITY.md` + + `README.md` LAN-direct notes. **Follow-up (not in this PR):** extend `deploy/netsim` with + an mDNS-within-a-Docker-network LAN path — the Go tests already prove the wire/transport; + netsim adds cross-container NAT/LAN integration coverage and is tracked separately. Each step is independently shippable; C1 alone is a pure refactor. 
diff --git a/go/internal/client/lan_mdns_test.go b/go/internal/client/lan_mdns_test.go new file mode 100644 index 0000000..e67f71b --- /dev/null +++ b/go/internal/client/lan_mdns_test.go @@ -0,0 +1,36 @@ +package client + +import ( + "context" + "strings" + "testing" + "time" + + "github.com/grandcat/zeroconf" +) + +// TestMDNSResolverFindsAdvertisedMachine exercises the PRODUCTION mdnsResolver +// against a real zeroconf advertisement on the loopback/LAN. It is skipped where +// multicast isn't available (CI sandboxes, restricted networks) so the suite stays +// deterministic — the wire/QUIC path is covered by the non-multicast tests. +func TestMDNSResolverFindsAdvertisedMachine(t *testing.T) { + if testing.Short() { + t.Skip("skipping live mDNS test under -short") + } + const machineID = "testmid_abc123" + server, err := zeroconf.Register(machineID, mdnsService, mdnsDomain, 47777, []string{"mid=" + machineID}, nil) + if err != nil { + t.Skipf("mDNS register unavailable here: %v", err) + } + defer server.Shutdown() + + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + addr, err := mdnsResolver{}.resolve(ctx, machineID) + if err != nil { + t.Skipf("mDNS browse returned nothing (no multicast on this host?): %v", err) + } + if !strings.HasSuffix(addr, ":47777") { + t.Errorf("resolve(%q) = %q, want the advertised :47777 port", machineID, addr) + } +} From b6515b86ac02cf20aa045189406aa002fd97b384 Mon Sep 17 00:00:00 2001 From: Fredrik Ahlgren Date: Sat, 13 Jun 2026 14:07:28 +0200 Subject: [PATCH 8/8] feat(client): staggered happy-eyeballs race for LAN-vs-relay attach Replace the sequential LAN-then-relay dial with dialStaggered: the LAN locator starts immediately, the relay only after a ~200ms head start; the first live MsgConn wins, the loser is cancelled and any late-connecting loser is cleaned up. On the LAN, LAN-direct wins inside the head start so the relay is never contacted (a successful LAN attach stays relay-free). 
Remote attaches pay only the head start, not the full ~600ms LAN budget. Staggered (not a naive simultaneous race) on purpose: it keeps the relay-free property that is the point of LAN-direct. Race-tested (-race). Co-Authored-By: Claude Opus 4.8 --- ...6-06-13-c1-c2-lan-direct-locator-design.md | 30 +++-- go/internal/client/attach.go | 108 ++++++++++++--- go/internal/client/locator_test.go | 124 ++++++++++++------ 3 files changed, 190 insertions(+), 72 deletions(-) diff --git a/docs/superpowers/specs/2026-06-13-c1-c2-lan-direct-locator-design.md b/docs/superpowers/specs/2026-06-13-c1-c2-lan-direct-locator-design.md index 4e4817a..97ea4de 100644 --- a/docs/superpowers/specs/2026-06-13-c1-c2-lan-direct-locator-design.md +++ b/docs/superpowers/specs/2026-06-13-c1-c2-lan-direct-locator-design.md @@ -115,18 +115,22 @@ registration. Each accepted connection runs the frame0-verify → pin → `RunRe `RunAgentSession` path above (a shared helper with the relay path's post-pin logic). **LAN is on by default** (`mir up --no-lan` opts out); the relay path always runs too. -### Attach ordering (client) -`Attach` composes `[LANLocator, RelayLocator]`. `LANLocator.Dial` does an mDNS lookup + -QUIC dial bounded by a **~600 ms budget** (`lanAttachBudget`); on a hit it sends frame0 and -returns the `quicConn`; on no hit / dial failure / timeout it returns `ErrUnreachable` and -`Attach` falls through to the relay (today's path). The budget keeps the remote-attach -penalty small (a remote machine has no LAN answer, so it drops to the relay within ~600 ms). -`mir attach --relay-only` skips LAN entirely (zero penalty when you know you're remote). - -> **Future refinement (not built now): happy-eyeballs.** The ~600 ms sequential budget can be -> removed by racing the LAN and relay locators concurrently (start both, take the first -> `MsgConn`, cancel the loser) — zero added latency for remote attaches. 
Deferred to keep -> this slice from adding concurrency to the working relay path ("Robust Over Feature-Rich"). +### Attach ordering (client) — staggered happy-eyeballs +`Attach` races `[LANLocator, RelayLocator]` (`dialStaggered`): the LAN locator starts +immediately; the relay starts only after a **~200 ms head start** (`relayHeadStart`). The +first locator to return a live `MsgConn` wins; the loser is cancelled and cleaned up. +- **On the LAN**, LAN-direct connects in tens of ms, so it wins inside the head start and + **the relay is never contacted** — a successful LAN attach stays relay-free (no relay + round-trip, no metadata exposure). `LANLocator.Dial` is itself bounded by `lanAttachBudget` + (~600 ms) so a stuck mDNS browse can't hang the race. +- **Remote** (no LAN answer): the relay starts after ~200 ms and wins, so the penalty vs a + pure relay attach is just the head start (not the full LAN budget). +- `mir attach --relay-only` collapses the list to `[RelayLocator]` — a direct dial, no race, + zero penalty when you know you're remote. + +The staggered design (rather than a naive simultaneous race) is deliberate: it preserves the +relay-free property for successful LAN attaches — the whole point of LAN-direct — instead of +opening a throwaway relay connection on every attach. --- @@ -176,6 +180,8 @@ browser is unaffected (no web changes). `README.md` LAN-direct notes. **Follow-up (not in this PR):** extend `deploy/netsim` with an mDNS-within-a-Docker-network LAN path — the Go tests already prove the wire/transport; netsim adds cross-container NAT/LAN integration coverage and is tracked separately. + (The happy-eyeballs race that earlier sat here as a follow-up is now implemented — see + "Attach ordering" above.) Each step is independently shippable; C1 alone is a pure refactor. 
diff --git a/go/internal/client/attach.go b/go/internal/client/attach.go index a674c0a..1b8b3f7 100644 --- a/go/internal/client/attach.go +++ b/go/internal/client/attach.go @@ -6,11 +6,20 @@ import ( "encoding/hex" "errors" "fmt" + "time" "github.com/srcful/terminal-relay/go/internal/noise" "github.com/srcful/terminal-relay/go/internal/peer" ) +// relayHeadStart is how long the relay locator waits before it starts, giving the +// LAN locator a head start. On the LAN, LAN-direct connects in tens of ms, so it +// wins inside this window and the relay is never contacted (a successful LAN attach +// stays relay-free — no relay round-trip, no metadata). When there is no LAN answer +// the relay starts after this delay, so a remote attach pays only ~this much rather +// than the full LAN budget. See dialStaggered. +const relayHeadStart = 200 * time.Millisecond + // Attach connects to the named machine's agent over the first locator that can // reach it, runs the Noise KK initiator over that MsgConn, and returns the // established session. Call cleanup when done. @@ -23,7 +32,7 @@ func Attach(ctx context.Context, m Machine, id *Identity, ice []peer.ICEServer, return nil, nil, nil, fmt.Errorf("this identity has no wallet; run `mir keygen --wallet`") } - mc, cleanup, err = dialFirst(attachLocators(relayOnly), ctx, m, id, ice) + mc, cleanup, err = dialStaggered(ctx, attachLocators(relayOnly), relayHeadStart, m, id, ice) if err != nil { return nil, nil, nil, err } @@ -50,25 +59,88 @@ func attachLocators(relayOnly bool) []Locator { return []Locator{lanLocator{res: newMDNSResolver()}, relayLocator{}} } -// dialFirst tries each locator in order, falling through on ErrUnreachable and -// aborting on any other (real) error. It returns the MsgConn from the first -// locator that connects, or the last ErrUnreachable (or a generic "unreachable" -// error) if none did. 
-func dialFirst(locators []Locator, ctx context.Context, m Machine, id *Identity, ice []peer.ICEServer) (peer.MsgConn, func(), error) { - var lastErr error - for _, loc := range locators { - mc, cleanup, err := loc.Dial(ctx, m, id, ice) - if errors.Is(err, ErrUnreachable) { - lastErr = err - continue +// dialStaggered races the locators "happy-eyeballs" style: locator[0] starts +// immediately and each later locator starts after an additional headStart, so a +// LAN that answers wins before the relay is ever contacted. The FIRST locator to +// return a live MsgConn wins; the others are cancelled and any that also connected +// is cleaned up. If all fail it returns the most informative error (a real failure +// in preference to ErrUnreachable). A single locator (relay-only) dials directly. +func dialStaggered(parent context.Context, locators []Locator, headStart time.Duration, m Machine, id *Identity, ice []peer.ICEServer) (peer.MsgConn, func(), error) { + if len(locators) == 0 { + return nil, nil, fmt.Errorf("machine %q: no locators", m.Name) + } + if len(locators) == 1 { + return locators[0].Dial(parent, m, id, ice) + } + + type dialResult struct { + mc peer.MsgConn + cleanup func() + err error + i int + } + results := make(chan dialResult, len(locators)) + cancels := make([]context.CancelFunc, len(locators)) + for i, loc := range locators { + cctx, cancel := context.WithCancel(parent) + cancels[i] = cancel + go func(i int, loc Locator, cctx context.Context) { + if i > 0 { // stagger later locators; cancellation pre-empts the wait + select { + case <-time.After(time.Duration(i) * headStart): + case <-cctx.Done(): + results <- dialResult{err: context.Canceled, i: i} + return + } + } + mc, cleanup, err := loc.Dial(cctx, m, id, ice) + results <- dialResult{mc, cleanup, err, i} + }(i, loc, cctx) + } + + var bestErr error + for pending := len(locators); pending > 0; pending-- { + r := <-results + if r.err == nil && r.mc != nil { + // Winner. 
Cancel the losers (keep the winner's ctx alive until its + // session ends), and drain+close any loser that also connected. + for j := range cancels { + if j != r.i { + cancels[j]() + } + } + remaining := pending - 1 + go func() { + for ; remaining > 0; remaining-- { + lr := <-results + if lr.mc != nil && lr.cleanup != nil { + lr.cleanup() + } + } + }() + winnerCancel := cancels[r.i] + return r.mc, func() { + if r.cleanup != nil { + r.cleanup() + } + winnerCancel() + }, nil } - if err != nil { - return nil, nil, err + // Track the best error: a real failure beats ErrUnreachable / cancellation. + if r.err != nil && !errors.Is(r.err, context.Canceled) { + if bestErr == nil || (errors.Is(bestErr, ErrUnreachable) && !errors.Is(r.err, ErrUnreachable)) { + bestErr = r.err + } } - return mc, cleanup, nil } - if lastErr == nil { - lastErr = fmt.Errorf("machine %q unreachable", m.Name) + for _, c := range cancels { + c() + } + if bestErr == nil { + if parent.Err() != nil { + return nil, nil, parent.Err() + } + bestErr = fmt.Errorf("machine %q unreachable", m.Name) } - return nil, nil, lastErr + return nil, nil, bestErr } diff --git a/go/internal/client/locator_test.go b/go/internal/client/locator_test.go index 6120fc3..750cc64 100644 --- a/go/internal/client/locator_test.go +++ b/go/internal/client/locator_test.go @@ -4,91 +4,131 @@ package client import ( "context" "errors" + "sync/atomic" "testing" + "time" "github.com/srcful/terminal-relay/go/internal/peer" ) -// fakeConn is a no-op MsgConn used to prove dialFirst returns the conn from the -// first locator that connects. -type fakeConn struct{} +// fakeConn is a no-op MsgConn used to identify which locator won the race. +type fakeConn struct{ tag string } func (fakeConn) Send(b []byte) error { return nil } func (fakeConn) Recv(ctx context.Context) ([]byte, error) { return nil, nil } -// stubLocator returns a canned (conn, cleanup, err) regardless of inputs and -// records whether it was invoked. 
+// stubLocator returns a canned (conn, cleanup, err). delay (if set) sleeps before +// returning, ignoring ctx — simulating a locator already in flight when the race is +// decided. called records whether Dial actually ran (a staggered locator cancelled +// during its head start never dials). type stubLocator struct { conn peer.MsgConn cleanup func() err error - called *bool + called *int32 + delay time.Duration } func (s stubLocator) Dial(ctx context.Context, m Machine, id *Identity, ice []peer.ICEServer) (peer.MsgConn, func(), error) { if s.called != nil { - *s.called = true + atomic.StoreInt32(s.called, 1) + } + if s.delay > 0 { + time.Sleep(s.delay) } return s.conn, s.cleanup, s.err } -func TestDialFirstFallsThroughOnUnreachable(t *testing.T) { - want := fakeConn{} - secondCalled := false - cleaned := false +// LAN (locator[0]) connects fast, so it wins inside the head start and the relay is +// never dialed — a successful LAN attach stays relay-free. +func TestDialStaggeredLANWinsRelayNeverDialed(t *testing.T) { + want := fakeConn{tag: "lan"} + var relayCalled int32 locators := []Locator{ - stubLocator{err: ErrUnreachable}, - stubLocator{conn: want, cleanup: func() { cleaned = true }, called: &secondCalled}, + stubLocator{conn: want}, + stubLocator{conn: fakeConn{tag: "relay"}, called: &relayCalled}, } - - mc, cleanup, err := dialFirst(locators, context.Background(), Machine{Name: "box"}, &Identity{}, nil) + mc, cleanup, err := dialStaggered(context.Background(), locators, 200*time.Millisecond, Machine{Name: "box"}, &Identity{}, nil) if err != nil { - t.Fatalf("dialFirst: unexpected error: %v", err) + t.Fatalf("unexpected error: %v", err) } if mc != want { - t.Fatalf("dialFirst returned wrong conn: got %#v want %#v", mc, want) - } - if !secondCalled { - t.Fatal("expected fall-through to the second locator") + t.Fatalf("got %#v, want the LAN conn", mc) } - // The returned cleanup must be the second locator's, not the first's. 
cleanup() - if !cleaned { - t.Fatal("expected the second locator's cleanup to be returned") + if atomic.LoadInt32(&relayCalled) != 0 { + t.Fatal("relay must NOT be dialed when LAN wins within the head start") } } -func TestDialFirstAbortsOnRealError(t *testing.T) { - boom := errors.New("boom: reachable path failed") - secondCalled := false +// No LAN answer (ErrUnreachable) -> the relay starts after the head start and wins. +func TestDialStaggeredFallsToRelay(t *testing.T) { + want := fakeConn{tag: "relay"} locators := []Locator{ - stubLocator{err: boom}, - stubLocator{conn: fakeConn{}, called: &secondCalled}, + stubLocator{err: ErrUnreachable}, + stubLocator{conn: want}, } + mc, _, err := dialStaggered(context.Background(), locators, 10*time.Millisecond, Machine{Name: "box"}, &Identity{}, nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if mc != want { + t.Fatalf("got %#v, want the relay conn", mc) + } +} - mc, _, err := dialFirst(locators, context.Background(), Machine{Name: "box"}, &Identity{}, nil) - if !errors.Is(err, boom) { - t.Fatalf("expected the real error to abort, got: %v", err) +// When everything fails, surface the relay's REAL error, not the LAN ErrUnreachable. 
+func TestDialStaggeredAllFailPrefersRealError(t *testing.T) { + boom := errors.New("signaling: machine offline") + locators := []Locator{ + stubLocator{err: ErrUnreachable}, + stubLocator{err: boom}, } + mc, _, err := dialStaggered(context.Background(), locators, 5*time.Millisecond, Machine{Name: "box"}, &Identity{}, nil) if mc != nil { - t.Fatal("expected no conn on abort") + t.Fatal("expected no conn when all locators fail") } - if secondCalled { - t.Fatal("a real (non-unreachable) error must NOT fall through to the next locator") + if !errors.Is(err, boom) { + t.Fatalf("expected the real relay error, got: %v", err) } } -func TestDialFirstAllUnreachable(t *testing.T) { +// A slow loser that still connects after the winner is chosen must be cleaned up +// (its conn would otherwise leak). +func TestDialStaggeredCleansSlowLoser(t *testing.T) { + var loserCleaned int32 locators := []Locator{ - stubLocator{err: ErrUnreachable}, - stubLocator{err: ErrUnreachable}, + // LAN: slow success — it loses the race but still connects later. + stubLocator{conn: fakeConn{tag: "lan"}, cleanup: func() { atomic.StoreInt32(&loserCleaned, 1) }, delay: 60 * time.Millisecond}, + // relay: wins shortly after its (small) head start. + stubLocator{conn: fakeConn{tag: "relay"}}, + } + mc, _, err := dialStaggered(context.Background(), locators, 5*time.Millisecond, Machine{Name: "box"}, &Identity{}, nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) } + if mc.(fakeConn).tag != "relay" { + t.Fatalf("expected the relay to win, got %v", mc) + } + // Give the slow loser time to return and be drained/cleaned. 
+ deadline := time.Now().Add(time.Second) + for atomic.LoadInt32(&loserCleaned) == 0 && time.Now().Before(deadline) { + time.Sleep(5 * time.Millisecond) + } + if atomic.LoadInt32(&loserCleaned) == 0 { + t.Fatal("the slow loser's conn was not cleaned up") + } +} - mc, _, err := dialFirst(locators, context.Background(), Machine{Name: "box"}, &Identity{}, nil) - if mc != nil { - t.Fatal("expected no conn when all locators are unreachable") +// A single locator (relay-only) dials directly with no race. +func TestDialStaggeredSingleLocatorDirect(t *testing.T) { + want := fakeConn{tag: "relay"} + var called int32 + mc, _, err := dialStaggered(context.Background(), []Locator{stubLocator{conn: want, called: &called}}, 50*time.Millisecond, Machine{Name: "box"}, &Identity{}, nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) } - if !errors.Is(err, ErrUnreachable) { - t.Fatalf("expected ErrUnreachable when every locator falls through, got: %v", err) + if mc != want || atomic.LoadInt32(&called) != 1 { + t.Fatal("single locator should be dialed directly") } }