Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion cmd/pilotctl/verify_flow.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,12 +155,22 @@ func cmdVerifyProvider(flags map[string]string, provider string) {

deadline := time.Now().Add(10 * time.Minute)
var badge, badgeSig string
// Tolerate transient poll failures (a dropped frame, a relay blip) rather
// than aborting a flow the user may have already half-completed. Only give
// up after several consecutive errors.
const maxPollErrors = 5
pollErrors := 0
for time.Now().Before(deadline) {
time.Sleep(5 * time.Second)
poll, err := verifierRoundtrip(d, vaddr, verifierRequest{Op: "poll", FlowID: begin.FlowID})
if err != nil {
fatalCode("connection_failed", "verify poll: %v", err)
pollErrors++
if pollErrors >= maxPollErrors {
fatalCode("connection_failed", "verify poll: %d consecutive failures: %v", pollErrors, err)
}
continue
}
pollErrors = 0
switch poll.Status {
case "ready":
badge, badgeSig = poll.Badge, poll.BadgeSig
Expand Down
49 changes: 49 additions & 0 deletions cmd/pilotctl/zz_verify_cmds_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,52 @@ func TestNodeArgToID(t *testing.T) {
t.Errorf("address: got %d, want 99", got)
}
}

// TestCmdRecoveryRecoverInstallsNewIdentity covers the most destructive command:
// keyless force-rotate via the registry, then install the new identity locally.
func TestCmdRecoveryRecoverInstallsNewIdentity(t *testing.T) {
dir := t.TempDir()
newKey := filepath.Join(dir, "new.json")
id, err := crypto.GenerateIdentity()
if err != nil {
t.Fatalf("keygen: %v", err)
}
if err := crypto.SaveIdentity(newKey, id); err != nil {
t.Fatalf("save new key: %v", err)
}
newPub := crypto.EncodePublicKey(id.PublicKey)
idPath := filepath.Join(dir, "installed.json")

r := newFakeRegistry(t)
var gotPub, gotRecovery string
r.on("recover_identity", func(req map[string]interface{}) map[string]interface{} {
gotPub, _ = req["new_public_key"].(string)
gotRecovery, _ = req["recovery"].(string)
return map[string]interface{}{"type": "recover_identity_ok", "ok": true, "node_id": float64(99)}
})
useRegistry(t, r)

prev := jsonOutput
defer func() { jsonOutput = prev }()
jsonOutput = true
_ = captureStdout(t, func() {
cmdRecovery([]string{"recover", "--node", "99", "--new-key", newKey,
"--recovery", "pilotrecover:v1:99:bmV3:Y29t:9999999999:nn:rec-v1", "--recovery-sig", "c2ln",
"--identity", idPath})
})

if gotRecovery == "" {
t.Fatal("registry never received recover_identity")
}
if gotPub != newPub {
t.Errorf("registry got new_public_key=%q, want the new-key pubkey %q", gotPub, newPub)
}
// The recovered key must be installed at the daemon identity path.
installed, err := crypto.LoadIdentity(idPath)
if err != nil {
t.Fatalf("new identity not installed at %s: %v", idPath, err)
}
if crypto.EncodePublicKey(installed.PublicKey) != newPub {
t.Errorf("installed identity does not match the recovered key")
}
}
19 changes: 14 additions & 5 deletions pkg/daemon/tunnel.go
Original file line number Diff line number Diff line change
Expand Up @@ -791,6 +791,19 @@ var TunnelKeepaliveInterval = 25 * time.Second
// "ping" packet. The encrypted payload still authenticates as us
// (peer's AEAD verifies our nodeID AAD), so an attacker can't forge
// keepalives to keep stale entries warm.
// newKeepalivePacket builds the tiny ProtoControl/PortPing keepalive. It
// stamps our own node id as the inner source: the receiver enforces
// pkt.Src.Node == the AEAD-authenticated peer, so a zero-Src keepalive would
// be dropped as spoofed and the peer's NAT mapping would then expire.
func (tm *TunnelManager) newKeepalivePacket() *protocol.Packet {
return &protocol.Packet{
Version: protocol.Version,
Protocol: protocol.ProtoControl,
DstPort: protocol.PortPing,
Src: protocol.Addr{Node: tm.loadNodeID()},
}
}

func (tm *TunnelManager) keepaliveSweep(now time.Time) int {
type peerInfo struct {
id uint32
Expand All @@ -814,11 +827,7 @@ func (tm *TunnelManager) keepaliveSweep(now time.Time) int {

sent := 0
for _, p := range stale {
ka := &protocol.Packet{
Version: protocol.Version,
Protocol: protocol.ProtoControl,
DstPort: protocol.PortPing,
}
ka := tm.newKeepalivePacket()
plaintext, err := ka.Marshal()
if err != nil {
continue
Expand Down
22 changes: 22 additions & 0 deletions pkg/daemon/zz_nat_keepalive_bug_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,28 @@ func TestHandleEncryptedDropsKeepaliveBeforeRecvCh(t *testing.T) {
}
}

// TestKeepalivePacketStampsOwnSource pins the fix for the regression where the
// transport src-binding check dropped NAT keepalives: keepaliveSweep left
// Src.Node=0, but the receiver enforces pkt.Src.Node == the authenticated peer,
// so a zero-Src keepalive was dropped and the NAT mapping then expired. The
// keepalive must carry our own node id (= the peer id from the receiver's view).
func TestKeepalivePacketStampsOwnSource(t *testing.T) {
t.Parallel()
tm := NewTunnelManager()
t.Cleanup(func() { tm.Close() })

const selfID uint32 = 0xAB00CD01
tm.SetNodeID(selfID)

ka := tm.newKeepalivePacket()
if ka.Src.Node != selfID {
t.Fatalf("keepalive Src.Node = %#x, want own id %#x — a zero/wrong Src is dropped by the receiver's spoof check, expiring the NAT mapping", ka.Src.Node, selfID)
}
if ka.Protocol != protocol.ProtoControl || ka.DstPort != protocol.PortPing {
t.Fatalf("keepalive shape wrong: protocol=%d dstport=%d", ka.Protocol, ka.DstPort)
}
}

// TestRecordOutboundSendStampsTimestamp pins the half of the fix that
// already lands in RED: every successful writeFrame call must update
// tm.lastOutboundSend[nodeID] so the eventual keepaliveSweep can tell
Expand Down
Loading