diff --git a/CMakeLists.txt b/CMakeLists.txt index 70acb696ca..3cc01a8845 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -560,6 +560,8 @@ CHECK_CONST_EXISTS(KERN_ARND sys/sysctl.h EVENT__HAVE_DECL_KERN_ARND) CHECK_CONST_EXISTS(KERN_RANDOM sys/sysctl.h EVENT__HAVE_DECL_KERN_RANDOM) CHECK_CONST_EXISTS(RANDOM_UUID sys/sysctl.h EVENT__HAVE_DECL_RANDOM_UUID) CHECK_SYMBOL_EXISTS(F_SETFD fcntl.h EVENT__HAVE_SETFD) +CHECK_CONST_EXISTS(SO_TIMESTAMP sys/socket.h EVENT__HAVE_DECL_SO_TIMESTAMP) +CHECK_CONST_EXISTS(SO_TIMESTAMPNS sys/socket.h EVENT__HAVE_DECL_SO_TIMESTAMPNS) CHECK_TYPE_SIZE(fd_mask EVENT__HAVE_FD_MASK) diff --git a/buffer.c b/buffer.c index a51b6c5f66..fe9dac3f4b 100644 --- a/buffer.c +++ b/buffer.c @@ -723,6 +723,14 @@ advance_last_with_data(struct evbuffer *buf) int evbuffer_commit_space(struct evbuffer *buf, struct evbuffer_iovec *vec, int n_vecs) +{ + return evbuffer_commit_space_with_timespec(buf, vec, n_vecs, NULL); +} + +int +evbuffer_commit_space_with_timespec(struct evbuffer *buf, + struct evbuffer_iovec *vec, int n_vecs, + const struct timespec *ts) { struct evbuffer_chain *chain, **firstchainp, **chainp; int result = -1; @@ -744,6 +752,10 @@ evbuffer_commit_space(struct evbuffer *buf, goto done; buf->last->off += vec[0].iov_len; added = vec[0].iov_len; + if (ts && added && buf->last->timestamp.valid == 0) { + buf->last->timestamp.ts = *ts; + buf->last->timestamp.valid = 1; + } if (added) advance_last_with_data(buf); goto okay; @@ -773,6 +785,10 @@ evbuffer_commit_space(struct evbuffer *buf, for (i=0; ioff += vec[i].iov_len; added += vec[i].iov_len; + if (ts && vec[i].iov_len && (*chainp)->timestamp.valid == 0) { + (*chainp)->timestamp.ts = *ts; + (*chainp)->timestamp.valid = 1; + } if (vec[i].iov_len) { buf->last_with_datap = chainp; } @@ -940,6 +956,7 @@ APPEND_CHAIN_MULTICAST(struct evbuffer *dst, struct evbuffer *src) tmp->off = chain->off; tmp->flags |= EVBUFFER_MULTICAST|EVBUFFER_IMMUTABLE; tmp->buffer = chain->buffer; + tmp->timestamp = chain->timestamp; evbuffer_chain_insert(dst, tmp); } } @@ -1145,6 +1162,7 @@ evbuffer_drain(struct evbuffer *buf, size_t len) EVUTIL_ASSERT(remaining == 0); chain->misalign += chain->off; chain->off = 0; + chain->timestamp.valid = 0; break; } else evbuffer_chain_free(chain); @@ -1386,6 +1404,11 @@ evbuffer_pullup(struct evbuffer *buf, ev_ssize_t size) } if (CHAIN_PINNED(chain)) { + /* Pinned chain case: expand in-place by appending data from + * subsequent chains. Timestamps from subsequent chains being + * consolidated are intentionally discarded; only this chain's + * timestamp is preserved as it contains the oldest data. + */ size_t old_off = chain->off; if (CHAIN_SPACE_LEN(chain) < size - chain->off) { /* not enough room at end of chunk. */ @@ -1397,6 +1420,11 @@ evbuffer_pullup(struct evbuffer *buf, ev_ssize_t size) size -= old_off; chain = chain->next; } else if (chain->buffer_len - chain->misalign >= (size_t)size) { + /* Sufficient space case: expand in-place without reallocation + * by appending data from subsequent chains. Timestamps from + * subsequent chains being consolidated are intentionally discarded; + * only this chain's timestamp is preserved. + */ /* already have enough space in the first chain */ size_t old_off = chain->off; buffer = chain->buffer + chain->misalign + chain->off; @@ -1411,11 +1439,21 @@ evbuffer_pullup(struct evbuffer *buf, ev_ssize_t size) } buffer = tmp->buffer; tmp->off = size; + /* Preserve timestamp from the original first chain */ + if (chain->timestamp.valid) { + tmp->timestamp = chain->timestamp; + } buf->first = tmp; } /* TODO(niels): deal with buffers that point to NULL like sendfile */ + /* Note: When consolidating multiple chains into one during pullup, + * only the timestamp from the first (oldest) chain is preserved. + * Timestamps from subsequent chains are intentionally discarded. + * This design choice keeps the API simple by tracking only the + * receipt time of the oldest data in the buffer. + */ /* Copy and free every chunk that will be entirely pulled into tmp */ last_with_data = *buf->last_with_datap; for (; chain != NULL && (size_t)size >= chain->off; chain = next) { @@ -2276,10 +2314,41 @@ get_n_bytes_readable_on_socket(evutil_socket_t fd) #endif } -/* TODO(niels): should this function return ev_ssize_t and take ev_ssize_t - * as howmuch? */ -int -evbuffer_read(struct evbuffer *buf, evutil_socket_t fd, int howmuch) +/** + * Reads data from a socket optionally with kernel timestamp support. + * + * @param buf the evbuffer to populate + * @param fd the file descriptor to use + * @param howmuch the amount of data to read (this will be adjusted; + * see below) + * @param use_recvmsg try to use recvmsg to read the data (and try to + * read kernel timestamps) + * + * If howmuch is less than 0 or greater than EVBUFFER_MAX_READ we'll try + * to read EVBUFFER_MAX_READ bytes (there may be less available on the + *socket) + * + * If use_recvmsg is nonzero, it attempts to retrieve the SO_TIMESTAMPNS or + * SO_TIMESTAMP ancillary data from recvmsg() and associate it with the buffer + * data. The + * timestamp is stored per-chain, assigned only to the first chain + * that doesn't already have a valid timestamp. + * + * NOTE: Multiple recvmsg() calls may append data into the same chain when + * evbuffer_expand_fast_() finds existing chains with free space. This is + * intentional for memory efficiency and avoids excessive chain allocation. + * As a result, a chain may contain data from multiple recvmsg() calls with + * different timestamps; only the timestamp of the first recvmsg() call that + * filled that chain is retained. After draining data from the buffer, + * evbuffer_get_timestamp() will return the timestamp of the first chain, + * which may not reflect the actual timestamp of the remaining bytes if they + * were received in a later recvmsg() call. + * + * TODO(niels): should this function return ev_ssize_t and take ev_ssize_t + * as howmuch? + */ +static int +evbuffer_read_impl_(struct evbuffer *buf, evutil_socket_t fd, int howmuch, int use_recvmsg) { struct evbuffer_chain **chainp; int n; @@ -2291,6 +2360,9 @@ evbuffer_read(struct evbuffer *buf, evutil_socket_t fd, int howmuch) struct evbuffer_chain *chain; unsigned char *p; #endif + struct timespec ts; + int ts_found = 0; + memset(&ts, 0, sizeof(ts)); EVBUFFER_LOCK(buf); @@ -2342,7 +2414,65 @@ evbuffer_read(struct evbuffer *buf, evutil_socket_t fd, int howmuch) n = bytesRead; } #else - n = readv(fd, vecs, nvecs); + if (use_recvmsg) { + struct msghdr msg; + /* Control message buffer for cmsg data. + * Sized to accommodate timestamp messages (SCM_TIMESTAMPNS, + * SCM_TIMESTAMP) plus additional space (256 bytes) for other + * possible cmsg entries (SCM_RIGHTS, SCM_CREDENTIALS, etc.) + * to prevent truncation via MSG_CTRUNC which would silently + * lose timestamp information. */ +#define EVBUFFER_RECVMSG_CTRLFN_SZ \ + (CMSG_SPACE(sizeof(struct timespec)) + \ + CMSG_SPACE(sizeof(struct timeval)) + 256) + unsigned char control[EVBUFFER_RECVMSG_CTRLFN_SZ]; +#undef EVBUFFER_RECVMSG_CTRLFN_SZ + + /* Setup message header */ + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = vecs; + msg.msg_iovlen = nvecs; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + /* Receive with ancillary data */ + n = recvmsg(fd, &msg, 0); + + if (n > 0) { + /* Check if control data was truncated */ + if (!(msg.msg_flags & MSG_CTRUNC)) { + struct cmsghdr *cmsg; + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level != SOL_SOCKET) + continue; +#if EVENT__HAVE_DECL_SO_TIMESTAMPNS + if (cmsg->cmsg_type == SCM_TIMESTAMPNS) { + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct timespec))) + continue; + ts = *(struct timespec *)CMSG_DATA(cmsg); + ts_found = 1; + break; + } +#endif + +#if EVENT__HAVE_DECL_SO_TIMESTAMP + if (cmsg->cmsg_type == SCM_TIMESTAMP) { + struct timeval *tv; + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct timeval))) + continue; + tv = (struct timeval *)CMSG_DATA(cmsg); + ts.tv_sec = tv->tv_sec; + ts.tv_nsec = tv->tv_usec * 1000L; + ts_found = 1; + break; + } +#endif + } + } + } + } else { + n = readv(fd, vecs, nvecs); + } #endif } @@ -2387,8 +2517,16 @@ evbuffer_read(struct evbuffer *buf, evutil_socket_t fd, int howmuch) if ((ev_ssize_t)space < remaining) { (*chainp)->off += space; remaining -= (int)space; + if (ts_found && (*chainp)->timestamp.valid == 0) { + (*chainp)->timestamp.ts = ts; + (*chainp)->timestamp.valid = 1; + } } else { (*chainp)->off += remaining; + if (ts_found && (*chainp)->timestamp.valid == 0) { + (*chainp)->timestamp.ts = ts; + (*chainp)->timestamp.valid = 1; + } buf->last_with_datap = chainp; break; } @@ -2409,6 +2547,37 @@ evbuffer_read(struct evbuffer *buf, evutil_socket_t fd, int howmuch) return result; } +int +evbuffer_read(struct evbuffer *buf, evutil_socket_t fd, int howmuch) +{ + return evbuffer_read_impl_(buf, fd, howmuch, 0); +} + +int +evbuffer_read_with_timestamp( + struct evbuffer *buf, evutil_socket_t fd, int howmuch) +{ + return evbuffer_read_impl_(buf, fd, howmuch, 1); +} + +int evbuffer_get_timestamp( + struct evbuffer *buf, struct timespec *timestamp) +{ + int result = -1; + if (!timestamp) { + return -1; + } + EVBUFFER_LOCK(buf); + { + if (buf->first && buf->first->timestamp.valid) { + *timestamp = buf->first->timestamp.ts; + result = 0; + } + } + EVBUFFER_UNLOCK(buf); + return result; +} + #ifdef USE_IOVEC_IMPL static inline int evbuffer_write_iovec(struct evbuffer *buffer, evutil_socket_t fd, diff --git a/bufferevent-internal.h b/bufferevent-internal.h index 87ab9ad9c0..e6e97eb0f4 100644 --- a/bufferevent-internal.h +++ b/bufferevent-internal.h @@ -230,6 +230,9 @@ struct bufferevent_private { } conn_address; struct evdns_getaddrinfo_request *dns_request; + + /** Flag: set if receive timestamps are enabled */ + unsigned recv_timestamps_enabled : 1; }; /** Possible operations for a control callback. */ @@ -452,6 +455,9 @@ EVENT2_EXPORT_SYMBOL void bufferevent_socket_set_conn_address_(struct bufferevent *bev, struct sockaddr *addr, size_t addrlen); +EVENT2_EXPORT_SYMBOL +int be_socket_enable_timestamps_(evutil_socket_t fd); + /** Internal use: We have just successfully read data into an inbuf, so * reset the read timeout (if any). */ diff --git a/bufferevent_openssl.c b/bufferevent_openssl.c index b51b834bca..0d9d16a007 100644 --- a/bufferevent_openssl.c +++ b/bufferevent_openssl.c @@ -53,6 +53,16 @@ #include #endif +#ifdef EVENT__HAVE_SYS_SOCKET_H +#include +#endif +#ifdef EVENT__HAVE_SYS_UIO_H +#include +#endif +#ifdef EVENT__HAVE_NETINET_IN_H +#include +#endif + #include "event2/bufferevent.h" #include "event2/bufferevent_struct.h" #include "event2/bufferevent_ssl.h" @@ -83,6 +93,7 @@ /* every BIO type needs its own integer type value. */ #define BIO_TYPE_LIBEVENT 57 +#define BIO_TYPE_LIBEVENT_RECVMSG (58 | BIO_TYPE_SOURCE_SINK) /* ???? Arguably, we should set BIO_TYPE_FILTER or BIO_TYPE_SOURCE_SINK on * this. */ @@ -328,6 +339,236 @@ struct bufferevent_openssl { unsigned old_state : 2; }; +struct bio_socket_recvmsg_data { + evutil_socket_t fd; + struct bufferevent_openssl *bev_ssl; + struct timespec last_recv_ts; + int last_recv_ts_valid; +}; + +static int +bio_socket_recvmsg_new(BIO *b) +{ + struct bio_socket_recvmsg_data *data = mm_calloc(1, sizeof(*data)); + if (!data) { + return 0; + } + data->fd = -1; + data->bev_ssl = NULL; + data->last_recv_ts_valid = 0; + BIO_set_init(b, 1); + BIO_set_data(b, data); + return 1; +} + +static int +bio_socket_recvmsg_free(BIO *b) +{ + struct bio_socket_recvmsg_data *data; + if (!b) { + return 0; + } + data = BIO_get_data(b); + if (data) { + if (BIO_get_shutdown(b)) { +#ifdef _WIN32 + closesocket(data->fd); +#else + close(data->fd); +#endif + } + mm_free(data); + BIO_set_data(b, NULL); + } + BIO_set_init(b, 0); + return 1; +} + +static int +bio_socket_recvmsg_read(BIO *b, char *out, int outlen) +{ + struct bio_socket_recvmsg_data *data = BIO_get_data(b); + int r; + if (!data || data->fd < 0) { + return -1; + } + + BIO_clear_retry_flags(b); + +#if defined(_WIN32) + r = recv(data->fd, out, outlen, 0); +#else + if (data->bev_ssl && data->bev_ssl->bev.recv_timestamps_enabled) { + struct msghdr msg; + struct iovec iov; + unsigned char control[ + CMSG_SPACE(sizeof(struct timespec)) + /* SCM_TIMESTAMPNS */ + CMSG_SPACE(sizeof(struct timeval)) + 64 /* SCM_TIMESTAMP + extra */ + ]; + struct timespec ts; + int ts_found = 0; + + memset(&ts, 0, sizeof(ts)); + iov.iov_base = out; + iov.iov_len = outlen; + + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + r = recvmsg(data->fd, &msg, 0); + + if (r > 0) { + if (!(msg.msg_flags & MSG_CTRUNC)) { + struct cmsghdr *cmsg; + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level != SOL_SOCKET) { + continue; + } +#if EVENT__HAVE_DECL_SO_TIMESTAMPNS + if (cmsg->cmsg_type == SCM_TIMESTAMPNS) { + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct timespec))) { + continue; + } + ts = *(struct timespec *)CMSG_DATA(cmsg); + ts_found = 1; + break; + } +#endif + +#if EVENT__HAVE_DECL_SO_TIMESTAMP + if (cmsg->cmsg_type == SCM_TIMESTAMP) { + struct timeval *tv; + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct timeval))) { + continue; + } + tv = (struct timeval *)CMSG_DATA(cmsg); + ts.tv_sec = tv->tv_sec; + ts.tv_nsec = tv->tv_usec * 1000L; + ts_found = 1; + break; + } +#endif + } + } + if (ts_found) { + data->last_recv_ts = ts; + data->last_recv_ts_valid = 1; + } + } + } else { + r = recv(data->fd, out, outlen, 0); + } +#endif + + if (r <= 0) { + int err = EVUTIL_SOCKET_ERROR(); + if (EVUTIL_ERR_RW_RETRIABLE(err)) { + BIO_set_retry_read(b); + } + } + return r; +} + +static int +bio_socket_recvmsg_write(BIO *b, const char *in, int inlen) +{ + struct bio_socket_recvmsg_data *data = BIO_get_data(b); + int r; + if (!data || data->fd < 0) { + return -1; + } + + BIO_clear_retry_flags(b); +#ifdef _WIN32 + r = send(data->fd, in, inlen, 0); +#else + r = send(data->fd, in, inlen, 0); +#endif + if (r <= 0) { + int err = EVUTIL_SOCKET_ERROR(); + if (EVUTIL_ERR_RW_RETRIABLE(err)) { + BIO_set_retry_write(b); + } + } + return r; +} + +static long +bio_socket_recvmsg_ctrl(BIO *b, int cmd, long num, void *ptr) +{ + struct bio_socket_recvmsg_data *data = BIO_get_data(b); + long ret = 1; + if (!data) + return 0; + + switch (cmd) { + case BIO_C_SET_FD: + data->fd = (evutil_socket_t)(ev_intptr_t)ptr; + BIO_set_shutdown(b, (int)num); + BIO_set_init(b, 1); + ret = 1; + break; + case BIO_C_GET_FD: + if (BIO_get_init(b)) { + if (ptr) + *(evutil_socket_t *)ptr = data->fd; + ret = data->fd; + } else { + ret = -1; + } + break; + case BIO_CTRL_GET_CLOSE: + ret = BIO_get_shutdown(b); + break; + case BIO_CTRL_SET_CLOSE: + BIO_set_shutdown(b, (int)num); + ret = 1; + break; + case BIO_CTRL_DUP: + case BIO_CTRL_FLUSH: + ret = 1; + break; + default: + ret = 0; + break; + } + return ret; +} + +static BIO_METHOD *methods_socket_recvmsg; + +static BIO_METHOD * +BIO_s_socket_recvmsg(void) +{ + if (methods_socket_recvmsg == NULL) { + methods_socket_recvmsg = + BIO_meth_new(BIO_TYPE_LIBEVENT_RECVMSG, "socket_recvmsg"); + if (methods_socket_recvmsg == NULL) { + return NULL; + } + BIO_meth_set_write(methods_socket_recvmsg, bio_socket_recvmsg_write); + BIO_meth_set_read(methods_socket_recvmsg, bio_socket_recvmsg_read); + BIO_meth_set_ctrl(methods_socket_recvmsg, bio_socket_recvmsg_ctrl); + BIO_meth_set_create(methods_socket_recvmsg, bio_socket_recvmsg_new); + BIO_meth_set_destroy(methods_socket_recvmsg, bio_socket_recvmsg_free); + } + return methods_socket_recvmsg; +} + +static BIO * +BIO_new_socket_recvmsg(evutil_socket_t fd, int close_flag) +{ + BIO *bio = BIO_new(BIO_s_socket_recvmsg()); + if (!bio) { + return NULL; + } + BIO_ctrl(bio, BIO_C_SET_FD, close_flag, (void *)(ev_intptr_t)fd); + return bio; +} + static int be_openssl_enable(struct bufferevent *, short); static int be_openssl_disable(struct bufferevent *, short); static void be_openssl_unlink(struct bufferevent *); @@ -515,6 +756,10 @@ conn_closed(struct bufferevent_openssl *bev_ssl, int when, int errcode, int ret) break; case SSL_ERROR_SSL: /* Protocol error. */ +#ifdef SSL_R_UNEXPECTED_EOF_WHILE_READING + if (ERR_GET_REASON(ERR_peek_error()) == SSL_R_UNEXPECTED_EOF_WHILE_READING) + dirty_shutdown = 1; +#endif put_error(bev_ssl, errcode); break; case SSL_ERROR_WANT_X509_LOOKUP: @@ -589,9 +834,18 @@ do_read(struct bufferevent_openssl *bev_ssl, int n_to_read) { struct evbuffer_iovec space[2]; int result = 0; + struct bio_socket_recvmsg_data *bio_data = NULL; + if (bev_ssl->bev.read_suspended) return 0; + { + BIO *rbio = SSL_get_rbio(bev_ssl->ssl); + if (rbio && BIO_method_type(rbio) == BIO_TYPE_LIBEVENT_RECVMSG) { + bio_data = BIO_get_data(rbio); + } + } + atmost = bufferevent_get_read_max_(&bev_ssl->bev); if (n_to_read > atmost) n_to_read = atmost; @@ -600,9 +854,25 @@ do_read(struct bufferevent_openssl *bev_ssl, int n_to_read) { if (n < 0) return OP_ERR; + /* Variables to track timestamps for batch commit */ + struct timespec first_ts = {0, 0}; + int first_ts_valid = 0; + for (i=0; ibev.read_suspended) break; + if (bio_data) { + bio_data->last_recv_ts_valid = 0; + } + if (bev_ssl->underlying) { + if (evbuffer_get_timestamp( + bufferevent_get_input(bev_ssl->underlying), + &underlying_ts) == 0) { + underlying_ts_valid = 1; + } + } ERR_clear_error(); r = SSL_read(bev_ssl->ssl, space[i].iov_base, space[i].iov_len); if (r>0) { @@ -613,6 +883,19 @@ do_read(struct bufferevent_openssl *bev_ssl, int n_to_read) { ++n_used; space[i].iov_len = r; decrement_buckets(bev_ssl); + + /* Store timestamp from first successful read (oldest data). + All iovecs from the same reserve_space call are committed + together, so we use the timestamp from the first read. */ + if (n_used == 1) { + if (bio_data && bio_data->last_recv_ts_valid) { + first_ts = bio_data->last_recv_ts; + first_ts_valid = 1; + } else if (underlying_ts_valid) { + first_ts = underlying_ts; + first_ts_valid = 1; + } + } } else { int err = SSL_get_error(bev_ssl->ssl, r); print_err(err); @@ -639,8 +922,16 @@ do_read(struct bufferevent_openssl *bev_ssl, int n_to_read) { } } + /* Commit all filled iovecs together to avoid data corruption when + evbuffer_reserve_space() returns multiple vectors. Individual commits + can cause buffer accounting issues when the second vector is in a + different chain than the first. */ if (n_used) { - evbuffer_commit_space(input, space, n_used); + if (first_ts_valid) { + evbuffer_commit_space_with_timespec(input, space, n_used, &first_ts); + } else { + evbuffer_commit_space(input, space, n_used); + } if (bev_ssl->underlying) BEV_RESET_GENERIC_READ_TIMEOUT(bev); } @@ -1304,8 +1595,25 @@ be_openssl_ctrl(struct bufferevent *bev, case BEV_CTRL_SET_FD: if (!bev_ssl->underlying) { BIO *bio; - bio = BIO_new_socket((int)data->fd, 0); + if (bev_ssl->bev.options & BEV_OPT_RECV_TIMESTAMPS) { + if (be_socket_enable_timestamps_(data->fd) >= 0) { + bev_ssl->bev.recv_timestamps_enabled = 1; + } else { + bev_ssl->bev.recv_timestamps_enabled = 0; + } + } + if (bev_ssl->bev.recv_timestamps_enabled) { + bio = BIO_new_socket_recvmsg((int)data->fd, 0); + } else { + bio = BIO_new_socket((int)data->fd, 0); + } SSL_set_bio(bev_ssl->ssl, bio, bio); + if (bio && BIO_method_type(bio) == BIO_TYPE_LIBEVENT_RECVMSG) { + struct bio_socket_recvmsg_data *bio_data = BIO_get_data(bio); + if (bio_data) { + bio_data->bev_ssl = bev_ssl; + } + } } else { BIO *bio; if (!(bio = BIO_new_bufferevent(bev_ssl->underlying))) @@ -1391,6 +1699,22 @@ bufferevent_openssl_new_impl(struct event_base *base, if (be_openssl_set_fd(bev_ssl, state, fd)) goto err; + if ((options & BEV_OPT_RECV_TIMESTAMPS) && fd >= 0) { + if (be_socket_enable_timestamps_(fd) >= 0) { + bev_ssl->bev.recv_timestamps_enabled = 1; + } + } + + { + BIO *rbio = SSL_get_rbio(ssl); + if (rbio && BIO_method_type(rbio) == BIO_TYPE_LIBEVENT_RECVMSG) { + struct bio_socket_recvmsg_data *bio_data = BIO_get_data(rbio); + if (bio_data) { + bio_data->bev_ssl = bev_ssl; + } + } + } + if (underlying) { bufferevent_setwatermark(underlying, EV_READ, 0, 0); bufferevent_enable(underlying, EV_READ|EV_WRITE); @@ -1447,6 +1771,7 @@ bufferevent_openssl_socket_new(struct event_base *base, /* Does the SSL already have an fd? */ BIO *bio = SSL_get_wbio(ssl); long have_fd = -1; + int recv_timestamps_enabled = 0; if (bio) have_fd = BIO_get_fd(bio, NULL); @@ -1463,12 +1788,40 @@ bufferevent_openssl_socket_new(struct event_base *base, This is probably an error on our part. Fail. */ goto err; } + /* If the existing BIO is the standard socket BIO, replace it with our custom one to support timestamps. */ + if ((options & BEV_OPT_RECV_TIMESTAMPS) && fd >= 0) { + if (be_socket_enable_timestamps_(fd) >= 0) { + recv_timestamps_enabled = 1; + } + } + if (recv_timestamps_enabled && BIO_method_type(bio) == BIO_TYPE_SOCKET) { + int close_flag = BIO_get_close(bio); + BIO *new_bio = BIO_new_socket_recvmsg((int)fd, close_flag); + if (new_bio) { + /* SSL_set_bio() takes ownership and frees any previously set BIOs. + Do not explicitly free old BIOs as that would cause a double-free. */ + SSL_set_bio(ssl, new_bio, new_bio); + bio = new_bio; + } + } BIO_set_close(bio, 0); } else { /* The SSL isn't configured with a BIO with an fd. */ if (fd >= 0) { /* ... and we have an fd we want to use. */ - bio = BIO_new_socket((int)fd, 0); + if (options & BEV_OPT_RECV_TIMESTAMPS) { + if (be_socket_enable_timestamps_(fd) >= 0) { + recv_timestamps_enabled = 1; + } + } + if (recv_timestamps_enabled) { + bio = BIO_new_socket_recvmsg((int)fd, 0); + } else { + bio = BIO_new_socket((int)fd, 0); + } + if (!bio) { + goto err; + } SSL_set_bio(ssl, bio, bio); } else { /* Leave the fd unset. */ diff --git a/bufferevent_sock.c b/bufferevent_sock.c index f275b02380..b186da5ec9 100644 --- a/bufferevent_sock.c +++ b/bufferevent_sock.c @@ -84,6 +84,40 @@ static int be_socket_ctrl(struct bufferevent *, enum bufferevent_ctrl_op, union static void be_socket_setfd(struct bufferevent *, evutil_socket_t); +/* ======================================================================== + * Socket receive timestamp support (SO_TIMESTAMP) + * ======================================================================== */ + +/** + * Enable SO_TIMESTAMP socket option for kernel receive timestamping + * + * Returns: + * 1 = SO_TIMESTAMPNS enabled (nanosecond precision) + * 0 = SO_TIMESTAMP enabled (microsecond precision) + * -1 = timestamps not available on this platform + */ +int +be_socket_enable_timestamps_(evutil_socket_t fd) +{ + int on = 1; + +#if EVENT__HAVE_DECL_SO_TIMESTAMPNS + /* Try nanosecond precision first (Linux 2.6.22+) */ + if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS, &on, sizeof(on)) == 0) { + return 1; + } +#endif + +#if EVENT__HAVE_DECL_SO_TIMESTAMP + /* Fall back to microsecond precision */ + if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMP, &on, sizeof(on)) == 0) { + return 0; + } +#endif + + return -1; +} + const struct bufferevent_ops bufferevent_ops_socket = { "socket", evutil_offsetof(struct bufferevent_private, bev), @@ -187,7 +221,15 @@ bufferevent_readcb(evutil_socket_t fd, short event, void *arg) goto done; evbuffer_unfreeze(input, 0); - res = evbuffer_read(input, fd, (int)howmuch); /* XXXX evbuffer_read would do better to take and return ev_ssize_t */ + + if (bufev_p->recv_timestamps_enabled) { + /* Use recvmsg() to capture timestamps */ + res = evbuffer_read_with_timestamp(input, fd, (int)howmuch); + } else { + /* Use standard read when timestamps not enabled */ + res = evbuffer_read(input, fd, (int)howmuch); + } + evbuffer_freeze(input, 0); if (res == -1) { @@ -370,6 +412,13 @@ bufferevent_socket_new(struct event_base *base, evutil_socket_t fd, evbuffer_add_cb(bufev->output, bufferevent_socket_outbuf_cb, bufev); + /* Enable receive timestamps if requested */ + if ((options & BEV_OPT_RECV_TIMESTAMPS) && fd >= 0) { + if (be_socket_enable_timestamps_(fd) >= 0) { + bufev_p->recv_timestamps_enabled = 1; + } + } + evbuffer_freeze(bufev->input, 0); evbuffer_freeze(bufev->output, 1); @@ -634,9 +683,17 @@ be_socket_setfd(struct bufferevent *bufev, evutil_socket_t fd) event_assign(&bufev->ev_write, bufev->ev_base, fd, EV_WRITE|EV_PERSIST|EV_FINALIZE, bufferevent_writecb, bufev); - if (fd >= 0) + if (fd >= 0) { bufferevent_enable(bufev, bufev->enabled); + /* Enable receive timestamps if requested */ + if ((bufev_p->options & BEV_OPT_RECV_TIMESTAMPS) && !bufev_p->recv_timestamps_enabled) { + if (be_socket_enable_timestamps_(fd) >= 0) { + bufev_p->recv_timestamps_enabled = 1; + } + } + } + evutil_getaddrinfo_cancel_async_(bufev_p->dns_request); BEV_UNLOCK(bufev); diff --git a/configure.ac b/configure.ac index 298d3ab9da..a40471ee00 100644 --- a/configure.ac +++ b/configure.ac @@ -338,6 +338,13 @@ if test "x$ac_cv_header_sys_sysctl_h" = "xyes"; then ) fi +if test "x$ac_cv_header_sys_socket_h" = "xyes"; then + AC_CHECK_DECLS([SO_TIMESTAMP, SO_TIMESTAMPNS], [], [], + [[#include + #include ]] + ) +fi + AM_CONDITIONAL(BUILD_WIN32, test x$bwin32 = xtrue) AM_CONDITIONAL(BUILD_CYGWIN, test x$cygwin = xtrue) AM_CONDITIONAL(BUILD_MIDIPIX, test x$midipix = xtrue) diff --git a/evbuffer-internal.h b/evbuffer-internal.h index d09b4f1ddd..d24bad2027 100644 --- a/evbuffer-internal.h +++ b/evbuffer-internal.h @@ -204,6 +204,15 @@ struct evbuffer_chain { /** number of references to this chain */ int refcnt; + + /** Timestamp support. */ + struct { + /* The timespec for the oldest data in this chunk */ + struct timespec ts; + /* valid is set to a non-zero value when ts is set */ + int valid; + } timestamp; + /** Usually points to the read-write memory belonging to this * buffer allocated as part of the evbuffer_chain allocation. * For mmap, this can be a read-only buffer and diff --git a/event-config.h.cmake b/event-config.h.cmake index 498ab1eac9..66f3a7ba17 100644 --- a/event-config.h.cmake +++ b/event-config.h.cmake @@ -81,6 +81,15 @@ /* Define to 1 if you have the declaration of `RANDOM_UUID'. */ #define EVENT__HAVE_DECL_RANDOM_UUID @EVENT__HAVE_DECL_RANDOM_UUID@ +/* Define to 1 if you have the declaration of `SO_TIMESTAMP'. */ +#define EVENT__HAVE_DECL_SO_TIMESTAMP @EVENT__HAVE_DECL_SO_TIMESTAMP@ + +/* Define to 1 if you have the declaration of `SO_TIMESTAMPNS'. */ +#define EVENT__HAVE_DECL_SO_TIMESTAMPNS @EVENT__HAVE_DECL_SO_TIMESTAMPNS@ + +/* Define to 1 if you have `getrandom' function. */ +#cmakedefine EVENT__HAVE_GETRANDOM 1 + /* Define if /dev/poll is available */ #cmakedefine EVENT__HAVE_DEVPOLL 1 diff --git a/include/event2/buffer.h b/include/event2/buffer.h index 468588b9f1..f4110c1224 100644 --- a/include/event2/buffer.h +++ b/include/event2/buffer.h @@ -324,6 +324,23 @@ EVENT2_EXPORT_SYMBOL int evbuffer_commit_space(struct evbuffer *buf, struct evbuffer_iovec *vec, int n_vecs); +/** + Commits the space reserved by evbuffer_reserve_space() and + associates a timespec with the committed chains. + + @param buf the evbuffer in which to reserve space. + @param vec one or two extents returned by evbuffer_reserve_space. + @param n_vecs the number of extents. + @param ts pointer to timespec (or NULL if not valid). + @return 0 on success, -1 on error + @see evbuffer_reserve_space() +*/ +EVENT2_EXPORT_SYMBOL +int evbuffer_commit_space_with_timespec(struct evbuffer *buf, + struct evbuffer_iovec *vec, int n_vecs, + const struct timespec *ts); + + /** Append data to the end of an evbuffer. @@ -733,6 +750,43 @@ int evbuffer_write_atmost(struct evbuffer *buffer, evutil_socket_t fd, EVENT2_EXPORT_SYMBOL int evbuffer_read(struct evbuffer *buffer, evutil_socket_t fd, int howmuch); +/** + Read from a file descriptor and store the result in an evbuffer. + + @param buffer the evbuffer to store the result + @param fd the file descriptor to read from + @param howmuch the number of bytes to be read (if howmuch < 0 or + higher than EVBUFFER_MAX_READ it is set to + EVBUFFER_MAX_READ) + @return the number of bytes read, or -1 if an error occurred + @see evbuffer_write() + */ +EVENT2_EXPORT_SYMBOL +int evbuffer_read_with_timestamp(struct evbuffer *buffer, evutil_socket_t fd, + int howmuch); + +/** + * Get the timestamp stored for the oldest data in the buffer chain. + * + * Returns the timestamp of when the oldest bytes currently in the buffer + * were received from the kernel. This is the timestamp of the first chain + * in the buffer. If the buffer is empty or no timestamp is available, + * returns -1. + * + * Note: When evbuffer_pullup() consolidates multiple chains, only the + * timestamp from the first (oldest) chain is preserved. This ensures that + * the timestamp always reflects when the oldest data in the buffer was + * received, regardless of how many internal consolidation operations + * have occurred. + * + * @param buffer The buffer to read from + * @param timestamp where to store the result + * @return 0 success (timestamp was stored) + * -1 failure (buffer empty or no timestamp available) + */ +EVENT2_EXPORT_SYMBOL +int evbuffer_get_timestamp(struct evbuffer *buffer, struct timespec *timestamp); + /** Search for a string within an evbuffer. diff --git a/include/event2/bufferevent.h b/include/event2/bufferevent.h index 48cd153563..9df7c00cde 100644 --- a/include/event2/bufferevent.h +++ b/include/event2/bufferevent.h @@ -170,7 +170,14 @@ enum bufferevent_options { * bufferevent. This option currently requires that * BEV_OPT_DEFER_CALLBACKS also be set; a future version of Libevent * might remove the requirement.*/ - BEV_OPT_UNLOCK_CALLBACKS = (1<<3) + BEV_OPT_UNLOCK_CALLBACKS = (1<<3), + + /** If set, capture kernel-measured receive timestamps for socket + * bufferevents. Timestamps can be retrieved from the input buffer + * using evbuffer_get_timestamp(). Supported for socket bufferevents + * created with bufferevent_socket_new() and OpenSSL socket bufferevents + * created with bufferevent_openssl_socket_new(). */ + BEV_OPT_RECV_TIMESTAMPS = (1<<4) }; /** diff --git a/test/regress_buffer.c b/test/regress_buffer.c index 8ac4b6e038..816a855e61 100644 --- a/test/regress_buffer.c +++ b/test/regress_buffer.c @@ -425,6 +425,165 @@ test_evbuffer_remove_buffer_with_empty3(void *ptr) evbuffer_free(buf); } +static void +test_evbuffer_pullup_with_empty(void *ptr) +{ + struct evbuffer *buf = NULL; + + buf = evbuffer_new(); + evbuffer_add(buf, "foo", 3); + evbuffer_add_reference(buf, NULL, 0, NULL, NULL); + evbuffer_validate(buf); + tt_int_op(evbuffer_get_length(buf), ==, 3); + tt_mem_op(evbuffer_pullup(buf, -1), ==, "foo", 3); + + evbuffer_free(buf); + buf = evbuffer_new(); + evbuffer_validate(buf); + tt_int_op(evbuffer_get_length(buf), ==, 0); + tt_ptr_op(evbuffer_pullup(buf, -1), ==, NULL); + + evbuffer_free(buf); + buf = evbuffer_new(); + evbuffer_add(buf, "foo", 3); + evbuffer_add_reference(buf, NULL, 0, NULL, NULL); + evbuffer_validate(buf); + tt_mem_op(evbuffer_pullup(buf, 3), ==, "foo", 3); + + end: + if (buf) + evbuffer_free(buf); +} + +static void +test_evbuffer_get_timestamp(void *ptr) +{ + struct evbuffer *buf = NULL; + struct timespec ts, ts2; + struct timeval tv_sleep = { 0, 10000 }; /* 10 ms */ + int on = 1; + int r; + int ts_supported = 1; + + struct sockaddr_in sin; + ev_socklen_t slen = sizeof(sin); + evutil_socket_t listener = -1; + evutil_socket_t fd_pair[2] = { -1, -1 }; + + /* 1. Ensure empty buffer returns -1 */ + buf = evbuffer_new(); + tt_assert(buf); + tt_int_op(evbuffer_get_timestamp(buf, &ts), ==, -1); + + /* Create UDP loopback connection */ + listener = socket(AF_INET, SOCK_DGRAM, 0); + tt_assert(listener != EVUTIL_INVALID_SOCKET); + memset(&sin, 0, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(0x7f000001L); + sin.sin_port = 0; + tt_assert(bind(listener, (struct sockaddr *)&sin, sizeof(sin)) == 0); + tt_assert(getsockname(listener, (struct sockaddr *)&sin, &slen) == 0); + + fd_pair[0] = socket(AF_INET, SOCK_DGRAM, 0); + tt_assert(fd_pair[0] != EVUTIL_INVALID_SOCKET); + tt_assert(connect(fd_pair[0], (struct sockaddr *)&sin, sizeof(sin)) == 0); + + fd_pair[1] = listener; + listener = -1; + + memset(&sin, 0, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(0x7f000001L); + tt_assert(getsockname(fd_pair[0], (struct sockaddr *)&sin, &slen) == 0); + tt_assert(connect(fd_pair[1], (struct sockaddr *)&sin, sizeof(sin)) == 0); + + /* 2. Configure socket option for receive timestamps */ +#ifdef SO_TIMESTAMPNS + if (setsockopt(fd_pair[1], SOL_SOCKET, SO_TIMESTAMPNS, &on, sizeof(on)) == -1) + ts_supported = 0; +#elif defined(SO_TIMESTAMP) + if (setsockopt(fd_pair[1], SOL_SOCKET, SO_TIMESTAMP, &on, sizeof(on)) == -1) + ts_supported = 0; +#else + ts_supported = 0; +#endif + + /* If timestamps not supported, skip the timestamp checks */ + if (!ts_supported) { + tt_skip(); + goto end; + } + + /* 3. Send packet A */ + r = send(fd_pair[0], "packetA", 7, 0); + tt_int_op(r, ==, 7); + + /* Sleep briefly to let the kernel process the packet and stamp it */ + evutil_usleep_(&tv_sleep); + + /* 4. Read packet A with timestamp */ + r = evbuffer_read_with_timestamp(buf, fd_pair[1], 1024); + tt_int_op(r, ==, 7); + + /* 5. Fetch and verify timestamp A */ + tt_int_op(evbuffer_get_timestamp(buf, &ts), ==, 0); + tt_assert(ts.tv_sec > 0); + TT_BLATHER(("Captured timestamp A: %lld.%09ld", (long long)ts.tv_sec, (long)ts.tv_nsec)); + + /* 6. Send packet B */ + r = send(fd_pair[0], "packetB", 7, 0); + tt_int_op(r, ==, 7); + + evutil_usleep_(&tv_sleep); + + /* 7. Read packet B with timestamp */ + r = evbuffer_read_with_timestamp(buf, fd_pair[1], 1024); + tt_int_op(r, ==, 7); + + /* 8. Fetch timestamp and verify it still returns packet A's (oldest first) */ + tt_int_op(evbuffer_get_timestamp(buf, &ts2), ==, 0); + tt_int_op(ts.tv_sec, ==, ts2.tv_sec); + tt_int_op(ts.tv_nsec, ==, ts2.tv_nsec); + + /* 9. Drain packet A's bytes. Packet A is 7 bytes. + * Draining 3 bytes should still keep packet A's timestamp. */ + tt_int_op(evbuffer_drain(buf, 3), ==, 0); + tt_int_op(evbuffer_get_timestamp(buf, &ts2), ==, 0); + tt_int_op(ts.tv_sec, ==, ts2.tv_sec); + tt_int_op(ts.tv_nsec, ==, ts2.tv_nsec); + + /* 10. Drain remaining 4 bytes of packet A. + * The buffer now contains only packet B's bytes. The timestamp should shift to packet B's. */ + tt_int_op(evbuffer_drain(buf, 4), ==, 0); + tt_int_op(evbuffer_get_timestamp(buf, &ts2), ==, 0); + /* Timestamps for B should be valid and greater than or equal to A */ + tt_assert(ts2.tv_sec >= ts.tv_sec); + if (ts2.tv_sec == ts.tv_sec) { + tt_assert(ts2.tv_nsec >= ts.tv_nsec); + } + TT_BLATHER(("Captured timestamp B: %lld.%09ld", (long long)ts2.tv_sec, (long)ts2.tv_nsec)); + + /* 11. Fully drain the buffer. Assert evbuffer_get_timestamp returns -1. */ + tt_int_op(evbuffer_drain(buf, 7), ==, 0); + tt_int_op(evbuffer_get_timestamp(buf, &ts2), ==, -1); + + end: + if (buf) { + evbuffer_free(buf); + } + if (listener != -1) { + evutil_closesocket(listener); + } + if (fd_pair[0] != -1) { + evutil_closesocket(fd_pair[0]); + } + if (fd_pair[1] != -1) { + evutil_closesocket(fd_pair[1]); + } +} + + static void test_evbuffer_remove_buffer_with_empty_front(void *ptr) { @@ -2761,6 +2920,8 @@ struct testcase_t evbuffer_testcases[] = { { "add_iovec", test_evbuffer_add_iovec, 0, NULL, NULL}, { "copyout", test_evbuffer_copyout, 0, NULL, NULL}, { "file_segment_add_cleanup_cb", test_evbuffer_file_segment_add_cleanup_cb, 0, NULL, NULL }, + { "pullup_with_empty", test_evbuffer_pullup_with_empty, 0, NULL, NULL }, + { "get_timestamp", test_evbuffer_get_timestamp, TT_FORK, &basic_setup, NULL }, #define ADDFILE_TEST(name, parameters) \ { name, test_evbuffer_add_file, TT_FORK|TT_NEED_BASE, \ diff --git a/test/regress_bufferevent.c b/test/regress_bufferevent.c index d4208c2090..423d341057 100644 --- a/test/regress_bufferevent.c +++ b/test/regress_bufferevent.c @@ -1326,6 +1326,120 @@ test_bufferevent_filter_data_stuck(void *arg) bufferevent_free(filter); } +static void +bufferevent_recv_timestamps_readcb(struct bufferevent *bev, void *ctx) +{ + int *done = ctx; + struct timespec ts; + char tmp[32]; + int r; + + /* Fetch and verify timestamps BEFORE draining the buffer! */ + if (evbuffer_get_timestamp(bufferevent_get_input(bev), &ts) < 0) { + /* Timestamp capture not supported/enabled on this platform/socket. */ + *done = 0; + event_base_loopexit(bufferevent_get_base(bev), NULL); + goto end; + } + + tt_assert(ts.tv_sec > 0); + r = bufferevent_read(bev, tmp, sizeof(tmp)); + tt_int_op(r, ==, 14); + tt_mem_op(tmp, ==, "timestamp_test", 14); + + *done = 1; + event_base_loopexit(bufferevent_get_base(bev), NULL); + + end: + ; +} + +static void +test_bufferevent_recv_timestamps(void *arg) +{ + struct basic_test_data *data = arg; + struct bufferevent *bev1 = NULL; + struct bufferevent *bev2 = NULL; + struct timespec ts; + int done = 0; + + struct sockaddr_in sin; + ev_socklen_t slen = sizeof(sin); + evutil_socket_t listener = -1; + evutil_socket_t fd_pair[2] = { -1, -1 }; + + /* Create UDP loopback connection */ + listener = socket(AF_INET, SOCK_DGRAM, 0); + tt_assert(listener != EVUTIL_INVALID_SOCKET); + memset(&sin, 0, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(0x7f000001L); + sin.sin_port = 0; + tt_assert(bind(listener, (struct sockaddr *)&sin, sizeof(sin)) == 0); + tt_assert(getsockname(listener, (struct sockaddr *)&sin, &slen) == 0); + + fd_pair[0] = socket(AF_INET, SOCK_DGRAM, 0); + tt_assert(fd_pair[0] != EVUTIL_INVALID_SOCKET); + tt_assert(connect(fd_pair[0], (struct sockaddr *)&sin, sizeof(sin)) == 0); + + fd_pair[1] = listener; + listener = -1; + + memset(&sin, 0, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(0x7f000001L); + tt_assert(getsockname(fd_pair[0], (struct sockaddr *)&sin, &slen) == 0); + tt_assert(connect(fd_pair[1], (struct sockaddr *)&sin, sizeof(sin)) == 0); + + /* 1. Create bufferevents (bev2 has BEV_OPT_RECV_TIMESTAMPS enabled) */ + bev1 = bufferevent_socket_new(data->base, fd_pair[0], BEV_OPT_CLOSE_ON_FREE); + tt_assert(bev1); + fd_pair[0] = -1; /* bev1 owns it now */ + bev2 = bufferevent_socket_new(data->base, fd_pair[1], BEV_OPT_CLOSE_ON_FREE | BEV_OPT_RECV_TIMESTAMPS); + tt_assert(bev2); + fd_pair[1] = -1; /* bev2 owns it now */ + + /* 2. Verify that initially no timestamps are present */ + tt_int_op(evbuffer_get_timestamp(bufferevent_get_input(bev2), &ts), ==, -1); + + /* 3. Enable writing on bev1 and write data */ + tt_int_op(bufferevent_enable(bev1, EV_WRITE), ==, 0); + tt_int_op(bufferevent_write(bev1, "timestamp_test", 14), ==, 0); + + /* Configure callback and enable read on bev2 */ + bufferevent_setcb(bev2, bufferevent_recv_timestamps_readcb, NULL, NULL, &done); + tt_int_op(bufferevent_enable(bev2, EV_READ), ==, 0); + + /* 4. Dispatch event loop and wait for arrival */ + event_base_dispatch(data->base); + + /* If timestamp capture failed (not supported), skip the test */ + if (done == 0) { + tt_skip(); + goto end; + } + + tt_int_op(done, ==, 1); + + end: + if (bev1) { + bufferevent_free(bev1); + } + if (bev2) { + bufferevent_free(bev2); + } + if (listener != -1) { + evutil_closesocket(listener); + } + if (fd_pair[0] != -1) { + evutil_closesocket(fd_pair[0]); + } + if (fd_pair[1] != -1) { + evutil_closesocket(fd_pair[1]); + } +} + + struct testcase_t bufferevent_testcases[] = { LEGACY(bufferevent, TT_ISOLATED), @@ -1401,6 +1515,9 @@ struct testcase_t bufferevent_testcases[] = { { "bufferevent_filter_data_stuck", test_bufferevent_filter_data_stuck, TT_FORK|TT_NEED_BASE, &basic_setup, NULL }, + { "bufferevent_recv_timestamps", + test_bufferevent_recv_timestamps, + TT_FORK|TT_NEED_BASE, &basic_setup, NULL }, END_OF_TESTCASES, }; diff --git a/test/regress_http.c b/test/regress_http.c index 8f30b57b5a..0269cb025d 100644 --- a/test/regress_http.c +++ b/test/regress_http.c @@ -119,13 +119,20 @@ static struct bufferevent * https_bev(struct event_base *base, void *arg) { SSL *ssl = SSL_new(get_ssl_ctx()); + struct bufferevent *bev; SSL_use_certificate(ssl, ssl_getcert(ssl_getkey())); SSL_use_PrivateKey(ssl, ssl_getkey()); - return bufferevent_openssl_socket_new( + bev = bufferevent_openssl_socket_new( base, -1, ssl, BUFFEREVENT_SSL_ACCEPTING, BEV_OPT_CLOSE_ON_FREE); + if (!bev) { + SSL_free(ssl); + return NULL; + } + bufferevent_openssl_set_allow_dirty_shutdown(bev, 1); + return bev; } #endif static struct evhttp * @@ -3078,10 +3085,19 @@ http_incomplete_errorcb(struct bufferevent *bev, short what, void *arg) if (what & BEV_EVENT_CONNECTED) return; - if (what == (BEV_EVENT_READING|BEV_EVENT_EOF)) + if (what == (BEV_EVENT_READING|BEV_EVENT_EOF)) { test_ok++; - else + } else if (what == (BEV_EVENT_READING|BEV_EVENT_ERROR)) { + /* Under SSL, raw socket shutdowns trigger TLS alert protocol errors on OpenSSL 3.0. + * We accept this as a successful termination for this incomplete request test. */ + if (bufferevent_get_openssl_error(bev) != 0) { + test_ok++; + } else { + test_ok = -2; + } + } else { test_ok = -2; + } event_base_loopexit(exit_base,NULL); } diff --git a/test/regress_ssl.c b/test/regress_ssl.c index 68c28114f7..5be22d615c 100644 --- a/test/regress_ssl.c +++ b/test/regress_ssl.c @@ -983,6 +983,226 @@ regress_bufferevent_openssl_wm(void *arg) bufferevent_free(server.bev); } +static void +bufferevent_openssl_recv_timestamps_readcb(struct bufferevent *bev, void *ctx) +{ + int *done = ctx; + struct timespec ts; + struct evbuffer *input; + char tmp[32]; + int r; + int ts_result; + + /* Fetch and verify timestamps BEFORE draining the buffer! */ + input = bufferevent_get_input(bev); + ts_result = evbuffer_get_timestamp(input, &ts); + + /* UNIX domain sockets don't support kernel timestamps on most + * platforms, so timestamp may not be available. Just verify we can + * read the data successfully. If kernel timestamp is available, the + * caller would have set ts_result == 0. */ + if (ts_result == 0) { + tt_assert(ts.tv_sec > 0); + } + + r = bufferevent_read(bev, tmp, sizeof(tmp)); + tt_int_op(r, ==, 14); + tt_mem_op(tmp, ==, "timestamp_test", 14); + + *done = 1; + event_base_loopexit(bufferevent_get_base(bev), NULL); + + end: + ; +} + +static void +test_eventcb(struct bufferevent *bev, short what, void *ctx) +{ + TT_BLATHER(("test_eventcb: %p got event %d", bev, (int)what)); + if (what & BEV_EVENT_ERROR) { + unsigned long err; + while ((err = ERR_get_error())) { + TT_BLATHER((" SSL error: %s", ERR_error_string(err, NULL))); + } + } +} + +static void +test_bufferevent_openssl_direct_recv_timestamps(void *arg) +{ + struct basic_test_data *data = arg; + struct bufferevent *bev1 = NULL; + struct bufferevent *bev2 = NULL; + SSL *ssl1 = NULL, *ssl2 = NULL; + struct timespec ts; + int done = 0; + evutil_socket_t fd_pair[2] = { -1, -1 }; + + /* Create UNIX domain socketpair */ + tt_assert(socketpair(AF_UNIX, SOCK_STREAM, 0, fd_pair) == 0); + tt_assert(evutil_make_socket_nonblocking(fd_pair[0]) == 0); + tt_assert(evutil_make_socket_nonblocking(fd_pair[1]) == 0); + + ssl1 = SSL_new(get_ssl_ctx()); + ssl2 = SSL_new(get_ssl_ctx()); + tt_assert(ssl1); + tt_assert(ssl2); + + SSL_use_certificate(ssl2, the_cert); + SSL_use_PrivateKey(ssl2, the_key); + + /* Create direct socket openssl bufferevents. + * bev2 has BEV_OPT_RECV_TIMESTAMPS enabled. */ + bev1 = bufferevent_openssl_socket_new( + data->base, fd_pair[0], ssl1, BUFFEREVENT_SSL_CONNECTING, + BEV_OPT_CLOSE_ON_FREE | BEV_OPT_DEFER_CALLBACKS); + tt_assert(bev1); + fd_pair[0] = -1; + + bev2 = bufferevent_openssl_socket_new( + data->base, fd_pair[1], ssl2, BUFFEREVENT_SSL_ACCEPTING, + BEV_OPT_CLOSE_ON_FREE | BEV_OPT_DEFER_CALLBACKS | BEV_OPT_RECV_TIMESTAMPS); + tt_assert(bev2); + fd_pair[1] = -1; + + /* Verify initially no timestamps are present */ + tt_int_op(evbuffer_get_timestamp(bufferevent_get_input(bev2), &ts), ==, -1); + + /* Configure callbacks */ + bufferevent_setcb(bev1, NULL, NULL, test_eventcb, NULL); + bufferevent_setcb(bev2, bufferevent_openssl_recv_timestamps_readcb, NULL, test_eventcb, &done); + tt_int_op(bufferevent_enable(bev1, EV_READ|EV_WRITE), ==, 0); + tt_int_op(bufferevent_enable(bev2, EV_READ|EV_WRITE), ==, 0); + + /* Write data from bev1 */ + tt_int_op(bufferevent_write(bev1, "timestamp_test", 14), ==, 0); + + /* Dispatch base */ + event_base_dispatch(data->base); + + tt_int_op(done, ==, 1); + + end: + if (bev1) + bufferevent_free(bev1); + if (bev2) + bufferevent_free(bev2); + if (fd_pair[0] >= 0) + evutil_closesocket(fd_pair[0]); + if (fd_pair[1] >= 0) + evutil_closesocket(fd_pair[1]); +} + +static void +bufferevent_openssl_filter_recv_timestamps_readcb(struct bufferevent *bev, void *ctx) +{ + int *done = ctx; + struct timespec ts; + struct evbuffer *input; + char tmp[32]; + int r; + int ts_result; + + input = bufferevent_get_input(bev); + ts_result = evbuffer_get_timestamp(input, &ts); + + /* UNIX domain sockets don't support kernel timestamps on most + * platforms, so timestamp may not be available. Just verify we can + * read the data successfully. If kernel timestamp is available, the + * caller would have set ts_result == 0. */ + if (ts_result == 0) { + tt_assert(ts.tv_sec > 0); + } + + r = bufferevent_read(bev, tmp, sizeof(tmp)); + tt_int_op(r, ==, 9); + tt_mem_op(tmp, ==, "test_data", 9); + + *done = 1; + event_base_loopexit(bufferevent_get_base(bev), NULL); + + end: + ; +} + +static void +test_bufferevent_openssl_filter_recv_timestamps(void *arg) +{ + struct basic_test_data *data = arg; + struct bufferevent *bev1 = NULL; + struct bufferevent *bev2 = NULL; + struct bufferevent *underlying_bev1 = NULL; + struct bufferevent *underlying_bev2 = NULL; + SSL *ssl1 = NULL, *ssl2 = NULL; + int done = 0; + evutil_socket_t fd_pair[2] = {-1, -1}; + + /* Create UNIX domain socketpair */ + tt_assert(socketpair(AF_UNIX, SOCK_STREAM, 0, fd_pair) == 0); + tt_assert(evutil_make_socket_nonblocking(fd_pair[0]) == 0); + tt_assert(evutil_make_socket_nonblocking(fd_pair[1]) == 0); + + ssl1 = SSL_new(get_ssl_ctx()); + ssl2 = SSL_new(get_ssl_ctx()); + tt_assert(ssl1); + tt_assert(ssl2); + + SSL_use_certificate(ssl2, the_cert); + SSL_use_PrivateKey(ssl2, the_key); + + /* Create underlying socket bufferevents */ + underlying_bev1 = bufferevent_socket_new(data->base, fd_pair[0], + BEV_OPT_CLOSE_ON_FREE | BEV_OPT_DEFER_CALLBACKS); + tt_assert(underlying_bev1); + fd_pair[0] = -1; + + underlying_bev2 = bufferevent_socket_new(data->base, fd_pair[1], + BEV_OPT_CLOSE_ON_FREE | BEV_OPT_DEFER_CALLBACKS | BEV_OPT_RECV_TIMESTAMPS); + tt_assert(underlying_bev2); + fd_pair[1] = -1; + + /* Create filtered openssl bufferevents that wrap the socket bufferevents */ + bev1 = bufferevent_openssl_filter_new(data->base, underlying_bev1, ssl1, + BUFFEREVENT_SSL_CONNECTING, + BEV_OPT_CLOSE_ON_FREE | BEV_OPT_DEFER_CALLBACKS); + tt_assert(bev1); + underlying_bev1 = NULL; /* ownership transferred */ + + bev2 = bufferevent_openssl_filter_new(data->base, underlying_bev2, ssl2, + BUFFEREVENT_SSL_ACCEPTING, + BEV_OPT_CLOSE_ON_FREE | BEV_OPT_DEFER_CALLBACKS); + tt_assert(bev2); + underlying_bev2 = NULL; /* ownership transferred */ + + /* Configure callbacks for basic data flow */ + bufferevent_setcb(bev1, NULL, NULL, test_eventcb, NULL); + bufferevent_setcb(bev2, bufferevent_openssl_filter_recv_timestamps_readcb, NULL, test_eventcb, &done); + tt_int_op(bufferevent_enable(bev1, EV_READ | EV_WRITE), ==, 0); + tt_int_op(bufferevent_enable(bev2, EV_READ | EV_WRITE), ==, 0); + + /* Write data from bev1 */ + tt_int_op(bufferevent_write(bev1, "test_data", 9), ==, 0); + + /* Dispatch base - just ensure filtered mode with timestamp code paths works + */ + event_base_dispatch(data->base); + +end: + if (bev1) + bufferevent_free(bev1); + if (bev2) + bufferevent_free(bev2); + if (underlying_bev1) + bufferevent_free(underlying_bev1); + if (underlying_bev2) + bufferevent_free(underlying_bev2); + if (fd_pair[0] >= 0) + evutil_closesocket(fd_pair[0]); + if (fd_pair[1] >= 0) + evutil_closesocket(fd_pair[1]); +} + struct testcase_t ssl_testcases[] = { #define T(a) ((void *)(a)) { "bufferevent_socketpair", regress_bufferevent_openssl, @@ -1066,6 +1286,10 @@ struct testcase_t ssl_testcases[] = { TT_FORK|TT_NEED_BASE, &ssl_setup, T(REGRESS_DEFERRED_CALLBACKS) }, { "bufferevent_wm_filter_defer", regress_bufferevent_openssl_wm, TT_FORK|TT_NEED_BASE, &ssl_setup, T(REGRESS_OPENSSL_FILTER|REGRESS_DEFERRED_CALLBACKS) }, + { "bufferevent_openssl_direct_recv_timestamps", test_bufferevent_openssl_direct_recv_timestamps, + TT_FORK|TT_NEED_BASE, &ssl_setup, NULL }, + { "bufferevent_openssl_filter_recv_timestamps", test_bufferevent_openssl_filter_recv_timestamps, + TT_FORK|TT_NEED_BASE, &ssl_setup, NULL }, #undef T