// SPDX-License-Identifier: GPL-2.0 #define _GNU_SOURCE #include <errno.h> #include <limits.h> #include <fcntl.h> #include <string.h> #include <stdarg.h> #include <stdbool.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <strings.h> #include <signal.h> #include <unistd.h> #include <time.h> #include <sys/ioctl.h> #include <sys/poll.h> #include <sys/sendfile.h> #include <sys/stat.h> #include <sys/socket.h> #include <sys/types.h> #include <sys/mman.h> #include <netdb.h> #include <netinet/in.h> #include <linux/tcp.h> #include <linux/time_types.h> #include <linux/sockios.h> extern int optind; #ifndef IPPROTO_MPTCP #define IPPROTO_MPTCP 262 #endif #ifndef TCP_ULP #define TCP_ULP 31 #endif static int poll_timeout = 10 * 1000; static bool listen_mode; static bool quit; enum cfg_mode { CFG_MODE_POLL, CFG_MODE_MMAP, CFG_MODE_SENDFILE, }; enum cfg_peek { CFG_NONE_PEEK, CFG_WITH_PEEK, CFG_AFTER_PEEK, }; static enum cfg_mode cfg_mode = CFG_MODE_POLL; static enum cfg_peek cfg_peek = CFG_NONE_PEEK; static const char *cfg_host; static const char *cfg_port = "12000"; static int cfg_sock_proto = IPPROTO_MPTCP; static int pf = AF_INET; static int cfg_sndbuf; static int cfg_rcvbuf; static bool cfg_join; static bool cfg_remove; static unsigned int cfg_time; static unsigned int cfg_do_w; static int cfg_wait; static uint32_t cfg_mark; static char *cfg_input; static int cfg_repeat = 1; static int cfg_truncate; static int cfg_rcv_trunc; struct cfg_cmsg_types { unsigned int cmsg_enabled:1; unsigned int timestampns:1; unsigned int tcp_inq:1; }; struct cfg_sockopt_types { unsigned int transparent:1; unsigned int mptfo:1; }; struct tcp_inq_state { unsigned int last; bool expect_eof; }; struct wstate { char buf[8192]; unsigned int len; unsigned int off; unsigned int total_len; }; static struct tcp_inq_state tcp_inq; static struct cfg_cmsg_types cfg_cmsg_types; static struct cfg_sockopt_types cfg_sockopt_types; static void die_usage(void) { fprintf(stderr, "Usage: mptcp_connect [-6] [-c cmsg] [-f offset] [-i file] [-I num] [-j] [-l] " "[-m mode] [-M mark] [-o option] [-p port] [-P mode] [-r num] [-R num] " "[-s MPTCP|TCP] [-S num] [-t num] [-T num] [-w sec] connect_address\n"); fprintf(stderr, "\t-6 use ipv6\n"); fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n"); fprintf(stderr, "\t-f offset -- stop the I/O after receiving and sending the specified amount " "of bytes. If there are unread bytes in the receive queue, that will cause a MPTCP " "fastclose at close/shutdown. If offset is negative, expect the peer to close before " "all the local data as been sent, thus toleration errors on write and EPIPE signals\n"); fprintf(stderr, "\t-i file -- read the data to send from the given file instead of stdin"); fprintf(stderr, "\t-I num -- repeat the transfer 'num' times. In listen mode accepts num " "incoming connections, in client mode, disconnect and reconnect to the server\n"); fprintf(stderr, "\t-j -- add additional sleep at connection start and tear down " "-- for MPJ tests\n"); fprintf(stderr, "\t-l -- listens mode, accepts incoming connection\n"); fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n"); fprintf(stderr, "\t-M mark -- set socket packet mark\n"); fprintf(stderr, "\t-o option -- test sockopt <option>\n"); fprintf(stderr, "\t-p num -- use port num\n"); fprintf(stderr, "\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n"); fprintf(stderr, "\t-r num -- enable slow mode, limiting each write to num bytes " "-- for remove addr tests\n"); fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n"); fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n"); fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n"); fprintf(stderr, "\t-t num -- set poll timeout to num\n"); fprintf(stderr, "\t-T num -- set expected runtime to num ms\n"); fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n"); exit(1); } static void xerror(const char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } static void handle_signal(int nr) { quit = true; } static const char *getxinfo_strerr(int err) { if (err == EAI_SYSTEM) return strerror(errno); return gai_strerror(err); } static void xgetnameinfo(const struct sockaddr *addr, socklen_t addrlen, char *host, socklen_t hostlen, char *serv, socklen_t servlen) { int flags = NI_NUMERICHOST | NI_NUMERICSERV; int err = getnameinfo(addr, addrlen, host, hostlen, serv, servlen, flags); if (err) { const char *errstr = getxinfo_strerr(err); fprintf(stderr, "Fatal: getnameinfo: %s\n", errstr); exit(1); } } static void xgetaddrinfo(const char *node, const char *service, const struct addrinfo *hints, struct addrinfo **res) { int err = getaddrinfo(node, service, hints, res); if (err) { const char *errstr = getxinfo_strerr(err); fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", node ? node : "", service ? service : "", errstr); exit(1); } } static void set_rcvbuf(int fd, unsigned int size) { int err; err = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size)); if (err) { perror("set SO_RCVBUF"); exit(1); } } static void set_sndbuf(int fd, unsigned int size) { int err; err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &size, sizeof(size)); if (err) { perror("set SO_SNDBUF"); exit(1); } } static void set_mark(int fd, uint32_t mark) { int err; err = setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)); if (err) { perror("set SO_MARK"); exit(1); } } static void set_transparent(int fd, int pf) { int one = 1; switch (pf) { case AF_INET: if (-1 == setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one))) perror("IP_TRANSPARENT"); break; case AF_INET6: if (-1 == setsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &one, sizeof(one))) perror("IPV6_TRANSPARENT"); break; } } static void set_mptfo(int fd, int pf) { int qlen = 25; if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) == -1) perror("TCP_FASTOPEN"); } static int do_ulp_so(int sock, const char *name) { return setsockopt(sock, IPPROTO_TCP, TCP_ULP, name, strlen(name)); } #define X(m) xerror("%s:%u: %s: failed for proto %d at line %u", __FILE__, __LINE__, (m), proto, line) static void sock_test_tcpulp(int sock, int proto, unsigned int line) { socklen_t buflen = 8; char buf[8] = ""; int ret = getsockopt(sock, IPPROTO_TCP, TCP_ULP, buf, &buflen); if (ret != 0) X("getsockopt"); if (buflen > 0) { if (strcmp(buf, "mptcp") != 0) xerror("unexpected ULP '%s' for proto %d at line %u", buf, proto, line); ret = do_ulp_so(sock, "tls"); if (ret == 0) X("setsockopt"); } else if (proto == IPPROTO_MPTCP) { ret = do_ulp_so(sock, "tls"); if (ret != -1) X("setsockopt"); } ret = do_ulp_so(sock, "mptcp"); if (ret != -1) X("setsockopt"); #undef X } #define SOCK_TEST_TCPULP(s, p) sock_test_tcpulp((s), (p), __LINE__) static int sock_listen_mptcp(const char * const listenaddr, const char * const port) { int sock = -1; struct addrinfo hints = { .ai_protocol = IPPROTO_TCP, .ai_socktype = SOCK_STREAM, .ai_flags = AI_PASSIVE | AI_NUMERICHOST }; hints.ai_family = pf; struct addrinfo *a, *addr; int one = 1; xgetaddrinfo(listenaddr, port, &hints, &addr); hints.ai_family = pf; for (a = addr; a; a = a->ai_next) { sock = socket(a->ai_family, a->ai_socktype, cfg_sock_proto); if (sock < 0) continue; SOCK_TEST_TCPULP(sock, cfg_sock_proto); if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))) perror("setsockopt"); if (cfg_sockopt_types.transparent) set_transparent(sock, pf); if (cfg_sockopt_types.mptfo) set_mptfo(sock, pf); if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) break; /* success */ perror("bind"); close(sock); sock = -1; } freeaddrinfo(addr); if (sock < 0) { fprintf(stderr, "Could not create listen socket\n"); return sock; } SOCK_TEST_TCPULP(sock, cfg_sock_proto); if (listen(sock, 20)) { perror("listen"); close(sock); return -1; } SOCK_TEST_TCPULP(sock, cfg_sock_proto); return sock; } static int sock_connect_mptcp(const char * const remoteaddr, const char * const port, int proto, struct addrinfo **peer, int infd, struct wstate *winfo) { struct addrinfo hints = { .ai_protocol = IPPROTO_TCP, .ai_socktype = SOCK_STREAM, }; struct addrinfo *a, *addr; int syn_copied = 0; int sock = -1; hints.ai_family = pf; xgetaddrinfo(remoteaddr, port, &hints, &addr); for (a = addr; a; a = a->ai_next) { sock = socket(a->ai_family, a->ai_socktype, proto); if (sock < 0) { perror("socket"); continue; } SOCK_TEST_TCPULP(sock, proto); if (cfg_mark) set_mark(sock, cfg_mark); if (cfg_sockopt_types.mptfo) { if (!winfo->total_len) winfo->total_len = winfo->len = read(infd, winfo->buf, sizeof(winfo->buf)); syn_copied = sendto(sock, winfo->buf, winfo->len, MSG_FASTOPEN, a->ai_addr, a->ai_addrlen); if (syn_copied >= 0) { winfo->off = syn_copied; winfo->len -= syn_copied; *peer = a; break; /* success */ } } else { if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) { *peer = a; break; /* success */ } } if (cfg_sockopt_types.mptfo) { perror("sendto()"); close(sock); sock = -1; } else { perror("connect()"); close(sock); sock = -1; } } freeaddrinfo(addr); if (sock != -1) SOCK_TEST_TCPULP(sock, proto); return sock; } static size_t do_rnd_write(const int fd, char *buf, const size_t len) { static bool first = true; unsigned int do_w; ssize_t bw; do_w = rand() & 0xffff; if (do_w == 0 || do_w > len) do_w = len; if (cfg_join && first && do_w > 100) do_w = 100; if (cfg_remove && do_w > cfg_do_w) do_w = cfg_do_w; bw = write(fd, buf, do_w); if (bw < 0) return bw; /* let the join handshake complete, before going on */ if (cfg_join && first) { usleep(200000); first = false; } if (cfg_remove) usleep(200000); return bw; } static size_t do_write(const int fd, char *buf, const size_t len) { size_t offset = 0; while (offset < len) { size_t written; ssize_t bw; bw = write(fd, buf + offset, len - offset); if (bw < 0) { perror("write"); return 0; } written = (size_t)bw; offset += written; } return offset; } static void process_cmsg(struct msghdr *msgh) { struct __kernel_timespec ts; bool inq_found = false; bool ts_found = false; unsigned int inq = 0; struct cmsghdr *cmsg; for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) { if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SO_TIMESTAMPNS_NEW) { memcpy(&ts, CMSG_DATA(cmsg), sizeof(ts)); ts_found = true; continue; } if (cmsg->cmsg_level == IPPROTO_TCP && cmsg->cmsg_type == TCP_CM_INQ) { memcpy(&inq, CMSG_DATA(cmsg), sizeof(inq)); inq_found = true; continue; } } if (cfg_cmsg_types.timestampns) { if (!ts_found) xerror("TIMESTAMPNS not present\n"); } if (cfg_cmsg_types.tcp_inq) { if (!inq_found) xerror("TCP_INQ not present\n"); if (inq > 1024) xerror("tcp_inq %u is larger than one kbyte\n", inq); tcp_inq.last = inq; } } static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len) { char msg_buf[8192]; struct iovec iov = { .iov_base = buf, .iov_len = len, }; struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1, .msg_control = msg_buf, .msg_controllen = sizeof(msg_buf), }; int flags = 0; unsigned int last_hint = tcp_inq.last; int ret = recvmsg(fd, &msg, flags); if (ret <= 0) { if (ret == 0 && tcp_inq.expect_eof) return ret; if (ret == 0 && cfg_cmsg_types.tcp_inq) if (last_hint != 1 && last_hint != 0) xerror("EOF but last tcp_inq hint was %u\n", last_hint); return ret; } if (tcp_inq.expect_eof) xerror("expected EOF, last_hint %u, now %u\n", last_hint, tcp_inq.last); if (msg.msg_controllen && !cfg_cmsg_types.cmsg_enabled) xerror("got %lu bytes of cmsg data, expected 0\n", (unsigned long)msg.msg_controllen); if (msg.msg_controllen == 0 && cfg_cmsg_types.cmsg_enabled) xerror("%s\n", "got no cmsg data"); if (msg.msg_controllen) process_cmsg(&msg); if (cfg_cmsg_types.tcp_inq) { if ((size_t)ret < len && last_hint > (unsigned int)ret) { if (ret + 1 != (int)last_hint) { int next = read(fd, msg_buf, sizeof(msg_buf)); xerror("read %u of %u, last_hint was %u tcp_inq hint now %u next_read returned %d/%m\n", ret, (unsigned int)len, last_hint, tcp_inq.last, next); } else { tcp_inq.expect_eof = true; } } } return ret; } static ssize_t do_rnd_read(const int fd, char *buf, const size_t len) { int ret = 0; char tmp[16384]; size_t cap = rand(); cap &= 0xffff; if (cap == 0) cap = 1; else if (cap > len) cap = len; if (cfg_peek == CFG_WITH_PEEK) { ret = recv(fd, buf, cap, MSG_PEEK); ret = (ret < 0) ? ret : read(fd, tmp, ret); } else if (cfg_peek == CFG_AFTER_PEEK) { ret = recv(fd, buf, cap, MSG_PEEK); ret = (ret < 0) ? ret : read(fd, buf, cap); } else if (cfg_cmsg_types.cmsg_enabled) { ret = do_recvmsg_cmsg(fd, buf, cap); } else { ret = read(fd, buf, cap); } return ret; } static void set_nonblock(int fd, bool nonblock) { int flags = fcntl(fd, F_GETFL); if (flags == -1) return; if (nonblock) fcntl(fd, F_SETFL, flags | O_NONBLOCK); else fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); } static void shut_wr(int fd) { /* Close our write side, ev. give some time * for address notification and/or checking * the current status */ if (cfg_wait) usleep(cfg_wait); shutdown(fd, SHUT_WR); } static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after_out, struct wstate *winfo) { struct pollfd fds = { .fd = peerfd, .events = POLLIN | POLLOUT, }; unsigned int total_wlen = 0, total_rlen = 0; set_nonblock(peerfd, true); for (;;) { char rbuf[8192]; ssize_t len; if (fds.events == 0 || quit) break; switch (poll(&fds, 1, poll_timeout)) { case -1: if (errno == EINTR) continue; perror("poll"); return 1; case 0: fprintf(stderr, "%s: poll timed out (events: " "POLLIN %u, POLLOUT %u)\n", __func__, fds.events & POLLIN, fds.events & POLLOUT); return 2; } if (fds.revents & POLLIN) { ssize_t rb = sizeof(rbuf); /* limit the total amount of read data to the trunc value*/ if (cfg_truncate > 0) { if (rb + total_rlen > cfg_truncate) rb = cfg_truncate - total_rlen; len = read(peerfd, rbuf, rb); } else { len = do_rnd_read(peerfd, rbuf, sizeof(rbuf)); } if (len == 0) { /* no more data to receive: * peer has closed its write side */ fds.events &= ~POLLIN; if ((fds.events & POLLOUT) == 0) { *in_closed_after_out = true; /* and nothing more to send */ break; } /* Else, still have data to transmit */ } else if (len < 0) { if (cfg_rcv_trunc) return 0; perror("read"); return 3; } total_rlen += len; do_write(outfd, rbuf, len); } if (fds.revents & POLLOUT) { if (winfo->len == 0) { winfo->off = 0; winfo->len = read(infd, winfo->buf, sizeof(winfo->buf)); } if (winfo->len > 0) { ssize_t bw; /* limit the total amount of written data to the trunc value */ if (cfg_truncate > 0 && winfo->len + total_wlen > cfg_truncate) winfo->len = cfg_truncate - total_wlen; bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len); if (bw < 0) { if (cfg_rcv_trunc) return 0; perror("write"); return 111; } winfo->off += bw; winfo->len -= bw; total_wlen += bw; } else if (winfo->len == 0) { /* We have no more data to send. */ fds.events &= ~POLLOUT; if ((fds.events & POLLIN) == 0) /* ... and peer also closed already */ break; shut_wr(peerfd); } else { if (errno == EINTR) continue; perror("read"); return 4; } } if (fds.revents & (POLLERR | POLLNVAL)) { if (cfg_rcv_trunc) return 0; fprintf(stderr, "Unexpected revents: " "POLLERR/POLLNVAL(%x)\n", fds.revents); return 5; } if (cfg_truncate > 0 && total_wlen >= cfg_truncate && total_rlen >= cfg_truncate) break; } /* leave some time for late join/announce */ if (cfg_remove && !quit) usleep(cfg_wait); return 0; } static int do_recvfile(int infd, int outfd) { ssize_t r; do { char buf[16384]; r = do_rnd_read(infd, buf, sizeof(buf)); if (r > 0) { if (write(outfd, buf, r) != r) break; } else if (r < 0) { perror("read"); } } while (r > 0); return (int)r; } static int spool_buf(int fd, struct wstate *winfo) { while (winfo->len) { int ret = write(fd, winfo->buf + winfo->off, winfo->len); if (ret < 0) { perror("write"); return 4; } winfo->off += ret; winfo->len -= ret; } return 0; } static int do_mmap(int infd, int outfd, unsigned int size, struct wstate *winfo) { char *inbuf = mmap(NULL, size, PROT_READ, MAP_SHARED, infd, 0); ssize_t ret = 0, off = winfo->total_len; size_t rem; if (inbuf == MAP_FAILED) { perror("mmap"); return 1; } ret = spool_buf(outfd, winfo); if (ret < 0) return ret; rem = size - winfo->total_len; while (rem > 0) { ret = write(outfd, inbuf + off, rem); if (ret < 0) { perror("write"); break; } off += ret; rem -= ret; } munmap(inbuf, size); return rem; } static int get_infd_size(int fd) { struct stat sb; ssize_t count; int err; err = fstat(fd, &sb); if (err < 0) { perror("fstat"); return -1; } if ((sb.st_mode & S_IFMT) != S_IFREG) { fprintf(stderr, "%s: stdin is not a regular file\n", __func__); return -2; } count = sb.st_size; if (count > INT_MAX) { fprintf(stderr, "File too large: %zu\n", count); return -3; } return (int)count; } static int do_sendfile(int infd, int outfd, unsigned int count, struct wstate *winfo) { int ret = spool_buf(outfd, winfo); if (ret < 0) return ret; count -= winfo->total_len; while (count > 0) { ssize_t r; r = sendfile(outfd, infd, NULL, count); if (r < 0) { perror("sendfile"); return 3; } count -= r; } return 0; } static int copyfd_io_mmap(int infd, int peerfd, int outfd, unsigned int size, bool *in_closed_after_out, struct wstate *winfo) { int err; if (listen_mode) { err = do_recvfile(peerfd, outfd); if (err) return err; err = do_mmap(infd, peerfd, size, winfo); } else { err = do_mmap(infd, peerfd, size, winfo); if (err) return err; shut_wr(peerfd); err = do_recvfile(peerfd, outfd); *in_closed_after_out = true; } return err; } static int copyfd_io_sendfile(int infd, int peerfd, int outfd, unsigned int size, bool *in_closed_after_out, struct wstate *winfo) { int err; if (listen_mode) { err = do_recvfile(peerfd, outfd); if (err) return err; err = do_sendfile(infd, peerfd, size, winfo); } else { err = do_sendfile(infd, peerfd, size, winfo); if (err) return err; shut_wr(peerfd); err = do_recvfile(peerfd, outfd); *in_closed_after_out = true; } return err; } static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct wstate *winfo) { bool in_closed_after_out = false; struct timespec start, end; int file_size; int ret; if (cfg_time && (clock_gettime(CLOCK_MONOTONIC, &start) < 0)) xerror("can not fetch start time %d", errno); switch (cfg_mode) { case CFG_MODE_POLL: ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out, winfo); break; case CFG_MODE_MMAP: file_size = get_infd_size(infd); if (file_size < 0) return file_size; ret = copyfd_io_mmap(infd, peerfd, outfd, file_size, &in_closed_after_out, winfo); break; case CFG_MODE_SENDFILE: file_size = get_infd_size(infd); if (file_size < 0) return file_size; ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size, &in_closed_after_out, winfo); break; default: fprintf(stderr, "Invalid mode %d\n", cfg_mode); die_usage(); return 1; } if (ret) return ret; if (close_peerfd) close(peerfd); if (cfg_time) { unsigned int delta_ms; if (clock_gettime(CLOCK_MONOTONIC, &end) < 0) xerror("can not fetch end time %d", errno); delta_ms = (end.tv_sec - start.tv_sec) * 1000 + (end.tv_nsec - start.tv_nsec) / 1000000; if (delta_ms > cfg_time) { xerror("transfer slower than expected! runtime %d ms, expected %d ms", delta_ms, cfg_time); } /* show the runtime only if this end shutdown(wr) before receiving the EOF, * (that is, if this end got the longer runtime) */ if (in_closed_after_out) fprintf(stderr, "%d", delta_ms); } return 0; } static void check_sockaddr(int pf, struct sockaddr_storage *ss, socklen_t salen) { struct sockaddr_in6 *sin6; struct sockaddr_in *sin; socklen_t wanted_size = 0; switch (pf) { case AF_INET: wanted_size = sizeof(*sin); sin = (void *)ss; if (!sin->sin_port) fprintf(stderr, "accept: something wrong: ip connection from port 0"); break; case AF_INET6: wanted_size = sizeof(*sin6); sin6 = (void *)ss; if (!sin6->sin6_port) fprintf(stderr, "accept: something wrong: ipv6 connection from port 0"); break; default: fprintf(stderr, "accept: Unknown pf %d, salen %u\n", pf, salen); return; } if (salen != wanted_size) fprintf(stderr, "accept: size mismatch, got %d expected %d\n", (int)salen, wanted_size); if (ss->ss_family != pf) fprintf(stderr, "accept: pf mismatch, expect %d, ss_family is %d\n", (int)ss->ss_family, pf); } static void check_getpeername(int fd, struct sockaddr_storage *ss, socklen_t salen) { struct sockaddr_storage peerss; socklen_t peersalen = sizeof(peerss); if (getpeername(fd, (struct sockaddr *)&peerss, &peersalen) < 0) { perror("getpeername"); return; } if (peersalen != salen) { fprintf(stderr, "%s: %d vs %d\n", __func__, peersalen, salen); return; } if (memcmp(ss, &peerss, peersalen)) { char a[INET6_ADDRSTRLEN]; char b[INET6_ADDRSTRLEN]; char c[INET6_ADDRSTRLEN]; char d[INET6_ADDRSTRLEN]; xgetnameinfo((struct sockaddr *)ss, salen, a, sizeof(a), b, sizeof(b)); xgetnameinfo((struct sockaddr *)&peerss, peersalen, c, sizeof(c), d, sizeof(d)); fprintf(stderr, "%s: memcmp failure: accept %s vs peername %s, %s vs %s salen %d vs %d\n", __func__, a, c, b, d, peersalen, salen); } } static void check_getpeername_connect(int fd) { struct sockaddr_storage ss; socklen_t salen = sizeof(ss); char a[INET6_ADDRSTRLEN]; char b[INET6_ADDRSTRLEN]; if (getpeername(fd, (struct sockaddr *)&ss, &salen) < 0) { perror("getpeername"); return; } xgetnameinfo((struct sockaddr *)&ss, salen, a, sizeof(a), b, sizeof(b)); if (strcmp(cfg_host, a) || strcmp(cfg_port, b)) fprintf(stderr, "%s: %s vs %s, %s vs %s\n", __func__, cfg_host, a, cfg_port, b); } static void maybe_close(int fd) { unsigned int r = rand(); if (!(cfg_join || cfg_remove || cfg_repeat > 1) && (r & 1)) close(fd); } int main_loop_s(int listensock) { struct sockaddr_storage ss; struct wstate winfo; struct pollfd polls; socklen_t salen; int remotesock; int fd = 0; again: polls.fd = listensock; polls.events = POLLIN; switch (poll(&polls, 1, poll_timeout)) { case -1: perror("poll"); return 1; case 0: fprintf(stderr, "%s: timed out\n", __func__); close(listensock); return 2; } salen = sizeof(ss); remotesock = accept(listensock, (struct sockaddr *)&ss, &salen); if (remotesock >= 0) { maybe_close(listensock); check_sockaddr(pf, &ss, salen); check_getpeername(remotesock, &ss, salen); if (cfg_input) { fd = open(cfg_input, O_RDONLY); if (fd < 0) xerror("can't open %s: %d", cfg_input, errno); } SOCK_TEST_TCPULP(remotesock, 0); memset(&winfo, 0, sizeof(winfo)); copyfd_io(fd, remotesock, 1, true, &winfo); } else { perror("accept"); return 1; } if (--cfg_repeat > 0) { if (cfg_input) close(fd); goto again; } return 0; } static void init_rng(void) { int fd = open("/dev/urandom", O_RDONLY); unsigned int foo; if (fd > 0) { int ret = read(fd, &foo, sizeof(foo)); if (ret < 0) srand(fd + foo); close(fd); } srand(foo); } static void xsetsockopt(int fd, int level, int optname, const void *optval, socklen_t optlen) { int err; err = setsockopt(fd, level, optname, optval, optlen); if (err) { perror("setsockopt"); exit(1); } } static void apply_cmsg_types(int fd, const struct cfg_cmsg_types *cmsg) { static const unsigned int on = 1; if (cmsg->timestampns) xsetsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, &on, sizeof(on)); if (cmsg->tcp_inq) xsetsockopt(fd, IPPROTO_TCP, TCP_INQ, &on, sizeof(on)); } static void parse_cmsg_types(const char *type) { char *next = strchr(type, ','); unsigned int len = 0; cfg_cmsg_types.cmsg_enabled = 1; if (next) { parse_cmsg_types(next + 1); len = next - type; } else { len = strlen(type); } if (strncmp(type, "TIMESTAMPNS", len) == 0) { cfg_cmsg_types.timestampns = 1; return; } if (strncmp(type, "TCPINQ", len) == 0) { cfg_cmsg_types.tcp_inq = 1; return; } fprintf(stderr, "Unrecognized cmsg option %s\n", type); exit(1); } static void parse_setsock_options(const char *name) { char *next = strchr(name, ','); unsigned int len = 0; if (next) { parse_setsock_options(next + 1); len = next - name; } else { len = strlen(name); } if (strncmp(name, "TRANSPARENT", len) == 0) { cfg_sockopt_types.transparent = 1; return; } if (strncmp(name, "MPTFO", len) == 0) { cfg_sockopt_types.mptfo = 1; return; } fprintf(stderr, "Unrecognized setsockopt option %s\n", name); exit(1); } void xdisconnect(int fd, int addrlen) { struct sockaddr_storage empty; int msec_sleep = 10; int queued = 1; int i; shutdown(fd, SHUT_WR); /* while until the pending data is completely flushed, the later * disconnect will bypass/ignore/drop any pending data. */ for (i = 0; ; i += msec_sleep) { if (ioctl(fd, SIOCOUTQ, &queued) < 0) xerror("can't query out socket queue: %d", errno); if (!queued) break; if (i > poll_timeout) xerror("timeout while waiting for spool to complete"); usleep(msec_sleep * 1000); } memset(&empty, 0, sizeof(empty)); empty.ss_family = AF_UNSPEC; if (connect(fd, (struct sockaddr *)&empty, addrlen) < 0) xerror("can't disconnect: %d", errno); } int main_loop(void) { int fd = 0, ret, fd_in = 0; struct addrinfo *peer; struct wstate winfo; if (cfg_input && cfg_sockopt_types.mptfo) { fd_in = open(cfg_input, O_RDONLY); if (fd < 0) xerror("can't open %s:%d", cfg_input, errno); } memset(&winfo, 0, sizeof(winfo)); fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer, fd_in, &winfo); if (fd < 0) return 2; again: check_getpeername_connect(fd); SOCK_TEST_TCPULP(fd, cfg_sock_proto); if (cfg_rcvbuf) set_rcvbuf(fd, cfg_rcvbuf); if (cfg_sndbuf) set_sndbuf(fd, cfg_sndbuf); if (cfg_cmsg_types.cmsg_enabled) apply_cmsg_types(fd, &cfg_cmsg_types); if (cfg_input && !cfg_sockopt_types.mptfo) { fd_in = open(cfg_input, O_RDONLY); if (fd < 0) xerror("can't open %s:%d", cfg_input, errno); } ret = copyfd_io(fd_in, fd, 1, 0, &winfo); if (ret) return ret; if (cfg_truncate > 0) { xdisconnect(fd, peer->ai_addrlen); } else if (--cfg_repeat > 0) { xdisconnect(fd, peer->ai_addrlen); /* the socket could be unblocking at this point, we need the * connect to be blocking */ set_nonblock(fd, false); if (connect(fd, peer->ai_addr, peer->ai_addrlen)) xerror("can't reconnect: %d", errno); if (cfg_input) close(fd_in); memset(&winfo, 0, sizeof(winfo)); goto again; } else { close(fd); } return 0; } int parse_proto(const char *proto) { if (!strcasecmp(proto, "MPTCP")) return IPPROTO_MPTCP; if (!strcasecmp(proto, "TCP")) return IPPROTO_TCP; fprintf(stderr, "Unknown protocol: %s\n.", proto); die_usage(); /* silence compiler warning */ return 0; } int parse_mode(const char *mode) { if (!strcasecmp(mode, "poll")) return CFG_MODE_POLL; if (!strcasecmp(mode, "mmap")) return CFG_MODE_MMAP; if (!strcasecmp(mode, "sendfile")) return CFG_MODE_SENDFILE; fprintf(stderr, "Unknown test mode: %s\n", mode); fprintf(stderr, "Supported modes are:\n"); fprintf(stderr, "\t\t\"poll\" - interleaved read/write using poll()\n"); fprintf(stderr, "\t\t\"mmap\" - send entire input file (mmap+write), then read response (-l will read input first)\n"); fprintf(stderr, "\t\t\"sendfile\" - send entire input file (sendfile), then read response (-l will read input first)\n"); die_usage(); /* silence compiler warning */ return 0; } int parse_peek(const char *mode) { if (!strcasecmp(mode, "saveWithPeek")) return CFG_WITH_PEEK; if (!strcasecmp(mode, "saveAfterPeek")) return CFG_AFTER_PEEK; fprintf(stderr, "Unknown: %s\n", mode); fprintf(stderr, "Supported MSG_PEEK mode are:\n"); fprintf(stderr, "\t\t\"saveWithPeek\" - recv data with flags 'MSG_PEEK' and save the peek data into file\n"); fprintf(stderr, "\t\t\"saveAfterPeek\" - read and save data into file after recv with flags 'MSG_PEEK'\n"); die_usage(); /* silence compiler warning */ return 0; } static int parse_int(const char *size) { unsigned long s; errno = 0; s = strtoul(size, NULL, 0); if (errno) { fprintf(stderr, "Invalid sndbuf size %s (%s)\n", size, strerror(errno)); die_usage(); } if (s > INT_MAX) { fprintf(stderr, "Invalid sndbuf size %s (%s)\n", size, strerror(ERANGE)); die_usage(); } return (int)s; } static void parse_opts(int argc, char **argv) { int c; while ((c = getopt(argc, argv, "6c:f:hi:I:jlm:M:o:p:P:r:R:s:S:t:T:w:")) != -1) { switch (c) { case 'f': cfg_truncate = atoi(optarg); /* when receiving a fastclose, ignore PIPE signals and * all the I/O errors later in the code */ if (cfg_truncate < 0) { cfg_rcv_trunc = true; signal(SIGPIPE, handle_signal); } break; case 'j': cfg_join = true; cfg_mode = CFG_MODE_POLL; break; case 'r': cfg_remove = true; cfg_mode = CFG_MODE_POLL; cfg_wait = 400000; cfg_do_w = atoi(optarg); if (cfg_do_w <= 0) cfg_do_w = 50; break; case 'i': cfg_input = optarg; break; case 'I': cfg_repeat = atoi(optarg); break; case 'l': listen_mode = true; break; case 'p': cfg_port = optarg; break; case 's': cfg_sock_proto = parse_proto(optarg); break; case 'h': die_usage(); break; case '6': pf = AF_INET6; break; case 't': poll_timeout = atoi(optarg) * 1000; if (poll_timeout <= 0) poll_timeout = -1; break; case 'T': cfg_time = atoi(optarg); break; case 'm': cfg_mode = parse_mode(optarg); break; case 'S': cfg_sndbuf = parse_int(optarg); break; case 'R': cfg_rcvbuf = parse_int(optarg); break; case 'w': cfg_wait = atoi(optarg)*1000000; break; case 'M': cfg_mark = strtol(optarg, NULL, 0); break; case 'P': cfg_peek = parse_peek(optarg); break; case 'c': parse_cmsg_types(optarg); break; case 'o': parse_setsock_options(optarg); break; } } if (optind + 1 != argc) die_usage(); cfg_host = argv[optind]; if (strchr(cfg_host, ':')) pf = AF_INET6; } int main(int argc, char *argv[]) { init_rng(); signal(SIGUSR1, handle_signal); parse_opts(argc, argv); if (listen_mode) { int fd = sock_listen_mptcp(cfg_host, cfg_port); if (fd < 0) return 1; if (cfg_rcvbuf) set_rcvbuf(fd, cfg_rcvbuf); if (cfg_sndbuf) set_sndbuf(fd, cfg_sndbuf); if (cfg_mark) set_mark(fd, cfg_mark); if (cfg_cmsg_types.cmsg_enabled) apply_cmsg_types(fd, &cfg_cmsg_types); return main_loop_s(fd); } return main_loop(); }