// SPDX-License-Identifier: GPL-2.0 #define _GNU_SOURCE #include <arpa/inet.h> #include <errno.h> #include <error.h> #include <linux/errqueue.h> #include <linux/net_tstamp.h> #include <netinet/if_ether.h> #include <netinet/in.h> #include <netinet/ip.h> #include <netinet/ip6.h> #include <netinet/udp.h> #include <poll.h> #include <sched.h> #include <signal.h> #include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/socket.h> #include <sys/time.h> #include <sys/poll.h> #include <sys/types.h> #include <unistd.h> #include "../kselftest.h" #ifndef ETH_MAX_MTU #define ETH_MAX_MTU 0xFFFFU #endif #ifndef UDP_SEGMENT #define UDP_SEGMENT 103 #endif #ifndef SO_ZEROCOPY #define SO_ZEROCOPY 60 #endif #ifndef SO_EE_ORIGIN_ZEROCOPY #define SO_EE_ORIGIN_ZEROCOPY 5 #endif #ifndef MSG_ZEROCOPY #define MSG_ZEROCOPY 0x4000000 #endif #ifndef ENOTSUPP #define ENOTSUPP 524 #endif #define NUM_PKT 100 static bool cfg_cache_trash; static int cfg_cpu = -1; static int cfg_connected = true; static int cfg_family = PF_UNSPEC; static uint16_t cfg_mss; static int cfg_payload_len = (1472 * 42); static int cfg_port = 8000; static int cfg_runtime_ms = -1; static bool cfg_poll; static int cfg_poll_loop_timeout_ms = 2000; static bool cfg_segment; static bool cfg_sendmmsg; static bool cfg_tcp; static uint32_t cfg_tx_ts = SOF_TIMESTAMPING_TX_SOFTWARE; static bool cfg_tx_tstamp; static bool cfg_audit; static bool cfg_verbose; static bool cfg_zerocopy; static int cfg_msg_nr; static uint16_t cfg_gso_size; static unsigned long total_num_msgs; static unsigned long total_num_sends; static unsigned long stat_tx_ts; static unsigned long stat_tx_ts_errors; static unsigned long tstart; static unsigned long tend; static unsigned long stat_zcopies; static socklen_t cfg_alen; static struct sockaddr_storage cfg_dst_addr; static bool interrupted; static char buf[NUM_PKT][ETH_MAX_MTU]; static void sigint_handler(int signum) { if (signum == SIGINT) interrupted = true; } static unsigned long gettimeofday_ms(void) { struct timeval tv; gettimeofday(&tv, NULL); return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); } static int set_cpu(int cpu) { cpu_set_t mask; CPU_ZERO(&mask); CPU_SET(cpu, &mask); if (sched_setaffinity(0, sizeof(mask), &mask)) error(1, 0, "setaffinity %d", cpu); return 0; } static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr) { struct sockaddr_in6 *addr6 = (void *) sockaddr; struct sockaddr_in *addr4 = (void *) sockaddr; switch (domain) { case PF_INET: addr4->sin_family = AF_INET; addr4->sin_port = htons(cfg_port); if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) error(1, 0, "ipv4 parse error: %s", str_addr); break; case PF_INET6: addr6->sin6_family = AF_INET6; addr6->sin6_port = htons(cfg_port); if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) error(1, 0, "ipv6 parse error: %s", str_addr); break; default: error(1, 0, "illegal domain"); } } static void flush_cmsg(struct cmsghdr *cmsg) { struct sock_extended_err *err; struct scm_timestamping *tss; __u32 lo; __u32 hi; int i; switch (cmsg->cmsg_level) { case SOL_SOCKET: if (cmsg->cmsg_type == SO_TIMESTAMPING) { i = (cfg_tx_ts == SOF_TIMESTAMPING_TX_HARDWARE) ? 2 : 0; tss = (struct scm_timestamping *)CMSG_DATA(cmsg); if (tss->ts[i].tv_sec == 0) stat_tx_ts_errors++; } else { error(1, 0, "unknown SOL_SOCKET cmsg type=%u\n", cmsg->cmsg_type); } break; case SOL_IP: case SOL_IPV6: switch (cmsg->cmsg_type) { case IP_RECVERR: case IPV6_RECVERR: { err = (struct sock_extended_err *)CMSG_DATA(cmsg); switch (err->ee_origin) { case SO_EE_ORIGIN_TIMESTAMPING: /* Got a TX timestamp from error queue */ stat_tx_ts++; break; case SO_EE_ORIGIN_ICMP: case SO_EE_ORIGIN_ICMP6: if (cfg_verbose) fprintf(stderr, "received ICMP error: type=%u, code=%u\n", err->ee_type, err->ee_code); break; case SO_EE_ORIGIN_ZEROCOPY: { lo = err->ee_info; hi = err->ee_data; /* range of IDs acknowledged */ stat_zcopies += hi - lo + 1; break; } case SO_EE_ORIGIN_LOCAL: if (cfg_verbose) fprintf(stderr, "received packet with local origin: %u\n", err->ee_origin); break; default: error(0, 1, "received packet with origin: %u", err->ee_origin); } break; } default: error(0, 1, "unknown IP msg type=%u\n", cmsg->cmsg_type); break; } break; default: error(0, 1, "unknown cmsg level=%u\n", cmsg->cmsg_level); } } static void flush_errqueue_recv(int fd) { char control[CMSG_SPACE(sizeof(struct scm_timestamping)) + CMSG_SPACE(sizeof(struct sock_extended_err)) + CMSG_SPACE(sizeof(struct sockaddr_in6))] = {0}; struct msghdr msg = {0}; struct cmsghdr *cmsg; int ret; while (1) { msg.msg_control = control; msg.msg_controllen = sizeof(control); ret = recvmsg(fd, &msg, MSG_ERRQUEUE); if (ret == -1 && errno == EAGAIN) break; if (ret == -1) error(1, errno, "errqueue"); if (msg.msg_flags != MSG_ERRQUEUE) error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags); if (cfg_audit) { for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) flush_cmsg(cmsg); } msg.msg_flags = 0; } } static void flush_errqueue(int fd, const bool do_poll, unsigned long poll_timeout, const bool poll_err) { if (do_poll) { struct pollfd fds = {0}; int ret; fds.fd = fd; ret = poll(&fds, 1, poll_timeout); if (ret == 0) { if ((cfg_verbose) && (poll_err)) fprintf(stderr, "poll timeout\n"); } else if (ret < 0) { error(1, errno, "poll"); } } flush_errqueue_recv(fd); } static void flush_errqueue_retry(int fd, unsigned long num_sends) { unsigned long tnow, tstop; bool first_try = true; tnow = gettimeofday_ms(); tstop = tnow + cfg_poll_loop_timeout_ms; do { flush_errqueue(fd, true, tstop - tnow, first_try); first_try = false; tnow = gettimeofday_ms(); } while ((stat_zcopies != num_sends) && (tnow < tstop)); } static int send_tcp(int fd, char *data) { int ret, done = 0, count = 0; while (done < cfg_payload_len) { ret = send(fd, data + done, cfg_payload_len - done, cfg_zerocopy ? MSG_ZEROCOPY : 0); if (ret == -1) error(1, errno, "write"); done += ret; count++; } return count; } static int send_udp(int fd, char *data) { int ret, total_len, len, count = 0; total_len = cfg_payload_len; while (total_len) { len = total_len < cfg_mss ? total_len : cfg_mss; ret = sendto(fd, data, len, cfg_zerocopy ? MSG_ZEROCOPY : 0, cfg_connected ? NULL : (void *)&cfg_dst_addr, cfg_connected ? 0 : cfg_alen); if (ret == -1) error(1, errno, "write"); if (ret != len) error(1, errno, "write: %uB != %uB\n", ret, len); total_len -= len; count++; } return count; } static void send_ts_cmsg(struct cmsghdr *cm) { uint32_t *valp; cm->cmsg_level = SOL_SOCKET; cm->cmsg_type = SO_TIMESTAMPING; cm->cmsg_len = CMSG_LEN(sizeof(cfg_tx_ts)); valp = (void *)CMSG_DATA(cm); *valp = cfg_tx_ts; } static int send_udp_sendmmsg(int fd, char *data) { char control[CMSG_SPACE(sizeof(cfg_tx_ts))] = {0}; const int max_nr_msg = ETH_MAX_MTU / ETH_DATA_LEN; struct mmsghdr mmsgs[max_nr_msg]; struct iovec iov[max_nr_msg]; unsigned int off = 0, left; size_t msg_controllen = 0; int i = 0, ret; memset(mmsgs, 0, sizeof(mmsgs)); if (cfg_tx_tstamp) { struct msghdr msg = {0}; struct cmsghdr *cmsg; msg.msg_control = control; msg.msg_controllen = sizeof(control); cmsg = CMSG_FIRSTHDR(&msg); send_ts_cmsg(cmsg); msg_controllen += CMSG_SPACE(sizeof(cfg_tx_ts)); } left = cfg_payload_len; while (left) { if (i == max_nr_msg) error(1, 0, "sendmmsg: exceeds max_nr_msg"); iov[i].iov_base = data + off; iov[i].iov_len = cfg_mss < left ? cfg_mss : left; mmsgs[i].msg_hdr.msg_iov = iov + i; mmsgs[i].msg_hdr.msg_iovlen = 1; mmsgs[i].msg_hdr.msg_name = (void *)&cfg_dst_addr; mmsgs[i].msg_hdr.msg_namelen = cfg_alen; if (msg_controllen) { mmsgs[i].msg_hdr.msg_control = control; mmsgs[i].msg_hdr.msg_controllen = msg_controllen; } off += iov[i].iov_len; left -= iov[i].iov_len; i++; } ret = sendmmsg(fd, mmsgs, i, cfg_zerocopy ? MSG_ZEROCOPY : 0); if (ret == -1) error(1, errno, "sendmmsg"); return ret; } static void send_udp_segment_cmsg(struct cmsghdr *cm) { uint16_t *valp; cm->cmsg_level = SOL_UDP; cm->cmsg_type = UDP_SEGMENT; cm->cmsg_len = CMSG_LEN(sizeof(cfg_gso_size)); valp = (void *)CMSG_DATA(cm); *valp = cfg_gso_size; } static int send_udp_segment(int fd, char *data) { char control[CMSG_SPACE(sizeof(cfg_gso_size)) + CMSG_SPACE(sizeof(cfg_tx_ts))] = {0}; struct msghdr msg = {0}; struct iovec iov = {0}; size_t msg_controllen; struct cmsghdr *cmsg; int ret; iov.iov_base = data; iov.iov_len = cfg_payload_len; msg.msg_iov = &iov; msg.msg_iovlen = 1; msg.msg_control = control; msg.msg_controllen = sizeof(control); cmsg = CMSG_FIRSTHDR(&msg); send_udp_segment_cmsg(cmsg); msg_controllen = CMSG_SPACE(sizeof(cfg_mss)); if (cfg_tx_tstamp) { cmsg = CMSG_NXTHDR(&msg, cmsg); send_ts_cmsg(cmsg); msg_controllen += CMSG_SPACE(sizeof(cfg_tx_ts)); } msg.msg_controllen = msg_controllen; msg.msg_name = (void *)&cfg_dst_addr; msg.msg_namelen = cfg_alen; ret = sendmsg(fd, &msg, cfg_zerocopy ? MSG_ZEROCOPY : 0); if (ret == -1) error(1, errno, "sendmsg"); if (ret != iov.iov_len) error(1, 0, "sendmsg: %u != %llu\n", ret, (unsigned long long)iov.iov_len); return 1; } static void usage(const char *filepath) { error(1, 0, "Usage: %s [-46acmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] " "[-L secs] [-M messagenr] [-p port] [-s sendsize] [-S gsosize]", filepath); } static void parse_opts(int argc, char **argv) { const char *bind_addr = NULL; int max_len, hdrlen; int c; while ((c = getopt(argc, argv, "46acC:D:Hl:L:mM:p:s:PS:tTuvz")) != -1) { switch (c) { case '4': if (cfg_family != PF_UNSPEC) error(1, 0, "Pass one of -4 or -6"); cfg_family = PF_INET; cfg_alen = sizeof(struct sockaddr_in); break; case '6': if (cfg_family != PF_UNSPEC) error(1, 0, "Pass one of -4 or -6"); cfg_family = PF_INET6; cfg_alen = sizeof(struct sockaddr_in6); break; case 'a': cfg_audit = true; break; case 'c': cfg_cache_trash = true; break; case 'C': cfg_cpu = strtol(optarg, NULL, 0); break; case 'D': bind_addr = optarg; break; case 'l': cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000; break; case 'L': cfg_poll_loop_timeout_ms = strtoul(optarg, NULL, 10) * 1000; break; case 'm': cfg_sendmmsg = true; break; case 'M': cfg_msg_nr = strtoul(optarg, NULL, 10); break; case 'p': cfg_port = strtoul(optarg, NULL, 0); break; case 'P': cfg_poll = true; break; case 's': cfg_payload_len = strtoul(optarg, NULL, 0); break; case 'S': cfg_gso_size = strtoul(optarg, NULL, 0); cfg_segment = true; break; case 'H': cfg_tx_ts = SOF_TIMESTAMPING_TX_HARDWARE; cfg_tx_tstamp = true; break; case 't': cfg_tcp = true; break; case 'T': cfg_tx_tstamp = true; break; case 'u': cfg_connected = false; break; case 'v': cfg_verbose = true; break; case 'z': cfg_zerocopy = true; break; default: exit(1); } } if (!bind_addr) bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0"; setup_sockaddr(cfg_family, bind_addr, &cfg_dst_addr); if (optind != argc) usage(argv[0]); if (cfg_family == PF_UNSPEC) error(1, 0, "must pass one of -4 or -6"); if (cfg_tcp && !cfg_connected) error(1, 0, "connectionless tcp makes no sense"); if (cfg_segment && cfg_sendmmsg) error(1, 0, "cannot combine segment offload and sendmmsg"); if (cfg_tx_tstamp && !(cfg_segment || cfg_sendmmsg)) error(1, 0, "Options -T and -H require either -S or -m option"); if (cfg_family == PF_INET) hdrlen = sizeof(struct iphdr) + sizeof(struct udphdr); else hdrlen = sizeof(struct ip6_hdr) + sizeof(struct udphdr); cfg_mss = ETH_DATA_LEN - hdrlen; max_len = ETH_MAX_MTU - hdrlen; if (!cfg_gso_size) cfg_gso_size = cfg_mss; if (cfg_payload_len > max_len) error(1, 0, "payload length %u exceeds max %u", cfg_payload_len, max_len); } static void set_pmtu_discover(int fd, bool is_ipv4) { int level, name, val; if (is_ipv4) { level = SOL_IP; name = IP_MTU_DISCOVER; val = IP_PMTUDISC_DO; } else { level = SOL_IPV6; name = IPV6_MTU_DISCOVER; val = IPV6_PMTUDISC_DO; } if (setsockopt(fd, level, name, &val, sizeof(val))) error(1, errno, "setsockopt path mtu"); } static void set_tx_timestamping(int fd) { int val = SOF_TIMESTAMPING_OPT_CMSG | SOF_TIMESTAMPING_OPT_ID | SOF_TIMESTAMPING_OPT_TSONLY; if (cfg_tx_ts == SOF_TIMESTAMPING_TX_SOFTWARE) val |= SOF_TIMESTAMPING_SOFTWARE; else val |= SOF_TIMESTAMPING_RAW_HARDWARE; if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val))) error(1, errno, "setsockopt tx timestamping"); } static void print_audit_report(unsigned long num_msgs, unsigned long num_sends) { unsigned long tdelta; tdelta = tend - tstart; if (!tdelta) return; fprintf(stderr, "Summary over %lu.%03lu seconds...\n", tdelta / 1000, tdelta % 1000); fprintf(stderr, "sum %s tx: %6lu MB/s %10lu calls (%lu/s) %10lu msgs (%lu/s)\n", cfg_tcp ? "tcp" : "udp", ((num_msgs * cfg_payload_len) >> 10) / tdelta, num_sends, num_sends * 1000 / tdelta, num_msgs, num_msgs * 1000 / tdelta); if (cfg_tx_tstamp) { if (stat_tx_ts_errors) error(1, 0, "Expected clean TX Timestamps: %9lu msgs received %6lu errors", stat_tx_ts, stat_tx_ts_errors); if (stat_tx_ts != num_sends) error(1, 0, "Unexpected number of TX Timestamps: %9lu expected %9lu received", num_sends, stat_tx_ts); fprintf(stderr, "Tx Timestamps: %19lu received %17lu errors\n", stat_tx_ts, stat_tx_ts_errors); } if (cfg_zerocopy) { if (stat_zcopies != num_sends) error(1, 0, "Unexpected number of Zerocopy completions: %9lu expected %9lu received", num_sends, stat_zcopies); fprintf(stderr, "Zerocopy acks: %19lu\n", stat_zcopies); } } static void print_report(unsigned long num_msgs, unsigned long num_sends) { fprintf(stderr, "%s tx: %6lu MB/s %8lu calls/s %6lu msg/s\n", cfg_tcp ? "tcp" : "udp", (num_msgs * cfg_payload_len) >> 20, num_sends, num_msgs); if (cfg_audit) { total_num_msgs += num_msgs; total_num_sends += num_sends; } } int main(int argc, char **argv) { unsigned long num_msgs, num_sends; unsigned long tnow, treport, tstop; int fd, i, val, ret; parse_opts(argc, argv); if (cfg_cpu > 0) set_cpu(cfg_cpu); for (i = 0; i < sizeof(buf[0]); i++) buf[0][i] = 'a' + (i % 26); for (i = 1; i < NUM_PKT; i++) memcpy(buf[i], buf[0], sizeof(buf[0])); signal(SIGINT, sigint_handler); fd = socket(cfg_family, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0); if (fd == -1) error(1, errno, "socket"); if (cfg_zerocopy) { val = 1; ret = setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val)); if (ret) { if (errno == ENOPROTOOPT || errno == ENOTSUPP) { fprintf(stderr, "SO_ZEROCOPY not supported"); exit(KSFT_SKIP); } error(1, errno, "setsockopt zerocopy"); } } if (cfg_connected && connect(fd, (void *)&cfg_dst_addr, cfg_alen)) error(1, errno, "connect"); if (cfg_segment) set_pmtu_discover(fd, cfg_family == PF_INET); if (cfg_tx_tstamp) set_tx_timestamping(fd); num_msgs = num_sends = 0; tnow = gettimeofday_ms(); tstart = tnow; tend = tnow; tstop = tnow + cfg_runtime_ms; treport = tnow + 1000; i = 0; do { if (cfg_tcp) num_sends += send_tcp(fd, buf[i]); else if (cfg_segment) num_sends += send_udp_segment(fd, buf[i]); else if (cfg_sendmmsg) num_sends += send_udp_sendmmsg(fd, buf[i]); else num_sends += send_udp(fd, buf[i]); num_msgs++; if ((cfg_zerocopy && ((num_msgs & 0xF) == 0)) || cfg_tx_tstamp) flush_errqueue(fd, cfg_poll, 500, true); if (cfg_msg_nr && num_msgs >= cfg_msg_nr) break; tnow = gettimeofday_ms(); if (tnow >= treport) { print_report(num_msgs, num_sends); num_msgs = num_sends = 0; treport = tnow + 1000; } /* cold cache when writing buffer */ if (cfg_cache_trash) i = ++i < NUM_PKT ? i : 0; } while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop)); if (cfg_zerocopy || cfg_tx_tstamp) flush_errqueue_retry(fd, num_sends); if (close(fd)) error(1, errno, "close"); if (cfg_audit) { tend = tnow; total_num_msgs += num_msgs; total_num_sends += num_sends; print_audit_report(total_num_msgs, total_num_sends); } return 0; }