// SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ #define _GNU_SOURCE #include <linux/compiler.h> #include <linux/ring_buffer.h> #include <pthread.h> #include <stdio.h> #include <stdlib.h> #include <sys/mman.h> #include <sys/syscall.h> #include <sys/sysinfo.h> #include <test_progs.h> #include <uapi/linux/bpf.h> #include <unistd.h> #include "user_ringbuf_fail.skel.h" #include "user_ringbuf_success.skel.h" #include "../progs/test_user_ringbuf.h" static const long c_sample_size = sizeof(struct sample) + BPF_RINGBUF_HDR_SZ; static const long c_ringbuf_size = 1 << 12; /* 1 small page */ static const long c_max_entries = c_ringbuf_size / c_sample_size; static void drain_current_samples(void) { syscall(__NR_getpgid); } static int write_samples(struct user_ring_buffer *ringbuf, uint32_t num_samples) { int i, err = 0; /* Write some number of samples to the ring buffer. */ for (i = 0; i < num_samples; i++) { struct sample *entry; int read; entry = user_ring_buffer__reserve(ringbuf, sizeof(*entry)); if (!entry) { err = -errno; goto done; } entry->pid = getpid(); entry->seq = i; entry->value = i * i; read = snprintf(entry->comm, sizeof(entry->comm), "%u", i); if (read <= 0) { /* Assert on the error path to avoid spamming logs with * mostly success messages. */ ASSERT_GT(read, 0, "snprintf_comm"); err = read; user_ring_buffer__discard(ringbuf, entry); goto done; } user_ring_buffer__submit(ringbuf, entry); } done: drain_current_samples(); return err; } static struct user_ringbuf_success *open_load_ringbuf_skel(void) { struct user_ringbuf_success *skel; int err; skel = user_ringbuf_success__open(); if (!ASSERT_OK_PTR(skel, "skel_open")) return NULL; err = bpf_map__set_max_entries(skel->maps.user_ringbuf, c_ringbuf_size); if (!ASSERT_OK(err, "set_max_entries")) goto cleanup; err = bpf_map__set_max_entries(skel->maps.kernel_ringbuf, c_ringbuf_size); if (!ASSERT_OK(err, "set_max_entries")) goto cleanup; err = user_ringbuf_success__load(skel); if (!ASSERT_OK(err, "skel_load")) goto cleanup; return skel; cleanup: user_ringbuf_success__destroy(skel); return NULL; } static void test_user_ringbuf_mappings(void) { int err, rb_fd; int page_size = getpagesize(); void *mmap_ptr; struct user_ringbuf_success *skel; skel = open_load_ringbuf_skel(); if (!skel) return; rb_fd = bpf_map__fd(skel->maps.user_ringbuf); /* cons_pos can be mapped R/O, can't add +X with mprotect. */ mmap_ptr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rb_fd, 0); ASSERT_OK_PTR(mmap_ptr, "ro_cons_pos"); ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_WRITE), "write_cons_pos_protect"); ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_cons_pos_protect"); ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 4 * page_size, MREMAP_MAYMOVE), "wr_prod_pos"); err = -errno; ASSERT_ERR(err, "wr_prod_pos_err"); ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_ro_cons"); /* prod_pos can be mapped RW, can't add +X with mprotect. */ mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, page_size); ASSERT_OK_PTR(mmap_ptr, "rw_prod_pos"); ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_prod_pos_protect"); err = -errno; ASSERT_ERR(err, "wr_prod_pos_err"); ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw_prod"); /* data pages can be mapped RW, can't add +X with mprotect. */ mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size); ASSERT_OK_PTR(mmap_ptr, "rw_data"); ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_data_protect"); err = -errno; ASSERT_ERR(err, "exec_data_err"); ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw_data"); user_ringbuf_success__destroy(skel); } static int load_skel_create_ringbufs(struct user_ringbuf_success **skel_out, struct ring_buffer **kern_ringbuf_out, ring_buffer_sample_fn callback, struct user_ring_buffer **user_ringbuf_out) { struct user_ringbuf_success *skel; struct ring_buffer *kern_ringbuf = NULL; struct user_ring_buffer *user_ringbuf = NULL; int err = -ENOMEM, rb_fd; skel = open_load_ringbuf_skel(); if (!skel) return err; /* only trigger BPF program for current process */ skel->bss->pid = getpid(); if (kern_ringbuf_out) { rb_fd = bpf_map__fd(skel->maps.kernel_ringbuf); kern_ringbuf = ring_buffer__new(rb_fd, callback, skel, NULL); if (!ASSERT_OK_PTR(kern_ringbuf, "kern_ringbuf_create")) goto cleanup; *kern_ringbuf_out = kern_ringbuf; } if (user_ringbuf_out) { rb_fd = bpf_map__fd(skel->maps.user_ringbuf); user_ringbuf = user_ring_buffer__new(rb_fd, NULL); if (!ASSERT_OK_PTR(user_ringbuf, "user_ringbuf_create")) goto cleanup; *user_ringbuf_out = user_ringbuf; ASSERT_EQ(skel->bss->read, 0, "no_reads_after_load"); } err = user_ringbuf_success__attach(skel); if (!ASSERT_OK(err, "skel_attach")) goto cleanup; *skel_out = skel; return 0; cleanup: if (kern_ringbuf_out) *kern_ringbuf_out = NULL; if (user_ringbuf_out) *user_ringbuf_out = NULL; ring_buffer__free(kern_ringbuf); user_ring_buffer__free(user_ringbuf); user_ringbuf_success__destroy(skel); return err; } static int load_skel_create_user_ringbuf(struct user_ringbuf_success **skel_out, struct user_ring_buffer **ringbuf_out) { return load_skel_create_ringbufs(skel_out, NULL, NULL, ringbuf_out); } static void manually_write_test_invalid_sample(struct user_ringbuf_success *skel, __u32 size, __u64 producer_pos, int err) { void *data_ptr; __u64 *producer_pos_ptr; int rb_fd, page_size = getpagesize(); rb_fd = bpf_map__fd(skel->maps.user_ringbuf); ASSERT_EQ(skel->bss->read, 0, "num_samples_before_bad_sample"); /* Map the producer_pos as RW. */ producer_pos_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, page_size); ASSERT_OK_PTR(producer_pos_ptr, "producer_pos_ptr"); /* Map the data pages as RW. */ data_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size); ASSERT_OK_PTR(data_ptr, "rw_data"); memset(data_ptr, 0, BPF_RINGBUF_HDR_SZ); *(__u32 *)data_ptr = size; /* Synchronizes with smp_load_acquire() in __bpf_user_ringbuf_peek() in the kernel. */ smp_store_release(producer_pos_ptr, producer_pos + BPF_RINGBUF_HDR_SZ); drain_current_samples(); ASSERT_EQ(skel->bss->read, 0, "num_samples_after_bad_sample"); ASSERT_EQ(skel->bss->err, err, "err_after_bad_sample"); ASSERT_OK(munmap(producer_pos_ptr, page_size), "unmap_producer_pos"); ASSERT_OK(munmap(data_ptr, page_size), "unmap_data_ptr"); } static void test_user_ringbuf_post_misaligned(void) { struct user_ringbuf_success *skel; struct user_ring_buffer *ringbuf; int err; __u32 size = (1 << 5) + 7; err = load_skel_create_user_ringbuf(&skel, &ringbuf); if (!ASSERT_OK(err, "misaligned_skel")) return; manually_write_test_invalid_sample(skel, size, size, -EINVAL); user_ring_buffer__free(ringbuf); user_ringbuf_success__destroy(skel); } static void test_user_ringbuf_post_producer_wrong_offset(void) { struct user_ringbuf_success *skel; struct user_ring_buffer *ringbuf; int err; __u32 size = (1 << 5); err = load_skel_create_user_ringbuf(&skel, &ringbuf); if (!ASSERT_OK(err, "wrong_offset_skel")) return; manually_write_test_invalid_sample(skel, size, size - 8, -EINVAL); user_ring_buffer__free(ringbuf); user_ringbuf_success__destroy(skel); } static void test_user_ringbuf_post_larger_than_ringbuf_sz(void) { struct user_ringbuf_success *skel; struct user_ring_buffer *ringbuf; int err; __u32 size = c_ringbuf_size; err = load_skel_create_user_ringbuf(&skel, &ringbuf); if (!ASSERT_OK(err, "huge_sample_skel")) return; manually_write_test_invalid_sample(skel, size, size, -E2BIG); user_ring_buffer__free(ringbuf); user_ringbuf_success__destroy(skel); } static void test_user_ringbuf_basic(void) { struct user_ringbuf_success *skel; struct user_ring_buffer *ringbuf; int err; err = load_skel_create_user_ringbuf(&skel, &ringbuf); if (!ASSERT_OK(err, "ringbuf_basic_skel")) return; ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before"); err = write_samples(ringbuf, 2); if (!ASSERT_OK(err, "write_samples")) goto cleanup; ASSERT_EQ(skel->bss->read, 2, "num_samples_read_after"); cleanup: user_ring_buffer__free(ringbuf); user_ringbuf_success__destroy(skel); } static void test_user_ringbuf_sample_full_ring_buffer(void) { struct user_ringbuf_success *skel; struct user_ring_buffer *ringbuf; int err; void *sample; err = load_skel_create_user_ringbuf(&skel, &ringbuf); if (!ASSERT_OK(err, "ringbuf_full_sample_skel")) return; sample = user_ring_buffer__reserve(ringbuf, c_ringbuf_size - BPF_RINGBUF_HDR_SZ); if (!ASSERT_OK_PTR(sample, "full_sample")) goto cleanup; user_ring_buffer__submit(ringbuf, sample); ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before"); drain_current_samples(); ASSERT_EQ(skel->bss->read, 1, "num_samples_read_after"); cleanup: user_ring_buffer__free(ringbuf); user_ringbuf_success__destroy(skel); } static void test_user_ringbuf_post_alignment_autoadjust(void) { struct user_ringbuf_success *skel; struct user_ring_buffer *ringbuf; struct sample *sample; int err; err = load_skel_create_user_ringbuf(&skel, &ringbuf); if (!ASSERT_OK(err, "ringbuf_align_autoadjust_skel")) return; /* libbpf should automatically round any sample up to an 8-byte alignment. */ sample = user_ring_buffer__reserve(ringbuf, sizeof(*sample) + 1); ASSERT_OK_PTR(sample, "reserve_autoaligned"); user_ring_buffer__submit(ringbuf, sample); ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before"); drain_current_samples(); ASSERT_EQ(skel->bss->read, 1, "num_samples_read_after"); user_ring_buffer__free(ringbuf); user_ringbuf_success__destroy(skel); } static void test_user_ringbuf_overfill(void) { struct user_ringbuf_success *skel; struct user_ring_buffer *ringbuf; int err; err = load_skel_create_user_ringbuf(&skel, &ringbuf); if (err) return; err = write_samples(ringbuf, c_max_entries * 5); ASSERT_ERR(err, "write_samples"); ASSERT_EQ(skel->bss->read, c_max_entries, "max_entries"); user_ring_buffer__free(ringbuf); user_ringbuf_success__destroy(skel); } static void test_user_ringbuf_discards_properly_ignored(void) { struct user_ringbuf_success *skel; struct user_ring_buffer *ringbuf; int err, num_discarded = 0; __u64 *token; err = load_skel_create_user_ringbuf(&skel, &ringbuf); if (err) return; ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before"); while (1) { /* Write samples until the buffer is full. */ token = user_ring_buffer__reserve(ringbuf, sizeof(*token)); if (!token) break; user_ring_buffer__discard(ringbuf, token); num_discarded++; } if (!ASSERT_GE(num_discarded, 0, "num_discarded")) goto cleanup; /* Should not read any samples, as they are all discarded. */ ASSERT_EQ(skel->bss->read, 0, "num_pre_kick"); drain_current_samples(); ASSERT_EQ(skel->bss->read, 0, "num_post_kick"); /* Now that the ring buffer has been drained, we should be able to * reserve another token. */ token = user_ring_buffer__reserve(ringbuf, sizeof(*token)); if (!ASSERT_OK_PTR(token, "new_token")) goto cleanup; user_ring_buffer__discard(ringbuf, token); cleanup: user_ring_buffer__free(ringbuf); user_ringbuf_success__destroy(skel); } static void test_user_ringbuf_loop(void) { struct user_ringbuf_success *skel; struct user_ring_buffer *ringbuf; uint32_t total_samples = 8192; uint32_t remaining_samples = total_samples; int err; BUILD_BUG_ON(total_samples <= c_max_entries); err = load_skel_create_user_ringbuf(&skel, &ringbuf); if (err) return; do { uint32_t curr_samples; curr_samples = remaining_samples > c_max_entries ? c_max_entries : remaining_samples; err = write_samples(ringbuf, curr_samples); if (err != 0) { /* Assert inside of if statement to avoid flooding logs * on the success path. */ ASSERT_OK(err, "write_samples"); goto cleanup; } remaining_samples -= curr_samples; ASSERT_EQ(skel->bss->read, total_samples - remaining_samples, "current_batched_entries"); } while (remaining_samples > 0); ASSERT_EQ(skel->bss->read, total_samples, "total_batched_entries"); cleanup: user_ring_buffer__free(ringbuf); user_ringbuf_success__destroy(skel); } static int send_test_message(struct user_ring_buffer *ringbuf, enum test_msg_op op, s64 operand_64, s32 operand_32) { struct test_msg *msg; msg = user_ring_buffer__reserve(ringbuf, sizeof(*msg)); if (!msg) { /* Assert on the error path to avoid spamming logs with mostly * success messages. */ ASSERT_OK_PTR(msg, "reserve_msg"); return -ENOMEM; } msg->msg_op = op; switch (op) { case TEST_MSG_OP_INC64: case TEST_MSG_OP_MUL64: msg->operand_64 = operand_64; break; case TEST_MSG_OP_INC32: case TEST_MSG_OP_MUL32: msg->operand_32 = operand_32; break; default: PRINT_FAIL("Invalid operand %d\n", op); user_ring_buffer__discard(ringbuf, msg); return -EINVAL; } user_ring_buffer__submit(ringbuf, msg); return 0; } static void kick_kernel_read_messages(void) { syscall(__NR_prctl); } static int handle_kernel_msg(void *ctx, void *data, size_t len) { struct user_ringbuf_success *skel = ctx; struct test_msg *msg = data; switch (msg->msg_op) { case TEST_MSG_OP_INC64: skel->bss->user_mutated += msg->operand_64; return 0; case TEST_MSG_OP_INC32: skel->bss->user_mutated += msg->operand_32; return 0; case TEST_MSG_OP_MUL64: skel->bss->user_mutated *= msg->operand_64; return 0; case TEST_MSG_OP_MUL32: skel->bss->user_mutated *= msg->operand_32; return 0; default: fprintf(stderr, "Invalid operand %d\n", msg->msg_op); return -EINVAL; } } static void drain_kernel_messages_buffer(struct ring_buffer *kern_ringbuf, struct user_ringbuf_success *skel) { int cnt; cnt = ring_buffer__consume(kern_ringbuf); ASSERT_EQ(cnt, 8, "consume_kern_ringbuf"); ASSERT_OK(skel->bss->err, "consume_kern_ringbuf_err"); } static void test_user_ringbuf_msg_protocol(void) { struct user_ringbuf_success *skel; struct user_ring_buffer *user_ringbuf; struct ring_buffer *kern_ringbuf; int err, i; __u64 expected_kern = 0; err = load_skel_create_ringbufs(&skel, &kern_ringbuf, handle_kernel_msg, &user_ringbuf); if (!ASSERT_OK(err, "create_ringbufs")) return; for (i = 0; i < 64; i++) { enum test_msg_op op = i % TEST_MSG_OP_NUM_OPS; __u64 operand_64 = TEST_OP_64; __u32 operand_32 = TEST_OP_32; err = send_test_message(user_ringbuf, op, operand_64, operand_32); if (err) { /* Only assert on a failure to avoid spamming success logs. */ ASSERT_OK(err, "send_test_message"); goto cleanup; } switch (op) { case TEST_MSG_OP_INC64: expected_kern += operand_64; break; case TEST_MSG_OP_INC32: expected_kern += operand_32; break; case TEST_MSG_OP_MUL64: expected_kern *= operand_64; break; case TEST_MSG_OP_MUL32: expected_kern *= operand_32; break; default: PRINT_FAIL("Unexpected op %d\n", op); goto cleanup; } if (i % 8 == 0) { kick_kernel_read_messages(); ASSERT_EQ(skel->bss->kern_mutated, expected_kern, "expected_kern"); ASSERT_EQ(skel->bss->err, 0, "bpf_prog_err"); drain_kernel_messages_buffer(kern_ringbuf, skel); } } cleanup: ring_buffer__free(kern_ringbuf); user_ring_buffer__free(user_ringbuf); user_ringbuf_success__destroy(skel); } static void *kick_kernel_cb(void *arg) { /* Kick the kernel, causing it to drain the ring buffer and then wake * up the test thread waiting on epoll. */ syscall(__NR_prlimit64); return NULL; } static int spawn_kick_thread_for_poll(void) { pthread_t thread; return pthread_create(&thread, NULL, kick_kernel_cb, NULL); } static void test_user_ringbuf_blocking_reserve(void) { struct user_ringbuf_success *skel; struct user_ring_buffer *ringbuf; int err, num_written = 0; __u64 *token; err = load_skel_create_user_ringbuf(&skel, &ringbuf); if (err) return; ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before"); while (1) { /* Write samples until the buffer is full. */ token = user_ring_buffer__reserve(ringbuf, sizeof(*token)); if (!token) break; *token = 0xdeadbeef; user_ring_buffer__submit(ringbuf, token); num_written++; } if (!ASSERT_GE(num_written, 0, "num_written")) goto cleanup; /* Should not have read any samples until the kernel is kicked. */ ASSERT_EQ(skel->bss->read, 0, "num_pre_kick"); /* We correctly time out after 1 second, without a sample. */ token = user_ring_buffer__reserve_blocking(ringbuf, sizeof(*token), 1000); if (!ASSERT_EQ(token, NULL, "pre_kick_timeout_token")) goto cleanup; err = spawn_kick_thread_for_poll(); if (!ASSERT_EQ(err, 0, "deferred_kick_thread\n")) goto cleanup; /* After spawning another thread that asychronously kicks the kernel to * drain the messages, we're able to block and successfully get a * sample once we receive an event notification. */ token = user_ring_buffer__reserve_blocking(ringbuf, sizeof(*token), 10000); if (!ASSERT_OK_PTR(token, "block_token")) goto cleanup; ASSERT_GT(skel->bss->read, 0, "num_post_kill"); ASSERT_LE(skel->bss->read, num_written, "num_post_kill"); ASSERT_EQ(skel->bss->err, 0, "err_post_poll"); user_ring_buffer__discard(ringbuf, token); cleanup: user_ring_buffer__free(ringbuf); user_ringbuf_success__destroy(skel); } #define SUCCESS_TEST(_func) { _func, #_func } static struct { void (*test_callback)(void); const char *test_name; } success_tests[] = { SUCCESS_TEST(test_user_ringbuf_mappings), SUCCESS_TEST(test_user_ringbuf_post_misaligned), SUCCESS_TEST(test_user_ringbuf_post_producer_wrong_offset), SUCCESS_TEST(test_user_ringbuf_post_larger_than_ringbuf_sz), SUCCESS_TEST(test_user_ringbuf_basic), SUCCESS_TEST(test_user_ringbuf_sample_full_ring_buffer), SUCCESS_TEST(test_user_ringbuf_post_alignment_autoadjust), SUCCESS_TEST(test_user_ringbuf_overfill), SUCCESS_TEST(test_user_ringbuf_discards_properly_ignored), SUCCESS_TEST(test_user_ringbuf_loop), SUCCESS_TEST(test_user_ringbuf_msg_protocol), SUCCESS_TEST(test_user_ringbuf_blocking_reserve), }; void test_user_ringbuf(void) { int i; for (i = 0; i < ARRAY_SIZE(success_tests); i++) { if (!test__start_subtest(success_tests[i].test_name)) continue; success_tests[i].test_callback(); } RUN_TESTS(user_ringbuf_fail); }