// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */

#include <argp.h>

#include <sys/prctl.h>
#include "local_storage_rcu_tasks_trace_bench.skel.h"
#include "bench.h"

#include <signal.h>

static struct {
	__u32 nr_procs;
	__u32 kthread_pid;
} args = {
	.nr_procs = 1000,
	.kthread_pid = 0,
};

enum {
	ARG_NR_PROCS = 7000,
	ARG_KTHREAD_PID = 7001,
};

static const struct argp_option opts[] = {
	{ "nr_procs", ARG_NR_PROCS, "NR_PROCS", 0,
		"Set number of user processes to spin up"},
	{ "kthread_pid", ARG_KTHREAD_PID, "PID", 0,
		"Pid of rcu_tasks_trace kthread for ticks tracking"},
	{},
};

static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
	long ret;

	switch (key) {
	case ARG_NR_PROCS:
		ret = strtol(arg, NULL, 10);
		if (ret < 1 || ret > UINT_MAX) {
			fprintf(stderr, "invalid nr_procs\n");
			argp_usage(state);
		}
		args.nr_procs = ret;
		break;
	case ARG_KTHREAD_PID:
		ret = strtol(arg, NULL, 10);
		if (ret < 1) {
			fprintf(stderr, "invalid kthread_pid\n");
			argp_usage(state);
		}
		args.kthread_pid = ret;
		break;
	default:
		return ARGP_ERR_UNKNOWN;
	}

	return 0;
}

const struct argp bench_local_storage_rcu_tasks_trace_argp = {
	.options = opts,
	.parser = parse_arg,
};

#define MAX_SLEEP_PROCS 150000

static void validate(void)
{
	if (env.producer_cnt != 1) {
		fprintf(stderr, "benchmark doesn't support multi-producer!\n");
		exit(1);
	}

	if (env.consumer_cnt != 0) {
		fprintf(stderr, "benchmark doesn't support consumer!\n");
		exit(1);
	}

	if (args.nr_procs > MAX_SLEEP_PROCS) {
		fprintf(stderr, "benchmark supports up to %u sleeper procs!\n",
			MAX_SLEEP_PROCS);
		exit(1);
	}
}

/* Read the rcu_tasks_trace kthread's accumulated kernel-mode time (stime, the
 * 15th field of /proc/PID/stat), in clock ticks. Returns -1 if no kthread pid
 * was provided.
 */
static long kthread_pid_ticks(void)
{
	char procfs_path[100];
	long stime;
	FILE *f;

	if (!args.kthread_pid)
		return -1;

	sprintf(procfs_path, "/proc/%u/stat", args.kthread_pid);
	f = fopen(procfs_path, "r");
	if (!f) {
		fprintf(stderr, "couldn't open %s, exiting\n", procfs_path);
		goto err_out;
	}
	if (fscanf(f, "%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %ld",
		   &stime) != 1) {
		fprintf(stderr, "fscanf of %s failed, exiting\n", procfs_path);
		goto err_out;
	}
	fclose(f);
	return stime;

err_out:
	if (f)
		fclose(f);
	exit(1);
	return 0;
}

static struct {
	struct local_storage_rcu_tasks_trace_bench *skel;
	long prev_kthread_stime;
} ctx;

/* Each sleeper proc alternates between sleeping and calling getpgid(); the
 * attached BPF progs create/destroy task-local storage on those wakeups (see
 * the comment at the bottom of this file).
 */
static void sleep_and_loop(void)
{
	while (true) {
		sleep(rand() % 4);
		syscall(__NR_getpgid);
	}
}

static void local_storage_tasks_trace_setup(void)
{
	int i, err, forkret, runner_pid;

	runner_pid = getpid();

	for (i = 0; i < args.nr_procs; i++) {
		forkret = fork();
		if (forkret < 0) {
			fprintf(stderr, "Error forking sleeper proc %u of %u, exiting\n",
				i, args.nr_procs);
			goto err_out;
		}

		if (!forkret) {
			/* Child: die with the runner so sleeper procs aren't
			 * leaked if the runner exits early.
			 */
			err = prctl(PR_SET_PDEATHSIG, SIGKILL);
			if (err < 0) {
				fprintf(stderr, "prctl failed with err %d, exiting\n",
					errno);
				goto err_out;
			}

			/* Guard against the runner having died before
			 * PDEATHSIG was set up.
			 */
			if (getppid() != runner_pid) {
				fprintf(stderr, "Runner died while spinning up procs, exiting\n");
				goto err_out;
			}

			sleep_and_loop();
		}
	}
	printf("Spun up %u procs (our pid %d)\n", args.nr_procs, runner_pid);

	setup_libbpf();

	ctx.skel = local_storage_rcu_tasks_trace_bench__open_and_load();
	if (!ctx.skel) {
		fprintf(stderr, "Error doing open_and_load, exiting\n");
		goto err_out;
	}

	ctx.prev_kthread_stime = kthread_pid_ticks();

	if (!bpf_program__attach(ctx.skel->progs.get_local)) {
		fprintf(stderr, "Error attaching bpf program\n");
		goto err_out;
	}

	if (!bpf_program__attach(ctx.skel->progs.pregp_step)) {
		fprintf(stderr, "Error attaching bpf program\n");
		goto err_out;
	}

	if (!bpf_program__attach(ctx.skel->progs.postgp)) {
		fprintf(stderr, "Error attaching bpf program\n");
		goto err_out;
	}

	return;
err_out:
	exit(1);
}

/* Snapshot and reset the grace period counters accumulated by the BPF progs
 * since the last iteration, and compute the kthread stime delta.
 */
static void measure(struct bench_res *res)
{
	long ticks;

	res->gp_ct = atomic_swap(&ctx.skel->bss->gp_hits, 0);
	res->gp_ns = atomic_swap(&ctx.skel->bss->gp_times, 0);
	ticks = kthread_pid_ticks();
	res->stime = ticks - ctx.prev_kthread_stime;
	ctx.prev_kthread_stime = ticks;
}

/* The producer thread spins on getpgid() so the runner itself also exercises
 * the attached progs.
 */
static void *producer(void *input)
{
	while (true)
		syscall(__NR_getpgid);
	return NULL;
}

static void report_progress(int iter, struct bench_res *res, long delta_ns)
{
	if (ctx.skel->bss->unexpected) {
		fprintf(stderr, "Error: Unexpected order of bpf prog calls (postgp after pregp).");
		fprintf(stderr, "Data can't be trusted, exiting\n");
		exit(1);
	}

	if (env.quiet)
		return;

	printf("Iter %d\t avg tasks_trace grace period latency\t%lf ns\n",
	       iter, res->gp_ns / (double)res->gp_ct);
	printf("Iter %d\t avg ticks per tasks_trace grace period\t%lf\n",
	       iter, res->stime / (double)res->gp_ct);
}

static void report_final(struct bench_res res[], int res_cnt)
{
	struct basic_stats gp_stat;

	grace_period_latency_basic_stats(res, res_cnt, &gp_stat);
	printf("SUMMARY tasks_trace grace period latency");
	printf("\tavg %.3lf us\tstddev %.3lf us\n", gp_stat.mean, gp_stat.stddev);
	grace_period_ticks_basic_stats(res, res_cnt, &gp_stat);
	printf("SUMMARY ticks per tasks_trace grace period");
	printf("\tavg %.3lf\tstddev %.3lf\n", gp_stat.mean, gp_stat.stddev);
}

/* local-storage-tasks-trace: Benchmark performance of BPF local_storage's use
 * of RCU Tasks-Trace.
 *
 * Stress RCU Tasks Trace by forking many tasks, all of which do no work aside
 * from a sleep() loop, and creating/destroying BPF task-local storage on
 * wakeup. The number of forked tasks is configurable.
 *
 * Exercising code paths which call call_rcu_tasks_trace while there are many
 * thousands of tasks on the system should result in RCU Tasks-Trace having to
 * do a noticeable amount of work.
 *
 * This should be observable by measuring rcu_tasks_trace_kthread CPU usage
 * after the grace period has ended, or by measuring grace period latency.
 *
 * This benchmark uses both approaches, attaching to rcu_tasks_trace_pregp_step
 * and rcu_tasks_trace_postgp functions to measure grace period latency and
 * using /proc/PID/stat to measure rcu_tasks_trace_kthread kernel ticks.
 *
 * An example invocation is sketched at the end of this file.
 */
const struct bench bench_local_storage_tasks_trace = {
	.name = "local-storage-tasks-trace",
	.argp = &bench_local_storage_rcu_tasks_trace_argp,
	.validate = validate,
	.setup = local_storage_tasks_trace_setup,
	.producer_thread = producer,
	.measure = measure,
	.report_progress = report_progress,
	.report_final = report_final,
};
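
/* Example invocation: a sketch only, assuming this benchmark is built into the
 * shared selftests "bench" runner and that the rcu_tasks_trace kthread's pid
 * can be resolved with pgrep (both are assumptions; adjust for your setup):
 *
 *   kthread_pid=$(pgrep rcu_tasks_trace)   # hypothetical pid lookup
 *   ./bench --nr_procs 15000 --kthread_pid "$kthread_pid" \
 *           local-storage-tasks-trace
 *
 * --nr_procs and --kthread_pid map to the argp options defined above; the
 * benchmark name matches .name in the struct bench definition.
 */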