/* SPDX-License-Identifier: GPL-2.0 */
#define _GNU_SOURCE
#include <linux/limits.h>
#include <linux/sched.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <errno.h>
#include <signal.h>
#include <string.h>
#include <pthread.h>

#include "../kselftest.h"
#include "cgroup_util.h"

static int touch_anon(char *buf, size_t size)
{
	int fd;
	char *pos = buf;

	fd = open("/dev/urandom", O_RDONLY);
	if (fd < 0)
		return -1;

	while (size > 0) {
		ssize_t ret = read(fd, pos, size);

		if (ret < 0) {
			if (errno != EINTR) {
				close(fd);
				return -1;
			}
		} else {
			pos += ret;
			size -= ret;
		}
	}
	close(fd);

	return 0;
}

static int alloc_and_touch_anon_noexit(const char *cgroup, void *arg)
{
	int ppid = getppid();
	size_t size = (size_t)arg;
	void *buf;

	buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
		   0, 0);
	if (buf == MAP_FAILED)
		return -1;

	if (touch_anon((char *)buf, size)) {
		munmap(buf, size);
		return -1;
	}

	/* Stay alive until our parent exits or kills us. */
	while (getppid() == ppid)
		sleep(1);

	munmap(buf, size);
	return 0;
}

/*
 * Create a child process that allocates and touches 100MB, then waits to be
 * killed. Wait until the child is attached to the cgroup, kill all processes
 * in that cgroup and wait until "cgroup.procs" is empty. At this point try to
 * destroy the empty cgroup. The test helps detect race conditions between
 * dying processes leaving the cgroup and the cgroup destruction path.
 */
static int test_cgcore_destroy(const char *root)
{
	int ret = KSFT_FAIL;
	char *cg_test = NULL;
	int child_pid;
	char buf[PAGE_SIZE];

	cg_test = cg_name(root, "cg_test");

	if (!cg_test)
		goto cleanup;

	for (int i = 0; i < 10; i++) {
		if (cg_create(cg_test))
			goto cleanup;

		child_pid = cg_run_nowait(cg_test, alloc_and_touch_anon_noexit,
					  (void *) MB(100));

		if (child_pid < 0)
			goto cleanup;

		/* wait for the child to enter cgroup */
		if (cg_wait_for_proc_count(cg_test, 1))
			goto cleanup;

		if (cg_killall(cg_test))
			goto cleanup;

		/* wait for cgroup to be empty */
		while (1) {
			if (cg_read(cg_test, "cgroup.procs", buf, sizeof(buf)))
				goto cleanup;
			if (buf[0] == '\0')
				break;
			usleep(1000);
		}

		if (rmdir(cg_test))
			goto cleanup;

		if (waitpid(child_pid, NULL, 0) < 0)
			goto cleanup;
	}
	ret = KSFT_PASS;
cleanup:
	if (cg_test)
		cg_destroy(cg_test);
	free(cg_test);
	return ret;
}

/*
 * A(0) - B(0) - C(1)
 *        \ D(0)
 *
 * A, B and C's "populated" fields would be 1 while D's 0.
 * Test that after the one process in C is moved to root,
 * A, B and C's "populated" fields flip to "0" and file
 * modified events are generated on the "cgroup.events"
 * files of all three cgroups.
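 *
 * The test below also exercises clone3() with CLONE_INTO_CGROUP: a child
 * is cloned directly into D through an O_PATH dirfd, which must flip D's
 * "populated" field to 1 while the (stopped) child exists and back to 0
 * once it is reaped, and cloning into D again after D has been removed
 * must fail. On kernels without CLONE_INTO_CGROUP support (ENOSYS), that
 * part is skipped and the test passes on the checks above alone.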
 */
static int test_cgcore_populated(const char *root)
{
	int ret = KSFT_FAIL;
	int err;
	char *cg_test_a = NULL, *cg_test_b = NULL;
	char *cg_test_c = NULL, *cg_test_d = NULL;
	int cgroup_fd = -EBADF;
	pid_t pid;

	cg_test_a = cg_name(root, "cg_test_a");
	cg_test_b = cg_name(root, "cg_test_a/cg_test_b");
	cg_test_c = cg_name(root, "cg_test_a/cg_test_b/cg_test_c");
	cg_test_d = cg_name(root, "cg_test_a/cg_test_b/cg_test_d");

	if (!cg_test_a || !cg_test_b || !cg_test_c || !cg_test_d)
		goto cleanup;

	if (cg_create(cg_test_a))
		goto cleanup;

	if (cg_create(cg_test_b))
		goto cleanup;

	if (cg_create(cg_test_c))
		goto cleanup;

	if (cg_create(cg_test_d))
		goto cleanup;

	if (cg_enter_current(cg_test_c))
		goto cleanup;

	if (cg_read_strcmp(cg_test_a, "cgroup.events", "populated 1\n"))
		goto cleanup;

	if (cg_read_strcmp(cg_test_b, "cgroup.events", "populated 1\n"))
		goto cleanup;

	if (cg_read_strcmp(cg_test_c, "cgroup.events", "populated 1\n"))
		goto cleanup;

	if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n"))
		goto cleanup;

	if (cg_enter_current(root))
		goto cleanup;

	if (cg_read_strcmp(cg_test_a, "cgroup.events", "populated 0\n"))
		goto cleanup;

	if (cg_read_strcmp(cg_test_b, "cgroup.events", "populated 0\n"))
		goto cleanup;

	if (cg_read_strcmp(cg_test_c, "cgroup.events", "populated 0\n"))
		goto cleanup;

	if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n"))
		goto cleanup;

	/* Test that we can directly clone into a new cgroup. */
	cgroup_fd = dirfd_open_opath(cg_test_d);
	if (cgroup_fd < 0)
		goto cleanup;

	pid = clone_into_cgroup(cgroup_fd);
	if (pid < 0) {
		if (errno == ENOSYS)
			goto cleanup_pass;
		goto cleanup;
	}

	if (pid == 0) {
		if (raise(SIGSTOP))
			exit(EXIT_FAILURE);
		exit(EXIT_SUCCESS);
	}

	err = cg_read_strcmp(cg_test_d, "cgroup.events", "populated 1\n");

	(void)clone_reap(pid, WSTOPPED);
	(void)kill(pid, SIGCONT);
	(void)clone_reap(pid, WEXITED);

	if (err)
		goto cleanup;

	if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n"))
		goto cleanup;

	/* Remove cgroup. */
	if (cg_test_d) {
		cg_destroy(cg_test_d);
		free(cg_test_d);
		cg_test_d = NULL;
	}

	pid = clone_into_cgroup(cgroup_fd);
	if (pid < 0)
		goto cleanup_pass;
	if (pid == 0)
		exit(EXIT_SUCCESS);
	(void)clone_reap(pid, WEXITED);
	goto cleanup;

cleanup_pass:
	ret = KSFT_PASS;

cleanup:
	if (cg_test_d)
		cg_destroy(cg_test_d);
	if (cg_test_c)
		cg_destroy(cg_test_c);
	if (cg_test_b)
		cg_destroy(cg_test_b);
	if (cg_test_a)
		cg_destroy(cg_test_a);
	free(cg_test_d);
	free(cg_test_c);
	free(cg_test_b);
	free(cg_test_a);
	if (cgroup_fd >= 0)
		close(cgroup_fd);
	return ret;
}

/*
 * A (domain threaded) - B (threaded) - C (domain)
 *
 * Test that C can't be used until it is turned into a
 * threaded cgroup: its "cgroup.type" file reports "domain invalid" in
 * these cases. Operations which fail due to invalid topology use
 * EOPNOTSUPP as the errno.
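 *
 * Background (as described in Documentation/admin-guide/cgroup-v2.rst):
 * once B is made threaded, its pre-existing domain child C lies inside a
 * threaded subtree without being threaded itself, so the kernel marks it
 * "domain invalid". Such a cgroup can't host processes or have
 * controllers enabled until it is converted to "threaded".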
 */
static int test_cgcore_invalid_domain(const char *root)
{
	int ret = KSFT_FAIL;
	char *grandparent = NULL, *parent = NULL, *child = NULL;

	grandparent = cg_name(root, "cg_test_grandparent");
	parent = cg_name(root, "cg_test_grandparent/cg_test_parent");
	child = cg_name(root, "cg_test_grandparent/cg_test_parent/cg_test_child");
	if (!parent || !child || !grandparent)
		goto cleanup;

	if (cg_create(grandparent))
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "cgroup.type", "threaded"))
		goto cleanup;

	if (cg_read_strcmp(child, "cgroup.type", "domain invalid\n"))
		goto cleanup;

	if (!cg_enter_current(child))
		goto cleanup;

	if (errno != EOPNOTSUPP)
		goto cleanup;

	if (!clone_into_cgroup_run_wait(child))
		goto cleanup;

	if (errno == ENOSYS)
		goto cleanup_pass;

	if (errno != EOPNOTSUPP)
		goto cleanup;

cleanup_pass:
	ret = KSFT_PASS;

cleanup:
	cg_enter_current(root);
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	if (grandparent)
		cg_destroy(grandparent);
	free(child);
	free(parent);
	free(grandparent);
	return ret;
}

/*
 * Test that when a child becomes threaded
 * the parent type becomes domain threaded.
 */
static int test_cgcore_parent_becomes_threaded(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent = NULL, *child = NULL;

	parent = cg_name(root, "cg_test_parent");
	child = cg_name(root, "cg_test_parent/cg_test_child");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(child, "cgroup.type", "threaded"))
		goto cleanup;

	if (cg_read_strcmp(parent, "cgroup.type", "domain threaded\n"))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);
	return ret;
}

/*
 * Test that there's no internal process constraint on threaded cgroups.
 * You can add threads/processes to a parent with a controller enabled.
 */
static int
test_cgcore_no_internal_process_constraint_on_threads(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent = NULL, *child = NULL;

	if (cg_read_strstr(root, "cgroup.controllers", "cpu") ||
	    cg_write(root, "cgroup.subtree_control", "+cpu")) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	parent = cg_name(root, "cg_test_parent");
	child = cg_name(root, "cg_test_parent/cg_test_child");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "cgroup.type", "threaded"))
		goto cleanup;

	if (cg_write(child, "cgroup.type", "threaded"))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
		goto cleanup;

	if (cg_enter_current(parent))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_enter_current(root);
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);
	return ret;
}

/*
 * Test that you can't enable a controller on a child if it's not enabled
 * on the parent.
 */
static int test_cgcore_top_down_constraint_enable(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent = NULL, *child = NULL;

	parent = cg_name(root, "cg_test_parent");
	child = cg_name(root, "cg_test_parent/cg_test_child");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (!cg_write(child, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);
	return ret;
}

/*
 * Test that you can't disable a controller on a parent
 * if it's enabled in a child.
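 *
 * As in the enable case above, the failing write is the expected
 * outcome: the test passes only if writing "-memory" to the parent's
 * "cgroup.subtree_control" is rejected (note the negated cg_write()
 * below), because the child still has the controller enabled.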
 */
static int test_cgcore_top_down_constraint_disable(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent = NULL, *child = NULL;

	parent = cg_name(root, "cg_test_parent");
	child = cg_name(root, "cg_test_parent/cg_test_child");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(child, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (!cg_write(parent, "cgroup.subtree_control", "-memory"))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);
	return ret;
}

/*
 * Test internal process constraint.
 * You can't add a pid to a domain parent if a controller is enabled.
 */
static int test_cgcore_internal_process_constraint(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent = NULL, *child = NULL;

	parent = cg_name(root, "cg_test_parent");
	child = cg_name(root, "cg_test_parent/cg_test_child");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (!cg_enter_current(parent))
		goto cleanup;

	if (!clone_into_cgroup_run_wait(parent))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);
	return ret;
}

static void *dummy_thread_fn(void *arg)
{
	return (void *)(size_t)pause();
}

/*
 * Test threadgroup migration.
 * All threads of a process are migrated together.
 */
static int test_cgcore_proc_migration(const char *root)
{
	int ret = KSFT_FAIL;
	int t, c_threads = 0, n_threads = 13;
	char *src = NULL, *dst = NULL;
	pthread_t threads[n_threads];

	src = cg_name(root, "cg_src");
	dst = cg_name(root, "cg_dst");
	if (!src || !dst)
		goto cleanup;

	if (cg_create(src))
		goto cleanup;
	if (cg_create(dst))
		goto cleanup;

	if (cg_enter_current(src))
		goto cleanup;

	for (c_threads = 0; c_threads < n_threads; ++c_threads) {
		if (pthread_create(&threads[c_threads], NULL, dummy_thread_fn,
				   NULL))
			goto cleanup;
	}

	cg_enter_current(dst);
	if (cg_read_lc(dst, "cgroup.threads") != n_threads + 1)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	for (t = 0; t < c_threads; ++t)
		pthread_cancel(threads[t]);

	for (t = 0; t < c_threads; ++t)
		pthread_join(threads[t], NULL);

	cg_enter_current(root);

	if (dst)
		cg_destroy(dst);
	if (src)
		cg_destroy(src);
	free(dst);
	free(src);
	return ret;
}

static void *migrating_thread_fn(void *arg)
{
	int g, i, n_iterations = 1000;
	char **grps = arg;
	char lines[3][PATH_MAX];

	for (g = 1; g < 3; ++g)
		snprintf(lines[g], sizeof(lines[g]), "0::%s",
			 grps[g] + strlen(grps[0]));

	for (i = 0; i < n_iterations; ++i) {
		cg_enter_current_thread(grps[(i % 2) + 1]);

		if (proc_read_strstr(0, 1, "cgroup", lines[(i % 2) + 1]))
			return (void *)-1;
	}
	return NULL;
}

/*
 * Test single thread migration.
 * Threaded cgroups allow successful migration of a thread.
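 *
 * migrating_thread_fn() above bounces one pthread between the two
 * threaded siblings cg_src and cg_dst 1000 times, checking via
 * /proc/thread-self/cgroup after each hop that the thread landed in the
 * expected group. The main thread must stay in cg_src throughout, which
 * the final proc_read_strstr() check after pthread_join() verifies.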
 */
static int test_cgcore_thread_migration(const char *root)
{
	int ret = KSFT_FAIL;
	char *dom = NULL;
	char line[PATH_MAX];
	char *grps[3] = { (char *)root, NULL, NULL };
	pthread_t thr;
	void *retval;

	dom = cg_name(root, "cg_dom");
	grps[1] = cg_name(root, "cg_dom/cg_src");
	grps[2] = cg_name(root, "cg_dom/cg_dst");
	if (!grps[1] || !grps[2] || !dom)
		goto cleanup;

	if (cg_create(dom))
		goto cleanup;
	if (cg_create(grps[1]))
		goto cleanup;
	if (cg_create(grps[2]))
		goto cleanup;

	if (cg_write(grps[1], "cgroup.type", "threaded"))
		goto cleanup;
	if (cg_write(grps[2], "cgroup.type", "threaded"))
		goto cleanup;

	if (cg_enter_current(grps[1]))
		goto cleanup;

	if (pthread_create(&thr, NULL, migrating_thread_fn, grps))
		goto cleanup;

	if (pthread_join(thr, &retval))
		goto cleanup;

	if (retval)
		goto cleanup;

	snprintf(line, sizeof(line), "0::%s", grps[1] + strlen(grps[0]));
	if (proc_read_strstr(0, 1, "cgroup", line))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_enter_current(root);
	if (grps[2])
		cg_destroy(grps[2]);
	if (grps[1])
		cg_destroy(grps[1]);
	if (dom)
		cg_destroy(dom);
	free(grps[2]);
	free(grps[1]);
	free(dom);
	return ret;
}

/*
 * cgroup migration permission check should be performed based on the
 * credentials at the time of open instead of write.
 */
static int test_cgcore_lesser_euid_open(const char *root)
{
	const uid_t test_euid = TEST_UID;
	int ret = KSFT_FAIL;
	char *cg_test_a = NULL, *cg_test_b = NULL;
	char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
	int cg_test_b_procs_fd = -1;
	uid_t saved_uid;

	cg_test_a = cg_name(root, "cg_test_a");
	cg_test_b = cg_name(root, "cg_test_b");

	if (!cg_test_a || !cg_test_b)
		goto cleanup;

	cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
	cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");

	if (!cg_test_a_procs || !cg_test_b_procs)
		goto cleanup;

	if (cg_create(cg_test_a) || cg_create(cg_test_b))
		goto cleanup;

	if (cg_enter_current(cg_test_a))
		goto cleanup;

	if (chown(cg_test_a_procs, test_euid, -1) ||
	    chown(cg_test_b_procs, test_euid, -1))
		goto cleanup;

	saved_uid = geteuid();
	if (seteuid(test_euid))
		goto cleanup;

	cg_test_b_procs_fd = open(cg_test_b_procs, O_RDWR);

	if (seteuid(saved_uid))
		goto cleanup;

	if (cg_test_b_procs_fd < 0)
		goto cleanup;

	if (write(cg_test_b_procs_fd, "0", 1) >= 0 || errno != EACCES)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_enter_current(root);
	if (cg_test_b_procs_fd >= 0)
		close(cg_test_b_procs_fd);
	if (cg_test_b)
		cg_destroy(cg_test_b);
	if (cg_test_a)
		cg_destroy(cg_test_a);
	free(cg_test_b_procs);
	free(cg_test_a_procs);
	free(cg_test_b);
	free(cg_test_a);
	return ret;
}

struct lesser_ns_open_thread_arg {
	const char	*path;
	int		fd;
	int		err;
};

static int lesser_ns_open_thread_fn(void *arg)
{
	struct lesser_ns_open_thread_arg *targ = arg;

	targ->fd = open(targ->path, O_RDWR);
	targ->err = errno;
	return 0;
}

/*
 * cgroup migration permission check should be performed based on the cgroup
 * namespace at the time of open instead of write.
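 *
 * The child below is cloned into a fresh cgroup namespace
 * (CLONE_NEWCGROUP) while sharing the parent's file descriptor table
 * (CLONE_FILES), so the fd it opens on cg_test_b's "cgroup.procs" is
 * visible to the parent. The parent, in its own namespace, must not be
 * able to migrate through that fd: the write is expected to fail with
 * ENOENT, since the destination cgroup can't be resolved from the
 * opener's namespace.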
 */
static int test_cgcore_lesser_ns_open(const char *root)
{
	static char stack[65536];
	const uid_t test_euid = 65534;	/* usually nobody, any !root is fine */
	int ret = KSFT_FAIL;
	char *cg_test_a = NULL, *cg_test_b = NULL;
	char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
	int cg_test_b_procs_fd = -1;
	struct lesser_ns_open_thread_arg targ = { .fd = -1 };
	pid_t pid;
	int status;

	cg_test_a = cg_name(root, "cg_test_a");
	cg_test_b = cg_name(root, "cg_test_b");

	if (!cg_test_a || !cg_test_b)
		goto cleanup;

	cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
	cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");

	if (!cg_test_a_procs || !cg_test_b_procs)
		goto cleanup;

	if (cg_create(cg_test_a) || cg_create(cg_test_b))
		goto cleanup;

	if (cg_enter_current(cg_test_b))
		goto cleanup;

	if (chown(cg_test_a_procs, test_euid, -1) ||
	    chown(cg_test_b_procs, test_euid, -1))
		goto cleanup;

	targ.path = cg_test_b_procs;
	pid = clone(lesser_ns_open_thread_fn, stack + sizeof(stack),
		    CLONE_NEWCGROUP | CLONE_FILES | CLONE_VM | SIGCHLD,
		    &targ);
	if (pid < 0)
		goto cleanup;

	if (waitpid(pid, &status, 0) < 0)
		goto cleanup;

	if (!WIFEXITED(status))
		goto cleanup;

	cg_test_b_procs_fd = targ.fd;
	if (cg_test_b_procs_fd < 0)
		goto cleanup;

	if (cg_enter_current(cg_test_a))
		goto cleanup;

	if ((status = write(cg_test_b_procs_fd, "0", 1)) >= 0 ||
	    errno != ENOENT)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_enter_current(root);
	if (cg_test_b_procs_fd >= 0)
		close(cg_test_b_procs_fd);
	if (cg_test_b)
		cg_destroy(cg_test_b);
	if (cg_test_a)
		cg_destroy(cg_test_a);
	free(cg_test_b_procs);
	free(cg_test_a_procs);
	free(cg_test_b);
	free(cg_test_a);
	return ret;
}

#define T(x) { x, #x }
struct corecg_test {
	int (*fn)(const char *root);
	const char *name;
} tests[] = {
	T(test_cgcore_internal_process_constraint),
	T(test_cgcore_top_down_constraint_enable),
	T(test_cgcore_top_down_constraint_disable),
	T(test_cgcore_no_internal_process_constraint_on_threads),
	T(test_cgcore_parent_becomes_threaded),
	T(test_cgcore_invalid_domain),
	T(test_cgcore_populated),
	T(test_cgcore_proc_migration),
	T(test_cgcore_thread_migration),
	T(test_cgcore_destroy),
	T(test_cgcore_lesser_euid_open),
	T(test_cgcore_lesser_ns_open),
};
#undef T

int main(int argc, char *argv[])
{
	char root[PATH_MAX];
	int i, ret = EXIT_SUCCESS;

	if (cg_find_unified_root(root, sizeof(root)))
		ksft_exit_skip("cgroup v2 isn't mounted\n");

	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
		if (cg_write(root, "cgroup.subtree_control", "+memory"))
			ksft_exit_skip("Failed to set memory controller\n");

	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		switch (tests[i].fn(root)) {
		case KSFT_PASS:
			ksft_test_result_pass("%s\n", tests[i].name);
			break;
		case KSFT_SKIP:
			ksft_test_result_skip("%s\n", tests[i].name);
			break;
		default:
			ret = EXIT_FAILURE;
			ksft_test_result_fail("%s\n", tests[i].name);
			break;
		}
	}

	return ret;
}
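
/*
 * A typical way to build and run this test from a kernel source tree,
 * assuming the usual kselftest layout (running the binary directly
 * requires root and a mounted cgroup v2 hierarchy):
 *
 *   make -C tools/testing/selftests TARGETS=cgroup run_tests
 *
 * or build just this directory and invoke the binary by hand:
 *
 *   make -C tools/testing/selftests/cgroup
 *   sudo tools/testing/selftests/cgroup/test_core
 */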