#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/cgroup.h>
#include <linux/parser.h>
#include <linux/cgroup_rdma.h>
/* Text used for an unlimited (S32_MAX) value when parsing and printing. */
#define RDMACG_MAX_STR "max"
/*
 * Held around every access in this file to the device list and to the
 * per-cgroup / per-device resource pool lists.
 */
static DEFINE_MUTEX(rdmacg_mutex);
/* Registered rdmacg devices, linked through rdmacg_device.dev_node. */
static LIST_HEAD(rdmacg_devices);
/*
 * Selects which per-resource value a cgroup file shows. The value is stored
 * in cftype.private and read back by print_rpool_values().
 */
enum rdmacg_file_type {
/* show the configured limit of each resource */
RDMACG_RESOURCE_TYPE_MAX,
/* show the current usage count of each resource */
RDMACG_RESOURCE_TYPE_STAT,
};
/*
 * User-visible resource names, indexed by the RDMACG_RESOURCE_* constants.
 * parse_resource() matches input against this table and
 * print_rpool_values() prints from it.
 */
static char const *rdmacg_resource_names[] = {
[RDMACG_RESOURCE_HCA_HANDLE] = "hca_handle",
[RDMACG_RESOURCE_HCA_OBJECT] = "hca_object",
};
struct rdmacg_resource {
int max;
int usage;
};
/*
 * Accounting record for one (cgroup, device) pair. A pool is linked on both
 * the cgroup's rpools list and the device's rpools list.
 */
struct rdmacg_resource_pool {
/* device this pool accounts for */
struct rdmacg_device *device;
/* limit/usage pair for every resource type */
struct rdmacg_resource resources[RDMACG_RESOURCE_MAX];
/* link in rdma_cgroup.rpools */
struct list_head cg_node;
/* link in rdmacg_device.rpools */
struct list_head dev_node;
/* total number of charges held across all resource types */
u64 usage_sum;
/* number of resource types whose limit is currently S32_MAX */
int num_max_cnt;
};
/* Convert a cgroup_subsys_state into the rdma_cgroup that embeds it. */
static struct rdma_cgroup *css_rdmacg(struct cgroup_subsys_state *css)
{
	struct rdma_cgroup *cg = container_of(css, struct rdma_cgroup, css);

	return cg;
}
/* Return the rdma_cgroup that corresponds to the parent css of @cg. */
static struct rdma_cgroup *parent_rdmacg(struct rdma_cgroup *cg)
{
	struct cgroup_subsys_state *parent_css = cg->css.parent;

	return css_rdmacg(parent_css);
}
/*
 * Return the rdma_cgroup of the current task, looked up via task_get_css().
 * NOTE(review): task_get_css() presumably returns with a css reference
 * held; rdmacg_uncharge_hierarchy() is what calls css_put() - confirm.
 */
static inline struct rdma_cgroup *get_current_rdmacg(void)
{
	struct cgroup_subsys_state *css = task_get_css(current, rdma_cgrp_id);

	return css_rdmacg(css);
}
static void set_resource_limit(struct rdmacg_resource_pool *rpool,
int index, int new_max)
{
if (new_max == S32_MAX) {
if (rpool->resources[index].max != S32_MAX)
rpool->num_max_cnt++;
} else {
if (rpool->resources[index].max == S32_MAX)
rpool->num_max_cnt--;
}
rpool->resources[index].max = new_max;
}
/* Raise the limit of every resource type in @rpool to S32_MAX. */
static void set_all_resource_max_limit(struct rdmacg_resource_pool *rpool)
{
	int index = 0;

	while (index < RDMACG_RESOURCE_MAX) {
		set_resource_limit(rpool, index, S32_MAX);
		index++;
	}
}
/*
 * Unlink @rpool from both lists it is on and free it.
 * The caller must hold rdmacg_mutex.
 */
static void free_cg_rpool_locked(struct rdmacg_resource_pool *rpool)
{
	lockdep_assert_held(&rdmacg_mutex);

	/* The two unlink operations touch different lists and are independent. */
	list_del(&rpool->dev_node);
	list_del(&rpool->cg_node);

	kfree(rpool);
}
static struct rdmacg_resource_pool *
find_cg_rpool_locked(struct rdma_cgroup *cg,
struct rdmacg_device *device)
{
struct rdmacg_resource_pool *pool;
lockdep_assert_held(&rdmacg_mutex);
list_for_each_entry(pool, &cg->rpools, cg_node)
if (pool->device == device)
return pool;
return NULL;
}
/*
 * Return the pool of @cg for @device, allocating and linking a new one when
 * none exists yet. A new pool starts zeroed and then has every limit raised
 * to S32_MAX. The lookup asserts that rdmacg_mutex is held.
 *
 * Returns the pool, or ERR_PTR(-ENOMEM) when the allocation fails.
 */
static struct rdmacg_resource_pool *
get_cg_rpool_locked(struct rdma_cgroup *cg, struct rdmacg_device *device)
{
	struct rdmacg_resource_pool *rpool = find_cg_rpool_locked(cg, device);

	if (!rpool) {
		rpool = kzalloc(sizeof(*rpool), GFP_KERNEL);
		if (!rpool)
			return ERR_PTR(-ENOMEM);

		rpool->device = device;
		set_all_resource_max_limit(rpool);

		/* Make the pool reachable from both the cgroup and the device. */
		INIT_LIST_HEAD(&rpool->cg_node);
		INIT_LIST_HEAD(&rpool->dev_node);
		list_add_tail(&rpool->cg_node, &cg->rpools);
		list_add_tail(&rpool->dev_node, &device->rpools);
	}

	return rpool;
}
/*
 * Drop one charge of resource @index from the pool of @cg for @device.
 * The pool lookup asserts that rdmacg_mutex is held.
 *
 * When no pool exists for the pair, a warning is logged and nothing else
 * happens. When the pool ends up with no charges at all and every limit is
 * S32_MAX, it is freed.
 */
static void
uncharge_cg_locked(struct rdma_cgroup *cg,
		   struct rdmacg_device *device,
		   enum rdmacg_resource_type index)
{
	struct rdmacg_resource_pool *rpool;

	rpool = find_cg_rpool_locked(cg, device);
	if (unlikely(!rpool)) {
		/* Pass the pointers in the order the message names them. */
		pr_warn("Invalid device %p or rdma cgroup %p\n", device, cg);
		return;
	}

	rpool->resources[index].usage--;

	/* A negative count means more uncharges than charges were issued. */
	WARN_ON_ONCE(rpool->resources[index].usage < 0);
	rpool->usage_sum--;

	if (rpool->usage_sum == 0 &&
	    rpool->num_max_cnt == RDMACG_RESOURCE_MAX) {
		/* Unused pool with only "max" limits carries no information. */
		free_cg_rpool_locked(rpool);
	}
}
/*
 * Uncharge resource @index for @device from @cg and each ancestor, stopping
 * before @stop_cg (pass NULL to walk through the topmost ancestor). Takes
 * rdmacg_mutex for the walk and finally calls css_put() on @cg's css.
 */
static void rdmacg_uncharge_hierarchy(struct rdma_cgroup *cg,
				      struct rdmacg_device *device,
				      struct rdma_cgroup *stop_cg,
				      enum rdmacg_resource_type index)
{
	struct rdma_cgroup *pos = cg;

	mutex_lock(&rdmacg_mutex);
	while (pos != stop_cg) {
		uncharge_cg_locked(pos, device, index);
		pos = parent_rdmacg(pos);
	}
	mutex_unlock(&rdmacg_mutex);

	css_put(&cg->css);
}
/*
 * rdmacg_uncharge - uncharge one resource from @cg and all of its ancestors
 * @cg: cgroup that was charged
 * @device: device the resource belongs to
 * @index: resource type to uncharge
 *
 * An @index outside the valid range is silently ignored.
 */
void rdmacg_uncharge(struct rdma_cgroup *cg,
		     struct rdmacg_device *device,
		     enum rdmacg_resource_type index)
{
	if (index < RDMACG_RESOURCE_MAX)
		rdmacg_uncharge_hierarchy(cg, device, NULL, index);
}
EXPORT_SYMBOL(rdmacg_uncharge);
int rdmacg_try_charge(struct rdma_cgroup **rdmacg,
struct rdmacg_device *device,
enum rdmacg_resource_type index)
{
struct rdma_cgroup *cg, *p;
struct rdmacg_resource_pool *rpool;
s64 new;
int ret = 0;
if (index >= RDMACG_RESOURCE_MAX)
return -EINVAL;
cg = get_current_rdmacg();
mutex_lock(&rdmacg_mutex);
for (p = cg; p; p = parent_rdmacg(p)) {
rpool = get_cg_rpool_locked(p, device);
if (IS_ERR(rpool)) {
ret = PTR_ERR(rpool);
goto err;
} else {
new = rpool->resources[index].usage + 1;
if (new > rpool->resources[index].max) {
ret = -EAGAIN;
goto err;
} else {
rpool->resources[index].usage = new;
rpool->usage_sum++;
}
}
}
mutex_unlock(&rdmacg_mutex);
*rdmacg = cg;
return 0;
err:
mutex_unlock(&rdmacg_mutex);
rdmacg_uncharge_hierarchy(cg, device, p, index);
return ret;
}
EXPORT_SYMBOL(rdmacg_try_charge);
/*
 * rdmacg_register_device - add @device to the list of rdmacg devices
 * @device: device to register
 *
 * Initializes the device's list heads and appends it to rdmacg_devices
 * under rdmacg_mutex.
 */
void rdmacg_register_device(struct rdmacg_device *device)
{
	INIT_LIST_HEAD(&device->rpools);
	INIT_LIST_HEAD(&device->dev_node);

	mutex_lock(&rdmacg_mutex);
	list_add_tail(&device->dev_node, &rdmacg_devices);
	mutex_unlock(&rdmacg_mutex);
}
EXPORT_SYMBOL(rdmacg_register_device);
void rdmacg_unregister_device(struct rdmacg_device *device)
{
struct rdmacg_resource_pool *rpool, *tmp;
mutex_lock(&rdmacg_mutex);
list_del_init(&device->dev_node);
list_for_each_entry_safe(rpool, tmp, &device->rpools, dev_node)
free_cg_rpool_locked(rpool);
mutex_unlock(&rdmacg_mutex);
}
EXPORT_SYMBOL(rdmacg_unregister_device);
/*
 * Parse one "name=value" token.
 * @c: token to parse; modified in place by strsep()
 * @intval: receives the parsed limit
 *
 * The value is either a non-negative integer or exactly the string "max",
 * which is stored as S32_MAX.
 *
 * Returns the index of the named resource in rdmacg_resource_names, the
 * negative result of match_string() for an unknown name, or -EINVAL for a
 * missing '=' or an invalid value.
 */
static int parse_resource(char *c, int *intval)
{
	substring_t argstr;
	char *name, *value = c;
	int ret, i;

	name = strsep(&value, "=");
	if (!name || !value)
		return -EINVAL;

	i = match_string(rdmacg_resource_names, RDMACG_RESOURCE_MAX, name);
	if (i < 0)
		return i;

	argstr.from = value;
	argstr.to = value + strlen(value);

	ret = match_int(&argstr, intval);
	if (ret >= 0) {
		if (*intval < 0)
			return -EINVAL;
		return i;
	}

	/*
	 * Compare the whole value: a length-limited compare would also accept
	 * "", "m" and "ma" as "max".
	 */
	if (strcmp(value, RDMACG_MAX_STR) == 0) {
		*intval = S32_MAX;
		return i;
	}
	return -EINVAL;
}
/*
 * Parse a space-separated list of "name=value" tokens.
 * @options: string to parse; modified in place by strsep(). NULL yields no
 *           tokens and therefore success.
 * @new_limits: array indexed by resource; receives each parsed limit
 * @enables: bitmap; bit N is set for every resource N present in @options
 *
 * Returns 0 on success or -EINVAL as soon as one token fails to parse.
 */
static int rdmacg_parse_limits(char *options,
			       int *new_limits, unsigned long *enables)
{
	char *token;

	for (token = strsep(&options, " "); token != NULL;
	     token = strsep(&options, " ")) {
		int value;
		int index = parse_resource(token, &value);

		if (index < 0)
			return -EINVAL;

		new_limits[index] = value;
		*enables |= BIT(index);
	}

	return 0;
}
static struct rdmacg_device *rdmacg_get_device_locked(const char *name)
{
struct rdmacg_device *device;
lockdep_assert_held(&rdmacg_mutex);
list_for_each_entry(device, &rdmacg_devices, dev_node)
if (!strcmp(name, device->name))
return device;
return NULL;
}
/*
 * Write handler of the "max" file.
 *
 * The input is "<device> <name>=<value> ...". The named limits are parsed
 * first; then, under rdmacg_mutex, the device is looked up, the cgroup's
 * pool for it is fetched or created, and the parsed limits are applied.
 * A pool left with no charges and only S32_MAX limits is freed again.
 *
 * Returns @nbytes on success, otherwise -EINVAL (bad input), -ENOMEM,
 * -ENODEV (unknown device) or the error from get_cg_rpool_locked().
 */
static ssize_t rdmacg_resource_set_max(struct kernfs_open_file *of,
				       char *buf, size_t nbytes, loff_t off)
{
	struct rdma_cgroup *cg = css_rdmacg(of_css(of));
	struct rdmacg_resource_pool *rpool;
	struct rdmacg_device *device;
	unsigned long enables = 0;
	char *options = strstrip(buf);
	const char *dev_name;
	int *new_limits;
	int bit = 0;
	int ret = 0;

	/* The first space-separated word names the device. */
	dev_name = strsep(&options, " ");
	if (!dev_name)
		return -EINVAL;

	new_limits = kcalloc(RDMACG_RESOURCE_MAX, sizeof(int), GFP_KERNEL);
	if (!new_limits)
		return -ENOMEM;

	ret = rdmacg_parse_limits(options, new_limits, &enables);
	if (ret)
		goto out_free;

	mutex_lock(&rdmacg_mutex);

	device = rdmacg_get_device_locked(dev_name);
	if (!device) {
		ret = -ENODEV;
		goto out_unlock;
	}

	rpool = get_cg_rpool_locked(cg, device);
	if (IS_ERR(rpool)) {
		ret = PTR_ERR(rpool);
		goto out_unlock;
	}

	/* Only resources that appeared in the input are changed. */
	for_each_set_bit(bit, &enables, RDMACG_RESOURCE_MAX)
		set_resource_limit(rpool, bit, new_limits[bit]);

	if (rpool->usage_sum == 0 &&
	    rpool->num_max_cnt == RDMACG_RESOURCE_MAX)
		free_cg_rpool_locked(rpool);

out_unlock:
	mutex_unlock(&rdmacg_mutex);
out_free:
	kfree(new_limits);
	return ret ?: nbytes;
}
/*
 * Print "name=value " for every resource type to @sf.
 * @sf: seq_file being filled; its cftype's ->private selects limit or usage
 * @rpool: pool to print, or NULL to print the defaults (S32_MAX for limits,
 *         0 for usage)
 *
 * A value equal to S32_MAX is printed as "max".
 */
static void print_rpool_values(struct seq_file *sf,
			       struct rdmacg_resource_pool *rpool)
{
	enum rdmacg_file_type sf_type;
	int i;
	/* int matches both the stored fields and the "%d" format below. */
	int value;

	sf_type = seq_cft(sf)->private;

	for (i = 0; i < RDMACG_RESOURCE_MAX; i++) {
		seq_puts(sf, rdmacg_resource_names[i]);
		seq_putc(sf, '=');

		if (sf_type == RDMACG_RESOURCE_TYPE_MAX)
			value = rpool ? rpool->resources[i].max : S32_MAX;
		else
			value = rpool ? rpool->resources[i].usage : 0;

		if (value == S32_MAX)
			seq_puts(sf, RDMACG_MAX_STR);
		else
			seq_printf(sf, "%d", value);
		seq_putc(sf, ' ');
	}
}
/*
 * Show handler shared by the "max" and "current" files: emits one line per
 * registered device, consisting of the device name followed by the values
 * of this cgroup's pool for it. Always returns 0.
 */
static int rdmacg_resource_read(struct seq_file *sf, void *v)
{
	struct rdma_cgroup *cg = css_rdmacg(seq_css(sf));
	struct rdmacg_device *dev;

	mutex_lock(&rdmacg_mutex);

	list_for_each_entry(dev, &rdmacg_devices, dev_node) {
		seq_printf(sf, "%s ", dev->name);
		/* A missing pool (NULL) makes the printer fall back to defaults. */
		print_rpool_values(sf, find_cg_rpool_locked(cg, dev));
		seq_putc(sf, '\n');
	}

	mutex_unlock(&rdmacg_mutex);

	return 0;
}
/*
 * Control files of this controller. Both entries share one show handler;
 * .private selects whether limits or usage counts are printed. Both are
 * flagged CFTYPE_NOT_ON_ROOT.
 */
static struct cftype rdmacg_files[] = {
{
/* per-device limits; the only entry with a write handler */
.name = "max",
.write = rdmacg_resource_set_max,
.seq_show = rdmacg_resource_read,
.private = RDMACG_RESOURCE_TYPE_MAX,
.flags = CFTYPE_NOT_ON_ROOT,
},
{
/* per-device usage counts; no write handler is set */
.name = "current",
.seq_show = rdmacg_resource_read,
.private = RDMACG_RESOURCE_TYPE_STAT,
.flags = CFTYPE_NOT_ON_ROOT,
},
/* empty trailing entry (presumably the array terminator - confirm) */
{ }
};
/*
 * css_alloc callback: allocate a zeroed rdma_cgroup with an empty pool list.
 * @parent is not used.
 *
 * Returns the embedded css, or ERR_PTR(-ENOMEM) when allocation fails.
 */
static struct cgroup_subsys_state *
rdmacg_css_alloc(struct cgroup_subsys_state *parent)
{
	struct rdma_cgroup *cg = kzalloc(sizeof(*cg), GFP_KERNEL);

	if (cg) {
		INIT_LIST_HEAD(&cg->rpools);
		return &cg->css;
	}

	return ERR_PTR(-ENOMEM);
}
/* css_free callback: release the rdma_cgroup that embeds @css. */
static void rdmacg_css_free(struct cgroup_subsys_state *css)
{
	kfree(css_rdmacg(css));
}
static void rdmacg_css_offline(struct cgroup_subsys_state *css)
{
struct rdma_cgroup *cg = css_rdmacg(css);
struct rdmacg_resource_pool *rpool;
mutex_lock(&rdmacg_mutex);
list_for_each_entry(rpool, &cg->rpools, cg_node)
set_all_resource_max_limit(rpool);
mutex_unlock(&rdmacg_mutex);
}
/*
 * cgroup subsystem descriptor of the rdma controller. The same file table
 * is used for both the legacy and the default cftypes.
 */
struct cgroup_subsys rdma_cgrp_subsys = {
	.css_alloc	= rdmacg_css_alloc,
	.css_free	= rdmacg_css_free,
	.css_offline	= rdmacg_css_offline,
	.legacy_cftypes	= rdmacg_files,
	.dfl_cftypes	= rdmacg_files,
};