#include "dm.h"
#include "dm-path-selector.h"
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/module.h>
#define DM_MSG_PREFIX "multipath historical-service-time"
#define HST_MIN_IO 1
#define HST_VERSION "0.1.1"
#define HST_FIXED_SHIFT 10 /* 10 bits of decimal precision */
#define HST_FIXED_MAX (ULLONG_MAX >> HST_FIXED_SHIFT)
#define HST_FIXED_1 (1 << HST_FIXED_SHIFT)
#define HST_FIXED_95 972
#define HST_MAX_INFLIGHT HST_FIXED_1
#define HST_BUCKET_SHIFT 24 /* Buckets are ~ 16ms */
#define HST_WEIGHT_COUNT 64ULL
struct selector {
struct list_head valid_paths;
struct list_head failed_paths;
int valid_count;
spinlock_t lock;
unsigned int weights[HST_WEIGHT_COUNT];
unsigned int threshold_multiplier;
};
struct path_info {
struct list_head list;
struct dm_path *path;
unsigned int repeat_count;
spinlock_t lock;
u64 historical_service_time;
u64 stale_after;
u64 last_finish;
u64 outstanding;
};
static u64 fixed_power(u64 x, unsigned int frac_bits, unsigned int n)
{
unsigned long result = 1UL << frac_bits;
if (n) {
for (;;) {
if (n & 1) {
result *= x;
result += 1UL << (frac_bits - 1);
result >>= frac_bits;
}
n >>= 1;
if (!n)
break;
x *= x;
x += 1UL << (frac_bits - 1);
x >>= frac_bits;
}
}
return result;
}
static u64 fixed_ema(u64 last, u64 next, u64 weight)
{
last *= weight;
last += next * (HST_FIXED_1 - weight);
last += 1ULL << (HST_FIXED_SHIFT - 1);
return last >> HST_FIXED_SHIFT;
}
static struct selector *alloc_selector(void)
{
struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL);
if (s) {
INIT_LIST_HEAD(&s->valid_paths);
INIT_LIST_HEAD(&s->failed_paths);
spin_lock_init(&s->lock);
s->valid_count = 0;
}
return s;
}
static u64 hst_weight(struct path_selector *ps, u64 delta)
{
struct selector *s = ps->context;
int bucket = clamp(delta >> HST_BUCKET_SHIFT, 0ULL,
HST_WEIGHT_COUNT - 1);
return s->weights[bucket];
}
static void hst_set_weights(struct path_selector *ps, unsigned int base)
{
struct selector *s = ps->context;
int i;
if (base >= HST_FIXED_1)
return;
for (i = 0; i < HST_WEIGHT_COUNT - 1; i++)
s->weights[i] = fixed_power(base, HST_FIXED_SHIFT, i + 1);
s->weights[HST_WEIGHT_COUNT - 1] = 0;
}
static int hst_create(struct path_selector *ps, unsigned int argc, char **argv)
{
struct selector *s;
unsigned int base_weight = HST_FIXED_95;
unsigned int threshold_multiplier = 0;
char dummy;
if (argc > 2)
return -EINVAL;
if (argc && (sscanf(argv[0], "%u%c", &base_weight, &dummy) != 1 ||
base_weight >= HST_FIXED_1)) {
return -EINVAL;
}
if (argc > 1 && (sscanf(argv[1], "%u%c",
&threshold_multiplier, &dummy) != 1)) {
return -EINVAL;
}
s = alloc_selector();
if (!s)
return -ENOMEM;
ps->context = s;
hst_set_weights(ps, base_weight);
s->threshold_multiplier = threshold_multiplier;
return 0;
}
static void free_paths(struct list_head *paths)
{
struct path_info *pi, *next;
list_for_each_entry_safe(pi, next, paths, list) {
list_del(&pi->list);
kfree(pi);
}
}
static void hst_destroy(struct path_selector *ps)
{
struct selector *s = ps->context;
free_paths(&s->valid_paths);
free_paths(&s->failed_paths);
kfree(s);
ps->context = NULL;
}
static int hst_status(struct path_selector *ps, struct dm_path *path,
status_type_t type, char *result, unsigned int maxlen)
{
unsigned int sz = 0;
struct path_info *pi;
if (!path) {
struct selector *s = ps->context;
DMEMIT("2 %u %u ", s->weights[0], s->threshold_multiplier);
} else {
pi = path->pscontext;
switch (type) {
case STATUSTYPE_INFO:
DMEMIT("%llu %llu %llu ", pi->historical_service_time,
pi->outstanding, pi->stale_after);
break;
case STATUSTYPE_TABLE:
DMEMIT("0 ");
break;
case STATUSTYPE_IMA:
*result = '\0';
break;
}
}
return sz;
}
static int hst_add_path(struct path_selector *ps, struct dm_path *path,
int argc, char **argv, char **error)
{
struct selector *s = ps->context;
struct path_info *pi;
unsigned int repeat_count = HST_MIN_IO;
char dummy;
unsigned long flags;
if (argc > 1) {
*error = "historical-service-time ps: incorrect number of arguments";
return -EINVAL;
}
if (argc && (sscanf(argv[0], "%u%c", &repeat_count, &dummy) != 1)) {
*error = "historical-service-time ps: invalid repeat count";
return -EINVAL;
}
pi = kmalloc(sizeof(*pi), GFP_KERNEL);
if (!pi) {
*error = "historical-service-time ps: Error allocating path context";
return -ENOMEM;
}
pi->path = path;
pi->repeat_count = repeat_count;
pi->historical_service_time = HST_FIXED_1;
spin_lock_init(&pi->lock);
pi->outstanding = 0;
pi->stale_after = 0;
pi->last_finish = 0;
path->pscontext = pi;
spin_lock_irqsave(&s->lock, flags);
list_add_tail(&pi->list, &s->valid_paths);
s->valid_count++;
spin_unlock_irqrestore(&s->lock, flags);
return 0;
}
static void hst_fail_path(struct path_selector *ps, struct dm_path *path)
{
struct selector *s = ps->context;
struct path_info *pi = path->pscontext;
unsigned long flags;
spin_lock_irqsave(&s->lock, flags);
list_move(&pi->list, &s->failed_paths);
s->valid_count--;
spin_unlock_irqrestore(&s->lock, flags);
}
static int hst_reinstate_path(struct path_selector *ps, struct dm_path *path)
{
struct selector *s = ps->context;
struct path_info *pi = path->pscontext;
unsigned long flags;
spin_lock_irqsave(&s->lock, flags);
list_move_tail(&pi->list, &s->valid_paths);
s->valid_count++;
spin_unlock_irqrestore(&s->lock, flags);
return 0;
}
static void hst_fill_compare(struct path_info *pi, u64 *hst,
u64 *out, u64 *stale)
{
unsigned long flags;
spin_lock_irqsave(&pi->lock, flags);
*hst = pi->historical_service_time;
*out = pi->outstanding;
*stale = pi->stale_after;
spin_unlock_irqrestore(&pi->lock, flags);
}
static long long hst_compare(struct path_info *pi1, struct path_info *pi2,
u64 time_now, struct path_selector *ps)
{
struct selector *s = ps->context;
u64 hst1, hst2;
long long out1, out2, stale1, stale2;
int pi2_better, over_threshold;
hst_fill_compare(pi1, &hst1, &out1, &stale1);
hst_fill_compare(pi2, &hst2, &out2, &stale2);
if (hst1 > hst2)
over_threshold = hst1 > (s->threshold_multiplier * hst2);
else
over_threshold = hst2 > (s->threshold_multiplier * hst1);
if (!over_threshold)
return out1 - out2;
if ((!out1 && stale1 < time_now) || (!out2 && stale2 < time_now) ||
(!out1 && !out2))
return (!out2 * stale1) - (!out1 * stale2);
if (out1 == out2) {
pi2_better = hst1 > hst2;
} else {
if (unlikely(out1 >= HST_MAX_INFLIGHT ||
out2 >= HST_MAX_INFLIGHT)) {
hst1 >>= HST_FIXED_SHIFT;
hst2 >>= HST_FIXED_SHIFT;
}
pi2_better = (1 + out1) * hst1 > (1 + out2) * hst2;
}
if (pi2_better) {
if (stale2 < time_now)
return out1 - out2;
return 1;
}
if (stale1 < time_now)
return out1 - out2;
return -1;
}
static struct dm_path *hst_select_path(struct path_selector *ps,
size_t nr_bytes)
{
struct selector *s = ps->context;
struct path_info *pi = NULL, *best = NULL;
u64 time_now = ktime_get_ns();
struct dm_path *ret = NULL;
unsigned long flags;
spin_lock_irqsave(&s->lock, flags);
if (list_empty(&s->valid_paths))
goto out;
list_for_each_entry(pi, &s->valid_paths, list) {
if (!best || (hst_compare(pi, best, time_now, ps) < 0))
best = pi;
}
if (!best)
goto out;
list_move_tail(&best->list, &s->valid_paths);
ret = best->path;
out:
spin_unlock_irqrestore(&s->lock, flags);
return ret;
}
static int hst_start_io(struct path_selector *ps, struct dm_path *path,
size_t nr_bytes)
{
struct path_info *pi = path->pscontext;
unsigned long flags;
spin_lock_irqsave(&pi->lock, flags);
pi->outstanding++;
spin_unlock_irqrestore(&pi->lock, flags);
return 0;
}
static u64 path_service_time(struct path_info *pi, u64 start_time)
{
u64 now = ktime_get_ns();
if (time_after64(pi->last_finish, start_time))
start_time = pi->last_finish;
pi->last_finish = now;
if (time_before64(now, start_time))
return 0;
return now - start_time;
}
static int hst_end_io(struct path_selector *ps, struct dm_path *path,
size_t nr_bytes, u64 start_time)
{
struct path_info *pi = path->pscontext;
struct selector *s = ps->context;
unsigned long flags;
u64 st;
spin_lock_irqsave(&pi->lock, flags);
st = path_service_time(pi, start_time);
pi->outstanding--;
pi->historical_service_time =
fixed_ema(pi->historical_service_time,
min(st * HST_FIXED_1, HST_FIXED_MAX),
hst_weight(ps, st));
pi->stale_after = pi->last_finish +
(s->valid_count * (pi->historical_service_time >> HST_FIXED_SHIFT));
spin_unlock_irqrestore(&pi->lock, flags);
return 0;
}
static struct path_selector_type hst_ps = {
.name = "historical-service-time",
.module = THIS_MODULE,
.features = DM_PS_USE_HR_TIMER,
.table_args = 1,
.info_args = 3,
.create = hst_create,
.destroy = hst_destroy,
.status = hst_status,
.add_path = hst_add_path,
.fail_path = hst_fail_path,
.reinstate_path = hst_reinstate_path,
.select_path = hst_select_path,
.start_io = hst_start_io,
.end_io = hst_end_io,
};
static int __init dm_hst_init(void)
{
int r = dm_register_path_selector(&hst_ps);
if (r < 0)
DMERR("register failed %d", r);
DMINFO("version " HST_VERSION " loaded");
return r;
}
static void __exit dm_hst_exit(void)
{
int r = dm_unregister_path_selector(&hst_ps);
if (r < 0)
DMERR("unregister failed %d", r);
}
module_init(dm_hst_init);
module_exit(dm_hst_exit);
MODULE_DESCRIPTION(DM_NAME " measured service time oriented path selector");
MODULE_AUTHOR("Khazhismel Kumykov <khazhy@google.com>");
MODULE_LICENSE("GPL"