#define pr_fmt(fmt) "kfence: " fmt
#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/debugfs.h>
#include <linux/hash.h>
#include <linux/irq_work.h>
#include <linux/jhash.h>
#include <linux/kcsan-checks.h>
#include <linux/kfence.h>
#include <linux/kmemleak.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/log2.h>
#include <linux/memblock.h>
#include <linux/moduleparam.h>
#include <linux/notifier.h>
#include <linux/panic_notifier.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/sched/clock.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <asm/kfence.h>
#include "kfence.h"
/*
 * Wrapper around WARN_ON() that additionally disables KFENCE when the warning
 * fires: kfence_enabled is cleared and disabled_by_warn is set.
 *
 * Evaluates to the boolean result of WARN_ON(cond), so it can be used directly
 * in a condition.
 */
#define KFENCE_WARN_ON(cond) \
({ \
const bool __cond = WARN_ON(cond); \
if (unlikely(__cond)) { \
WRITE_ONCE(kfence_enabled, false); \
disabled_by_warn = true; \
} \
__cond; \
})
/* Whether KFENCE is currently active; accessed with READ_ONCE()/WRITE_ONCE(). */
static bool kfence_enabled __read_mostly;
/*
 * Set once KFENCE_WARN_ON() has fired. param_set_sample_interval() refuses
 * (-EINVAL) to re-enable KFENCE while this is set.
 */
static bool disabled_by_warn __read_mostly;
/*
 * Sample interval. Passed through msecs_to_jiffies() when re-arming
 * kfence_timer, i.e. it is interpreted as milliseconds. A value of 0 means
 * KFENCE is disabled.
 */
unsigned long kfence_sample_interval __read_mostly = CONFIG_KFENCE_SAMPLE_INTERVAL;
EXPORT_SYMBOL_GPL(kfence_sample_interval);
/* Force the "kfence." prefix for all module parameters declared below. */
#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "kfence."
/* Defined further down; needed by param_set_sample_interval(). */
static int kfence_enable_late(void);
/*
 * Setter for the "sample_interval" module parameter.
 *
 * @val: textual value; parsed with kstrtoul() using automatic base detection.
 * @kp:  kernel parameter whose ->arg points at the unsigned long to update.
 *
 * Writing 0 disables a running KFENCE. Writing a non-zero value while KFENCE is
 * disabled (and the system is past SYSTEM_BOOTING) attempts to enable it,
 * unless it was previously disabled by KFENCE_WARN_ON().
 *
 * Return: 0 on success, the kstrtoul() error on a parse failure, -EINVAL if
 * KFENCE was disabled by a warning, or the result of kfence_enable_late().
 */
static int param_set_sample_interval(const char *val, const struct kernel_param *kp)
{
	unsigned long interval;
	int err;

	err = kstrtoul(val, 0, &interval);
	if (err < 0)
		return err;

	/* A zero interval switches off a currently running KFENCE. */
	if (interval == 0 && READ_ONCE(kfence_enabled)) {
		pr_info("disabled\n");
		WRITE_ONCE(kfence_enabled, false);
	}

	*((unsigned long *)kp->arg) = interval;

	/* Only a non-zero interval on a disabled, booted system needs more work. */
	if (interval == 0 || READ_ONCE(kfence_enabled) || system_state == SYSTEM_BOOTING)
		return 0;

	if (disabled_by_warn)
		return -EINVAL;

	return kfence_enable_late();
}
/*
 * Getter for the "sample_interval" module parameter.
 *
 * Reports "0" while KFENCE is disabled, regardless of the stored interval;
 * otherwise defers to param_get_ulong().
 *
 * Return: number of characters written to @buffer.
 */
static int param_get_sample_interval(char *buffer, const struct kernel_param *kp)
{
	if (READ_ONCE(kfence_enabled))
		return param_get_ulong(buffer, kp);

	return sprintf(buffer, "0\n");
}
/* Custom accessors for kfence.sample_interval (mode 0600). */
static const struct kernel_param_ops sample_interval_param_ops = {
.set = param_set_sample_interval,
.get = param_get_sample_interval,
};
module_param_cb(sample_interval, &sample_interval_param_ops, &kfence_sample_interval, 0600);
/*
 * Threshold, as a percentage of CONFIG_KFENCE_NUM_OBJECTS, of currently
 * allocated objects above which should_skip_covered() returns true.
 */
static unsigned long kfence_skip_covered_thresh __read_mostly = 75;
module_param_named(skip_covered_thresh, kfence_skip_covered_thresh, ulong, 0644);
/* If set, kfence_init_enable() sets up kfence_timer as deferrable work. */
static bool kfence_deferrable __read_mostly = IS_ENABLED(CONFIG_KFENCE_DEFERRABLE);
module_param_named(deferrable, kfence_deferrable, bool, 0444);
/* If set, kfence_init_enable() registers a panic notifier that checks all canaries. */
static bool kfence_check_on_panic __read_mostly;
module_param_named(check_on_panic, kfence_check_on_panic, bool, 0444);
/* Base address of the KFENCE pool (KFENCE_POOL_SIZE bytes); NULL if there is no pool. */
char *__kfence_pool __read_mostly;
EXPORT_SYMBOL(__kfence_pool);
static_assert(CONFIG_KFENCE_NUM_OBJECTS > 0);
/*
 * Per-object metadata array. Published with smp_store_release() by
 * kfence_init_pool() only after the pool was initialized successfully.
 */
struct kfence_metadata *kfence_metadata __read_mostly;
/* Metadata array as allocated; used while the pool is being initialized. */
static struct kfence_metadata *kfence_metadata_init __read_mostly;
/* Metadata objects available for allocation; protected by kfence_freelist_lock. */
static struct list_head kfence_freelist = LIST_HEAD_INIT(kfence_freelist);
static DEFINE_RAW_SPINLOCK(kfence_freelist_lock);
/*
 * Static key: enabled once in kfence_init_enable() when
 * CONFIG_KFENCE_STATIC_KEYS is off, otherwise toggled by
 * toggle_allocation_gate().
 */
DEFINE_STATIC_KEY_FALSE(kfence_allocation_key);
/*
 * Allocation gate: reset to 0 by toggle_allocation_gate(); __kfence_alloc()
 * only proceeds for the caller whose atomic_inc_return() yields 1.
 */
atomic_t kfence_allocation_gate = ATOMIC_INIT(1);
/*
 * Table of counters indexed by allocation stack hash. Each hash touches
 * ALLOC_COVERED_HNUM slots; the next slot is derived with ALLOC_COVERED_HNEXT().
 * See alloc_covered_add() and alloc_covered_contains().
 */
#define ALLOC_COVERED_HNUM 2
#define ALLOC_COVERED_ORDER (const_ilog2(CONFIG_KFENCE_NUM_OBJECTS) + 2)
#define ALLOC_COVERED_SIZE (1 << ALLOC_COVERED_ORDER)
#define ALLOC_COVERED_HNEXT(h) hash_32(h, ALLOC_COVERED_ORDER)
#define ALLOC_COVERED_MASK (ALLOC_COVERED_SIZE - 1)
static atomic_t alloc_covered[ALLOC_COVERED_SIZE];
/* Maximum number of stack entries fed into the allocation stack hash. */
#define UNIQUE_ALLOC_STACK_DEPTH ((size_t)8)
/* Seed for the allocation stack hash; set from get_random_u32() in kfence_init(). */
static u32 stack_hash_seed __ro_after_init;
/*
 * Indices into counters[] and counter_names[]. KFENCE_COUNTER_COUNT must stay
 * last: it is the number of counters.
 */
enum kfence_counter_id {
KFENCE_COUNTER_ALLOCATED,
KFENCE_COUNTER_ALLOCS,
KFENCE_COUNTER_FREES,
KFENCE_COUNTER_ZOMBIES,
KFENCE_COUNTER_BUGS,
KFENCE_COUNTER_SKIP_INCOMPAT,
KFENCE_COUNTER_SKIP_CAPACITY,
KFENCE_COUNTER_SKIP_COVERED,
KFENCE_COUNTER_COUNT,
};
/* Statistics counters, printed by stats_show() with the names below. */
static atomic_long_t counters[KFENCE_COUNTER_COUNT];
static const char *const counter_names[] = {
[KFENCE_COUNTER_ALLOCATED] = "currently allocated",
[KFENCE_COUNTER_ALLOCS] = "total allocations",
[KFENCE_COUNTER_FREES] = "total frees",
[KFENCE_COUNTER_ZOMBIES] = "zombie allocations",
[KFENCE_COUNTER_BUGS] = "total bugs",
[KFENCE_COUNTER_SKIP_INCOMPAT] = "skipped allocations (incompatible)",
[KFENCE_COUNTER_SKIP_CAPACITY] = "skipped allocations (capacity)",
[KFENCE_COUNTER_SKIP_COVERED] = "skipped allocations (covered)",
};
/* Every counter must have a name. */
static_assert(ARRAY_SIZE(counter_names) == KFENCE_COUNTER_COUNT);
/*
 * Return true once the number of currently allocated objects exceeds
 * kfence_skip_covered_thresh percent of CONFIG_KFENCE_NUM_OBJECTS.
 */
static inline bool should_skip_covered(void)
{
	const unsigned long limit = (CONFIG_KFENCE_NUM_OBJECTS * kfence_skip_covered_thresh) / 100;
	const long allocated = atomic_long_read(&counters[KFENCE_COUNTER_ALLOCATED]);

	return allocated > limit;
}
/*
 * Hash an allocation stack trace.
 *
 * At most UNIQUE_ALLOC_STACK_DEPTH entries are considered; that prefix is then
 * passed through filter_irq_stacks() and hashed with jhash(), seeded by
 * stack_hash_seed.
 */
static u32 get_alloc_stack_hash(unsigned long *stack_entries, size_t num_entries)
{
	size_t depth = min(num_entries, UNIQUE_ALLOC_STACK_DEPTH);

	depth = filter_irq_stacks(stack_entries, depth);

	return jhash(stack_entries, depth * sizeof(*stack_entries), stack_hash_seed);
}
/*
 * Add @val to each of the ALLOC_COVERED_HNUM slots of alloc_covered[] selected
 * by @alloc_stack_hash. Callers in this file pass 1 on allocation and -1 on
 * free.
 */
static void alloc_covered_add(u32 alloc_stack_hash, int val)
{
	u32 hash = alloc_stack_hash;
	int round = 0;

	while (round < ALLOC_COVERED_HNUM) {
		atomic_add(val, &alloc_covered[hash & ALLOC_COVERED_MASK]);
		hash = ALLOC_COVERED_HNEXT(hash);
		round++;
	}
}
/*
 * Return true if every one of the ALLOC_COVERED_HNUM slots of alloc_covered[]
 * selected by @alloc_stack_hash is non-zero; false as soon as one slot reads
 * zero.
 */
static bool alloc_covered_contains(u32 alloc_stack_hash)
{
	u32 hash = alloc_stack_hash;
	int round = 0;

	while (round < ALLOC_COVERED_HNUM) {
		if (atomic_read(&alloc_covered[hash & ALLOC_COVERED_MASK]) == 0)
			return false;
		hash = ALLOC_COVERED_HNEXT(hash);
		round++;
	}

	return true;
}
/*
 * Protect the page containing @addr.
 *
 * Return: true on success. On failure KFENCE_WARN_ON() fires (which disables
 * KFENCE) and false is returned.
 */
static bool kfence_protect(unsigned long addr)
{
	const unsigned long page = ALIGN_DOWN(addr, PAGE_SIZE);
	const bool failed = KFENCE_WARN_ON(!kfence_protect_page(page, true));

	return !failed;
}
/*
 * Remove the protection from the page containing @addr.
 *
 * Return: true on success. On failure KFENCE_WARN_ON() fires (which disables
 * KFENCE) and false is returned.
 */
static bool kfence_unprotect(unsigned long addr)
{
	const unsigned long page = ALIGN_DOWN(addr, PAGE_SIZE);
	const bool failed = KFENCE_WARN_ON(!kfence_protect_page(page, false));

	return !failed;
}
/*
 * Map a metadata object to the address of its object page in the pool.
 *
 * The object with index i lives at pool offset (i + 1) * 2 * PAGE_SIZE.
 *
 * Return: the page address, or 0 (after KFENCE_WARN_ON()) if @meta does not
 * point into kfence_metadata[] or if the page of meta->addr does not match the
 * computed page.
 */
static inline unsigned long metadata_to_pageaddr(const struct kfence_metadata *meta)
{
	const unsigned long pool_offset = (meta - kfence_metadata + 1) * PAGE_SIZE * 2;
	const unsigned long expected = (unsigned long)(__kfence_pool + pool_offset);

	/* @meta must be an element of kfence_metadata[]. */
	if (KFENCE_WARN_ON(meta < kfence_metadata ||
			   meta >= kfence_metadata + CONFIG_KFENCE_NUM_OBJECTS))
		return 0;

	/* The stored address must lie within the page owned by this object. */
	if (KFENCE_WARN_ON(ALIGN_DOWN(meta->addr, PAGE_SIZE) != expected))
		return 0;

	return expected;
}
/*
 * Record a state transition of @meta to @next together with tracking info.
 *
 * The free track is updated when @next is KFENCE_OBJECT_FREED, the allocation
 * track otherwise. If @stack_entries is non-NULL, @num_stack_entries entries
 * are copied from it; otherwise a fresh stack trace is captured.
 *
 * The caller must hold meta->lock (checked via lockdep).
 */
static noinline void
metadata_update_state(struct kfence_metadata *meta, enum kfence_object_state next,
unsigned long *stack_entries, size_t num_stack_entries)
{
struct kfence_track *track =
next == KFENCE_OBJECT_FREED ? &meta->free_track : &meta->alloc_track;
lockdep_assert_held(&meta->lock);
if (stack_entries) {
memcpy(track->stack_entries, stack_entries,
num_stack_entries * sizeof(stack_entries[0]));
} else {
/*
 * Capture the trace here, skipping 1 entry. NOTE(review): presumably
 * the skipped entry is this function and noinline keeps that count
 * accurate -- confirm.
 */
num_stack_entries = stack_trace_save(track->stack_entries, KFENCE_STACK_DEPTH, 1);
}
track->num_stack_entries = num_stack_entries;
track->pid = task_pid_nr(current);
track->cpu = raw_smp_processor_id();
track->ts_nsec = local_clock();
/* State is read locklessly with READ_ONCE() elsewhere in this file. */
WRITE_ONCE(meta->state, next);
}
/*
 * Check a single canary byte at @addr.
 *
 * Return: true if the byte holds the expected pattern. Otherwise the bug
 * counter is bumped, a KFENCE_ERROR_CORRUPTION report is generated under the
 * owning object's lock, and false is returned.
 */
static inline bool check_canary_byte(u8 *addr)
{
	const unsigned long byte_addr = (unsigned long)addr;
	struct kfence_metadata *owner;
	unsigned long irqflags;

	if (unlikely(*addr != KFENCE_CANARY_PATTERN_U8(addr))) {
		atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);

		owner = addr_to_metadata(byte_addr);
		raw_spin_lock_irqsave(&owner->lock, irqflags);
		kfence_report_error(byte_addr, false, NULL, owner, KFENCE_ERROR_CORRUPTION);
		raw_spin_unlock_irqrestore(&owner->lock, irqflags);

		return false;
	}

	return true;
}
/*
 * Fill the object's page with the canary pattern on both sides of the object,
 * writing whole u64 words.
 *
 * The right-hand side starts at the end of the object rounded down to a u64
 * boundary, so the first word written there may overlap the tail of the object.
 */
static inline void set_canary(const struct kfence_metadata *meta)
{
	const unsigned long page_start = ALIGN_DOWN(meta->addr, PAGE_SIZE);
	unsigned long cursor;

	/* From the start of the page up to the object. */
	cursor = page_start;
	while (cursor < meta->addr) {
		*((u64 *)cursor) = KFENCE_CANARY_PATTERN_U64;
		cursor += sizeof(u64);
	}

	/* From the (aligned-down) end of the object to the end of the page. */
	cursor = ALIGN_DOWN(meta->addr + meta->size, sizeof(u64));
	while (cursor - page_start < PAGE_SIZE) {
		*((u64 *)cursor) = KFENCE_CANARY_PATTERN_U64;
		cursor += sizeof(u64);
	}
}
/*
 * Verify the canary bytes surrounding the object described by @meta.
 *
 * Each side is scanned until the first corrupted byte is reported by
 * check_canary_byte(). A corruption on the left side does not prevent the
 * right side from being checked.
 */
static inline void check_canary(const struct kfence_metadata *meta)
{
const unsigned long pageaddr = ALIGN_DOWN(meta->addr, PAGE_SIZE);
unsigned long addr = pageaddr;
/* Left side: compare whole u64 words while at least one fits before the object. */
for (; meta->addr - addr >= sizeof(u64); addr += sizeof(u64)) {
if (unlikely(*((u64 *)addr) != KFENCE_CANARY_PATTERN_U64))
break;
}
/*
 * Left side: check byte-wise whatever remains, i.e. a mismatching word
 * and/or a tail shorter than a u64. Stop at the first bad byte.
 */
for (; addr < meta->addr; addr++) {
if (unlikely(!check_canary_byte((u8 *)addr)))
break;
}
/* Right side: byte-wise from the end of the object up to u64 alignment. */
for (addr = meta->addr + meta->size; addr % sizeof(u64) != 0; addr++) {
if (unlikely(!check_canary_byte((u8 *)addr)))
return;
}
/* Right side: whole words up to the end of the page. */
for (; addr - pageaddr < PAGE_SIZE; addr += sizeof(u64)) {
if (unlikely(*((u64 *)addr) != KFENCE_CANARY_PATTERN_U64)) {
/* Word mismatch: locate the offending byte. */
for (; addr - pageaddr < PAGE_SIZE; addr++) {
if (!check_canary_byte((u8 *)addr))
return;
}
}
}
}
/*
 * Allocate a guarded object of @size bytes for @cache from the KFENCE pool.
 *
 * @stack_entries/@num_stack_entries: allocation stack trace to record.
 * @alloc_stack_hash: hash of that trace, accounted in alloc_covered[].
 *
 * Return: address of the object, or NULL if no free object is available or
 * the object's lock could not be taken.
 */
static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t gfp,
unsigned long *stack_entries, size_t num_stack_entries,
u32 alloc_stack_hash)
{
struct kfence_metadata *meta = NULL;
unsigned long flags;
struct slab *slab;
void *addr;
/* Randomly choose whether to place the object at the end of its page. */
const bool random_right_allocate = get_random_u32_below(2);
/* Only with a non-zero CONFIG_KFENCE_STRESS_TEST_FAULTS: randomly protect the new object. */
const bool random_fault = CONFIG_KFENCE_STRESS_TEST_FAULTS &&
!get_random_u32_below(CONFIG_KFENCE_STRESS_TEST_FAULTS);
/* Take the first object off the freelist, if any. */
raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
if (!list_empty(&kfence_freelist)) {
meta = list_entry(kfence_freelist.next, struct kfence_metadata, list);
list_del_init(&meta->list);
}
raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);
if (!meta) {
atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_CAPACITY]);
return NULL;
}
/*
 * Do not spin on the object lock: if it is contended, return the object to
 * the tail of the freelist and give up. NOTE(review): presumably this avoids
 * a deadlock with a holder of meta->lock that ends up allocating -- confirm.
 */
if (unlikely(!raw_spin_trylock_irqsave(&meta->lock, flags))) {
raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
list_add_tail(&meta->list, &kfence_freelist);
raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);
return NULL;
}
meta->addr = metadata_to_pageaddr(meta);
/* A previously freed object left its page protected; undo that for reuse. */
if (meta->state == KFENCE_OBJECT_FREED)
kfence_unprotect(meta->addr);
if (random_right_allocate) {
/* Move the object to the end of the page, honouring the cache alignment. */
meta->addr += PAGE_SIZE - size;
meta->addr = ALIGN_DOWN(meta->addr, cache->align);
}
addr = (void *)meta->addr;
/* Fill in the remaining metadata while still holding meta->lock. */
metadata_update_state(meta, KFENCE_OBJECT_ALLOCATED, stack_entries, num_stack_entries);
/* meta->cache is read locklessly with READ_ONCE() in kfence_shutdown_cache(). */
WRITE_ONCE(meta->cache, cache);
meta->size = size;
meta->alloc_stack_hash = alloc_stack_hash;
raw_spin_unlock_irqrestore(&meta->lock, flags);
alloc_covered_add(alloc_stack_hash, 1);
/* Set the slab fields for the object's page. */
slab = virt_to_slab((void *)meta->addr);
slab->slab_cache = cache;
#if defined(CONFIG_SLUB)
slab->objects = 1;
#elif defined(CONFIG_SLAB)
slab->s_mem = addr;
#endif
/* Write canaries first; object initialization below comes afterwards. */
set_canary(meta);
if (unlikely(slab_want_init_on_alloc(gfp, cache)))
memzero_explicit(addr, size);
if (cache->ctor)
cache->ctor(addr);
if (random_fault)
kfence_protect(meta->addr);
atomic_long_inc(&counters[KFENCE_COUNTER_ALLOCATED]);
atomic_long_inc(&counters[KFENCE_COUNTER_ALLOCS]);
return addr;
}
/*
 * Free the guarded object at @addr described by @meta.
 *
 * @zombie: if true, the object is marked freed and its page protected, but it
 * is not returned to the freelist and its memory is not cleared; only the
 * zombie counter is incremented. Used by kfence_shutdown_cache().
 *
 * An object that is not in the allocated state, or whose recorded address
 * differs from @addr, is reported as KFENCE_ERROR_INVALID_FREE and left alone.
 */
static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool zombie)
{
struct kcsan_scoped_access assert_page_exclusive;
unsigned long flags;
bool init;
raw_spin_lock_irqsave(&meta->lock, flags);
if (meta->state != KFENCE_OBJECT_ALLOCATED || meta->addr != (unsigned long)addr) {
/* Invalid free (wrong state or wrong address): report and bail out. */
atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);
kfence_report_error((unsigned long)addr, false, NULL, meta,
KFENCE_ERROR_INVALID_FREE);
raw_spin_unlock_irqrestore(&meta->lock, flags);
return;
}
/* KCSAN: assert exclusive write access to the whole page until the scope ends below. */
kcsan_begin_scoped_access((void *)ALIGN_DOWN((unsigned long)addr, PAGE_SIZE), PAGE_SIZE,
KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT,
&assert_page_exclusive);
/* With stress-test faults the object page may be protected; the canary check needs access. */
if (CONFIG_KFENCE_STRESS_TEST_FAULTS)
kfence_unprotect((unsigned long)addr);
/* A page recorded by kfence_handle_page_fault() is cleared and protected again. */
if (meta->unprotected_page) {
memzero_explicit((void *)ALIGN_DOWN(meta->unprotected_page, PAGE_SIZE), PAGE_SIZE);
kfence_protect(meta->unprotected_page);
meta->unprotected_page = 0;
}
/* Mark the object as freed; a stack trace is captured by the callee. */
metadata_update_state(meta, KFENCE_OBJECT_FREED, NULL, 0);
init = slab_want_init_on_free(meta->cache);
raw_spin_unlock_irqrestore(&meta->lock, flags);
alloc_covered_add(meta->alloc_stack_hash, -1);
/* Detect out-of-bounds writes that did not hit a guard page. */
check_canary(meta);
/* Clear the object if init-on-free is requested (not done for zombies). */
if (!zombie && unlikely(init))
memzero_explicit(addr, meta->size);
/* Protect the page so that later accesses to the freed object fault. */
kfence_protect((unsigned long)addr);
kcsan_end_scoped_access(&assert_page_exclusive);
if (!zombie) {
/* Make the object available again, at the tail of the freelist. */
raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
KFENCE_WARN_ON(!list_empty(&meta->list));
list_add_tail(&meta->list, &kfence_freelist);
raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);
atomic_long_dec(&counters[KFENCE_COUNTER_ALLOCATED]);
atomic_long_inc(&counters[KFENCE_COUNTER_FREES]);
} else {
atomic_long_inc(&counters[KFENCE_COUNTER_ZOMBIES]);
}
}
/*
 * RCU callback queued by __kfence_free(): performs the deferred free of the
 * object whose metadata embeds @h.
 */
static void rcu_guarded_free(struct rcu_head *h)
{
	struct kfence_metadata *owner = container_of(h, struct kfence_metadata, rcu_head);
	void *object = (void *)owner->addr;

	kfence_guarded_free(object, owner, false);
}
/*
 * Initialize the pool at __kfence_pool and the metadata array at
 * kfence_metadata_init.
 *
 * Pool layout as set up here: pages 0 and 1 are protected; after that, object
 * pages (even page index) alternate with protected guard pages (odd index).
 *
 * On success, kfence_metadata is published with smp_store_release() so that
 * lockless readers only ever observe a fully initialized array.
 *
 * Return: 0 on success. On failure, the address within the pool from which the
 * caller may release the remaining pool memory. Whenever page flags were
 * modified, they are reset before returning an error, so that the pages can be
 * handed back to the page allocator.
 */
static unsigned long kfence_init_pool(void)
{
	unsigned long addr;
	struct page *pages;
	int i;

	if (!arch_kfence_init_pool())
		return (unsigned long)__kfence_pool;

	addr = (unsigned long)__kfence_pool;
	pages = virt_to_page(__kfence_pool);

	/* Mark all object pages (even index, excluding page 0) as slab pages. */
	for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) {
		struct slab *slab = page_slab(nth_page(pages, i));

		if (!i || (i % 2))
			continue;

		__folio_set_slab(slab_folio(slab));
#ifdef CONFIG_MEMCG
		slab->memcg_data = (unsigned long)&kfence_metadata_init[i / 2 - 1].objcg |
				   MEMCG_DATA_OBJCGS;
#endif
	}

	/*
	 * Protect the first 2 pages. On failure the slab marking done above must
	 * be undone as well: the caller frees the pool starting at the returned
	 * address, and those pages must not still be flagged as slab pages.
	 */
	for (i = 0; i < 2; i++) {
		if (unlikely(!kfence_protect(addr)))
			goto reset_slab;

		addr += PAGE_SIZE;
	}

	for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
		struct kfence_metadata *meta = &kfence_metadata_init[i];

		/* Initialize metadata and make the object available. */
		INIT_LIST_HEAD(&meta->list);
		raw_spin_lock_init(&meta->lock);
		meta->state = KFENCE_OBJECT_UNUSED;
		meta->addr = addr; /* Validated later by metadata_to_pageaddr(). */
		list_add_tail(&meta->list, &kfence_freelist);

		/* Protect the guard page to the right of the object page. */
		if (unlikely(!kfence_protect(addr + PAGE_SIZE)))
			goto reset_slab;

		addr += 2 * PAGE_SIZE;
	}

	/* Publish the metadata only after everything above succeeded. */
	smp_store_release(&kfence_metadata, kfence_metadata_init);
	return 0;

reset_slab:
	/* Undo the slab marking of all object pages. */
	for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) {
		struct slab *slab = page_slab(nth_page(pages, i));

		if (!i || (i % 2))
			continue;
#ifdef CONFIG_MEMCG
		slab->memcg_data = 0;
#endif
		__folio_clear_slab(slab_folio(slab));
	}

	return addr;
}
/*
 * Boot-time pool initialization.
 *
 * Return: true if the pool was initialized. On failure, the part of the pool
 * starting at the address returned by kfence_init_pool() and the metadata
 * array are handed back via memblock_free_late(), both pointers are cleared,
 * and false is returned. Also returns false if there is no pool at all.
 */
static bool __init kfence_init_pool_early(void)
{
	unsigned long fail_addr;

	if (!__kfence_pool)
		return false;

	fail_addr = kfence_init_pool();
	if (fail_addr) {
		/* Only release the pool from the failing address onwards. */
		memblock_free_late(__pa(fail_addr),
				   KFENCE_POOL_SIZE - (fail_addr - (unsigned long)__kfence_pool));
		__kfence_pool = NULL;

		memblock_free_late(__pa(kfence_metadata_init), KFENCE_METADATA_SIZE);
		kfence_metadata_init = NULL;

		return false;
	}

	/* The pool stays allocated from here on; tell kmemleak to ignore it. */
	kmemleak_ignore_phys(__pa(__kfence_pool));
	return true;
}
/*
 * seq_file show callback for the debugfs "stats" file: prints the enabled
 * state followed by one "<name>: <value>" line per counter.
 */
static int stats_show(struct seq_file *seq, void *v)
{
	int id = 0;

	seq_printf(seq, "enabled: %i\n", READ_ONCE(kfence_enabled));

	while (id < KFENCE_COUNTER_COUNT) {
		seq_printf(seq, "%s: %ld\n", counter_names[id], atomic_long_read(&counters[id]));
		id++;
	}

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(stats);
/*
 * seq_file start callback for the "objects" file.
 *
 * The iterator cookie is the object index plus one, so that index 0 is not
 * confused with the NULL end-of-sequence marker.
 */
static void *start_object(struct seq_file *seq, loff_t *pos)
{
	if (*pos >= CONFIG_KFENCE_NUM_OBJECTS)
		return NULL;

	return (void *)((long)*pos + 1);
}
static void stop_object(struct seq_file *seq, void *v)
{
}
/*
 * seq_file next callback for the "objects" file: advance the position and
 * return the cookie (index plus one) of the next object, or NULL past the end.
 */
static void *next_object(struct seq_file *seq, void *v, loff_t *pos)
{
	*pos += 1;

	if (*pos >= CONFIG_KFENCE_NUM_OBJECTS)
		return NULL;

	return (void *)((long)*pos + 1);
}
/*
 * seq_file show callback for the "objects" file: print one object, under its
 * lock, followed by a separator line. @v is the object index plus one.
 */
static int show_object(struct seq_file *seq, void *v)
{
	const long index = (long)v - 1;
	struct kfence_metadata *object = &kfence_metadata[index];
	unsigned long irqflags;

	raw_spin_lock_irqsave(&object->lock, irqflags);
	kfence_print_object(seq, object);
	raw_spin_unlock_irqrestore(&object->lock, irqflags);

	seq_puts(seq, "---------------------------------\n");

	return 0;
}
/* seq_file iterator over all KFENCE objects, backing the debugfs "objects" file. */
static const struct seq_operations objects_sops = {
.start = start_object,
.next = next_object,
.stop = stop_object,
.show = show_object,
};
DEFINE_SEQ_ATTRIBUTE(objects);
/*
 * Create the debugfs directory "kfence" with the files "stats" (0444) and
 * "objects" (0400). Does nothing while KFENCE is disabled.
 *
 * Runs as a late initcall and is also called from kfence_init_late().
 *
 * Return: always 0.
 */
static int kfence_debugfs_init(void)
{
	struct dentry *dir;

	if (READ_ONCE(kfence_enabled)) {
		dir = debugfs_create_dir("kfence", NULL);
		debugfs_create_file("stats", 0444, dir, NULL, &stats_fops);
		debugfs_create_file("objects", 0400, dir, NULL, &objects_fops);
	}

	return 0;
}
late_initcall(kfence_debugfs_init);
/* Run check_canary() on every object that is currently allocated. */
static void kfence_check_all_canary(void)
{
	int idx = 0;

	while (idx < CONFIG_KFENCE_NUM_OBJECTS) {
		struct kfence_metadata *object = &kfence_metadata[idx++];

		if (object->state != KFENCE_OBJECT_ALLOCATED)
			continue;

		check_canary(object);
	}
}
/*
 * Notifier callback: check the canaries of all allocated objects. Registered
 * on panic_notifier_list by kfence_init_enable() when kfence_check_on_panic is
 * set. All arguments are unused.
 */
static int kfence_check_canary_callback(struct notifier_block *nb,
unsigned long reason, void *arg)
{
kfence_check_all_canary();
return NOTIFY_OK;
}
static struct notifier_block kfence_check_canary_notifier = {
.notifier_call = kfence_check_canary_callback,
};
/* Delayed work that periodically runs toggle_allocation_gate(). */
static struct delayed_work kfence_timer;
#ifdef CONFIG_KFENCE_STATIC_KEYS
/* toggle_allocation_gate() sleeps here until the allocation gate becomes non-zero. */
static DECLARE_WAIT_QUEUE_HEAD(allocation_wait);
/* irq_work handler: wake up the waiter in toggle_allocation_gate(). */
static void wake_up_kfence_timer(struct irq_work *work)
{
wake_up(&allocation_wait);
}
/* Queued from __kfence_alloc() instead of calling wake_up() there directly. */
static DEFINE_IRQ_WORK(wake_up_kfence_timer_work, wake_up_kfence_timer);
#endif
/*
 * Work function of kfence_timer: open the allocation gate and re-arm the
 * timer for the next sample interval. Does nothing (and does not re-arm) once
 * KFENCE is disabled.
 */
static void toggle_allocation_gate(struct work_struct *work)
{
if (!READ_ONCE(kfence_enabled))
return;
/* Open the gate: the next __kfence_alloc() to increment it to 1 may proceed. */
atomic_set(&kfence_allocation_gate, 0);
#ifdef CONFIG_KFENCE_STATIC_KEYS
/* Enable the static key and sleep until the gate has been incremented. */
static_branch_enable(&kfence_allocation_key);
wait_event_idle(allocation_wait, atomic_read(&kfence_allocation_gate));
/* Disable the static key again before re-arming the timer. */
static_branch_disable(&kfence_allocation_key);
#endif
queue_delayed_work(system_unbound_wq, &kfence_timer,
msecs_to_jiffies(kfence_sample_interval));
}
/*
 * Allocate the KFENCE pool and the metadata array from memblock.
 *
 * Nothing is allocated if the sample interval is 0. An already existing
 * __kfence_pool is reused instead of allocating a new one. If the metadata
 * allocation fails, the pool is released again and __kfence_pool is reset to
 * NULL.
 */
void __init kfence_alloc_pool_and_metadata(void)
{
	if (!kfence_sample_interval)
		return;

	/* Only allocate a pool if none has been set up yet. */
	if (!__kfence_pool) {
		__kfence_pool = memblock_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
		if (!__kfence_pool) {
			pr_err("failed to allocate pool\n");
			return;
		}
	}

	kfence_metadata_init = memblock_alloc(KFENCE_METADATA_SIZE, PAGE_SIZE);
	if (kfence_metadata_init)
		return;

	pr_err("failed to allocate metadata\n");
	memblock_free(__kfence_pool, KFENCE_POOL_SIZE);
	__kfence_pool = NULL;
}
/*
 * Switch KFENCE on after the pool has been initialized: set up the static key
 * (when static keys are not used for gating), the timer work item and the
 * optional panic notifier, mark KFENCE enabled, start the timer immediately
 * and log the pool range.
 */
static void kfence_init_enable(void)
{
	/* Without CONFIG_KFENCE_STATIC_KEYS the key stays enabled for good. */
	if (!IS_ENABLED(CONFIG_KFENCE_STATIC_KEYS))
		static_branch_enable(&kfence_allocation_key);

	if (!kfence_deferrable)
		INIT_DELAYED_WORK(&kfence_timer, toggle_allocation_gate);
	else
		INIT_DEFERRABLE_WORK(&kfence_timer, toggle_allocation_gate);

	if (kfence_check_on_panic)
		atomic_notifier_chain_register(&panic_notifier_list, &kfence_check_canary_notifier);

	WRITE_ONCE(kfence_enabled, true);
	queue_delayed_work(system_unbound_wq, &kfence_timer, 0);

	pr_info("initialized - using %lu bytes for %d objects at 0x%p-0x%p\n", KFENCE_POOL_SIZE,
		CONFIG_KFENCE_NUM_OBJECTS, (void *)__kfence_pool,
		(void *)(__kfence_pool + KFENCE_POOL_SIZE));
}
/*
 * Boot-time entry point: seed the stack hash and, unless the sample interval
 * is 0, initialize the pool and enable KFENCE. A pool initialization failure
 * is logged and leaves KFENCE disabled.
 */
void __init kfence_init(void)
{
	stack_hash_seed = get_random_u32();

	/* A sample interval of 0 at boot keeps KFENCE off. */
	if (!kfence_sample_interval)
		return;

	if (kfence_init_pool_early()) {
		kfence_init_enable();
		return;
	}

	pr_err("%s failed\n", __func__);
}
/*
 * Allocate and initialize the pool and metadata at runtime, then enable
 * KFENCE. Called by kfence_enable_late() when no pool exists yet.
 *
 * With CONFIG_CONTIG_ALLOC the memory comes from alloc_contig_pages(),
 * otherwise from alloc_pages_exact() (which limits both regions to
 * MAX_ORDER_NR_PAGES pages).
 *
 * Return: 0 on success; -ENOMEM if an allocation failed; -EINVAL if the
 * regions are too large for alloc_pages_exact(); -EBUSY if kfence_init_pool()
 * failed. On failure, everything that can be released is released and both
 * __kfence_pool and kfence_metadata_init are reset to NULL.
 */
static int kfence_init_late(void)
{
	const unsigned long nr_pages_pool = KFENCE_POOL_SIZE / PAGE_SIZE;
	const unsigned long nr_pages_meta = KFENCE_METADATA_SIZE / PAGE_SIZE;
	unsigned long addr;
	unsigned long free_size = KFENCE_POOL_SIZE;
	int err = -ENOMEM;

#ifdef CONFIG_CONTIG_ALLOC
	struct page *pages;

	pages = alloc_contig_pages(nr_pages_pool, GFP_KERNEL, first_online_node,
				   NULL);
	if (!pages)
		return -ENOMEM;

	__kfence_pool = page_to_virt(pages);
	pages = alloc_contig_pages(nr_pages_meta, GFP_KERNEL, first_online_node,
				   NULL);
	if (pages)
		kfence_metadata_init = page_to_virt(pages);
#else
	if (nr_pages_pool > MAX_ORDER_NR_PAGES ||
	    nr_pages_meta > MAX_ORDER_NR_PAGES) {
		pr_warn("KFENCE_NUM_OBJECTS too large for buddy allocator\n");
		return -EINVAL;
	}

	__kfence_pool = alloc_pages_exact(KFENCE_POOL_SIZE, GFP_KERNEL);
	if (!__kfence_pool)
		return -ENOMEM;

	kfence_metadata_init = alloc_pages_exact(KFENCE_METADATA_SIZE, GFP_KERNEL);
#endif

	/*
	 * The free_pool path releases [addr, addr + free_size). Take the start
	 * address only now that the pool exists: __kfence_pool was still NULL on
	 * entry, so reading it earlier would make a metadata allocation failure
	 * free a bogus address and leak the pool.
	 */
	addr = (unsigned long)__kfence_pool;

	if (!kfence_metadata_init)
		goto free_pool;

	memzero_explicit(kfence_metadata_init, KFENCE_METADATA_SIZE);
	addr = kfence_init_pool();
	if (!addr) {
		kfence_init_enable();
		kfence_debugfs_init();
		return 0;
	}

	/* Only the part of the pool from the failing address onwards is released. */
	pr_err("%s failed\n", __func__);
	free_size = KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool);
	err = -EBUSY;

#ifdef CONFIG_CONTIG_ALLOC
	free_contig_range(page_to_pfn(virt_to_page((void *)kfence_metadata_init)),
			  nr_pages_meta);
free_pool:
	free_contig_range(page_to_pfn(virt_to_page((void *)addr)),
			  free_size / PAGE_SIZE);
#else
	free_pages_exact((void *)kfence_metadata_init, KFENCE_METADATA_SIZE);
free_pool:
	free_pages_exact((void *)addr, free_size);
#endif

	kfence_metadata_init = NULL;
	__kfence_pool = NULL;
	return err;
}
/*
 * Enable KFENCE at runtime. If a pool already exists, KFENCE is simply
 * re-enabled and the timer restarted; otherwise the pool is set up first via
 * kfence_init_late().
 *
 * Return: 0 on success, or the error from kfence_init_late().
 */
static int kfence_enable_late(void)
{
	if (__kfence_pool) {
		WRITE_ONCE(kfence_enabled, true);
		queue_delayed_work(system_unbound_wq, &kfence_timer, 0);
		pr_info("re-enabled\n");
		return 0;
	}

	return kfence_init_late();
}
/*
 * Handle destruction of cache @s.
 *
 * First pass: every object of @s that is still allocated is freed as a
 * "zombie" (marked freed, page protected, not returned to the freelist).
 * Second pass: every freed object still referring to @s has its cache pointer
 * cleared.
 */
void kfence_shutdown_cache(struct kmem_cache *s)
{
unsigned long flags;
struct kfence_metadata *meta;
int i;
/* Pairs with smp_store_release() in kfence_init_pool(); NULL means no metadata yet. */
if (!smp_load_acquire(&kfence_metadata))
return;
for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
bool in_use;
meta = &kfence_metadata[i];
/* Cheap lockless pre-check; re-checked under the lock below. */
if (READ_ONCE(meta->cache) != s ||
READ_ONCE(meta->state) != KFENCE_OBJECT_ALLOCATED)
continue;
raw_spin_lock_irqsave(&meta->lock, flags);
in_use = meta->cache == s && meta->state == KFENCE_OBJECT_ALLOCATED;
raw_spin_unlock_irqrestore(&meta->lock, flags);
if (in_use) {
/* kfence_guarded_free() takes meta->lock itself, hence called unlocked. */
kfence_guarded_free((void *)meta->addr, meta, true);
}
}
for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
meta = &kfence_metadata[i];
/* Same lockless pre-check / locked re-check pattern as above. */
if (READ_ONCE(meta->cache) != s || READ_ONCE(meta->state) != KFENCE_OBJECT_FREED)
continue;
raw_spin_lock_irqsave(&meta->lock, flags);
if (meta->cache == s && meta->state == KFENCE_OBJECT_FREED)
meta->cache = NULL;
raw_spin_unlock_irqrestore(&meta->lock, flags);
}
}
/*
 * Try to serve an allocation of @size bytes for cache @s from the KFENCE pool.
 *
 * Return: address of a guarded object, or NULL if the allocation is not
 * eligible, the allocation gate is closed, KFENCE is disabled, the allocation
 * is skipped as already covered, or no object could be obtained.
 */
void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
{
unsigned long stack_entries[KFENCE_STACK_DEPTH];
size_t num_stack_entries;
u32 alloc_stack_hash;
/*
 * An object must fit into a single page. Checked before touching the
 * gate, so an ineligible request does not consume the sample.
 */
if (size > PAGE_SIZE) {
atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]);
return NULL;
}
/* Skip requests for specific zones and DMA/DMA32 caches. */
if ((flags & GFP_ZONEMASK) ||
(s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32))) {
atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]);
return NULL;
}
/* The cache opted out of KFENCE. */
if (s->flags & SLAB_SKIP_KFENCE)
return NULL;
/* Only the first caller after the gate was reset to 0 may proceed. */
if (atomic_inc_return(&kfence_allocation_gate) > 1)
return NULL;
#ifdef CONFIG_KFENCE_STATIC_KEYS
/* Wake the waiter in toggle_allocation_gate() via irq_work rather than directly. */
if (waitqueue_active(&allocation_wait)) {
irq_work_queue(&wake_up_kfence_timer_work);
}
#endif
if (!READ_ONCE(kfence_enabled))
return NULL;
num_stack_entries = stack_trace_save(stack_entries, KFENCE_STACK_DEPTH, 0);
/*
 * When the pool usage is above the threshold, skip allocations whose
 * stack hash is already accounted in alloc_covered[].
 */
alloc_stack_hash = get_alloc_stack_hash(stack_entries, num_stack_entries);
if (should_skip_covered() && alloc_covered_contains(alloc_stack_hash)) {
atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_COVERED]);
return NULL;
}
return kfence_guarded_alloc(s, size, flags, stack_entries, num_stack_entries,
alloc_stack_hash);
}
/*
 * Return the recorded size of the KFENCE object that @addr belongs to, or 0 if
 * no metadata is found for @addr. The size is read without taking the object
 * lock.
 */
size_t kfence_ksize(const void *addr)
{
	const struct kfence_metadata *owner = addr_to_metadata((unsigned long)addr);

	if (!owner)
		return 0;

	return owner->size;
}
/*
 * Return the start address of the KFENCE object that @addr belongs to, or NULL
 * if no metadata is found for @addr. The address is read without taking the
 * object lock.
 */
void *kfence_object_start(const void *addr)
{
	const struct kfence_metadata *owner = addr_to_metadata((unsigned long)addr);

	if (!owner)
		return NULL;

	return (void *)owner->addr;
}
/*
 * Free the KFENCE object at @addr.
 *
 * Objects of SLAB_TYPESAFE_BY_RCU caches are freed from an RCU callback;
 * everything else, including objects whose cache pointer is NULL, is freed
 * immediately.
 */
void __kfence_free(void *addr)
{
	struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);

#ifdef CONFIG_MEMCG
	KFENCE_WARN_ON(meta->objcg);
#endif
	/* Common case first: no cache recorded, or not an RCU-typesafe cache. */
	if (likely(!meta->cache || !(meta->cache->flags & SLAB_TYPESAFE_BY_RCU)))
		kfence_guarded_free(addr, meta, false);
	else
		call_rcu(&meta->rcu_head, rcu_guarded_free);
}
/*
 * Handle a page fault at @addr.
 *
 * Return: false if @addr is not a KFENCE address. Otherwise the fault is
 * reported (unless KFENCE is disabled), the faulting page is unprotected so
 * the access can proceed, and the result of kfence_unprotect() is returned.
 */
bool kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs *regs)
{
const int page_index = (addr - (unsigned long)__kfence_pool) / PAGE_SIZE;
struct kfence_metadata *to_report = NULL;
enum kfence_error_type error_type;
unsigned long flags;
if (!is_kfence_address((void *)addr))
return false;
/* KFENCE disabled at runtime: just unprotect and let the access proceed. */
if (!READ_ONCE(kfence_enabled))
return kfence_unprotect(addr);
atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);
if (page_index % 2) {
/* Odd page index: treated as a guard page, i.e. an out-of-bounds access. */
struct kfence_metadata *meta;
int distance = 0;
/* Candidate 1: allocated object in the page to the left. */
meta = addr_to_metadata(addr - PAGE_SIZE);
if (meta && READ_ONCE(meta->state) == KFENCE_OBJECT_ALLOCATED) {
to_report = meta;
/* Read without the lock (data_race()); distance from the object's end. */
distance = addr - data_race(meta->addr + meta->size);
}
/* Candidate 2: allocated object in the page to the right; prefer the closer one. */
meta = addr_to_metadata(addr + PAGE_SIZE);
if (meta && READ_ONCE(meta->state) == KFENCE_OBJECT_ALLOCATED) {
if (!to_report || distance > data_race(meta->addr) - addr)
to_report = meta;
}
if (!to_report)
goto out;
/* Lock is released after reporting, below the "out" label. */
raw_spin_lock_irqsave(&to_report->lock, flags);
/* Remember the page so that kfence_guarded_free() protects it again. */
to_report->unprotected_page = addr;
error_type = KFENCE_ERROR_OOB;
} else {
/* Even page index: an object page; reported as a use-after-free. */
to_report = addr_to_metadata(addr);
if (!to_report)
goto out;
/* Lock is released after reporting, below the "out" label. */
raw_spin_lock_irqsave(&to_report->lock, flags);
error_type = KFENCE_ERROR_UAF;
}
out:
if (to_report) {
kfence_report_error(addr, is_write, regs, to_report, error_type);
raw_spin_unlock_irqrestore(&to_report->lock, flags);
} else {
/* No object could be associated with the access. */
kfence_report_error(addr, is_write, regs, NULL, KFENCE_ERROR_INVALID);
}
return kfence_unprotect(addr);
}