#define pr_fmt(fmt) "efi: " fmt
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/memblock.h>
#include <linux/ioport.h>
#include <linux/mc146818rtc.h>
#include <linux/efi.h>
#include <linux/export.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/ucs2_string.h>
#include <linux/cc_platform.h>
#include <linux/sched/task.h>
#include <asm/setup.h>
#include <asm/page.h>
#include <asm/e820/api.h>
#include <asm/tlbflush.h>
#include <asm/proto.h>
#include <asm/efi.h>
#include <asm/cacheflush.h>
#include <asm/fixmap.h>
#include <asm/realmode.h>
#include <asm/time.h>
#include <asm/pgalloc.h>
#include <asm/sev.h>
static u64 efi_va = EFI_VA_START;
static struct mm_struct *efi_prev_mm;
int __init efi_alloc_page_tables(void)
{
pgd_t *pgd, *efi_pgd;
p4d_t *p4d;
pud_t *pud;
gfp_t gfp_mask;
gfp_mask = GFP_KERNEL | __GFP_ZERO;
efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
if (!efi_pgd)
goto fail;
pgd = efi_pgd + pgd_index(EFI_VA_END);
p4d = p4d_alloc(&init_mm, pgd, EFI_VA_END);
if (!p4d)
goto free_pgd;
pud = pud_alloc(&init_mm, p4d, EFI_VA_END);
if (!pud)
goto free_p4d;
efi_mm.pgd = efi_pgd;
mm_init_cpumask(&efi_mm);
init_new_context(NULL, &efi_mm);
return 0;
free_p4d:
if (pgtable_l5_enabled())
free_page((unsigned long)pgd_page_vaddr(*pgd));
free_pgd:
free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER);
fail:
return -ENOMEM;
}
void efi_sync_low_kernel_mappings(void)
{
unsigned num_entries;
pgd_t *pgd_k, *pgd_efi;
p4d_t *p4d_k, *p4d_efi;
pud_t *pud_k, *pud_efi;
pgd_t *efi_pgd = efi_mm.pgd;
pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET);
pgd_k = pgd_offset_k(PAGE_OFFSET);
num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET);
memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries);
pgd_efi = efi_pgd + pgd_index(EFI_VA_END);
pgd_k = pgd_offset_k(EFI_VA_END);
p4d_efi = p4d_offset(pgd_efi, 0);
p4d_k = p4d_offset(pgd_k, 0);
num_entries = p4d_index(EFI_VA_END);
memcpy(p4d_efi, p4d_k, sizeof(p4d_t) * num_entries);
BUILD_BUG_ON((EFI_VA_START & ~PUD_MASK) != 0);
BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0);
p4d_efi = p4d_offset(pgd_efi, EFI_VA_END);
p4d_k = p4d_offset(pgd_k, EFI_VA_END);
pud_efi = pud_offset(p4d_efi, 0);
pud_k = pud_offset(p4d_k, 0);
num_entries = pud_index(EFI_VA_END);
memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
pud_efi = pud_offset(p4d_efi, EFI_VA_START);
pud_k = pud_offset(p4d_k, EFI_VA_START);
num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START);
memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
}
static inline phys_addr_t
virt_to_phys_or_null_size(void *va, unsigned long size)
{
phys_addr_t pa;
if (!va)
return 0;
if (virt_addr_valid(va))
return virt_to_phys(va);
pa = slow_virt_to_phys(va);
if (WARN_ON((pa ^ (pa + size - 1)) & PAGE_MASK))
return 0;
return pa;
}
#define virt_to_phys_or_null(addr) \
virt_to_phys_or_null_size((addr), sizeof(*(addr)))
int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
{
extern const u8 __efi64_thunk_ret_tramp[];
unsigned long pfn, text, pf, rodata, tramp;
struct page *page;
unsigned npages;
pgd_t *pgd = efi_mm.pgd;
pfn = pa_memmap >> PAGE_SHIFT;
pf = _PAGE_NX | _PAGE_RW | _PAGE_ENC;
if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, pf)) {
pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap);
return 1;
}
if (kernel_map_pages_in_pgd(pgd, 0x0, 0x0, 1, pf)) {
pr_err("Failed to create 1:1 mapping for the first page!\n");
return 1;
}
if (sev_es_efi_map_ghcbs(pgd)) {
pr_err("Failed to create 1:1 mapping for the GHCBs!\n");
return 1;
}
if (!efi_is_mixed())
return 0;
page = alloc_page(GFP_KERNEL|__GFP_DMA32);
if (!page) {
pr_err("Unable to allocate EFI runtime stack < 4GB\n");
return 1;
}
efi_mixed_mode_stack_pa = page_to_phys(page + 1);
npages = (_etext - _text) >> PAGE_SHIFT;
text = __pa(_text);
if (kernel_unmap_pages_in_pgd(pgd, text, npages)) {
pr_err("Failed to unmap kernel text 1:1 mapping\n");
return 1;
}
npages = (__end_rodata - __start_rodata) >> PAGE_SHIFT;
rodata = __pa(__start_rodata);
pfn = rodata >> PAGE_SHIFT;
pf = _PAGE_NX | _PAGE_ENC;
if (kernel_map_pages_in_pgd(pgd, pfn, rodata, npages, pf)) {
pr_err("Failed to map kernel rodata 1:1\n");
return 1;
}
tramp = __pa(__efi64_thunk_ret_tramp);
pfn = tramp >> PAGE_SHIFT;
pf = _PAGE_ENC;
if (kernel_map_pages_in_pgd(pgd, pfn, tramp, 1, pf)) {
pr_err("Failed to map mixed mode return trampoline\n");
return 1;
}
return 0;
}
static void __init __map_region(efi_memory_desc_t *md, u64 va)
{
unsigned long flags = _PAGE_RW;
unsigned long pfn;
pgd_t *pgd = efi_mm.pgd;
if (md->type != EFI_BOOT_SERVICES_CODE &&
md->type != EFI_RUNTIME_SERVICES_CODE)
flags |= _PAGE_NX;
if (!(md->attribute & EFI_MEMORY_WB))
flags |= _PAGE_PCD;
if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) &&
md->type != EFI_MEMORY_MAPPED_IO)
flags |= _PAGE_ENC;
pfn = md->phys_addr >> PAGE_SHIFT;
if (kernel_map_pages_in_pgd(pgd, pfn, va, md->num_pages, flags))
pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n",
md->phys_addr, va);
}
void __init efi_map_region(efi_memory_desc_t *md)
{
unsigned long size = md->num_pages << PAGE_SHIFT;
u64 pa = md->phys_addr;
__map_region(md, md->phys_addr);
if (efi_is_mixed()) {
md->virt_addr = md->phys_addr;
return;
}
efi_va -= size;
if (!(pa & (PMD_SIZE - 1))) {
efi_va &= PMD_MASK;
} else {
u64 pa_offset = pa & (PMD_SIZE - 1);
u64 prev_va = efi_va;
efi_va = (efi_va & PMD_MASK) + pa_offset;
if (efi_va > prev_va)
efi_va -= PMD_SIZE;
}
if (efi_va < EFI_VA_END) {
pr_warn(FW_WARN "VA address range overflow!\n");
return;
}
__map_region(md, efi_va);
md->virt_addr = efi_va;
}
void __init efi_map_region_fixed(efi_memory_desc_t *md)
{
__map_region(md, md->phys_addr);
__map_region(md, md->virt_addr);
}
void __init parse_efi_setup(u64 phys_addr, u32 data_len)
{
efi_setup = phys_addr + sizeof(struct setup_data);
}
static int __init efi_update_mappings(efi_memory_desc_t *md, unsigned long pf)
{
unsigned long pfn;
pgd_t *pgd = efi_mm.pgd;
int err1, err2;
pfn = md->phys_addr >> PAGE_SHIFT;
err1 = kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, md->num_pages, pf);
if (err1) {
pr_err("Error while updating 1:1 mapping PA 0x%llx -> VA 0x%llx!\n",
md->phys_addr, md->virt_addr);
}
err2 = kernel_map_pages_in_pgd(pgd, pfn, md->virt_addr, md->num_pages, pf);
if (err2) {
pr_err("Error while updating VA mapping PA 0x%llx -> VA 0x%llx!\n",
md->phys_addr, md->virt_addr);
}
return err1 || err2;
}
bool efi_disable_ibt_for_runtime __ro_after_init = true;
static int __init efi_update_mem_attr(struct mm_struct *mm, efi_memory_desc_t *md,
bool has_ibt)
{
unsigned long pf = 0;
efi_disable_ibt_for_runtime |= !has_ibt;
if (md->attribute & EFI_MEMORY_XP)
pf |= _PAGE_NX;
if (!(md->attribute & EFI_MEMORY_RO))
pf |= _PAGE_RW;
if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
pf |= _PAGE_ENC;
return efi_update_mappings(md, pf);
}
void __init efi_runtime_update_mappings(void)
{
efi_memory_desc_t *md;
if (efi_enabled(EFI_MEM_ATTR)) {
efi_disable_ibt_for_runtime = false;
efi_memattr_apply_permissions(NULL, efi_update_mem_attr);
return;
}
if (!efi_enabled(EFI_NX_PE_DATA))
return;
for_each_efi_memory_desc(md) {
unsigned long pf = 0;
if (!(md->attribute & EFI_MEMORY_RUNTIME))
continue;
if (!(md->attribute & EFI_MEMORY_WB))
pf |= _PAGE_PCD;
if ((md->attribute & EFI_MEMORY_XP) ||
(md->type == EFI_RUNTIME_SERVICES_DATA))
pf |= _PAGE_NX;
if (!(md->attribute & EFI_MEMORY_RO) &&
(md->type != EFI_RUNTIME_SERVICES_CODE))
pf |= _PAGE_RW;
if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
pf |= _PAGE_ENC;
efi_update_mappings(md, pf);
}
}
void __init efi_dump_pagetable(void)
{
#ifdef CONFIG_EFI_PGT_DUMP
ptdump_walk_pgd_level(NULL, &efi_mm);
#endif
}
static void efi_enter_mm(void)
{
efi_prev_mm = current->active_mm;
current->active_mm = &efi_mm;
switch_mm(efi_prev_mm, &efi_mm, NULL);
}
static void efi_leave_mm(void)
{
current->active_mm = efi_prev_mm;
switch_mm(&efi_mm, efi_prev_mm, NULL);
}
void arch_efi_call_virt_setup(void)
{
efi_sync_low_kernel_mappings();
efi_fpu_begin();
firmware_restrict_branch_speculation_start();
efi_enter_mm();
}
void arch_efi_call_virt_teardown(void)
{
efi_leave_mm();
firmware_restrict_branch_speculation_end();
efi_fpu_end();
}
static DEFINE_SPINLOCK(efi_runtime_lock);
#define __efi_thunk(func, ...) \
({ \
unsigned short __ds, __es; \
efi_status_t ____s; \
\
savesegment(ds, __ds); \
savesegment(es, __es); \
\
loadsegment(ss, __KERNEL_DS); \
loadsegment(ds, __KERNEL_DS); \
loadsegment(es, __KERNEL_DS); \
\
____s = efi64_thunk(efi.runtime->mixed_mode.func, __VA_ARGS__); \
\
loadsegment(ds, __ds); \
loadsegment(es, __es); \
\
____s ^= (____s & BIT(31)) | (____s & BIT_ULL(31)) << 32; \
____s; \
})
#define efi_thunk(func...) \
({ \
efi_status_t __s; \
\
arch_efi_call_virt_setup(); \
\
__s = __efi_thunk(func); \
\
arch_efi_call_virt_teardown(); \
\
__s; \
})
static efi_status_t __init __no_sanitize_address
efi_thunk_set_virtual_address_map(unsigned long memory_map_size,
unsigned long descriptor_size,
u32 descriptor_version,
efi_memory_desc_t *virtual_map)
{
efi_status_t status;
unsigned long flags;
efi_sync_low_kernel_mappings();
local_irq_save(flags);
efi_enter_mm();
status = __efi_thunk(set_virtual_address_map, memory_map_size,
descriptor_size, descriptor_version, virtual_map);
efi_leave_mm();
local_irq_restore(flags);
return status;
}
static efi_status_t efi_thunk_get_time(efi_time_t *tm, efi_time_cap_t *tc)
{
return EFI_UNSUPPORTED;
}
static efi_status_t efi_thunk_set_time(efi_time_t *tm)
{
return EFI_UNSUPPORTED;
}
static efi_status_t
efi_thunk_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending,
efi_time_t *tm)
{
return EFI_UNSUPPORTED;
}
static efi_status_t
efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
{
return EFI_UNSUPPORTED;
}
static unsigned long efi_name_size(efi_char16_t *name)
{
return ucs2_strsize(name, EFI_VAR_NAME_LEN) + 1;
}
static efi_status_t
efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor,
u32 *attr, unsigned long *data_size, void *data)
{
u8 buf[24] __aligned(8);
efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd));
efi_status_t status;
u32 phys_name, phys_vendor, phys_attr;
u32 phys_data_size, phys_data;
unsigned long flags;
spin_lock_irqsave(&efi_runtime_lock, flags);
*vnd = *vendor;
phys_data_size = virt_to_phys_or_null(data_size);
phys_vendor = virt_to_phys_or_null(vnd);
phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
phys_attr = virt_to_phys_or_null(attr);
phys_data = virt_to_phys_or_null_size(data, *data_size);
if (!phys_name || (data && !phys_data))
status = EFI_INVALID_PARAMETER;
else
status = efi_thunk(get_variable, phys_name, phys_vendor,
phys_attr, phys_data_size, phys_data);
spin_unlock_irqrestore(&efi_runtime_lock, flags);
return status;
}
static efi_status_t
efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor,
u32 attr, unsigned long data_size, void *data)
{
u8 buf[24] __aligned(8);
efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd));
u32 phys_name, phys_vendor, phys_data;
efi_status_t status;
unsigned long flags;
spin_lock_irqsave(&efi_runtime_lock, flags);
*vnd = *vendor;
phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
phys_vendor = virt_to_phys_or_null(vnd);
phys_data = virt_to_phys_or_null_size(data, data_size);
if (!phys_name || (data && !phys_data))
status = EFI_INVALID_PARAMETER;
else
status = efi_thunk(set_variable, phys_name, phys_vendor,
attr, data_size, phys_data);
spin_unlock_irqrestore(&efi_runtime_lock, flags);
return status;
}
static efi_status_t
efi_thunk_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor,
u32 attr, unsigned long data_size,
void *data)
{
u8 buf[24] __aligned(8);
efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd));
u32 phys_name, phys_vendor, phys_data;
efi_status_t status;
unsigned long flags;
if (!spin_trylock_irqsave(&efi_runtime_lock, flags))
return EFI_NOT_READY;
*vnd = *vendor;
phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
phys_vendor = virt_to_phys_or_null(vnd);
phys_data = virt_to_phys_or_null_size(data, data_size);
if (!phys_name || (data && !phys_data))
status = EFI_INVALID_PARAMETER;
else
status = efi_thunk(set_variable, phys_name, phys_vendor,
attr, data_size, phys_data);
spin_unlock_irqrestore(&efi_runtime_lock, flags);
return status;
}
static efi_status_t
efi_thunk_get_next_variable(unsigned long *name_size,
efi_char16_t *name,
efi_guid_t *vendor)
{
u8 buf[24] __aligned(8);
efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd));
efi_status_t status;
u32 phys_name_size, phys_name, phys_vendor;
unsigned long flags;
spin_lock_irqsave(&efi_runtime_lock, flags);
*vnd = *vendor;
phys_name_size = virt_to_phys_or_null(name_size);
phys_vendor = virt_to_phys_or_null(vnd);
phys_name = virt_to_phys_or_null_size(name, *name_size);
if (!phys_name)
status = EFI_INVALID_PARAMETER;
else
status = efi_thunk(get_next_variable, phys_name_size,
phys_name, phys_vendor);
spin_unlock_irqrestore(&efi_runtime_lock, flags);
*vendor = *vnd;
return status;
}
static efi_status_t
efi_thunk_get_next_high_mono_count(u32 *count)
{
return EFI_UNSUPPORTED;
}
static void
efi_thunk_reset_system(int reset_type, efi_status_t status,
unsigned long data_size, efi_char16_t *data)
{
u32 phys_data;
unsigned long flags;
spin_lock_irqsave(&efi_runtime_lock, flags);
phys_data = virt_to_phys_or_null_size(data, data_size);
efi_thunk(reset_system, reset_type, status, data_size, phys_data);
spin_unlock_irqrestore(&efi_runtime_lock, flags);
}
static efi_status_t
efi_thunk_update_capsule(efi_capsule_header_t **capsules,
unsigned long count, unsigned long sg_list)
{
return EFI_UNSUPPORTED;
}
static efi_status_t
efi_thunk_query_variable_info(u32 attr, u64 *storage_space,
u64 *remaining_space,
u64 *max_variable_size)
{
efi_status_t status;
u32 phys_storage, phys_remaining, phys_max;
unsigned long flags;
if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
return EFI_UNSUPPORTED;
spin_lock_irqsave(&efi_runtime_lock, flags);
phys_storage = virt_to_phys_or_null(storage_space);
phys_remaining = virt_to_phys_or_null(remaining_space);
phys_max = virt_to_phys_or_null(max_variable_size);
status = efi_thunk(query_variable_info, attr, phys_storage,
phys_remaining, phys_max);
spin_unlock_irqrestore(&efi_runtime_lock, flags);
return status;
}
static efi_status_t
efi_thunk_query_variable_info_nonblocking(u32 attr, u64 *storage_space,
u64 *remaining_space,
u64 *max_variable_size)
{
efi_status_t status;
u32 phys_storage, phys_remaining, phys_max;
unsigned long flags;
if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
return EFI_UNSUPPORTED;
if (!spin_trylock_irqsave(&efi_runtime_lock, flags))
return EFI_NOT_READY;
phys_storage = virt_to_phys_or_null(storage_space);
phys_remaining = virt_to_phys_or_null(remaining_space);
phys_max = virt_to_phys_or_null(max_variable_size);
status = efi_thunk(query_variable_info, attr, phys_storage,
phys_remaining, phys_max);
spin_unlock_irqrestore(&efi_runtime_lock, flags);
return status;
}
static efi_status_t
efi_thunk_query_capsule_caps(efi_capsule_header_t **capsules,
unsigned long count, u64 *max_size,
int *reset_type)
{
return EFI_UNSUPPORTED;
}
void __init efi_thunk_runtime_setup(void)
{
if (!IS_ENABLED(CONFIG_EFI_MIXED))
return;
efi.get_time = efi_thunk_get_time;
efi.set_time = efi_thunk_set_time;
efi.get_wakeup_time = efi_thunk_get_wakeup_time;
efi.set_wakeup_time = efi_thunk_set_wakeup_time;
efi.get_variable = efi_thunk_get_variable;
efi.get_next_variable = efi_thunk_get_next_variable;
efi.set_variable = efi_thunk_set_variable;
efi.set_variable_nonblocking = efi_thunk_set_variable_nonblocking;
efi.get_next_high_mono_count = efi_thunk_get_next_high_mono_count;
efi.reset_system = efi_thunk_reset_system;
efi.query_variable_info = efi_thunk_query_variable_info;
efi.query_variable_info_nonblocking = efi_thunk_query_variable_info_nonblocking;
efi.update_capsule = efi_thunk_update_capsule;
efi.query_capsule_caps = efi_thunk_query_capsule_caps;
}
efi_status_t __init __no_sanitize_address
efi_set_virtual_address_map(unsigned long memory_map_size,
unsigned long descriptor_size,
u32 descriptor_version,
efi_memory_desc_t *virtual_map,
unsigned long systab_phys)
{
const efi_system_table_t *systab = (efi_system_table_t *)systab_phys;
efi_status_t status;
unsigned long flags;
if (efi_is_mixed())
return efi_thunk_set_virtual_address_map(memory_map_size,
descriptor_size,
descriptor_version,
virtual_map);
efi_enter_mm();
efi_fpu_begin();
local_irq_save(flags);
status = arch_efi_call_virt(efi.runtime, set_virtual_address_map,
memory_map_size, descriptor_size,
descriptor_version, virtual_map);
local_irq_restore(flags);
efi_fpu_end();
efi.runtime = READ_ONCE(systab->runtime);
efi_leave_mm();
return status;
}