#define pr_fmt(fmt) "SEV: " fmt
#include <linux/sched/debug.h> /* For show_regs() */
#include <linux/percpu-defs.h>
#include <linux/cc_platform.h>
#include <linux/printk.h>
#include <linux/mm_types.h>
#include <linux/set_memory.h>
#include <linux/memblock.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
#include <asm/sev.h>
#include <asm/insn-eval.h>
#include <asm/fpu/xcr.h>
#include <asm/processor.h>
#include <asm/realmode.h>
#include <asm/setup.h>
#include <asm/traps.h>
#include <asm/svm.h>
#include <asm/smp.h>
#include <asm/cpu.h>
/* DR7 value reported by vc_handle_dr7_read() while no per-CPU runtime data exists */
#define DR7_RESET_VALUE 0x400
/* Page-aligned backing store for the GHCB used by handle_vc_boot_ghcb() */
static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
/* NULL until sev_es_setup_ghcb() points it at boot_ghcb_page */
static struct ghcb __initdata *boot_ghcb;
/*
 * Per-CPU state used by the runtime #VC handling code. One instance is
 * allocated for every possible CPU by alloc_runtime_data().
 */
struct sev_es_runtime_data {
/* Primary GHCB; its offset must be a multiple of PAGE_SIZE (BUILD_BUG_ON in sev_es_init_vc_handling()) */
struct ghcb ghcb_page;
/* Receives a copy of ghcb_page when __sev_get_ghcb() is entered while ghcb_page is in use */
struct ghcb backup_ghcb;
/* True while ghcb_page is handed out by __sev_get_ghcb() */
bool ghcb_active;
/* True while backup_ghcb holds a saved copy of ghcb_page */
bool backup_ghcb_active;
/* Last DR7 value accepted by vc_handle_dr7_write(); returned by vc_handle_dr7_read() */
unsigned long dr7;
};
/*
 * Hand-over state between a __sev_get_ghcb() call and the matching
 * __sev_put_ghcb() call.
 */
struct ghcb_state {
/* Backup GHCB to restore from on put, or NULL if no backup was taken */
struct ghcb *ghcb;
};
/* Per-CPU pointer to that CPU's runtime data; assigned in alloc_runtime_data() */
static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
/* Static key that sev_es_init_vc_handling() enables once the SEV-ES checks have passed */
DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);
/*
 * Tell whether the stack pointer saved in @regs lies within this CPU's #VC
 * IST stack, i.e. in [bottom, top).
 *
 * Contexts that were in user mode, or inside the syscall gap, are always
 * reported as not being on the #VC stack, whatever regs->sp contains.
 */
static __always_inline bool on_vc_stack(struct pt_regs *regs)
{
	unsigned long stack_ptr = regs->sp;

	if (user_mode(regs) || ip_within_syscall_gap(regs))
		return false;

	if (stack_ptr < __this_cpu_ist_bottom_va(VC))
		return false;

	return stack_ptr < __this_cpu_ist_top_va(VC);
}
/*
 * Adjust this CPU's #VC IST entry for the interrupted context in @regs.
 *
 * The previous IST value is stored in the word directly below the new IST
 * value, where __sev_es_ist_exit() reads it back.
 */
void noinstr __sev_es_ist_enter(struct pt_regs *regs)
{
unsigned long old_ist, new_ist;
/* Start from the current #VC IST entry. */
new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
/*
 * If the interrupted context was itself running on the #VC stack, continue
 * from its stack pointer rather than from the current IST value.
 */
if (on_vc_stack(regs))
new_ist = regs->sp;
/* Reserve one word and save the old IST value in it. */
new_ist -= sizeof(old_ist);
*(unsigned long *)new_ist = old_ist;
/* Install the adjusted IST entry. */
this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist);
}
/*
 * Counterpart of __sev_es_ist_enter(): reload the #VC IST entry from the
 * value stored at the address the entry currently points to.
 *
 * Warns and leaves the entry unchanged if it already equals the top of the
 * #VC stack (presumably meaning no adjustment is in effect - confirm against
 * the callers).
 */
void noinstr __sev_es_ist_exit(void)
{
unsigned long ist;
/* Current #VC IST entry; it doubles as the address of the saved old value. */
ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
if (WARN_ON(ist == __this_cpu_ist_top_va(VC)))
return;
/* Write the saved value back into the TSS. */
this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
}
/*
 * Acquire this CPU's GHCB. Expected to run with IRQs disabled; a warning is
 * emitted otherwise.
 *
 * If the GHCB is free it is marked active and @state->ghcb is set to NULL.
 * If it is already active, its contents are copied into the per-CPU backup
 * GHCB and @state->ghcb points at that backup so __sev_put_ghcb() can restore
 * it. If the backup is active as well, the function panics.
 *
 * Returns this CPU's primary GHCB.
 */
static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
{
struct sev_es_runtime_data *data;
struct ghcb *ghcb;
WARN_ON(!irqs_disabled());
data = this_cpu_read(runtime_data);
ghcb = &data->ghcb_page;
if (unlikely(data->ghcb_active)) {
/* The GHCB is in use: its current contents have to be preserved. */
if (unlikely(data->backup_ghcb_active)) {
/*
 * The backup is in use too, so nothing can be saved. Both flags are
 * cleared before panicking - presumably so that code reached from
 * panic() can still obtain a GHCB.
 */
data->ghcb_active = false;
data->backup_ghcb_active = false;
instrumentation_begin();
panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
instrumentation_end();
}
/* Mark the backup active before writing to it. */
data->backup_ghcb_active = true;
state->ghcb = &data->backup_ghcb;
/* Save the in-use GHCB contents into the backup. */
*state->ghcb = *ghcb;
} else {
/* Nothing to back up; just claim the GHCB. */
state->ghcb = NULL;
data->ghcb_active = true;
}
return ghcb;
}
/* Return the current 64-bit value of the SEV-ES GHCB MSR. */
static inline u64 sev_es_rd_ghcb_msr(void)
{
	u64 msr_val = __rdmsr(MSR_AMD64_SEV_ES_GHCB);

	return msr_val;
}
/*
 * Write @val to the SEV-ES GHCB MSR, passing it to native_wrmsr() as its
 * low and high 32-bit halves.
 */
static __always_inline void sev_es_wr_ghcb_msr(u64 val)
{
	native_wrmsr(MSR_AMD64_SEV_ES_GHCB, (u32)(val), (u32)(val >> 32));
}
/*
 * Copy MAX_INSN_SIZE bytes starting at the interrupted instruction pointer
 * into @buffer using the non-faulting kernel copy helper.
 *
 * Returns the result of copy_from_kernel_nofault(); callers treat any
 * non-zero value as a failed fetch.
 */
static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
				unsigned char *buffer)
{
	unsigned char *insn_addr = (unsigned char *)ctxt->regs->ip;

	return copy_from_kernel_nofault(buffer, insn_addr, MAX_INSN_SIZE);
}
/*
 * Fetch and decode the user-mode instruction at ctxt->regs into ctxt->insn.
 *
 * A fetch result of 0 is reported as a user instruction-fetch #PF at
 * regs->ip, a result of -EINVAL as a #GP; both return ES_EXCEPTION with
 * ctxt->fi filled in. Otherwise returns ES_OK when decoding succeeded and
 * insn.immediate.got is set, and ES_DECODE_FAILED in every other case.
 */
static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
{
	char insn_buf[MAX_INSN_SIZE];
	int fetched;

	fetched = insn_fetch_from_user_inatomic(ctxt->regs, insn_buf);

	switch (fetched) {
	case 0:
		ctxt->fi.vector     = X86_TRAP_PF;
		ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
		ctxt->fi.cr2        = ctxt->regs->ip;
		return ES_EXCEPTION;
	case -EINVAL:
		ctxt->fi.vector     = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		ctxt->fi.cr2        = 0;
		return ES_EXCEPTION;
	default:
		break;
	}

	if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, insn_buf, fetched))
		return ES_DECODE_FAILED;

	return ctxt->insn.immediate.got ? ES_OK : ES_DECODE_FAILED;
}
/*
 * Fetch and decode the kernel-mode instruction at ctxt->regs->ip into
 * ctxt->insn, decoding in 64-bit mode.
 *
 * Returns ES_EXCEPTION (instruction-fetch #PF at regs->ip recorded in
 * ctxt->fi) when the bytes cannot be fetched, ES_DECODE_FAILED when
 * insn_decode() reports an error, and ES_OK otherwise.
 */
static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt)
{
	char insn_buf[MAX_INSN_SIZE];

	if (vc_fetch_insn_kernel(ctxt, insn_buf)) {
		ctxt->fi.vector     = X86_TRAP_PF;
		ctxt->fi.error_code = X86_PF_INSTR;
		ctxt->fi.cr2        = ctxt->regs->ip;
		return ES_EXCEPTION;
	}

	if (insn_decode(&ctxt->insn, insn_buf, MAX_INSN_SIZE, INSN_MODE_64) < 0)
		return ES_DECODE_FAILED;

	return ES_OK;
}
/*
 * Decode the instruction that raised the #VC, using the user-mode or the
 * kernel-mode decoder depending on the interrupted context.
 */
static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
{
	return user_mode(ctxt->regs) ? __vc_decode_user_insn(ctxt)
				     : __vc_decode_kern_insn(ctxt);
}
/*
 * Store @size bytes from @buf to @dst on behalf of an emulated instruction.
 *
 * The store is performed with __put_user() whatever address @dst holds.
 * Only sizes 1, 2, 4 and 8 are accepted; any other size warns once and
 * returns ES_UNSUPPORTED.
 *
 * Returns ES_OK on success. If the store faults, a write protection #PF for
 * @dst is recorded in ctxt->fi (with X86_PF_USER added for user-mode
 * contexts) and ES_EXCEPTION is returned.
 */
static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
				   char *dst, char *buf, size_t size)
{
	unsigned long pf_error = X86_PF_PROT | X86_PF_WRITE;
	int failed;

	switch (size) {
	case 1: {
		u8 val;

		memcpy(&val, buf, sizeof(val));
		failed = __put_user(val, (u8 __user *)dst);
		break;
	}
	case 2: {
		u16 val;

		memcpy(&val, buf, sizeof(val));
		failed = __put_user(val, (u16 __user *)dst);
		break;
	}
	case 4: {
		u32 val;

		memcpy(&val, buf, sizeof(val));
		failed = __put_user(val, (u32 __user *)dst);
		break;
	}
	case 8: {
		u64 val;

		memcpy(&val, buf, sizeof(val));
		failed = __put_user(val, (u64 __user *)dst);
		break;
	}
	default:
		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
		return ES_UNSUPPORTED;
	}

	if (!failed)
		return ES_OK;

	/* The store faulted: describe the page fault to forward. */
	if (user_mode(ctxt->regs))
		pf_error |= X86_PF_USER;

	ctxt->fi.vector     = X86_TRAP_PF;
	ctxt->fi.error_code = pf_error;
	ctxt->fi.cr2        = (unsigned long)dst;

	return ES_EXCEPTION;
}
/*
 * Load @size bytes from @src into @buf on behalf of an emulated instruction.
 *
 * The load is performed with __get_user() whatever address @src holds.
 * Only sizes 1, 2, 4 and 8 are accepted; any other size warns once and
 * returns ES_UNSUPPORTED.
 *
 * Returns ES_OK on success, in which case @buf holds the loaded bytes. If
 * the load faults, @buf is left untouched, a protection #PF for @src is
 * recorded in ctxt->fi (with X86_PF_USER added for user-mode contexts) and
 * ES_EXCEPTION is returned.
 */
static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
				  char *src, char *buf, size_t size)
{
	unsigned long pf_error = X86_PF_PROT;
	int failed;

	switch (size) {
	case 1: {
		u8 val;

		failed = __get_user(val, (u8 __user *)src);
		if (!failed)
			memcpy(buf, &val, sizeof(val));
		break;
	}
	case 2: {
		u16 val;

		failed = __get_user(val, (u16 __user *)src);
		if (!failed)
			memcpy(buf, &val, sizeof(val));
		break;
	}
	case 4: {
		u32 val;

		failed = __get_user(val, (u32 __user *)src);
		if (!failed)
			memcpy(buf, &val, sizeof(val));
		break;
	}
	case 8: {
		u64 val;

		failed = __get_user(val, (u64 __user *)src);
		if (!failed)
			memcpy(buf, &val, sizeof(val));
		break;
	}
	default:
		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
		return ES_UNSUPPORTED;
	}

	if (!failed)
		return ES_OK;

	/* The load faulted: describe the page fault to forward. */
	if (user_mode(ctxt->regs))
		pf_error |= X86_PF_USER;

	ctxt->fi.vector     = X86_TRAP_PF;
	ctxt->fi.error_code = pf_error;
	ctxt->fi.cr2        = (unsigned long)src;

	return ES_EXCEPTION;
}
/*
 * Translate @vaddr to a physical address by walking the page table that CR3
 * currently points to, and store the result in *@paddr.
 *
 * Returns ES_EXCEPTION with a #PF for @vaddr recorded in ctxt->fi when no
 * PTE is found, ES_UNSUPPORTED (with a one-time warning) when the PTE has
 * _PAGE_ENC set, and ES_OK otherwise. @ghcb is not used here.
 */
static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
					   unsigned long vaddr, phys_addr_t *paddr)
{
	unsigned int level;
	pgd_t *pgd_entry;
	pte_t *ptep;

	pgd_entry = (pgd_t *)__va(read_cr3_pa()) + pgd_index(vaddr);
	ptep = lookup_address_in_pgd(pgd_entry, vaddr, &level);
	if (!ptep) {
		ctxt->fi.vector     = X86_TRAP_PF;
		ctxt->fi.cr2        = vaddr;
		ctxt->fi.error_code = user_mode(ctxt->regs) ? X86_PF_USER : 0;
		return ES_EXCEPTION;
	}

	if (WARN_ON_ONCE(pte_val(*ptep) & _PAGE_ENC))
		return ES_UNSUPPORTED;

	/* Page frame address plus the offset within the (possibly large) page. */
	*paddr = ((phys_addr_t)pte_pfn(*ptep) << PAGE_SHIFT) |
		 (vaddr & ~page_level_mask(level));

	return ES_OK;
}
#include "sev-shared.c"
/*
 * Release the GHCB obtained with __sev_get_ghcb(). Expected to run with IRQs
 * disabled; a warning is emitted otherwise.
 *
 * If @state carries a backup pointer, the primary GHCB is restored from it
 * and the backup is marked free (the primary stays active). Otherwise the
 * primary GHCB is invalidated and marked free.
 */
static noinstr void __sev_put_ghcb(struct ghcb_state *state)
{
struct sev_es_runtime_data *data;
struct ghcb *ghcb;
WARN_ON(!irqs_disabled());
data = this_cpu_read(runtime_data);
ghcb = &data->ghcb_page;
if (state->ghcb) {
/* Restore the contents saved by __sev_get_ghcb(). */
*ghcb = *state->ghcb;
data->backup_ghcb_active = false;
state->ghcb = NULL;
} else {
/* Invalidate the GHCB contents before marking it free. */
vc_ghcb_invalidate(ghcb);
data->ghcb_active = false;
}
}
/*
 * Send an SVM_VMGEXIT_NMI_COMPLETE request to the hypervisor through this
 * CPU's GHCB, with both exit-info fields set to zero.
 */
void noinstr __sev_es_nmi_complete(void)
{
struct ghcb_state state;
struct ghcb *ghcb;
ghcb = __sev_get_ghcb(&state);
/* Build the request in a freshly invalidated GHCB. */
vc_ghcb_invalidate(ghcb);
ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
ghcb_set_sw_exit_info_1(ghcb, 0);
ghcb_set_sw_exit_info_2(ghcb, 0);
/*
 * NOTE(review): uses __pa_nodebug() where the other GHCB users in this file
 * use __pa() - presumably because this function is noinstr; confirm.
 */
sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
VMGEXIT();
__sev_put_ghcb(&state);
}
/*
 * Ask the hypervisor for the AP jump table address.
 *
 * Returns the value the hypervisor placed in sw_exit_info_2, or 0 if either
 * exit-info field was not marked valid in the response.
 */
static u64 get_jump_table_addr(void)
{
struct ghcb_state state;
unsigned long flags;
struct ghcb *ghcb;
u64 ret = 0;
/* __sev_get_ghcb() expects IRQs to be off. */
local_irq_save(flags);
ghcb = __sev_get_ghcb(&state);
/* Build an AP_JUMP_TABLE / GET request in a freshly invalidated GHCB. */
vc_ghcb_invalidate(ghcb);
ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
ghcb_set_sw_exit_info_2(ghcb, 0);
sev_es_wr_ghcb_msr(__pa(ghcb));
VMGEXIT();
/* Only trust sw_exit_info_2 when both exit-info fields are valid. */
if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
ghcb_sw_exit_info_2_is_valid(ghcb))
ret = ghcb->save.sw_exit_info_2;
__sev_put_ghcb(&state);
local_irq_restore(flags);
return ret;
}
int sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
{
u16 startup_cs, startup_ip;
phys_addr_t jump_table_pa;
u64 jump_table_addr;
u16 __iomem *jump_table;
jump_table_addr = get_jump_table_addr();
if (!jump_table_addr)
return 0;
if (jump_table_addr & ~PAGE_MASK)
return -EINVAL;
jump_table_pa = jump_table_addr & PAGE_MASK;
startup_cs = (u16)(rmh->trampoline_start >> 4);
startup_ip = (u16)(rmh->sev_es_trampoline_start -
rmh->trampoline_start);
jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE);
if (!jump_table)
return -EIO;
writew(startup_ip, &jump_table[0]);
writew(startup_cs, &jump_table[1]);
iounmap(jump_table);
return 0;
}
/*
 * Map the primary GHCB page of every possible CPU into @pgd at an address
 * equal to its physical address, with _PAGE_NX | _PAGE_RW.
 *
 * Returns 0 on success, or immediately when guest state encryption is not
 * active, and 1 as soon as mapping one of the pages fails.
 */
int __init sev_es_efi_map_ghcbs(pgd_t *pgd)
{
	unsigned long prot = _PAGE_NX | _PAGE_RW;
	int cpu;

	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return 0;

	for_each_possible_cpu(cpu) {
		struct sev_es_runtime_data *rd = per_cpu(runtime_data, cpu);
		unsigned long ghcb_pa = __pa(&rd->ghcb_page);
		u64 ghcb_pfn = ghcb_pa >> PAGE_SHIFT;

		if (kernel_map_pages_in_pgd(pgd, ghcb_pfn, ghcb_pa, 1, prot))
			return 1;
	}

	return 0;
}
static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
struct pt_regs *regs = ctxt->regs;
enum es_result ret;
u64 exit_info_1;
exit_info_1 = (ctxt->insn.opcode.bytes[1] == 0x30) ? 1 : 0;
ghcb_set_rcx(ghcb, regs->cx);
if (exit_info_1) {
ghcb_set_rax(ghcb, regs->ax);
ghcb_set_rdx(ghcb, regs->dx);
}
ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_MSR,
exit_info_1, 0);
if ((ret == ES_OK) && (!exit_info_1)) {
regs->ax = ghcb->save.rax;
regs->dx = ghcb->save.rdx;
}
return ret;
}
/*
 * Prepare the boot GHCB for use by handle_vc_boot_ghcb().
 *
 * Returns false if protocol negotiation with the hypervisor fails; otherwise
 * clears the boot GHCB page, publishes it through boot_ghcb and returns true.
 */
static bool __init sev_es_setup_ghcb(void)
{
/* Nothing can be done unless protocol negotiation succeeds. */
if (!sev_es_negotiate_protocol())
return false;
/* Clear the page before it becomes visible through boot_ghcb. */
memset(&boot_ghcb_page, 0, PAGE_SIZE);
boot_ghcb = &boot_ghcb_page;
return true;
}
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Repeatedly issue SVM_VMGEXIT_AP_HLT_LOOP requests to the hypervisor and
 * return only once a response carries a valid, non-zero sw_exit_info_2.
 */
static void sev_es_ap_hlt_loop(void)
{
struct ghcb_state state;
struct ghcb *ghcb;
ghcb = __sev_get_ghcb(&state);
while (true) {
/* Rebuild the request from scratch on every iteration. */
vc_ghcb_invalidate(ghcb);
ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
ghcb_set_sw_exit_info_1(ghcb, 0);
ghcb_set_sw_exit_info_2(ghcb, 0);
sev_es_wr_ghcb_msr(__pa(ghcb));
VMGEXIT();
/* A valid, non-zero sw_exit_info_2 ends the loop. */
if (ghcb_sw_exit_info_2_is_valid(ghcb) &&
ghcb->save.sw_exit_info_2)
break;
}
__sev_put_ghcb(&state);
}
/*
 * play_dead callback installed by sev_es_setup_play_dead(): run the common
 * play-dead steps, wait in sev_es_ap_hlt_loop(), then call start_cpu0().
 */
static void sev_es_play_dead(void)
{
play_dead_common();
/* Returns only after the hypervisor signalled a non-zero sw_exit_info_2. */
sev_es_ap_hlt_loop();
/* NOTE(review): presumably re-enters the CPU startup path to bring the CPU back online - confirm. */
start_cpu0();
}
#else /* CONFIG_HOTPLUG_CPU */
/* Without CPU hotplug there is no SEV-ES specific handler; alias the native play_dead */
#define sev_es_play_dead native_play_dead
#endif /* CONFIG_HOTPLUG_CPU */
#ifdef CONFIG_SMP
/* Install sev_es_play_dead as the smp_ops play_dead callback. */
static void __init sev_es_setup_play_dead(void)
{
smp_ops.play_dead = sev_es_play_dead;
}
#else
/* Non-SMP build: nothing to install. */
static inline void sev_es_setup_play_dead(void) { }
#endif
/*
 * Allocate the page-aligned runtime data for @cpu from memblock and record
 * it in the per-CPU runtime_data pointer. Panics if the allocation fails.
 */
static void __init alloc_runtime_data(int cpu)
{
	struct sev_es_runtime_data *rd = memblock_alloc(sizeof(*rd), PAGE_SIZE);

	if (!rd)
		panic("Can't allocate SEV-ES runtime data");

	per_cpu(runtime_data, cpu) = rd;
}
/*
 * Initialise the primary GHCB of @cpu: switch its page to decrypted (panics
 * on failure), zero it, and mark both the GHCB and its backup as unused.
 */
static void __init init_ghcb(int cpu)
{
	struct sev_es_runtime_data *rd = per_cpu(runtime_data, cpu);
	struct ghcb *page = &rd->ghcb_page;

	if (early_set_memory_decrypted((unsigned long)page, sizeof(*page)))
		panic("Can't map GHCBs unencrypted");

	/* Zero the page only after its mapping has been changed. */
	memset(page, 0, sizeof(*page));

	rd->ghcb_active = false;
	rd->backup_ghcb_active = false;
}
/*
 * Set up runtime #VC handling.
 *
 * Does nothing unless guest state encryption is active. Panics if the
 * required CPU features are missing. Otherwise enables sev_es_enable_key,
 * allocates and initialises the runtime data of every possible CPU, installs
 * the play_dead callback and switches initial_vc_handler to the runtime
 * kernel #VC entry point.
 */
void __init sev_es_init_vc_handling(void)
{
int cpu;
/* The primary GHCB must start on a page boundary within the runtime data. */
BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);
if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
return;
if (!sev_es_check_cpu_features())
panic("SEV-ES CPU Features missing");
static_branch_enable(&sev_es_enable_key);
/* Allocate first, then initialise: init_ghcb() reads the per-CPU pointer. */
for_each_possible_cpu(cpu) {
alloc_runtime_data(cpu);
init_ghcb(cpu);
}
sev_es_setup_play_dead();
initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
}
/*
 * Forward the exception described in ctxt->fi to the early exception
 * handler. For a page fault, CR2 is loaded with the faulting address first;
 * the error code is handed over in regs->orig_ax.
 */
static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
{
	struct pt_regs *regs = ctxt->regs;
	int vector = ctxt->fi.vector;

	if (vector == X86_TRAP_PF)
		native_write_cr2(ctxt->fi.cr2);

	regs->orig_ax = ctxt->fi.error_code;
	do_early_exception(regs, vector);
}
/*
 * Return a pointer to the pt_regs slot selected by the ModRM.rm field of the
 * decoded instruction, or NULL when insn_get_modrm_rm_off() reports a
 * negative offset.
 */
static long *vc_insn_get_rm(struct es_em_ctxt *ctxt)
{
	long *regs_as_longs = (long *)ctxt->regs;
	int slot;

	slot = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs);
	if (slot < 0)
		return NULL;

	/* Convert the byte offset into an index of long-sized slots. */
	slot /= sizeof(long);

	return &regs_as_longs[slot];
}
/*
 * Ask the hypervisor to perform one MMIO access of @bytes bytes at the
 * memory operand of the decoded instruction. The data travels through
 * ghcb->shared_buffer, whose physical address is passed as the scratch area.
 *
 * @read selects SVM_VMGEXIT_MMIO_READ, otherwise SVM_VMGEXIT_MMIO_WRITE.
 *
 * Returns ES_UNSUPPORTED when the operand address cannot be determined, the
 * error from vc_slow_virt_to_phys() when translation fails (for writes, a
 * resulting #PF additionally gets X86_PF_WRITE), or the result of the
 * hypervisor call.
 */
static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
				 unsigned int bytes, bool read)
{
	unsigned long ghcb_pa = __pa(ghcb);
	void __user *addr_ref;
	enum es_result res;
	phys_addr_t paddr;
	u64 exit_code;

	addr_ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
	if (addr_ref == (void __user *)-1L)
		return ES_UNSUPPORTED;

	if (read)
		exit_code = SVM_VMGEXIT_MMIO_READ;
	else
		exit_code = SVM_VMGEXIT_MMIO_WRITE;

	res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)addr_ref, &paddr);
	if (res == ES_EXCEPTION && !read)
		ctxt->fi.error_code |= X86_PF_WRITE;
	if (res != ES_OK)
		return res;

	ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));

	/* exit_info_1 carries the physical address, exit_info_2 the size. */
	return sev_es_ghcb_hv_call(ghcb, true, ctxt, exit_code, paddr, bytes);
}
static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
unsigned int bytes)
{
unsigned long ds_base, es_base;
unsigned char *src, *dst;
unsigned char buffer[8];
enum es_result ret;
bool rep;
int off;
ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS);
es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);
if (ds_base == -1L || es_base == -1L) {
ctxt->fi.vector = X86_TRAP_GP;
ctxt->fi.error_code = 0;
return ES_EXCEPTION;
}
src = ds_base + (unsigned char *)ctxt->regs->si;
dst = es_base + (unsigned char *)ctxt->regs->di;
ret = vc_read_mem(ctxt, src, buffer, bytes);
if (ret != ES_OK)
return ret;
ret = vc_write_mem(ctxt, dst, buffer, bytes);
if (ret != ES_OK)
return ret;
if (ctxt->regs->flags & X86_EFLAGS_DF)
off = -bytes;
else
off = bytes;
ctxt->regs->si += off;
ctxt->regs->di += off;
rep = insn_has_rep_prefix(&ctxt->insn);
if (rep)
ctxt->regs->cx -= 1;
if (!rep || ctxt->regs->cx == 0)
return ES_OK;
else
return ES_RETRY;
}
static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
struct insn *insn = &ctxt->insn;
unsigned int bytes = 0;
enum mmio_type mmio;
enum es_result ret;
u8 sign_byte;
long *reg_data;
mmio = insn_decode_mmio(insn, &bytes);
if (mmio == MMIO_DECODE_FAILED)
return ES_DECODE_FAILED;
if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) {
reg_data = insn_get_modrm_reg_ptr(insn, ctxt->regs);
if (!reg_data)
return ES_DECODE_FAILED;
}
switch (mmio) {
case MMIO_WRITE:
memcpy(ghcb->shared_buffer, reg_data, bytes);
ret = vc_do_mmio(ghcb, ctxt, bytes, false);
break;
case MMIO_WRITE_IMM:
memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);
ret = vc_do_mmio(ghcb, ctxt, bytes, false);
break;
case MMIO_READ:
ret = vc_do_mmio(ghcb, ctxt, bytes, true);
if (ret)
break;
if (bytes == 4)
*reg_data = 0;
memcpy(reg_data, ghcb->shared_buffer, bytes);
break;
case MMIO_READ_ZERO_EXTEND:
ret = vc_do_mmio(ghcb, ctxt, bytes, true);
if (ret)
break;
memset(reg_data, 0, insn->opnd_bytes);
memcpy(reg_data, ghcb->shared_buffer, bytes);
break;
case MMIO_READ_SIGN_EXTEND:
ret = vc_do_mmio(ghcb, ctxt, bytes, true);
if (ret)
break;
if (bytes == 1) {
u8 *val = (u8 *)ghcb->shared_buffer;
sign_byte = (*val & 0x80) ? 0xff : 0x00;
} else {
u16 *val = (u16 *)ghcb->shared_buffer;
sign_byte = (*val & 0x8000) ? 0xff : 0x00;
}
memset(reg_data, sign_byte, insn->opnd_bytes);
memcpy(reg_data, ghcb->shared_buffer, bytes);
break;
case MMIO_MOVS:
ret = vc_handle_mmio_movs(ctxt, bytes);
break;
default:
ret = ES_UNSUPPORTED;
break;
}
return ret;
}
/*
 * Emulate a write to DR7.
 *
 * The source register is taken from ModRM.rm. A value with any of the upper
 * 32 bits set raises #GP. The value is then masked with 0xffff23ff and bit
 * 10 is forced on. While no per-CPU runtime data exists, only a value equal
 * to DR7_RESET_VALUE (or 0 before bit 10 is forced) is accepted; anything
 * else returns ES_UNSUPPORTED. The sanitised value is passed to the
 * hypervisor in RAX and, on success, cached in the runtime data.
 */
static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
					  struct es_em_ctxt *ctxt)
{
	struct sev_es_runtime_data *rd = this_cpu_read(runtime_data);
	long *src_reg = vc_insn_get_rm(ctxt);
	enum es_result ret;
	long dr7;

	if (!src_reg)
		return ES_DECODE_FAILED;

	dr7 = *src_reg;
	if (dr7 >> 32) {
		ctxt->fi.vector = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		return ES_EXCEPTION;
	}

	dr7 &= 0xffff23ffL;
	dr7 |= BIT(10);

	if (!rd && (dr7 & ~DR7_RESET_VALUE))
		return ES_UNSUPPORTED;

	ghcb_set_rax(ghcb, dr7);
	ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
	if (ret == ES_OK && rd)
		rd->dr7 = dr7;

	return ret;
}
/*
 * Emulate a read of DR7 without involving the hypervisor: the destination
 * register (ModRM.rm) receives the cached value from the per-CPU runtime
 * data, or DR7_RESET_VALUE while no runtime data exists.
 *
 * Returns ES_DECODE_FAILED when the destination register cannot be
 * determined, ES_OK otherwise. @ghcb is not used.
 */
static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
					 struct es_em_ctxt *ctxt)
{
	struct sev_es_runtime_data *rd = this_cpu_read(runtime_data);
	long *dst_reg = vc_insn_get_rm(ctxt);

	if (!dst_reg)
		return ES_DECODE_FAILED;

	*dst_reg = rd ? rd->dr7 : DR7_RESET_VALUE;

	return ES_OK;
}
static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
struct es_em_ctxt *ctxt)
{
return sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_WBINVD, 0, 0);
}
static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
enum es_result ret;
ghcb_set_rcx(ghcb, ctxt->regs->cx);
ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_RDPMC, 0, 0);
if (ret != ES_OK)
return ret;
if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb)))
return ES_VMM_ERROR;
ctxt->regs->ax = ghcb->save.rax;
ctxt->regs->dx = ghcb->save.rdx;
return ES_OK;
}
/*
 * Handle an intercepted MONITOR as a no-op: nothing is sent to the
 * hypervisor and the instruction is reported as handled.
 */
static enum es_result vc_handle_monitor(struct ghcb *ghcb,
struct es_em_ctxt *ctxt)
{
return ES_OK;
}
/*
 * Handle an intercepted MWAIT as a no-op: nothing is sent to the hypervisor
 * and the instruction is reported as handled.
 */
static enum es_result vc_handle_mwait(struct ghcb *ghcb,
struct es_em_ctxt *ctxt)
{
return ES_OK;
}
static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
struct es_em_ctxt *ctxt)
{
enum es_result ret;
ghcb_set_rax(ghcb, ctxt->regs->ax);
ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0);
if (x86_platform.hyper.sev_es_hcall_prepare)
x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);
ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_VMMCALL, 0, 0);
if (ret != ES_OK)
return ret;
if (!ghcb_rax_is_valid(ghcb))
return ES_VMM_ERROR;
ctxt->regs->ax = ghcb->save.rax;
if (x86_platform.hyper.sev_es_hcall_finish &&
!x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs))
return ES_VMM_ERROR;
return ES_OK;
}
/*
 * Handle an intercepted alignment-check exception by recording an #AC with
 * error code 0 in ctxt->fi and returning ES_EXCEPTION. The exception itself
 * is delivered by whoever acts on ES_EXCEPTION (vc_forward_exception() calls
 * exc_alignment_check() for X86_TRAP_AC).
 */
static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
struct es_em_ctxt *ctxt)
{
ctxt->fi.vector = X86_TRAP_AC;
ctxt->fi.error_code = 0;
return ES_EXCEPTION;
}
static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
struct ghcb *ghcb,
unsigned long exit_code)
{
enum es_result result;
switch (exit_code) {
case SVM_EXIT_READ_DR7:
result = vc_handle_dr7_read(ghcb, ctxt);
break;
case SVM_EXIT_WRITE_DR7:
result = vc_handle_dr7_write(ghcb, ctxt);
break;
case SVM_EXIT_EXCP_BASE + X86_TRAP_AC:
result = vc_handle_trap_ac(ghcb, ctxt);
break;
case SVM_EXIT_RDTSC:
case SVM_EXIT_RDTSCP:
result = vc_handle_rdtsc(ghcb, ctxt, exit_code);
break;
case SVM_EXIT_RDPMC:
result = vc_handle_rdpmc(ghcb, ctxt);
break;
case SVM_EXIT_INVD:
pr_err_ratelimited("#VC exception for INVD??? Seriously???\n");
result = ES_UNSUPPORTED;
break;
case SVM_EXIT_CPUID:
result = vc_handle_cpuid(ghcb, ctxt);
break;
case SVM_EXIT_IOIO:
result = vc_handle_ioio(ghcb, ctxt);
break;
case SVM_EXIT_MSR:
result = vc_handle_msr(ghcb, ctxt);
break;
case SVM_EXIT_VMMCALL:
result = vc_handle_vmmcall(ghcb, ctxt);
break;
case SVM_EXIT_WBINVD:
result = vc_handle_wbinvd(ghcb, ctxt);
break;
case SVM_EXIT_MONITOR:
result = vc_handle_monitor(ghcb, ctxt);
break;
case SVM_EXIT_MWAIT:
result = vc_handle_mwait(ghcb, ctxt);
break;
case SVM_EXIT_NPF:
result = vc_handle_mmio(ghcb, ctxt);
break;
default:
result = ES_UNSUPPORTED;
}
return result;
}
static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
{
long error_code = ctxt->fi.error_code;
int trapnr = ctxt->fi.vector;
ctxt->regs->orig_ax = ctxt->fi.error_code;
switch (trapnr) {
case X86_TRAP_GP:
exc_general_protection(ctxt->regs, error_code);
break;
case X86_TRAP_UD:
exc_invalid_op(ctxt->regs);
break;
case X86_TRAP_PF:
write_cr2(ctxt->fi.cr2);
exc_page_fault(ctxt->regs, error_code);
break;
case X86_TRAP_AC:
exc_alignment_check(ctxt->regs, error_code);
break;
default:
pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
BUG();
}
}
/* Tell whether @sp lies within this CPU's VC2 IST stack, i.e. in [bottom, top). */
static __always_inline bool is_vc2_stack(unsigned long sp)
{
	if (sp < __this_cpu_ist_bottom_va(VC2))
		return false;

	return sp < __this_cpu_ist_top_va(VC2);
}
/*
 * Tell whether the #VC arrived from a context that cannot be handled: the
 * handler's own pt_regs live on the VC2 stack although the interrupted
 * context (regs->sp) was not running on that stack.
 */
static __always_inline bool vc_from_invalid_context(struct pt_regs *regs)
{
	unsigned long handler_sp = (unsigned long)regs;
	unsigned long interrupted_sp = regs->sp;

	if (!is_vc2_stack(handler_sp))
		return false;

	return !is_vc2_stack(interrupted_sp);
}
/*
 * Core of the runtime #VC handlers: set up the emulation context, dispatch on
 * @error_code (the #VC exit code) and act on the result.
 *
 * Returns true when the exception was handled, forwarded or needs a retry,
 * and false (after a rate-limited log message) for ES_UNSUPPORTED,
 * ES_VMM_ERROR and ES_DECODE_FAILED. An unknown result hits BUG().
 */
static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code)
{
struct ghcb_state state;
struct es_em_ctxt ctxt;
enum es_result result;
struct ghcb *ghcb;
bool ret = true;
ghcb = __sev_get_ghcb(&state);
vc_ghcb_invalidate(ghcb);
result = vc_init_em_ctxt(&ctxt, regs, error_code);
if (result == ES_OK)
result = vc_handle_exitcode(&ctxt, ghcb, error_code);
/* The GHCB is released before the result is acted upon. */
__sev_put_ghcb(&state);
switch (result) {
case ES_OK:
vc_finish_insn(&ctxt);
break;
case ES_UNSUPPORTED:
pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n",
error_code, regs->ip);
ret = false;
break;
case ES_VMM_ERROR:
pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
error_code, regs->ip);
ret = false;
break;
case ES_DECODE_FAILED:
pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
error_code, regs->ip);
ret = false;
break;
case ES_EXCEPTION:
/* Deliver the exception recorded in ctxt.fi. */
vc_forward_exception(&ctxt);
break;
case ES_RETRY:
/* Nothing to do here; vc_finish_insn() is deliberately not called. */
break;
default:
pr_emerg("Unknown result in %s():%d\n", __func__, result);
BUG();
}
return ret;
}
/* Tell whether @error_code is the #VC exit code for an intercepted #DB. */
static __always_inline bool vc_is_db(unsigned long error_code)
{
	if (error_code != SVM_EXIT_EXCP_BASE + X86_TRAP_DB)
		return false;

	return true;
}
/*
 * #VC entry point for exceptions raised while in kernel mode.
 *
 * Panics when the #VC came from a context vc_from_invalid_context() rejects.
 * An intercepted #DB is passed straight to exc_debug(). Everything else goes
 * through vc_raw_handle_exception(); if that fails, the registers are dumped,
 * the hypervisor is asked to terminate the guest, and the kernel panics
 * should that request return.
 */
DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
{
irqentry_state_t irq_state;
if (unlikely(vc_from_invalid_context(regs))) {
instrumentation_begin();
panic("Can't handle #VC exception from unsupported context\n");
instrumentation_end();
}
/* #DB is dispatched before the instrumentable section below is entered. */
if (vc_is_db(error_code)) {
exc_debug(regs);
return;
}
irq_state = irqentry_nmi_enter(regs);
instrumentation_begin();
if (!vc_raw_handle_exception(regs, error_code)) {
/* Emulation failed: dump state, then request termination. */
show_regs(regs);
sev_es_terminate(GHCB_SEV_ES_GEN_REQ);
/* Only reached if the termination request returned. */
panic("Returned from Terminate-Request to Hypervisor\n");
}
instrumentation_end();
irqentry_nmi_exit(regs, irq_state);
}
/*
 * #VC entry point for exceptions raised while in user mode.
 *
 * An intercepted #DB is passed straight to noist_exc_debug(). Everything else
 * goes through vc_raw_handle_exception(); on failure the current task gets a
 * SIGBUS (BUS_OBJERR) instead of the guest being terminated.
 */
DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
{
/* #DB is dispatched before the instrumentable section below is entered. */
if (vc_is_db(error_code)) {
noist_exc_debug(regs);
return;
}
irqentry_enter_from_user_mode(regs);
instrumentation_begin();
if (!vc_raw_handle_exception(regs, error_code)) {
/* Emulation failed: signal the task rather than stopping the machine. */
force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
}
instrumentation_end();
irqentry_exit_to_user_mode(regs);
}
/*
 * Early-boot #VC handler that works on the boot GHCB. The exit code is taken
 * from regs->orig_ax.
 *
 * On first use the boot GHCB is set up; if that fails, termination of the
 * guest is requested. Returns true when the exception was handled, forwarded
 * to the early exception handler, or needs a retry. For ES_UNSUPPORTED,
 * ES_VMM_ERROR and ES_DECODE_FAILED a message is printed, the registers are
 * dumped and the CPU halts in an endless loop, so the function does not
 * return. An unknown result hits BUG().
 */
bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
{
unsigned long exit_code = regs->orig_ax;
struct es_em_ctxt ctxt;
enum es_result result;
/* Lazily set up the boot GHCB; request termination if that fails. */
if (unlikely(boot_ghcb == NULL && !sev_es_setup_ghcb()))
sev_es_terminate(GHCB_SEV_ES_GEN_REQ);
vc_ghcb_invalidate(boot_ghcb);
result = vc_init_em_ctxt(&ctxt, regs, exit_code);
if (result == ES_OK)
result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code);
switch (result) {
case ES_OK:
vc_finish_insn(&ctxt);
break;
case ES_UNSUPPORTED:
early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
exit_code, regs->ip);
goto fail;
case ES_VMM_ERROR:
early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
exit_code, regs->ip);
goto fail;
case ES_DECODE_FAILED:
early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
exit_code, regs->ip);
goto fail;
case ES_EXCEPTION:
/* Deliver the exception recorded in ctxt.fi through the early path. */
vc_early_forward_exception(&ctxt);
break;
case ES_RETRY:
/* Nothing to do here; vc_finish_insn() is deliberately not called. */
break;
default:
BUG();
}
return true;
fail:
/* Unrecoverable: dump the registers and stop this CPU for good. */
show_regs(regs);
while (true)
halt();
}