summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/sev-es.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/sev-es.c')
-rw-r--r--arch/x86/kernel/sev-es.c119
1 files changed, 79 insertions, 40 deletions
diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
index 84c1821819af..73873b007838 100644
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -121,35 +121,57 @@ static void __init setup_vc_stacks(int cpu)
cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
}
-static __always_inline bool on_vc_stack(unsigned long sp)
+static __always_inline bool on_vc_stack(struct pt_regs *regs)
{
+ unsigned long sp = regs->sp;
+
+ /* User-mode RSP is not trusted */
+ if (user_mode(regs))
+ return false;
+
+ /* SYSCALL gap still has user-mode RSP */
+ if (ip_within_syscall_gap(regs))
+ return false;
+
return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
}
/*
- * This function handles the case when an NMI is raised in the #VC exception
- * handler entry code. In this case, the IST entry for #VC must be adjusted, so
- * that any subsequent #VC exception will not overwrite the stack contents of the
- * interrupted #VC handler.
+ * This function handles the case when an NMI is raised in the #VC
+ * exception handler entry code, before the #VC handler has switched off
+ * its IST stack. In this case, the IST entry for #VC must be adjusted,
+ * so that any nested #VC exception will not overwrite the stack
+ * contents of the interrupted #VC handler.
*
* The IST entry is adjusted unconditionally so that it can be also be
- * unconditionally adjusted back in sev_es_ist_exit(). Otherwise a nested
- * sev_es_ist_exit() call may adjust back the IST entry too early.
+ * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a
+ * nested sev_es_ist_exit() call may adjust back the IST entry too
+ * early.
+ *
+ * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run
+ * on the NMI IST stack, as they are only called from NMI handling code
+ * right now.
*/
void noinstr __sev_es_ist_enter(struct pt_regs *regs)
{
unsigned long old_ist, new_ist;
/* Read old IST entry */
- old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
+ new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
- /* Make room on the IST stack */
- if (on_vc_stack(regs->sp))
- new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist);
- else
- new_ist = old_ist - sizeof(old_ist);
+ /*
+ * If NMI happened while on the #VC IST stack, set the new IST
+ * value below regs->sp, so that the interrupted stack frame is
+ * not overwritten by subsequent #VC exceptions.
+ */
+ if (on_vc_stack(regs))
+ new_ist = regs->sp;
- /* Store old IST entry */
+ /*
+ * Reserve additional 8 bytes and store old IST value so this
+ * adjustment can be unrolled in __sev_es_ist_exit().
+ */
+ new_ist -= sizeof(old_ist);
*(unsigned long *)new_ist = old_ist;
/* Set new IST entry */
@@ -241,39 +263,54 @@ static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
}
-static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
+static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
{
char buffer[MAX_INSN_SIZE];
- enum es_result ret;
int res;
- if (user_mode(ctxt->regs)) {
- res = insn_fetch_from_user(ctxt->regs, buffer);
- if (!res) {
- ctxt->fi.vector = X86_TRAP_PF;
- ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
- ctxt->fi.cr2 = ctxt->regs->ip;
- return ES_EXCEPTION;
- }
+ res = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
+ if (!res) {
+ ctxt->fi.vector = X86_TRAP_PF;
+ ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
+ ctxt->fi.cr2 = ctxt->regs->ip;
+ return ES_EXCEPTION;
+ }
- if (!insn_decode(&ctxt->insn, ctxt->regs, buffer, res))
- return ES_DECODE_FAILED;
- } else {
- res = vc_fetch_insn_kernel(ctxt, buffer);
- if (res) {
- ctxt->fi.vector = X86_TRAP_PF;
- ctxt->fi.error_code = X86_PF_INSTR;
- ctxt->fi.cr2 = ctxt->regs->ip;
- return ES_EXCEPTION;
- }
+ if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, res))
+ return ES_DECODE_FAILED;
- insn_init(&ctxt->insn, buffer, MAX_INSN_SIZE - res, 1);
- insn_get_length(&ctxt->insn);
+ if (ctxt->insn.immediate.got)
+ return ES_OK;
+ else
+ return ES_DECODE_FAILED;
+}
+
+static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt)
+{
+ char buffer[MAX_INSN_SIZE];
+ int res, ret;
+
+ res = vc_fetch_insn_kernel(ctxt, buffer);
+ if (res) {
+ ctxt->fi.vector = X86_TRAP_PF;
+ ctxt->fi.error_code = X86_PF_INSTR;
+ ctxt->fi.cr2 = ctxt->regs->ip;
+ return ES_EXCEPTION;
}
- ret = ctxt->insn.immediate.got ? ES_OK : ES_DECODE_FAILED;
+ ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
+ if (ret < 0)
+ return ES_DECODE_FAILED;
+ else
+ return ES_OK;
+}
- return ret;
+static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
+{
+ if (user_mode(ctxt->regs))
+ return __vc_decode_user_insn(ctxt);
+ else
+ return __vc_decode_kern_insn(ctxt);
}
static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
@@ -1248,13 +1285,12 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
{
struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
+ irqentry_state_t irq_state;
struct ghcb_state state;
struct es_em_ctxt ctxt;
enum es_result result;
struct ghcb *ghcb;
- lockdep_assert_irqs_disabled();
-
/*
* Handle #DB before calling into !noinstr code to avoid recursive #DB.
*/
@@ -1263,6 +1299,8 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
return;
}
+ irq_state = irqentry_nmi_enter(regs);
+ lockdep_assert_irqs_disabled();
instrumentation_begin();
/*
@@ -1325,6 +1363,7 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
out:
instrumentation_end();
+ irqentry_nmi_exit(regs, irq_state);
return;