summaryrefslogtreecommitdiff
path: root/arch/x86/xen/enlighten_pv.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/xen/enlighten_pv.c')
-rw-r--r--arch/x86/xen/enlighten_pv.c133
1 files changed, 98 insertions, 35 deletions
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 5e57835e999d..26bbaf4b7330 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -49,7 +49,7 @@
#include <xen/hvc-console.h>
#include <xen/acpi.h>
-#include <asm/cpuid.h>
+#include <asm/cpuid/api.h>
#include <asm/paravirt.h>
#include <asm/apic.h>
#include <asm/page.h>
@@ -61,6 +61,7 @@
#include <asm/processor.h>
#include <asm/proto.h>
#include <asm/msr-index.h>
+#include <asm/msr.h>
#include <asm/traps.h>
#include <asm/setup.h>
#include <asm/desc.h>
@@ -73,6 +74,7 @@
#include <asm/mwait.h>
#include <asm/pci_x86.h>
#include <asm/cpu.h>
+#include <asm/irq_stack.h>
#ifdef CONFIG_X86_IOPL_IOPERM
#include <asm/io_bitmap.h>
#endif
@@ -94,12 +96,49 @@ void *xen_initial_gdt;
static int xen_cpu_up_prepare_pv(unsigned int cpu);
static int xen_cpu_dead_pv(unsigned int cpu);
+#ifndef CONFIG_PREEMPTION
+/*
+ * Some hypercalls issued by the toolstack can take many 10s of
+ * seconds. Allow tasks running hypercalls via the privcmd driver to
+ * be voluntarily preempted even if full kernel preemption is
+ * disabled.
+ *
+ * Such preemptible hypercalls are bracketed by
+ * xen_preemptible_hcall_begin() and xen_preemptible_hcall_end()
+ * calls.
+ */
+DEFINE_PER_CPU(bool, xen_in_preemptible_hcall);
+EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall);
+
+/*
+ * In case of scheduling the flag must be cleared and restored after
+ * returning from schedule as the task might move to a different CPU.
+ */
+static __always_inline bool get_and_clear_inhcall(void)
+{
+ bool inhcall = __this_cpu_read(xen_in_preemptible_hcall);
+
+ __this_cpu_write(xen_in_preemptible_hcall, false);
+ return inhcall;
+}
+
+static __always_inline void restore_inhcall(bool inhcall)
+{
+ __this_cpu_write(xen_in_preemptible_hcall, inhcall);
+}
+
+#else
+
+static __always_inline bool get_and_clear_inhcall(void) { return false; }
+static __always_inline void restore_inhcall(bool inhcall) { }
+
+#endif
+
struct tls_descs {
struct desc_struct desc[3];
};
DEFINE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode) = XEN_LAZY_NONE;
-DEFINE_PER_CPU(unsigned int, xen_lazy_nesting);
enum xen_lazy_mode xen_get_lazy_mode(void)
{
@@ -687,6 +726,36 @@ DEFINE_IDTENTRY_RAW(xenpv_exc_machine_check)
}
#endif
+static void __xen_pv_evtchn_do_upcall(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ inc_irq_stat(irq_hv_callback_count);
+
+ xen_evtchn_do_upcall();
+
+ set_irq_regs(old_regs);
+}
+
+__visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
+{
+ irqentry_state_t state = irqentry_enter(regs);
+ bool inhcall;
+
+ instrumentation_begin();
+ run_sysvec_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs);
+
+ inhcall = get_and_clear_inhcall();
+ if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) {
+ irqentry_exit_cond_resched();
+ instrumentation_end();
+ restore_inhcall(inhcall);
+ } else {
+ instrumentation_end();
+ irqentry_exit(regs, state);
+ }
+}
+
struct trap_array_entry {
void (*orig)(void);
void (*xen)(void);
@@ -1018,15 +1087,15 @@ static void xen_write_cr4(unsigned long cr4)
native_write_cr4(cr4);
}
-static u64 xen_do_read_msr(unsigned int msr, int *err)
+static u64 xen_do_read_msr(u32 msr, int *err)
{
u64 val = 0; /* Avoid uninitialized value for safe variant. */
- if (pmu_msr_read(msr, &val, err))
+ if (pmu_msr_chk_emulated(msr, &val, true))
return val;
if (err)
- val = native_read_msr_safe(msr, err);
+ *err = native_read_msr_safe(msr, &val);
else
val = native_read_msr(msr);
@@ -1042,17 +1111,9 @@ static u64 xen_do_read_msr(unsigned int msr, int *err)
return val;
}
-static void set_seg(unsigned int which, unsigned int low, unsigned int high,
- int *err)
+static void set_seg(u32 which, u64 base)
{
- u64 base = ((u64)high << 32) | low;
-
- if (HYPERVISOR_set_segment_base(which, base) == 0)
- return;
-
- if (err)
- *err = -EIO;
- else
+ if (HYPERVISOR_set_segment_base(which, base))
WARN(1, "Xen set_segment_base(%u, %llx) failed\n", which, base);
}
@@ -1061,20 +1122,19 @@ static void set_seg(unsigned int which, unsigned int low, unsigned int high,
* With err == NULL write_msr() semantics are selected.
* Supplying an err pointer requires err to be pre-initialized with 0.
*/
-static void xen_do_write_msr(unsigned int msr, unsigned int low,
- unsigned int high, int *err)
+static void xen_do_write_msr(u32 msr, u64 val, int *err)
{
switch (msr) {
case MSR_FS_BASE:
- set_seg(SEGBASE_FS, low, high, err);
+ set_seg(SEGBASE_FS, val);
break;
case MSR_KERNEL_GS_BASE:
- set_seg(SEGBASE_GS_USER, low, high, err);
+ set_seg(SEGBASE_GS_USER, val);
break;
case MSR_GS_BASE:
- set_seg(SEGBASE_GS_KERNEL, low, high, err);
+ set_seg(SEGBASE_GS_KERNEL, val);
break;
case MSR_STAR:
@@ -1090,42 +1150,45 @@ static void xen_do_write_msr(unsigned int msr, unsigned int low,
break;
default:
- if (!pmu_msr_write(msr, low, high, err)) {
- if (err)
- *err = native_write_msr_safe(msr, low, high);
- else
- native_write_msr(msr, low, high);
- }
+ if (pmu_msr_chk_emulated(msr, &val, false))
+ return;
+
+ if (err)
+ *err = native_write_msr_safe(msr, val);
+ else
+ native_write_msr(msr, val);
}
}
-static u64 xen_read_msr_safe(unsigned int msr, int *err)
+static int xen_read_msr_safe(u32 msr, u64 *val)
{
- return xen_do_read_msr(msr, err);
+ int err = 0;
+
+ *val = xen_do_read_msr(msr, &err);
+ return err;
}
-static int xen_write_msr_safe(unsigned int msr, unsigned int low,
- unsigned int high)
+static int xen_write_msr_safe(u32 msr, u64 val)
{
int err = 0;
- xen_do_write_msr(msr, low, high, &err);
+ xen_do_write_msr(msr, val, &err);
return err;
}
-static u64 xen_read_msr(unsigned int msr)
+static u64 xen_read_msr(u32 msr)
{
- int err;
+ int err = 0;
return xen_do_read_msr(msr, xen_msr_safe ? &err : NULL);
}
-static void xen_write_msr(unsigned int msr, unsigned low, unsigned high)
+static void xen_write_msr(u32 msr, u64 val)
{
int err;
- xen_do_write_msr(msr, low, high, xen_msr_safe ? &err : NULL);
+ xen_do_write_msr(msr, val, xen_msr_safe ? &err : NULL);
}
/* This is called once we have the cpu_possible_mask */