summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig9
-rw-r--r--arch/x86/Kconfig.debug8
-rw-r--r--arch/x86/entry/entry_32.S6
-rw-r--r--arch/x86/entry/entry_64.S6
-rw-r--r--arch/x86/events/amd/power.c2
-rw-r--r--arch/x86/events/intel/ds.c33
-rw-r--r--arch/x86/events/msr.c70
-rw-r--r--arch/x86/hyperv/hv_init.c123
-rw-r--r--arch/x86/hyperv/mmu.c12
-rw-r--r--arch/x86/include/asm/acpi.h2
-rw-r--r--arch/x86/include/asm/asm-prototypes.h4
-rw-r--r--arch/x86/include/asm/cpufeature.h7
-rw-r--r--arch/x86/include/asm/cpufeatures.h24
-rw-r--r--arch/x86/include/asm/disabled-features.h3
-rw-r--r--arch/x86/include/asm/hardirq.h3
-rw-r--r--arch/x86/include/asm/hypervisor.h1
-rw-r--r--arch/x86/include/asm/i8259.h5
-rw-r--r--arch/x86/include/asm/irq_vectors.h7
-rw-r--r--arch/x86/include/asm/jailhouse_para.h26
-rw-r--r--arch/x86/include/asm/kvm_host.h22
-rw-r--r--arch/x86/include/asm/mce.h2
-rw-r--r--arch/x86/include/asm/mpspec_def.h14
-rw-r--r--arch/x86/include/asm/mshyperv.h32
-rw-r--r--arch/x86/include/asm/msr-index.h14
-rw-r--r--arch/x86/include/asm/nospec-branch.h84
-rw-r--r--arch/x86/include/asm/pat.h2
-rw-r--r--arch/x86/include/asm/processor.h3
-rw-r--r--arch/x86/include/asm/required-features.h3
-rw-r--r--arch/x86/include/asm/svm.h3
-rw-r--r--arch/x86/include/asm/thread_info.h2
-rw-r--r--arch/x86/include/asm/uprobes.h4
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h1
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h14
-rw-r--r--arch/x86/include/asm/uv/uv_mmrs.h749
-rw-r--r--arch/x86/include/asm/x86_init.h1
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h22
-rw-r--r--arch/x86/include/uapi/asm/hyperv.h27
-rw-r--r--arch/x86/include/uapi/asm/kvm_para.h4
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/acpi/boot.c35
-rw-r--r--arch/x86/kernel/acpi/sleep.c2
-rw-r--r--arch/x86/kernel/alternative.c14
-rw-r--r--arch/x86/kernel/aperture_64.c46
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c24
-rw-r--r--arch/x86/kernel/apic/io_apic.c20
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c84
-rw-r--r--arch/x86/kernel/cpu/amd.c66
-rw-r--r--arch/x86/kernel/cpu/bugs.c40
-rw-r--r--arch/x86/kernel/cpu/centaur.c4
-rw-r--r--arch/x86/kernel/cpu/common.c49
-rw-r--r--arch/x86/kernel/cpu/hypervisor.c4
-rw-r--r--arch/x86/kernel/cpu/intel.c77
-rw-r--r--arch/x86/kernel/cpu/intel_rdt.c68
-rw-r--r--arch/x86/kernel/cpu/intel_rdt.h5
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_rdtgroup.c117
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c26
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c17
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c29
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c2
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c20
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c6
-rw-r--r--arch/x86/kernel/cpu/scattered.c4
-rw-r--r--arch/x86/kernel/ftrace_64.S2
-rw-r--r--arch/x86/kernel/irq.c9
-rw-r--r--arch/x86/kernel/itmt.c1
-rw-r--r--arch/x86/kernel/jailhouse.c211
-rw-r--r--arch/x86/kernel/kvm.c49
-rw-r--r--arch/x86/kernel/mpparse.c23
-rw-r--r--arch/x86/kernel/platform-quirks.c1
-rw-r--r--arch/x86/kernel/process.c1
-rw-r--r--arch/x86/kernel/setup.c1
-rw-r--r--arch/x86/kernel/smpboot.c5
-rw-r--r--arch/x86/kernel/time.c9
-rw-r--r--arch/x86/kernel/tsc.c61
-rw-r--r--arch/x86/kernel/uprobes.c107
-rw-r--r--arch/x86/kvm/Kconfig8
-rw-r--r--arch/x86/kvm/cpuid.c22
-rw-r--r--arch/x86/kvm/emulate.c71
-rw-r--r--arch/x86/kvm/irq.c2
-rw-r--r--arch/x86/kvm/lapic.c25
-rw-r--r--arch/x86/kvm/lapic.h4
-rw-r--r--arch/x86/kvm/mmu.c26
-rw-r--r--arch/x86/kvm/mmu_audit.c2
-rw-r--r--arch/x86/kvm/svm.c1199
-rw-r--r--arch/x86/kvm/vmx.c760
-rw-r--r--arch/x86/kvm/vmx_shadow_fields.h77
-rw-r--r--arch/x86/kvm/x86.c338
-rw-r--r--arch/x86/kvm/x86.h33
-rw-r--r--arch/x86/lib/Makefile1
-rw-r--r--arch/x86/lib/delay.c2
-rw-r--r--arch/x86/lib/retpoline.S57
-rw-r--r--arch/x86/mm/extable.c34
-rw-r--r--arch/x86/mm/fault.c22
-rw-r--r--arch/x86/mm/pat.c19
-rw-r--r--arch/x86/mm/tlb.c34
-rw-r--r--arch/x86/pci/intel_mid_pci.c1
-rw-r--r--arch/x86/platform/efi/efi_64.c1
-rw-r--r--arch/x86/platform/intel-mid/intel-mid.c2
-rw-r--r--arch/x86/platform/intel-mid/sfi.c5
-rw-r--r--arch/x86/platform/uv/tlb_uv.c3
-rw-r--r--arch/x86/tools/Makefile12
-rw-r--r--arch/x86/tools/insn_decoder_test.c (renamed from arch/x86/tools/test_get_len.c)43
-rw-r--r--arch/x86/tools/objdump_reformat.awk (renamed from arch/x86/tools/distill.awk)4
-rw-r--r--arch/x86/xen/mmu_hvm.c2
-rw-r--r--arch/x86/xen/spinlock.c2
105 files changed, 4374 insertions, 1032 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 20da391b5f32..423e4b64e683 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -810,6 +810,15 @@ config PARAVIRT_TIME_ACCOUNTING
config PARAVIRT_CLOCK
bool
+config JAILHOUSE_GUEST
+ bool "Jailhouse non-root cell support"
+ depends on X86_64 && PCI
+ select X86_PM_TIMER
+ ---help---
+ This option allows to run Linux as guest in a Jailhouse non-root
+ cell. You can leave this option disabled if you only want to start
+ Jailhouse and run Linux afterwards in the root cell.
+
endif #HYPERVISOR_GUEST
config NO_BOOTMEM
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 672441c008c7..192e4d2f9efc 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -169,14 +169,6 @@ config IOMMU_DEBUG
options. See Documentation/x86/x86_64/boot-options.txt for more
details.
-config IOMMU_STRESS
- bool "Enable IOMMU stress-test mode"
- ---help---
- This option disables various optimizations in IOMMU related
- code to do real stress testing of the IOMMU code. This option
- will cause a performance drop and should only be enabled for
- testing.
-
config IOMMU_LEAK
bool "IOMMU leak tracing"
depends on IOMMU_DEBUG && DMA_API_DEBUG
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 60c4c342316c..7a796eeddf99 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -252,7 +252,8 @@ ENTRY(__switch_to_asm)
* exist, overwrite the RSB with entries which capture
* speculative execution to prevent attack.
*/
- FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+ /* Clobbers %ebx */
+ FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif
/* restore callee-saved registers */
@@ -894,6 +895,9 @@ BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
hyperv_vector_handler)
+BUILD_INTERRUPT3(hyperv_reenlightenment_vector, HYPERV_REENLIGHTENMENT_VECTOR,
+ hyperv_reenlightenment_intr)
+
#endif /* CONFIG_HYPERV */
ENTRY(page_fault)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index ff6f8022612c..553aa49909ce 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -499,7 +499,8 @@ ENTRY(__switch_to_asm)
* exist, overwrite the RSB with entries which capture
* speculative execution to prevent attack.
*/
- FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+ /* Clobbers %rbx */
+ FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif
/* restore callee-saved registers */
@@ -1244,6 +1245,9 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
#if IS_ENABLED(CONFIG_HYPERV)
apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
hyperv_callback_vector hyperv_vector_handler
+
+apicinterrupt3 HYPERV_REENLIGHTENMENT_VECTOR \
+ hyperv_reenlightenment_vector hyperv_reenlightenment_intr
#endif /* CONFIG_HYPERV */
idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c
index a6eee5ac4f58..2aefacf5c5b2 100644
--- a/arch/x86/events/amd/power.c
+++ b/arch/x86/events/amd/power.c
@@ -277,7 +277,7 @@ static int __init amd_power_pmu_init(void)
int ret;
if (!x86_match_cpu(cpu_match))
- return 0;
+ return -ENODEV;
if (!boot_cpu_has(X86_FEATURE_ACC_POWER))
return -ENODEV;
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 8156e47da7ba..18c25ab28557 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -372,10 +372,9 @@ static int alloc_pebs_buffer(int cpu)
static void release_pebs_buffer(int cpu)
{
struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
- struct debug_store *ds = hwev->ds;
void *cea;
- if (!ds || !x86_pmu.pebs)
+ if (!x86_pmu.pebs)
return;
kfree(per_cpu(insn_buffer, cpu));
@@ -384,7 +383,6 @@ static void release_pebs_buffer(int cpu)
/* Clear the fixmap */
cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
- ds->pebs_buffer_base = 0;
dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
hwev->ds_pebs_vaddr = NULL;
}
@@ -419,16 +417,14 @@ static int alloc_bts_buffer(int cpu)
static void release_bts_buffer(int cpu)
{
struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
- struct debug_store *ds = hwev->ds;
void *cea;
- if (!ds || !x86_pmu.bts)
+ if (!x86_pmu.bts)
return;
/* Clear the fixmap */
cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
ds_clear_cea(cea, BTS_BUFFER_SIZE);
- ds->bts_buffer_base = 0;
dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
hwev->ds_bts_vaddr = NULL;
}
@@ -454,16 +450,22 @@ void release_ds_buffers(void)
if (!x86_pmu.bts && !x86_pmu.pebs)
return;
- get_online_cpus();
- for_each_online_cpu(cpu)
+ for_each_possible_cpu(cpu)
+ release_ds_buffer(cpu);
+
+ for_each_possible_cpu(cpu) {
+ /*
+ * Again, ignore errors from offline CPUs, they will no longer
+ * observe cpu_hw_events.ds and not program the DS_AREA when
+ * they come up.
+ */
fini_debug_store_on_cpu(cpu);
+ }
for_each_possible_cpu(cpu) {
release_pebs_buffer(cpu);
release_bts_buffer(cpu);
- release_ds_buffer(cpu);
}
- put_online_cpus();
}
void reserve_ds_buffers(void)
@@ -483,8 +485,6 @@ void reserve_ds_buffers(void)
if (!x86_pmu.pebs)
pebs_err = 1;
- get_online_cpus();
-
for_each_possible_cpu(cpu) {
if (alloc_ds_buffer(cpu)) {
bts_err = 1;
@@ -521,11 +521,14 @@ void reserve_ds_buffers(void)
if (x86_pmu.pebs && !pebs_err)
x86_pmu.pebs_active = 1;
- for_each_online_cpu(cpu)
+ for_each_possible_cpu(cpu) {
+ /*
+ * Ignores wrmsr_on_cpu() errors for offline CPUs they
+ * will get this call through intel_pmu_cpu_starting().
+ */
init_debug_store_on_cpu(cpu);
+ }
}
-
- put_online_cpus();
}
/*
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 14efaa0e8684..18e2628e2d8f 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -10,7 +10,9 @@ enum perf_msr_id {
PERF_MSR_SMI = 4,
PERF_MSR_PTSC = 5,
PERF_MSR_IRPERF = 6,
-
+ PERF_MSR_THERM = 7,
+ PERF_MSR_THERM_SNAP = 8,
+ PERF_MSR_THERM_UNIT = 9,
PERF_MSR_EVENT_MAX,
};
@@ -29,6 +31,11 @@ static bool test_irperf(int idx)
return boot_cpu_has(X86_FEATURE_IRPERF);
}
+static bool test_therm_status(int idx)
+{
+ return boot_cpu_has(X86_FEATURE_DTHERM);
+}
+
static bool test_intel(int idx)
{
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
@@ -95,22 +102,28 @@ struct perf_msr {
bool (*test)(int idx);
};
-PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00");
-PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01");
-PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02");
-PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03");
-PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04");
-PMU_EVENT_ATTR_STRING(ptsc, evattr_ptsc, "event=0x05");
-PMU_EVENT_ATTR_STRING(irperf, evattr_irperf, "event=0x06");
+PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00" );
+PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01" );
+PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02" );
+PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03" );
+PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04" );
+PMU_EVENT_ATTR_STRING(ptsc, evattr_ptsc, "event=0x05" );
+PMU_EVENT_ATTR_STRING(irperf, evattr_irperf, "event=0x06" );
+PMU_EVENT_ATTR_STRING(cpu_thermal_margin, evattr_therm, "event=0x07" );
+PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot, evattr_therm_snap, "1" );
+PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit, evattr_therm_unit, "C" );
static struct perf_msr msr[] = {
- [PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, },
- [PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, },
- [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, },
- [PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, },
- [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, },
- [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &evattr_ptsc, test_ptsc, },
- [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &evattr_irperf, test_irperf, },
+ [PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, },
+ [PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, },
+ [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, },
+ [PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, },
+ [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, },
+ [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &evattr_ptsc, test_ptsc, },
+ [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &evattr_irperf, test_irperf, },
+ [PERF_MSR_THERM] = { MSR_IA32_THERM_STATUS, &evattr_therm, test_therm_status, },
+ [PERF_MSR_THERM_SNAP] = { MSR_IA32_THERM_STATUS, &evattr_therm_snap, test_therm_status, },
+ [PERF_MSR_THERM_UNIT] = { MSR_IA32_THERM_STATUS, &evattr_therm_unit, test_therm_status, },
};
static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = {
@@ -161,9 +174,9 @@ static int msr_event_init(struct perf_event *event)
if (!msr[cfg].attr)
return -EINVAL;
- event->hw.idx = -1;
- event->hw.event_base = msr[cfg].msr;
- event->hw.config = cfg;
+ event->hw.idx = -1;
+ event->hw.event_base = msr[cfg].msr;
+ event->hw.config = cfg;
return 0;
}
@@ -184,7 +197,7 @@ static void msr_event_update(struct perf_event *event)
u64 prev, now;
s64 delta;
- /* Careful, an NMI might modify the previous event value. */
+ /* Careful, an NMI might modify the previous event value: */
again:
prev = local64_read(&event->hw.prev_count);
now = msr_read_counter(event);
@@ -193,17 +206,22 @@ again:
goto again;
delta = now - prev;
- if (unlikely(event->hw.event_base == MSR_SMI_COUNT))
+ if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) {
delta = sign_extend64(delta, 31);
-
- local64_add(delta, &event->count);
+ local64_add(delta, &event->count);
+ } else if (unlikely(event->hw.event_base == MSR_IA32_THERM_STATUS)) {
+ /* If valid, extract digital readout, otherwise set to -1: */
+ now = now & (1ULL << 31) ? (now >> 16) & 0x3f : -1;
+ local64_set(&event->count, now);
+ } else {
+ local64_add(delta, &event->count);
+ }
}
static void msr_event_start(struct perf_event *event, int flags)
{
- u64 now;
+ u64 now = msr_read_counter(event);
- now = msr_read_counter(event);
local64_set(&event->hw.prev_count, now);
}
@@ -250,9 +268,7 @@ static int __init msr_init(void)
for (i = PERF_MSR_TSC + 1; i < PERF_MSR_EVENT_MAX; i++) {
u64 val;
- /*
- * Virt sucks arse; you cannot tell if a R/O MSR is present :/
- */
+ /* Virt sucks; you cannot tell if a R/O MSR is present :/ */
if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
msr[i].attr = NULL;
}
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 189a398290db..a3adece392f1 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -18,6 +18,8 @@
*/
#include <linux/types.h>
+#include <asm/apic.h>
+#include <asm/desc.h>
#include <asm/hypervisor.h>
#include <asm/hyperv.h>
#include <asm/mshyperv.h>
@@ -37,6 +39,7 @@ struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
{
return tsc_pg;
}
+EXPORT_SYMBOL_GPL(hv_get_tsc_page);
static u64 read_hv_clock_tsc(struct clocksource *arg)
{
@@ -101,6 +104,115 @@ static int hv_cpu_init(unsigned int cpu)
return 0;
}
+static void (*hv_reenlightenment_cb)(void);
+
+static void hv_reenlightenment_notify(struct work_struct *dummy)
+{
+ struct hv_tsc_emulation_status emu_status;
+
+ rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status);
+
+ /* Don't issue the callback if TSC accesses are not emulated */
+ if (hv_reenlightenment_cb && emu_status.inprogress)
+ hv_reenlightenment_cb();
+}
+static DECLARE_DELAYED_WORK(hv_reenlightenment_work, hv_reenlightenment_notify);
+
+void hyperv_stop_tsc_emulation(void)
+{
+ u64 freq;
+ struct hv_tsc_emulation_status emu_status;
+
+ rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status);
+ emu_status.inprogress = 0;
+ wrmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status);
+
+ rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq);
+ tsc_khz = div64_u64(freq, 1000);
+}
+EXPORT_SYMBOL_GPL(hyperv_stop_tsc_emulation);
+
+static inline bool hv_reenlightenment_available(void)
+{
+ /*
+ * Check for required features and priviliges to make TSC frequency
+ * change notifications work.
+ */
+ return ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS &&
+ ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE &&
+ ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT;
+}
+
+__visible void __irq_entry hyperv_reenlightenment_intr(struct pt_regs *regs)
+{
+ entering_ack_irq();
+
+ inc_irq_stat(irq_hv_reenlightenment_count);
+
+ schedule_delayed_work(&hv_reenlightenment_work, HZ/10);
+
+ exiting_irq();
+}
+
+void set_hv_tscchange_cb(void (*cb)(void))
+{
+ struct hv_reenlightenment_control re_ctrl = {
+ .vector = HYPERV_REENLIGHTENMENT_VECTOR,
+ .enabled = 1,
+ .target_vp = hv_vp_index[smp_processor_id()]
+ };
+ struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1};
+
+ if (!hv_reenlightenment_available()) {
+ pr_warn("Hyper-V: reenlightenment support is unavailable\n");
+ return;
+ }
+
+ hv_reenlightenment_cb = cb;
+
+ /* Make sure callback is registered before we write to MSRs */
+ wmb();
+
+ wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
+ wrmsrl(HV_X64_MSR_TSC_EMULATION_CONTROL, *((u64 *)&emu_ctrl));
+}
+EXPORT_SYMBOL_GPL(set_hv_tscchange_cb);
+
+void clear_hv_tscchange_cb(void)
+{
+ struct hv_reenlightenment_control re_ctrl;
+
+ if (!hv_reenlightenment_available())
+ return;
+
+ rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl);
+ re_ctrl.enabled = 0;
+ wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl);
+
+ hv_reenlightenment_cb = NULL;
+}
+EXPORT_SYMBOL_GPL(clear_hv_tscchange_cb);
+
+static int hv_cpu_die(unsigned int cpu)
+{
+ struct hv_reenlightenment_control re_ctrl;
+ unsigned int new_cpu;
+
+ if (hv_reenlightenment_cb == NULL)
+ return 0;
+
+ rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
+ if (re_ctrl.target_vp == hv_vp_index[cpu]) {
+ /* Reassign to some other online CPU */
+ new_cpu = cpumask_any_but(cpu_online_mask, cpu);
+
+ re_ctrl.target_vp = hv_vp_index[new_cpu];
+ wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
+ }
+
+ return 0;
+}
+
/*
* This function is to be invoked early in the boot sequence after the
* hypervisor has been detected.
@@ -110,12 +222,19 @@ static int hv_cpu_init(unsigned int cpu)
*/
void hyperv_init(void)
{
- u64 guest_id;
+ u64 guest_id, required_msrs;
union hv_x64_msr_hypercall_contents hypercall_msr;
if (x86_hyper_type != X86_HYPER_MS_HYPERV)
return;
+ /* Absolutely required MSRs */
+ required_msrs = HV_X64_MSR_HYPERCALL_AVAILABLE |
+ HV_X64_MSR_VP_INDEX_AVAILABLE;
+
+ if ((ms_hyperv.features & required_msrs) != required_msrs)
+ return;
+
/* Allocate percpu VP index */
hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index),
GFP_KERNEL);
@@ -123,7 +242,7 @@ void hyperv_init(void)
return;
if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online",
- hv_cpu_init, NULL) < 0)
+ hv_cpu_init, hv_cpu_die) < 0)
goto free_vp_index;
/*
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index 9cc9e1c1e2db..56c9ebac946f 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -137,7 +137,12 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
}
if (info->mm) {
+ /*
+ * AddressSpace argument must match the CR3 with PCID bits
+ * stripped out.
+ */
flush->address_space = virt_to_phys(info->mm->pgd);
+ flush->address_space &= CR3_ADDR_MASK;
flush->flags = 0;
} else {
flush->address_space = 0;
@@ -219,7 +224,12 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
}
if (info->mm) {
+ /*
+ * AddressSpace argument must match the CR3 with PCID bits
+ * stripped out.
+ */
flush->address_space = virt_to_phys(info->mm->pgd);
+ flush->address_space &= CR3_ADDR_MASK;
flush->flags = 0;
} else {
flush->address_space = 0;
@@ -278,8 +288,6 @@ void hyperv_setup_mmu_ops(void)
if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
return;
- setup_clear_cpu_cap(X86_FEATURE_PCID);
-
if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) {
pr_info("Using hypercall for remote TLB flush\n");
pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 8d0ec9df1cbe..44f5d79d5105 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -49,7 +49,7 @@ extern int acpi_fix_pin2_polarity;
extern int acpi_disable_cmcff;
extern u8 acpi_sci_flags;
-extern int acpi_sci_override_gsi;
+extern u32 acpi_sci_override_gsi;
void acpi_pic_sci_set_trigger(unsigned int, u16);
struct device;
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 0927cdc4f946..4d111616524b 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -38,5 +38,7 @@ INDIRECT_THUNK(dx)
INDIRECT_THUNK(si)
INDIRECT_THUNK(di)
INDIRECT_THUNK(bp)
-INDIRECT_THUNK(sp)
+asmlinkage void __fill_rsb(void);
+asmlinkage void __clear_rsb(void);
+
#endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index ea9a7dde62e5..70eddb3922ff 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -29,6 +29,7 @@ enum cpuid_leafs
CPUID_8000_000A_EDX,
CPUID_7_ECX,
CPUID_8000_0007_EBX,
+ CPUID_7_EDX,
};
#ifdef CONFIG_X86_FEATURE_NAMES
@@ -79,8 +80,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
REQUIRED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 18))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 19))
#define DISABLED_MASK_BIT_SET(feature_bit) \
( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
@@ -101,8 +103,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
DISABLED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 18))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 19))
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 25b9375c1484..0dfe4d3f74e2 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -13,7 +13,7 @@
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 18 /* N 32-bit words worth of info */
+#define NCAPINTS 19 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */
/*
@@ -203,14 +203,16 @@
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
-#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
-#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
-#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
-#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
+#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
+#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */
+
+#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
@@ -271,6 +273,9 @@
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
+#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
@@ -319,6 +324,13 @@
#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */
#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
+#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
+#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+
/*
* BUG word(s)
*/
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index b027633e7300..33833d1909af 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -77,6 +77,7 @@
#define DISABLED_MASK15 0
#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP)
#define DISABLED_MASK17 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define DISABLED_MASK18 0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 51cc979dd364..7c341a74ec8c 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -38,6 +38,9 @@ typedef struct {
#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
unsigned int irq_hv_callback_count;
#endif
+#if IS_ENABLED(CONFIG_HYPERV)
+ unsigned int irq_hv_reenlightenment_count;
+#endif
} ____cacheline_aligned irq_cpustat_t;
DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 96aa6b9884dc..8c5aaba6633f 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -28,6 +28,7 @@ enum x86_hypervisor_type {
X86_HYPER_XEN_PV,
X86_HYPER_XEN_HVM,
X86_HYPER_KVM,
+ X86_HYPER_JAILHOUSE,
};
#ifdef CONFIG_HYPERVISOR_GUEST
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index c8376b40e882..5cdcdbd4d892 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -69,6 +69,11 @@ struct legacy_pic {
extern struct legacy_pic *legacy_pic;
extern struct legacy_pic null_legacy_pic;
+static inline bool has_legacy_pic(void)
+{
+ return legacy_pic != &null_legacy_pic;
+}
+
static inline int nr_legacy_irqs(void)
{
return legacy_pic->nr_legacy_irqs;
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 67421f649cfa..e71c1120426b 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -103,7 +103,12 @@
#endif
#define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef
-#define LOCAL_TIMER_VECTOR 0xee
+
+#if IS_ENABLED(CONFIG_HYPERV)
+#define HYPERV_REENLIGHTENMENT_VECTOR 0xee
+#endif
+
+#define LOCAL_TIMER_VECTOR 0xed
#define NR_VECTORS 256
diff --git a/arch/x86/include/asm/jailhouse_para.h b/arch/x86/include/asm/jailhouse_para.h
new file mode 100644
index 000000000000..875b54376689
--- /dev/null
+++ b/arch/x86/include/asm/jailhouse_para.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL2.0 */
+
+/*
+ * Jailhouse paravirt_ops implementation
+ *
+ * Copyright (c) Siemens AG, 2015-2017
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ */
+
+#ifndef _ASM_X86_JAILHOUSE_PARA_H
+#define _ASM_X86_JAILHOUSE_PARA_H
+
+#include <linux/types.h>
+
+#ifdef CONFIG_JAILHOUSE_GUEST
+bool jailhouse_paravirt(void);
+#else
+static inline bool jailhouse_paravirt(void)
+{
+ return false;
+}
+#endif
+
+#endif /* _ASM_X86_JAILHOUSE_PARA_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 516798431328..dd6f57a54a26 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -86,7 +86,7 @@
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
- | X86_CR4_SMAP | X86_CR4_PKE))
+ | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
@@ -504,6 +504,7 @@ struct kvm_vcpu_arch {
int mp_state;
u64 ia32_misc_enable_msr;
u64 smbase;
+ u64 smi_count;
bool tpr_access_reporting;
u64 ia32_xss;
@@ -760,6 +761,15 @@ enum kvm_irqchip_mode {
KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */
};
+struct kvm_sev_info {
+ bool active; /* SEV enabled guest */
+ unsigned int asid; /* ASID used for this guest */
+ unsigned int handle; /* SEV firmware handle */
+ int fd; /* SEV device fd */
+ unsigned long pages_locked; /* Number of pages locked */
+ struct list_head regions_list; /* List of registered regions */
+};
+
struct kvm_arch {
unsigned int n_used_mmu_pages;
unsigned int n_requested_mmu_pages;
@@ -847,6 +857,8 @@ struct kvm_arch {
bool x2apic_format;
bool x2apic_broadcast_quirk_disabled;
+
+ struct kvm_sev_info sev_info;
};
struct kvm_vm_stat {
@@ -883,7 +895,6 @@ struct kvm_vcpu_stat {
u64 request_irq_exits;
u64 irq_exits;
u64 host_state_reload;
- u64 efer_reload;
u64 fpu_reload;
u64 insn_emulation;
u64 insn_emulation_fail;
@@ -965,7 +976,7 @@ struct kvm_x86_ops {
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
- void (*tlb_flush)(struct kvm_vcpu *vcpu);
+ void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa);
void (*run)(struct kvm_vcpu *vcpu);
int (*handle_exit)(struct kvm_vcpu *vcpu);
@@ -1017,6 +1028,7 @@ struct kvm_x86_ops {
void (*handle_external_intr)(struct kvm_vcpu *vcpu);
bool (*mpx_supported)(void);
bool (*xsaves_supported)(void);
+ bool (*umip_emulated)(void);
int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
@@ -1079,6 +1091,10 @@ struct kvm_x86_ops {
int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
int (*enable_smi_window)(struct kvm_vcpu *vcpu);
+
+ int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
+ int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+ int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
};
struct kvm_arch_async_pf {
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index b1e8d8db921f..96ea4b5ba658 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -376,6 +376,7 @@ struct smca_bank {
extern struct smca_bank smca_banks[MAX_NR_BANKS];
extern const char *smca_get_long_name(enum smca_bank_types t);
+extern bool amd_mce_is_memory_error(struct mce *m);
extern int mce_threshold_create_device(unsigned int cpu);
extern int mce_threshold_remove_device(unsigned int cpu);
@@ -384,6 +385,7 @@ extern int mce_threshold_remove_device(unsigned int cpu);
static inline int mce_threshold_create_device(unsigned int cpu) { return 0; };
static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; };
+static inline bool amd_mce_is_memory_error(struct mce *m) { return false; };
#endif
diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index a6bec8028480..6fb923a34309 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -128,9 +128,17 @@ enum mp_irq_source_types {
mp_ExtINT = 3
};
-#define MP_IRQDIR_DEFAULT 0
-#define MP_IRQDIR_HIGH 1
-#define MP_IRQDIR_LOW 3
+#define MP_IRQPOL_DEFAULT 0x0
+#define MP_IRQPOL_ACTIVE_HIGH 0x1
+#define MP_IRQPOL_RESERVED 0x2
+#define MP_IRQPOL_ACTIVE_LOW 0x3
+#define MP_IRQPOL_MASK 0x3
+
+#define MP_IRQTRIG_DEFAULT 0x0
+#define MP_IRQTRIG_EDGE 0x4
+#define MP_IRQTRIG_RESERVED 0x8
+#define MP_IRQTRIG_LEVEL 0xc
+#define MP_IRQTRIG_MASK 0xc
#define MP_APIC_ALL 0xFF
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 8bf450b13d9f..1790002a2052 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -160,6 +160,7 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
#define hv_set_synint_state(int_num, val) wrmsrl(int_num, val)
void hyperv_callback_vector(void);
+void hyperv_reenlightenment_vector(void);
#ifdef CONFIG_TRACING
#define trace_hyperv_callback_vector hyperv_callback_vector
#endif
@@ -316,18 +317,27 @@ void hyper_alloc_mmu(void);
void hyperv_report_panic(struct pt_regs *regs, long err);
bool hv_is_hypercall_page_setup(void);
void hyperv_cleanup(void);
+
+void hyperv_reenlightenment_intr(struct pt_regs *regs);
+void set_hv_tscchange_cb(void (*cb)(void));
+void clear_hv_tscchange_cb(void);
+void hyperv_stop_tsc_emulation(void);
#else /* CONFIG_HYPERV */
static inline void hyperv_init(void) {}
static inline bool hv_is_hypercall_page_setup(void) { return false; }
static inline void hyperv_cleanup(void) {}
static inline void hyperv_setup_mmu_ops(void) {}
+static inline void set_hv_tscchange_cb(void (*cb)(void)) {}
+static inline void clear_hv_tscchange_cb(void) {}
+static inline void hyperv_stop_tsc_emulation(void) {};
#endif /* CONFIG_HYPERV */
#ifdef CONFIG_HYPERV_TSCPAGE
struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
-static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
+static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
+ u64 *cur_tsc)
{
- u64 scale, offset, cur_tsc;
+ u64 scale, offset;
u32 sequence;
/*
@@ -358,7 +368,7 @@ static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
scale = READ_ONCE(tsc_pg->tsc_scale);
offset = READ_ONCE(tsc_pg->tsc_offset);
- cur_tsc = rdtsc_ordered();
+ *cur_tsc = rdtsc_ordered();
/*
* Make sure we read sequence after we read all other values
@@ -368,7 +378,14 @@ static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
} while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);
- return mul_u64_u64_shr(cur_tsc, scale, 64) + offset;
+ return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset;
+}
+
+static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
+{
+ u64 cur_tsc;
+
+ return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc);
}
#else
@@ -376,5 +393,12 @@ static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
{
return NULL;
}
+
+static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
+ u64 *cur_tsc)
+{
+ BUG();
+ return U64_MAX;
+}
#endif
#endif
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index e7b983a35506..c9084dedfcfa 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -39,6 +39,13 @@
/* Intel MSRs. Some also available on other CPUs */
+#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
+#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
+#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
+
+#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
+#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
+
#define MSR_PPIN_CTL 0x0000004e
#define MSR_PPIN 0x0000004f
@@ -57,6 +64,11 @@
#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
#define MSR_MTRRcap 0x000000fe
+
+#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
+#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */
+#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */
+
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e
@@ -385,6 +397,8 @@
#define MSR_K7_PERFCTR3 0xc0010007
#define MSR_K7_CLK_CTL 0xc001001b
#define MSR_K7_HWCR 0xc0010015
+#define MSR_K7_HWCR_SMMLOCK_BIT 0
+#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
#define MSR_K7_FID_VID_CTL 0xc0010041
#define MSR_K7_FID_VID_STATUS 0xc0010042
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 4ad41087ce0e..d15d471348b8 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -1,56 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __NOSPEC_BRANCH_H__
-#define __NOSPEC_BRANCH_H__
+#ifndef _ASM_X86_NOSPEC_BRANCH_H_
+#define _ASM_X86_NOSPEC_BRANCH_H_
#include <asm/alternative.h>
#include <asm/alternative-asm.h>
#include <asm/cpufeatures.h>
-/*
- * Fill the CPU return stack buffer.
- *
- * Each entry in the RSB, if used for a speculative 'ret', contains an
- * infinite 'pause; lfence; jmp' loop to capture speculative execution.
- *
- * This is required in various cases for retpoline and IBRS-based
- * mitigations for the Spectre variant 2 vulnerability. Sometimes to
- * eliminate potentially bogus entries from the RSB, and sometimes
- * purely to ensure that it doesn't get empty, which on some CPUs would
- * allow predictions from other (unwanted!) sources to be used.
- *
- * We define a CPP macro such that it can be used from both .S files and
- * inline assembly. It's possible to do a .macro and then include that
- * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
- */
-
-#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
-#define RSB_FILL_LOOPS 16 /* To avoid underflow */
-
-/*
- * Google experimented with loop-unrolling and this turned out to be
- * the optimal version — two calls, each with their own speculation
- * trap should their return address end up getting used, in a loop.
- */
-#define __FILL_RETURN_BUFFER(reg, nr, sp) \
- mov $(nr/2), reg; \
-771: \
- call 772f; \
-773: /* speculation trap */ \
- pause; \
- lfence; \
- jmp 773b; \
-772: \
- call 774f; \
-775: /* speculation trap */ \
- pause; \
- lfence; \
- jmp 775b; \
-774: \
- dec reg; \
- jnz 771b; \
- add $(BITS_PER_LONG/8) * nr, sp;
-
#ifdef __ASSEMBLY__
/*
@@ -121,17 +77,10 @@
#endif
.endm
- /*
- * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
- * monstrosity above, manually.
- */
-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
+/* This clobbers the BX register */
+.macro FILL_RETURN_BUFFER nr:req ftr:req
#ifdef CONFIG_RETPOLINE
- ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE "jmp .Lskip_rsb_\@", \
- __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
- \ftr
-.Lskip_rsb_\@:
+ ALTERNATIVE "", "call __clear_rsb", \ftr
#endif
.endm
@@ -206,17 +155,20 @@ extern char __indirect_thunk_end[];
static inline void vmexit_fill_RSB(void)
{
#ifdef CONFIG_RETPOLINE
- unsigned long loops;
-
- asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE("jmp 910f",
- __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
- X86_FEATURE_RETPOLINE)
- "910:"
- : "=r" (loops), ASM_CALL_CONSTRAINT
- : : "memory" );
+ alternative_input("",
+ "call __fill_rsb",
+ X86_FEATURE_RETPOLINE,
+ ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory"));
#endif
}
+static inline void indirect_branch_prediction_barrier(void)
+{
+ alternative_input("",
+ "call __ibp_barrier",
+ X86_FEATURE_USE_IBPB,
+ ASM_NO_INPUT_CLOBBER("eax", "ecx", "edx", "memory"));
+}
+
#endif /* __ASSEMBLY__ */
-#endif /* __NOSPEC_BRANCH_H__ */
+#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
index 8a3ee355b422..92015c65fa2a 100644
--- a/arch/x86/include/asm/pat.h
+++ b/arch/x86/include/asm/pat.h
@@ -22,4 +22,6 @@ int io_reserve_memtype(resource_size_t start, resource_size_t end,
void io_free_memtype(resource_size_t start, resource_size_t end);
+bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn);
+
#endif /* _ASM_X86_PAT_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index d3a67fba200a..efbde088a718 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -971,4 +971,7 @@ bool xen_set_default_idle(void);
void stop_this_cpu(void *dummy);
void df_debug(struct pt_regs *regs, long error_code);
+
+void __ibp_barrier(void);
+
#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index d91ba04dd007..fb3a6de7440b 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -106,6 +106,7 @@
#define REQUIRED_MASK15 0
#define REQUIRED_MASK16 (NEED_LA57)
#define REQUIRED_MASK17 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define REQUIRED_MASK18 0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
#endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 78dd9df88157..0487ac054870 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -146,6 +146,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL
#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL
+#define SVM_NESTED_CTL_NP_ENABLE BIT(0)
+#define SVM_NESTED_CTL_SEV_ENABLE BIT(1)
+
struct __attribute__ ((__packed__)) vmcb_seg {
u16 selector;
u16 attrib;
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 00223333821a..d25a638a2720 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -62,8 +62,6 @@ struct thread_info {
.flags = 0, \
}
-#define init_stack (init_thread_union.stack)
-
#else /* !__ASSEMBLY__ */
#include <asm/asm-offsets.h>
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 74f4c2ff6427..d8bfa98fca98 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -53,6 +53,10 @@ struct arch_uprobe {
u8 fixups;
u8 ilen;
} defparam;
+ struct {
+ u8 reg_offset; /* to the start of pt_regs */
+ u8 ilen;
+ } push;
};
};
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 7cac79802ad2..7803114aa140 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -48,7 +48,6 @@
#define UV2_NET_ENDPOINT_INTD 0x28
#define UV_NET_ENDPOINT_INTD (is_uv1_hub() ? \
UV1_NET_ENDPOINT_INTD : UV2_NET_ENDPOINT_INTD)
-#define UV_DESC_PSHIFT 49
#define UV_PAYLOADQ_GNODE_SHIFT 49
#define UV_PTC_BASENAME "sgi_uv/ptc_statistics"
#define UV_BAU_BASENAME "sgi_uv/bau_tunables"
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 036e26d63d9a..44cf6d6deb7a 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -241,6 +241,7 @@ static inline int uv_hub_info_check(int version)
#define UV2_HUB_REVISION_BASE 3
#define UV3_HUB_REVISION_BASE 5
#define UV4_HUB_REVISION_BASE 7
+#define UV4A_HUB_REVISION_BASE 8 /* UV4 (fixed) rev 2 */
#ifdef UV1_HUB_IS_SUPPORTED
static inline int is_uv1_hub(void)
@@ -280,6 +281,19 @@ static inline int is_uv3_hub(void)
}
#endif
+/* First test "is UV4A", then "is UV4" */
+#ifdef UV4A_HUB_IS_SUPPORTED
+static inline int is_uv4a_hub(void)
+{
+ return (uv_hub_info->hub_revision >= UV4A_HUB_REVISION_BASE);
+}
+#else
+static inline int is_uv4a_hub(void)
+{
+ return 0;
+}
+#endif
+
#ifdef UV4_HUB_IS_SUPPORTED
static inline int is_uv4_hub(void)
{
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index 548d684a7960..ecb9ddef128f 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -39,9 +39,11 @@
* #define UV2Hxxx b
* #define UV3Hxxx c
* #define UV4Hxxx d
+ * #define UV4AHxxx e
* #define UVHxxx (is_uv1_hub() ? UV1Hxxx :
* (is_uv2_hub() ? UV2Hxxx :
* (is_uv3_hub() ? UV3Hxxx :
+ * (is_uv4a_hub() ? UV4AHxxx :
* UV4Hxxx))
*
* If the MMR exists on all hub types > 1 but have different addresses, the
@@ -49,8 +51,10 @@
* #define UV2Hxxx b
* #define UV3Hxxx c
* #define UV4Hxxx d
+ * #define UV4AHxxx e
* #define UVHxxx (is_uv2_hub() ? UV2Hxxx :
* (is_uv3_hub() ? UV3Hxxx :
+ * (is_uv4a_hub() ? UV4AHxxx :
* UV4Hxxx))
*
* union uvh_xxx {
@@ -63,6 +67,7 @@
* } s2;
* struct uv3h_xxx_s { # Full UV3 definition (*)
* } s3;
+ * (NOTE: No struct uv4ah_xxx_s members exist)
* struct uv4h_xxx_s { # Full UV4 definition (*)
* } s4;
* };
@@ -99,6 +104,7 @@
#define UV2_HUB_IS_SUPPORTED 1
#define UV3_HUB_IS_SUPPORTED 1
#define UV4_HUB_IS_SUPPORTED 1
+#define UV4A_HUB_IS_SUPPORTED 1
/* Error function to catch undefined references */
extern unsigned long uv_undefined(char *str);
@@ -2779,35 +2785,47 @@ union uvh_lb_bau_sb_activation_status_1_u {
/*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_32)
#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL
+#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49
#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL
+#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL
-
+#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49
#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL
+#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL
+#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49
#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL
+#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL
+#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49
#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x00003ffffffff000UL
-
-
-union uvh_lb_bau_sb_descriptor_base_u {
- unsigned long v;
- struct uvh_lb_bau_sb_descriptor_base_s {
- unsigned long rsvd_0_11:12;
- unsigned long rsvd_12_48:37;
- unsigned long node_id:14; /* RW */
- unsigned long rsvd_63:1;
- } s;
- struct uv4h_lb_bau_sb_descriptor_base_s {
- unsigned long rsvd_0_11:12;
- unsigned long page_address:34; /* RW */
- unsigned long rsvd_46_48:3;
- unsigned long node_id:14; /* RW */
- unsigned long rsvd_63:1;
- } s4;
-};
+#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL
+
+#define UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 53
+#define UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000ffffffffff000UL
+#define UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0xffe0000000000000UL
+
+#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT ( \
+ is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \
+ is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \
+ is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \
+ is_uv4a_hub() ? UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \
+ /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT)
+
+#define UVH_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK ( \
+ is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \
+ is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \
+ is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \
+ is_uv4a_hub() ? UV4AH_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \
+ /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK)
+
+#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK ( \
+ is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \
+ is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \
+ is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \
+ is_uv4a_hub() ? UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \
+ /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK)
/* ========================================================================= */
/* UVH_NODE_ID */
@@ -3031,6 +3049,41 @@ union uvh_node_present_table_u {
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
+#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
+#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
+#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
+#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
+#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
+#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
+
+#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
+#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
+#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
+#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
+#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
+#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
+
+#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
+#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
+#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
+#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
+#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
+#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
+
+#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
+#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
+#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
+#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
+#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
+#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
+
+#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
+#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
+#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
+#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
+#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
+#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
+
union uvh_rh_gam_alias210_overlay_config_0_mmr_u {
unsigned long v;
@@ -3042,6 +3095,46 @@ union uvh_rh_gam_alias210_overlay_config_0_mmr_u {
unsigned long rsvd_53_62:10;
unsigned long enable:1; /* RW */
} s;
+ struct uv1h_rh_gam_alias210_overlay_config_0_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
+ } s1;
+ struct uvxh_rh_gam_alias210_overlay_config_0_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
+ } sx;
+ struct uv2h_rh_gam_alias210_overlay_config_0_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
+ } s2;
+ struct uv3h_rh_gam_alias210_overlay_config_0_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
+ } s3;
+ struct uv4h_rh_gam_alias210_overlay_config_0_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
+ } s4;
};
/* ========================================================================= */
@@ -3064,6 +3157,41 @@ union uvh_rh_gam_alias210_overlay_config_0_mmr_u {
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
+#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
+#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
+#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63
+#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL
+#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
+#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
+
+#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
+#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
+#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63
+#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL
+#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
+#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
+
+#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
+#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
+#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63
+#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL
+#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
+#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
+
+#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
+#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
+#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63
+#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL
+#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
+#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
+
+#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
+#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
+#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63
+#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL
+#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
+#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
+
union uvh_rh_gam_alias210_overlay_config_1_mmr_u {
unsigned long v;
@@ -3075,6 +3203,46 @@ union uvh_rh_gam_alias210_overlay_config_1_mmr_u {
unsigned long rsvd_53_62:10;
unsigned long enable:1; /* RW */
} s;
+ struct uv1h_rh_gam_alias210_overlay_config_1_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
+ } s1;
+ struct uvxh_rh_gam_alias210_overlay_config_1_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
+ } sx;
+ struct uv2h_rh_gam_alias210_overlay_config_1_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
+ } s2;
+ struct uv3h_rh_gam_alias210_overlay_config_1_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
+ } s3;
+ struct uv4h_rh_gam_alias210_overlay_config_1_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
+ } s4;
};
/* ========================================================================= */
@@ -3097,6 +3265,41 @@ union uvh_rh_gam_alias210_overlay_config_1_mmr_u {
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
+#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
+#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
+#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63
+#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL
+#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
+#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
+
+#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
+#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
+#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63
+#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL
+#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
+#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
+
+#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
+#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
+#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63
+#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL
+#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
+#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
+
+#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
+#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
+#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63
+#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL
+#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
+#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
+
+#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
+#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
+#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63
+#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL
+#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
+#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
+
union uvh_rh_gam_alias210_overlay_config_2_mmr_u {
unsigned long v;
@@ -3108,6 +3311,46 @@ union uvh_rh_gam_alias210_overlay_config_2_mmr_u {
unsigned long rsvd_53_62:10;
unsigned long enable:1; /* RW */
} s;
+ struct uv1h_rh_gam_alias210_overlay_config_2_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
+ } s1;
+ struct uvxh_rh_gam_alias210_overlay_config_2_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
+ } sx;
+ struct uv2h_rh_gam_alias210_overlay_config_2_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
+ } s2;
+ struct uv3h_rh_gam_alias210_overlay_config_2_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
+ } s3;
+ struct uv4h_rh_gam_alias210_overlay_config_2_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
+ } s4;
};
/* ========================================================================= */
@@ -3126,6 +3369,21 @@ union uvh_rh_gam_alias210_overlay_config_2_mmr_u {
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
+#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
+#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
+#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
+#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
+#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
+#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
+#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
+#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
+#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
union uvh_rh_gam_alias210_redirect_config_0_mmr_u {
unsigned long v;
@@ -3134,6 +3392,31 @@ union uvh_rh_gam_alias210_redirect_config_0_mmr_u {
unsigned long dest_base:22; /* RW */
unsigned long rsvd_46_63:18;
} s;
+ struct uv1h_rh_gam_alias210_redirect_config_0_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
+ } s1;
+ struct uvxh_rh_gam_alias210_redirect_config_0_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
+ } sx;
+ struct uv2h_rh_gam_alias210_redirect_config_0_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
+ } s2;
+ struct uv3h_rh_gam_alias210_redirect_config_0_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
+ } s3;
+ struct uv4h_rh_gam_alias210_redirect_config_0_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
+ } s4;
};
/* ========================================================================= */
@@ -3152,6 +3435,21 @@ union uvh_rh_gam_alias210_redirect_config_0_mmr_u {
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
+#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
+#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
+#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
+#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
+#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
+#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
+#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
+#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
+#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
union uvh_rh_gam_alias210_redirect_config_1_mmr_u {
unsigned long v;
@@ -3160,6 +3458,31 @@ union uvh_rh_gam_alias210_redirect_config_1_mmr_u {
unsigned long dest_base:22; /* RW */
unsigned long rsvd_46_63:18;
} s;
+ struct uv1h_rh_gam_alias210_redirect_config_1_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
+ } s1;
+ struct uvxh_rh_gam_alias210_redirect_config_1_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
+ } sx;
+ struct uv2h_rh_gam_alias210_redirect_config_1_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
+ } s2;
+ struct uv3h_rh_gam_alias210_redirect_config_1_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
+ } s3;
+ struct uv4h_rh_gam_alias210_redirect_config_1_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
+ } s4;
};
/* ========================================================================= */
@@ -3178,6 +3501,21 @@ union uvh_rh_gam_alias210_redirect_config_1_mmr_u {
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
+#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
+#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
+#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
+#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
+#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
+#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
+#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
+#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
+#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
union uvh_rh_gam_alias210_redirect_config_2_mmr_u {
unsigned long v;
@@ -3186,6 +3524,31 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u {
unsigned long dest_base:22; /* RW */
unsigned long rsvd_46_63:18;
} s;
+ struct uv1h_rh_gam_alias210_redirect_config_2_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
+ } s1;
+ struct uvxh_rh_gam_alias210_redirect_config_2_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
+ } sx;
+ struct uv2h_rh_gam_alias210_redirect_config_2_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
+ } s2;
+ struct uv3h_rh_gam_alias210_redirect_config_2_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
+ } s3;
+ struct uv4h_rh_gam_alias210_redirect_config_2_mmr_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
+ } s4;
};
/* ========================================================================= */
@@ -3384,6 +3747,162 @@ union uvh_rh_gam_gru_overlay_config_mmr_u {
};
/* ========================================================================= */
+/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR */
+/* ========================================================================= */
+#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR uv_undefined("UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR")
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR uv_undefined("UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR")
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x1603000UL
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x483000UL
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR ( \
+ is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR : \
+ is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR : \
+ is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR)
+
+
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT 26
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 46
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_SHFT 63
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x00003ffffc000000UL
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x000fc00000000000UL
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL
+
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT 26
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 46
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_SHFT 63
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x00003ffffc000000UL
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x000fc00000000000UL
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL
+
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 52
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x000ffffffc000000UL
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x03f0000000000000UL
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL
+
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT ( \
+ is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT : \
+ is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT)
+
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK ( \
+ is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK : \
+ is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK)
+
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK ( \
+ is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK : \
+ is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK)
+
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK ( \
+ is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK : \
+ is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK)
+
+union uvh_rh_gam_mmioh_overlay_config0_mmr_u {
+ unsigned long v;
+ struct uv3h_rh_gam_mmioh_overlay_config0_mmr_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:20; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long rsvd_56_62:7;
+ unsigned long enable:1; /* RW */
+ } s3;
+ struct uv4h_rh_gam_mmioh_overlay_config0_mmr_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:20; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long rsvd_56_62:7;
+ unsigned long enable:1; /* RW */
+ } s4;
+ struct uv4ah_rh_gam_mmioh_overlay_config0_mmr_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:26; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long undef_62:1; /* Undefined */
+ unsigned long enable:1; /* RW */
+ } s4a;
+};
+
+/* ========================================================================= */
+/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR */
+/* ========================================================================= */
+#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR")
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR")
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x1603000UL
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x483000UL
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR ( \
+ is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \
+ is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \
+ is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR)
+
+
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_SHFT 26
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 46
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_SHFT 63
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x00003ffffc000000UL
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x000fc00000000000UL
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_MASK 0x8000000000000000UL
+
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_SHFT 26
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 46
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_SHFT 63
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x00003ffffc000000UL
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x000fc00000000000UL
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_MASK 0x8000000000000000UL
+
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 52
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x000ffffffc000000UL
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x03f0000000000000UL
+
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT ( \
+ is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT : \
+ is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT)
+
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK ( \
+ is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK : \
+ is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK)
+
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK ( \
+ is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK : \
+ is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK)
+
+union uvh_rh_gam_mmioh_overlay_config1_mmr_u {
+ unsigned long v;
+ struct uv3h_rh_gam_mmioh_overlay_config1_mmr_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:20; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long rsvd_56_62:7;
+ unsigned long enable:1; /* RW */
+ } s3;
+ struct uv4h_rh_gam_mmioh_overlay_config1_mmr_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:20; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long rsvd_56_62:7;
+ unsigned long enable:1; /* RW */
+ } s4;
+ struct uv4ah_rh_gam_mmioh_overlay_config1_mmr_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:26; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long undef_62:1; /* Undefined */
+ unsigned long enable:1; /* RW */
+ } s4a;
+};
+
+/* ========================================================================= */
/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR */
/* ========================================================================= */
#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL
@@ -3438,6 +3957,112 @@ union uvh_rh_gam_mmioh_overlay_config_mmr_u {
};
/* ========================================================================= */
+/* UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR */
+/* ========================================================================= */
+#define UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR uv_undefined("UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR")
+#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR")
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR 0x1603800UL
+#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR 0x483800UL
+#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR ( \
+ is_uv1_hub() ? UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR : \
+ is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR : \
+ is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR)
+
+#define UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH uv_undefined("UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH")
+#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH")
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH 128
+#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH 128
+#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH ( \
+ is_uv1_hub() ? UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH : \
+ is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH : \
+ is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH)
+
+
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT 0
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000007fffUL
+
+#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT 0
+#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000007fffUL
+
+#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000000fffUL
+
+#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK ( \
+ is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK : \
+ is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK)
+
+union uvh_rh_gam_mmioh_redirect_config0_mmr_u {
+ unsigned long v;
+ struct uv3h_rh_gam_mmioh_redirect_config0_mmr_s {
+ unsigned long nasid:15; /* RW */
+ unsigned long rsvd_15_63:49;
+ } s3;
+ struct uv4h_rh_gam_mmioh_redirect_config0_mmr_s {
+ unsigned long nasid:15; /* RW */
+ unsigned long rsvd_15_63:49;
+ } s4;
+ struct uv4ah_rh_gam_mmioh_redirect_config0_mmr_s {
+ unsigned long nasid:12; /* RW */
+ unsigned long rsvd_12_63:52;
+ } s4a;
+};
+
+/* ========================================================================= */
+/* UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR */
+/* ========================================================================= */
+#define UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR uv_undefined("UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR")
+#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR")
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR 0x1604800UL
+#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR 0x484800UL
+#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR ( \
+ is_uv1_hub() ? UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR : \
+ is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR : \
+ is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR)
+
+#define UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH uv_undefined("UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH")
+#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH")
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH 128
+#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH 128
+#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH ( \
+ is_uv1_hub() ? UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH : \
+ is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH : \
+ is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH)
+
+
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT 0
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000007fffUL
+
+#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT 0
+#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000007fffUL
+
+#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000000fffUL
+
+#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK ( \
+ is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK : \
+ is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK)
+
+union uvh_rh_gam_mmioh_redirect_config1_mmr_u {
+ unsigned long v;
+ struct uv3h_rh_gam_mmioh_redirect_config1_mmr_s {
+ unsigned long nasid:15; /* RW */
+ unsigned long rsvd_15_63:49;
+ } s3;
+ struct uv4h_rh_gam_mmioh_redirect_config1_mmr_s {
+ unsigned long nasid:15; /* RW */
+ unsigned long rsvd_15_63:49;
+ } s4;
+ struct uv4ah_rh_gam_mmioh_redirect_config1_mmr_s {
+ unsigned long nasid:12; /* RW */
+ unsigned long rsvd_12_63:52;
+ } s4a;
+};
+
+/* ========================================================================= */
/* UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR */
/* ========================================================================= */
#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
@@ -4138,88 +4763,6 @@ union uv3h_gr0_gam_gr_config_u {
};
/* ========================================================================= */
-/* UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR */
-/* ========================================================================= */
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x1603000UL
-
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT 26
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 46
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_SHFT 63
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x000fc00000000000UL
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-union uv3h_rh_gam_mmioh_overlay_config0_mmr_u {
- unsigned long v;
- struct uv3h_rh_gam_mmioh_overlay_config0_mmr_s {
- unsigned long rsvd_0_25:26;
- unsigned long base:20; /* RW */
- unsigned long m_io:6; /* RW */
- unsigned long n_io:4;
- unsigned long rsvd_56_62:7;
- unsigned long enable:1; /* RW */
- } s3;
-};
-
-/* ========================================================================= */
-/* UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR */
-/* ========================================================================= */
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x1604000UL
-
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_SHFT 26
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 46
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_SHFT 63
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x000fc00000000000UL
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_MASK 0x8000000000000000UL
-
-union uv3h_rh_gam_mmioh_overlay_config1_mmr_u {
- unsigned long v;
- struct uv3h_rh_gam_mmioh_overlay_config1_mmr_s {
- unsigned long rsvd_0_25:26;
- unsigned long base:20; /* RW */
- unsigned long m_io:6; /* RW */
- unsigned long n_io:4;
- unsigned long rsvd_56_62:7;
- unsigned long enable:1; /* RW */
- } s3;
-};
-
-/* ========================================================================= */
-/* UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR */
-/* ========================================================================= */
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR 0x1603800UL
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH 128
-
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT 0
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000007fffUL
-
-union uv3h_rh_gam_mmioh_redirect_config0_mmr_u {
- unsigned long v;
- struct uv3h_rh_gam_mmioh_redirect_config0_mmr_s {
- unsigned long nasid:15; /* RW */
- unsigned long rsvd_15_63:49;
- } s3;
-};
-
-/* ========================================================================= */
-/* UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR */
-/* ========================================================================= */
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR 0x1604800UL
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH 128
-
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT 0
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000007fffUL
-
-union uv3h_rh_gam_mmioh_redirect_config1_mmr_u {
- unsigned long v;
- struct uv3h_rh_gam_mmioh_redirect_config1_mmr_s {
- unsigned long nasid:15; /* RW */
- unsigned long rsvd_15_63:49;
- } s3;
-};
-
-/* ========================================================================= */
/* UV4H_LB_PROC_INTD_QUEUE_FIRST */
/* ========================================================================= */
#define UV4H_LB_PROC_INTD_QUEUE_FIRST 0xa4100UL
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index aa4747569e23..fc2f082ac635 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -212,6 +212,7 @@ enum x86_legacy_i8042_state {
struct x86_legacy_features {
enum x86_legacy_i8042_state i8042;
int rtc;
+ int warm_reset;
int no_vga;
int reserve_bios_regions;
struct x86_legacy_devices devices;
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index afdd5ae0fcc4..aebf60357758 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -9,6 +9,7 @@
#define SETUP_PCI 3
#define SETUP_EFI 4
#define SETUP_APPLE_PROPERTIES 5
+#define SETUP_JAILHOUSE 6
/* ram_size flags */
#define RAMDISK_IMAGE_START_MASK 0x07FF
@@ -126,6 +127,27 @@ struct boot_e820_entry {
__u32 type;
} __attribute__((packed));
+/*
+ * Smallest compatible version of jailhouse_setup_data required by this kernel.
+ */
+#define JAILHOUSE_SETUP_REQUIRED_VERSION 1
+
+/*
+ * The boot loader is passing platform information via this Jailhouse-specific
+ * setup data structure.
+ */
+struct jailhouse_setup_data {
+ u16 version;
+ u16 compatible_version;
+ u16 pm_timer_address;
+ u16 num_cpus;
+ u64 pci_mmconfig_base;
+ u32 tsc_khz;
+ u32 apic_khz;
+ u8 standard_ioapic;
+ u8 cpu_ids[255];
+} __attribute__((packed));
+
/* The so-called "zeropage" */
struct boot_params {
struct screen_info screen_info; /* 0x000 */
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 1a5bfead93b4..197c2e6c7376 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -40,6 +40,9 @@
*/
#define HV_X64_ACCESS_FREQUENCY_MSRS (1 << 11)
+/* AccessReenlightenmentControls privilege */
+#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13)
+
/*
* Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM
* and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available
@@ -234,6 +237,30 @@
#define HV_X64_MSR_CRASH_PARAMS \
(1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0))
+/* TSC emulation after migration */
+#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
+
+struct hv_reenlightenment_control {
+ u64 vector:8;
+ u64 reserved1:8;
+ u64 enabled:1;
+ u64 reserved2:15;
+ u64 target_vp:32;
+};
+
+#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
+#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
+
+struct hv_tsc_emulation_control {
+ u64 enabled:1;
+ u64 reserved:63;
+};
+
+struct hv_tsc_emulation_status {
+ u64 inprogress:1;
+ u64 reserved:63;
+};
+
#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001
#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12
#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 09cc06483bed..7a2ade4aa235 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -25,6 +25,7 @@
#define KVM_FEATURE_STEAL_TIME 5
#define KVM_FEATURE_PV_EOI 6
#define KVM_FEATURE_PV_UNHALT 7
+#define KVM_FEATURE_PV_TLB_FLUSH 9
/* The last 8 bits are used to indicate how to interpret the flags field
* in pvclock structure. If no bits are set, all flags are ignored.
@@ -51,6 +52,9 @@ struct kvm_steal_time {
__u32 pad[11];
};
+#define KVM_VCPU_PREEMPTED (1 << 0)
+#define KVM_VCPU_FLUSH_TLB (1 << 1)
+
#define KVM_CLOCK_PAIRING_WALLCLOCK 0
struct kvm_clock_pairing {
__s64 sec;
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 7e2baf7304ae..29786c87e864 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -115,6 +115,8 @@ obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o
+obj-$(CONFIG_JAILHOUSE_GUEST) += jailhouse.o
+
obj-$(CONFIG_EISA) += eisa.o
obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index f4c463df8b08..ec3a286163c3 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -68,8 +68,9 @@ int acpi_ioapic;
int acpi_strict;
int acpi_disable_cmcff;
+/* ACPI SCI override configuration */
u8 acpi_sci_flags __initdata;
-int acpi_sci_override_gsi __initdata;
+u32 acpi_sci_override_gsi __initdata = INVALID_ACPI_IRQ;
int acpi_skip_timer_override __initdata;
int acpi_use_timer_override __initdata;
int acpi_fix_pin2_polarity __initdata;
@@ -112,8 +113,6 @@ static u32 isa_irq_to_gsi[NR_IRQS_LEGACY] __read_mostly = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
-#define ACPI_INVALID_GSI INT_MIN
-
/*
* This is just a simple wrapper around early_memremap(),
* with sanity checks for phys == 0 and size == 0.
@@ -372,7 +371,7 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
* and acpi_isa_irq_to_gsi() may give wrong result.
*/
if (gsi < nr_legacy_irqs() && isa_irq_to_gsi[gsi] == gsi)
- isa_irq_to_gsi[gsi] = ACPI_INVALID_GSI;
+ isa_irq_to_gsi[gsi] = INVALID_ACPI_IRQ;
isa_irq_to_gsi[bus_irq] = gsi;
}
@@ -620,24 +619,24 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp)
}
rc = acpi_get_override_irq(gsi, &trigger, &polarity);
- if (rc == 0) {
- trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
- polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH;
- irq = acpi_register_gsi(NULL, gsi, trigger, polarity);
- if (irq >= 0) {
- *irqp = irq;
- return 0;
- }
- }
+ if (rc)
+ return rc;
- return -1;
+ trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
+ polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH;
+ irq = acpi_register_gsi(NULL, gsi, trigger, polarity);
+ if (irq < 0)
+ return irq;
+
+ *irqp = irq;
+ return 0;
}
EXPORT_SYMBOL_GPL(acpi_gsi_to_irq);
int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi)
{
if (isa_irq < nr_legacy_irqs() &&
- isa_irq_to_gsi[isa_irq] != ACPI_INVALID_GSI) {
+ isa_irq_to_gsi[isa_irq] != INVALID_ACPI_IRQ) {
*gsi = isa_irq_to_gsi[isa_irq];
return 0;
}
@@ -676,8 +675,7 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
mutex_lock(&acpi_ioapic_lock);
irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info);
/* Don't set up the ACPI SCI because it's already set up */
- if (irq >= 0 && enable_update_mptable &&
- acpi_gbl_FADT.sci_interrupt != gsi)
+ if (irq >= 0 && enable_update_mptable && gsi != acpi_gbl_FADT.sci_interrupt)
mp_config_acpi_gsi(dev, gsi, trigger, polarity);
mutex_unlock(&acpi_ioapic_lock);
#endif
@@ -1211,8 +1209,9 @@ static int __init acpi_parse_madt_ioapic_entries(void)
/*
* If BIOS did not supply an INT_SRC_OVR for the SCI
* pretend we got one so we can set the SCI flags.
+ * But ignore setting up SCI on hardware reduced platforms.
*/
- if (!acpi_sci_override_gsi)
+ if (acpi_sci_override_gsi == INVALID_ACPI_IRQ && !acpi_gbl_reduced_hardware)
acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0,
acpi_gbl_FADT.sci_interrupt);
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 7188aea91549..f1915b744052 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -138,6 +138,8 @@ static int __init acpi_sleep_setup(char *str)
acpi_nvs_nosave_s3();
if (strncmp(str, "old_ordering", 12) == 0)
acpi_old_suspend_ordering();
+ if (strncmp(str, "nobl", 4) == 0)
+ acpi_sleep_no_blacklist();
str = strchr(str, ',');
if (str != NULL)
str += strspn(str, ", \t");
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 4817d743c263..30571fdaaf6f 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -298,7 +298,7 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
tgt_rip = next_rip + o_dspl;
n_dspl = tgt_rip - orig_insn;
- DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl);
+ DPRINTK("target RIP: %px, new_displ: 0x%x", tgt_rip, n_dspl);
if (tgt_rip - orig_insn >= 0) {
if (n_dspl - 2 <= 127)
@@ -355,7 +355,7 @@ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *ins
add_nops(instr + (a->instrlen - a->padlen), a->padlen);
local_irq_restore(flags);
- DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ",
+ DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
instr, a->instrlen - a->padlen, a->padlen);
}
@@ -376,7 +376,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
u8 *instr, *replacement;
u8 insnbuf[MAX_PATCH_LEN];
- DPRINTK("alt table %p -> %p", start, end);
+ DPRINTK("alt table %px, -> %px", start, end);
/*
* The scan order should be from start to end. A later scanned
* alternative code can overwrite previously scanned alternative code.
@@ -400,14 +400,14 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
continue;
}
- DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d",
+ DPRINTK("feat: %d*32+%d, old: (%px len: %d), repl: (%px, len: %d), pad: %d",
a->cpuid >> 5,
a->cpuid & 0x1f,
instr, a->instrlen,
replacement, a->replacementlen, a->padlen);
- DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr);
- DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement);
+ DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
+ DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
memcpy(insnbuf, replacement, a->replacementlen);
insnbuf_sz = a->replacementlen;
@@ -433,7 +433,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
a->instrlen - a->replacementlen);
insnbuf_sz += a->instrlen - a->replacementlen;
}
- DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr);
+ DUMP_BYTES(insnbuf, insnbuf_sz, "%px: final_insn: ", instr);
text_poke_early(instr, insnbuf, insnbuf_sz);
}
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index f5d92bc3b884..2c4d5ece7456 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -30,6 +30,7 @@
#include <asm/dma.h>
#include <asm/amd_nb.h>
#include <asm/x86_init.h>
+#include <linux/crash_dump.h>
/*
* Using 512M as goal, in case kexec will load kernel_big
@@ -56,6 +57,33 @@ int fallback_aper_force __initdata;
int fix_aperture __initdata = 1;
+#ifdef CONFIG_PROC_VMCORE
+/*
+ * If the first kernel maps the aperture over e820 RAM, the kdump kernel will
+ * use the same range because it will remain configured in the northbridge.
+ * Trying to dump this area via /proc/vmcore may crash the machine, so exclude
+ * it from vmcore.
+ */
+static unsigned long aperture_pfn_start, aperture_page_count;
+
+static int gart_oldmem_pfn_is_ram(unsigned long pfn)
+{
+ return likely((pfn < aperture_pfn_start) ||
+ (pfn >= aperture_pfn_start + aperture_page_count));
+}
+
+static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
+{
+ aperture_pfn_start = aper_base >> PAGE_SHIFT;
+ aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT;
+ WARN_ON(register_oldmem_pfn_is_ram(&gart_oldmem_pfn_is_ram));
+}
+#else
+static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
+{
+}
+#endif
+
/* This code runs before the PCI subsystem is initialized, so just
access the northbridge directly. */
@@ -435,8 +463,16 @@ int __init gart_iommu_hole_init(void)
out:
if (!fix && !fallback_aper_force) {
- if (last_aper_base)
+ if (last_aper_base) {
+ /*
+ * If this is the kdump kernel, the first kernel
+ * may have allocated the range over its e820 RAM
+ * and fixed up the northbridge
+ */
+ exclude_from_vmcore(last_aper_base, last_aper_order);
+
return 1;
+ }
return 0;
}
@@ -473,6 +509,14 @@ out:
return 0;
}
+ /*
+ * If this is the kdump kernel _and_ the first kernel did not
+ * configure the aperture in the northbridge, this range may
+ * overlap with the first kernel's memory. We can't access the
+ * range through vmcore even though it should be part of the dump.
+ */
+ exclude_from_vmcore(aper_alloc, aper_order);
+
/* Fix up the north bridges */
for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
int bus, dev_base, dev_limit;
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 25a87028cb3f..e84c9eb4e5b4 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -19,6 +19,7 @@
#include <asm/smp.h>
#include <asm/apic.h>
#include <asm/ipi.h>
+#include <asm/jailhouse_para.h>
#include <linux/acpi.h>
@@ -84,12 +85,8 @@ flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
static void flat_send_IPI_allbutself(int vector)
{
int cpu = smp_processor_id();
-#ifdef CONFIG_HOTPLUG_CPU
- int hotplug = 1;
-#else
- int hotplug = 0;
-#endif
- if (hotplug || vector == NMI_VECTOR) {
+
+ if (IS_ENABLED(CONFIG_HOTPLUG_CPU) || vector == NMI_VECTOR) {
if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) {
unsigned long mask = cpumask_bits(cpu_online_mask)[0];
@@ -218,6 +215,15 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return 0;
}
+static void physflat_init_apic_ldr(void)
+{
+ /*
+ * LDR and DFR are not involved in physflat mode, rather:
+ * "In physical destination mode, the destination processor is
+ * specified by its local APIC ID [...]." (Intel SDM, 10.6.2.1)
+ */
+}
+
static void physflat_send_IPI_allbutself(int vector)
{
default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
@@ -230,7 +236,8 @@ static void physflat_send_IPI_all(int vector)
static int physflat_probe(void)
{
- if (apic == &apic_physflat || num_possible_cpus() > 8)
+ if (apic == &apic_physflat || num_possible_cpus() > 8 ||
+ jailhouse_paravirt())
return 1;
return 0;
@@ -251,8 +258,7 @@ static struct apic apic_physflat __ro_after_init = {
.dest_logical = 0,
.check_apicid_used = NULL,
- /* not needed, but shouldn't hurt: */
- .init_apic_ldr = flat_init_apic_ldr,
+ .init_apic_ldr = physflat_init_apic_ldr,
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 8a7963421460..8ad2e410974f 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -800,18 +800,18 @@ static int irq_polarity(int idx)
/*
* Determine IRQ line polarity (high active or low active):
*/
- switch (mp_irqs[idx].irqflag & 0x03) {
- case 0:
+ switch (mp_irqs[idx].irqflag & MP_IRQPOL_MASK) {
+ case MP_IRQPOL_DEFAULT:
/* conforms to spec, ie. bus-type dependent polarity */
if (test_bit(bus, mp_bus_not_pci))
return default_ISA_polarity(idx);
else
return default_PCI_polarity(idx);
- case 1:
+ case MP_IRQPOL_ACTIVE_HIGH:
return IOAPIC_POL_HIGH;
- case 2:
+ case MP_IRQPOL_RESERVED:
pr_warn("IOAPIC: Invalid polarity: 2, defaulting to low\n");
- case 3:
+ case MP_IRQPOL_ACTIVE_LOW:
default: /* Pointless default required due to do gcc stupidity */
return IOAPIC_POL_LOW;
}
@@ -845,8 +845,8 @@ static int irq_trigger(int idx)
/*
* Determine IRQ trigger mode (edge or level sensitive):
*/
- switch ((mp_irqs[idx].irqflag >> 2) & 0x03) {
- case 0:
+ switch (mp_irqs[idx].irqflag & MP_IRQTRIG_MASK) {
+ case MP_IRQTRIG_DEFAULT:
/* conforms to spec, ie. bus-type dependent trigger mode */
if (test_bit(bus, mp_bus_not_pci))
trigger = default_ISA_trigger(idx);
@@ -854,11 +854,11 @@ static int irq_trigger(int idx)
trigger = default_PCI_trigger(idx);
/* Take EISA into account */
return eisa_irq_trigger(idx, bus, trigger);
- case 1:
+ case MP_IRQTRIG_EDGE:
return IOAPIC_EDGE;
- case 2:
+ case MP_IRQTRIG_RESERVED:
pr_warn("IOAPIC: Invalid trigger mode 2 defaulting to level\n");
- case 3:
+ case MP_IRQTRIG_LEVEL:
default: /* Pointless default required due to do gcc stupidity */
return IOAPIC_LEVEL;
}
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index e1b8e8bf6b3c..46b675aaf20b 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -137,6 +137,8 @@ static int __init early_get_pnodeid(void)
case UV3_HUB_PART_NUMBER_X:
uv_min_hub_revision_id += UV3_HUB_REVISION_BASE;
break;
+
+ /* Update: UV4A has only a modified revision to indicate HUB fixes */
case UV4_HUB_PART_NUMBER:
uv_min_hub_revision_id += UV4_HUB_REVISION_BASE - 1;
uv_cpuid.gnode_shift = 2; /* min partition is 4 sockets */
@@ -316,6 +318,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
} else if (!strcmp(oem_table_id, "UVH")) {
/* Only UV1 systems: */
uv_system_type = UV_NON_UNIQUE_APIC;
+ x86_platform.legacy.warm_reset = 0;
__this_cpu_write(x2apic_extra_bits, pnodeid << uvh_apicid.s.pnode_shift);
uv_set_apicid_hibit();
uv_apic = 1;
@@ -767,6 +770,7 @@ static __init void map_gru_high(int max_pnode)
return;
}
+ /* Only UV3 has distributed GRU mode */
if (is_uv3_hub() && gru.s3.mode) {
map_gru_distributed(gru.v);
return;
@@ -790,63 +794,61 @@ static __init void map_mmr_high(int max_pnode)
pr_info("UV: MMR disabled\n");
}
-/*
- * This commonality works because both 0 & 1 versions of the MMIOH OVERLAY
- * and REDIRECT MMR regs are exactly the same on UV3.
- */
-struct mmioh_config {
- unsigned long overlay;
- unsigned long redirect;
- char *id;
-};
-
-static __initdata struct mmioh_config mmiohs[] = {
- {
- UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR,
- UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR,
- "MMIOH0"
- },
- {
- UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR,
- UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR,
- "MMIOH1"
- },
-};
-
-/* UV3 & UV4 have identical MMIOH overlay configs */
-static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode)
+/* UV3/4 have identical MMIOH overlay configs, UV4A is slightly different */
+static __init void map_mmioh_high_uv34(int index, int min_pnode, int max_pnode)
{
- union uv3h_rh_gam_mmioh_overlay_config0_mmr_u overlay;
+ unsigned long overlay;
unsigned long mmr;
unsigned long base;
+ unsigned long nasid_mask;
+ unsigned long m_overlay;
int i, n, shift, m_io, max_io;
int nasid, lnasid, fi, li;
char *id;
- id = mmiohs[index].id;
- overlay.v = uv_read_local_mmr(mmiohs[index].overlay);
-
- pr_info("UV: %s overlay 0x%lx base:0x%x m_io:%d\n", id, overlay.v, overlay.s3.base, overlay.s3.m_io);
- if (!overlay.s3.enable) {
+ if (index == 0) {
+ id = "MMIOH0";
+ m_overlay = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR;
+ overlay = uv_read_local_mmr(m_overlay);
+ base = overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK;
+ mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR;
+ m_io = (overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK)
+ >> UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT;
+ shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT;
+ n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH;
+ nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK;
+ } else {
+ id = "MMIOH1";
+ m_overlay = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR;
+ overlay = uv_read_local_mmr(m_overlay);
+ base = overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK;
+ mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR;
+ m_io = (overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK)
+ >> UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT;
+ shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT;
+ n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH;
+ nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK;
+ }
+ pr_info("UV: %s overlay 0x%lx base:0x%lx m_io:%d\n", id, overlay, base, m_io);
+ if (!(overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK)) {
pr_info("UV: %s disabled\n", id);
return;
}
- shift = UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT;
- base = (unsigned long)overlay.s3.base;
- m_io = overlay.s3.m_io;
- mmr = mmiohs[index].redirect;
- n = UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH;
/* Convert to NASID: */
min_pnode *= 2;
max_pnode *= 2;
max_io = lnasid = fi = li = -1;
for (i = 0; i < n; i++) {
- union uv3h_rh_gam_mmioh_redirect_config0_mmr_u redirect;
+ unsigned long m_redirect = mmr + i * 8;
+ unsigned long redirect = uv_read_local_mmr(m_redirect);
+
+ nasid = redirect & nasid_mask;
+ if (i == 0)
+ pr_info("UV: %s redirect base 0x%lx(@0x%lx) 0x%04x\n",
+ id, redirect, m_redirect, nasid);
- redirect.v = uv_read_local_mmr(mmr + i * 8);
- nasid = redirect.s3.nasid;
/* Invalid NASID: */
if (nasid < min_pnode || max_pnode < nasid)
nasid = -1;
@@ -894,8 +896,8 @@ static __init void map_mmioh_high(int min_pnode, int max_pnode)
if (is_uv3_hub() || is_uv4_hub()) {
/* Map both MMIOH regions: */
- map_mmioh_high_uv3(0, min_pnode, max_pnode);
- map_mmioh_high_uv3(1, min_pnode, max_pnode);
+ map_mmioh_high_uv34(0, min_pnode, max_pnode);
+ map_mmioh_high_uv34(1, min_pnode, max_pnode);
return;
}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index ea831c858195..5bddbdcbc4a3 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -556,6 +556,51 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
}
}
+static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
+{
+ u64 msr;
+
+ /*
+ * BIOS support is required for SME and SEV.
+ * For SME: If BIOS has enabled SME then adjust x86_phys_bits by
+ * the SME physical address space reduction value.
+ * If BIOS has not enabled SME then don't advertise the
+ * SME feature (set in scattered.c).
+ * For SEV: If BIOS has not enabled SEV then don't advertise the
+ * SEV feature (set in scattered.c).
+ *
+ * In all cases, since support for SME and SEV requires long mode,
+ * don't advertise the feature under CONFIG_X86_32.
+ */
+ if (cpu_has(c, X86_FEATURE_SME) || cpu_has(c, X86_FEATURE_SEV)) {
+ /* Check if memory encryption is enabled */
+ rdmsrl(MSR_K8_SYSCFG, msr);
+ if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+ goto clear_all;
+
+ /*
+ * Always adjust physical address bits. Even though this
+ * will be a value above 32-bits this is still done for
+ * CONFIG_X86_32 so that accurate values are reported.
+ */
+ c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f;
+
+ if (IS_ENABLED(CONFIG_X86_32))
+ goto clear_all;
+
+ rdmsrl(MSR_K7_HWCR, msr);
+ if (!(msr & MSR_K7_HWCR_SMMLOCK))
+ goto clear_sev;
+
+ return;
+
+clear_all:
+ clear_cpu_cap(c, X86_FEATURE_SME);
+clear_sev:
+ clear_cpu_cap(c, X86_FEATURE_SEV);
+ }
+}
+
static void early_init_amd(struct cpuinfo_x86 *c)
{
u32 dummy;
@@ -627,26 +672,7 @@ static void early_init_amd(struct cpuinfo_x86 *c)
if (cpu_has_amd_erratum(c, amd_erratum_400))
set_cpu_bug(c, X86_BUG_AMD_E400);
- /*
- * BIOS support is required for SME. If BIOS has enabled SME then
- * adjust x86_phys_bits by the SME physical address space reduction
- * value. If BIOS has not enabled SME then don't advertise the
- * feature (set in scattered.c). Also, since the SME support requires
- * long mode, don't advertise the feature under CONFIG_X86_32.
- */
- if (cpu_has(c, X86_FEATURE_SME)) {
- u64 msr;
-
- /* Check if SME is enabled */
- rdmsrl(MSR_K8_SYSCFG, msr);
- if (msr & MSR_K8_SYSCFG_MEM_ENCRYPT) {
- c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f;
- if (IS_ENABLED(CONFIG_X86_32))
- clear_cpu_cap(c, X86_FEATURE_SME);
- } else {
- clear_cpu_cap(c, X86_FEATURE_SME);
- }
- }
+ early_detect_mem_encrypt(c);
}
static void init_amd_k8(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 390b3dc3d438..3bfb2b23d79c 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -11,6 +11,7 @@
#include <linux/init.h>
#include <linux/utsname.h>
#include <linux/cpu.h>
+#include <linux/module.h>
#include <asm/nospec-branch.h>
#include <asm/cmdline.h>
@@ -90,10 +91,31 @@ static const char *spectre_v2_strings[] = {
};
#undef pr_fmt
-#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt
+#define pr_fmt(fmt) "Spectre V2 : " fmt
static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+#ifdef RETPOLINE
+static bool spectre_v2_bad_module;
+
+bool retpoline_module_ok(bool has_retpoline)
+{
+ if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline)
+ return true;
+
+ pr_err("System may be vunerable to spectre v2\n");
+ spectre_v2_bad_module = true;
+ return false;
+}
+
+static inline const char *spectre_v2_module_string(void)
+{
+ return spectre_v2_bad_module ? " - vulnerable module loaded" : "";
+}
+#else
+static inline const char *spectre_v2_module_string(void) { return ""; }
+#endif
+
static void __init spec2_print_if_insecure(const char *reason)
{
if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
@@ -249,6 +271,12 @@ retpoline_auto:
setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
pr_info("Filling RSB on context switch\n");
}
+
+ /* Initialize Indirect Branch Prediction Barrier if supported */
+ if (boot_cpu_has(X86_FEATURE_IBPB)) {
+ setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
+ pr_info("Enabling Indirect Branch Prediction Barrier\n");
+ }
}
#undef pr_fmt
@@ -278,6 +306,14 @@ ssize_t cpu_show_spectre_v2(struct device *dev,
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
return sprintf(buf, "Not affected\n");
- return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
+ return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+ boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+ spectre_v2_module_string());
}
#endif
+
+void __ibp_barrier(void)
+{
+ __wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0);
+}
+EXPORT_SYMBOL_GPL(__ibp_barrier);
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index 68bc6d9b3132..c578cd29c2d2 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -106,6 +106,10 @@ static void early_init_centaur(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_64
set_cpu_cap(c, X86_FEATURE_SYSENTER32);
#endif
+ if (c->x86_power & (1 << 8)) {
+ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+ set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+ }
}
static void init_centaur(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ef29ad001991..c7c996a692fd 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -47,6 +47,8 @@
#include <asm/pat.h>
#include <asm/microcode.h>
#include <asm/microcode_intel.h>
+#include <asm/intel-family.h>
+#include <asm/cpu_device_id.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/uv/uv.h>
@@ -769,6 +771,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
c->x86_capability[CPUID_7_0_EBX] = ebx;
c->x86_capability[CPUID_7_ECX] = ecx;
+ c->x86_capability[CPUID_7_EDX] = edx;
}
/* Extended state features: level 0x0000000d */
@@ -876,6 +879,41 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#endif
}
+static const __initdata struct x86_cpu_id cpu_no_speculation[] = {
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY },
+ { X86_VENDOR_CENTAUR, 5 },
+ { X86_VENDOR_INTEL, 5 },
+ { X86_VENDOR_NSC, 5 },
+ { X86_VENDOR_ANY, 4 },
+ {}
+};
+
+static const __initdata struct x86_cpu_id cpu_no_meltdown[] = {
+ { X86_VENDOR_AMD },
+ {}
+};
+
+static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c)
+{
+ u64 ia32_cap = 0;
+
+ if (x86_match_cpu(cpu_no_meltdown))
+ return false;
+
+ if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+
+ /* Rogue Data Cache Load? No! */
+ if (ia32_cap & ARCH_CAP_RDCL_NO)
+ return false;
+
+ return true;
+}
+
/*
* Do minimum CPU detection early.
* Fields really needed: vendor, cpuid_level, family, model, mask,
@@ -923,11 +961,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
- if (c->x86_vendor != X86_VENDOR_AMD)
- setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
-
- setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
- setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+ if (!x86_match_cpu(cpu_no_speculation)) {
+ if (cpu_vulnerable_to_meltdown(c))
+ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+ }
fpu__init_system(c);
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index bea8d3e24f50..479ca4728de0 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -31,6 +31,7 @@ extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
extern const struct hypervisor_x86 x86_hyper_xen_pv;
extern const struct hypervisor_x86 x86_hyper_xen_hvm;
extern const struct hypervisor_x86 x86_hyper_kvm;
+extern const struct hypervisor_x86 x86_hyper_jailhouse;
static const __initconst struct hypervisor_x86 * const hypervisors[] =
{
@@ -45,6 +46,9 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] =
#ifdef CONFIG_KVM_GUEST
&x86_hyper_kvm,
#endif
+#ifdef CONFIG_JAILHOUSE_GUEST
+ &x86_hyper_jailhouse,
+#endif
};
enum x86_hypervisor_type x86_hyper_type;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index b1af22073e28..6936d14d4c77 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -102,6 +102,59 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
ELF_HWCAP2 |= HWCAP2_RING3MWAIT;
}
+/*
+ * Early microcode releases for the Spectre v2 mitigation were broken.
+ * Information taken from;
+ * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf
+ * - https://kb.vmware.com/s/article/52345
+ * - Microcode revisions observed in the wild
+ * - Release note from 20180108 microcode release
+ */
+struct sku_microcode {
+ u8 model;
+ u8 stepping;
+ u32 microcode;
+};
+static const struct sku_microcode spectre_bad_microcodes[] = {
+ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 },
+ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 },
+ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 },
+ { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 },
+ { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 },
+ { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e },
+ { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c },
+ { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 },
+ { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 },
+ { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 },
+ { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b },
+ { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 },
+ { INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 },
+ { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 },
+ { INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 },
+ { INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 },
+ { INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 },
+ { INTEL_FAM6_HASWELL_X, 0x02, 0x3b },
+ { INTEL_FAM6_HASWELL_X, 0x04, 0x10 },
+ { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a },
+ /* Updated in the 20180108 release; blacklist until we know otherwise */
+ { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 },
+ /* Observed in the wild */
+ { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b },
+ { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 },
+};
+
+static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
+ if (c->x86_model == spectre_bad_microcodes[i].model &&
+ c->x86_mask == spectre_bad_microcodes[i].stepping)
+ return (c->microcode <= spectre_bad_microcodes[i].microcode);
+ }
+ return false;
+}
+
static void early_init_intel(struct cpuinfo_x86 *c)
{
u64 misc_enable;
@@ -123,6 +176,30 @@ static void early_init_intel(struct cpuinfo_x86 *c)
c->microcode = intel_get_microcode_revision();
/*
+ * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
+ * and they also have a different bit for STIBP support. Also,
+ * a hypervisor might have set the individual AMD bits even on
+ * Intel CPUs, for finer-grained selection of what's available.
+ */
+ if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
+ set_cpu_cap(c, X86_FEATURE_IBRS);
+ set_cpu_cap(c, X86_FEATURE_IBPB);
+ }
+ if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
+ set_cpu_cap(c, X86_FEATURE_STIBP);
+
+ /* Now if any of them are set, check the blacklist and clear the lot */
+ if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
+ cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
+ pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
+ clear_cpu_cap(c, X86_FEATURE_IBRS);
+ clear_cpu_cap(c, X86_FEATURE_IBPB);
+ clear_cpu_cap(c, X86_FEATURE_STIBP);
+ clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL);
+ clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP);
+ }
+
+ /*
* Atom erratum AAE44/AAF40/AAG38/AAH41:
*
* A race condition between speculative fetches and invalidating
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 99442370de40..410629f10ad3 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -135,6 +135,40 @@ struct rdt_resource rdt_resources_all[] = {
.format_str = "%d=%0*x",
.fflags = RFTYPE_RES_CACHE,
},
+ [RDT_RESOURCE_L2DATA] =
+ {
+ .rid = RDT_RESOURCE_L2DATA,
+ .name = "L2DATA",
+ .domains = domain_init(RDT_RESOURCE_L2DATA),
+ .msr_base = IA32_L2_CBM_BASE,
+ .msr_update = cat_wrmsr,
+ .cache_level = 2,
+ .cache = {
+ .min_cbm_bits = 1,
+ .cbm_idx_mult = 2,
+ .cbm_idx_offset = 0,
+ },
+ .parse_ctrlval = parse_cbm,
+ .format_str = "%d=%0*x",
+ .fflags = RFTYPE_RES_CACHE,
+ },
+ [RDT_RESOURCE_L2CODE] =
+ {
+ .rid = RDT_RESOURCE_L2CODE,
+ .name = "L2CODE",
+ .domains = domain_init(RDT_RESOURCE_L2CODE),
+ .msr_base = IA32_L2_CBM_BASE,
+ .msr_update = cat_wrmsr,
+ .cache_level = 2,
+ .cache = {
+ .min_cbm_bits = 1,
+ .cbm_idx_mult = 2,
+ .cbm_idx_offset = 1,
+ },
+ .parse_ctrlval = parse_cbm,
+ .format_str = "%d=%0*x",
+ .fflags = RFTYPE_RES_CACHE,
+ },
[RDT_RESOURCE_MBA] =
{
.rid = RDT_RESOURCE_MBA,
@@ -259,15 +293,15 @@ static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
r->alloc_enabled = true;
}
-static void rdt_get_cdp_l3_config(int type)
+static void rdt_get_cdp_config(int level, int type)
{
- struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3];
+ struct rdt_resource *r_l = &rdt_resources_all[level];
struct rdt_resource *r = &rdt_resources_all[type];
- r->num_closid = r_l3->num_closid / 2;
- r->cache.cbm_len = r_l3->cache.cbm_len;
- r->default_ctrl = r_l3->default_ctrl;
- r->cache.shareable_bits = r_l3->cache.shareable_bits;
+ r->num_closid = r_l->num_closid / 2;
+ r->cache.cbm_len = r_l->cache.cbm_len;
+ r->default_ctrl = r_l->default_ctrl;
+ r->cache.shareable_bits = r_l->cache.shareable_bits;
r->data_width = (r->cache.cbm_len + 3) / 4;
r->alloc_capable = true;
/*
@@ -277,6 +311,18 @@ static void rdt_get_cdp_l3_config(int type)
r->alloc_enabled = false;
}
+static void rdt_get_cdp_l3_config(void)
+{
+ rdt_get_cdp_config(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA);
+ rdt_get_cdp_config(RDT_RESOURCE_L3, RDT_RESOURCE_L3CODE);
+}
+
+static void rdt_get_cdp_l2_config(void)
+{
+ rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA);
+ rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2CODE);
+}
+
static int get_cache_id(int cpu, int level)
{
struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
@@ -645,6 +691,7 @@ enum {
RDT_FLAG_L3_CAT,
RDT_FLAG_L3_CDP,
RDT_FLAG_L2_CAT,
+ RDT_FLAG_L2_CDP,
RDT_FLAG_MBA,
};
@@ -667,6 +714,7 @@ static struct rdt_options rdt_options[] __initdata = {
RDT_OPT(RDT_FLAG_L3_CAT, "l3cat", X86_FEATURE_CAT_L3),
RDT_OPT(RDT_FLAG_L3_CDP, "l3cdp", X86_FEATURE_CDP_L3),
RDT_OPT(RDT_FLAG_L2_CAT, "l2cat", X86_FEATURE_CAT_L2),
+ RDT_OPT(RDT_FLAG_L2_CDP, "l2cdp", X86_FEATURE_CDP_L2),
RDT_OPT(RDT_FLAG_MBA, "mba", X86_FEATURE_MBA),
};
#define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options)
@@ -729,15 +777,15 @@ static __init bool get_rdt_alloc_resources(void)
if (rdt_cpu_has(X86_FEATURE_CAT_L3)) {
rdt_get_cache_alloc_cfg(1, &rdt_resources_all[RDT_RESOURCE_L3]);
- if (rdt_cpu_has(X86_FEATURE_CDP_L3)) {
- rdt_get_cdp_l3_config(RDT_RESOURCE_L3DATA);
- rdt_get_cdp_l3_config(RDT_RESOURCE_L3CODE);
- }
+ if (rdt_cpu_has(X86_FEATURE_CDP_L3))
+ rdt_get_cdp_l3_config();
ret = true;
}
if (rdt_cpu_has(X86_FEATURE_CAT_L2)) {
/* CPUID 0x10.2 fields are same format at 0x10.1 */
rdt_get_cache_alloc_cfg(2, &rdt_resources_all[RDT_RESOURCE_L2]);
+ if (rdt_cpu_has(X86_FEATURE_CDP_L2))
+ rdt_get_cdp_l2_config();
ret = true;
}
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 3397244984f5..3fd7a70ee04a 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -7,12 +7,15 @@
#include <linux/jump_label.h>
#define IA32_L3_QOS_CFG 0xc81
+#define IA32_L2_QOS_CFG 0xc82
#define IA32_L3_CBM_BASE 0xc90
#define IA32_L2_CBM_BASE 0xd10
#define IA32_MBA_THRTL_BASE 0xd50
#define L3_QOS_CDP_ENABLE 0x01ULL
+#define L2_QOS_CDP_ENABLE 0x01ULL
+
/*
* Event IDs are used to program IA32_QM_EVTSEL before reading event
* counter from IA32_QM_CTR
@@ -357,6 +360,8 @@ enum {
RDT_RESOURCE_L3DATA,
RDT_RESOURCE_L3CODE,
RDT_RESOURCE_L2,
+ RDT_RESOURCE_L2DATA,
+ RDT_RESOURCE_L2CODE,
RDT_RESOURCE_MBA,
/* Must be the last */
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 64c5ff97ee0d..bdab7d2f51af 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -990,6 +990,7 @@ out_destroy:
kernfs_remove(kn);
return ret;
}
+
static void l3_qos_cfg_update(void *arg)
{
bool *enable = arg;
@@ -997,8 +998,17 @@ static void l3_qos_cfg_update(void *arg)
wrmsrl(IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
}
-static int set_l3_qos_cfg(struct rdt_resource *r, bool enable)
+static void l2_qos_cfg_update(void *arg)
{
+ bool *enable = arg;
+
+ wrmsrl(IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
+}
+
+static int set_cache_qos_cfg(int level, bool enable)
+{
+ void (*update)(void *arg);
+ struct rdt_resource *r_l;
cpumask_var_t cpu_mask;
struct rdt_domain *d;
int cpu;
@@ -1006,16 +1016,24 @@ static int set_l3_qos_cfg(struct rdt_resource *r, bool enable)
if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
return -ENOMEM;
- list_for_each_entry(d, &r->domains, list) {
+ if (level == RDT_RESOURCE_L3)
+ update = l3_qos_cfg_update;
+ else if (level == RDT_RESOURCE_L2)
+ update = l2_qos_cfg_update;
+ else
+ return -EINVAL;
+
+ r_l = &rdt_resources_all[level];
+ list_for_each_entry(d, &r_l->domains, list) {
/* Pick one CPU from each domain instance to update MSR */
cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
}
cpu = get_cpu();
/* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */
if (cpumask_test_cpu(cpu, cpu_mask))
- l3_qos_cfg_update(&enable);
+ update(&enable);
/* Update QOS_CFG MSR on all other cpus in cpu_mask. */
- smp_call_function_many(cpu_mask, l3_qos_cfg_update, &enable, 1);
+ smp_call_function_many(cpu_mask, update, &enable, 1);
put_cpu();
free_cpumask_var(cpu_mask);
@@ -1023,52 +1041,99 @@ static int set_l3_qos_cfg(struct rdt_resource *r, bool enable)
return 0;
}
-static int cdp_enable(void)
+static int cdp_enable(int level, int data_type, int code_type)
{
- struct rdt_resource *r_l3data = &rdt_resources_all[RDT_RESOURCE_L3DATA];
- struct rdt_resource *r_l3code = &rdt_resources_all[RDT_RESOURCE_L3CODE];
- struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3];
+ struct rdt_resource *r_ldata = &rdt_resources_all[data_type];
+ struct rdt_resource *r_lcode = &rdt_resources_all[code_type];
+ struct rdt_resource *r_l = &rdt_resources_all[level];
int ret;
- if (!r_l3->alloc_capable || !r_l3data->alloc_capable ||
- !r_l3code->alloc_capable)
+ if (!r_l->alloc_capable || !r_ldata->alloc_capable ||
+ !r_lcode->alloc_capable)
return -EINVAL;
- ret = set_l3_qos_cfg(r_l3, true);
+ ret = set_cache_qos_cfg(level, true);
if (!ret) {
- r_l3->alloc_enabled = false;
- r_l3data->alloc_enabled = true;
- r_l3code->alloc_enabled = true;
+ r_l->alloc_enabled = false;
+ r_ldata->alloc_enabled = true;
+ r_lcode->alloc_enabled = true;
}
return ret;
}
-static void cdp_disable(void)
+static int cdpl3_enable(void)
{
- struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
+ return cdp_enable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA,
+ RDT_RESOURCE_L3CODE);
+}
+
+static int cdpl2_enable(void)
+{
+ return cdp_enable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA,
+ RDT_RESOURCE_L2CODE);
+}
+
+static void cdp_disable(int level, int data_type, int code_type)
+{
+ struct rdt_resource *r = &rdt_resources_all[level];
r->alloc_enabled = r->alloc_capable;
- if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) {
- rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled = false;
- rdt_resources_all[RDT_RESOURCE_L3CODE].alloc_enabled = false;
- set_l3_qos_cfg(r, false);
+ if (rdt_resources_all[data_type].alloc_enabled) {
+ rdt_resources_all[data_type].alloc_enabled = false;
+ rdt_resources_all[code_type].alloc_enabled = false;
+ set_cache_qos_cfg(level, false);
}
}
+static void cdpl3_disable(void)
+{
+ cdp_disable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, RDT_RESOURCE_L3CODE);
+}
+
+static void cdpl2_disable(void)
+{
+ cdp_disable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, RDT_RESOURCE_L2CODE);
+}
+
+static void cdp_disable_all(void)
+{
+ if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
+ cdpl3_disable();
+ if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled)
+ cdpl2_disable();
+}
+
static int parse_rdtgroupfs_options(char *data)
{
char *token, *o = data;
int ret = 0;
while ((token = strsep(&o, ",")) != NULL) {
- if (!*token)
- return -EINVAL;
+ if (!*token) {
+ ret = -EINVAL;
+ goto out;
+ }
- if (!strcmp(token, "cdp"))
- ret = cdp_enable();
+ if (!strcmp(token, "cdp")) {
+ ret = cdpl3_enable();
+ if (ret)
+ goto out;
+ } else if (!strcmp(token, "cdpl2")) {
+ ret = cdpl2_enable();
+ if (ret)
+ goto out;
+ } else {
+ ret = -EINVAL;
+ goto out;
+ }
}
+ return 0;
+
+out:
+ pr_err("Invalid mount option \"%s\"\n", token);
+
return ret;
}
@@ -1223,7 +1288,7 @@ out_mongrp:
out_info:
kernfs_remove(kn_info);
out_cdp:
- cdp_disable();
+ cdp_disable_all();
out:
rdt_last_cmd_clear();
mutex_unlock(&rdtgroup_mutex);
@@ -1383,7 +1448,7 @@ static void rdt_kill_sb(struct super_block *sb)
/*Put everything back to default values. */
for_each_alloc_enabled_rdt_resource(r)
reset_all_ctrls(r);
- cdp_disable();
+ cdp_disable_all();
rmdir_all_sub();
static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
static_branch_disable_cpuslocked(&rdt_mon_enable_key);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 4ca632a06e0b..5bbd06f38ff6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -59,6 +59,7 @@ static struct severity {
#define MCGMASK(x, y) .mcgmask = x, .mcgres = y
#define MASK(x, y) .mask = x, .result = y
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
+#define MCI_UC_AR (MCI_STATUS_UC|MCI_STATUS_AR)
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
#define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
@@ -101,6 +102,22 @@ static struct severity {
NOSER, BITCLR(MCI_STATUS_UC)
),
+ /*
+ * known AO MCACODs reported via MCE or CMC:
+ *
+ * SRAO could be signaled either via a machine check exception or
+ * CMCI with the corresponding bit S 1 or 0. So we don't need to
+ * check bit S for SRAO.
+ */
+ MCESEV(
+ AO, "Action optional: memory scrubbing error",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB)
+ ),
+ MCESEV(
+ AO, "Action optional: last level cache writeback error",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
+ ),
+
/* ignore OVER for UCNA */
MCESEV(
UCNA, "Uncorrected no action required",
@@ -149,15 +166,6 @@ static struct severity {
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
),
- /* known AO MCACODs: */
- MCESEV(
- AO, "Action optional: memory scrubbing error",
- SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD_SCRUBMSK, MCI_UC_S|MCACOD_SCRUB)
- ),
- MCESEV(
- AO, "Action optional: last level cache writeback error",
- SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|MCACOD_L3WB)
- ),
MCESEV(
SOME, "Action optional: unknown MCACOD",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 868e412b4f0c..c3655e0fc156 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -503,10 +503,8 @@ static int mce_usable_address(struct mce *m)
bool mce_is_memory_error(struct mce *m)
{
if (m->cpuvendor == X86_VENDOR_AMD) {
- /* ErrCodeExt[20:16] */
- u8 xec = (m->status >> 16) & 0x1f;
+ return amd_mce_is_memory_error(m);
- return (xec == 0x0 || xec == 0x8);
} else if (m->cpuvendor == X86_VENDOR_INTEL) {
/*
* Intel SDM Volume 3B - 15.9.2 Compound Error Codes
@@ -530,6 +528,17 @@ bool mce_is_memory_error(struct mce *m)
}
EXPORT_SYMBOL_GPL(mce_is_memory_error);
+static bool mce_is_correctable(struct mce *m)
+{
+ if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED)
+ return false;
+
+ if (m->status & MCI_STATUS_UC)
+ return false;
+
+ return true;
+}
+
static bool cec_add_mce(struct mce *m)
{
if (!m)
@@ -537,7 +546,7 @@ static bool cec_add_mce(struct mce *m)
/* We eat only correctable DRAM errors with usable addresses. */
if (mce_is_memory_error(m) &&
- !(m->status & MCI_STATUS_UC) &&
+ mce_is_correctable(m) &&
mce_usable_address(m))
if (!cec_add_elem(m->addr >> PAGE_SHIFT))
return true;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 486f640b02ef..0f32ad242324 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -110,6 +110,20 @@ const char *smca_get_long_name(enum smca_bank_types t)
}
EXPORT_SYMBOL_GPL(smca_get_long_name);
+static enum smca_bank_types smca_get_bank_type(struct mce *m)
+{
+ struct smca_bank *b;
+
+ if (m->bank >= N_SMCA_BANK_TYPES)
+ return N_SMCA_BANK_TYPES;
+
+ b = &smca_banks[m->bank];
+ if (!b->hwid)
+ return N_SMCA_BANK_TYPES;
+
+ return b->hwid->bank_type;
+}
+
static struct smca_hwid smca_hwid_mcatypes[] = {
/* { bank_type, hwid_mcatype, xec_bitmap } */
@@ -407,7 +421,9 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
(deferred_error_int_vector != amd_deferred_error_interrupt))
deferred_error_int_vector = amd_deferred_error_interrupt;
- low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
+ if (!mce_flags.smca)
+ low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
+
wrmsr(MSR_CU_DEF_ERR, low, high);
}
@@ -738,6 +754,17 @@ out_err:
}
EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr);
+bool amd_mce_is_memory_error(struct mce *m)
+{
+ /* ErrCodeExt[20:16] */
+ u8 xec = (m->status >> 16) & 0x1f;
+
+ if (mce_flags.smca)
+ return smca_get_bank_type(m) == SMCA_UMC && xec == 0x0;
+
+ return m->bank == 4 && xec == 0x8;
+}
+
static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
{
struct mce m;
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index c4fa4a85d4cb..e4fc595cd6ea 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -239,7 +239,7 @@ static int __init save_microcode_in_initrd(void)
break;
case X86_VENDOR_AMD:
if (c->x86 >= 0x10)
- return save_microcode_in_initrd_amd(cpuid_eax(1));
+ ret = save_microcode_in_initrd_amd(cpuid_eax(1));
break;
default:
break;
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index d9e460fc7a3b..f7c55b0e753a 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -45,6 +45,9 @@ static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin";
/* Current microcode patch used in early patching on the APs. */
static struct microcode_intel *intel_ucode_patch;
+/* last level cache size per core */
+static int llc_size_per_core;
+
static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1,
unsigned int s2, unsigned int p2)
{
@@ -912,12 +915,14 @@ static bool is_blacklisted(unsigned int cpu)
/*
* Late loading on model 79 with microcode revision less than 0x0b000021
- * may result in a system hang. This behavior is documented in item
- * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
+ * and LLC size per core bigger than 2.5MB may result in a system hang.
+ * This behavior is documented in item BDF90, #334165 (Intel Xeon
+ * Processor E7-8800/4800 v4 Product Family).
*/
if (c->x86 == 6 &&
c->x86_model == INTEL_FAM6_BROADWELL_X &&
c->x86_mask == 0x01 &&
+ llc_size_per_core > 2621440 &&
c->microcode < 0x0b000021) {
pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
@@ -975,6 +980,15 @@ static struct microcode_ops microcode_intel_ops = {
.apply_microcode = apply_microcode_intel,
};
+static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c)
+{
+ u64 llc_size = c->x86_cache_size * 1024;
+
+ do_div(llc_size, c->x86_max_cores);
+
+ return (int)llc_size;
+}
+
struct microcode_ops * __init init_intel_microcode(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -985,5 +999,7 @@ struct microcode_ops * __init init_intel_microcode(void)
return NULL;
}
+ llc_size_per_core = calc_llc_size_per_core(c);
+
return &microcode_intel_ops;
}
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 85eb5fc180c8..9340f41ce8d3 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -251,6 +251,12 @@ static void __init ms_hyperv_init_platform(void)
hyperv_setup_mmu_ops();
/* Setup the IDT for hypervisor callback */
alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
+
+ /* Setup the IDT for reenlightenment notifications */
+ if (ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT)
+ alloc_intr_gate(HYPERV_REENLIGHTENMENT_VECTOR,
+ hyperv_reenlightenment_vector);
+
#endif
}
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index d0e69769abfd..772c219b6889 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -21,16 +21,16 @@ struct cpuid_bit {
static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
- { X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 },
- { X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 },
{ X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 },
{ X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 },
{ X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 },
+ { X86_FEATURE_CDP_L2, CPUID_ECX, 2, 0x00000010, 2 },
{ X86_FEATURE_MBA, CPUID_EBX, 3, 0x00000010, 0 },
{ X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
{ X86_FEATURE_SME, CPUID_EAX, 0, 0x8000001f, 0 },
+ { X86_FEATURE_SEV, CPUID_EAX, 1, 0x8000001f, 0 },
{ 0, 0, 0, 0, 0 }
};
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index ef61f540cf0a..91b2cff4b79a 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -295,7 +295,7 @@ trace:
restore_mcount_regs
jmp fgraph_trace
-END(function_hook)
+ENDPROC(function_hook)
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 68e1867cca80..45fb4d2565f8 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -142,6 +142,15 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_puts(p, " Hypervisor callback interrupts\n");
}
#endif
+#if IS_ENABLED(CONFIG_HYPERV)
+ if (test_bit(HYPERV_REENLIGHTENMENT_VECTOR, system_vectors)) {
+ seq_printf(p, "%*s: ", prec, "HRE");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ",
+ irq_stats(j)->irq_hv_reenlightenment_count);
+ seq_puts(p, " Hyper-V reenlightenment interrupts\n");
+ }
+#endif
seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
#if defined(CONFIG_X86_IO_APIC)
seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
index f73f475d0573..d177940aa090 100644
--- a/arch/x86/kernel/itmt.c
+++ b/arch/x86/kernel/itmt.c
@@ -24,7 +24,6 @@
#include <linux/cpumask.h>
#include <linux/cpuset.h>
#include <linux/mutex.h>
-#include <linux/sched.h>
#include <linux/sysctl.h>
#include <linux/nodemask.h>
diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c
new file mode 100644
index 000000000000..b68fd895235a
--- /dev/null
+++ b/arch/x86/kernel/jailhouse.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL2.0
+/*
+ * Jailhouse paravirt_ops implementation
+ *
+ * Copyright (c) Siemens AG, 2015-2017
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ */
+
+#include <linux/acpi_pmtmr.h>
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <asm/apic.h>
+#include <asm/cpu.h>
+#include <asm/hypervisor.h>
+#include <asm/i8259.h>
+#include <asm/irqdomain.h>
+#include <asm/pci_x86.h>
+#include <asm/reboot.h>
+#include <asm/setup.h>
+
+static __initdata struct jailhouse_setup_data setup_data;
+static unsigned int precalibrated_tsc_khz;
+
+static uint32_t jailhouse_cpuid_base(void)
+{
+ if (boot_cpu_data.cpuid_level < 0 ||
+ !boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return 0;
+
+ return hypervisor_cpuid_base("Jailhouse\0\0\0", 0);
+}
+
+static uint32_t __init jailhouse_detect(void)
+{
+ return jailhouse_cpuid_base();
+}
+
+static void jailhouse_get_wallclock(struct timespec *now)
+{
+ memset(now, 0, sizeof(*now));
+}
+
+static void __init jailhouse_timer_init(void)
+{
+ lapic_timer_frequency = setup_data.apic_khz * (1000 / HZ);
+}
+
+static unsigned long jailhouse_get_tsc(void)
+{
+ return precalibrated_tsc_khz;
+}
+
+static void __init jailhouse_x2apic_init(void)
+{
+#ifdef CONFIG_X86_X2APIC
+ if (!x2apic_enabled())
+ return;
+ /*
+ * We do not have access to IR inside Jailhouse non-root cells. So
+ * we have to run in physical mode.
+ */
+ x2apic_phys = 1;
+ /*
+ * This will trigger the switch to apic_x2apic_phys. Empty OEM IDs
+ * ensure that only this APIC driver picks up the call.
+ */
+ default_acpi_madt_oem_check("", "");
+#endif
+}
+
+static void __init jailhouse_get_smp_config(unsigned int early)
+{
+ struct ioapic_domain_cfg ioapic_cfg = {
+ .type = IOAPIC_DOMAIN_STRICT,
+ .ops = &mp_ioapic_irqdomain_ops,
+ };
+ struct mpc_intsrc mp_irq = {
+ .type = MP_INTSRC,
+ .irqtype = mp_INT,
+ .irqflag = MP_IRQPOL_ACTIVE_HIGH | MP_IRQTRIG_EDGE,
+ };
+ unsigned int cpu;
+
+ jailhouse_x2apic_init();
+
+ register_lapic_address(0xfee00000);
+
+ for (cpu = 0; cpu < setup_data.num_cpus; cpu++) {
+ generic_processor_info(setup_data.cpu_ids[cpu],
+ boot_cpu_apic_version);
+ }
+
+ smp_found_config = 1;
+
+ if (setup_data.standard_ioapic) {
+ mp_register_ioapic(0, 0xfec00000, gsi_top, &ioapic_cfg);
+
+ /* Register 1:1 mapping for legacy UART IRQs 3 and 4 */
+ mp_irq.srcbusirq = mp_irq.dstirq = 3;
+ mp_save_irq(&mp_irq);
+
+ mp_irq.srcbusirq = mp_irq.dstirq = 4;
+ mp_save_irq(&mp_irq);
+ }
+}
+
+static void jailhouse_no_restart(void)
+{
+ pr_notice("Jailhouse: Restart not supported, halting\n");
+ machine_halt();
+}
+
+static int __init jailhouse_pci_arch_init(void)
+{
+ pci_direct_init(1);
+
+ /*
+ * There are no bridges on the virtual PCI root bus under Jailhouse,
+ * thus no other way to discover all devices than a full scan.
+ * Respect any overrides via the command line, though.
+ */
+ if (pcibios_last_bus < 0)
+ pcibios_last_bus = 0xff;
+
+ return 0;
+}
+
+static void __init jailhouse_init_platform(void)
+{
+ u64 pa_data = boot_params.hdr.setup_data;
+ struct setup_data header;
+ void *mapping;
+
+ x86_init.irqs.pre_vector_init = x86_init_noop;
+ x86_init.timers.timer_init = jailhouse_timer_init;
+ x86_init.mpparse.get_smp_config = jailhouse_get_smp_config;
+ x86_init.pci.arch_init = jailhouse_pci_arch_init;
+
+ x86_platform.calibrate_cpu = jailhouse_get_tsc;
+ x86_platform.calibrate_tsc = jailhouse_get_tsc;
+ x86_platform.get_wallclock = jailhouse_get_wallclock;
+ x86_platform.legacy.rtc = 0;
+ x86_platform.legacy.warm_reset = 0;
+ x86_platform.legacy.i8042 = X86_LEGACY_I8042_PLATFORM_ABSENT;
+
+ legacy_pic = &null_legacy_pic;
+
+ machine_ops.emergency_restart = jailhouse_no_restart;
+
+ while (pa_data) {
+ mapping = early_memremap(pa_data, sizeof(header));
+ memcpy(&header, mapping, sizeof(header));
+ early_memunmap(mapping, sizeof(header));
+
+ if (header.type == SETUP_JAILHOUSE &&
+ header.len >= sizeof(setup_data)) {
+ pa_data += offsetof(struct setup_data, data);
+
+ mapping = early_memremap(pa_data, sizeof(setup_data));
+ memcpy(&setup_data, mapping, sizeof(setup_data));
+ early_memunmap(mapping, sizeof(setup_data));
+
+ break;
+ }
+
+ pa_data = header.next;
+ }
+
+ if (!pa_data)
+ panic("Jailhouse: No valid setup data found");
+
+ if (setup_data.compatible_version > JAILHOUSE_SETUP_REQUIRED_VERSION)
+ panic("Jailhouse: Unsupported setup data structure");
+
+ pmtmr_ioport = setup_data.pm_timer_address;
+ pr_debug("Jailhouse: PM-Timer IO Port: %#x\n", pmtmr_ioport);
+
+ precalibrated_tsc_khz = setup_data.tsc_khz;
+ setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
+
+ pci_probe = 0;
+
+ /*
+ * Avoid that the kernel complains about missing ACPI tables - there
+ * are none in a non-root cell.
+ */
+ disable_acpi();
+}
+
+bool jailhouse_paravirt(void)
+{
+ return jailhouse_cpuid_base() != 0;
+}
+
+static bool jailhouse_x2apic_available(void)
+{
+ /*
+ * The x2APIC is only available if the root cell enabled it. Jailhouse
+ * does not support switching between xAPIC and x2APIC.
+ */
+ return x2apic_enabled();
+}
+
+const struct hypervisor_x86 x86_hyper_jailhouse __refconst = {
+ .name = "Jailhouse",
+ .detect = jailhouse_detect,
+ .init.init_platform = jailhouse_init_platform,
+ .init.x2apic_available = jailhouse_x2apic_available,
+};
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index b40ffbf156c1..4e37d1a851a6 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -498,6 +498,34 @@ static void __init kvm_apf_trap_init(void)
update_intr_gate(X86_TRAP_PF, async_page_fault);
}
+static DEFINE_PER_CPU(cpumask_var_t, __pv_tlb_mask);
+
+static void kvm_flush_tlb_others(const struct cpumask *cpumask,
+ const struct flush_tlb_info *info)
+{
+ u8 state;
+ int cpu;
+ struct kvm_steal_time *src;
+ struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_tlb_mask);
+
+ cpumask_copy(flushmask, cpumask);
+ /*
+ * We have to call flush only on online vCPUs. And
+ * queue flush_on_enter for pre-empted vCPUs
+ */
+ for_each_cpu(cpu, flushmask) {
+ src = &per_cpu(steal_time, cpu);
+ state = READ_ONCE(src->preempted);
+ if ((state & KVM_VCPU_PREEMPTED)) {
+ if (try_cmpxchg(&src->preempted, &state,
+ state | KVM_VCPU_FLUSH_TLB))
+ __cpumask_clear_cpu(cpu, flushmask);
+ }
+ }
+
+ native_flush_tlb_others(flushmask, info);
+}
+
static void __init kvm_guest_init(void)
{
int i;
@@ -517,6 +545,9 @@ static void __init kvm_guest_init(void)
pv_time_ops.steal_clock = kvm_steal_clock;
}
+ if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH))
+ pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
+
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
apic_set_eoi_write(kvm_guest_apic_eoi_write);
@@ -598,6 +629,22 @@ static __init int activate_jump_labels(void)
}
arch_initcall(activate_jump_labels);
+static __init int kvm_setup_pv_tlb_flush(void)
+{
+ int cpu;
+
+ if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) {
+ for_each_possible_cpu(cpu) {
+ zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
+ GFP_KERNEL, cpu_to_node(cpu));
+ }
+ pr_info("KVM setup pv remote TLB flush\n");
+ }
+
+ return 0;
+}
+arch_initcall(kvm_setup_pv_tlb_flush);
+
#ifdef CONFIG_PARAVIRT_SPINLOCKS
/* Kick a cpu by its apicid. Used to wake up a halted vcpu */
@@ -643,7 +690,7 @@ __visible bool __kvm_vcpu_is_preempted(long cpu)
{
struct kvm_steal_time *src = &per_cpu(steal_time, cpu);
- return !!src->preempted;
+ return !!(src->preempted & KVM_VCPU_PREEMPTED);
}
PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted);
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 3a4b12809ab5..27d0a1712663 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -281,7 +281,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
int ELCR_fallback = 0;
intsrc.type = MP_INTSRC;
- intsrc.irqflag = 0; /* conforming */
+ intsrc.irqflag = MP_IRQTRIG_DEFAULT | MP_IRQPOL_DEFAULT;
intsrc.srcbus = 0;
intsrc.dstapic = mpc_ioapic_id(0);
@@ -324,10 +324,13 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
* copy that information over to the MP table in the
* irqflag field (level sensitive, active high polarity).
*/
- if (ELCR_trigger(i))
- intsrc.irqflag = 13;
- else
- intsrc.irqflag = 0;
+ if (ELCR_trigger(i)) {
+ intsrc.irqflag = MP_IRQTRIG_LEVEL |
+ MP_IRQPOL_ACTIVE_HIGH;
+ } else {
+ intsrc.irqflag = MP_IRQTRIG_DEFAULT |
+ MP_IRQPOL_DEFAULT;
+ }
}
intsrc.srcbusirq = i;
@@ -419,7 +422,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
construct_ioapic_table(mpc_default_type);
lintsrc.type = MP_LINTSRC;
- lintsrc.irqflag = 0; /* conforming */
+ lintsrc.irqflag = MP_IRQTRIG_DEFAULT | MP_IRQPOL_DEFAULT;
lintsrc.srcbusid = 0;
lintsrc.srcbusirq = 0;
lintsrc.destapic = MP_APIC_ALL;
@@ -664,7 +667,7 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m)
if (m->irqtype != mp_INT)
return 0;
- if (m->irqflag != 0x0f)
+ if (m->irqflag != (MP_IRQTRIG_LEVEL | MP_IRQPOL_ACTIVE_LOW))
return 0;
/* not legacy */
@@ -673,7 +676,8 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m)
if (mp_irqs[i].irqtype != mp_INT)
continue;
- if (mp_irqs[i].irqflag != 0x0f)
+ if (mp_irqs[i].irqflag != (MP_IRQTRIG_LEVEL |
+ MP_IRQPOL_ACTIVE_LOW))
continue;
if (mp_irqs[i].srcbus != m->srcbus)
@@ -784,7 +788,8 @@ static int __init replace_intsrc_all(struct mpc_table *mpc,
if (mp_irqs[i].irqtype != mp_INT)
continue;
- if (mp_irqs[i].irqflag != 0x0f)
+ if (mp_irqs[i].irqflag != (MP_IRQTRIG_LEVEL |
+ MP_IRQPOL_ACTIVE_LOW))
continue;
if (nr_m_spare > 0) {
diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c
index 39a59299bfa0..235fe6008ac8 100644
--- a/arch/x86/kernel/platform-quirks.c
+++ b/arch/x86/kernel/platform-quirks.c
@@ -9,6 +9,7 @@ void __init x86_early_init_platform_quirks(void)
{
x86_platform.legacy.i8042 = X86_LEGACY_I8042_EXPECTED_PRESENT;
x86_platform.legacy.rtc = 1;
+ x86_platform.legacy.warm_reset = 1;
x86_platform.legacy.reserve_bios_regions = 0;
x86_platform.legacy.devices.pnpbios = 1;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index cb368c2a22ab..03408b942adb 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -21,7 +21,6 @@
#include <linux/dmi.h>
#include <linux/utsname.h>
#include <linux/stackprotector.h>
-#include <linux/tick.h>
#include <linux/cpuidle.h>
#include <trace/events/power.h>
#include <linux/hw_breakpoint.h>
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 68d7ab81c62f..1ae67e982af7 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -114,7 +114,6 @@
#include <asm/alternative.h>
#include <asm/prom.h>
#include <asm/microcode.h>
-#include <asm/mmu_context.h>
#include <asm/kaslr.h>
#include <asm/unwind.h>
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ed556d50d7ed..6f27facbaa9b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -75,7 +75,6 @@
#include <asm/uv/uv.h>
#include <linux/mc146818rtc.h>
#include <asm/i8259.h>
-#include <asm/realmode.h>
#include <asm/misc.h>
#include <asm/qspinlock.h>
@@ -934,7 +933,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
* the targeted processor.
*/
- if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
+ if (x86_platform.legacy.warm_reset) {
pr_debug("Setting warm reset code and vector.\n");
@@ -1006,7 +1005,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
/* mark "stuck" area as not stuck */
*trampoline_status = 0;
- if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
+ if (x86_platform.legacy.warm_reset) {
/*
* Cleanup possible dangling ends...
*/
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 749d189f8cd4..774ebafa97c4 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -69,9 +69,12 @@ static struct irqaction irq0 = {
static void __init setup_default_timer_irq(void)
{
- if (!nr_legacy_irqs())
- return;
- setup_irq(0, &irq0);
+ /*
+ * Unconditionally register the legacy timer; even without legacy
+ * PIC/PIT we need this for the HPET0 in legacy replacement mode.
+ */
+ if (setup_irq(0, &irq0))
+ pr_info("Failed to register legacy timer interrupt\n");
}
/* Default timer init function */
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index e169e85db434..fb4302738410 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -25,6 +25,7 @@
#include <asm/geode.h>
#include <asm/apic.h>
#include <asm/intel-family.h>
+#include <asm/i8259.h>
unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
@@ -363,6 +364,20 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
unsigned long tscmin, tscmax;
int pitcnt;
+ if (!has_legacy_pic()) {
+ /*
+ * Relies on tsc_early_delay_calibrate() to have given us semi
+ * usable udelay(), wait for the same 50ms we would have with
+ * the PIT loop below.
+ */
+ udelay(10 * USEC_PER_MSEC);
+ udelay(10 * USEC_PER_MSEC);
+ udelay(10 * USEC_PER_MSEC);
+ udelay(10 * USEC_PER_MSEC);
+ udelay(10 * USEC_PER_MSEC);
+ return ULONG_MAX;
+ }
+
/* Set the Gate high, disable speaker */
outb((inb(0x61) & ~0x02) | 0x01, 0x61);
@@ -487,6 +502,9 @@ static unsigned long quick_pit_calibrate(void)
u64 tsc, delta;
unsigned long d1, d2;
+ if (!has_legacy_pic())
+ return 0;
+
/* Set the Gate high, disable speaker */
outb((inb(0x61) & ~0x02) | 0x01, 0x61);
@@ -988,8 +1006,6 @@ static void __init detect_art(void)
/* clocksource code */
-static struct clocksource clocksource_tsc;
-
static void tsc_resume(struct clocksource *cs)
{
tsc_verify_tsc_adjust(true);
@@ -1040,12 +1056,31 @@ static void tsc_cs_tick_stable(struct clocksource *cs)
/*
* .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc()
*/
+static struct clocksource clocksource_tsc_early = {
+ .name = "tsc-early",
+ .rating = 299,
+ .read = read_tsc,
+ .mask = CLOCKSOURCE_MASK(64),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS |
+ CLOCK_SOURCE_MUST_VERIFY,
+ .archdata = { .vclock_mode = VCLOCK_TSC },
+ .resume = tsc_resume,
+ .mark_unstable = tsc_cs_mark_unstable,
+ .tick_stable = tsc_cs_tick_stable,
+};
+
+/*
+ * Must mark VALID_FOR_HRES early such that when we unregister tsc_early
+ * this one will immediately take over. We will only register if TSC has
+ * been found good.
+ */
static struct clocksource clocksource_tsc = {
.name = "tsc",
.rating = 300,
.read = read_tsc,
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
+ CLOCK_SOURCE_VALID_FOR_HRES |
CLOCK_SOURCE_MUST_VERIFY,
.archdata = { .vclock_mode = VCLOCK_TSC },
.resume = tsc_resume,
@@ -1169,8 +1204,8 @@ static void tsc_refine_calibration_work(struct work_struct *work)
int cpu;
/* Don't bother refining TSC on unstable systems */
- if (check_tsc_unstable())
- goto out;
+ if (tsc_unstable)
+ return;
/*
* Since the work is started early in boot, we may be
@@ -1222,9 +1257,13 @@ static void tsc_refine_calibration_work(struct work_struct *work)
set_cyc2ns_scale(tsc_khz, cpu, tsc_stop);
out:
+ if (tsc_unstable)
+ return;
+
if (boot_cpu_has(X86_FEATURE_ART))
art_related_clocksource = &clocksource_tsc;
clocksource_register_khz(&clocksource_tsc, tsc_khz);
+ clocksource_unregister(&clocksource_tsc_early);
}
@@ -1233,13 +1272,11 @@ static int __init init_tsc_clocksource(void)
if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz)
return 0;
+ if (check_tsc_unstable())
+ return 0;
+
if (tsc_clocksource_reliable)
clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
- /* lower the rating if we already know its unstable: */
- if (check_tsc_unstable()) {
- clocksource_tsc.rating = 0;
- clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
- }
if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
@@ -1252,6 +1289,7 @@ static int __init init_tsc_clocksource(void)
if (boot_cpu_has(X86_FEATURE_ART))
art_related_clocksource = &clocksource_tsc;
clocksource_register_khz(&clocksource_tsc, tsc_khz);
+ clocksource_unregister(&clocksource_tsc_early);
return 0;
}
@@ -1356,9 +1394,12 @@ void __init tsc_init(void)
check_system_tsc_reliable();
- if (unsynchronized_tsc())
+ if (unsynchronized_tsc()) {
mark_tsc_unstable("TSCs unsynchronized");
+ return;
+ }
+ clocksource_register_khz(&clocksource_tsc_early, tsc_khz);
detect_art();
}
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index a3755d293a48..85c7ef23d99f 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -528,11 +528,11 @@ static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
return 0;
}
-static int push_ret_address(struct pt_regs *regs, unsigned long ip)
+static int emulate_push_stack(struct pt_regs *regs, unsigned long val)
{
unsigned long new_sp = regs->sp - sizeof_long();
- if (copy_to_user((void __user *)new_sp, &ip, sizeof_long()))
+ if (copy_to_user((void __user *)new_sp, &val, sizeof_long()))
return -EFAULT;
regs->sp = new_sp;
@@ -566,7 +566,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs
regs->ip += correction;
} else if (auprobe->defparam.fixups & UPROBE_FIX_CALL) {
regs->sp += sizeof_long(); /* Pop incorrect return address */
- if (push_ret_address(regs, utask->vaddr + auprobe->defparam.ilen))
+ if (emulate_push_stack(regs, utask->vaddr + auprobe->defparam.ilen))
return -ERESTART;
}
/* popf; tell the caller to not touch TF */
@@ -655,7 +655,7 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
*
* But there is corner case, see the comment in ->post_xol().
*/
- if (push_ret_address(regs, new_ip))
+ if (emulate_push_stack(regs, new_ip))
return false;
} else if (!check_jmp_cond(auprobe, regs)) {
offs = 0;
@@ -665,6 +665,16 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
return true;
}
+static bool push_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ unsigned long *src_ptr = (void *)regs + auprobe->push.reg_offset;
+
+ if (emulate_push_stack(regs, *src_ptr))
+ return false;
+ regs->ip += auprobe->push.ilen;
+ return true;
+}
+
static int branch_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
BUG_ON(!branch_is_call(auprobe));
@@ -703,6 +713,10 @@ static const struct uprobe_xol_ops branch_xol_ops = {
.post_xol = branch_post_xol_op,
};
+static const struct uprobe_xol_ops push_xol_ops = {
+ .emulate = push_emulate_op,
+};
+
/* Returns -ENOSYS if branch_xol_ops doesn't handle this insn */
static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
{
@@ -750,6 +764,87 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
return 0;
}
+/* Returns -ENOSYS if push_xol_ops doesn't handle this insn */
+static int push_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
+{
+ u8 opc1 = OPCODE1(insn), reg_offset = 0;
+
+ if (opc1 < 0x50 || opc1 > 0x57)
+ return -ENOSYS;
+
+ if (insn->length > 2)
+ return -ENOSYS;
+ if (insn->length == 2) {
+ /* only support rex_prefix 0x41 (x64 only) */
+#ifdef CONFIG_X86_64
+ if (insn->rex_prefix.nbytes != 1 ||
+ insn->rex_prefix.bytes[0] != 0x41)
+ return -ENOSYS;
+
+ switch (opc1) {
+ case 0x50:
+ reg_offset = offsetof(struct pt_regs, r8);
+ break;
+ case 0x51:
+ reg_offset = offsetof(struct pt_regs, r9);
+ break;
+ case 0x52:
+ reg_offset = offsetof(struct pt_regs, r10);
+ break;
+ case 0x53:
+ reg_offset = offsetof(struct pt_regs, r11);
+ break;
+ case 0x54:
+ reg_offset = offsetof(struct pt_regs, r12);
+ break;
+ case 0x55:
+ reg_offset = offsetof(struct pt_regs, r13);
+ break;
+ case 0x56:
+ reg_offset = offsetof(struct pt_regs, r14);
+ break;
+ case 0x57:
+ reg_offset = offsetof(struct pt_regs, r15);
+ break;
+ }
+#else
+ return -ENOSYS;
+#endif
+ } else {
+ switch (opc1) {
+ case 0x50:
+ reg_offset = offsetof(struct pt_regs, ax);
+ break;
+ case 0x51:
+ reg_offset = offsetof(struct pt_regs, cx);
+ break;
+ case 0x52:
+ reg_offset = offsetof(struct pt_regs, dx);
+ break;
+ case 0x53:
+ reg_offset = offsetof(struct pt_regs, bx);
+ break;
+ case 0x54:
+ reg_offset = offsetof(struct pt_regs, sp);
+ break;
+ case 0x55:
+ reg_offset = offsetof(struct pt_regs, bp);
+ break;
+ case 0x56:
+ reg_offset = offsetof(struct pt_regs, si);
+ break;
+ case 0x57:
+ reg_offset = offsetof(struct pt_regs, di);
+ break;
+ }
+ }
+
+ auprobe->push.reg_offset = reg_offset;
+ auprobe->push.ilen = insn->length;
+ auprobe->ops = &push_xol_ops;
+ return 0;
+}
+
/**
* arch_uprobe_analyze_insn - instruction analysis including validity and fixups.
* @mm: the probed address space.
@@ -771,6 +866,10 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
if (ret != -ENOSYS)
return ret;
+ ret = push_setup_xol_ops(auprobe, &insn);
+ if (ret != -ENOSYS)
+ return ret;
+
/*
* Figure out which fixups default_post_xol_op() will need to perform,
* and annotate defparam->fixups accordingly.
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 3df51c287844..92fd433c50b9 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -81,6 +81,14 @@ config KVM_AMD
To compile this as a module, choose M here: the module
will be called kvm-amd.
+config KVM_AMD_SEV
+ def_bool y
+ bool "AMD Secure Encrypted Virtualization (SEV) support"
+ depends on KVM_AMD && X86_64
+ depends on CRYPTO_DEV_CCP && CRYPTO_DEV_CCP_DD && CRYPTO_DEV_SP_PSP
+ ---help---
+ Provides support for launching Encrypted VMs on AMD processors.
+
config KVM_MMU_AUDIT
bool "Audit KVM MMU"
depends on KVM && TRACEPOINTS
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0099e10eb045..20e491b94f44 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -293,13 +293,18 @@ static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
{
switch (func) {
case 0:
- entry->eax = 1; /* only one leaf currently */
+ entry->eax = 7;
++*nent;
break;
case 1:
entry->ecx = F(MOVBE);
++*nent;
break;
+ case 7:
+ entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ if (index == 0)
+ entry->ecx = F(RDPID);
+ ++*nent;
default:
break;
}
@@ -327,6 +332,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0;
unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
+ unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0;
/* cpuid 1.edx */
const u32 kvm_cpuid_1_edx_x86_features =
@@ -365,7 +371,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
- 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
+ 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
+ F(TOPOEXT);
/* cpuid 0xC0000001.edx */
const u32 kvm_cpuid_C000_0001_edx_x86_features =
@@ -387,8 +394,9 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 7.0.ecx*/
const u32 kvm_cpuid_7_0_ecx_x86_features =
- F(AVX512VBMI) | F(LA57) | F(PKU) |
- 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ);
+ F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
+ F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
+ F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG);
/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
@@ -473,6 +481,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->ebx |= F(TSC_ADJUST);
entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
cpuid_mask(&entry->ecx, CPUID_7_ECX);
+ entry->ecx |= f_umip;
/* PKU is not yet implemented for shadow paging. */
if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
entry->ecx &= ~F(PKU);
@@ -594,7 +603,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
(1 << KVM_FEATURE_ASYNC_PF) |
(1 << KVM_FEATURE_PV_EOI) |
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
- (1 << KVM_FEATURE_PV_UNHALT);
+ (1 << KVM_FEATURE_PV_UNHALT) |
+ (1 << KVM_FEATURE_PV_TLB_FLUSH);
if (sched_info_on())
entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
@@ -604,7 +614,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->edx = 0;
break;
case 0x80000000:
- entry->eax = min(entry->eax, 0x8000001a);
+ entry->eax = min(entry->eax, 0x8000001f);
break;
case 0x80000001:
entry->edx &= kvm_cpuid_8000_0001_edx_x86_features;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index b514b2b2845a..d91eaeb01034 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -25,6 +25,7 @@
#include <asm/kvm_emulate.h>
#include <linux/stringify.h>
#include <asm/debugreg.h>
+#include <asm/nospec-branch.h>
#include "x86.h"
#include "tss.h"
@@ -1021,8 +1022,8 @@ static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
- asm("push %[flags]; popf; call *%[fastop]"
- : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
+ asm("push %[flags]; popf; " CALL_NOSPEC
+ : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
return rc;
}
@@ -3532,6 +3533,16 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
+static int em_rdpid(struct x86_emulate_ctxt *ctxt)
+{
+ u64 tsc_aux = 0;
+
+ if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux))
+ return emulate_gp(ctxt, 0);
+ ctxt->dst.val = tsc_aux;
+ return X86EMUL_CONTINUE;
+}
+
static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
{
u64 tsc = 0;
@@ -3651,17 +3662,27 @@ static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
-static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
+static int em_store_sreg(struct x86_emulate_ctxt *ctxt, int segment)
{
- if (ctxt->modrm_reg > VCPU_SREG_GS)
- return emulate_ud(ctxt);
+ if (segment > VCPU_SREG_GS &&
+ (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
+ ctxt->ops->cpl(ctxt) > 0)
+ return emulate_gp(ctxt, 0);
- ctxt->dst.val = get_segment_selector(ctxt, ctxt->modrm_reg);
+ ctxt->dst.val = get_segment_selector(ctxt, segment);
if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
ctxt->dst.bytes = 2;
return X86EMUL_CONTINUE;
}
+static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
+{
+ if (ctxt->modrm_reg > VCPU_SREG_GS)
+ return emulate_ud(ctxt);
+
+ return em_store_sreg(ctxt, ctxt->modrm_reg);
+}
+
static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
{
u16 sel = ctxt->src.val;
@@ -3677,6 +3698,11 @@ static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
}
+static int em_sldt(struct x86_emulate_ctxt *ctxt)
+{
+ return em_store_sreg(ctxt, VCPU_SREG_LDTR);
+}
+
static int em_lldt(struct x86_emulate_ctxt *ctxt)
{
u16 sel = ctxt->src.val;
@@ -3686,6 +3712,11 @@ static int em_lldt(struct x86_emulate_ctxt *ctxt)
return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
}
+static int em_str(struct x86_emulate_ctxt *ctxt)
+{
+ return em_store_sreg(ctxt, VCPU_SREG_TR);
+}
+
static int em_ltr(struct x86_emulate_ctxt *ctxt)
{
u16 sel = ctxt->src.val;
@@ -3738,6 +3769,10 @@ static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
{
struct desc_ptr desc_ptr;
+ if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
+ ctxt->ops->cpl(ctxt) > 0)
+ return emulate_gp(ctxt, 0);
+
if (ctxt->mode == X86EMUL_MODE_PROT64)
ctxt->op_bytes = 8;
get(ctxt, &desc_ptr);
@@ -3797,6 +3832,10 @@ static int em_lidt(struct x86_emulate_ctxt *ctxt)
static int em_smsw(struct x86_emulate_ctxt *ctxt)
{
+ if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
+ ctxt->ops->cpl(ctxt) > 0)
+ return emulate_gp(ctxt, 0);
+
if (ctxt->dst.type == OP_MEM)
ctxt->dst.bytes = 2;
ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
@@ -4382,8 +4421,8 @@ static const struct opcode group5[] = {
};
static const struct opcode group6[] = {
- DI(Prot | DstMem, sldt),
- DI(Prot | DstMem, str),
+ II(Prot | DstMem, em_sldt, sldt),
+ II(Prot | DstMem, em_str, str),
II(Prot | Priv | SrcMem16, em_lldt, lldt),
II(Prot | Priv | SrcMem16, em_ltr, ltr),
N, N, N, N,
@@ -4414,10 +4453,20 @@ static const struct opcode group8[] = {
F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
};
+/*
+ * The "memory" destination is actually always a register, since we come
+ * from the register case of group9.
+ */
+static const struct gprefix pfx_0f_c7_7 = {
+ N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp),
+};
+
+
static const struct group_dual group9 = { {
N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
}, {
- N, N, N, N, N, N, N, N,
+ N, N, N, N, N, N, N,
+ GP(0, &pfx_0f_c7_7),
} };
static const struct opcode group11[] = {
@@ -5335,9 +5384,9 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
if (!(ctxt->d & ByteOp))
fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
- asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
+ asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
: "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
- [fastop]"+S"(fop), ASM_CALL_CONSTRAINT
+ [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
: "c"(ctxt->src2.val));
ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 5c24811e8b0b..f171051eecf3 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -79,7 +79,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
if (kvm_cpu_has_extint(v))
return 1;
- if (kvm_vcpu_apicv_active(v))
+ if (!is_guest_mode(v) && kvm_vcpu_apicv_active(v))
return 0;
return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index e2c1fb8d35ce..924ac8ce9d50 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -364,32 +364,41 @@ static u8 count_vectors(void *bitmap)
return count;
}
-int __kvm_apic_update_irr(u32 *pir, void *regs)
+bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
{
u32 i, vec;
- u32 pir_val, irr_val;
- int max_irr = -1;
+ u32 pir_val, irr_val, prev_irr_val;
+ int max_updated_irr;
+
+ max_updated_irr = -1;
+ *max_irr = -1;
for (i = vec = 0; i <= 7; i++, vec += 32) {
pir_val = READ_ONCE(pir[i]);
irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
if (pir_val) {
+ prev_irr_val = irr_val;
irr_val |= xchg(&pir[i], 0);
*((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
+ if (prev_irr_val != irr_val) {
+ max_updated_irr =
+ __fls(irr_val ^ prev_irr_val) + vec;
+ }
}
if (irr_val)
- max_irr = __fls(irr_val) + vec;
+ *max_irr = __fls(irr_val) + vec;
}
- return max_irr;
+ return ((max_updated_irr != -1) &&
+ (max_updated_irr == *max_irr));
}
EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
-int kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
+bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- return __kvm_apic_update_irr(pir, apic->regs);
+ return __kvm_apic_update_irr(pir, apic->regs, max_irr);
}
EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
@@ -581,7 +590,7 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
{
int highest_irr;
- if (kvm_x86_ops->sync_pir_to_irr && apic->vcpu->arch.apicv_active)
+ if (apic->vcpu->arch.apicv_active)
highest_irr = kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
else
highest_irr = apic_find_highest_irr(apic);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 4b9935a38347..56c36014f7b7 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -75,8 +75,8 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
int short_hand, unsigned int dest, int dest_mode);
-int __kvm_apic_update_irr(u32 *pir, void *regs);
-int kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
+bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr);
+bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr);
void kvm_apic_update_ppr(struct kvm_vcpu *vcpu);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
struct dest_map *dest_map);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2b8eb4da4d08..8eca1d04aeb8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -42,6 +42,7 @@
#include <linux/kern_levels.h>
#include <asm/page.h>
+#include <asm/pat.h>
#include <asm/cmpxchg.h>
#include <asm/io.h>
#include <asm/vmx.h>
@@ -381,7 +382,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
-void kvm_mmu_clear_all_pte_masks(void)
+static void kvm_mmu_clear_all_pte_masks(void)
{
shadow_user_mask = 0;
shadow_accessed_mask = 0;
@@ -2708,7 +2709,18 @@ static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
{
if (pfn_valid(pfn))
- return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn));
+ return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)) &&
+ /*
+ * Some reserved pages, such as those from NVDIMM
+ * DAX devices, are not for MMIO, and can be mapped
+ * with cached memory type for better performance.
+ * However, the above check misconceives those pages
+ * as MMIO, and results in KVM mapping them with UC
+ * memory type, which would hurt the performance.
+ * Therefore, we check the host memory type in addition
+ * and only treat UC/UC-/WC pages as MMIO.
+ */
+ (!pat_enabled() || pat_pfn_immune_to_uc_mtrr(pfn));
return true;
}
@@ -4951,6 +4963,16 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
if (mmio_info_in_cache(vcpu, cr2, direct))
emulation_type = 0;
emulate:
+ /*
+ * On AMD platforms, under certain conditions insn_len may be zero on #NPF.
+ * This can happen if a guest gets a page-fault on data access but the HW
+ * table walker is not able to read the instruction page (e.g instruction
+ * page is not present in memory). In those cases we simply restart the
+ * guest.
+ */
+ if (unlikely(insn && !insn_len))
+ return 1;
+
er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);
switch (er) {
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index d22ddbdf5e6e..1272861e77b9 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -19,7 +19,7 @@
#include <linux/ratelimit.h>
-char const *audit_point_name[] = {
+static char const *audit_point_name[] = {
"pre page fault",
"post page fault",
"pre pte write",
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f40d0da1f1d3..1bf20e9160bd 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -37,6 +37,10 @@
#include <linux/amd-iommu.h>
#include <linux/hashtable.h>
#include <linux/frame.h>
+#include <linux/psp-sev.h>
+#include <linux/file.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
#include <asm/apic.h>
#include <asm/perf_event.h>
@@ -212,6 +216,9 @@ struct vcpu_svm {
*/
struct list_head ir_list;
spinlock_t ir_list_lock;
+
+ /* which host CPU was used for running this vcpu */
+ unsigned int last_cpu;
};
/*
@@ -285,8 +292,12 @@ module_param(vls, int, 0444);
static int vgif = true;
module_param(vgif, int, 0444);
+/* enable/disable SEV support */
+static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
+module_param(sev, int, 0444);
+
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
-static void svm_flush_tlb(struct kvm_vcpu *vcpu);
+static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
static void svm_complete_interrupts(struct vcpu_svm *svm);
static int nested_svm_exit_handled(struct vcpu_svm *svm);
@@ -320,6 +331,38 @@ enum {
#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
+static unsigned int max_sev_asid;
+static unsigned int min_sev_asid;
+static unsigned long *sev_asid_bitmap;
+#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
+
+struct enc_region {
+ struct list_head list;
+ unsigned long npages;
+ struct page **pages;
+ unsigned long uaddr;
+ unsigned long size;
+};
+
+static inline bool svm_sev_enabled(void)
+{
+ return max_sev_asid;
+}
+
+static inline bool sev_guest(struct kvm *kvm)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+
+ return sev->active;
+}
+
+static inline int sev_get_asid(struct kvm *kvm)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+
+ return sev->asid;
+}
+
static inline void mark_all_dirty(struct vmcb *vmcb)
{
vmcb->control.clean = 0;
@@ -526,9 +569,13 @@ struct svm_cpu_data {
u64 asid_generation;
u32 max_asid;
u32 next_asid;
+ u32 min_asid;
struct kvm_ldttss_desc *tss_desc;
struct page *save_area;
+
+ /* index = sev_asid, value = vmcb pointer */
+ struct vmcb **sev_vmcbs;
};
static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
@@ -783,6 +830,7 @@ static int svm_hardware_enable(void)
sd->asid_generation = 1;
sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
sd->next_asid = sd->max_asid + 1;
+ sd->min_asid = max_sev_asid + 1;
gdt = get_current_gdt_rw();
sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
@@ -841,6 +889,7 @@ static void svm_cpu_uninit(int cpu)
return;
per_cpu(svm_data, raw_smp_processor_id()) = NULL;
+ kfree(sd->sev_vmcbs);
__free_page(sd->save_area);
kfree(sd);
}
@@ -854,11 +903,18 @@ static int svm_cpu_init(int cpu)
if (!sd)
return -ENOMEM;
sd->cpu = cpu;
- sd->save_area = alloc_page(GFP_KERNEL);
r = -ENOMEM;
+ sd->save_area = alloc_page(GFP_KERNEL);
if (!sd->save_area)
goto err_1;
+ if (svm_sev_enabled()) {
+ r = -ENOMEM;
+ sd->sev_vmcbs = kmalloc((max_sev_asid + 1) * sizeof(void *), GFP_KERNEL);
+ if (!sd->sev_vmcbs)
+ goto err_1;
+ }
+
per_cpu(svm_data, cpu) = sd;
return 0;
@@ -1046,6 +1102,48 @@ static int avic_ga_log_notifier(u32 ga_tag)
return 0;
}
+static __init int sev_hardware_setup(void)
+{
+ struct sev_user_data_status *status;
+ int rc;
+
+ /* Maximum number of encrypted guests supported simultaneously */
+ max_sev_asid = cpuid_ecx(0x8000001F);
+
+ if (!max_sev_asid)
+ return 1;
+
+ /* Minimum ASID value that should be used for SEV guest */
+ min_sev_asid = cpuid_edx(0x8000001F);
+
+ /* Initialize SEV ASID bitmap */
+ sev_asid_bitmap = kcalloc(BITS_TO_LONGS(max_sev_asid),
+ sizeof(unsigned long), GFP_KERNEL);
+ if (!sev_asid_bitmap)
+ return 1;
+
+ status = kmalloc(sizeof(*status), GFP_KERNEL);
+ if (!status)
+ return 1;
+
+ /*
+ * Check SEV platform status.
+ *
+ * PLATFORM_STATUS can be called in any state, if we failed to query
+ * the PLATFORM status then either PSP firmware does not support SEV
+ * feature or SEV firmware is dead.
+ */
+ rc = sev_platform_status(status, NULL);
+ if (rc)
+ goto err;
+
+ pr_info("SEV supported\n");
+
+err:
+ kfree(status);
+ return rc;
+}
+
static __init int svm_hardware_setup(void)
{
int cpu;
@@ -1081,6 +1179,17 @@ static __init int svm_hardware_setup(void)
kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
}
+ if (sev) {
+ if (boot_cpu_has(X86_FEATURE_SEV) &&
+ IS_ENABLED(CONFIG_KVM_AMD_SEV)) {
+ r = sev_hardware_setup();
+ if (r)
+ sev = false;
+ } else {
+ sev = false;
+ }
+ }
+
for_each_possible_cpu(cpu) {
r = svm_cpu_init(cpu);
if (r)
@@ -1142,6 +1251,9 @@ static __exit void svm_hardware_unsetup(void)
{
int cpu;
+ if (svm_sev_enabled())
+ kfree(sev_asid_bitmap);
+
for_each_possible_cpu(cpu)
svm_cpu_uninit(cpu);
@@ -1294,7 +1406,7 @@ static void init_vmcb(struct vcpu_svm *svm)
if (npt_enabled) {
/* Setup VMCB for Nested Paging */
- control->nested_ctl = 1;
+ control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE;
clr_intercept(svm, INTERCEPT_INVLPG);
clr_exception_intercept(svm, PF_VECTOR);
clr_cr_intercept(svm, INTERCEPT_CR3_READ);
@@ -1332,6 +1444,11 @@ static void init_vmcb(struct vcpu_svm *svm)
svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
}
+ if (sev_guest(svm->vcpu.kvm)) {
+ svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
+ clr_exception_intercept(svm, UD_VECTOR);
+ }
+
mark_all_dirty(svm->vmcb);
enable_gif(svm);
@@ -1414,6 +1531,179 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
return 0;
}
+static void __sev_asid_free(int asid)
+{
+ struct svm_cpu_data *sd;
+ int cpu, pos;
+
+ pos = asid - 1;
+ clear_bit(pos, sev_asid_bitmap);
+
+ for_each_possible_cpu(cpu) {
+ sd = per_cpu(svm_data, cpu);
+ sd->sev_vmcbs[pos] = NULL;
+ }
+}
+
+static void sev_asid_free(struct kvm *kvm)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+
+ __sev_asid_free(sev->asid);
+}
+
+static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+{
+ struct sev_data_decommission *decommission;
+ struct sev_data_deactivate *data;
+
+ if (!handle)
+ return;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return;
+
+ /* deactivate handle */
+ data->handle = handle;
+ sev_guest_deactivate(data, NULL);
+
+ wbinvd_on_all_cpus();
+ sev_guest_df_flush(NULL);
+ kfree(data);
+
+ decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
+ if (!decommission)
+ return;
+
+ /* decommission handle */
+ decommission->handle = handle;
+ sev_guest_decommission(decommission, NULL);
+
+ kfree(decommission);
+}
+
+static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
+ unsigned long ulen, unsigned long *n,
+ int write)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ unsigned long npages, npinned, size;
+ unsigned long locked, lock_limit;
+ struct page **pages;
+ int first, last;
+
+ /* Calculate number of pages. */
+ first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
+ last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
+ npages = (last - first + 1);
+
+ locked = sev->pages_locked + npages;
+ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
+ pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
+ return NULL;
+ }
+
+ /* Avoid using vmalloc for smaller buffers. */
+ size = npages * sizeof(struct page *);
+ if (size > PAGE_SIZE)
+ pages = vmalloc(size);
+ else
+ pages = kmalloc(size, GFP_KERNEL);
+
+ if (!pages)
+ return NULL;
+
+ /* Pin the user virtual address. */
+ npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
+ if (npinned != npages) {
+ pr_err("SEV: Failure locking %lu pages.\n", npages);
+ goto err;
+ }
+
+ *n = npages;
+ sev->pages_locked = locked;
+
+ return pages;
+
+err:
+ if (npinned > 0)
+ release_pages(pages, npinned);
+
+ kvfree(pages);
+ return NULL;
+}
+
+static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
+ unsigned long npages)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+
+ release_pages(pages, npages);
+ kvfree(pages);
+ sev->pages_locked -= npages;
+}
+
+static void sev_clflush_pages(struct page *pages[], unsigned long npages)
+{
+ uint8_t *page_virtual;
+ unsigned long i;
+
+ if (npages == 0 || pages == NULL)
+ return;
+
+ for (i = 0; i < npages; i++) {
+ page_virtual = kmap_atomic(pages[i]);
+ clflush_cache_range(page_virtual, PAGE_SIZE);
+ kunmap_atomic(page_virtual);
+ }
+}
+
+static void __unregister_enc_region_locked(struct kvm *kvm,
+ struct enc_region *region)
+{
+ /*
+ * The guest may change the memory encryption attribute from C=0 -> C=1
+ * or vice versa for this memory range. Lets make sure caches are
+ * flushed to ensure that guest data gets written into memory with
+ * correct C-bit.
+ */
+ sev_clflush_pages(region->pages, region->npages);
+
+ sev_unpin_memory(kvm, region->pages, region->npages);
+ list_del(&region->list);
+ kfree(region);
+}
+
+static void sev_vm_destroy(struct kvm *kvm)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct list_head *head = &sev->regions_list;
+ struct list_head *pos, *q;
+
+ if (!sev_guest(kvm))
+ return;
+
+ mutex_lock(&kvm->lock);
+
+ /*
+ * if userspace was terminated before unregistering the memory regions
+ * then lets unpin all the registered memory.
+ */
+ if (!list_empty(head)) {
+ list_for_each_safe(pos, q, head) {
+ __unregister_enc_region_locked(kvm,
+ list_entry(pos, struct enc_region, list));
+ }
+ }
+
+ mutex_unlock(&kvm->lock);
+
+ sev_unbind_asid(kvm, sev->handle);
+ sev_asid_free(kvm);
+}
+
static void avic_vm_destroy(struct kvm *kvm)
{
unsigned long flags;
@@ -1432,6 +1722,12 @@ static void avic_vm_destroy(struct kvm *kvm)
spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
}
+static void svm_vm_destroy(struct kvm *kvm)
+{
+ avic_vm_destroy(kvm);
+ sev_vm_destroy(kvm);
+}
+
static int avic_vm_init(struct kvm *kvm)
{
unsigned long flags;
@@ -2030,7 +2326,7 @@ static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return 1;
if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
- svm_flush_tlb(vcpu);
+ svm_flush_tlb(vcpu, true);
vcpu->arch.cr4 = cr4;
if (!npt_enabled)
@@ -2089,7 +2385,7 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
{
if (sd->next_asid > sd->max_asid) {
++sd->asid_generation;
- sd->next_asid = 1;
+ sd->next_asid = sd->min_asid;
svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
}
@@ -2137,22 +2433,24 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
static int pf_interception(struct vcpu_svm *svm)
{
- u64 fault_address = svm->vmcb->control.exit_info_2;
+ u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
u64 error_code = svm->vmcb->control.exit_info_1;
return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address,
- svm->vmcb->control.insn_bytes,
+ static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
+ svm->vmcb->control.insn_bytes : NULL,
svm->vmcb->control.insn_len);
}
static int npf_interception(struct vcpu_svm *svm)
{
- u64 fault_address = svm->vmcb->control.exit_info_2;
+ u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
u64 error_code = svm->vmcb->control.exit_info_1;
trace_kvm_page_fault(fault_address, error_code);
return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
- svm->vmcb->control.insn_bytes,
+ static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
+ svm->vmcb->control.insn_bytes : NULL,
svm->vmcb->control.insn_len);
}
@@ -2379,7 +2677,7 @@ static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
svm->vmcb->control.nested_cr3 = __sme_set(root);
mark_dirty(svm->vmcb, VMCB_NPT);
- svm_flush_tlb(vcpu);
+ svm_flush_tlb(vcpu, true);
}
static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
@@ -2921,7 +3219,8 @@ static bool nested_vmcb_checks(struct vmcb *vmcb)
if (vmcb->control.asid == 0)
return false;
- if (vmcb->control.nested_ctl && !npt_enabled)
+ if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
+ !npt_enabled)
return false;
return true;
@@ -2935,7 +3234,7 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
else
svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
- if (nested_vmcb->control.nested_ctl) {
+ if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) {
kvm_mmu_unload(&svm->vcpu);
svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
nested_svm_init_mmu_context(&svm->vcpu);
@@ -2983,7 +3282,7 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
svm->nested.intercept = nested_vmcb->control.intercept;
- svm_flush_tlb(&svm->vcpu);
+ svm_flush_tlb(&svm->vcpu, true);
svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
svm->vcpu.arch.hflags |= HF_VINTR_MASK;
@@ -4356,12 +4655,39 @@ static void reload_tss(struct kvm_vcpu *vcpu)
load_TR_desc();
}
+static void pre_sev_run(struct vcpu_svm *svm, int cpu)
+{
+ struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+ int asid = sev_get_asid(svm->vcpu.kvm);
+
+ /* Assign the asid allocated with this SEV guest */
+ svm->vmcb->control.asid = asid;
+
+ /*
+ * Flush guest TLB:
+ *
+ * 1) when different VMCB for the same ASID is to be run on the same host CPU.
+ * 2) or this VMCB was executed on different host CPU in previous VMRUNs.
+ */
+ if (sd->sev_vmcbs[asid] == svm->vmcb &&
+ svm->last_cpu == cpu)
+ return;
+
+ svm->last_cpu = cpu;
+ sd->sev_vmcbs[asid] = svm->vmcb;
+ svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
+ mark_dirty(svm->vmcb, VMCB_ASID);
+}
+
static void pre_svm_run(struct vcpu_svm *svm)
{
int cpu = raw_smp_processor_id();
struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+ if (sev_guest(svm->vcpu.kvm))
+ return pre_sev_run(svm, cpu);
+
/* FIXME: handle wraparound of asid_generation */
if (svm->asid_generation != sd->asid_generation)
new_asid(svm, sd);
@@ -4779,7 +5105,7 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
return 0;
}
-static void svm_flush_tlb(struct kvm_vcpu *vcpu)
+static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -5092,7 +5418,7 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
svm->vmcb->save.cr3 = __sme_set(root);
mark_dirty(svm->vmcb, VMCB_CR);
- svm_flush_tlb(vcpu);
+ svm_flush_tlb(vcpu, true);
}
static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
@@ -5106,7 +5432,7 @@ static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
mark_dirty(svm->vmcb, VMCB_CR);
- svm_flush_tlb(vcpu);
+ svm_flush_tlb(vcpu, true);
}
static int is_disabled(void)
@@ -5192,6 +5518,12 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
entry->edx |= SVM_FEATURE_NPT;
break;
+ case 0x8000001F:
+ /* Support memory encryption cpuid if host supports it */
+ if (boot_cpu_has(X86_FEATURE_SEV))
+ cpuid(0x8000001f, &entry->eax, &entry->ebx,
+ &entry->ecx, &entry->edx);
+
}
}
@@ -5220,6 +5552,11 @@ static bool svm_xsaves_supported(void)
return false;
}
+static bool svm_umip_emulated(void)
+{
+ return false;
+}
+
static bool svm_has_wbinvd_exit(void)
{
return true;
@@ -5521,6 +5858,828 @@ static int enable_smi_window(struct kvm_vcpu *vcpu)
return 0;
}
+static int sev_asid_new(void)
+{
+ int pos;
+
+ /*
+ * SEV-enabled guest must use asid from min_sev_asid to max_sev_asid.
+ */
+ pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1);
+ if (pos >= max_sev_asid)
+ return -EBUSY;
+
+ set_bit(pos, sev_asid_bitmap);
+ return pos + 1;
+}
+
+static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ int asid, ret;
+
+ ret = -EBUSY;
+ asid = sev_asid_new();
+ if (asid < 0)
+ return ret;
+
+ ret = sev_platform_init(&argp->error);
+ if (ret)
+ goto e_free;
+
+ sev->active = true;
+ sev->asid = asid;
+ INIT_LIST_HEAD(&sev->regions_list);
+
+ return 0;
+
+e_free:
+ __sev_asid_free(asid);
+ return ret;
+}
+
+static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
+{
+ struct sev_data_activate *data;
+ int asid = sev_get_asid(kvm);
+ int ret;
+
+ wbinvd_on_all_cpus();
+
+ ret = sev_guest_df_flush(error);
+ if (ret)
+ return ret;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ /* activate ASID on the given handle */
+ data->handle = handle;
+ data->asid = asid;
+ ret = sev_guest_activate(data, error);
+ kfree(data);
+
+ return ret;
+}
+
+static int __sev_issue_cmd(int fd, int id, void *data, int *error)
+{
+ struct fd f;
+ int ret;
+
+ f = fdget(fd);
+ if (!f.file)
+ return -EBADF;
+
+ ret = sev_issue_cmd_external_user(f.file, id, data, error);
+
+ fdput(f);
+ return ret;
+}
+
+static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+
+ return __sev_issue_cmd(sev->fd, id, data, error);
+}
+
+static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct sev_data_launch_start *start;
+ struct kvm_sev_launch_start params;
+ void *dh_blob, *session_blob;
+ int *error = &argp->error;
+ int ret;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+ return -EFAULT;
+
+ start = kzalloc(sizeof(*start), GFP_KERNEL);
+ if (!start)
+ return -ENOMEM;
+
+ dh_blob = NULL;
+ if (params.dh_uaddr) {
+ dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
+ if (IS_ERR(dh_blob)) {
+ ret = PTR_ERR(dh_blob);
+ goto e_free;
+ }
+
+ start->dh_cert_address = __sme_set(__pa(dh_blob));
+ start->dh_cert_len = params.dh_len;
+ }
+
+ session_blob = NULL;
+ if (params.session_uaddr) {
+ session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len);
+ if (IS_ERR(session_blob)) {
+ ret = PTR_ERR(session_blob);
+ goto e_free_dh;
+ }
+
+ start->session_address = __sme_set(__pa(session_blob));
+ start->session_len = params.session_len;
+ }
+
+ start->handle = params.handle;
+ start->policy = params.policy;
+
+ /* create memory encryption context */
+ ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, start, error);
+ if (ret)
+ goto e_free_session;
+
+ /* Bind ASID to this guest */
+ ret = sev_bind_asid(kvm, start->handle, error);
+ if (ret)
+ goto e_free_session;
+
+ /* return handle to userspace */
+ params.handle = start->handle;
+ if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) {
+ sev_unbind_asid(kvm, start->handle);
+ ret = -EFAULT;
+ goto e_free_session;
+ }
+
+ sev->handle = start->handle;
+ sev->fd = argp->sev_fd;
+
+e_free_session:
+ kfree(session_blob);
+e_free_dh:
+ kfree(dh_blob);
+e_free:
+ kfree(start);
+ return ret;
+}
+
+static int get_num_contig_pages(int idx, struct page **inpages,
+ unsigned long npages)
+{
+ unsigned long paddr, next_paddr;
+ int i = idx + 1, pages = 1;
+
+ /* find the number of contiguous pages starting from idx */
+ paddr = __sme_page_pa(inpages[idx]);
+ while (i < npages) {
+ next_paddr = __sme_page_pa(inpages[i++]);
+ if ((paddr + PAGE_SIZE) == next_paddr) {
+ pages++;
+ paddr = next_paddr;
+ continue;
+ }
+ break;
+ }
+
+ return pages;
+}
+
+static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ unsigned long vaddr, vaddr_end, next_vaddr, npages, size;
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_launch_update_data params;
+ struct sev_data_launch_update_data *data;
+ struct page **inpages;
+ int i, ret, pages;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+ return -EFAULT;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ vaddr = params.uaddr;
+ size = params.len;
+ vaddr_end = vaddr + size;
+
+ /* Lock the user memory. */
+ inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
+ if (!inpages) {
+ ret = -ENOMEM;
+ goto e_free;
+ }
+
+ /*
+ * The LAUNCH_UPDATE command will perform in-place encryption of the
+ * memory content (i.e it will write the same memory region with C=1).
+ * It's possible that the cache may contain the data with C=0, i.e.,
+ * unencrypted so invalidate it first.
+ */
+ sev_clflush_pages(inpages, npages);
+
+ for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
+ int offset, len;
+
+ /*
+ * If the user buffer is not page-aligned, calculate the offset
+ * within the page.
+ */
+ offset = vaddr & (PAGE_SIZE - 1);
+
+ /* Calculate the number of pages that can be encrypted in one go. */
+ pages = get_num_contig_pages(i, inpages, npages);
+
+ len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);
+
+ data->handle = sev->handle;
+ data->len = len;
+ data->address = __sme_page_pa(inpages[i]) + offset;
+ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, data, &argp->error);
+ if (ret)
+ goto e_unpin;
+
+ size -= len;
+ next_vaddr = vaddr + len;
+ }
+
+e_unpin:
+ /* content of memory is updated, mark pages dirty */
+ for (i = 0; i < npages; i++) {
+ set_page_dirty_lock(inpages[i]);
+ mark_page_accessed(inpages[i]);
+ }
+ /* unlock the user pages */
+ sev_unpin_memory(kvm, inpages, npages);
+e_free:
+ kfree(data);
+ return ret;
+}
+
+static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct sev_data_launch_measure *data;
+ struct kvm_sev_launch_measure params;
+ void *blob = NULL;
+ int ret;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+ return -EFAULT;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ /* User wants to query the blob length */
+ if (!params.len)
+ goto cmd;
+
+ if (params.uaddr) {
+ if (params.len > SEV_FW_BLOB_MAX_SIZE) {
+ ret = -EINVAL;
+ goto e_free;
+ }
+
+ if (!access_ok(VERIFY_WRITE, params.uaddr, params.len)) {
+ ret = -EFAULT;
+ goto e_free;
+ }
+
+ ret = -ENOMEM;
+ blob = kmalloc(params.len, GFP_KERNEL);
+ if (!blob)
+ goto e_free;
+
+ data->address = __psp_pa(blob);
+ data->len = params.len;
+ }
+
+cmd:
+ data->handle = sev->handle;
+ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, data, &argp->error);
+
+ /*
+ * If we query the session length, FW responded with expected data.
+ */
+ if (!params.len)
+ goto done;
+
+ if (ret)
+ goto e_free_blob;
+
+ if (blob) {
+ if (copy_to_user((void __user *)(uintptr_t)params.uaddr, blob, params.len))
+ ret = -EFAULT;
+ }
+
+done:
+ params.len = data->len;
+ if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
+ ret = -EFAULT;
+e_free_blob:
+ kfree(blob);
+e_free:
+ kfree(data);
+ return ret;
+}
+
+static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct sev_data_launch_finish *data;
+ int ret;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->handle = sev->handle;
+ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, data, &argp->error);
+
+ kfree(data);
+ return ret;
+}
+
+static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_guest_status params;
+ struct sev_data_guest_status *data;
+ int ret;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->handle = sev->handle;
+ ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, data, &argp->error);
+ if (ret)
+ goto e_free;
+
+ params.policy = data->policy;
+ params.state = data->state;
+ params.handle = data->handle;
+
+ if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
+ ret = -EFAULT;
+e_free:
+ kfree(data);
+ return ret;
+}
+
+static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
+ unsigned long dst, int size,
+ int *error, bool enc)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct sev_data_dbg *data;
+ int ret;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->handle = sev->handle;
+ data->dst_addr = dst;
+ data->src_addr = src;
+ data->len = size;
+
+ ret = sev_issue_cmd(kvm,
+ enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
+ data, error);
+ kfree(data);
+ return ret;
+}
+
+static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
+ unsigned long dst_paddr, int sz, int *err)
+{
+ int offset;
+
+ /*
+ * Its safe to read more than we are asked, caller should ensure that
+ * destination has enough space.
+ */
+ src_paddr = round_down(src_paddr, 16);
+ offset = src_paddr & 15;
+ sz = round_up(sz + offset, 16);
+
+ return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
+}
+
+static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
+ unsigned long __user dst_uaddr,
+ unsigned long dst_paddr,
+ int size, int *err)
+{
+ struct page *tpage = NULL;
+ int ret, offset;
+
+ /* if inputs are not 16-byte then use intermediate buffer */
+ if (!IS_ALIGNED(dst_paddr, 16) ||
+ !IS_ALIGNED(paddr, 16) ||
+ !IS_ALIGNED(size, 16)) {
+ tpage = (void *)alloc_page(GFP_KERNEL);
+ if (!tpage)
+ return -ENOMEM;
+
+ dst_paddr = __sme_page_pa(tpage);
+ }
+
+ ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err);
+ if (ret)
+ goto e_free;
+
+ if (tpage) {
+ offset = paddr & 15;
+ if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
+ page_address(tpage) + offset, size))
+ ret = -EFAULT;
+ }
+
+e_free:
+ if (tpage)
+ __free_page(tpage);
+
+ return ret;
+}
+
+static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
+ unsigned long __user vaddr,
+ unsigned long dst_paddr,
+ unsigned long __user dst_vaddr,
+ int size, int *error)
+{
+ struct page *src_tpage = NULL;
+ struct page *dst_tpage = NULL;
+ int ret, len = size;
+
+ /* If source buffer is not aligned then use an intermediate buffer */
+ if (!IS_ALIGNED(vaddr, 16)) {
+ src_tpage = alloc_page(GFP_KERNEL);
+ if (!src_tpage)
+ return -ENOMEM;
+
+ if (copy_from_user(page_address(src_tpage),
+ (void __user *)(uintptr_t)vaddr, size)) {
+ __free_page(src_tpage);
+ return -EFAULT;
+ }
+
+ paddr = __sme_page_pa(src_tpage);
+ }
+
+ /*
+ * If destination buffer or length is not aligned then do read-modify-write:
+ * - decrypt destination in an intermediate buffer
+ * - copy the source buffer in an intermediate buffer
+ * - use the intermediate buffer as source buffer
+ */
+ if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
+ int dst_offset;
+
+ dst_tpage = alloc_page(GFP_KERNEL);
+ if (!dst_tpage) {
+ ret = -ENOMEM;
+ goto e_free;
+ }
+
+ ret = __sev_dbg_decrypt(kvm, dst_paddr,
+ __sme_page_pa(dst_tpage), size, error);
+ if (ret)
+ goto e_free;
+
+ /*
+ * If source is kernel buffer then use memcpy() otherwise
+ * copy_from_user().
+ */
+ dst_offset = dst_paddr & 15;
+
+ if (src_tpage)
+ memcpy(page_address(dst_tpage) + dst_offset,
+ page_address(src_tpage), size);
+ else {
+ if (copy_from_user(page_address(dst_tpage) + dst_offset,
+ (void __user *)(uintptr_t)vaddr, size)) {
+ ret = -EFAULT;
+ goto e_free;
+ }
+ }
+
+ paddr = __sme_page_pa(dst_tpage);
+ dst_paddr = round_down(dst_paddr, 16);
+ len = round_up(size, 16);
+ }
+
+ ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true);
+
+e_free:
+ if (src_tpage)
+ __free_page(src_tpage);
+ if (dst_tpage)
+ __free_page(dst_tpage);
+ return ret;
+}
+
+static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
+{
+ unsigned long vaddr, vaddr_end, next_vaddr;
+ unsigned long dst_vaddr, dst_vaddr_end;
+ struct page **src_p, **dst_p;
+ struct kvm_sev_dbg debug;
+ unsigned long n;
+ int ret, size;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
+ return -EFAULT;
+
+ vaddr = debug.src_uaddr;
+ size = debug.len;
+ vaddr_end = vaddr + size;
+ dst_vaddr = debug.dst_uaddr;
+ dst_vaddr_end = dst_vaddr + size;
+
+ for (; vaddr < vaddr_end; vaddr = next_vaddr) {
+ int len, s_off, d_off;
+
+ /* lock userspace source and destination page */
+ src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
+ if (!src_p)
+ return -EFAULT;
+
+ dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
+ if (!dst_p) {
+ sev_unpin_memory(kvm, src_p, n);
+ return -EFAULT;
+ }
+
+ /*
+ * The DBG_{DE,EN}CRYPT commands will perform {dec,en}cryption of the
+ * memory content (i.e it will write the same memory region with C=1).
+ * It's possible that the cache may contain the data with C=0, i.e.,
+ * unencrypted so invalidate it first.
+ */
+ sev_clflush_pages(src_p, 1);
+ sev_clflush_pages(dst_p, 1);
+
+ /*
+ * Since user buffer may not be page aligned, calculate the
+ * offset within the page.
+ */
+ s_off = vaddr & ~PAGE_MASK;
+ d_off = dst_vaddr & ~PAGE_MASK;
+ len = min_t(size_t, (PAGE_SIZE - s_off), size);
+
+ if (dec)
+ ret = __sev_dbg_decrypt_user(kvm,
+ __sme_page_pa(src_p[0]) + s_off,
+ dst_vaddr,
+ __sme_page_pa(dst_p[0]) + d_off,
+ len, &argp->error);
+ else
+ ret = __sev_dbg_encrypt_user(kvm,
+ __sme_page_pa(src_p[0]) + s_off,
+ vaddr,
+ __sme_page_pa(dst_p[0]) + d_off,
+ dst_vaddr,
+ len, &argp->error);
+
+ sev_unpin_memory(kvm, src_p, 1);
+ sev_unpin_memory(kvm, dst_p, 1);
+
+ if (ret)
+ goto err;
+
+ next_vaddr = vaddr + len;
+ dst_vaddr = dst_vaddr + len;
+ size -= len;
+ }
+err:
+ return ret;
+}
+
+static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct sev_data_launch_secret *data;
+ struct kvm_sev_launch_secret params;
+ struct page **pages;
+ void *blob, *hdr;
+ unsigned long n;
+ int ret;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+ return -EFAULT;
+
+ pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
+ if (!pages)
+ return -ENOMEM;
+
+ /*
+ * The secret must be copied into contiguous memory region, lets verify
+ * that userspace memory pages are contiguous before we issue command.
+ */
+ if (get_num_contig_pages(0, pages, n) != n) {
+ ret = -EINVAL;
+ goto e_unpin_memory;
+ }
+
+ ret = -ENOMEM;
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ goto e_unpin_memory;
+
+ blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
+ if (IS_ERR(blob)) {
+ ret = PTR_ERR(blob);
+ goto e_free;
+ }
+
+ data->trans_address = __psp_pa(blob);
+ data->trans_len = params.trans_len;
+
+ hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
+ if (IS_ERR(hdr)) {
+ ret = PTR_ERR(hdr);
+ goto e_free_blob;
+ }
+ data->trans_address = __psp_pa(blob);
+ data->trans_len = params.trans_len;
+
+ data->handle = sev->handle;
+ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
+
+ kfree(hdr);
+
+e_free_blob:
+ kfree(blob);
+e_free:
+ kfree(data);
+e_unpin_memory:
+ sev_unpin_memory(kvm, pages, n);
+ return ret;
+}
+
+static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
+{
+ struct kvm_sev_cmd sev_cmd;
+ int r;
+
+ if (!svm_sev_enabled())
+ return -ENOTTY;
+
+ if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd)))
+ return -EFAULT;
+
+ mutex_lock(&kvm->lock);
+
+ switch (sev_cmd.id) {
+ case KVM_SEV_INIT:
+ r = sev_guest_init(kvm, &sev_cmd);
+ break;
+ case KVM_SEV_LAUNCH_START:
+ r = sev_launch_start(kvm, &sev_cmd);
+ break;
+ case KVM_SEV_LAUNCH_UPDATE_DATA:
+ r = sev_launch_update_data(kvm, &sev_cmd);
+ break;
+ case KVM_SEV_LAUNCH_MEASURE:
+ r = sev_launch_measure(kvm, &sev_cmd);
+ break;
+ case KVM_SEV_LAUNCH_FINISH:
+ r = sev_launch_finish(kvm, &sev_cmd);
+ break;
+ case KVM_SEV_GUEST_STATUS:
+ r = sev_guest_status(kvm, &sev_cmd);
+ break;
+ case KVM_SEV_DBG_DECRYPT:
+ r = sev_dbg_crypt(kvm, &sev_cmd, true);
+ break;
+ case KVM_SEV_DBG_ENCRYPT:
+ r = sev_dbg_crypt(kvm, &sev_cmd, false);
+ break;
+ case KVM_SEV_LAUNCH_SECRET:
+ r = sev_launch_secret(kvm, &sev_cmd);
+ break;
+ default:
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd)))
+ r = -EFAULT;
+
+out:
+ mutex_unlock(&kvm->lock);
+ return r;
+}
+
+static int svm_register_enc_region(struct kvm *kvm,
+ struct kvm_enc_region *range)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct enc_region *region;
+ int ret = 0;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ region = kzalloc(sizeof(*region), GFP_KERNEL);
+ if (!region)
+ return -ENOMEM;
+
+ region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
+ if (!region->pages) {
+ ret = -ENOMEM;
+ goto e_free;
+ }
+
+ /*
+ * The guest may change the memory encryption attribute from C=0 -> C=1
+ * or vice versa for this memory range. Lets make sure caches are
+ * flushed to ensure that guest data gets written into memory with
+ * correct C-bit.
+ */
+ sev_clflush_pages(region->pages, region->npages);
+
+ region->uaddr = range->addr;
+ region->size = range->size;
+
+ mutex_lock(&kvm->lock);
+ list_add_tail(&region->list, &sev->regions_list);
+ mutex_unlock(&kvm->lock);
+
+ return ret;
+
+e_free:
+ kfree(region);
+ return ret;
+}
+
+static struct enc_region *
+find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct list_head *head = &sev->regions_list;
+ struct enc_region *i;
+
+ list_for_each_entry(i, head, list) {
+ if (i->uaddr == range->addr &&
+ i->size == range->size)
+ return i;
+ }
+
+ return NULL;
+}
+
+
+static int svm_unregister_enc_region(struct kvm *kvm,
+ struct kvm_enc_region *range)
+{
+ struct enc_region *region;
+ int ret;
+
+ mutex_lock(&kvm->lock);
+
+ if (!sev_guest(kvm)) {
+ ret = -ENOTTY;
+ goto failed;
+ }
+
+ region = find_enc_region(kvm, range);
+ if (!region) {
+ ret = -EINVAL;
+ goto failed;
+ }
+
+ __unregister_enc_region_locked(kvm, region);
+
+ mutex_unlock(&kvm->lock);
+ return 0;
+
+failed:
+ mutex_unlock(&kvm->lock);
+ return ret;
+}
+
static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -5537,7 +6696,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.vcpu_reset = svm_vcpu_reset,
.vm_init = avic_vm_init,
- .vm_destroy = avic_vm_destroy,
+ .vm_destroy = svm_vm_destroy,
.prepare_guest_switch = svm_prepare_guest_switch,
.vcpu_load = svm_vcpu_load,
@@ -5597,6 +6756,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.load_eoi_exitmap = svm_load_eoi_exitmap,
.hwapic_irr_update = svm_hwapic_irr_update,
.hwapic_isr_update = svm_hwapic_isr_update,
+ .sync_pir_to_irr = kvm_lapic_find_highest_irr,
.apicv_post_state_restore = avic_post_state_restore,
.set_tss_addr = svm_set_tss_addr,
@@ -5613,6 +6773,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.invpcid_supported = svm_invpcid_supported,
.mpx_supported = svm_mpx_supported,
.xsaves_supported = svm_xsaves_supported,
+ .umip_emulated = svm_umip_emulated,
.set_supported_cpuid = svm_set_supported_cpuid,
@@ -5636,6 +6797,10 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.pre_enter_smm = svm_pre_enter_smm,
.pre_leave_smm = svm_pre_leave_smm,
.enable_smi_window = enable_smi_window,
+
+ .mem_enc_op = svm_mem_enc_op,
+ .mem_enc_reg_region = svm_register_enc_region,
+ .mem_enc_unreg_region = svm_unregister_enc_region,
};
static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 896af99a8606..9973a301364e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -418,6 +418,12 @@ struct __packed vmcs12 {
#define VMCS12_SIZE 0x1000
/*
+ * VMCS12_MAX_FIELD_INDEX is the highest index value used in any
+ * supported VMCS12 field encoding.
+ */
+#define VMCS12_MAX_FIELD_INDEX 0x17
+
+/*
* The nested_vmx structure is part of vcpu_vmx, and holds information we need
* for correct emulation of VMX (i.e., nested VMX) on this vcpu.
*/
@@ -440,6 +446,7 @@ struct nested_vmx {
* data hold by vmcs12
*/
bool sync_shadow_vmcs;
+ bool dirty_vmcs12;
bool change_vmcs01_virtual_x2apic_mode;
/* L2 must run next, and mustn't decide to exit to L1. */
@@ -659,6 +666,8 @@ struct vcpu_vmx {
u32 host_pkru;
+ unsigned long host_debugctlmsr;
+
/*
* Only bits masked by msr_ia32_feature_control_valid_bits can be set in
* msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included
@@ -687,67 +696,24 @@ static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
return &(to_vmx(vcpu)->pi_desc);
}
+#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
#define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
-#define FIELD(number, name) [number] = VMCS12_OFFSET(name)
-#define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \
- [number##_HIGH] = VMCS12_OFFSET(name)+4
+#define FIELD(number, name) [ROL16(number, 6)] = VMCS12_OFFSET(name)
+#define FIELD64(number, name) \
+ FIELD(number, name), \
+ [ROL16(number##_HIGH, 6)] = VMCS12_OFFSET(name) + sizeof(u32)
-static unsigned long shadow_read_only_fields[] = {
- /*
- * We do NOT shadow fields that are modified when L0
- * traps and emulates any vmx instruction (e.g. VMPTRLD,
- * VMXON...) executed by L1.
- * For example, VM_INSTRUCTION_ERROR is read
- * by L1 if a vmx instruction fails (part of the error path).
- * Note the code assumes this logic. If for some reason
- * we start shadowing these fields then we need to
- * force a shadow sync when L0 emulates vmx instructions
- * (e.g. force a sync if VM_INSTRUCTION_ERROR is modified
- * by nested_vmx_failValid)
- */
- VM_EXIT_REASON,
- VM_EXIT_INTR_INFO,
- VM_EXIT_INSTRUCTION_LEN,
- IDT_VECTORING_INFO_FIELD,
- IDT_VECTORING_ERROR_CODE,
- VM_EXIT_INTR_ERROR_CODE,
- EXIT_QUALIFICATION,
- GUEST_LINEAR_ADDRESS,
- GUEST_PHYSICAL_ADDRESS
+static u16 shadow_read_only_fields[] = {
+#define SHADOW_FIELD_RO(x) x,
+#include "vmx_shadow_fields.h"
};
static int max_shadow_read_only_fields =
ARRAY_SIZE(shadow_read_only_fields);
-static unsigned long shadow_read_write_fields[] = {
- TPR_THRESHOLD,
- GUEST_RIP,
- GUEST_RSP,
- GUEST_CR0,
- GUEST_CR3,
- GUEST_CR4,
- GUEST_INTERRUPTIBILITY_INFO,
- GUEST_RFLAGS,
- GUEST_CS_SELECTOR,
- GUEST_CS_AR_BYTES,
- GUEST_CS_LIMIT,
- GUEST_CS_BASE,
- GUEST_ES_BASE,
- GUEST_BNDCFGS,
- CR0_GUEST_HOST_MASK,
- CR0_READ_SHADOW,
- CR4_READ_SHADOW,
- TSC_OFFSET,
- EXCEPTION_BITMAP,
- CPU_BASED_VM_EXEC_CONTROL,
- VM_ENTRY_EXCEPTION_ERROR_CODE,
- VM_ENTRY_INTR_INFO_FIELD,
- VM_ENTRY_INSTRUCTION_LEN,
- VM_ENTRY_EXCEPTION_ERROR_CODE,
- HOST_FS_BASE,
- HOST_GS_BASE,
- HOST_FS_SELECTOR,
- HOST_GS_SELECTOR
+static u16 shadow_read_write_fields[] = {
+#define SHADOW_FIELD_RW(x) x,
+#include "vmx_shadow_fields.h"
};
static int max_shadow_read_write_fields =
ARRAY_SIZE(shadow_read_write_fields);
@@ -898,9 +864,13 @@ static const unsigned short vmcs_field_to_offset_table[] = {
static inline short vmcs_field_to_offset(unsigned long field)
{
- BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
+ unsigned index;
+
+ if (field >> 15)
+ return -ENOENT;
- if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
+ index = ROL16(field, 6);
+ if (index >= ARRAY_SIZE(vmcs_field_to_offset_table))
return -ENOENT;
/*
@@ -909,10 +879,10 @@ static inline short vmcs_field_to_offset(unsigned long field)
*/
asm("lfence");
- if (vmcs_field_to_offset_table[field] == 0)
+ if (vmcs_field_to_offset_table[index] == 0)
return -ENOENT;
- return vmcs_field_to_offset_table[field];
+ return vmcs_field_to_offset_table[index];
}
static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
@@ -953,8 +923,6 @@ static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
enum {
- VMX_IO_BITMAP_A,
- VMX_IO_BITMAP_B,
VMX_VMREAD_BITMAP,
VMX_VMWRITE_BITMAP,
VMX_BITMAP_NR
@@ -962,8 +930,6 @@ enum {
static unsigned long *vmx_bitmap[VMX_BITMAP_NR];
-#define vmx_io_bitmap_a (vmx_bitmap[VMX_IO_BITMAP_A])
-#define vmx_io_bitmap_b (vmx_bitmap[VMX_IO_BITMAP_B])
#define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP])
#define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP])
@@ -2322,6 +2288,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
vmx_vcpu_pi_load(vcpu, cpu);
vmx->host_pkru = read_pkru();
+ vmx->host_debugctlmsr = get_debugctlmsr();
}
static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
@@ -2879,7 +2846,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
rdmsrl(MSR_IA32_VMX_CR4_FIXED1, vmx->nested.nested_vmx_cr4_fixed1);
/* highest index: VMX_PREEMPTION_TIMER_VALUE */
- vmx->nested.nested_vmx_vmcs_enum = 0x2e;
+ vmx->nested.nested_vmx_vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1;
}
/*
@@ -3215,6 +3182,7 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
*/
static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
struct shared_msr_entry *msr;
switch (msr_info->index) {
@@ -3226,8 +3194,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vmcs_readl(GUEST_GS_BASE);
break;
case MSR_KERNEL_GS_BASE:
- vmx_load_host_state(to_vmx(vcpu));
- msr_info->data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
+ vmx_load_host_state(vmx);
+ msr_info->data = vmx->msr_guest_kernel_gs_base;
break;
#endif
case MSR_EFER:
@@ -3253,13 +3221,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_MCG_EXT_CTL:
if (!msr_info->host_initiated &&
- !(to_vmx(vcpu)->msr_ia32_feature_control &
+ !(vmx->msr_ia32_feature_control &
FEATURE_CONTROL_LMCE))
return 1;
msr_info->data = vcpu->arch.mcg_ext_ctl;
break;
case MSR_IA32_FEATURE_CONTROL:
- msr_info->data = to_vmx(vcpu)->msr_ia32_feature_control;
+ msr_info->data = vmx->msr_ia32_feature_control;
break;
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
if (!nested_vmx_allowed(vcpu))
@@ -3276,7 +3244,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
/* Otherwise falls through */
default:
- msr = find_msr_entry(to_vmx(vcpu), msr_info->index);
+ msr = find_msr_entry(vmx, msr_info->index);
if (msr) {
msr_info->data = msr->data;
break;
@@ -3598,7 +3566,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
#endif
CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING |
- CPU_BASED_USE_IO_BITMAPS |
+ CPU_BASED_UNCOND_IO_EXITING |
CPU_BASED_MOV_DR_EXITING |
CPU_BASED_USE_TSC_OFFSETING |
CPU_BASED_INVLPG_EXITING |
@@ -3628,6 +3596,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
SECONDARY_EXEC_ENABLE_EPT |
SECONDARY_EXEC_UNRESTRICTED_GUEST |
SECONDARY_EXEC_PAUSE_LOOP_EXITING |
+ SECONDARY_EXEC_DESC |
SECONDARY_EXEC_RDTSCP |
SECONDARY_EXEC_ENABLE_INVPCID |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
@@ -3853,17 +3822,17 @@ static void free_kvm_area(void)
}
}
-enum vmcs_field_type {
- VMCS_FIELD_TYPE_U16 = 0,
- VMCS_FIELD_TYPE_U64 = 1,
- VMCS_FIELD_TYPE_U32 = 2,
- VMCS_FIELD_TYPE_NATURAL_WIDTH = 3
+enum vmcs_field_width {
+ VMCS_FIELD_WIDTH_U16 = 0,
+ VMCS_FIELD_WIDTH_U64 = 1,
+ VMCS_FIELD_WIDTH_U32 = 2,
+ VMCS_FIELD_WIDTH_NATURAL_WIDTH = 3
};
-static inline int vmcs_field_type(unsigned long field)
+static inline int vmcs_field_width(unsigned long field)
{
if (0x1 & field) /* the *_HIGH fields are all 32 bit */
- return VMCS_FIELD_TYPE_U32;
+ return VMCS_FIELD_WIDTH_U32;
return (field >> 13) & 0x3 ;
}
@@ -3876,43 +3845,66 @@ static void init_vmcs_shadow_fields(void)
{
int i, j;
- /* No checks for read only fields yet */
+ for (i = j = 0; i < max_shadow_read_only_fields; i++) {
+ u16 field = shadow_read_only_fields[i];
+ if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
+ (i + 1 == max_shadow_read_only_fields ||
+ shadow_read_only_fields[i + 1] != field + 1))
+ pr_err("Missing field from shadow_read_only_field %x\n",
+ field + 1);
+
+ clear_bit(field, vmx_vmread_bitmap);
+#ifdef CONFIG_X86_64
+ if (field & 1)
+ continue;
+#endif
+ if (j < i)
+ shadow_read_only_fields[j] = field;
+ j++;
+ }
+ max_shadow_read_only_fields = j;
for (i = j = 0; i < max_shadow_read_write_fields; i++) {
- switch (shadow_read_write_fields[i]) {
- case GUEST_BNDCFGS:
- if (!kvm_mpx_supported())
+ u16 field = shadow_read_write_fields[i];
+ if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
+ (i + 1 == max_shadow_read_write_fields ||
+ shadow_read_write_fields[i + 1] != field + 1))
+ pr_err("Missing field from shadow_read_write_field %x\n",
+ field + 1);
+
+ /*
+ * PML and the preemption timer can be emulated, but the
+ * processor cannot vmwrite to fields that don't exist
+ * on bare metal.
+ */
+ switch (field) {
+ case GUEST_PML_INDEX:
+ if (!cpu_has_vmx_pml())
+ continue;
+ break;
+ case VMX_PREEMPTION_TIMER_VALUE:
+ if (!cpu_has_vmx_preemption_timer())
+ continue;
+ break;
+ case GUEST_INTR_STATUS:
+ if (!cpu_has_vmx_apicv())
continue;
break;
default:
break;
}
+ clear_bit(field, vmx_vmwrite_bitmap);
+ clear_bit(field, vmx_vmread_bitmap);
+#ifdef CONFIG_X86_64
+ if (field & 1)
+ continue;
+#endif
if (j < i)
- shadow_read_write_fields[j] =
- shadow_read_write_fields[i];
+ shadow_read_write_fields[j] = field;
j++;
}
max_shadow_read_write_fields = j;
-
- /* shadowed fields guest access without vmexit */
- for (i = 0; i < max_shadow_read_write_fields; i++) {
- unsigned long field = shadow_read_write_fields[i];
-
- clear_bit(field, vmx_vmwrite_bitmap);
- clear_bit(field, vmx_vmread_bitmap);
- if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64) {
- clear_bit(field + 1, vmx_vmwrite_bitmap);
- clear_bit(field + 1, vmx_vmread_bitmap);
- }
- }
- for (i = 0; i < max_shadow_read_only_fields; i++) {
- unsigned long field = shadow_read_only_fields[i];
-
- clear_bit(field, vmx_vmread_bitmap);
- if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64)
- clear_bit(field + 1, vmx_vmread_bitmap);
- }
}
static __init int alloc_kvm_area(void)
@@ -4125,9 +4117,10 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
#endif
-static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid)
+static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid,
+ bool invalidate_gpa)
{
- if (enable_ept) {
+ if (enable_ept && (invalidate_gpa || !enable_vpid)) {
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
return;
ept_sync_context(construct_eptp(vcpu, vcpu->arch.mmu.root_hpa));
@@ -4136,15 +4129,15 @@ static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid)
}
}
-static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
+static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
{
- __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid);
+ __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa);
}
static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu)
{
if (enable_ept)
- vmx_flush_tlb(vcpu);
+ vmx_flush_tlb(vcpu, true);
}
static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
@@ -4342,7 +4335,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
ept_load_pdptrs(vcpu);
}
- vmx_flush_tlb(vcpu);
+ vmx_flush_tlb(vcpu, true);
vmcs_writel(GUEST_CR3, guest_cr3);
}
@@ -4359,6 +4352,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
(to_vmx(vcpu)->rmode.vm86_active ?
KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
+ if ((cr4 & X86_CR4_UMIP) && !boot_cpu_has(X86_FEATURE_UMIP)) {
+ vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
+ SECONDARY_EXEC_DESC);
+ hw_cr4 &= ~X86_CR4_UMIP;
+ } else
+ vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
+ SECONDARY_EXEC_DESC);
+
if (cr4 & X86_CR4_VMXE) {
/*
* To use VMXON (and later other VMX instructions), a guest
@@ -4990,11 +4991,6 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
{
int f = sizeof(unsigned long);
- if (!cpu_has_vmx_msr_bitmap()) {
- WARN_ON(1);
- return;
- }
-
/*
* See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
* have the write-low and read-high bitmap offsets the wrong way round.
@@ -5134,7 +5130,8 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
if (max_irr != 256) {
vapic_page = kmap(vmx->nested.virtual_apic_page);
- __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page);
+ __kvm_apic_update_irr(vmx->nested.pi_desc->pir,
+ vapic_page, &max_irr);
kunmap(vmx->nested.virtual_apic_page);
status = vmcs_read16(GUEST_INTR_STATUS);
@@ -5194,14 +5191,15 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
if (is_guest_mode(vcpu) &&
vector == vmx->nested.posted_intr_nv) {
- /* the PIR and ON have been set by L1. */
- kvm_vcpu_trigger_posted_interrupt(vcpu, true);
/*
* If a posted intr is not recognized by hardware,
* we will accomplish it in the next vmentry.
*/
vmx->nested.pi_pending = true;
kvm_make_request(KVM_REQ_EVENT, vcpu);
+ /* the PIR and ON have been set by L1. */
+ if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true))
+ kvm_vcpu_kick(vcpu);
return 0;
}
return -1;
@@ -5380,6 +5378,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
struct kvm_vcpu *vcpu = &vmx->vcpu;
u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
+
if (!cpu_need_virtualize_apic_accesses(vcpu))
exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
if (vmx->vpid == 0)
@@ -5398,6 +5397,11 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+
+ /* SECONDARY_EXEC_DESC is enabled/disabled on writes to CR4.UMIP,
+ * in vmx_set_cr4. */
+ exec_control &= ~SECONDARY_EXEC_DESC;
+
/* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD
(handle_vmptrld).
We can NOT enable shadow_vmcs here because we don't have yet
@@ -5517,10 +5521,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
#endif
int i;
- /* I/O */
- vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a));
- vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b));
-
if (enable_shadow_vmcs) {
vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
@@ -6172,6 +6172,12 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
return kvm_set_cr4(vcpu, val);
}
+static int handle_desc(struct kvm_vcpu *vcpu)
+{
+ WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
+ return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+}
+
static int handle_cr(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification, val;
@@ -6628,7 +6634,21 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
if (!is_guest_mode(vcpu) &&
!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
trace_kvm_fast_mmio(gpa);
- return kvm_skip_emulated_instruction(vcpu);
+ /*
+ * Doing kvm_skip_emulated_instruction() depends on undefined
+ * behavior: Intel's manual doesn't mandate
+ * VM_EXIT_INSTRUCTION_LEN to be set in VMCS when EPT MISCONFIG
+ * occurs and while on real hardware it was observed to be set,
+ * other hypervisors (namely Hyper-V) don't set it, we end up
+ * advancing IP with some random value. Disable fast mmio when
+ * running nested and keep it for real hardware in hope that
+ * VM_EXIT_INSTRUCTION_LEN will always be set correctly.
+ */
+ if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
+ return kvm_skip_emulated_instruction(vcpu);
+ else
+ return x86_emulate_instruction(vcpu, gpa, EMULTYPE_SKIP,
+ NULL, 0) == EMULATE_DONE;
}
ret = kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
@@ -6825,10 +6845,6 @@ static __init int hardware_setup(void)
memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
- memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
-
- memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
-
if (setup_vmcs_config(&vmcs_config) < 0) {
r = -EIO;
goto out;
@@ -6841,11 +6857,6 @@ static __init int hardware_setup(void)
!(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
enable_vpid = 0;
- if (!cpu_has_vmx_shadow_vmcs())
- enable_shadow_vmcs = 0;
- if (enable_shadow_vmcs)
- init_vmcs_shadow_fields();
-
if (!cpu_has_vmx_ept() ||
!cpu_has_vmx_ept_4levels() ||
!cpu_has_vmx_ept_mt_wb() ||
@@ -6931,6 +6942,11 @@ static __init int hardware_setup(void)
kvm_x86_ops->cancel_hv_timer = NULL;
}
+ if (!cpu_has_vmx_shadow_vmcs())
+ enable_shadow_vmcs = 0;
+ if (enable_shadow_vmcs)
+ init_vmcs_shadow_fields();
+
kvm_set_posted_intr_wakeup_handler(wakeup_handler);
kvm_mce_cap_supported |= MCG_LMCE_P;
@@ -7461,17 +7477,17 @@ static inline int vmcs12_read_any(struct kvm_vcpu *vcpu,
p = ((char *)(get_vmcs12(vcpu))) + offset;
- switch (vmcs_field_type(field)) {
- case VMCS_FIELD_TYPE_NATURAL_WIDTH:
+ switch (vmcs_field_width(field)) {
+ case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
*ret = *((natural_width *)p);
return 0;
- case VMCS_FIELD_TYPE_U16:
+ case VMCS_FIELD_WIDTH_U16:
*ret = *((u16 *)p);
return 0;
- case VMCS_FIELD_TYPE_U32:
+ case VMCS_FIELD_WIDTH_U32:
*ret = *((u32 *)p);
return 0;
- case VMCS_FIELD_TYPE_U64:
+ case VMCS_FIELD_WIDTH_U64:
*ret = *((u64 *)p);
return 0;
default:
@@ -7488,17 +7504,17 @@ static inline int vmcs12_write_any(struct kvm_vcpu *vcpu,
if (offset < 0)
return offset;
- switch (vmcs_field_type(field)) {
- case VMCS_FIELD_TYPE_U16:
+ switch (vmcs_field_width(field)) {
+ case VMCS_FIELD_WIDTH_U16:
*(u16 *)p = field_value;
return 0;
- case VMCS_FIELD_TYPE_U32:
+ case VMCS_FIELD_WIDTH_U32:
*(u32 *)p = field_value;
return 0;
- case VMCS_FIELD_TYPE_U64:
+ case VMCS_FIELD_WIDTH_U64:
*(u64 *)p = field_value;
return 0;
- case VMCS_FIELD_TYPE_NATURAL_WIDTH:
+ case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
*(natural_width *)p = field_value;
return 0;
default:
@@ -7514,7 +7530,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
unsigned long field;
u64 field_value;
struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
- const unsigned long *fields = shadow_read_write_fields;
+ const u16 *fields = shadow_read_write_fields;
const int num_fields = max_shadow_read_write_fields;
preempt_disable();
@@ -7523,23 +7539,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
for (i = 0; i < num_fields; i++) {
field = fields[i];
- switch (vmcs_field_type(field)) {
- case VMCS_FIELD_TYPE_U16:
- field_value = vmcs_read16(field);
- break;
- case VMCS_FIELD_TYPE_U32:
- field_value = vmcs_read32(field);
- break;
- case VMCS_FIELD_TYPE_U64:
- field_value = vmcs_read64(field);
- break;
- case VMCS_FIELD_TYPE_NATURAL_WIDTH:
- field_value = vmcs_readl(field);
- break;
- default:
- WARN_ON(1);
- continue;
- }
+ field_value = __vmcs_readl(field);
vmcs12_write_any(&vmx->vcpu, field, field_value);
}
@@ -7551,7 +7551,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
{
- const unsigned long *fields[] = {
+ const u16 *fields[] = {
shadow_read_write_fields,
shadow_read_only_fields
};
@@ -7570,24 +7570,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
for (i = 0; i < max_fields[q]; i++) {
field = fields[q][i];
vmcs12_read_any(&vmx->vcpu, field, &field_value);
-
- switch (vmcs_field_type(field)) {
- case VMCS_FIELD_TYPE_U16:
- vmcs_write16(field, (u16)field_value);
- break;
- case VMCS_FIELD_TYPE_U32:
- vmcs_write32(field, (u32)field_value);
- break;
- case VMCS_FIELD_TYPE_U64:
- vmcs_write64(field, (u64)field_value);
- break;
- case VMCS_FIELD_TYPE_NATURAL_WIDTH:
- vmcs_writel(field, (long)field_value);
- break;
- default:
- WARN_ON(1);
- break;
- }
+ __vmcs_writel(field, field_value);
}
}
@@ -7656,8 +7639,10 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
{
unsigned long field;
gva_t gva;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+
/* The value to write might be 32 or 64 bits, depending on L1's long
* mode, and eventually we need to write that into a field of several
* possible lengths. The code below first zero-extends the value to 64
@@ -7700,6 +7685,20 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
+ switch (field) {
+#define SHADOW_FIELD_RW(x) case x:
+#include "vmx_shadow_fields.h"
+ /*
+ * The fields that can be updated by L1 without a vmexit are
+ * always updated in the vmcs02, the others go down the slow
+ * path of prepare_vmcs02.
+ */
+ break;
+ default:
+ vmx->nested.dirty_vmcs12 = true;
+ break;
+ }
+
nested_vmx_succeed(vcpu);
return kvm_skip_emulated_instruction(vcpu);
}
@@ -7714,6 +7713,7 @@ static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
__pa(vmx->vmcs01.shadow_vmcs));
vmx->nested.sync_shadow_vmcs = true;
}
+ vmx->nested.dirty_vmcs12 = true;
}
/* Emulate the VMPTRLD instruction */
@@ -7934,7 +7934,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
- __vmx_flush_tlb(vcpu, vmx->nested.vpid02);
+ __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
nested_vmx_succeed(vcpu);
return kvm_skip_emulated_instruction(vcpu);
@@ -8128,6 +8128,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_XSETBV] = handle_xsetbv,
[EXIT_REASON_TASK_SWITCH] = handle_task_switch,
[EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
+ [EXIT_REASON_GDTR_IDTR] = handle_desc,
+ [EXIT_REASON_LDTR_TR] = handle_desc,
[EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
[EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
[EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
@@ -8937,36 +8939,23 @@ static void vmx_set_rvi(int vector)
static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
{
- if (!is_guest_mode(vcpu)) {
- vmx_set_rvi(max_irr);
- return;
- }
-
- if (max_irr == -1)
- return;
-
- /*
- * In guest mode. If a vmexit is needed, vmx_check_nested_events
- * handles it.
- */
- if (nested_exit_on_intr(vcpu))
- return;
-
/*
- * Else, fall back to pre-APICv interrupt injection since L2
- * is run without virtual interrupt delivery.
+ * When running L2, updating RVI is only relevant when
+ * vmcs12 virtual-interrupt-delivery enabled.
+ * However, it can be enabled only when L1 also
+ * intercepts external-interrupts and in that case
+ * we should not update vmcs02 RVI but instead intercept
+ * interrupt. Therefore, do nothing when running L2.
*/
- if (!kvm_event_needs_reinjection(vcpu) &&
- vmx_interrupt_allowed(vcpu)) {
- kvm_queue_interrupt(vcpu, max_irr, false);
- vmx_inject_irq(vcpu);
- }
+ if (!is_guest_mode(vcpu))
+ vmx_set_rvi(max_irr);
}
static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int max_irr;
+ bool max_irr_updated;
WARN_ON(!vcpu->arch.apicv_active);
if (pi_test_on(&vmx->pi_desc)) {
@@ -8976,7 +8965,23 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
* But on x86 this is just a compiler barrier anyway.
*/
smp_mb__after_atomic();
- max_irr = kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
+ max_irr_updated =
+ kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
+
+ /*
+ * If we are running L2 and L1 has a new pending interrupt
+ * which can be injected, we should re-evaluate
+ * what should be done with this new L1 interrupt.
+ * If L1 intercepts external-interrupts, we should
+ * exit from L2 to L1. Otherwise, interrupt should be
+ * delivered directly to L2.
+ */
+ if (is_guest_mode(vcpu) && max_irr_updated) {
+ if (nested_exit_on_intr(vcpu))
+ kvm_vcpu_exiting_guest_mode(vcpu);
+ else
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ }
} else {
max_irr = kvm_lapic_find_highest_irr(vcpu);
}
@@ -9059,14 +9064,14 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
#endif
"pushf\n\t"
__ASM_SIZE(push) " $%c[cs]\n\t"
- "call *%[entry]\n\t"
+ CALL_NOSPEC
:
#ifdef CONFIG_X86_64
[sp]"=&r"(tmp),
#endif
ASM_CALL_CONSTRAINT
:
- [entry]"r"(entry),
+ THUNK_TARGET(entry),
[ss]"i"(__KERNEL_DS),
[cs]"i"(__KERNEL_CS)
);
@@ -9091,6 +9096,12 @@ static bool vmx_xsaves_supported(void)
SECONDARY_EXEC_XSAVES;
}
+static bool vmx_umip_emulated(void)
+{
+ return vmcs_config.cpu_based_2nd_exec_ctrl &
+ SECONDARY_EXEC_DESC;
+}
+
static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
{
u32 exit_intr_info;
@@ -9246,7 +9257,7 @@ static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long debugctlmsr, cr3, cr4;
+ unsigned long cr3, cr4;
/* Record the guest's net vcpu time for enforced NMI injections. */
if (unlikely(!enable_vnmi &&
@@ -9299,7 +9310,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
__write_pkru(vcpu->arch.pkru);
atomic_switch_perf_msrs(vmx);
- debugctlmsr = get_debugctlmsr();
vmx_arm_hv_timer(vcpu);
@@ -9425,8 +9435,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmexit_fill_RSB();
/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
- if (debugctlmsr)
- update_debugctlmsr(debugctlmsr);
+ if (vmx->host_debugctlmsr)
+ update_debugctlmsr(vmx->host_debugctlmsr);
#ifndef CONFIG_X86_64
/*
@@ -9506,10 +9516,8 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- int r;
- r = vcpu_load(vcpu);
- BUG_ON(r);
+ vcpu_load(vcpu);
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
free_nested(vmx);
vcpu_put(vcpu);
@@ -9709,7 +9717,8 @@ static void vmcs_set_secondary_exec_control(u32 new_ctl)
u32 mask =
SECONDARY_EXEC_SHADOW_VMCS |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+ SECONDARY_EXEC_DESC;
u32 cur_ctl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
@@ -9875,8 +9884,8 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
}
}
-static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
- struct vmcs12 *vmcs12);
+static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12);
static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
@@ -9965,11 +9974,7 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
(unsigned long)(vmcs12->posted_intr_desc_addr &
(PAGE_SIZE - 1)));
}
- if (cpu_has_vmx_msr_bitmap() &&
- nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS) &&
- nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
- ;
- else
+ if (!nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
CPU_BASED_USE_MSR_BITMAPS);
}
@@ -10037,14 +10042,19 @@ static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
* Merge L0's and L1's MSR bitmap, return false to indicate that
* we do not use the hardware.
*/
-static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
- struct vmcs12 *vmcs12)
+static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
{
int msr;
struct page *page;
unsigned long *msr_bitmap_l1;
unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
+ /* Nothing to do if the MSR bitmap is not in use. */
+ if (!cpu_has_vmx_msr_bitmap() ||
+ !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
+ return false;
+
/* This shortcut is ok because we support only x2APIC MSRs so far. */
if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
return false;
@@ -10052,32 +10062,41 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
if (is_error_page(page))
return false;
- msr_bitmap_l1 = (unsigned long *)kmap(page);
- memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
+ msr_bitmap_l1 = (unsigned long *)kmap(page);
+ if (nested_cpu_has_apic_reg_virt(vmcs12)) {
+ /*
+ * L0 need not intercept reads for MSRs between 0x800 and 0x8ff, it
+ * just lets the processor take the value from the virtual-APIC page;
+ * take those 256 bits directly from the L1 bitmap.
+ */
+ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+ unsigned word = msr / BITS_PER_LONG;
+ msr_bitmap_l0[word] = msr_bitmap_l1[word];
+ msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
+ }
+ } else {
+ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+ unsigned word = msr / BITS_PER_LONG;
+ msr_bitmap_l0[word] = ~0;
+ msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
+ }
+ }
- if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
- if (nested_cpu_has_apic_reg_virt(vmcs12))
- for (msr = 0x800; msr <= 0x8ff; msr++)
- nested_vmx_disable_intercept_for_msr(
- msr_bitmap_l1, msr_bitmap_l0,
- msr, MSR_TYPE_R);
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
+ X2APIC_MSR(APIC_TASKPRI),
+ MSR_TYPE_W);
+ if (nested_cpu_has_vid(vmcs12)) {
nested_vmx_disable_intercept_for_msr(
- msr_bitmap_l1, msr_bitmap_l0,
- APIC_BASE_MSR + (APIC_TASKPRI >> 4),
- MSR_TYPE_R | MSR_TYPE_W);
-
- if (nested_cpu_has_vid(vmcs12)) {
- nested_vmx_disable_intercept_for_msr(
- msr_bitmap_l1, msr_bitmap_l0,
- APIC_BASE_MSR + (APIC_EOI >> 4),
- MSR_TYPE_W);
- nested_vmx_disable_intercept_for_msr(
- msr_bitmap_l1, msr_bitmap_l0,
- APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
- MSR_TYPE_W);
- }
+ msr_bitmap_l1, msr_bitmap_l0,
+ X2APIC_MSR(APIC_EOI),
+ MSR_TYPE_W);
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
+ X2APIC_MSR(APIC_SELF_IPI),
+ MSR_TYPE_W);
}
kunmap(page);
kvm_release_page_clean(page);
@@ -10344,25 +10363,12 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
return 0;
}
-/*
- * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
- * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
- * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
- * guest in a way that will both be appropriate to L1's requests, and our
- * needs. In addition to modifying the active vmcs (which is vmcs02), this
- * function also has additional necessary side-effects, like setting various
- * vcpu->arch fields.
- * Returns 0 on success, 1 on failure. Invalid state exit qualification code
- * is assigned to entry_failure_code on failure.
- */
-static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
- bool from_vmentry, u32 *entry_failure_code)
+static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
+ bool from_vmentry)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- u32 exec_control, vmcs12_exec_ctrl;
vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
- vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
@@ -10370,7 +10376,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
- vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
@@ -10380,15 +10385,12 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
- vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
- vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
- vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
@@ -10398,6 +10400,125 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
+ vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
+ vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
+ vmcs12->guest_pending_dbg_exceptions);
+ vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
+ vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
+
+ if (nested_cpu_has_xsaves(vmcs12))
+ vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
+ vmcs_write64(VMCS_LINK_POINTER, -1ull);
+
+ if (cpu_has_vmx_posted_intr())
+ vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
+
+ /*
+ * Whether page-faults are trapped is determined by a combination of
+ * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.
+ * If enable_ept, L0 doesn't care about page faults and we should
+ * set all of these to L1's desires. However, if !enable_ept, L0 does
+ * care about (at least some) page faults, and because it is not easy
+ * (if at all possible?) to merge L0 and L1's desires, we simply ask
+ * to exit on each and every L2 page fault. This is done by setting
+ * MASK=MATCH=0 and (see below) EB.PF=1.
+ * Note that below we don't need special code to set EB.PF beyond the
+ * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
+ * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
+ * !enable_ept, EB.PF is 1, so the "or" will always be 1.
+ */
+ vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
+ enable_ept ? vmcs12->page_fault_error_code_mask : 0);
+ vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH,
+ enable_ept ? vmcs12->page_fault_error_code_match : 0);
+
+ /* All VMFUNCs are currently emulated through L0 vmexits. */
+ if (cpu_has_vmx_vmfunc())
+ vmcs_write64(VM_FUNCTION_CONTROL, 0);
+
+ if (cpu_has_vmx_apicv()) {
+ vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0);
+ vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1);
+ vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2);
+ vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3);
+ }
+
+ /*
+ * Set host-state according to L0's settings (vmcs12 is irrelevant here)
+ * Some constant fields are set here by vmx_set_constant_host_state().
+ * Other fields are different per CPU, and will be set later when
+ * vmx_vcpu_load() is called, and when vmx_save_host_state() is called.
+ */
+ vmx_set_constant_host_state(vmx);
+
+ /*
+ * Set the MSR load/store lists to match L0's settings.
+ */
+ vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
+ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+ vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
+ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+ vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
+
+ set_cr4_guest_host_mask(vmx);
+
+ if (vmx_mpx_supported())
+ vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
+
+ if (enable_vpid) {
+ if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
+ vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
+ else
+ vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
+ }
+
+ /*
+ * L1 may access the L2's PDPTR, so save them to construct vmcs12
+ */
+ if (enable_ept) {
+ vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
+ vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
+ vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
+ vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
+ }
+
+ if (cpu_has_vmx_msr_bitmap())
+ vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
+}
+
+/*
+ * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
+ * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
+ * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
+ * guest in a way that will both be appropriate to L1's requests, and our
+ * needs. In addition to modifying the active vmcs (which is vmcs02), this
+ * function also has additional necessary side-effects, like setting various
+ * vcpu->arch fields.
+ * Returns 0 on success, 1 on failure. Invalid state exit qualification code
+ * is assigned to entry_failure_code on failure.
+ */
+static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
+ bool from_vmentry, u32 *entry_failure_code)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ u32 exec_control, vmcs12_exec_ctrl;
+
+ /*
+ * First, the fields that are shadowed. This must be kept in sync
+ * with vmx_shadow_fields.h.
+ */
+
+ vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
+ vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
+ vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
+ vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
+ vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
+
+ /*
+ * Not in vmcs02: GUEST_PML_INDEX, HOST_FS_SELECTOR, HOST_GS_SELECTOR,
+ * HOST_FS_BASE, HOST_GS_BASE.
+ */
+
if (from_vmentry &&
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
@@ -10420,16 +10541,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
} else {
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
}
- vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
vmx_set_rflags(vcpu, vmcs12->guest_rflags);
- vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
- vmcs12->guest_pending_dbg_exceptions);
- vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
- vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
-
- if (nested_cpu_has_xsaves(vmcs12))
- vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
- vmcs_write64(VMCS_LINK_POINTER, -1ull);
exec_control = vmcs12->pin_based_vm_exec_control;
@@ -10443,7 +10555,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
if (nested_cpu_has_posted_intr(vmcs12)) {
vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
vmx->nested.pi_pending = false;
- vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
} else {
exec_control &= ~PIN_BASED_POSTED_INTR;
}
@@ -10454,25 +10565,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
if (nested_cpu_has_preemption_timer(vmcs12))
vmx_start_preemption_timer(vcpu);
- /*
- * Whether page-faults are trapped is determined by a combination of
- * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.
- * If enable_ept, L0 doesn't care about page faults and we should
- * set all of these to L1's desires. However, if !enable_ept, L0 does
- * care about (at least some) page faults, and because it is not easy
- * (if at all possible?) to merge L0 and L1's desires, we simply ask
- * to exit on each and every L2 page fault. This is done by setting
- * MASK=MATCH=0 and (see below) EB.PF=1.
- * Note that below we don't need special code to set EB.PF beyond the
- * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
- * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
- * !enable_ept, EB.PF is 1, so the "or" will always be 1.
- */
- vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
- enable_ept ? vmcs12->page_fault_error_code_mask : 0);
- vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH,
- enable_ept ? vmcs12->page_fault_error_code_match : 0);
-
if (cpu_has_secondary_exec_ctrls()) {
exec_control = vmx->secondary_exec_control;
@@ -10491,22 +10583,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
exec_control |= vmcs12_exec_ctrl;
}
- /* All VMFUNCs are currently emulated through L0 vmexits. */
- if (exec_control & SECONDARY_EXEC_ENABLE_VMFUNC)
- vmcs_write64(VM_FUNCTION_CONTROL, 0);
-
- if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
- vmcs_write64(EOI_EXIT_BITMAP0,
- vmcs12->eoi_exit_bitmap0);
- vmcs_write64(EOI_EXIT_BITMAP1,
- vmcs12->eoi_exit_bitmap1);
- vmcs_write64(EOI_EXIT_BITMAP2,
- vmcs12->eoi_exit_bitmap2);
- vmcs_write64(EOI_EXIT_BITMAP3,
- vmcs12->eoi_exit_bitmap3);
+ if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
vmcs_write16(GUEST_INTR_STATUS,
vmcs12->guest_intr_status);
- }
/*
* Write an illegal value to APIC_ACCESS_ADDR. Later,
@@ -10519,24 +10598,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
}
-
- /*
- * Set host-state according to L0's settings (vmcs12 is irrelevant here)
- * Some constant fields are set here by vmx_set_constant_host_state().
- * Other fields are different per CPU, and will be set later when
- * vmx_vcpu_load() is called, and when vmx_save_host_state() is called.
- */
- vmx_set_constant_host_state(vmx);
-
- /*
- * Set the MSR load/store lists to match L0's settings.
- */
- vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
- vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
- vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
- vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
- vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
-
/*
* HOST_RSP is normally set correctly in vmx_vcpu_run() just before
* entry, but only if the current (host) sp changed from the value
@@ -10568,8 +10629,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
}
/*
- * Merging of IO bitmap not currently supported.
- * Rather, exit every time.
+ * A vmexit (to either L1 hypervisor or L0 userspace) is always needed
+ * for I/O port accesses.
*/
exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
exec_control |= CPU_BASED_UNCOND_IO_EXITING;
@@ -10606,12 +10667,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
}
- set_cr4_guest_host_mask(vmx);
-
- if (from_vmentry &&
- vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)
- vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
-
if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
vmcs_write64(TSC_OFFSET,
vcpu->arch.tsc_offset + vmcs12->tsc_offset);
@@ -10620,9 +10675,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
if (kvm_has_tsc_control)
decache_tsc_multiplier(vmx);
- if (cpu_has_vmx_msr_bitmap())
- vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
-
if (enable_vpid) {
/*
* There is no direct mapping between vpid02 and vpid12, the
@@ -10633,16 +10685,13 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
* even if spawn a lot of nested vCPUs.
*/
if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) {
- vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
vmx->nested.last_vpid = vmcs12->virtual_processor_id;
- __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02);
+ __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02, true);
}
} else {
- vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
- vmx_flush_tlb(vcpu);
+ vmx_flush_tlb(vcpu, true);
}
-
}
if (enable_pml) {
@@ -10691,6 +10740,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
/* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
vmx_set_efer(vcpu, vcpu->arch.efer);
+ if (vmx->nested.dirty_vmcs12) {
+ prepare_vmcs02_full(vcpu, vmcs12, from_vmentry);
+ vmx->nested.dirty_vmcs12 = false;
+ }
+
/* Shadow page tables on either EPT or shadow page tables. */
if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
entry_failure_code))
@@ -10699,16 +10753,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
if (!enable_ept)
vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
- /*
- * L1 may access the L2's PDPTR, so save them to construct vmcs12
- */
- if (enable_ept) {
- vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
- vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
- vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
- vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
- }
-
kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);
kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
return 0;
@@ -11064,7 +11108,6 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
if (block_nested_events)
return -EBUSY;
nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
- vcpu->arch.exception.pending = false;
return 0;
}
@@ -11345,11 +11388,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
* L1's vpid. TODO: move to a more elaborate solution, giving
* each L2 its own vpid and exposing the vpid feature to L1.
*/
- vmx_flush_tlb(vcpu);
+ vmx_flush_tlb(vcpu, true);
}
- /* Restore posted intr vector. */
- if (nested_cpu_has_posted_intr(vmcs12))
- vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
@@ -11610,6 +11650,21 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
enum x86_intercept_stage stage)
{
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
+
+ /*
+ * RDPID causes #UD if disabled through secondary execution controls.
+ * Because it is marked as EmulateOnUD, we need to intercept it here.
+ */
+ if (info->intercept == x86_intercept_rdtscp &&
+ !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
+ ctxt->exception.vector = UD_VECTOR;
+ ctxt->exception.error_code_valid = false;
+ return X86EMUL_PROPAGATE_FAULT;
+ }
+
+ /* TODO: check more intercepts... */
return X86EMUL_CONTINUE;
}
@@ -12123,6 +12178,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.handle_external_intr = vmx_handle_external_intr,
.mpx_supported = vmx_mpx_supported,
.xsaves_supported = vmx_xsaves_supported,
+ .umip_emulated = vmx_umip_emulated,
.check_nested_events = vmx_check_nested_events,
diff --git a/arch/x86/kvm/vmx_shadow_fields.h b/arch/x86/kvm/vmx_shadow_fields.h
new file mode 100644
index 000000000000..cd0c75f6d037
--- /dev/null
+++ b/arch/x86/kvm/vmx_shadow_fields.h
@@ -0,0 +1,77 @@
+#ifndef SHADOW_FIELD_RO
+#define SHADOW_FIELD_RO(x)
+#endif
+#ifndef SHADOW_FIELD_RW
+#define SHADOW_FIELD_RW(x)
+#endif
+
+/*
+ * We do NOT shadow fields that are modified when L0
+ * traps and emulates any vmx instruction (e.g. VMPTRLD,
+ * VMXON...) executed by L1.
+ * For example, VM_INSTRUCTION_ERROR is read
+ * by L1 if a vmx instruction fails (part of the error path).
+ * Note the code assumes this logic. If for some reason
+ * we start shadowing these fields then we need to
+ * force a shadow sync when L0 emulates vmx instructions
+ * (e.g. force a sync if VM_INSTRUCTION_ERROR is modified
+ * by nested_vmx_failValid)
+ *
+ * When adding or removing fields here, note that shadowed
+ * fields must always be synced by prepare_vmcs02, not just
+ * prepare_vmcs02_full.
+ */
+
+/*
+ * Keeping the fields ordered by size is an attempt at improving
+ * branch prediction in vmcs_read_any and vmcs_write_any.
+ */
+
+/* 16-bits */
+SHADOW_FIELD_RW(GUEST_CS_SELECTOR)
+SHADOW_FIELD_RW(GUEST_INTR_STATUS)
+SHADOW_FIELD_RW(GUEST_PML_INDEX)
+SHADOW_FIELD_RW(HOST_FS_SELECTOR)
+SHADOW_FIELD_RW(HOST_GS_SELECTOR)
+
+/* 32-bits */
+SHADOW_FIELD_RO(VM_EXIT_REASON)
+SHADOW_FIELD_RO(VM_EXIT_INTR_INFO)
+SHADOW_FIELD_RO(VM_EXIT_INSTRUCTION_LEN)
+SHADOW_FIELD_RO(IDT_VECTORING_INFO_FIELD)
+SHADOW_FIELD_RO(IDT_VECTORING_ERROR_CODE)
+SHADOW_FIELD_RO(VM_EXIT_INTR_ERROR_CODE)
+SHADOW_FIELD_RW(CPU_BASED_VM_EXEC_CONTROL)
+SHADOW_FIELD_RW(EXCEPTION_BITMAP)
+SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE)
+SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD)
+SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN)
+SHADOW_FIELD_RW(TPR_THRESHOLD)
+SHADOW_FIELD_RW(GUEST_CS_LIMIT)
+SHADOW_FIELD_RW(GUEST_CS_AR_BYTES)
+SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO)
+SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE)
+
+/* Natural width */
+SHADOW_FIELD_RO(EXIT_QUALIFICATION)
+SHADOW_FIELD_RO(GUEST_LINEAR_ADDRESS)
+SHADOW_FIELD_RW(GUEST_RIP)
+SHADOW_FIELD_RW(GUEST_RSP)
+SHADOW_FIELD_RW(GUEST_CR0)
+SHADOW_FIELD_RW(GUEST_CR3)
+SHADOW_FIELD_RW(GUEST_CR4)
+SHADOW_FIELD_RW(GUEST_RFLAGS)
+SHADOW_FIELD_RW(GUEST_CS_BASE)
+SHADOW_FIELD_RW(GUEST_ES_BASE)
+SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK)
+SHADOW_FIELD_RW(CR0_READ_SHADOW)
+SHADOW_FIELD_RW(CR4_READ_SHADOW)
+SHADOW_FIELD_RW(HOST_FS_BASE)
+SHADOW_FIELD_RW(HOST_GS_BASE)
+
+/* 64-bit */
+SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS)
+SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS_HIGH)
+
+#undef SHADOW_FIELD_RO
+#undef SHADOW_FIELD_RW
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c53298dfbf50..07d1c7f66d97 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -67,6 +67,8 @@
#include <asm/pvclock.h>
#include <asm/div64.h>
#include <asm/irq_remapping.h>
+#include <asm/mshyperv.h>
+#include <asm/hypervisor.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -177,7 +179,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "request_irq", VCPU_STAT(request_irq_exits) },
{ "irq_exits", VCPU_STAT(irq_exits) },
{ "host_state_reload", VCPU_STAT(host_state_reload) },
- { "efer_reload", VCPU_STAT(efer_reload) },
{ "fpu_reload", VCPU_STAT(fpu_reload) },
{ "insn_emulation", VCPU_STAT(insn_emulation) },
{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
@@ -702,7 +703,8 @@ static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
!vcpu->guest_xcr0_loaded) {
/* kvm_set_xcr() also depends on this */
- xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
+ if (vcpu->arch.xcr0 != host_xcr0)
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
vcpu->guest_xcr0_loaded = 1;
}
}
@@ -794,6 +796,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57))
return 1;
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP))
+ return 1;
+
if (is_long_mode(vcpu)) {
if (!(cr4 & X86_CR4_PAE))
return 1;
@@ -1036,6 +1041,7 @@ static u32 emulated_msrs[] = {
MSR_IA32_MCG_CTL,
MSR_IA32_MCG_EXT_CTL,
MSR_IA32_SMBASE,
+ MSR_SMI_COUNT,
MSR_PLATFORM_INFO,
MSR_MISC_FEATURES_ENABLES,
};
@@ -1377,6 +1383,11 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
return tsc;
}
+static inline int gtod_is_based_on_tsc(int mode)
+{
+ return mode == VCLOCK_TSC || mode == VCLOCK_HVCLOCK;
+}
+
static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
@@ -1396,7 +1407,7 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
* perform request to enable masterclock.
*/
if (ka->use_master_clock ||
- (gtod->clock.vclock_mode == VCLOCK_TSC && vcpus_matched))
+ (gtod_is_based_on_tsc(gtod->clock.vclock_mode) && vcpus_matched))
kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
@@ -1459,6 +1470,19 @@ static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
vcpu->arch.tsc_offset = offset;
}
+static inline bool kvm_check_tsc_unstable(void)
+{
+#ifdef CONFIG_X86_64
+ /*
+ * TSC is marked unstable when we're running on Hyper-V,
+ * 'TSC page' clocksource is good.
+ */
+ if (pvclock_gtod_data.clock.vclock_mode == VCLOCK_HVCLOCK)
+ return false;
+#endif
+ return check_tsc_unstable();
+}
+
void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
struct kvm *kvm = vcpu->kvm;
@@ -1504,7 +1528,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
*/
if (synchronizing &&
vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
- if (!check_tsc_unstable()) {
+ if (!kvm_check_tsc_unstable()) {
offset = kvm->arch.cur_tsc_offset;
pr_debug("kvm: matched tsc offset for %llu\n", data);
} else {
@@ -1604,18 +1628,43 @@ static u64 read_tsc(void)
return last;
}
-static inline u64 vgettsc(u64 *cycle_now)
+static inline u64 vgettsc(u64 *tsc_timestamp, int *mode)
{
long v;
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+ u64 tsc_pg_val;
+
+ switch (gtod->clock.vclock_mode) {
+ case VCLOCK_HVCLOCK:
+ tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(),
+ tsc_timestamp);
+ if (tsc_pg_val != U64_MAX) {
+ /* TSC page valid */
+ *mode = VCLOCK_HVCLOCK;
+ v = (tsc_pg_val - gtod->clock.cycle_last) &
+ gtod->clock.mask;
+ } else {
+ /* TSC page invalid */
+ *mode = VCLOCK_NONE;
+ }
+ break;
+ case VCLOCK_TSC:
+ *mode = VCLOCK_TSC;
+ *tsc_timestamp = read_tsc();
+ v = (*tsc_timestamp - gtod->clock.cycle_last) &
+ gtod->clock.mask;
+ break;
+ default:
+ *mode = VCLOCK_NONE;
+ }
- *cycle_now = read_tsc();
+ if (*mode == VCLOCK_NONE)
+ *tsc_timestamp = v = 0;
- v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask;
return v * gtod->clock.mult;
}
-static int do_monotonic_boot(s64 *t, u64 *cycle_now)
+static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp)
{
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
unsigned long seq;
@@ -1624,9 +1673,8 @@ static int do_monotonic_boot(s64 *t, u64 *cycle_now)
do {
seq = read_seqcount_begin(&gtod->seq);
- mode = gtod->clock.vclock_mode;
ns = gtod->nsec_base;
- ns += vgettsc(cycle_now);
+ ns += vgettsc(tsc_timestamp, &mode);
ns >>= gtod->clock.shift;
ns += gtod->boot_ns;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
@@ -1635,7 +1683,7 @@ static int do_monotonic_boot(s64 *t, u64 *cycle_now)
return mode;
}
-static int do_realtime(struct timespec *ts, u64 *cycle_now)
+static int do_realtime(struct timespec *ts, u64 *tsc_timestamp)
{
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
unsigned long seq;
@@ -1644,10 +1692,9 @@ static int do_realtime(struct timespec *ts, u64 *cycle_now)
do {
seq = read_seqcount_begin(&gtod->seq);
- mode = gtod->clock.vclock_mode;
ts->tv_sec = gtod->wall_time_sec;
ns = gtod->nsec_base;
- ns += vgettsc(cycle_now);
+ ns += vgettsc(tsc_timestamp, &mode);
ns >>= gtod->clock.shift;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
@@ -1657,25 +1704,26 @@ static int do_realtime(struct timespec *ts, u64 *cycle_now)
return mode;
}
-/* returns true if host is using tsc clocksource */
-static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *cycle_now)
+/* returns true if host is using TSC based clocksource */
+static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp)
{
/* checked again under seqlock below */
- if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
+ if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
return false;
- return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC;
+ return gtod_is_based_on_tsc(do_monotonic_boot(kernel_ns,
+ tsc_timestamp));
}
-/* returns true if host is using tsc clocksource */
+/* returns true if host is using TSC based clocksource */
static bool kvm_get_walltime_and_clockread(struct timespec *ts,
- u64 *cycle_now)
+ u64 *tsc_timestamp)
{
/* checked again under seqlock below */
- if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
+ if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
return false;
- return do_realtime(ts, cycle_now) == VCLOCK_TSC;
+ return gtod_is_based_on_tsc(do_realtime(ts, tsc_timestamp));
}
#endif
@@ -2118,6 +2166,12 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu)
vcpu->arch.pv_time_enabled = false;
}
+static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
+{
+ ++vcpu->stat.tlb_flush;
+ kvm_x86_ops->tlb_flush(vcpu, invalidate_gpa);
+}
+
static void record_steal_time(struct kvm_vcpu *vcpu)
{
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
@@ -2127,7 +2181,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
return;
- vcpu->arch.st.steal.preempted = 0;
+ /*
+ * Doing a TLB flush here, on the guest's behalf, can avoid
+ * expensive IPIs.
+ */
+ if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB)
+ kvm_vcpu_flush_tlb(vcpu, false);
if (vcpu->arch.st.steal.version & 1)
vcpu->arch.st.steal.version += 1; /* first time write, random junk */
@@ -2228,6 +2287,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
vcpu->arch.smbase = data;
break;
+ case MSR_SMI_COUNT:
+ if (!msr_info->host_initiated)
+ return 1;
+ vcpu->arch.smi_count = data;
+ break;
case MSR_KVM_WALL_CLOCK_NEW:
case MSR_KVM_WALL_CLOCK:
vcpu->kvm->arch.wall_clock = data;
@@ -2502,6 +2566,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
msr_info->data = vcpu->arch.smbase;
break;
+ case MSR_SMI_COUNT:
+ msr_info->data = vcpu->arch.smi_count;
+ break;
case MSR_IA32_PERF_STATUS:
/* TSC increment by tick */
msr_info->data = 1000ULL;
@@ -2869,13 +2936,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
}
- if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
+ if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) {
s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
rdtsc() - vcpu->arch.last_host_tsc;
if (tsc_delta < 0)
mark_tsc_unstable("KVM discovered backwards TSC");
- if (check_tsc_unstable()) {
+ if (kvm_check_tsc_unstable()) {
u64 offset = kvm_compute_tsc_offset(vcpu,
vcpu->arch.last_guest_tsc);
kvm_vcpu_write_tsc_offset(vcpu, offset);
@@ -2904,7 +2971,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
- vcpu->arch.st.steal.preempted = 1;
+ vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED;
kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime,
&vcpu->arch.st.steal.preempted,
@@ -2938,12 +3005,18 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
pagefault_enable();
kvm_x86_ops->vcpu_put(vcpu);
vcpu->arch.last_host_tsc = rdtsc();
+ /*
+ * If userspace has set any breakpoints or watchpoints, dr6 is restored
+ * on every vmexit, but if not, we might have a stale dr6 from the
+ * guest. do_debug expects dr6 to be cleared after it runs, do the same.
+ */
+ set_debugreg(0, 6);
}
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s)
{
- if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
+ if (vcpu->arch.apicv_active)
kvm_x86_ops->sync_pir_to_irr(vcpu);
return kvm_apic_get_state(vcpu, s);
@@ -3472,6 +3545,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
void *buffer;
} u;
+ vcpu_load(vcpu);
+
u.buffer = NULL;
switch (ioctl) {
case KVM_GET_LAPIC: {
@@ -3497,8 +3572,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (!lapic_in_kernel(vcpu))
goto out;
u.lapic = memdup_user(argp, sizeof(*u.lapic));
- if (IS_ERR(u.lapic))
- return PTR_ERR(u.lapic);
+ if (IS_ERR(u.lapic)) {
+ r = PTR_ERR(u.lapic);
+ goto out_nofree;
+ }
r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
break;
@@ -3672,8 +3749,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
case KVM_SET_XSAVE: {
u.xsave = memdup_user(argp, sizeof(*u.xsave));
- if (IS_ERR(u.xsave))
- return PTR_ERR(u.xsave);
+ if (IS_ERR(u.xsave)) {
+ r = PTR_ERR(u.xsave);
+ goto out_nofree;
+ }
r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
break;
@@ -3695,8 +3774,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
case KVM_SET_XCRS: {
u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
- if (IS_ERR(u.xcrs))
- return PTR_ERR(u.xcrs);
+ if (IS_ERR(u.xcrs)) {
+ r = PTR_ERR(u.xcrs);
+ goto out_nofree;
+ }
r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
break;
@@ -3740,6 +3821,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
out:
kfree(u.buffer);
+out_nofree:
+ vcpu_put(vcpu);
return r;
}
@@ -4295,6 +4378,36 @@ set_identity_unlock:
r = kvm_vm_ioctl_enable_cap(kvm, &cap);
break;
}
+ case KVM_MEMORY_ENCRYPT_OP: {
+ r = -ENOTTY;
+ if (kvm_x86_ops->mem_enc_op)
+ r = kvm_x86_ops->mem_enc_op(kvm, argp);
+ break;
+ }
+ case KVM_MEMORY_ENCRYPT_REG_REGION: {
+ struct kvm_enc_region region;
+
+ r = -EFAULT;
+ if (copy_from_user(&region, argp, sizeof(region)))
+ goto out;
+
+ r = -ENOTTY;
+ if (kvm_x86_ops->mem_enc_reg_region)
+ r = kvm_x86_ops->mem_enc_reg_region(kvm, &region);
+ break;
+ }
+ case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
+ struct kvm_enc_region region;
+
+ r = -EFAULT;
+ if (copy_from_user(&region, argp, sizeof(region)))
+ goto out;
+
+ r = -ENOTTY;
+ if (kvm_x86_ops->mem_enc_unreg_region)
+ r = kvm_x86_ops->mem_enc_unreg_region(kvm, &region);
+ break;
+ }
default:
r = -ENOTTY;
}
@@ -5703,7 +5816,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
* handle watchpoints yet, those would be handled in
* the emulate_ops.
*/
- if (kvm_vcpu_check_breakpoint(vcpu, &r))
+ if (!(emulation_type & EMULTYPE_SKIP) &&
+ kvm_vcpu_check_breakpoint(vcpu, &r))
return r;
ctxt->interruptibility = 0;
@@ -5889,6 +6003,43 @@ static void tsc_khz_changed(void *data)
__this_cpu_write(cpu_tsc_khz, khz);
}
+#ifdef CONFIG_X86_64
+static void kvm_hyperv_tsc_notifier(void)
+{
+ struct kvm *kvm;
+ struct kvm_vcpu *vcpu;
+ int cpu;
+
+ spin_lock(&kvm_lock);
+ list_for_each_entry(kvm, &vm_list, vm_list)
+ kvm_make_mclock_inprogress_request(kvm);
+
+ hyperv_stop_tsc_emulation();
+
+ /* TSC frequency always matches when on Hyper-V */
+ for_each_present_cpu(cpu)
+ per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+ kvm_max_guest_tsc_khz = tsc_khz;
+
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ struct kvm_arch *ka = &kvm->arch;
+
+ spin_lock(&ka->pvclock_gtod_sync_lock);
+
+ pvclock_update_vm_gtod_copy(kvm);
+
+ kvm_for_each_vcpu(cpu, vcpu, kvm)
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+
+ kvm_for_each_vcpu(cpu, vcpu, kvm)
+ kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
+
+ spin_unlock(&ka->pvclock_gtod_sync_lock);
+ }
+ spin_unlock(&kvm_lock);
+}
+#endif
+
static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
@@ -6110,9 +6261,9 @@ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
update_pvclock_gtod(tk);
/* disable master clock if host does not trust, or does not
- * use, TSC clocksource
+ * use, TSC based clocksource.
*/
- if (gtod->clock.vclock_mode != VCLOCK_TSC &&
+ if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
atomic_read(&kvm_guest_has_master_clock) != 0)
queue_work(system_long_wq, &pvclock_gtod_work);
@@ -6174,6 +6325,9 @@ int kvm_arch_init(void *opaque)
kvm_lapic_init();
#ifdef CONFIG_X86_64
pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
+
+ if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
+ set_hv_tscchange_cb(kvm_hyperv_tsc_notifier);
#endif
return 0;
@@ -6186,6 +6340,10 @@ out:
void kvm_arch_exit(void)
{
+#ifdef CONFIG_X86_64
+ if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
+ clear_hv_tscchange_cb();
+#endif
kvm_lapic_exit();
perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
@@ -6448,6 +6606,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
kvm_x86_ops->queue_exception(vcpu);
} else if (vcpu->arch.smi_pending && !is_smm(vcpu) && kvm_x86_ops->smi_allowed(vcpu)) {
vcpu->arch.smi_pending = false;
+ ++vcpu->arch.smi_count;
enter_smm(vcpu);
} else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
--vcpu->arch.nmi_pending;
@@ -6749,7 +6908,7 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
if (irqchip_split(vcpu->kvm))
kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
else {
- if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
+ if (vcpu->arch.apicv_active)
kvm_x86_ops->sync_pir_to_irr(vcpu);
kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
}
@@ -6758,12 +6917,6 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
}
-static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
-{
- ++vcpu->stat.tlb_flush;
- kvm_x86_ops->tlb_flush(vcpu);
-}
-
void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
unsigned long start, unsigned long end)
{
@@ -6832,7 +6985,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
kvm_mmu_sync_roots(vcpu);
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
- kvm_vcpu_flush_tlb(vcpu);
+ kvm_vcpu_flush_tlb(vcpu, true);
if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
r = 0;
@@ -6981,10 +7134,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
* This handles the case where a posted interrupt was
* notified with kvm_vcpu_kick.
*/
- if (kvm_lapic_enabled(vcpu)) {
- if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
- kvm_x86_ops->sync_pir_to_irr(vcpu);
- }
+ if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
+ kvm_x86_ops->sync_pir_to_irr(vcpu);
if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu)
|| need_resched() || signal_pending(current)) {
@@ -7005,7 +7156,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
trace_kvm_entry(vcpu->vcpu_id);
- wait_lapic_expire(vcpu);
+ if (lapic_timer_advance_ns)
+ wait_lapic_expire(vcpu);
guest_enter_irqoff();
if (unlikely(vcpu->arch.switch_db_regs)) {
@@ -7266,8 +7418,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int r;
+ vcpu_load(vcpu);
kvm_sigset_activate(vcpu);
-
kvm_load_guest_fpu(vcpu);
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
@@ -7314,11 +7466,14 @@ out:
post_kvm_run_save(vcpu);
kvm_sigset_deactivate(vcpu);
+ vcpu_put(vcpu);
return r;
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
+ vcpu_load(vcpu);
+
if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
/*
* We are here if userspace calls get_regs() in the middle of
@@ -7352,11 +7507,14 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
regs->rip = kvm_rip_read(vcpu);
regs->rflags = kvm_get_rflags(vcpu);
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
+ vcpu_load(vcpu);
+
vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
@@ -7386,6 +7544,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
kvm_make_request(KVM_REQ_EVENT, vcpu);
+ vcpu_put(vcpu);
return 0;
}
@@ -7404,6 +7563,8 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
{
struct desc_ptr dt;
+ vcpu_load(vcpu);
+
kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
@@ -7435,12 +7596,15 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
set_bit(vcpu->arch.interrupt.nr,
(unsigned long *)sregs->interrupt_bitmap);
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
+ vcpu_load(vcpu);
+
kvm_apic_accept_events(vcpu);
if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
vcpu->arch.pv.pv_unhalted)
@@ -7448,21 +7612,26 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
else
mp_state->mp_state = vcpu->arch.mp_state;
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
+ int ret = -EINVAL;
+
+ vcpu_load(vcpu);
+
if (!lapic_in_kernel(vcpu) &&
mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
- return -EINVAL;
+ goto out;
/* INITs are latched while in SMM */
if ((is_smm(vcpu) || vcpu->arch.smi_pending) &&
(mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
- return -EINVAL;
+ goto out;
if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
@@ -7470,7 +7639,11 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
} else
vcpu->arch.mp_state = mp_state->mp_state;
kvm_make_request(KVM_REQ_EVENT, vcpu);
- return 0;
+
+ ret = 0;
+out:
+ vcpu_put(vcpu);
+ return ret;
}
int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
@@ -7524,18 +7697,21 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
int mmu_reset_needed = 0;
int pending_vec, max_bits, idx;
struct desc_ptr dt;
+ int ret = -EINVAL;
+
+ vcpu_load(vcpu);
if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
(sregs->cr4 & X86_CR4_OSXSAVE))
- return -EINVAL;
+ goto out;
if (kvm_valid_sregs(vcpu, sregs))
- return -EINVAL;
+ goto out;
apic_base_msr.data = sregs->apic_base;
apic_base_msr.host_initiated = true;
if (kvm_set_apic_base(vcpu, &apic_base_msr))
- return -EINVAL;
+ goto out;
dt.size = sregs->idt.limit;
dt.address = sregs->idt.base;
@@ -7601,7 +7777,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
kvm_make_request(KVM_REQ_EVENT, vcpu);
- return 0;
+ ret = 0;
+out:
+ vcpu_put(vcpu);
+ return ret;
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
@@ -7610,6 +7789,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
unsigned long rflags;
int i, r;
+ vcpu_load(vcpu);
+
if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
r = -EBUSY;
if (vcpu->arch.exception.pending)
@@ -7655,7 +7836,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
r = 0;
out:
-
+ vcpu_put(vcpu);
return r;
}
@@ -7669,6 +7850,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
gpa_t gpa;
int idx;
+ vcpu_load(vcpu);
+
idx = srcu_read_lock(&vcpu->kvm->srcu);
gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
@@ -7677,14 +7860,17 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
tr->writeable = 1;
tr->usermode = 0;
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- struct fxregs_state *fxsave =
- &vcpu->arch.guest_fpu.state.fxsave;
+ struct fxregs_state *fxsave;
+
+ vcpu_load(vcpu);
+ fxsave = &vcpu->arch.guest_fpu.state.fxsave;
memcpy(fpu->fpr, fxsave->st_space, 128);
fpu->fcw = fxsave->cwd;
fpu->fsw = fxsave->swd;
@@ -7694,13 +7880,17 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
fpu->last_dp = fxsave->rdp;
memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- struct fxregs_state *fxsave =
- &vcpu->arch.guest_fpu.state.fxsave;
+ struct fxregs_state *fxsave;
+
+ vcpu_load(vcpu);
+
+ fxsave = &vcpu->arch.guest_fpu.state.fxsave;
memcpy(fxsave->st_space, fpu->fpr, 128);
fxsave->cwd = fpu->fcw;
@@ -7711,6 +7901,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
fxsave->rdp = fpu->last_dp;
memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
+ vcpu_put(vcpu);
return 0;
}
@@ -7767,7 +7958,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
{
struct kvm_vcpu *vcpu;
- if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
+ if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
printk_once(KERN_WARNING
"kvm: SMP vm created on host with unstable TSC; "
"guest TSC will not be reliable\n");
@@ -7779,16 +7970,12 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
- int r;
-
kvm_vcpu_mtrr_init(vcpu);
- r = vcpu_load(vcpu);
- if (r)
- return r;
+ vcpu_load(vcpu);
kvm_vcpu_reset(vcpu, false);
kvm_mmu_setup(vcpu);
vcpu_put(vcpu);
- return r;
+ return 0;
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
@@ -7798,13 +7985,15 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
kvm_hv_vcpu_postcreate(vcpu);
- if (vcpu_load(vcpu))
+ if (mutex_lock_killable(&vcpu->mutex))
return;
+ vcpu_load(vcpu);
msr.data = 0x0;
msr.index = MSR_IA32_TSC;
msr.host_initiated = true;
kvm_write_tsc(vcpu, &msr);
vcpu_put(vcpu);
+ mutex_unlock(&vcpu->mutex);
if (!kvmclock_periodic_sync)
return;
@@ -7815,11 +8004,9 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
- int r;
vcpu->arch.apf.msr_val = 0;
- r = vcpu_load(vcpu);
- BUG_ON(r);
+ vcpu_load(vcpu);
kvm_mmu_unload(vcpu);
vcpu_put(vcpu);
@@ -7831,6 +8018,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vcpu->arch.hflags = 0;
vcpu->arch.smi_pending = 0;
+ vcpu->arch.smi_count = 0;
atomic_set(&vcpu->arch.nmi_queued, 0);
vcpu->arch.nmi_pending = 0;
vcpu->arch.nmi_injected = false;
@@ -7924,7 +8112,7 @@ int kvm_arch_hardware_enable(void)
return ret;
local_tsc = rdtsc();
- stable = !check_tsc_unstable();
+ stable = !kvm_check_tsc_unstable();
list_for_each_entry(kvm, &vm_list, vm_list) {
kvm_for_each_vcpu(i, vcpu, kvm) {
if (!stable && vcpu->cpu == smp_processor_id())
@@ -8190,9 +8378,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
{
- int r;
- r = vcpu_load(vcpu);
- BUG_ON(r);
+ vcpu_load(vcpu);
kvm_mmu_unload(vcpu);
vcpu_put(vcpu);
}
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index d0b95b7a90b4..b91215d1fd80 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -12,6 +12,7 @@
static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
{
+ vcpu->arch.exception.pending = false;
vcpu->arch.exception.injected = false;
}
@@ -265,36 +266,8 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
static inline bool kvm_mwait_in_guest(void)
{
- unsigned int eax, ebx, ecx, edx;
-
- if (!cpu_has(&boot_cpu_data, X86_FEATURE_MWAIT))
- return false;
-
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- /* All AMD CPUs have a working MWAIT implementation */
- return true;
- case X86_VENDOR_INTEL:
- /* Handle Intel below */
- break;
- default:
- return false;
- }
-
- /*
- * Intel CPUs without CPUID5_ECX_INTERRUPT_BREAK are problematic as
- * they would allow guest to stop the CPU completely by disabling
- * interrupts then invoking MWAIT.
- */
- if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
- return false;
-
- cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
-
- if (!(ecx & CPUID5_ECX_INTERRUPT_BREAK))
- return false;
-
- return true;
+ return boot_cpu_has(X86_FEATURE_MWAIT) &&
+ !boot_cpu_has_bug(X86_BUG_MONITOR);
}
#endif
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index f23934bbaf4e..69a473919260 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -27,6 +27,7 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
lib-$(CONFIG_RETPOLINE) += retpoline.o
+OBJECT_FILES_NON_STANDARD_retpoline.o :=y
obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index 4846eff7e4c8..f5b7f1b3b6d7 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -162,7 +162,7 @@ void __delay(unsigned long loops)
}
EXPORT_SYMBOL(__delay);
-inline void __const_udelay(unsigned long xloops)
+void __const_udelay(unsigned long xloops)
{
unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy;
int d0;
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index dfb2ba91b670..480edc3a5e03 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -7,6 +7,7 @@
#include <asm/alternative-asm.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>
+#include <asm/bitsperlong.h>
.macro THUNK reg
.section .text.__x86.indirect_thunk
@@ -36,7 +37,6 @@ GENERATE_THUNK(_ASM_DX)
GENERATE_THUNK(_ASM_SI)
GENERATE_THUNK(_ASM_DI)
GENERATE_THUNK(_ASM_BP)
-GENERATE_THUNK(_ASM_SP)
#ifdef CONFIG_64BIT
GENERATE_THUNK(r8)
GENERATE_THUNK(r9)
@@ -47,3 +47,58 @@ GENERATE_THUNK(r13)
GENERATE_THUNK(r14)
GENERATE_THUNK(r15)
#endif
+
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; lfence; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version - two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+.macro STUFF_RSB nr:req sp:req
+ mov $(\nr / 2), %_ASM_BX
+ .align 16
+771:
+ call 772f
+773: /* speculation trap */
+ pause
+ lfence
+ jmp 773b
+ .align 16
+772:
+ call 774f
+775: /* speculation trap */
+ pause
+ lfence
+ jmp 775b
+ .align 16
+774:
+ dec %_ASM_BX
+ jnz 771b
+ add $((BITS_PER_LONG/8) * \nr), \sp
+.endm
+
+#define RSB_FILL_LOOPS 16 /* To avoid underflow */
+
+ENTRY(__fill_rsb)
+ STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP
+ ret
+END(__fill_rsb)
+EXPORT_SYMBOL_GPL(__fill_rsb)
+
+#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
+
+ENTRY(__clear_rsb)
+ STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP
+ ret
+END(__clear_rsb)
+EXPORT_SYMBOL_GPL(__clear_rsb)
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 9fe656c42aa5..45f5d6cf65ae 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -21,16 +21,16 @@ ex_fixup_handler(const struct exception_table_entry *x)
return (ex_handler_t)((unsigned long)&x->handler + x->handler);
}
-bool ex_handler_default(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+__visible bool ex_handler_default(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
{
regs->ip = ex_fixup_addr(fixup);
return true;
}
EXPORT_SYMBOL(ex_handler_default);
-bool ex_handler_fault(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+__visible bool ex_handler_fault(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
{
regs->ip = ex_fixup_addr(fixup);
regs->ax = trapnr;
@@ -42,8 +42,8 @@ EXPORT_SYMBOL_GPL(ex_handler_fault);
* Handler for UD0 exception following a failed test against the
* result of a refcount inc/dec/add/sub.
*/
-bool ex_handler_refcount(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+__visible bool ex_handler_refcount(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
{
/* First unconditionally saturate the refcount. */
*(int *)regs->cx = INT_MIN / 2;
@@ -95,8 +95,8 @@ EXPORT_SYMBOL(ex_handler_refcount);
* of vulnerability by restoring from the initial state (essentially, zeroing
* out all the FPU registers) if we can't restore from the task's FPU state.
*/
-bool ex_handler_fprestore(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+__visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
{
regs->ip = ex_fixup_addr(fixup);
@@ -108,8 +108,8 @@ bool ex_handler_fprestore(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL_GPL(ex_handler_fprestore);
-bool ex_handler_ext(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+__visible bool ex_handler_ext(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
{
/* Special hack for uaccess_err */
current->thread.uaccess_err = 1;
@@ -118,8 +118,8 @@ bool ex_handler_ext(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_ext);
-bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+__visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
{
if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pF)\n",
(unsigned int)regs->cx, regs->ip, (void *)regs->ip))
@@ -133,8 +133,8 @@ bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_rdmsr_unsafe);
-bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+__visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
{
if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pF)\n",
(unsigned int)regs->cx, (unsigned int)regs->dx,
@@ -147,8 +147,8 @@ bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_wrmsr_unsafe);
-bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+__visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
{
if (static_cpu_has(X86_BUG_NULL_SEG))
asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS));
@@ -157,7 +157,7 @@ bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_clear_fs);
-bool ex_has_fault_handler(unsigned long ip)
+__visible bool ex_has_fault_handler(unsigned long ip)
{
const struct exception_table_entry *e;
ex_handler_t handler;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b3e40773dce0..800de815519c 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -439,18 +439,13 @@ static noinline int vmalloc_fault(unsigned long address)
if (pgd_none(*pgd_ref))
return -1;
- if (pgd_none(*pgd)) {
- set_pgd(pgd, *pgd_ref);
- arch_flush_lazy_mmu_mode();
- } else if (CONFIG_PGTABLE_LEVELS > 4) {
- /*
- * With folded p4d, pgd_none() is always false, so the pgd may
- * point to an empty page table entry and pgd_page_vaddr()
- * will return garbage.
- *
- * We will do the correct sanity check on the p4d level.
- */
- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+ if (CONFIG_PGTABLE_LEVELS > 4) {
+ if (pgd_none(*pgd)) {
+ set_pgd(pgd, *pgd_ref);
+ arch_flush_lazy_mmu_mode();
+ } else {
+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+ }
}
/* With 4-level paging, copying happens on the p4d level. */
@@ -459,7 +454,7 @@ static noinline int vmalloc_fault(unsigned long address)
if (p4d_none(*p4d_ref))
return -1;
- if (p4d_none(*p4d)) {
+ if (p4d_none(*p4d) && CONFIG_PGTABLE_LEVELS == 4) {
set_p4d(p4d, *p4d_ref);
arch_flush_lazy_mmu_mode();
} else {
@@ -470,6 +465,7 @@ static noinline int vmalloc_fault(unsigned long address)
* Below here mismatches are bugs because these lower tables
* are shared:
*/
+ BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS < 4);
pud = pud_offset(p4d, address);
pud_ref = pud_offset(p4d_ref, address);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index fe7d57a8fb60..1555bd7d3449 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -678,6 +678,25 @@ static enum page_cache_mode lookup_memtype(u64 paddr)
}
/**
+ * pat_pfn_immune_to_uc_mtrr - Check whether the PAT memory type
+ * of @pfn cannot be overridden by UC MTRR memory type.
+ *
+ * Only to be called when PAT is enabled.
+ *
+ * Returns true, if the PAT memory type of @pfn is UC, UC-, or WC.
+ * Returns false in other cases.
+ */
+bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn)
+{
+ enum page_cache_mode cm = lookup_memtype(PFN_PHYS(pfn));
+
+ return cm == _PAGE_CACHE_MODE_UC ||
+ cm == _PAGE_CACHE_MODE_UC_MINUS ||
+ cm == _PAGE_CACHE_MODE_WC;
+}
+EXPORT_SYMBOL_GPL(pat_pfn_immune_to_uc_mtrr);
+
+/**
* io_reserve_memtype - Request a memory type mapping for a region of memory
* @start: start (physical address) of the region
* @end: end (physical address) of the region
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index a1561957dccb..5bfe61a5e8e3 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -151,6 +151,34 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
local_irq_restore(flags);
}
+static void sync_current_stack_to_mm(struct mm_struct *mm)
+{
+ unsigned long sp = current_stack_pointer;
+ pgd_t *pgd = pgd_offset(mm, sp);
+
+ if (CONFIG_PGTABLE_LEVELS > 4) {
+ if (unlikely(pgd_none(*pgd))) {
+ pgd_t *pgd_ref = pgd_offset_k(sp);
+
+ set_pgd(pgd, *pgd_ref);
+ }
+ } else {
+ /*
+ * "pgd" is faked. The top level entries are "p4d"s, so sync
+ * the p4d. This compiles to approximately the same code as
+ * the 5-level case.
+ */
+ p4d_t *p4d = p4d_offset(pgd, sp);
+
+ if (unlikely(p4d_none(*p4d))) {
+ pgd_t *pgd_ref = pgd_offset_k(sp);
+ p4d_t *p4d_ref = p4d_offset(pgd_ref, sp);
+
+ set_p4d(p4d, *p4d_ref);
+ }
+ }
+}
+
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
@@ -226,11 +254,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* mapped in the new pgd, we'll double-fault. Forcibly
* map it.
*/
- unsigned int index = pgd_index(current_stack_pointer);
- pgd_t *pgd = next->pgd + index;
-
- if (unlikely(pgd_none(*pgd)))
- set_pgd(pgd, init_mm.pgd[index]);
+ sync_current_stack_to_mm(next);
}
/* Stop remote flushes for the previous mm */
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index 511921045312..43867bc85368 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -300,6 +300,7 @@ int __init intel_mid_pci_init(void)
pci_root_ops = intel_mid_pci_ops;
pci_soc_mode = 1;
/* Continue with standard init */
+ acpi_noirq_set();
return 1;
}
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 2dd15e967c3f..c310a8284358 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -25,7 +25,6 @@
#include <linux/spinlock.h>
#include <linux/bootmem.h>
#include <linux/ioport.h>
-#include <linux/init.h>
#include <linux/mc146818rtc.h>
#include <linux/efi.h>
#include <linux/uaccess.h>
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 86676cec99a1..2c67bae6bb53 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -194,7 +194,7 @@ void __init x86_intel_mid_early_setup(void)
x86_platform.calibrate_tsc = intel_mid_calibrate_tsc;
x86_platform.get_nmi_reason = intel_mid_get_nmi_reason;
- x86_init.pci.init = intel_mid_pci_init;
+ x86_init.pci.arch_init = intel_mid_pci_init;
x86_init.pci.fixup_irqs = x86_init_noop;
legacy_pic = &null_legacy_pic;
diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c
index 19b43e3a9f0f..7be1e1fe9ae3 100644
--- a/arch/x86/platform/intel-mid/sfi.c
+++ b/arch/x86/platform/intel-mid/sfi.c
@@ -96,8 +96,7 @@ int __init sfi_parse_mtmr(struct sfi_table_header *table)
pentry->freq_hz, pentry->irq);
mp_irq.type = MP_INTSRC;
mp_irq.irqtype = mp_INT;
- /* triggering mode edge bit 2-3, active high polarity bit 0-1 */
- mp_irq.irqflag = 5;
+ mp_irq.irqflag = MP_IRQTRIG_EDGE | MP_IRQPOL_ACTIVE_HIGH;
mp_irq.srcbus = MP_BUS_ISA;
mp_irq.srcbusirq = pentry->irq; /* IRQ */
mp_irq.dstapic = MP_APIC_ALL;
@@ -168,7 +167,7 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table)
totallen, (u32)pentry->phys_addr, pentry->irq);
mp_irq.type = MP_INTSRC;
mp_irq.irqtype = mp_INT;
- mp_irq.irqflag = 0xf; /* level trigger and active low */
+ mp_irq.irqflag = MP_IRQTRIG_LEVEL | MP_IRQPOL_ACTIVE_LOW;
mp_irq.srcbus = MP_BUS_ISA;
mp_irq.srcbusirq = pentry->irq; /* IRQ */
mp_irq.dstapic = MP_APIC_ALL;
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 8538a6723171..c2e9285d1bf1 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1751,7 +1751,8 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)
uv1 = 1;
/* the 14-bit pnode */
- write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m));
+ write_mmr_descriptor_base(pnode,
+ (n << UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT | m));
/*
* Initializing all 8 (ITEMS_PER_DESC) descriptors for each
* cpu even though we only use the first one; one descriptor can
diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile
index 972b8e8d939c..09af7ff53044 100644
--- a/arch/x86/tools/Makefile
+++ b/arch/x86/tools/Makefile
@@ -13,28 +13,28 @@ else
posttest_64bit = -n
endif
-distill_awk = $(srctree)/arch/x86/tools/distill.awk
+reformatter = $(srctree)/arch/x86/tools/objdump_reformat.awk
chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk
quiet_cmd_posttest = TEST $@
- cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose)
+ cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(reformatter) | $(obj)/insn_decoder_test $(posttest_64bit) $(posttest_verbose)
quiet_cmd_sanitytest = TEST $@
cmd_sanitytest = $(obj)/insn_sanity $(posttest_64bit) -m 1000000
-posttest: $(obj)/test_get_len vmlinux $(obj)/insn_sanity
+posttest: $(obj)/insn_decoder_test vmlinux $(obj)/insn_sanity
$(call cmd,posttest)
$(call cmd,sanitytest)
-hostprogs-y += test_get_len insn_sanity
+hostprogs-y += insn_decoder_test insn_sanity
# -I needed for generated C source and C source which in the kernel tree.
-HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/uapi/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/uapi/
+HOSTCFLAGS_insn_decoder_test.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/uapi/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/uapi/
HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
# Dependencies are also needed.
-$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
+$(obj)/insn_decoder_test.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
$(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/insn_decoder_test.c
index ecf31e0358c8..a3b4fd954931 100644
--- a/arch/x86/tools/test_get_len.c
+++ b/arch/x86/tools/insn_decoder_test.c
@@ -9,10 +9,6 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright (C) IBM Corporation, 2009
*/
@@ -21,6 +17,7 @@
#include <string.h>
#include <assert.h>
#include <unistd.h>
+#include <stdarg.h>
#define unlikely(cond) (cond)
@@ -33,7 +30,7 @@
* particular. See if insn_get_length() and the disassembler agree
* on the length of each instruction in an elf disassembly.
*
- * Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len
+ * Usage: objdump -d a.out | awk -f objdump_reformat.awk | ./insn_decoder_test
*/
const char *prog;
@@ -42,8 +39,8 @@ static int x86_64;
static void usage(void)
{
- fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |"
- " %s [-y|-n] [-v]\n", prog);
+ fprintf(stderr, "Usage: objdump -d a.out | awk -f objdump_reformat.awk"
+ " | %s [-y|-n] [-v]\n", prog);
fprintf(stderr, "\t-y 64bit mode\n");
fprintf(stderr, "\t-n 32bit mode\n");
fprintf(stderr, "\t-v verbose mode\n");
@@ -52,10 +49,21 @@ static void usage(void)
static void malformed_line(const char *line, int line_nr)
{
- fprintf(stderr, "%s: malformed line %d:\n%s", prog, line_nr, line);
+ fprintf(stderr, "%s: error: malformed line %d:\n%s",
+ prog, line_nr, line);
exit(3);
}
+static void pr_warn(const char *fmt, ...)
+{
+ va_list ap;
+
+ fprintf(stderr, "%s: warning: ", prog);
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+}
+
static void dump_field(FILE *fp, const char *name, const char *indent,
struct insn_field *field)
{
@@ -153,21 +161,20 @@ int main(int argc, char **argv)
insn_get_length(&insn);
if (insn.length != nb) {
warnings++;
- fprintf(stderr, "Warning: %s found difference at %s\n",
- prog, sym);
- fprintf(stderr, "Warning: %s", line);
- fprintf(stderr, "Warning: objdump says %d bytes, but "
- "insn_get_length() says %d\n", nb,
- insn.length);
+ pr_warn("Found an x86 instruction decoder bug, "
+ "please report this.\n", sym);
+ pr_warn("%s", line);
+ pr_warn("objdump says %d bytes, but insn_get_length() "
+ "says %d\n", nb, insn.length);
if (verbose)
dump_insn(stderr, &insn);
}
}
if (warnings)
- fprintf(stderr, "Warning: decoded and checked %d"
- " instructions with %d warnings\n", insns, warnings);
+ pr_warn("Decoded and checked %d instructions with %d "
+ "failures\n", insns, warnings);
else
- fprintf(stdout, "Success: decoded and checked %d"
- " instructions\n", insns);
+ fprintf(stdout, "%s: success: Decoded and checked %d"
+ " instructions\n", prog, insns);
return 0;
}
diff --git a/arch/x86/tools/distill.awk b/arch/x86/tools/objdump_reformat.awk
index e0edeccc1429..f418c91b71f0 100644
--- a/arch/x86/tools/distill.awk
+++ b/arch/x86/tools/objdump_reformat.awk
@@ -1,7 +1,7 @@
#!/bin/awk -f
# SPDX-License-Identifier: GPL-2.0
-# Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len
-# Distills the disassembly as follows:
+# Usage: objdump -d a.out | awk -f objdump_reformat.awk | ./insn_decoder_test
+# Reformats the disassembly as follows:
# - Removes all lines except the disassembled instructions.
# - For instructions that exceed 1 line (7 bytes), crams all the hex bytes
# into a single line.
diff --git a/arch/x86/xen/mmu_hvm.c b/arch/x86/xen/mmu_hvm.c
index 2cfcfe4f6b2a..dd2ad82eee80 100644
--- a/arch/x86/xen/mmu_hvm.c
+++ b/arch/x86/xen/mmu_hvm.c
@@ -75,6 +75,6 @@ void __init xen_hvm_init_mmu_ops(void)
if (is_pagetable_dying_supported())
pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap;
#ifdef CONFIG_PROC_VMCORE
- register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram);
+ WARN_ON(register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram));
#endif
}
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 02f3445a2b5f..cd97a62394e7 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -23,8 +23,6 @@ static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
static DEFINE_PER_CPU(char *, irq_name);
static bool xen_pvspin = true;
-#include <asm/qspinlock.h>
-
static void xen_qlock_kick(int cpu)
{
int irq = per_cpu(lock_kicker_irq, cpu);