summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig3
-rw-r--r--arch/x86/events/intel/core.c33
-rw-r--r--arch/x86/events/intel/ds.c10
-rw-r--r--arch/x86/events/rapl.c12
-rw-r--r--arch/x86/include/asm/msr-index.h3
-rw-r--r--arch/x86/include/asm/perf_event.h28
-rw-r--r--arch/x86/include/asm/sev.h2
-rw-r--r--arch/x86/kernel/cpu/bugs.c21
-rw-r--r--arch/x86/kvm/mmu/mmu.c12
-rw-r--r--arch/x86/kvm/svm/sev.c10
-rw-r--r--arch/x86/kvm/vmx/nested.c11
-rw-r--r--arch/x86/kvm/x86.c2
-rw-r--r--arch/x86/um/os-Linux/registers.c21
-rw-r--r--arch/x86/um/signal.c13
-rw-r--r--arch/x86/virt/svm/sev.c23
-rw-r--r--arch/x86/xen/mmu_pv.c71
16 files changed, 196 insertions, 79 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 87198d957e2f..be2c311f5118 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2599,7 +2599,8 @@ config MITIGATION_IBPB_ENTRY
depends on CPU_SUP_AMD && X86_64
default y
help
- Compile the kernel with support for the retbleed=ibpb mitigation.
+ Compile the kernel with support for the retbleed=ibpb and
+ spec_rstack_overflow={ibpb,ibpb-vmexit} mitigations.
config MITIGATION_IBRS_ENTRY
bool "Enable IBRS on kernel entry"
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 7601196d1d18..e86333eee266 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4905,20 +4905,22 @@ static inline bool intel_pmu_broken_perf_cap(void)
static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
{
- unsigned int sub_bitmaps, eax, ebx, ecx, edx;
+ unsigned int cntr, fixed_cntr, ecx, edx;
+ union cpuid35_eax eax;
+ union cpuid35_ebx ebx;
- cpuid(ARCH_PERFMON_EXT_LEAF, &sub_bitmaps, &ebx, &ecx, &edx);
+ cpuid(ARCH_PERFMON_EXT_LEAF, &eax.full, &ebx.full, &ecx, &edx);
- if (ebx & ARCH_PERFMON_EXT_UMASK2)
+ if (ebx.split.umask2)
pmu->config_mask |= ARCH_PERFMON_EVENTSEL_UMASK2;
- if (ebx & ARCH_PERFMON_EXT_EQ)
+ if (ebx.split.eq)
pmu->config_mask |= ARCH_PERFMON_EVENTSEL_EQ;
- if (sub_bitmaps & ARCH_PERFMON_NUM_COUNTER_LEAF_BIT) {
+ if (eax.split.cntr_subleaf) {
cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
- &eax, &ebx, &ecx, &edx);
- pmu->cntr_mask64 = eax;
- pmu->fixed_cntr_mask64 = ebx;
+ &cntr, &fixed_cntr, &ecx, &edx);
+ pmu->cntr_mask64 = cntr;
+ pmu->fixed_cntr_mask64 = fixed_cntr;
}
if (!intel_pmu_broken_perf_cap()) {
@@ -4941,11 +4943,6 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
else
pmu->intel_ctrl &= ~(1ULL << GLOBAL_CTRL_EN_PERF_METRICS);
- if (pmu->intel_cap.pebs_output_pt_available)
- pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
- else
- pmu->pmu.capabilities &= ~PERF_PMU_CAP_AUX_OUTPUT;
-
intel_pmu_check_event_constraints(pmu->event_constraints,
pmu->cntr_mask64,
pmu->fixed_cntr_mask64,
@@ -5023,9 +5020,6 @@ static bool init_hybrid_pmu(int cpu)
pr_info("%s PMU driver: ", pmu->name);
- if (pmu->intel_cap.pebs_output_pt_available)
- pr_cont("PEBS-via-PT ");
-
pr_cont("\n");
x86_pmu_show_pmu_cap(&pmu->pmu);
@@ -5048,8 +5042,11 @@ static void intel_pmu_cpu_starting(int cpu)
init_debug_store_on_cpu(cpu);
/*
- * Deal with CPUs that don't clear their LBRs on power-up.
+ * Deal with CPUs that don't clear their LBRs on power-up, and that may
+ * even boot with LBRs enabled.
*/
+ if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && x86_pmu.lbr_nr)
+ msr_clear_bit(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR_BIT);
intel_pmu_lbr_reset();
cpuc->lbr_sel = NULL;
@@ -6370,11 +6367,9 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities;
if (pmu->pmu_type & hybrid_small_tiny) {
pmu->intel_cap.perf_metrics = 0;
- pmu->intel_cap.pebs_output_pt_available = 1;
pmu->mid_ack = true;
} else if (pmu->pmu_type & hybrid_big) {
pmu->intel_cap.perf_metrics = 1;
- pmu->intel_cap.pebs_output_pt_available = 0;
pmu->late_ack = true;
}
}
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index ba74e1198328..c2e2eae7309c 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -2578,7 +2578,15 @@ void __init intel_ds_init(void)
}
pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual);
- if (!is_hybrid() && x86_pmu.intel_cap.pebs_output_pt_available) {
+ /*
+ * The PEBS-via-PT is not supported on hybrid platforms,
+ * because not all CPUs of a hybrid machine support it.
+ * The global x86_pmu.intel_cap, which only contains the
+ * common capabilities, is used to check the availability
+ * of the feature. The per-PMU pebs_output_pt_available
+ * in a hybrid machine should be ignored.
+ */
+ if (x86_pmu.intel_cap.pebs_output_pt_available) {
pr_cont("PEBS-via-PT, ");
x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
}
diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
index d3bb3865c1b1..4952faf03e82 100644
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -370,6 +370,10 @@ static int rapl_pmu_event_init(struct perf_event *event)
unsigned int rapl_pmu_idx;
struct rapl_pmus *rapl_pmus;
+ /* only look at RAPL events */
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
/* unsupported modes and filters */
if (event->attr.sample_period) /* no sampling */
return -EINVAL;
@@ -387,10 +391,6 @@ static int rapl_pmu_event_init(struct perf_event *event)
rapl_pmus_scope = rapl_pmus->pmu.scope;
if (rapl_pmus_scope == PERF_PMU_SCOPE_PKG || rapl_pmus_scope == PERF_PMU_SCOPE_DIE) {
- /* only look at RAPL package events */
- if (event->attr.type != rapl_pmus_pkg->pmu.type)
- return -ENOENT;
-
cfg = array_index_nospec((long)cfg, NR_RAPL_PKG_DOMAINS + 1);
if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1)
return -EINVAL;
@@ -398,10 +398,6 @@ static int rapl_pmu_event_init(struct perf_event *event)
bit = cfg - 1;
event->hw.event_base = rapl_model->rapl_pkg_msrs[bit].msr;
} else if (rapl_pmus_scope == PERF_PMU_SCOPE_CORE) {
- /* only look at RAPL core events */
- if (event->attr.type != rapl_pmus_core->pmu.type)
- return -ENOENT;
-
cfg = array_index_nospec((long)cfg, NR_RAPL_CORE_DOMAINS + 1);
if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1)
return -EINVAL;
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 9a71880eec07..72765b2fe0d8 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -395,7 +395,8 @@
#define MSR_IA32_PASID_VALID BIT_ULL(31)
/* DEBUGCTLMSR bits (others vary by model): */
-#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
+#define DEBUGCTLMSR_LBR_BIT 0 /* last branch recording */
+#define DEBUGCTLMSR_LBR (1UL << DEBUGCTLMSR_LBR_BIT)
#define DEBUGCTLMSR_BTF_SHIFT 1
#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
#define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 1ac79f361645..0ba8d20f2d1d 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -188,11 +188,33 @@ union cpuid10_edx {
* detection/enumeration details:
*/
#define ARCH_PERFMON_EXT_LEAF 0x00000023
-#define ARCH_PERFMON_EXT_UMASK2 0x1
-#define ARCH_PERFMON_EXT_EQ 0x2
-#define ARCH_PERFMON_NUM_COUNTER_LEAF_BIT 0x1
#define ARCH_PERFMON_NUM_COUNTER_LEAF 0x1
+union cpuid35_eax {
+ struct {
+ unsigned int leaf0:1;
+ /* Counters Sub-Leaf */
+ unsigned int cntr_subleaf:1;
+ /* Auto Counter Reload Sub-Leaf */
+ unsigned int acr_subleaf:1;
+ /* Events Sub-Leaf */
+ unsigned int events_subleaf:1;
+ unsigned int reserved:28;
+ } split;
+ unsigned int full;
+};
+
+union cpuid35_ebx {
+ struct {
+ /* UnitMask2 Supported */
+ unsigned int umask2:1;
+ /* EQ-bit Supported */
+ unsigned int eq:1;
+ unsigned int reserved:30;
+ } split;
+ unsigned int full;
+};
+
/*
* Intel Architectural LBR CPUID detection/enumeration details:
*/
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 5d9685f92e5c..1581246491b5 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -531,6 +531,7 @@ static inline void __init snp_secure_tsc_init(void) { }
#ifdef CONFIG_KVM_AMD_SEV
bool snp_probe_rmptable_info(void);
+int snp_rmptable_init(void);
int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level);
void snp_dump_hva_rmpentry(unsigned long address);
int psmash(u64 pfn);
@@ -541,6 +542,7 @@ void kdump_sev_callback(void);
void snp_fixup_e820_tables(void);
#else
static inline bool snp_probe_rmptable_info(void) { return false; }
+static inline int snp_rmptable_init(void) { return -ENOSYS; }
static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; }
static inline void snp_dump_hva_rmpentry(unsigned long address) {}
static inline int psmash(u64 pfn) { return -ENODEV; }
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 5a505aa65489..a5d0998d7604 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1115,6 +1115,8 @@ do_cmd_auto:
case RETBLEED_MITIGATION_IBPB:
setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+ setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
+ mitigate_smt = true;
/*
* IBPB on entry already obviates the need for
@@ -1124,9 +1126,6 @@ do_cmd_auto:
setup_clear_cpu_cap(X86_FEATURE_UNRET);
setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
- setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
- mitigate_smt = true;
-
/*
* There is no need for RSB filling: entry_ibpb() ensures
* all predictions, including the RSB, are invalidated,
@@ -2646,6 +2645,7 @@ static void __init srso_select_mitigation(void)
if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) {
if (has_microcode) {
setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+ setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
srso_mitigation = SRSO_MITIGATION_IBPB;
/*
@@ -2655,6 +2655,13 @@ static void __init srso_select_mitigation(void)
*/
setup_clear_cpu_cap(X86_FEATURE_UNRET);
setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
+
+ /*
+ * There is no need for RSB filling: entry_ibpb() ensures
+ * all predictions, including the RSB, are invalidated,
+ * regardless of IBPB implementation.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
}
} else {
pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n");
@@ -2663,8 +2670,8 @@ static void __init srso_select_mitigation(void)
ibpb_on_vmexit:
case SRSO_CMD_IBPB_ON_VMEXIT:
- if (IS_ENABLED(CONFIG_MITIGATION_SRSO)) {
- if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) {
+ if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) {
+ if (has_microcode) {
setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT;
@@ -2676,8 +2683,8 @@ ibpb_on_vmexit:
setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
}
} else {
- pr_err("WARNING: kernel not compiled with MITIGATION_SRSO.\n");
- }
+ pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n");
+ }
break;
default:
break;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index d4ac4a1f8b81..8160870398b9 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -7460,7 +7460,7 @@ static bool kvm_nx_huge_page_recovery_worker(void *data)
return true;
}
-static void kvm_mmu_start_lpage_recovery(struct once *once)
+static int kvm_mmu_start_lpage_recovery(struct once *once)
{
struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once);
struct kvm *kvm = container_of(ka, struct kvm, arch);
@@ -7471,13 +7471,14 @@ static void kvm_mmu_start_lpage_recovery(struct once *once)
kvm_nx_huge_page_recovery_worker_kill,
kvm, "kvm-nx-lpage-recovery");
- if (!nx_thread)
- return;
+ if (IS_ERR(nx_thread))
+ return PTR_ERR(nx_thread);
vhost_task_start(nx_thread);
/* Make the task visible only once it is fully started. */
WRITE_ONCE(kvm->arch.nx_huge_page_recovery_thread, nx_thread);
+ return 0;
}
int kvm_mmu_post_init_vm(struct kvm *kvm)
@@ -7485,10 +7486,7 @@ int kvm_mmu_post_init_vm(struct kvm *kvm)
if (nx_hugepage_mitigation_hard_disabled)
return 0;
- call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery);
- if (!kvm->arch.nx_huge_page_recovery_thread)
- return -ENOMEM;
- return 0;
+ return call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery);
}
void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 080f8cecd7ca..661108d65ee7 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2972,6 +2972,16 @@ void __init sev_hardware_setup(void)
WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_FLUSHBYASID)))
goto out;
+ /*
+ * The kernel's initcall infrastructure lacks the ability to express
+ * dependencies between initcalls, whereas the modules infrastructure
+ * automatically handles dependencies via symbol loading. Ensure the
+ * PSP SEV driver is initialized before proceeding if KVM is built-in,
+ * as the dependency isn't handled by the initcall infrastructure.
+ */
+ if (IS_BUILTIN(CONFIG_KVM_AMD) && sev_module_init())
+ goto out;
+
/* Retrieve SEV CPUID information */
cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 8a7af02d466e..ed8a3cb53961 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5084,6 +5084,17 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
load_vmcs12_host_state(vcpu, vmcs12);
+ /*
+ * Process events if an injectable IRQ or NMI is pending, even
+ * if the event is blocked (RFLAGS.IF is cleared on VM-Exit).
+ * If an event became pending while L2 was active, KVM needs to
+ * either inject the event or request an IRQ/NMI window. SMIs
+ * don't need to be processed as SMM is mutually exclusive with
+ * non-root mode. INIT/SIPI don't need to be checked as INIT
+ * is blocked post-VMXON, and SIPIs are ignored.
+ */
+ if (kvm_cpu_has_injectable_intr(vcpu) || vcpu->arch.nmi_pending)
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
return;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 12d5f47c1bbe..4b64ab350bcd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12879,11 +12879,11 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
mutex_unlock(&kvm->slots_lock);
}
kvm_unload_vcpu_mmus(kvm);
+ kvm_destroy_vcpus(kvm);
kvm_x86_call(vm_destroy)(kvm);
kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
kvm_pic_destroy(kvm);
kvm_ioapic_destroy(kvm);
- kvm_destroy_vcpus(kvm);
kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
kvm_mmu_uninit_vm(kvm);
diff --git a/arch/x86/um/os-Linux/registers.c b/arch/x86/um/os-Linux/registers.c
index 76eaeb93928c..eb1cdadc8a61 100644
--- a/arch/x86/um/os-Linux/registers.c
+++ b/arch/x86/um/os-Linux/registers.c
@@ -18,6 +18,7 @@
#include <registers.h>
#include <sys/mman.h>
+static unsigned long ptrace_regset;
unsigned long host_fp_size;
int get_fp_registers(int pid, unsigned long *regs)
@@ -27,7 +28,7 @@ int get_fp_registers(int pid, unsigned long *regs)
.iov_len = host_fp_size,
};
- if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) < 0)
+ if (ptrace(PTRACE_GETREGSET, pid, ptrace_regset, &iov) < 0)
return -errno;
return 0;
}
@@ -39,7 +40,7 @@ int put_fp_registers(int pid, unsigned long *regs)
.iov_len = host_fp_size,
};
- if (ptrace(PTRACE_SETREGSET, pid, NT_X86_XSTATE, &iov) < 0)
+ if (ptrace(PTRACE_SETREGSET, pid, ptrace_regset, &iov) < 0)
return -errno;
return 0;
}
@@ -58,9 +59,23 @@ int arch_init_registers(int pid)
return -ENOMEM;
/* GDB has x86_xsave_length, which uses x86_cpuid_count */
- ret = ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov);
+ ptrace_regset = NT_X86_XSTATE;
+ ret = ptrace(PTRACE_GETREGSET, pid, ptrace_regset, &iov);
if (ret)
ret = -errno;
+
+ if (ret == -ENODEV) {
+#ifdef CONFIG_X86_32
+ ptrace_regset = NT_PRXFPREG;
+#else
+ ptrace_regset = NT_PRFPREG;
+#endif
+ iov.iov_len = 2 * 1024 * 1024;
+ ret = ptrace(PTRACE_GETREGSET, pid, ptrace_regset, &iov);
+ if (ret)
+ ret = -errno;
+ }
+
munmap(iov.iov_base, 2 * 1024 * 1024);
host_fp_size = iov.iov_len;
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index 75087e85b6fd..2934e170b0fe 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -187,7 +187,12 @@ static int copy_sc_to_user(struct sigcontext __user *to,
* Put magic/size values for userspace. We do not bother to verify them
* later on, however, userspace needs them should it try to read the
* XSTATE data. And ptrace does not fill in these parts.
+ *
+ * Skip this if we do not have an XSTATE frame.
*/
+ if (host_fp_size <= sizeof(to_fp64->fpstate))
+ return 0;
+
BUILD_BUG_ON(sizeof(int) != FP_XSTATE_MAGIC2_SIZE);
#ifdef CONFIG_X86_32
__put_user(offsetof(struct _fpstate_32, _fxsr_env) +
@@ -367,11 +372,13 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig,
int err = 0, sig = ksig->sig;
unsigned long fp_to;
- frame = (struct rt_sigframe __user *)
- round_down(stack_top - sizeof(struct rt_sigframe), 16);
+ frame = (void __user *)stack_top - sizeof(struct rt_sigframe);
/* Add required space for math frame */
- frame = (struct rt_sigframe __user *)((unsigned long)frame - math_size);
+ frame = (void __user *)((unsigned long)frame - math_size);
+
+ /* ABI requires 16 byte boundary alignment */
+ frame = (void __user *)round_down((unsigned long)frame, 16);
/* Subtract 128 for a red zone and 8 for proper alignment */
frame = (struct rt_sigframe __user *) ((unsigned long) frame - 128 - 8);
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 1dcc027ec77e..42e74a5a7d78 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -505,19 +505,19 @@ static bool __init setup_rmptable(void)
* described in the SNP_INIT_EX firmware command description in the SNP
* firmware ABI spec.
*/
-static int __init snp_rmptable_init(void)
+int __init snp_rmptable_init(void)
{
unsigned int i;
u64 val;
- if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
- return 0;
+ if (WARN_ON_ONCE(!cc_platform_has(CC_ATTR_HOST_SEV_SNP)))
+ return -ENOSYS;
- if (!amd_iommu_snp_en)
- goto nosnp;
+ if (WARN_ON_ONCE(!amd_iommu_snp_en))
+ return -ENOSYS;
if (!setup_rmptable())
- goto nosnp;
+ return -ENOSYS;
/*
* Check if SEV-SNP is already enabled, this can happen in case of
@@ -530,7 +530,7 @@ static int __init snp_rmptable_init(void)
/* Zero out the RMP bookkeeping area */
if (!clear_rmptable_bookkeeping()) {
free_rmp_segment_table();
- goto nosnp;
+ return -ENOSYS;
}
/* Zero out the RMP entries */
@@ -562,17 +562,8 @@ skip_enable:
crash_kexec_post_notifiers = true;
return 0;
-
-nosnp:
- cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
- return -ENOSYS;
}
-/*
- * This must be called after the IOMMU has been initialized.
- */
-device_initcall(snp_rmptable_init);
-
static void set_rmp_segment_info(unsigned int segment_shift)
{
rmp_segment_shift = segment_shift;
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 2c70cd35e72c..d078de2c952b 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -111,6 +111,51 @@ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
*/
static DEFINE_SPINLOCK(xen_reservation_lock);
+/* Protected by xen_reservation_lock. */
+#define MIN_CONTIG_ORDER 9 /* 2MB */
+static unsigned int discontig_frames_order = MIN_CONTIG_ORDER;
+static unsigned long discontig_frames_early[1UL << MIN_CONTIG_ORDER] __initdata;
+static unsigned long *discontig_frames __refdata = discontig_frames_early;
+static bool discontig_frames_dyn;
+
+static int alloc_discontig_frames(unsigned int order)
+{
+ unsigned long *new_array, *old_array;
+ unsigned int old_order;
+ unsigned long flags;
+
+ BUG_ON(order < MIN_CONTIG_ORDER);
+ BUILD_BUG_ON(sizeof(discontig_frames_early) != PAGE_SIZE);
+
+ new_array = (unsigned long *)__get_free_pages(GFP_KERNEL,
+ order - MIN_CONTIG_ORDER);
+ if (!new_array)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&xen_reservation_lock, flags);
+
+ old_order = discontig_frames_order;
+
+ if (order > discontig_frames_order || !discontig_frames_dyn) {
+ if (!discontig_frames_dyn)
+ old_array = NULL;
+ else
+ old_array = discontig_frames;
+
+ discontig_frames = new_array;
+ discontig_frames_order = order;
+ discontig_frames_dyn = true;
+ } else {
+ old_array = new_array;
+ }
+
+ spin_unlock_irqrestore(&xen_reservation_lock, flags);
+
+ free_pages((unsigned long)old_array, old_order - MIN_CONTIG_ORDER);
+
+ return 0;
+}
+
/*
* Note about cr3 (pagetable base) values:
*
@@ -814,6 +859,9 @@ static void __init xen_after_bootmem(void)
SetPagePinned(virt_to_page(level3_user_vsyscall));
#endif
xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
+
+ if (alloc_discontig_frames(MIN_CONTIG_ORDER))
+ BUG();
}
static void xen_unpin_page(struct mm_struct *mm, struct page *page,
@@ -2203,10 +2251,6 @@ void __init xen_init_mmu_ops(void)
memset(dummy_mapping, 0xff, PAGE_SIZE);
}
-/* Protected by xen_reservation_lock. */
-#define MAX_CONTIG_ORDER 9 /* 2MB */
-static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
-
#define VOID_PTE (mfn_pte(0, __pgprot(0)))
static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
unsigned long *in_frames,
@@ -2323,18 +2367,25 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
unsigned int address_bits,
dma_addr_t *dma_handle)
{
- unsigned long *in_frames = discontig_frames, out_frame;
+ unsigned long *in_frames, out_frame;
unsigned long flags;
int success;
unsigned long vstart = (unsigned long)phys_to_virt(pstart);
- if (unlikely(order > MAX_CONTIG_ORDER))
- return -ENOMEM;
+ if (unlikely(order > discontig_frames_order)) {
+ if (!discontig_frames_dyn)
+ return -ENOMEM;
+
+ if (alloc_discontig_frames(order))
+ return -ENOMEM;
+ }
memset((void *) vstart, 0, PAGE_SIZE << order);
spin_lock_irqsave(&xen_reservation_lock, flags);
+ in_frames = discontig_frames;
+
/* 1. Zap current PTEs, remembering MFNs. */
xen_zap_pfn_range(vstart, order, in_frames, NULL);
@@ -2358,12 +2409,12 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
{
- unsigned long *out_frames = discontig_frames, in_frame;
+ unsigned long *out_frames, in_frame;
unsigned long flags;
int success;
unsigned long vstart;
- if (unlikely(order > MAX_CONTIG_ORDER))
+ if (unlikely(order > discontig_frames_order))
return;
vstart = (unsigned long)phys_to_virt(pstart);
@@ -2371,6 +2422,8 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
spin_lock_irqsave(&xen_reservation_lock, flags);
+ out_frames = discontig_frames;
+
/* 1. Find start MFN of contiguous extent. */
in_frame = virt_to_mfn((void *)vstart);