Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 7
-rw-r--r--  arch/x86/boot/Makefile | 2
-rw-r--r--  arch/x86/boot/compressed/kaslr.c | 2
-rw-r--r--  arch/x86/boot/string.c | 2
-rw-r--r--  arch/x86/coco/tdx/tdx.c | 40
-rw-r--r--  arch/x86/entry/entry_64_compat.S | 4
-rw-r--r--  arch/x86/entry/vdso/vdso.lds.S | 2
-rw-r--r--  arch/x86/entry/vdso/vma.c | 30
-rw-r--r--  arch/x86/events/amd/brs.c | 2
-rw-r--r--  arch/x86/events/amd/core.c | 5
-rw-r--r--  arch/x86/events/amd/ibs.c | 4
-rw-r--r--  arch/x86/events/amd/lbr.c | 6
-rw-r--r--  arch/x86/events/amd/uncore.c | 1
-rw-r--r--  arch/x86/events/core.c | 48
-rw-r--r--  arch/x86/events/intel/core.c | 23
-rw-r--r--  arch/x86/events/intel/ds.c | 4
-rw-r--r--  arch/x86/events/intel/lbr.c | 30
-rw-r--r--  arch/x86/events/intel/p4.c | 2
-rw-r--r--  arch/x86/events/intel/pt.c | 9
-rw-r--r--  arch/x86/events/intel/uncore.h | 24
-rw-r--r--  arch/x86/events/intel/uncore_snb.c | 3
-rw-r--r--  arch/x86/events/intel/uncore_snbep.c | 495
-rw-r--r--  arch/x86/events/perf_event.h | 31
-rw-r--r--  arch/x86/hyperv/hv_init.c | 58
-rw-r--r--  arch/x86/ia32/Makefile | 2
-rw-r--r--  arch/x86/include/asm/apic.h | 3
-rw-r--r--  arch/x86/include/asm/cpufeatures.h | 4
-rw-r--r--  arch/x86/include/asm/elf.h | 4
-rw-r--r--  arch/x86/include/asm/fpu/signal.h | 7
-rw-r--r--  arch/x86/include/asm/ftrace.h | 49
-rw-r--r--  arch/x86/include/asm/hyperv-tlfs.h | 11
-rw-r--r--  arch/x86/include/asm/hyperv_timer.h | 9
-rw-r--r--  arch/x86/include/asm/irq_remapping.h | 4
-rw-r--r--  arch/x86/include/asm/irqdomain.h | 4
-rw-r--r--  arch/x86/include/asm/mshyperv.h | 2
-rw-r--r--  arch/x86/include/asm/msi.h | 6
-rw-r--r--  arch/x86/include/asm/msr-index.h | 29
-rw-r--r--  arch/x86/include/asm/nospec-branch.h | 2
-rw-r--r--  arch/x86/include/asm/pci.h | 5
-rw-r--r--  arch/x86/include/asm/pgtable.h | 9
-rw-r--r--  arch/x86/include/asm/qspinlock_paravirt.h | 2
-rw-r--r--  arch/x86/include/asm/resctrl.h | 8
-rw-r--r--  arch/x86/include/asm/segment.h | 2
-rw-r--r--  arch/x86/include/asm/sgx.h | 33
-rw-r--r--  arch/x86/include/asm/sighandling.h | 9
-rw-r--r--  arch/x86/include/asm/signal.h | 5
-rw-r--r--  arch/x86/include/asm/tdx.h | 2
-rw-r--r--  arch/x86/kernel/Makefile | 4
-rw-r--r--  arch/x86/kernel/alternative.c | 4
-rw-r--r--  arch/x86/kernel/apic/apic.c | 13
-rw-r--r--  arch/x86/kernel/apic/msi.c | 211
-rw-r--r--  arch/x86/kernel/apic/vector.c | 4
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 6
-rw-r--r--  arch/x86/kernel/cpu/bugs.c | 21
-rw-r--r--  arch/x86/kernel/cpu/cpuid-deps.c | 1
-rw-r--r--  arch/x86/kernel/cpu/hygon.c | 4
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 63
-rw-r--r--  arch/x86/kernel/cpu/mce/amd.c | 33
-rw-r--r--  arch/x86/kernel/cpu/mce/severity.c | 8
-rw-r--r--  arch/x86/kernel/cpu/mshyperv.c | 6
-rw-r--r--  arch/x86/kernel/cpu/resctrl/core.c | 4
-rw-r--r--  arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 3
-rw-r--r--  arch/x86/kernel/cpu/resctrl/internal.h | 10
-rw-r--r--  arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 4
-rw-r--r--  arch/x86/kernel/cpu/scattered.c | 1
-rw-r--r--  arch/x86/kernel/cpu/sgx/encl.c | 35
-rw-r--r--  arch/x86/kernel/cpu/sgx/ioctl.c | 9
-rw-r--r--  arch/x86/kernel/cpu/sgx/main.c | 8
-rw-r--r--  arch/x86/kernel/cpu/tsx.c | 38
-rw-r--r--  arch/x86/kernel/devicetree.c | 18
-rw-r--r--  arch/x86/kernel/espfix_64.c | 12
-rw-r--r--  arch/x86/kernel/fpu/core.c | 21
-rw-r--r--  arch/x86/kernel/fpu/init.c | 7
-rw-r--r--  arch/x86/kernel/fpu/regset.c | 2
-rw-r--r--  arch/x86/kernel/fpu/signal.c | 2
-rw-r--r--  arch/x86/kernel/fpu/xstate.c | 64
-rw-r--r--  arch/x86/kernel/fpu/xstate.h | 4
-rw-r--r--  arch/x86/kernel/head_32.S | 22
-rw-r--r--  arch/x86/kernel/i8259.c | 2
-rw-r--r--  arch/x86/kernel/module.c | 11
-rw-r--r--  arch/x86/kernel/process.c | 2
-rw-r--r--  arch/x86/kernel/ptrace.c | 174
-rw-r--r--  arch/x86/kernel/signal.c | 654
-rw-r--r--  arch/x86/kernel/signal_32.c (renamed from arch/x86/ia32/ia32_signal.c) | 117
-rw-r--r--  arch/x86/kernel/signal_64.c | 383
-rw-r--r--  arch/x86/kernel/traps.c | 4
-rw-r--r--  arch/x86/kernel/tsc.c | 2
-rw-r--r--  arch/x86/kernel/uprobes.c | 4
-rw-r--r--  arch/x86/kvm/cpuid.c | 6
-rw-r--r--  arch/x86/kvm/mmu/mmu.c | 13
-rw-r--r--  arch/x86/kvm/reverse_cpuid.h | 3
-rw-r--r--  arch/x86/kvm/svm/nested.c | 12
-rw-r--r--  arch/x86/kvm/svm/svm.c | 26
-rw-r--r--  arch/x86/kvm/vmx/nested.c | 4
-rw-r--r--  arch/x86/kvm/x86.c | 29
-rw-r--r--  arch/x86/kvm/xen.c | 32
-rw-r--r--  arch/x86/mm/ioremap.c | 8
-rw-r--r--  arch/x86/mm/pat/set_memory.c | 18
-rw-r--r--  arch/x86/net/bpf_jit_comp.c | 13
-rw-r--r--  arch/x86/platform/olpc/olpc-xo15-sci.c | 3
-rw-r--r--  arch/x86/power/cpu.c | 22
-rw-r--r--  arch/x86/xen/enlighten_pv.c | 3
-rw-r--r--  arch/x86/xen/setup.c | 3
-rw-r--r--  arch/x86/xen/smp.c | 24
-rw-r--r--  arch/x86/xen/smp_pv.c | 12
-rw-r--r--  arch/x86/xen/spinlock.c | 6
-rw-r--r--  arch/x86/xen/xen-asm.S | 8
107 files changed, 1873 insertions, 1412 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 67745ceab0db..276b3baf1c50 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -69,6 +69,7 @@ config X86
select ARCH_ENABLE_THP_MIGRATION if X86_64 && TRANSPARENT_HUGEPAGE
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_CACHE_LINE_SIZE
+ select ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE
@@ -81,6 +82,7 @@ config X86
select ARCH_HAS_KCOV if X86_64
select ARCH_HAS_MEM_ENCRYPT
select ARCH_HAS_MEMBARRIER_SYNC_CORE
+ select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PMEM_API if X86_64
select ARCH_HAS_PTE_DEVMAP if X86_64
@@ -462,8 +464,8 @@ config X86_X2APIC
Some Intel systems circa 2022 and later are locked into x2APIC mode
and can not fall back to the legacy APIC modes if SGX or TDX are
- enabled in the BIOS. They will be unable to boot without enabling
- this option.
+ enabled in the BIOS. They will boot with very reduced functionality
+ without enabling this option.
If you don't know what to do here, say N.
@@ -1109,7 +1111,6 @@ config X86_LOCAL_APIC
def_bool y
depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI
select IRQ_DOMAIN_HIERARCHY
- select PCI_MSI_IRQ_DOMAIN if PCI_MSI
config X86_IO_APIC
def_bool y
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 9860ca5979f8..9e38ffaadb5d 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -83,7 +83,7 @@ cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \
$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
$(call if_changed,image)
- @$(kecho) 'Kernel: $@ is ready' ' (#'`cat .version`')'
+ @$(kecho) 'Kernel: $@ is ready' ' (#'$(or $(KBUILD_BUILD_VERSION),`cat .version`)')'
OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index e476bcbd9b42..454757fbdfe5 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -668,7 +668,7 @@ static bool process_mem_region(struct mem_vector *region,
}
}
#endif
- return 0;
+ return false;
}
#ifdef CONFIG_EFI
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 8a3fff9128bb..1c8541ae3b3a 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -350,7 +350,7 @@ static int _kstrtoul(const char *s, unsigned int base, unsigned long *res)
}
/**
- * kstrtoul - convert a string to an unsigned long
+ * boot_kstrtoul - convert a string to an unsigned long
* @s: The start of the string. The string must be null-terminated, and may also
* include a single newline before its terminating null. The first character
* may also be a plus sign, but not a minus sign.
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index b8998cf0508a..cfd4c95b9f04 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -5,6 +5,8 @@
#define pr_fmt(fmt) "tdx: " fmt
#include <linux/cpufeature.h>
+#include <linux/export.h>
+#include <linux/io.h>
#include <asm/coco.h>
#include <asm/tdx.h>
#include <asm/vmx.h>
@@ -15,6 +17,7 @@
/* TDX module Call Leaf IDs */
#define TDX_GET_INFO 1
#define TDX_GET_VEINFO 3
+#define TDX_GET_REPORT 4
#define TDX_ACCEPT_PAGE 6
/* TDX hypercall Leaf IDs */
@@ -36,6 +39,12 @@
#define ATTR_SEPT_VE_DISABLE BIT(28)
+/* TDX Module call error codes */
+#define TDCALL_RETURN_CODE(a) ((a) >> 32)
+#define TDCALL_INVALID_OPERAND 0xc0000100
+
+#define TDREPORT_SUBTYPE_0 0
+
/*
* Wrapper for standard use of __tdx_hypercall with no output aside from
* return code.
@@ -100,6 +109,37 @@ static inline void tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
panic("TDCALL %lld failed (Buggy TDX module!)\n", fn);
}
+/**
+ * tdx_mcall_get_report0() - Wrapper to get TDREPORT0 (a.k.a. TDREPORT
+ * subtype 0) using TDG.MR.REPORT TDCALL.
+ * @reportdata: Address of the input buffer which contains user-defined
+ * REPORTDATA to be included into TDREPORT.
+ * @tdreport: Address of the output buffer to store TDREPORT.
+ *
+ * Refer to section titled "TDG.MR.REPORT leaf" in the TDX Module
+ * v1.0 specification for more information on TDG.MR.REPORT TDCALL.
+ * It is used in the TDX guest driver module to get the TDREPORT0.
+ *
+ * Return 0 on success, -EINVAL for invalid operands, or -EIO on
+ * other TDCALL failures.
+ */
+int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport)
+{
+ u64 ret;
+
+ ret = __tdx_module_call(TDX_GET_REPORT, virt_to_phys(tdreport),
+ virt_to_phys(reportdata), TDREPORT_SUBTYPE_0,
+ 0, NULL);
+ if (ret) {
+ if (TDCALL_RETURN_CODE(ret) == TDCALL_INVALID_OPERAND)
+ return -EINVAL;
+ return -EIO;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tdx_mcall_get_report0);
+
static void tdx_parse_tdinfo(u64 *cc_mask)
{
struct tdx_module_output out;
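For orientation, a minimal caller sketch for the new tdx_mcall_get_report0() export follows. It is not part of the patch; the buffer lengths (64-byte REPORTDATA, 1024-byte TDREPORT), the alignment handling and the helper name are assumptions for illustration, and the real in-tree consumer is the TDX guest attestation driver.

    /* Illustrative only: sizes and alignment handling are assumed, not taken
     * from this patch.  kzalloc() is used for simplicity; the TDX spec also
     * imposes alignment requirements on both buffers.
     */
    #include <linux/slab.h>
    #include <linux/string.h>
    #include <asm/tdx.h>

    static int example_get_tdreport(void)
    {
            u8 *reportdata, *tdreport;
            int ret = -ENOMEM;

            reportdata = kzalloc(64, GFP_KERNEL);   /* assumed REPORTDATA size */
            tdreport   = kzalloc(1024, GFP_KERNEL); /* assumed TDREPORT size */
            if (!reportdata || !tdreport)
                    goto out;

            /* Caller-chosen data (e.g. an attestation nonce) goes here. */
            memset(reportdata, 0xab, 64);

            ret = tdx_mcall_get_report0(reportdata, tdreport);
            /* 0: tdreport holds TDREPORT0; -EINVAL/-EIO as documented above. */
    out:
            kfree(tdreport);
            kfree(reportdata);
            return ret;
    }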
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 4dd19819053a..59b93901660d 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -61,7 +61,7 @@ SYM_CODE_START(entry_SYSENTER_compat)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
/* Construct struct pt_regs on stack */
- pushq $__USER32_DS /* pt_regs->ss */
+ pushq $__USER_DS /* pt_regs->ss */
pushq $0 /* pt_regs->sp = 0 (placeholder) */
/*
@@ -197,7 +197,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
/* Construct struct pt_regs on stack */
- pushq $__USER32_DS /* pt_regs->ss */
+ pushq $__USER_DS /* pt_regs->ss */
pushq %r8 /* pt_regs->sp */
pushq %r11 /* pt_regs->flags */
pushq $__USER32_CS /* pt_regs->cs */
diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
index 4bf48462fca7..e8c60ae7a7c8 100644
--- a/arch/x86/entry/vdso/vdso.lds.S
+++ b/arch/x86/entry/vdso/vdso.lds.S
@@ -27,7 +27,9 @@ VERSION {
__vdso_time;
clock_getres;
__vdso_clock_getres;
+#ifdef CONFIG_X86_SGX
__vdso_sgx_enter_enclave;
+#endif
local: *;
};
}
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index d45c5fcfeac2..b8f3f9b9e53c 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -98,24 +98,6 @@ static int vdso_mremap(const struct vm_special_mapping *sm,
}
#ifdef CONFIG_TIME_NS
-static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
-{
- if (likely(vma->vm_mm == current->mm))
- return current->nsproxy->time_ns->vvar_page;
-
- /*
- * VM_PFNMAP | VM_IO protect .fault() handler from being called
- * through interfaces like /proc/$pid/mem or
- * process_vm_{readv,writev}() as long as there's no .access()
- * in special_mapping_vmops().
- * For more details check_vma_flags() and __access_remote_vm()
- */
-
- WARN(1, "vvar_page accessed remotely");
-
- return NULL;
-}
-
/*
* The vvar page layout depends on whether a task belongs to the root or
* non-root time namespace. Whenever a task changes its namespace, the VVAR
@@ -140,11 +122,6 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
return 0;
}
-#else
-static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
-{
- return NULL;
-}
#endif
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
@@ -210,11 +187,10 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
pgprot_decrypted(vma->vm_page_prot));
}
} else if (sym_offset == image->sym_hvclock_page) {
- struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page();
+ pfn = hv_get_tsc_pfn();
- if (tsc_pg && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK))
- return vmf_insert_pfn(vma, vmf->address,
- virt_to_phys(tsc_pg) >> PAGE_SHIFT);
+ if (pfn && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK))
+ return vmf_insert_pfn(vma, vmf->address, pfn);
} else if (sym_offset == image->sym_timens_page) {
struct page *timens_page = find_timens_vvar_page(vma);
diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c
index f1bff153d945..58461fa18b6f 100644
--- a/arch/x86/events/amd/brs.c
+++ b/arch/x86/events/amd/brs.c
@@ -384,7 +384,7 @@ static void amd_brs_poison_buffer(void)
* On ctxswin, sched_in = true, called after the PMU has started
* On ctxswout, sched_in = false, called before the PMU is stopped
*/
-void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in)
+void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 8b70237c33f7..d6f3703e4119 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -861,8 +861,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
pmu_enabled = cpuc->enabled;
cpuc->enabled = 0;
- /* stop everything (includes BRS) */
- amd_pmu_disable_all();
+ amd_brs_disable_all();
/* Drain BRS is in use (could be inactive) */
if (cpuc->lbr_users)
@@ -873,7 +872,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
cpuc->enabled = pmu_enabled;
if (pmu_enabled)
- amd_pmu_enable_all(0);
+ amd_brs_enable_all();
return amd_pmu_adjust_nmi_window(handled);
}
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 4cb710efbdd9..da3f5ebac4e1 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -631,7 +631,7 @@ static const struct attribute_group *op_attr_update[] = {
static struct perf_ibs perf_ibs_fetch = {
.pmu = {
- .task_ctx_nr = perf_invalid_context,
+ .task_ctx_nr = perf_hw_context,
.event_init = perf_ibs_init,
.add = perf_ibs_add,
@@ -655,7 +655,7 @@ static struct perf_ibs perf_ibs_fetch = {
static struct perf_ibs perf_ibs_op = {
.pmu = {
- .task_ctx_nr = perf_invalid_context,
+ .task_ctx_nr = perf_hw_context,
.event_init = perf_ibs_init,
.add = perf_ibs_add,
diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c
index 38a75216c12c..eb31f850841a 100644
--- a/arch/x86/events/amd/lbr.c
+++ b/arch/x86/events/amd/lbr.c
@@ -352,7 +352,7 @@ void amd_pmu_lbr_add(struct perf_event *event)
cpuc->br_sel = reg->reg;
}
- perf_sched_cb_inc(event->ctx->pmu);
+ perf_sched_cb_inc(event->pmu);
if (!cpuc->lbr_users++ && !event->total_time_running)
amd_pmu_lbr_reset();
@@ -370,10 +370,10 @@ void amd_pmu_lbr_del(struct perf_event *event)
cpuc->lbr_users--;
WARN_ON_ONCE(cpuc->lbr_users < 0);
- perf_sched_cb_dec(event->ctx->pmu);
+ perf_sched_cb_dec(event->pmu);
}
-void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
+void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index d568afc705d2..83f15fe411b3 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -553,6 +553,7 @@ static void uncore_clean_online(void)
hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) {
hlist_del(&uncore->node);
+ kfree(uncore->events);
kfree(uncore);
}
}
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index b30b8bbcd1e2..85a63a41c471 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -90,6 +90,8 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx);
DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);
+DEFINE_STATIC_CALL_NULL(x86_pmu_filter, *x86_pmu.filter);
+
/*
* This one is magic, it will get called even when PMU init fails (because
* there is no PMU), in which case it should simply return NULL.
@@ -2031,6 +2033,7 @@ static void x86_pmu_static_call_update(void)
static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases);
static_call_update(x86_pmu_guest_get_msrs, x86_pmu.guest_get_msrs);
+ static_call_update(x86_pmu_filter, x86_pmu.filter);
}
static void _x86_pmu_read(struct perf_event *event)
@@ -2052,23 +2055,6 @@ void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
pr_info("... event mask: %016Lx\n", intel_ctrl);
}
-/*
- * The generic code is not hybrid friendly. The hybrid_pmu->pmu
- * of the first registered PMU is unconditionally assigned to
- * each possible cpuctx->ctx.pmu.
- * Update the correct hybrid PMU to the cpuctx->ctx.pmu.
- */
-void x86_pmu_update_cpu_context(struct pmu *pmu, int cpu)
-{
- struct perf_cpu_context *cpuctx;
-
- if (!pmu->pmu_cpu_context)
- return;
-
- cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
- cpuctx->ctx.pmu = pmu;
-}
-
static int __init init_hw_perf_events(void)
{
struct x86_pmu_quirk *quirk;
@@ -2175,13 +2161,9 @@ static int __init init_hw_perf_events(void)
if (err)
goto out2;
} else {
- u8 cpu_type = get_this_hybrid_cpu_type();
struct x86_hybrid_pmu *hybrid_pmu;
int i, j;
- if (!cpu_type && x86_pmu.get_hybrid_cpu_type)
- cpu_type = x86_pmu.get_hybrid_cpu_type();
-
for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) {
hybrid_pmu = &x86_pmu.hybrid_pmu[i];
@@ -2195,9 +2177,6 @@ static int __init init_hw_perf_events(void)
(hybrid_pmu->cpu_type == hybrid_big) ? PERF_TYPE_RAW : -1);
if (err)
break;
-
- if (cpu_type == hybrid_pmu->cpu_type)
- x86_pmu_update_cpu_context(&hybrid_pmu->pmu, raw_smp_processor_id());
}
if (i < x86_pmu.num_hybrid_pmus) {
@@ -2646,15 +2625,15 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
NULL,
};
-static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
+static void x86_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
- static_call_cond(x86_pmu_sched_task)(ctx, sched_in);
+ static_call_cond(x86_pmu_sched_task)(pmu_ctx, sched_in);
}
-static void x86_pmu_swap_task_ctx(struct perf_event_context *prev,
- struct perf_event_context *next)
+static void x86_pmu_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
+ struct perf_event_pmu_context *next_epc)
{
- static_call_cond(x86_pmu_swap_task_ctx)(prev, next);
+ static_call_cond(x86_pmu_swap_task_ctx)(prev_epc, next_epc);
}
void perf_check_microcode(void)
@@ -2689,12 +2668,13 @@ static int x86_pmu_aux_output_match(struct perf_event *event)
return 0;
}
-static int x86_pmu_filter_match(struct perf_event *event)
+static bool x86_pmu_filter(struct pmu *pmu, int cpu)
{
- if (x86_pmu.filter_match)
- return x86_pmu.filter_match(event);
+ bool ret = false;
- return 1;
+ static_call_cond(x86_pmu_filter)(pmu, cpu, &ret);
+
+ return ret;
}
static struct pmu pmu = {
@@ -2725,7 +2705,7 @@ static struct pmu pmu = {
.aux_output_match = x86_pmu_aux_output_match,
- .filter_match = x86_pmu_filter_match,
+ .filter = x86_pmu_filter,
};
void arch_perf_update_userpage(struct perf_event *event,
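A side note on the mechanics used above: the filter hook now goes through a NULL-safe static call with an out-parameter instead of a per-event filter_match() return value. Below is a stripped-down sketch of that pattern; the my_* identifiers are invented, and only the plumbing mirrors the code above.

    /* Pattern sketch only: the real hook is x86_pmu.filter, installed in
     * x86_pmu_static_call_update() and invoked from x86_pmu_filter().
     */
    #include <linux/static_call.h>
    #include <linux/perf_event.h>
    #include <linux/cpumask.h>

    struct my_pmu_ops {
            void (*filter)(struct pmu *pmu, int cpu, bool *ret);
    };

    static struct my_pmu_ops my_ops;

    DEFINE_STATIC_CALL_NULL(my_filter, *my_ops.filter);

    /* Vendor callback: set *ret to true if @pmu must be skipped on @cpu. */
    static void my_hybrid_filter(struct pmu *pmu, int cpu, bool *ret)
    {
            *ret = !cpumask_test_cpu(cpu, cpu_online_mask); /* placeholder check */
    }

    static bool my_pmu_filter(struct pmu *pmu, int cpu)
    {
            bool ret = false;       /* default: never filter without a callback */

            /* NULL-safe: a no-op until static_call_update() installs a target. */
            static_call_cond(my_filter)(pmu, cpu, &ret);
            return ret;
    }

    static void my_init(void)
    {
            my_ops.filter = my_hybrid_filter;
            static_call_update(my_filter, my_ops.filter);
    }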
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 1b92bf05fd65..dfd2c124cdf8 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4536,8 +4536,6 @@ end:
cpumask_set_cpu(cpu, &pmu->supported_cpus);
cpuc->pmu = &pmu->pmu;
- x86_pmu_update_cpu_context(&pmu->pmu, cpu);
-
return true;
}
@@ -4671,17 +4669,17 @@ static void intel_pmu_cpu_dead(int cpu)
cpumask_clear_cpu(cpu, &hybrid_pmu(cpuc->pmu)->supported_cpus);
}
-static void intel_pmu_sched_task(struct perf_event_context *ctx,
+static void intel_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx,
bool sched_in)
{
- intel_pmu_pebs_sched_task(ctx, sched_in);
- intel_pmu_lbr_sched_task(ctx, sched_in);
+ intel_pmu_pebs_sched_task(pmu_ctx, sched_in);
+ intel_pmu_lbr_sched_task(pmu_ctx, sched_in);
}
-static void intel_pmu_swap_task_ctx(struct perf_event_context *prev,
- struct perf_event_context *next)
+static void intel_pmu_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
+ struct perf_event_pmu_context *next_epc)
{
- intel_pmu_lbr_swap_task_ctx(prev, next);
+ intel_pmu_lbr_swap_task_ctx(prev_epc, next_epc);
}
static int intel_pmu_check_period(struct perf_event *event, u64 value)
@@ -4705,12 +4703,11 @@ static int intel_pmu_aux_output_match(struct perf_event *event)
return is_intel_pt_event(event);
}
-static int intel_pmu_filter_match(struct perf_event *event)
+static void intel_pmu_filter(struct pmu *pmu, int cpu, bool *ret)
{
- struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
- unsigned int cpu = smp_processor_id();
+ struct x86_hybrid_pmu *hpmu = hybrid_pmu(pmu);
- return cpumask_test_cpu(cpu, &pmu->supported_cpus);
+ *ret = !cpumask_test_cpu(cpu, &hpmu->supported_cpus);
}
PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
@@ -6413,7 +6410,7 @@ __init int intel_pmu_init(void)
static_call_update(intel_pmu_set_topdown_event_period,
&adl_set_topdown_event_period);
- x86_pmu.filter_match = intel_pmu_filter_match;
+ x86_pmu.filter = intel_pmu_filter;
x86_pmu.get_event_constraints = adl_get_event_constraints;
x86_pmu.hw_config = adl_hw_config;
x86_pmu.limit_period = spr_limit_period;
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 446d2833efa7..88e58b6ee73c 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1069,7 +1069,7 @@ static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
}
-void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
+void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1177,7 +1177,7 @@ static void
pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
struct perf_event *event, bool add)
{
- struct pmu *pmu = event->ctx->pmu;
+ struct pmu *pmu = event->pmu;
/*
* Make sure we get updated with the first PEBS
* event. It will trigger also during removal, but
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 8259d725054d..017baba56b01 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -515,21 +515,21 @@ static void __intel_pmu_lbr_save(void *ctx)
cpuc->last_log_id = ++task_context_opt(ctx)->log_id;
}
-void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
- struct perf_event_context *next)
+void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
+ struct perf_event_pmu_context *next_epc)
{
void *prev_ctx_data, *next_ctx_data;
- swap(prev->task_ctx_data, next->task_ctx_data);
+ swap(prev_epc->task_ctx_data, next_epc->task_ctx_data);
/*
- * Architecture specific synchronization makes sense in
- * case both prev->task_ctx_data and next->task_ctx_data
+ * Architecture specific synchronization makes sense in case
+ * both prev_epc->task_ctx_data and next_epc->task_ctx_data
* pointers are allocated.
*/
- prev_ctx_data = next->task_ctx_data;
- next_ctx_data = prev->task_ctx_data;
+ prev_ctx_data = next_epc->task_ctx_data;
+ next_ctx_data = prev_epc->task_ctx_data;
if (!prev_ctx_data || !next_ctx_data)
return;
@@ -538,7 +538,7 @@ void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
task_context_opt(next_ctx_data)->lbr_callstack_users);
}
-void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
+void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
void *task_ctx;
@@ -551,7 +551,7 @@ void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
* the task was scheduled out, restore the stack. Otherwise flush
* the LBR stack.
*/
- task_ctx = ctx ? ctx->task_ctx_data : NULL;
+ task_ctx = pmu_ctx ? pmu_ctx->task_ctx_data : NULL;
if (task_ctx) {
if (sched_in)
__intel_pmu_lbr_restore(task_ctx);
@@ -587,8 +587,8 @@ void intel_pmu_lbr_add(struct perf_event *event)
cpuc->br_sel = event->hw.branch_reg.reg;
- if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data)
- task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users++;
+ if (branch_user_callstack(cpuc->br_sel) && event->pmu_ctx->task_ctx_data)
+ task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users++;
/*
* Request pmu::sched_task() callback, which will fire inside the
@@ -611,7 +611,7 @@ void intel_pmu_lbr_add(struct perf_event *event)
*/
if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
cpuc->lbr_pebs_users++;
- perf_sched_cb_inc(event->ctx->pmu);
+ perf_sched_cb_inc(event->pmu);
if (!cpuc->lbr_users++ && !event->total_time_running)
intel_pmu_lbr_reset();
}
@@ -664,8 +664,8 @@ void intel_pmu_lbr_del(struct perf_event *event)
return;
if (branch_user_callstack(cpuc->br_sel) &&
- event->ctx->task_ctx_data)
- task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users--;
+ event->pmu_ctx->task_ctx_data)
+ task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users--;
if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
cpuc->lbr_select = 0;
@@ -675,7 +675,7 @@ void intel_pmu_lbr_del(struct perf_event *event)
cpuc->lbr_users--;
WARN_ON_ONCE(cpuc->lbr_users < 0);
WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
- perf_sched_cb_dec(event->ctx->pmu);
+ perf_sched_cb_dec(event->pmu);
}
static inline bool vlbr_exclude_host(void)
diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c
index 03bbcc2fa2ff..35936188db01 100644
--- a/arch/x86/events/intel/p4.c
+++ b/arch/x86/events/intel/p4.c
@@ -24,7 +24,7 @@ struct p4_event_bind {
unsigned int escr_msr[2]; /* ESCR MSR for this event */
unsigned int escr_emask; /* valid ESCR EventMask bits */
unsigned int shared; /* event is shared across threads */
- char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */
+ signed char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */
};
struct p4_pebs_bind {
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 82ef87e9a897..42a55794004a 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -1263,6 +1263,15 @@ static int pt_buffer_try_single(struct pt_buffer *buf, int nr_pages)
if (1 << order != nr_pages)
goto out;
+ /*
+ * Some processors cannot always support single range for more than
+ * 4KB - refer errata TGL052, ADL037 and RPL017. Future processors might
+ * also be affected, so for now rather than trying to keep track of
+ * which ones, just disable it for all.
+ */
+ if (nr_pages > 1)
+ goto out;
+
buf->single = true;
buf->nr_pages = nr_pages;
ret = 0;
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 2adeaf4de4df..e278e2e7c051 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -2,6 +2,7 @@
#include <linux/slab.h>
#include <linux/pci.h>
#include <asm/apicdef.h>
+#include <asm/intel-family.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/perf_event.h>
@@ -88,12 +89,12 @@ struct intel_uncore_type {
* to identify which platform component each PMON block of that type is
* supposed to monitor.
*/
- struct intel_uncore_topology *topology;
+ struct intel_uncore_topology **topology;
/*
* Optional callbacks for managing mapping of Uncore units to PMONs
*/
int (*get_topology)(struct intel_uncore_type *type);
- int (*set_mapping)(struct intel_uncore_type *type);
+ void (*set_mapping)(struct intel_uncore_type *type);
void (*cleanup_mapping)(struct intel_uncore_type *type);
};
@@ -178,11 +179,26 @@ struct freerunning_counters {
unsigned *box_offsets;
};
-struct intel_uncore_topology {
- u64 configuration;
+struct uncore_iio_topology {
+ int pci_bus_no;
int segment;
};
+struct uncore_upi_topology {
+ int die_to;
+ int pmu_idx_to;
+ int enabled;
+};
+
+struct intel_uncore_topology {
+ int pmu_idx;
+ union {
+ void *untyped;
+ struct uncore_iio_topology *iio;
+ struct uncore_upi_topology *upi;
+ };
+};
+
struct pci2phy_map {
struct list_head list;
int segment;
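To make the new layout concrete: topology is now a per-die array with one entry per PMON box, and the union carries either the IIO or the UPI payload. A hypothetical walk over the IIO variant (the function and the pr_info() format are illustrative, and it assumes the type's get_topology() callback has already populated the table):

    /* Illustrative only: dump the IIO mapping stored in the new layout.
     * Relies on the declarations from this header (uncore.h).
     */
    static void example_dump_iio_topology(struct intel_uncore_type *type, int die)
    {
            int idx;

            for (idx = 0; idx < type->num_boxes; idx++) {
                    struct intel_uncore_topology *t = &type->topology[die][idx];

                    pr_info("%s die%d pmu%d -> %04x:%02x\n", type->name, die,
                            t->pmu_idx, t->iio->segment, t->iio->pci_bus_no);
            }
    }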
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 1ef4f7861e2e..1f4869227efb 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -1338,6 +1338,7 @@ static void __uncore_imc_init_box(struct intel_uncore_box *box,
/* MCHBAR is disabled */
if (!(mch_bar & BIT(0))) {
pr_warn("perf uncore: MCHBAR is disabled. Failed to map IMC free-running counters.\n");
+ pci_dev_put(pdev);
return;
}
mch_bar &= ~BIT(0);
@@ -1352,6 +1353,8 @@ static void __uncore_imc_init_box(struct intel_uncore_box *box,
box->io_addr = ioremap(addr, type->mmio_map_size);
if (!box->io_addr)
pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name);
+
+ pci_dev_put(pdev);
}
static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
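The two hunks above are reference-count fixes: pci_get_device() (like the other pci_get_*() lookups) takes a reference on the returned device, and every exit path must drop it. A generic sketch of the pattern follows; the register offset, device ID handling and helper name are placeholders, not taken from this driver.

    /* Illustrative only: the point is that pci_dev_put() runs on every path
     * after a successful pci_get_device(), including early-error returns.
     */
    #include <linux/pci.h>
    #include <linux/bits.h>
    #include <linux/io.h>

    static void __iomem *example_map_bar(unsigned int devid, resource_size_t size)
    {
            struct pci_dev *pdev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, NULL);
            void __iomem *io = NULL;
            u32 bar;

            if (!pdev)
                    return NULL;

            if (!pci_read_config_dword(pdev, 0x48, &bar) && (bar & BIT(0)))
                    io = ioremap(bar & ~BIT(0), size);      /* BIT(0): enable flag */

            pci_dev_put(pdev);      /* balance the reference in all cases */
            return io;
    }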
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index ed869443efb2..44c2f879f708 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -445,6 +445,7 @@
#define ICX_UPI_PCI_PMON_CTR0 0x320
#define ICX_UPI_PCI_PMON_BOX_CTL 0x318
#define ICX_UPI_CTL_UMASK_EXT 0xffffff
+#define ICX_UBOX_DID 0x3450
/* ICX M3UPI*/
#define ICX_M3UPI_PCI_PMON_CTL0 0xd8
@@ -457,6 +458,7 @@
/* SPR */
#define SPR_RAW_EVENT_MASK_EXT 0xffffff
+#define SPR_UBOX_DID 0x3250
/* SPR CHA */
#define SPR_CHA_PMON_CTL_TID_EN (1 << 16)
@@ -1372,6 +1374,28 @@ static struct pci_driver snbep_uncore_pci_driver = {
#define NODE_ID_MASK 0x7
+/* Each three bits from 0 to 23 of GIDNIDMAP register correspond Node ID. */
+#define GIDNIDMAP(config, id) (((config) >> (3 * (id))) & 0x7)
+
+static int upi_nodeid_groupid(struct pci_dev *ubox_dev, int nodeid_loc, int idmap_loc,
+ int *nodeid, int *groupid)
+{
+ int ret;
+
+ /* get the Node ID of the local register */
+ ret = pci_read_config_dword(ubox_dev, nodeid_loc, nodeid);
+ if (ret)
+ goto err;
+
+ *nodeid = *nodeid & NODE_ID_MASK;
+ /* get the Node ID mapping */
+ ret = pci_read_config_dword(ubox_dev, idmap_loc, groupid);
+ if (ret)
+ goto err;
+err:
+ return ret;
+}
+
/*
* build pci bus to socket mapping
*/
@@ -1397,13 +1421,8 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
* the topology.
*/
if (nr_node_ids <= 8) {
- /* get the Node ID of the local register */
- err = pci_read_config_dword(ubox_dev, nodeid_loc, &config);
- if (err)
- break;
- nodeid = config & NODE_ID_MASK;
- /* get the Node ID mapping */
- err = pci_read_config_dword(ubox_dev, idmap_loc, &config);
+ err = upi_nodeid_groupid(ubox_dev, nodeid_loc, idmap_loc,
+ &nodeid, &config);
if (err)
break;
@@ -1421,7 +1440,7 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
* to a particular node.
*/
for (i = 0; i < 8; i++) {
- if (nodeid == ((config >> (3 * i)) & 0x7)) {
+ if (nodeid == GIDNIDMAP(config, i)) {
if (topology_max_die_per_package() > 1)
die_id = i;
else
@@ -2891,6 +2910,7 @@ static bool hswep_has_limit_sbox(unsigned int device)
return false;
pci_read_config_dword(dev, HSWEP_PCU_CAPID4_OFFET, &capid4);
+ pci_dev_put(dev);
if (!hswep_get_chop(capid4))
return true;
@@ -3699,10 +3719,16 @@ static struct intel_uncore_ops skx_uncore_iio_ops = {
.read_counter = uncore_msr_read_counter,
};
-static inline u8 skx_iio_stack(struct intel_uncore_pmu *pmu, int die)
+static struct intel_uncore_topology *pmu_topology(struct intel_uncore_pmu *pmu, int die)
{
- return pmu->type->topology[die].configuration >>
- (pmu->pmu_idx * BUS_NUM_STRIDE);
+ int idx;
+
+ for (idx = 0; idx < pmu->type->num_boxes; idx++) {
+ if (pmu->type->topology[die][idx].pmu_idx == pmu->pmu_idx)
+ return &pmu->type->topology[die][idx];
+ }
+
+ return NULL;
}
static umode_t
@@ -3710,8 +3736,9 @@ pmu_iio_mapping_visible(struct kobject *kobj, struct attribute *attr,
int die, int zero_bus_pmu)
{
struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(kobj_to_dev(kobj));
+ struct intel_uncore_topology *pmut = pmu_topology(pmu, die);
- return (!skx_iio_stack(pmu, die) && pmu->pmu_idx != zero_bus_pmu) ? 0 : attr->mode;
+ return (pmut && !pmut->iio->pci_bus_no && pmu->pmu_idx != zero_bus_pmu) ? 0 : attr->mode;
}
static umode_t
@@ -3727,9 +3754,10 @@ static ssize_t skx_iio_mapping_show(struct device *dev,
struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(dev);
struct dev_ext_attribute *ea = to_dev_ext_attribute(attr);
long die = (long)ea->var;
+ struct intel_uncore_topology *pmut = pmu_topology(pmu, die);
- return sprintf(buf, "%04x:%02x\n", pmu->type->topology[die].segment,
- skx_iio_stack(pmu, die));
+ return sprintf(buf, "%04x:%02x\n", pmut ? pmut->iio->segment : 0,
+ pmut ? pmut->iio->pci_bus_no : 0);
}
static int skx_msr_cpu_bus_read(int cpu, u64 *topology)
@@ -3764,18 +3792,79 @@ static int die_to_cpu(int die)
return res;
}
-static int skx_iio_get_topology(struct intel_uncore_type *type)
+enum {
+ IIO_TOPOLOGY_TYPE,
+ UPI_TOPOLOGY_TYPE,
+ TOPOLOGY_MAX
+};
+
+static const size_t topology_size[TOPOLOGY_MAX] = {
+ sizeof(*((struct intel_uncore_topology *)NULL)->iio),
+ sizeof(*((struct intel_uncore_topology *)NULL)->upi)
+};
+
+static int pmu_alloc_topology(struct intel_uncore_type *type, int topology_type)
{
- int die, ret = -EPERM;
+ int die, idx;
+ struct intel_uncore_topology **topology;
+
+ if (!type->num_boxes)
+ return -EPERM;
- type->topology = kcalloc(uncore_max_dies(), sizeof(*type->topology),
- GFP_KERNEL);
- if (!type->topology)
- return -ENOMEM;
+ topology = kcalloc(uncore_max_dies(), sizeof(*topology), GFP_KERNEL);
+ if (!topology)
+ goto err;
for (die = 0; die < uncore_max_dies(); die++) {
- ret = skx_msr_cpu_bus_read(die_to_cpu(die),
- &type->topology[die].configuration);
+ topology[die] = kcalloc(type->num_boxes, sizeof(**topology), GFP_KERNEL);
+ if (!topology[die])
+ goto clear;
+ for (idx = 0; idx < type->num_boxes; idx++) {
+ topology[die][idx].untyped = kcalloc(type->num_boxes,
+ topology_size[topology_type],
+ GFP_KERNEL);
+ if (!topology[die][idx].untyped)
+ goto clear;
+ }
+ }
+
+ type->topology = topology;
+
+ return 0;
+clear:
+ for (; die >= 0; die--) {
+ for (idx = 0; idx < type->num_boxes; idx++)
+ kfree(topology[die][idx].untyped);
+ kfree(topology[die]);
+ }
+ kfree(topology);
+err:
+ return -ENOMEM;
+}
+
+static void pmu_free_topology(struct intel_uncore_type *type)
+{
+ int die, idx;
+
+ if (type->topology) {
+ for (die = 0; die < uncore_max_dies(); die++) {
+ for (idx = 0; idx < type->num_boxes; idx++)
+ kfree(type->topology[die][idx].untyped);
+ kfree(type->topology[die]);
+ }
+ kfree(type->topology);
+ type->topology = NULL;
+ }
+}
+
+static int skx_pmu_get_topology(struct intel_uncore_type *type,
+ int (*topology_cb)(struct intel_uncore_type*, int, int, u64))
+{
+ int die, ret = -EPERM;
+ u64 cpu_bus_msr;
+
+ for (die = 0; die < uncore_max_dies(); die++) {
+ ret = skx_msr_cpu_bus_read(die_to_cpu(die), &cpu_bus_msr);
if (ret)
break;
@@ -3783,15 +3872,33 @@ static int skx_iio_get_topology(struct intel_uncore_type *type)
if (ret < 0)
break;
- type->topology[die].segment = ret;
+ ret = topology_cb(type, ret, die, cpu_bus_msr);
+ if (ret)
+ break;
}
- if (ret < 0) {
- kfree(type->topology);
- type->topology = NULL;
+ return ret;
+}
+
+static int skx_iio_topology_cb(struct intel_uncore_type *type, int segment,
+ int die, u64 cpu_bus_msr)
+{
+ int idx;
+ struct intel_uncore_topology *t;
+
+ for (idx = 0; idx < type->num_boxes; idx++) {
+ t = &type->topology[die][idx];
+ t->pmu_idx = idx;
+ t->iio->segment = segment;
+ t->iio->pci_bus_no = (cpu_bus_msr >> (idx * BUS_NUM_STRIDE)) & 0xff;
}
- return ret;
+ return 0;
+}
+
+static int skx_iio_get_topology(struct intel_uncore_type *type)
+{
+ return skx_pmu_get_topology(type, skx_iio_topology_cb);
}
static struct attribute_group skx_iio_mapping_group = {
@@ -3803,8 +3910,25 @@ static const struct attribute_group *skx_iio_attr_update[] = {
NULL,
};
-static int
-pmu_iio_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag)
+static void pmu_clear_mapping_attr(const struct attribute_group **groups,
+ struct attribute_group *ag)
+{
+ int i;
+
+ for (i = 0; groups[i]; i++) {
+ if (groups[i] == ag) {
+ for (i++; groups[i]; i++)
+ groups[i - 1] = groups[i];
+ groups[i - 1] = NULL;
+ break;
+ }
+ }
+}
+
+static void
+pmu_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag,
+ ssize_t (*show)(struct device*, struct device_attribute*, char*),
+ int topology_type)
{
char buf[64];
int ret;
@@ -3812,11 +3936,13 @@ pmu_iio_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag)
struct attribute **attrs = NULL;
struct dev_ext_attribute *eas = NULL;
- ret = type->get_topology(type);
+ ret = pmu_alloc_topology(type, topology_type);
if (ret < 0)
goto clear_attr_update;
- ret = -ENOMEM;
+ ret = type->get_topology(type);
+ if (ret < 0)
+ goto clear_topology;
/* One more for NULL. */
attrs = kcalloc((uncore_max_dies() + 1), sizeof(*attrs), GFP_KERNEL);
@@ -3828,20 +3954,20 @@ pmu_iio_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag)
goto clear_attrs;
for (die = 0; die < uncore_max_dies(); die++) {
- sprintf(buf, "die%ld", die);
+ snprintf(buf, sizeof(buf), "die%ld", die);
sysfs_attr_init(&eas[die].attr.attr);
eas[die].attr.attr.name = kstrdup(buf, GFP_KERNEL);
if (!eas[die].attr.attr.name)
goto err;
eas[die].attr.attr.mode = 0444;
- eas[die].attr.show = skx_iio_mapping_show;
+ eas[die].attr.show = show;
eas[die].attr.store = NULL;
eas[die].var = (void *)die;
attrs[die] = &eas[die].attr.attr;
}
ag->attrs = attrs;
- return 0;
+ return;
err:
for (; die >= 0; die--)
kfree(eas[die].attr.attr.name);
@@ -3849,14 +3975,13 @@ err:
clear_attrs:
kfree(attrs);
clear_topology:
- kfree(type->topology);
+ pmu_free_topology(type);
clear_attr_update:
- type->attr_update = NULL;
- return ret;
+ pmu_clear_mapping_attr(type->attr_update, ag);
}
static void
-pmu_iio_cleanup_mapping(struct intel_uncore_type *type, struct attribute_group *ag)
+pmu_cleanup_mapping(struct intel_uncore_type *type, struct attribute_group *ag)
{
struct attribute **attr = ag->attrs;
@@ -3868,17 +3993,23 @@ pmu_iio_cleanup_mapping(struct intel_uncore_type *type, struct attribute_group *
kfree(attr_to_ext_attr(*ag->attrs));
kfree(ag->attrs);
ag->attrs = NULL;
- kfree(type->topology);
+ pmu_free_topology(type);
+}
+
+static void
+pmu_iio_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag)
+{
+ pmu_set_mapping(type, ag, skx_iio_mapping_show, IIO_TOPOLOGY_TYPE);
}
-static int skx_iio_set_mapping(struct intel_uncore_type *type)
+static void skx_iio_set_mapping(struct intel_uncore_type *type)
{
- return pmu_iio_set_mapping(type, &skx_iio_mapping_group);
+ pmu_iio_set_mapping(type, &skx_iio_mapping_group);
}
static void skx_iio_cleanup_mapping(struct intel_uncore_type *type)
{
- pmu_iio_cleanup_mapping(type, &skx_iio_mapping_group);
+ pmu_cleanup_mapping(type, &skx_iio_mapping_group);
}
static struct intel_uncore_type skx_uncore_iio = {
@@ -4139,6 +4270,132 @@ static struct intel_uncore_ops skx_upi_uncore_pci_ops = {
.read_counter = snbep_uncore_pci_read_counter,
};
+static umode_t
+skx_upi_mapping_visible(struct kobject *kobj, struct attribute *attr, int die)
+{
+ struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(kobj_to_dev(kobj));
+
+ return pmu->type->topology[die][pmu->pmu_idx].upi->enabled ? attr->mode : 0;
+}
+
+static ssize_t skx_upi_mapping_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(dev);
+ struct dev_ext_attribute *ea = to_dev_ext_attribute(attr);
+ long die = (long)ea->var;
+ struct uncore_upi_topology *upi = pmu->type->topology[die][pmu->pmu_idx].upi;
+
+ return sysfs_emit(buf, "upi_%d,die_%d\n", upi->pmu_idx_to, upi->die_to);
+}
+
+#define SKX_UPI_REG_DID 0x2058
+#define SKX_UPI_REGS_ADDR_DEVICE_LINK0 0x0e
+#define SKX_UPI_REGS_ADDR_FUNCTION 0x00
+
+/*
+ * UPI Link Parameter 0
+ * | Bit | Default | Description
+ * | 19:16 | 0h | base_nodeid - The NodeID of the sending socket.
+ * | 12:8 | 00h | sending_port - The processor die port number of the sending port.
+ */
+#define SKX_KTILP0_OFFSET 0x94
+
+/*
+ * UPI Pcode Status. This register is used by PCode to store the link training status.
+ * | Bit | Default | Description
+ * | 4 | 0h | ll_status_valid — Bit indicates the valid training status
+ * logged from PCode to the BIOS.
+ */
+#define SKX_KTIPCSTS_OFFSET 0x120
+
+static int upi_fill_topology(struct pci_dev *dev, struct intel_uncore_topology *tp,
+ int pmu_idx)
+{
+ int ret;
+ u32 upi_conf;
+ struct uncore_upi_topology *upi = tp->upi;
+
+ tp->pmu_idx = pmu_idx;
+ ret = pci_read_config_dword(dev, SKX_KTIPCSTS_OFFSET, &upi_conf);
+ if (ret) {
+ ret = pcibios_err_to_errno(ret);
+ goto err;
+ }
+ upi->enabled = (upi_conf >> 4) & 1;
+ if (upi->enabled) {
+ ret = pci_read_config_dword(dev, SKX_KTILP0_OFFSET,
+ &upi_conf);
+ if (ret) {
+ ret = pcibios_err_to_errno(ret);
+ goto err;
+ }
+ upi->die_to = (upi_conf >> 16) & 0xf;
+ upi->pmu_idx_to = (upi_conf >> 8) & 0x1f;
+ }
+err:
+ return ret;
+}
+
+static int skx_upi_topology_cb(struct intel_uncore_type *type, int segment,
+ int die, u64 cpu_bus_msr)
+{
+ int idx, ret;
+ struct intel_uncore_topology *upi;
+ unsigned int devfn;
+ struct pci_dev *dev = NULL;
+ u8 bus = cpu_bus_msr >> (3 * BUS_NUM_STRIDE);
+
+ for (idx = 0; idx < type->num_boxes; idx++) {
+ upi = &type->topology[die][idx];
+ devfn = PCI_DEVFN(SKX_UPI_REGS_ADDR_DEVICE_LINK0 + idx,
+ SKX_UPI_REGS_ADDR_FUNCTION);
+ dev = pci_get_domain_bus_and_slot(segment, bus, devfn);
+ if (dev) {
+ ret = upi_fill_topology(dev, upi, idx);
+ if (ret)
+ break;
+ }
+ }
+
+ pci_dev_put(dev);
+ return ret;
+}
+
+static int skx_upi_get_topology(struct intel_uncore_type *type)
+{
+ /* CPX case is not supported */
+ if (boot_cpu_data.x86_stepping == 11)
+ return -EPERM;
+
+ return skx_pmu_get_topology(type, skx_upi_topology_cb);
+}
+
+static struct attribute_group skx_upi_mapping_group = {
+ .is_visible = skx_upi_mapping_visible,
+};
+
+static const struct attribute_group *skx_upi_attr_update[] = {
+ &skx_upi_mapping_group,
+ NULL
+};
+
+static void
+pmu_upi_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag)
+{
+ pmu_set_mapping(type, ag, skx_upi_mapping_show, UPI_TOPOLOGY_TYPE);
+}
+
+static void skx_upi_set_mapping(struct intel_uncore_type *type)
+{
+ pmu_upi_set_mapping(type, &skx_upi_mapping_group);
+}
+
+static void skx_upi_cleanup_mapping(struct intel_uncore_type *type)
+{
+ pmu_cleanup_mapping(type, &skx_upi_mapping_group);
+}
+
static struct intel_uncore_type skx_uncore_upi = {
.name = "upi",
.num_counters = 4,
@@ -4151,6 +4408,10 @@ static struct intel_uncore_type skx_uncore_upi = {
.box_ctl = SKX_UPI_PCI_PMON_BOX_CTL,
.ops = &skx_upi_uncore_pci_ops,
.format_group = &skx_upi_uncore_format_group,
+ .attr_update = skx_upi_attr_update,
+ .get_topology = skx_upi_get_topology,
+ .set_mapping = skx_upi_set_mapping,
+ .cleanup_mapping = skx_upi_cleanup_mapping,
};
static void skx_m2m_uncore_pci_init_box(struct intel_uncore_box *box)
@@ -4461,11 +4722,6 @@ static int sad_cfg_iio_topology(struct intel_uncore_type *type, u8 *sad_pmon_map
int die, stack_id, ret = -EPERM;
struct pci_dev *dev = NULL;
- type->topology = kcalloc(uncore_max_dies(), sizeof(*type->topology),
- GFP_KERNEL);
- if (!type->topology)
- return -ENOMEM;
-
while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, SNR_ICX_MESH2IIO_MMAP_DID, dev))) {
ret = pci_read_config_dword(dev, SNR_ICX_SAD_CONTROL_CFG, &sad_cfg);
if (ret) {
@@ -4483,14 +4739,12 @@ static int sad_cfg_iio_topology(struct intel_uncore_type *type, u8 *sad_pmon_map
/* Convert stack id from SAD_CONTROL to PMON notation. */
stack_id = sad_pmon_mapping[stack_id];
- ((u8 *)&(type->topology[die].configuration))[stack_id] = dev->bus->number;
- type->topology[die].segment = pci_domain_nr(dev->bus);
+ type->topology[die][stack_id].iio->segment = pci_domain_nr(dev->bus);
+ type->topology[die][stack_id].pmu_idx = stack_id;
+ type->topology[die][stack_id].iio->pci_bus_no = dev->bus->number;
}
- if (ret) {
- kfree(type->topology);
- type->topology = NULL;
- }
+ pci_dev_put(dev);
return ret;
}
@@ -4519,14 +4773,14 @@ static int snr_iio_get_topology(struct intel_uncore_type *type)
return sad_cfg_iio_topology(type, snr_sad_pmon_mapping);
}
-static int snr_iio_set_mapping(struct intel_uncore_type *type)
+static void snr_iio_set_mapping(struct intel_uncore_type *type)
{
- return pmu_iio_set_mapping(type, &snr_iio_mapping_group);
+ pmu_iio_set_mapping(type, &snr_iio_mapping_group);
}
static void snr_iio_cleanup_mapping(struct intel_uncore_type *type)
{
- pmu_iio_cleanup_mapping(type, &snr_iio_mapping_group);
+ pmu_cleanup_mapping(type, &snr_iio_mapping_group);
}
static struct event_constraint snr_uncore_iio_constraints[] = {
@@ -4857,6 +5111,8 @@ static int snr_uncore_mmio_map(struct intel_uncore_box *box,
addr += box_ctl;
+ pci_dev_put(pdev);
+
box->io_addr = ioremap(addr, type->mmio_map_size);
if (!box->io_addr) {
pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name);
@@ -5137,14 +5393,19 @@ static int icx_iio_get_topology(struct intel_uncore_type *type)
return sad_cfg_iio_topology(type, icx_sad_pmon_mapping);
}
-static int icx_iio_set_mapping(struct intel_uncore_type *type)
+static void icx_iio_set_mapping(struct intel_uncore_type *type)
{
- return pmu_iio_set_mapping(type, &icx_iio_mapping_group);
+ /* Detect ICX-D system. This case is not supported */
+ if (boot_cpu_data.x86_model == INTEL_FAM6_ICELAKE_D) {
+ pmu_clear_mapping_attr(type->attr_update, &icx_iio_mapping_group);
+ return;
+ }
+ pmu_iio_set_mapping(type, &icx_iio_mapping_group);
}
static void icx_iio_cleanup_mapping(struct intel_uncore_type *type)
{
- pmu_iio_cleanup_mapping(type, &icx_iio_mapping_group);
+ pmu_cleanup_mapping(type, &icx_iio_mapping_group);
}
static struct intel_uncore_type icx_uncore_iio = {
@@ -5337,6 +5598,76 @@ static const struct attribute_group icx_upi_uncore_format_group = {
.attrs = icx_upi_uncore_formats_attr,
};
+#define ICX_UPI_REGS_ADDR_DEVICE_LINK0 0x02
+#define ICX_UPI_REGS_ADDR_FUNCTION 0x01
+
+static int discover_upi_topology(struct intel_uncore_type *type, int ubox_did, int dev_link0)
+{
+ struct pci_dev *ubox = NULL;
+ struct pci_dev *dev = NULL;
+ u32 nid, gid;
+ int i, idx, ret = -EPERM;
+ struct intel_uncore_topology *upi;
+ unsigned int devfn;
+
+ /* GIDNIDMAP method supports machines which have less than 8 sockets. */
+ if (uncore_max_dies() > 8)
+ goto err;
+
+ while ((ubox = pci_get_device(PCI_VENDOR_ID_INTEL, ubox_did, ubox))) {
+ ret = upi_nodeid_groupid(ubox, SKX_CPUNODEID, SKX_GIDNIDMAP, &nid, &gid);
+ if (ret) {
+ ret = pcibios_err_to_errno(ret);
+ break;
+ }
+
+ for (i = 0; i < 8; i++) {
+ if (nid != GIDNIDMAP(gid, i))
+ continue;
+ for (idx = 0; idx < type->num_boxes; idx++) {
+ upi = &type->topology[nid][idx];
+ devfn = PCI_DEVFN(dev_link0 + idx, ICX_UPI_REGS_ADDR_FUNCTION);
+ dev = pci_get_domain_bus_and_slot(pci_domain_nr(ubox->bus),
+ ubox->bus->number,
+ devfn);
+ if (dev) {
+ ret = upi_fill_topology(dev, upi, idx);
+ if (ret)
+ goto err;
+ }
+ }
+ }
+ }
+err:
+ pci_dev_put(ubox);
+ pci_dev_put(dev);
+ return ret;
+}
+
+static int icx_upi_get_topology(struct intel_uncore_type *type)
+{
+ return discover_upi_topology(type, ICX_UBOX_DID, ICX_UPI_REGS_ADDR_DEVICE_LINK0);
+}
+
+static struct attribute_group icx_upi_mapping_group = {
+ .is_visible = skx_upi_mapping_visible,
+};
+
+static const struct attribute_group *icx_upi_attr_update[] = {
+ &icx_upi_mapping_group,
+ NULL
+};
+
+static void icx_upi_set_mapping(struct intel_uncore_type *type)
+{
+ pmu_upi_set_mapping(type, &icx_upi_mapping_group);
+}
+
+static void icx_upi_cleanup_mapping(struct intel_uncore_type *type)
+{
+ pmu_cleanup_mapping(type, &icx_upi_mapping_group);
+}
+
static struct intel_uncore_type icx_uncore_upi = {
.name = "upi",
.num_counters = 4,
@@ -5349,6 +5680,10 @@ static struct intel_uncore_type icx_uncore_upi = {
.box_ctl = ICX_UPI_PCI_PMON_BOX_CTL,
.ops = &skx_upi_uncore_pci_ops,
.format_group = &icx_upi_uncore_format_group,
+ .attr_update = icx_upi_attr_update,
+ .get_topology = icx_upi_get_topology,
+ .set_mapping = icx_upi_set_mapping,
+ .cleanup_mapping = icx_upi_cleanup_mapping,
};
static struct event_constraint icx_uncore_m3upi_constraints[] = {
@@ -5780,9 +6115,43 @@ static struct intel_uncore_type spr_uncore_m2m = {
.name = "m2m",
};
+static struct attribute_group spr_upi_mapping_group = {
+ .is_visible = skx_upi_mapping_visible,
+};
+
+static const struct attribute_group *spr_upi_attr_update[] = {
+ &uncore_alias_group,
+ &spr_upi_mapping_group,
+ NULL
+};
+
+#define SPR_UPI_REGS_ADDR_DEVICE_LINK0 0x01
+
+static void spr_upi_set_mapping(struct intel_uncore_type *type)
+{
+ pmu_upi_set_mapping(type, &spr_upi_mapping_group);
+}
+
+static void spr_upi_cleanup_mapping(struct intel_uncore_type *type)
+{
+ pmu_cleanup_mapping(type, &spr_upi_mapping_group);
+}
+
+static int spr_upi_get_topology(struct intel_uncore_type *type)
+{
+ return discover_upi_topology(type, SPR_UBOX_DID, SPR_UPI_REGS_ADDR_DEVICE_LINK0);
+}
+
static struct intel_uncore_type spr_uncore_upi = {
- SPR_UNCORE_PCI_COMMON_FORMAT(),
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .event_mask_ext = SPR_RAW_EVENT_MASK_EXT,
+ .format_group = &spr_uncore_raw_format_group,
+ .ops = &spr_uncore_pci_ops,
.name = "upi",
+ .attr_update = spr_upi_attr_update,
+ .get_topology = spr_upi_get_topology,
+ .set_mapping = spr_upi_set_mapping,
+ .cleanup_mapping = spr_upi_cleanup_mapping,
};
static struct intel_uncore_type spr_uncore_m3upi = {
@@ -5986,6 +6355,12 @@ static void uncore_type_customized_copy(struct intel_uncore_type *to_type,
to_type->format_group = from_type->format_group;
if (from_type->attr_update)
to_type->attr_update = from_type->attr_update;
+ if (from_type->set_mapping)
+ to_type->set_mapping = from_type->set_mapping;
+ if (from_type->get_topology)
+ to_type->get_topology = from_type->get_topology;
+ if (from_type->cleanup_mapping)
+ to_type->cleanup_mapping = from_type->cleanup_mapping;
}
static struct intel_uncore_type **
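For reference, the GIDNIDMAP decoding that upi_nodeid_groupid() and the mapping loops above rely on can be shown in isolation. The helper below is hypothetical; only the 3-bits-per-entry packing comes from the code above.

    /* Illustrative only: eight 3-bit Node IDs packed into GIDNIDMAP; the
     * index whose field matches the local Node ID identifies the group,
     * which the caller maps to a die or logical package.
     */
    #define EXAMPLE_GIDNIDMAP(config, id)   (((config) >> (3 * (id))) & 0x7)

    static int example_group_for_node(u32 gidnidmap, int nodeid)
    {
            int i;

            for (i = 0; i < 8; i++) {
                    if (EXAMPLE_GIDNIDMAP(gidnidmap, i) == nodeid)
                            return i;       /* matching group index */
            }

            return -1;      /* node not present in the map */
    }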
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 332d2e6d8ae4..0e849f28a5c1 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -811,7 +811,7 @@ struct x86_pmu {
void (*cpu_dead)(int cpu);
void (*check_microcode)(void);
- void (*sched_task)(struct perf_event_context *ctx,
+ void (*sched_task)(struct perf_event_pmu_context *pmu_ctx,
bool sched_in);
/*
@@ -894,12 +894,12 @@ struct x86_pmu {
int num_topdown_events;
/*
- * perf task context (i.e. struct perf_event_context::task_ctx_data)
+ * perf task context (i.e. struct perf_event_pmu_context::task_ctx_data)
* switch helper to bridge calls from perf/core to perf/x86.
* See struct pmu::swap_task_ctx() usage for examples;
*/
- void (*swap_task_ctx)(struct perf_event_context *prev,
- struct perf_event_context *next);
+ void (*swap_task_ctx)(struct perf_event_pmu_context *prev_epc,
+ struct perf_event_pmu_context *next_epc);
/*
* AMD bits
@@ -925,7 +925,7 @@ struct x86_pmu {
int (*aux_output_match) (struct perf_event *event);
- int (*filter_match)(struct perf_event *event);
+ void (*filter)(struct pmu *pmu, int cpu, bool *ret);
/*
* Hybrid support
*
@@ -1180,8 +1180,6 @@ int x86_pmu_handle_irq(struct pt_regs *regs);
void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
u64 intel_ctrl);
-void x86_pmu_update_cpu_context(struct pmu *pmu, int cpu);
-
extern struct event_constraint emptyconstraint;
extern struct event_constraint unconstrained;
@@ -1306,7 +1304,7 @@ void amd_pmu_lbr_reset(void);
void amd_pmu_lbr_read(void);
void amd_pmu_lbr_add(struct perf_event *event);
void amd_pmu_lbr_del(struct perf_event *event);
-void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
+void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
void amd_pmu_lbr_enable_all(void);
void amd_pmu_lbr_disable_all(void);
int amd_pmu_lbr_hw_config(struct perf_event *event);
@@ -1322,7 +1320,6 @@ void amd_brs_enable_all(void);
void amd_brs_disable_all(void);
void amd_brs_drain(void);
void amd_brs_lopwr_init(void);
-void amd_brs_disable_all(void);
int amd_brs_hw_config(struct perf_event *event);
void amd_brs_reset(void);
@@ -1330,7 +1327,7 @@ static inline void amd_pmu_brs_add(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- perf_sched_cb_inc(event->ctx->pmu);
+ perf_sched_cb_inc(event->pmu);
cpuc->lbr_users++;
/*
* No need to reset BRS because it is reset
@@ -1345,10 +1342,10 @@ static inline void amd_pmu_brs_del(struct perf_event *event)
cpuc->lbr_users--;
WARN_ON_ONCE(cpuc->lbr_users < 0);
- perf_sched_cb_dec(event->ctx->pmu);
+ perf_sched_cb_dec(event->pmu);
}
-void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in);
+void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
#else
static inline int amd_brs_init(void)
{
@@ -1373,7 +1370,7 @@ static inline void amd_pmu_brs_del(struct perf_event *event)
{
}
-static inline void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in)
+static inline void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
}
@@ -1533,7 +1530,7 @@ void intel_pmu_pebs_enable_all(void);
void intel_pmu_pebs_disable_all(void);
-void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
+void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
void intel_pmu_auto_reload_read(struct perf_event *event);
@@ -1541,10 +1538,10 @@ void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);
void intel_ds_init(void);
-void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
- struct perf_event_context *next);
+void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
+ struct perf_event_pmu_context *next_epc);
-void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
+void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
u64 lbr_from_signext_quirk_wr(u64 val);
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index f49bc3ec76e6..41ef036ebb7b 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -77,7 +77,7 @@ static int hyperv_init_ghcb(void)
static int hv_cpu_init(unsigned int cpu)
{
union hv_vp_assist_msr_contents msr = { 0 };
- struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()];
+ struct hv_vp_assist_page **hvp = &hv_vp_assist_page[cpu];
int ret;
ret = hv_common_cpu_init(cpu);
@@ -87,34 +87,32 @@ static int hv_cpu_init(unsigned int cpu)
if (!hv_vp_assist_page)
return 0;
- if (!*hvp) {
- if (hv_root_partition) {
- /*
- * For root partition we get the hypervisor provided VP assist
- * page, instead of allocating a new page.
- */
- rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
- *hvp = memremap(msr.pfn <<
- HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT,
- PAGE_SIZE, MEMREMAP_WB);
- } else {
- /*
- * The VP assist page is an "overlay" page (see Hyper-V TLFS's
- * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed
- * out to make sure we always write the EOI MSR in
- * hv_apic_eoi_write() *after* the EOI optimization is disabled
- * in hv_cpu_die(), otherwise a CPU may not be stopped in the
- * case of CPU offlining and the VM will hang.
- */
+ if (hv_root_partition) {
+ /*
+ * For root partition we get the hypervisor provided VP assist
+ * page, instead of allocating a new page.
+ */
+ rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
+ *hvp = memremap(msr.pfn << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT,
+ PAGE_SIZE, MEMREMAP_WB);
+ } else {
+ /*
+ * The VP assist page is an "overlay" page (see Hyper-V TLFS's
+ * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed
+ * out to make sure we always write the EOI MSR in
+ * hv_apic_eoi_write() *after* the EOI optimization is disabled
+ * in hv_cpu_die(), otherwise a CPU may not be stopped in the
+ * case of CPU offlining and the VM will hang.
+ */
+ if (!*hvp)
*hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
- if (*hvp)
- msr.pfn = vmalloc_to_pfn(*hvp);
- }
- WARN_ON(!(*hvp));
- if (*hvp) {
- msr.enable = 1;
- wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
- }
+ if (*hvp)
+ msr.pfn = vmalloc_to_pfn(*hvp);
+
+ }
+ if (!WARN_ON(!(*hvp))) {
+ msr.enable = 1;
+ wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
}
return hyperv_init_ghcb();
@@ -464,6 +462,8 @@ void __init hyperv_init(void)
BUG_ON(!src);
memcpy_to_page(pg, 0, src, HV_HYP_PAGE_SIZE);
memunmap(src);
+
+ hv_remap_tsc_clocksource();
} else {
hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
@@ -537,8 +537,6 @@ void hyperv_cleanup(void)
union hv_x64_msr_hypercall_contents hypercall_msr;
union hv_reference_tsc_msr tsc_msr;
- unregister_syscore_ops(&hv_syscore_ops);
-
/* Reset our OS id */
wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile
index e481056698de..333556a86b2a 100644
--- a/arch/x86/ia32/Makefile
+++ b/arch/x86/ia32/Makefile
@@ -3,7 +3,5 @@
# Makefile for the ia32 kernel emulation subsystem.
#
-obj-$(CONFIG_IA32_EMULATION) := ia32_signal.o
-
audit-class-$(CONFIG_AUDIT) := audit.o
obj-$(CONFIG_IA32_EMULATION) += $(audit-class-y)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 3415321c8240..3216da7074ba 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -249,7 +249,6 @@ static inline u64 native_x2apic_icr_read(void)
extern int x2apic_mode;
extern int x2apic_phys;
extern void __init x2apic_set_max_apicid(u32 apicid);
-extern void __init check_x2apic(void);
extern void x2apic_setup(void);
static inline int x2apic_enabled(void)
{
@@ -258,13 +257,13 @@ static inline int x2apic_enabled(void)
#define x2apic_supported() (boot_cpu_has(X86_FEATURE_X2APIC))
#else /* !CONFIG_X86_X2APIC */
-static inline void check_x2apic(void) { }
static inline void x2apic_setup(void) { }
static inline int x2apic_enabled(void) { return 0; }
#define x2apic_mode (0)
#define x2apic_supported() (0)
#endif /* !CONFIG_X86_X2APIC */
+extern void __init check_x2apic(void);
struct irq_data;
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index b71f4f2ecdd5..c9f4730bb113 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -304,6 +304,10 @@
#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
+#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* "" SGX EDECCSSA user leaf function */
+
+
+#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index cb0ff1055ab1..be8b58da63b9 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -152,10 +152,6 @@ do { \
(elf_check_arch_ia32(x) || \
(IS_ENABLED(CONFIG_X86_X32_ABI) && (x)->e_machine == EM_X86_64))
-#if __USER32_DS != __USER_DS
-# error "The following code assumes __USER32_DS == __USER_DS"
-#endif
-
static inline void elf_common_init(struct thread_struct *t,
struct pt_regs *regs, const u16 ds)
{
diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h
index e1c9df9102a5..611fa41711af 100644
--- a/arch/x86/include/asm/fpu/signal.h
+++ b/arch/x86/include/asm/fpu/signal.h
@@ -13,16 +13,9 @@
#ifdef CONFIG_X86_64
# include <uapi/asm/sigcontext.h>
# include <asm/user32.h>
-struct ksignal;
-int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
- compat_sigset_t *set, struct pt_regs *regs);
-int ia32_setup_frame(int sig, struct ksignal *ksig,
- compat_sigset_t *set, struct pt_regs *regs);
#else
# define user_i387_ia32_struct user_i387_struct
# define user32_fxsr_struct user_fxsr_struct
-# define ia32_setup_frame __setup_frame
-# define ia32_setup_rt_frame __setup_rt_frame
#endif
extern void convert_from_fxsr(struct user_i387_ia32_struct *env,
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 908d99b127d3..5061ac98ffa1 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -34,19 +34,6 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
return addr;
}
-/*
- * When a ftrace registered caller is tracing a function that is
- * also set by a register_ftrace_direct() call, it needs to be
- * differentiated in the ftrace_caller trampoline. To do this, we
- * place the direct caller in the ORIG_AX part of pt_regs. This
- * tells the ftrace_caller that there's a direct caller.
- */
-static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr)
-{
- /* Emulate a call */
- regs->orig_ax = addr;
-}
-
#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
struct ftrace_regs {
struct pt_regs regs;
@@ -61,9 +48,25 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs)
return &fregs->regs;
}
-#define ftrace_instruction_pointer_set(fregs, _ip) \
+#define ftrace_regs_set_instruction_pointer(fregs, _ip) \
do { (fregs)->regs.ip = (_ip); } while (0)
+#define ftrace_regs_get_instruction_pointer(fregs) \
+ ((fregs)->regs.ip)
+
+#define ftrace_regs_get_argument(fregs, n) \
+ regs_get_kernel_argument(&(fregs)->regs, n)
+#define ftrace_regs_get_stack_pointer(fregs) \
+ kernel_stack_pointer(&(fregs)->regs)
+#define ftrace_regs_return_value(fregs) \
+ regs_return_value(&(fregs)->regs)
+#define ftrace_regs_set_return_value(fregs, ret) \
+ regs_set_return_value(&(fregs)->regs, ret)
+#define ftrace_override_function_with_return(fregs) \
+ override_function_with_return(&(fregs)->regs)
+#define ftrace_regs_query_register_offset(name) \
+ regs_query_register_offset(name)
+
struct ftrace_ops;
#define ftrace_graph_func ftrace_graph_func
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
@@ -72,6 +75,24 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
#define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR
#endif
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+/*
+ * When a ftrace registered caller is tracing a function that is
+ * also set by a register_ftrace_direct() call, it needs to be
+ * differentiated in the ftrace_caller trampoline. To do this, we
+ * place the direct caller in the ORIG_AX part of pt_regs. This
+ * tells the ftrace_caller that there's a direct caller.
+ */
+static inline void
+__arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr)
+{
+ /* Emulate a call */
+ regs->orig_ax = addr;
+}
+#define arch_ftrace_set_direct_caller(fregs, addr) \
+ __arch_ftrace_set_direct_caller(&(fregs)->regs, addr)
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+
#ifdef CONFIG_DYNAMIC_FTRACE
struct dyn_arch_ftrace {
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 3089ec352743..6d9368ea3701 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -374,11 +374,20 @@ struct hv_nested_enlightenments_control {
struct hv_vp_assist_page {
__u32 apic_assist;
__u32 reserved1;
- __u64 vtl_control[3];
+ __u32 vtl_entry_reason;
+ __u32 vtl_reserved;
+ __u64 vtl_ret_x64rax;
+ __u64 vtl_ret_x64rcx;
struct hv_nested_enlightenments_control nested_control;
__u8 enlighten_vmentry;
__u8 reserved2[7];
__u64 current_nested_vmcs;
+ __u8 synthetic_time_unhalted_timer_expired;
+ __u8 reserved3[7];
+ __u8 virtualization_fault_information[40];
+ __u8 reserved4[8];
+ __u8 intercept_message[256];
+ __u8 vtl_ret_actions[256];
} __packed;
struct hv_enlightened_vmcs {
diff --git a/arch/x86/include/asm/hyperv_timer.h b/arch/x86/include/asm/hyperv_timer.h
new file mode 100644
index 000000000000..388fa81b8f38
--- /dev/null
+++ b/arch/x86/include/asm/hyperv_timer.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_HYPERV_TIMER_H
+#define _ASM_X86_HYPERV_TIMER_H
+
+#include <asm/msr.h>
+
+#define hv_get_raw_timer() rdtsc_ordered()
+
+#endif
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 7cc49432187f..7a2ed154a5e1 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -44,10 +44,6 @@ extern int irq_remapping_reenable(int);
extern int irq_remap_enable_fault_handling(void);
extern void panic_if_irq_remap(const char *msg);
-/* Create PCI MSI/MSIx irqdomain, use @parent as the parent irqdomain. */
-extern struct irq_domain *
-arch_create_remap_msi_irq_domain(struct irq_domain *par, const char *n, int id);
-
/* Get parent irqdomain for interrupt remapping irqdomain */
static inline struct irq_domain *arch_get_ir_parent_domain(void)
{
diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h
index 125c23b7bad3..30c325c235c0 100644
--- a/arch/x86/include/asm/irqdomain.h
+++ b/arch/x86/include/asm/irqdomain.h
@@ -7,9 +7,7 @@
#ifdef CONFIG_X86_LOCAL_APIC
enum {
- /* Allocate contiguous CPU vectors */
- X86_IRQ_ALLOC_CONTIGUOUS_VECTORS = 0x1,
- X86_IRQ_ALLOC_LEGACY = 0x2,
+ X86_IRQ_ALLOC_LEGACY = 0x1,
};
extern int x86_fwspec_is_ioapic(struct irq_fwspec *fwspec);
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 61f0c206bff0..6d502f3efb0f 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -19,8 +19,6 @@ typedef int (*hyperv_fill_flush_list_func)(
struct hv_guest_mapping_flush_list *flush,
void *data);
-#define hv_get_raw_timer() rdtsc_ordered()
-
void hyperv_vector_handler(struct pt_regs *regs);
#if IS_ENABLED(CONFIG_HYPERV)
diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h
index d71c7e8b738d..935c6d470341 100644
--- a/arch/x86/include/asm/msi.h
+++ b/arch/x86/include/asm/msi.h
@@ -62,4 +62,10 @@ typedef struct x86_msi_addr_hi {
struct msi_msg;
u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid);
+#define X86_VECTOR_MSI_FLAGS_SUPPORTED \
+ (MSI_GENERIC_FLAGS_MASK | MSI_FLAG_PCI_MSIX | MSI_FLAG_PCI_MSIX_ALLOC_DYN)
+
+#define X86_VECTOR_MSI_FLAGS_REQUIRED \
+ (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS)
+
#endif /* _ASM_X86_MSI_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 10ac52705892..6bcd5d2dd718 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -4,12 +4,7 @@
#include <linux/bits.h>
-/*
- * CPU model specific register (MSR) numbers.
- *
- * Do not add new entries to this file unless the definitions are shared
- * between multiple compilation units.
- */
+/* CPU model specific register (MSR) numbers. */
/* x86-64 specific MSRs */
#define MSR_EFER 0xc0000080 /* extended feature register */
@@ -535,6 +530,11 @@
#define MSR_AMD64_CPUID_FN_1 0xc0011004
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
+
+#define MSR_AMD64_DE_CFG 0xc0011029
+#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1
+#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT)
+
#define MSR_AMD64_BU_CFG2 0xc001102a
#define MSR_AMD64_IBSFETCHCTL 0xc0011030
#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
@@ -640,9 +640,6 @@
#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
#define FAM10H_MMIO_CONF_BASE_SHIFT 20
#define MSR_FAM10H_NODE_ID 0xc001100c
-#define MSR_F10H_DECFG 0xc0011029
-#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
-#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
/* K8 MSRs */
#define MSR_K8_TOP_MEM1 0xc001001a
@@ -1050,6 +1047,20 @@
#define VMX_BASIC_MEM_TYPE_WB 6LLU
#define VMX_BASIC_INOUT 0x0040000000000000LLU
+/* Resctrl MSRs: */
+/* - Intel: */
+#define MSR_IA32_L3_QOS_CFG 0xc81
+#define MSR_IA32_L2_QOS_CFG 0xc82
+#define MSR_IA32_QM_EVTSEL 0xc8d
+#define MSR_IA32_QM_CTR 0xc8e
+#define MSR_IA32_PQR_ASSOC 0xc8f
+#define MSR_IA32_L3_CBM_BASE 0xc90
+#define MSR_IA32_L2_CBM_BASE 0xd10
+#define MSR_IA32_MBA_THRTL_BASE 0xd50
+
+/* - AMD: */
+#define MSR_IA32_MBA_BW_BASE 0xc0000200
+
/* MSR_IA32_VMX_MISC bits */
#define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14)
#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index c936ce9f0c47..dfdb103ae4f6 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -321,7 +321,7 @@ static inline void indirect_branch_prediction_barrier(void)
/* The Intel SPEC CTRL MSR base value cache */
extern u64 x86_spec_ctrl_base;
DECLARE_PER_CPU(u64, x86_spec_ctrl_current);
-extern void write_spec_ctrl_current(u64 val, bool force);
+extern void update_spec_ctrl_cond(u64 val);
extern u64 spec_ctrl_current(void);
/*
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 736793d65bcb..b40c462b4af3 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -21,7 +21,7 @@ struct pci_sysdata {
#ifdef CONFIG_X86_64
void *iommu; /* IOMMU private data */
#endif
-#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
+#ifdef CONFIG_PCI_MSI
void *fwnode; /* IRQ domain for MSI assignment */
#endif
#if IS_ENABLED(CONFIG_VMD)
@@ -52,7 +52,7 @@ static inline int pci_proc_domain(struct pci_bus *bus)
}
#endif
-#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
+#ifdef CONFIG_PCI_MSI
static inline void *_pci_root_bus_fwnode(struct pci_bus *bus)
{
return to_pci_sysdata(bus)->fwnode;
@@ -92,6 +92,7 @@ void pcibios_scan_root(int bus);
struct irq_routing_table *pcibios_get_irq_routing_table(void);
int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
+bool pci_dev_has_default_msi_parent_domain(struct pci_dev *dev);
#define HAVE_PCI_MMAP
#define arch_can_pci_mmap_wc() pat_enabled()
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5059799bebe3..286a71810f9e 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -139,6 +139,7 @@ static inline int pmd_dirty(pmd_t pmd)
return pmd_flags(pmd) & _PAGE_DIRTY;
}
+#define pmd_young pmd_young
static inline int pmd_young(pmd_t pmd)
{
return pmd_flags(pmd) & _PAGE_ACCESSED;
@@ -1438,6 +1439,14 @@ static inline bool arch_has_hw_pte_young(void)
return true;
}
+#ifdef CONFIG_XEN_PV
+#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young
+static inline bool arch_has_hw_nonleaf_pmd_young(void)
+{
+ return !cpu_feature_enabled(X86_FEATURE_XENPV);
+}
+#endif
+
#ifdef CONFIG_PAGE_TABLE_CHECK
static inline bool pte_user_accessible_page(pte_t pte)
{
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
index 60ece592b220..dbb38a6b4dfb 100644
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -37,7 +37,7 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
* rsi = lockval (second argument)
* rdx = internal variable (set to 0)
*/
-asm (".pushsection .spinlock.text;"
+asm (".pushsection .spinlock.text, \"ax\";"
".globl " PV_UNLOCK ";"
".type " PV_UNLOCK ", @function;"
".align 4,0x90;"
diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index d24b04ebf950..52788f79786f 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -7,8 +7,6 @@
#include <linux/sched.h>
#include <linux/jump_label.h>
-#define IA32_PQR_ASSOC 0x0c8f
-
/**
* struct resctrl_pqr_state - State cache for the PQR MSR
* @cur_rmid: The cached Resource Monitoring ID
@@ -16,8 +14,8 @@
* @default_rmid: The user assigned Resource Monitoring ID
* @default_closid: The user assigned cached Class Of Service ID
*
- * The upper 32 bits of IA32_PQR_ASSOC contain closid and the
- * lower 10 bits rmid. The update to IA32_PQR_ASSOC always
+ * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
+ * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
* contains both parts, so we need to cache them. This also
* stores the user configured per cpu CLOSID and RMID.
*
@@ -77,7 +75,7 @@ static void __resctrl_sched_in(void)
if (closid != state->cur_closid || rmid != state->cur_rmid) {
state->cur_closid = closid;
state->cur_rmid = rmid;
- wrmsr(IA32_PQR_ASSOC, rmid, closid);
+ wrmsr(MSR_IA32_PQR_ASSOC, rmid, closid);
}
}
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 2e7890dd58a4..c390a672d560 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -135,6 +135,7 @@
#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
+#define __USER32_CS __USER_CS
#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8)
/* segment for calling fn: */
@@ -210,7 +211,6 @@
#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8 + 3)
#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
-#define __USER32_DS __USER_DS
#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
#define __CPUNODE_SEG (GDT_ENTRY_CPUNODE*8 + 3)
diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
index eae20fa52b93..6a0069761508 100644
--- a/arch/x86/include/asm/sgx.h
+++ b/arch/x86/include/asm/sgx.h
@@ -115,17 +115,36 @@ enum sgx_miscselect {
* %SGX_ATTR_EINITTOKENKEY: Allow to use token signing key that is used to
* sign cryptographic tokens that can be passed to
* EINIT as an authorization to run an enclave.
+ * %SGX_ATTR_ASYNC_EXIT_NOTIFY: Allow enclaves to be notified after an
+ * asynchronous exit has occurred.
*/
enum sgx_attribute {
- SGX_ATTR_INIT = BIT(0),
- SGX_ATTR_DEBUG = BIT(1),
- SGX_ATTR_MODE64BIT = BIT(2),
- SGX_ATTR_PROVISIONKEY = BIT(4),
- SGX_ATTR_EINITTOKENKEY = BIT(5),
- SGX_ATTR_KSS = BIT(7),
+ SGX_ATTR_INIT = BIT(0),
+ SGX_ATTR_DEBUG = BIT(1),
+ SGX_ATTR_MODE64BIT = BIT(2),
+ /* BIT(3) is reserved */
+ SGX_ATTR_PROVISIONKEY = BIT(4),
+ SGX_ATTR_EINITTOKENKEY = BIT(5),
+ /* BIT(6) is for CET */
+ SGX_ATTR_KSS = BIT(7),
+ /* BIT(8) is reserved */
+ /* BIT(9) is reserved */
+ SGX_ATTR_ASYNC_EXIT_NOTIFY = BIT(10),
};
-#define SGX_ATTR_RESERVED_MASK (BIT_ULL(3) | BIT_ULL(6) | GENMASK_ULL(63, 8))
+#define SGX_ATTR_RESERVED_MASK (BIT_ULL(3) | \
+ BIT_ULL(6) | \
+ BIT_ULL(8) | \
+ BIT_ULL(9) | \
+ GENMASK_ULL(63, 11))
+
+#define SGX_ATTR_UNPRIV_MASK (SGX_ATTR_DEBUG | \
+ SGX_ATTR_MODE64BIT | \
+ SGX_ATTR_KSS | \
+ SGX_ATTR_ASYNC_EXIT_NOTIFY)
+
+#define SGX_ATTR_PRIV_MASK (SGX_ATTR_PROVISIONKEY | \
+ SGX_ATTR_EINITTOKENKEY)
/**
* struct sgx_secs - SGX Enclave Control Structure (SECS)
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index 65e667279e0f..e770c4fc47f4 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -15,4 +15,13 @@
void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
+void __user *
+get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size,
+ void __user **fpstate);
+
+int ia32_setup_frame(struct ksignal *ksig, struct pt_regs *regs);
+int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
+int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
+int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
+
#endif /* _ASM_X86_SIGHANDLING_H */
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 2dfb5fea13af..4a4043ca6493 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -28,11 +28,6 @@ typedef struct {
#define SA_IA32_ABI 0x02000000u
#define SA_X32_ABI 0x01000000u
-#ifndef CONFIG_COMPAT
-#define compat_sigset_t compat_sigset_t
-typedef sigset_t compat_sigset_t;
-#endif
-
#endif /* __ASSEMBLY__ */
#include <uapi/asm/signal.h>
#ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 020c81a7c729..28d889c9aa16 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -67,6 +67,8 @@ void tdx_safe_halt(void);
bool tdx_early_handle_ve(struct pt_regs *regs);
+int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport);
+
#else
static inline void tdx_early_init(void) { };
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index f901658d9f7c..cceaafdd2d84 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -44,7 +44,7 @@ obj-y += head_$(BITS).o
obj-y += head$(BITS).o
obj-y += ebda.o
obj-y += platform-quirks.o
-obj-y += process_$(BITS).o signal.o
+obj-y += process_$(BITS).o signal.o signal_$(BITS).o
obj-$(CONFIG_COMPAT) += signal_compat.o
obj-y += traps.o idt.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time.o ioport.o dumpstack.o nmi.o
@@ -54,7 +54,7 @@ obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-y += probe_roms.o
obj-$(CONFIG_X86_32) += sys_ia32.o
-obj-$(CONFIG_IA32_EMULATION) += sys_ia32.o
+obj-$(CONFIG_IA32_EMULATION) += sys_ia32.o signal_32.o
obj-$(CONFIG_X86_64) += sys_x86_64.o
obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o
obj-$(CONFIG_SYSFS) += ksysfs.o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5cadcea035e0..a9bea860e22a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -624,7 +624,7 @@ void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end)
#else
-void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) { }
+void __init_or_module apply_ibt_endbr(s32 *start, s32 *end) { }
#endif /* CONFIG_X86_KERNEL_IBT */
@@ -1608,7 +1608,7 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
default:
BUG_ON(len != insn.length);
- };
+ }
switch (tp->opcode) {
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index c6876d3ea4b1..20d9a604da7c 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1931,16 +1931,19 @@ void __init check_x2apic(void)
}
}
#else /* CONFIG_X86_X2APIC */
-static int __init validate_x2apic(void)
+void __init check_x2apic(void)
{
if (!apic_is_x2apic_enabled())
- return 0;
+ return;
/*
- * Checkme: Can we simply turn off x2apic here instead of panic?
+ * Checkme: Can we simply turn off x2APIC here instead of disabling the APIC?
*/
- panic("BIOS has enabled x2apic but kernel doesn't support x2apic, please disable x2apic in BIOS.\n");
+ pr_err("Kernel does not support x2APIC, please recompile with CONFIG_X86_X2APIC.\n");
+ pr_err("Disabling APIC, expect reduced performance and functionality.\n");
+
+ disable_apic = 1;
+ setup_clear_cpu_cap(X86_FEATURE_APIC);
}
-early_initcall(validate_x2apic);
static inline void try_to_enable_x2apic(int remap_mode) { }
static inline void __x2apic_enable(void) { }
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 7517eb05bdc1..35d5b8fb18ef 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -142,70 +142,139 @@ msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force)
return ret;
}
-/*
- * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
- * which implement the MSI or MSI-X Capability Structure.
+/**
+ * pci_dev_has_default_msi_parent_domain - Check whether the device has the default
+ * MSI parent domain associated
+ * @dev: Pointer to the PCI device
*/
-static struct irq_chip pci_msi_controller = {
- .name = "PCI-MSI",
- .irq_unmask = pci_msi_unmask_irq,
- .irq_mask = pci_msi_mask_irq,
- .irq_ack = irq_chip_ack_parent,
- .irq_retrigger = irq_chip_retrigger_hierarchy,
- .irq_set_affinity = msi_set_affinity,
- .flags = IRQCHIP_SKIP_SET_WAKE |
- IRQCHIP_AFFINITY_PRE_STARTUP,
-};
+bool pci_dev_has_default_msi_parent_domain(struct pci_dev *dev)
+{
+ struct irq_domain *domain = dev_get_msi_domain(&dev->dev);
-int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
- msi_alloc_info_t *arg)
+ if (!domain)
+ domain = dev_get_msi_domain(&dev->bus->dev);
+ if (!domain)
+ return false;
+
+ return domain == x86_vector_domain;
+}
+
+/**
+ * x86_msi_prepare - Setup of msi_alloc_info_t for allocations
+ * @domain: The domain for which this setup happens
+ * @dev: The device for which interrupts are allocated
+ * @nvec: The number of vectors to allocate
+ * @alloc: The allocation info structure to initialize
+ *
+ * This function is to be used for all types of MSI domains above the x86
+ * vector domain and any intermediates. It is always invoked from the
+ * top level interrupt domain. The domain specific allocation
+ * functionality is determined via the @domain's bus token which allows to
+ * map the X86 specific allocation type.
+ */
+static int x86_msi_prepare(struct irq_domain *domain, struct device *dev,
+ int nvec, msi_alloc_info_t *alloc)
{
- init_irq_alloc_info(arg, NULL);
- if (to_pci_dev(dev)->msix_enabled) {
- arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX;
- } else {
- arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSI;
- arg->flags |= X86_IRQ_ALLOC_CONTIGUOUS_VECTORS;
+ struct msi_domain_info *info = domain->host_data;
+
+ init_irq_alloc_info(alloc, NULL);
+
+ switch (info->bus_token) {
+ case DOMAIN_BUS_PCI_DEVICE_MSI:
+ alloc->type = X86_IRQ_ALLOC_TYPE_PCI_MSI;
+ return 0;
+ case DOMAIN_BUS_PCI_DEVICE_MSIX:
+ case DOMAIN_BUS_PCI_DEVICE_IMS:
+ alloc->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX;
+ return 0;
+ default:
+ return -EINVAL;
}
-
- return 0;
}
-EXPORT_SYMBOL_GPL(pci_msi_prepare);
-static struct msi_domain_ops pci_msi_domain_ops = {
- .msi_prepare = pci_msi_prepare,
-};
+/**
+ * x86_init_dev_msi_info - Domain info setup for MSI domains
+ * @dev: The device for which the domain should be created
+ * @domain: The (root) domain providing this callback
+ * @real_parent: The real parent domain of the to initialize domain
+ * @info: The domain info for the to initialize domain
+ *
+ * This function is to be used for all types of MSI domains above the x86
+ * vector domain and any intermediates. The domain specific functionality
+ * is determined via the @real_parent.
+ */
+static bool x86_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
+ struct irq_domain *real_parent, struct msi_domain_info *info)
+{
+ const struct msi_parent_ops *pops = real_parent->msi_parent_ops;
+
+ /* MSI parent domain specific settings */
+ switch (real_parent->bus_token) {
+ case DOMAIN_BUS_ANY:
+ /* Only the vector domain can have the ANY token */
+ if (WARN_ON_ONCE(domain != real_parent))
+ return false;
+ info->chip->irq_set_affinity = msi_set_affinity;
+ /* See msi_set_affinity() for the gory details */
+ info->flags |= MSI_FLAG_NOMASK_QUIRK;
+ break;
+ case DOMAIN_BUS_DMAR:
+ case DOMAIN_BUS_AMDVI:
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return false;
+ }
+
+ /* Is the target supported? */
+ switch(info->bus_token) {
+ case DOMAIN_BUS_PCI_DEVICE_MSI:
+ case DOMAIN_BUS_PCI_DEVICE_MSIX:
+ break;
+ case DOMAIN_BUS_PCI_DEVICE_IMS:
+ if (!(pops->supported_flags & MSI_FLAG_PCI_IMS))
+ return false;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return false;
+ }
+
+ /*
+ * Mask out the domain specific MSI feature flags which are not
+ * supported by the real parent.
+ */
+ info->flags &= pops->supported_flags;
+ /* Enforce the required flags */
+ info->flags |= X86_VECTOR_MSI_FLAGS_REQUIRED;
+
+ /* This is always invoked from the top level MSI domain! */
+ info->ops->msi_prepare = x86_msi_prepare;
+
+ info->chip->irq_ack = irq_chip_ack_parent;
+ info->chip->irq_retrigger = irq_chip_retrigger_hierarchy;
+ info->chip->flags |= IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_AFFINITY_PRE_STARTUP;
-static struct msi_domain_info pci_msi_domain_info = {
- .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
- MSI_FLAG_PCI_MSIX,
- .ops = &pci_msi_domain_ops,
- .chip = &pci_msi_controller,
- .handler = handle_edge_irq,
- .handler_name = "edge",
+ info->handler = handle_edge_irq;
+ info->handler_name = "edge";
+
+ return true;
+}
+
+static const struct msi_parent_ops x86_vector_msi_parent_ops = {
+ .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED,
+ .init_dev_msi_info = x86_init_dev_msi_info,
};
struct irq_domain * __init native_create_pci_msi_domain(void)
{
- struct fwnode_handle *fn;
- struct irq_domain *d;
-
if (disable_apic)
return NULL;
- fn = irq_domain_alloc_named_fwnode("PCI-MSI");
- if (!fn)
- return NULL;
-
- d = pci_msi_create_irq_domain(fn, &pci_msi_domain_info,
- x86_vector_domain);
- if (!d) {
- irq_domain_free_fwnode(fn);
- pr_warn("Failed to initialize PCI-MSI irqdomain.\n");
- } else {
- d->flags |= IRQ_DOMAIN_MSI_NOMASK_QUIRK;
- }
- return d;
+ x86_vector_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
+ x86_vector_domain->msi_parent_ops = &x86_vector_msi_parent_ops;
+ return x86_vector_domain;
}
void __init x86_create_pci_msi_domain(void)
@@ -213,41 +282,19 @@ void __init x86_create_pci_msi_domain(void)
x86_pci_msi_default_domain = x86_init.irqs.create_pci_msi_domain();
}
-#ifdef CONFIG_IRQ_REMAP
-static struct irq_chip pci_msi_ir_controller = {
- .name = "IR-PCI-MSI",
- .irq_unmask = pci_msi_unmask_irq,
- .irq_mask = pci_msi_mask_irq,
- .irq_ack = irq_chip_ack_parent,
- .irq_retrigger = irq_chip_retrigger_hierarchy,
- .flags = IRQCHIP_SKIP_SET_WAKE |
- IRQCHIP_AFFINITY_PRE_STARTUP,
-};
-
-static struct msi_domain_info pci_msi_ir_domain_info = {
- .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
- MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX,
- .ops = &pci_msi_domain_ops,
- .chip = &pci_msi_ir_controller,
- .handler = handle_edge_irq,
- .handler_name = "edge",
-};
-
-struct irq_domain *arch_create_remap_msi_irq_domain(struct irq_domain *parent,
- const char *name, int id)
+/* Keep around for hyperV */
+int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
+ msi_alloc_info_t *arg)
{
- struct fwnode_handle *fn;
- struct irq_domain *d;
+ init_irq_alloc_info(arg, NULL);
- fn = irq_domain_alloc_named_id_fwnode(name, id);
- if (!fn)
- return NULL;
- d = pci_msi_create_irq_domain(fn, &pci_msi_ir_domain_info, parent);
- if (!d)
- irq_domain_free_fwnode(fn);
- return d;
+ if (to_pci_dev(dev)->msix_enabled)
+ arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX;
+ else
+ arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSI;
+ return 0;
}
-#endif
+EXPORT_SYMBOL_GPL(pci_msi_prepare);
#ifdef CONFIG_DMAR_TABLE
/*
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 3e6f6b448f6a..c1efebd27e6c 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -539,10 +539,6 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
if (disable_apic)
return -ENXIO;
- /* Currently vector allocator can't guarantee contiguous allocations */
- if ((info->flags & X86_IRQ_ALLOC_CONTIGUOUS_VECTORS) && nr_irqs > 1)
- return -ENOSYS;
-
/*
* Catch any attempt to touch the cascade interrupt on a PIC
* equipped system.
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 860b60273df3..c75d75b9f11a 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -770,8 +770,6 @@ static void init_amd_gh(struct cpuinfo_x86 *c)
set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH);
}
-#define MSR_AMD64_DE_CFG 0xC0011029
-
static void init_amd_ln(struct cpuinfo_x86 *c)
{
/*
@@ -965,8 +963,8 @@ static void init_amd(struct cpuinfo_x86 *c)
* msr_set_bit() uses the safe accessors, too, even if the MSR
* is not present.
*/
- msr_set_bit(MSR_F10H_DECFG,
- MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
+ msr_set_bit(MSR_AMD64_DE_CFG,
+ MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT);
/* A serializing LFENCE stops RDTSC speculation */
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 3e3230cccaa7..6daf84229548 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -60,11 +60,18 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_current);
static DEFINE_MUTEX(spec_ctrl_mutex);
+/* Update SPEC_CTRL MSR and its cached copy unconditionally */
+static void update_spec_ctrl(u64 val)
+{
+ this_cpu_write(x86_spec_ctrl_current, val);
+ wrmsrl(MSR_IA32_SPEC_CTRL, val);
+}
+
/*
* Keep track of the SPEC_CTRL MSR value for the current task, which may differ
* from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update().
*/
-void write_spec_ctrl_current(u64 val, bool force)
+void update_spec_ctrl_cond(u64 val)
{
if (this_cpu_read(x86_spec_ctrl_current) == val)
return;
@@ -75,7 +82,7 @@ void write_spec_ctrl_current(u64 val, bool force)
* When KERNEL_IBRS this MSR is written on return-to-user, unless
* forced the update can be delayed until that time.
*/
- if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
+ if (!cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
wrmsrl(MSR_IA32_SPEC_CTRL, val);
}
@@ -1328,7 +1335,7 @@ static void __init spec_ctrl_disable_kernel_rrsba(void)
if (ia32_cap & ARCH_CAP_RRSBA) {
x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
- write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ update_spec_ctrl(x86_spec_ctrl_base);
}
}
@@ -1450,7 +1457,7 @@ static void __init spectre_v2_select_mitigation(void)
if (spectre_v2_in_ibrs_mode(mode)) {
x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
- write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ update_spec_ctrl(x86_spec_ctrl_base);
}
switch (mode) {
@@ -1564,7 +1571,7 @@ static void __init spectre_v2_select_mitigation(void)
static void update_stibp_msr(void * __unused)
{
u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP);
- write_spec_ctrl_current(val, true);
+ update_spec_ctrl(val);
}
/* Update x86_spec_ctrl_base in case SMT state changed. */
@@ -1797,7 +1804,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
x86_amd_ssb_disable();
} else {
x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
- write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ update_spec_ctrl(x86_spec_ctrl_base);
}
}
@@ -2048,7 +2055,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
void x86_spec_ctrl_setup_ap(void)
{
if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
- write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ update_spec_ctrl(x86_spec_ctrl_base);
if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
x86_amd_ssb_disable();
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index c881bcafba7d..d95221117129 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -75,6 +75,7 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_SGX_LC, X86_FEATURE_SGX },
{ X86_FEATURE_SGX1, X86_FEATURE_SGX },
{ X86_FEATURE_SGX2, X86_FEATURE_SGX1 },
+ { X86_FEATURE_SGX_EDECCSSA, X86_FEATURE_SGX1 },
{ X86_FEATURE_XFD, X86_FEATURE_XSAVES },
{ X86_FEATURE_XFD, X86_FEATURE_XGETBV1 },
{ X86_FEATURE_AMX_TILE, X86_FEATURE_XFD },
diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
index 21fd425088fe..c393b8773ace 100644
--- a/arch/x86/kernel/cpu/hygon.c
+++ b/arch/x86/kernel/cpu/hygon.c
@@ -326,8 +326,8 @@ static void init_hygon(struct cpuinfo_x86 *c)
* msr_set_bit() uses the safe accessors, too, even if the MSR
* is not present.
*/
- msr_set_bit(MSR_F10H_DECFG,
- MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
+ msr_set_bit(MSR_AMD64_DE_CFG,
+ MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT);
/* A serializing LFENCE stops RDTSC speculation */
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 2d7ea5480ec3..427899650483 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -1034,8 +1034,32 @@ static const struct {
static struct ratelimit_state bld_ratelimit;
+static unsigned int sysctl_sld_mitigate = 1;
static DEFINE_SEMAPHORE(buslock_sem);
+#ifdef CONFIG_PROC_SYSCTL
+static struct ctl_table sld_sysctls[] = {
+ {
+ .procname = "split_lock_mitigate",
+ .data = &sysctl_sld_mitigate,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {}
+};
+
+static int __init sld_mitigate_sysctl_init(void)
+{
+ register_sysctl_init("kernel", sld_sysctls);
+ return 0;
+}
+
+late_initcall(sld_mitigate_sysctl_init);
+#endif
+
static inline bool match_option(const char *arg, int arglen, const char *opt)
{
int len = strlen(opt), ratelimit;
@@ -1146,12 +1170,20 @@ static void split_lock_init(void)
split_lock_verify_msr(sld_state != sld_off);
}
-static void __split_lock_reenable(struct work_struct *work)
+static void __split_lock_reenable_unlock(struct work_struct *work)
{
sld_update_msr(true);
up(&buslock_sem);
}
+static DECLARE_DELAYED_WORK(sl_reenable_unlock, __split_lock_reenable_unlock);
+
+static void __split_lock_reenable(struct work_struct *work)
+{
+ sld_update_msr(true);
+}
+static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable);
+
/*
* If a CPU goes offline with pending delayed work to re-enable split lock
* detection then the delayed work will be executed on some other CPU. That
@@ -1169,10 +1201,9 @@ static int splitlock_cpu_offline(unsigned int cpu)
return 0;
}
-static DECLARE_DELAYED_WORK(split_lock_reenable, __split_lock_reenable);
-
static void split_lock_warn(unsigned long ip)
{
+ struct delayed_work *work;
int cpu;
if (!current->reported_split_lock)
@@ -1180,14 +1211,26 @@ static void split_lock_warn(unsigned long ip)
current->comm, current->pid, ip);
current->reported_split_lock = 1;
- /* misery factor #1, sleep 10ms before trying to execute split lock */
- if (msleep_interruptible(10) > 0)
- return;
- /* Misery factor #2, only allow one buslocked disabled core at a time */
- if (down_interruptible(&buslock_sem) == -EINTR)
- return;
+ if (sysctl_sld_mitigate) {
+ /*
+ * misery factor #1:
+ * sleep 10ms before trying to execute split lock.
+ */
+ if (msleep_interruptible(10) > 0)
+ return;
+ /*
+ * Misery factor #2:
+ * only allow one buslocked disabled core at a time.
+ */
+ if (down_interruptible(&buslock_sem) == -EINTR)
+ return;
+ work = &sl_reenable_unlock;
+ } else {
+ work = &sl_reenable;
+ }
+
cpu = get_cpu();
- schedule_delayed_work_on(cpu, &split_lock_reenable, 2);
+ schedule_delayed_work_on(cpu, work, 2);
/* Disable split lock detection on this CPU to make progress */
sld_update_msr(false);
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 1c87501e0fa3..10fb5b5c9efa 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -788,6 +788,24 @@ _log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
return status & MCI_STATUS_DEFERRED;
}
+static bool _log_error_deferred(unsigned int bank, u32 misc)
+{
+ if (!_log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS),
+ mca_msr_reg(bank, MCA_ADDR), misc))
+ return false;
+
+ /*
+ * Non-SMCA systems don't have MCA_DESTAT/MCA_DEADDR registers.
+ * Return true here to avoid accessing these registers.
+ */
+ if (!mce_flags.smca)
+ return true;
+
+ /* Clear MCA_DESTAT if the deferred error was logged from MCA_STATUS. */
+ wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);
+ return true;
+}
+
/*
* We have three scenarios for checking for Deferred errors:
*
@@ -799,19 +817,8 @@ _log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
*/
static void log_error_deferred(unsigned int bank)
{
- bool defrd;
-
- defrd = _log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS),
- mca_msr_reg(bank, MCA_ADDR), 0);
-
- if (!mce_flags.smca)
- return;
-
- /* Clear MCA_DESTAT if we logged the deferred error from MCA_STATUS. */
- if (defrd) {
- wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);
+ if (_log_error_deferred(bank, 0))
return;
- }
/*
* Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check
@@ -832,7 +839,7 @@ static void amd_deferred_error_interrupt(void)
static void log_error_thresholding(unsigned int bank, u64 misc)
{
- _log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS), mca_msr_reg(bank, MCA_ADDR), misc);
+ _log_error_deferred(bank, misc);
}
static void log_and_reset_block(struct threshold_block *block)
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 00483d1c27e4..c4477162c07d 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -203,6 +203,11 @@ static struct severity {
BITSET(MCI_STATUS_OVER|MCI_STATUS_UC)
),
MCESEV(
+ PANIC, "Uncorrected in kernel",
+ BITSET(MCI_STATUS_UC),
+ KERNEL
+ ),
+ MCESEV(
UC, "Uncorrected",
BITSET(MCI_STATUS_UC)
),
@@ -391,9 +396,6 @@ static noinstr int mce_severity_intel(struct mce *m, struct pt_regs *regs, char
*msg = s->msg;
s->covered = 1;
- if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL)
- return MCE_PANIC_SEVERITY;
-
return s->sev;
}
}
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 831613959a92..46668e255421 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -475,6 +475,12 @@ static bool __init ms_hyperv_x2apic_available(void)
* (logically) generates MSIs directly to the system APIC irq domain.
* There is no HPET, and PCI MSI/MSI-X interrupts are remapped by the
* pci-hyperv host bridge.
+ *
+ * Note: for a Hyper-V root partition, this will always return false.
+ * The hypervisor doesn't expose these HYPERV_CPUID_VIRT_STACK_* cpuids by
+ * default, they are implemented as intercepts by the Windows Hyper-V stack.
+ * Even a nested root partition (L2 root) will not get them because the
+ * nested (L1) hypervisor filters them out.
*/
static bool __init ms_hyperv_msi_ext_dest_id(void)
{
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 3266ea36667c..c98e52ff5f20 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -575,7 +575,7 @@ static void clear_closid_rmid(int cpu)
state->default_rmid = 0;
state->cur_closid = 0;
state->cur_rmid = 0;
- wrmsr(IA32_PQR_ASSOC, 0, 0);
+ wrmsr(MSR_IA32_PQR_ASSOC, 0, 0);
}
static int resctrl_online_cpu(unsigned int cpu)
@@ -828,7 +828,6 @@ static __init void rdt_init_res_defs_intel(void)
if (r->rid == RDT_RESOURCE_L3 ||
r->rid == RDT_RESOURCE_L2) {
r->cache.arch_has_sparse_bitmaps = false;
- r->cache.arch_has_empty_bitmaps = false;
r->cache.arch_has_per_cpu_cfg = false;
r->cache.min_cbm_bits = 1;
} else if (r->rid == RDT_RESOURCE_MBA) {
@@ -849,7 +848,6 @@ static __init void rdt_init_res_defs_amd(void)
if (r->rid == RDT_RESOURCE_L3 ||
r->rid == RDT_RESOURCE_L2) {
r->cache.arch_has_sparse_bitmaps = true;
- r->cache.arch_has_empty_bitmaps = true;
r->cache.arch_has_per_cpu_cfg = true;
r->cache.min_cbm_bits = 0;
} else if (r->rid == RDT_RESOURCE_MBA) {
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index 1dafbdc5ac31..1df0e3262bca 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -105,8 +105,7 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
return false;
}
- if ((!r->cache.arch_has_empty_bitmaps && val == 0) ||
- val > r->default_ctrl) {
+ if ((r->cache.min_cbm_bits > 0 && val == 0) || val > r->default_ctrl) {
rdt_last_cmd_puts("Mask out of range\n");
return false;
}
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 5f7128686cfd..5ebd28e6aa0c 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -8,16 +8,6 @@
#include <linux/fs_context.h>
#include <linux/jump_label.h>
-#define MSR_IA32_L3_QOS_CFG 0xc81
-#define MSR_IA32_L2_QOS_CFG 0xc82
-#define MSR_IA32_L3_CBM_BASE 0xc90
-#define MSR_IA32_L2_CBM_BASE 0xd10
-#define MSR_IA32_MBA_THRTL_BASE 0xd50
-#define MSR_IA32_MBA_BW_BASE 0xc0000200
-
-#define MSR_IA32_QM_CTR 0x0c8e
-#define MSR_IA32_QM_EVTSEL 0x0c8d
-
#define L3_QOS_CDP_ENABLE 0x01ULL
#define L2_QOS_CDP_ENABLE 0x01ULL
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index d961ae3ed96e..ba8d0763b36b 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -477,7 +477,7 @@ static int pseudo_lock_fn(void *_rdtgrp)
* pseudo-locked followed by reading of kernel memory to load it
* into the cache.
*/
- __wrmsr(IA32_PQR_ASSOC, rmid_p, rdtgrp->closid);
+ __wrmsr(MSR_IA32_PQR_ASSOC, rmid_p, rdtgrp->closid);
/*
* Cache was flushed earlier. Now access kernel memory to read it
* into cache region associated with just activated plr->closid.
@@ -513,7 +513,7 @@ static int pseudo_lock_fn(void *_rdtgrp)
* Critical section end: restore closid with capacity bitmask that
* does not overlap with pseudo-locked region.
*/
- __wrmsr(IA32_PQR_ASSOC, rmid_p, closid_p);
+ __wrmsr(MSR_IA32_PQR_ASSOC, rmid_p, closid_p);
/* Re-enable the hardware prefetcher(s) */
wrmsrl(MSR_MISC_FEATURE_CONTROL, saved_msr);
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index fc01f81f6e2a..f53944fb8f7f 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -40,6 +40,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_PER_THREAD_MBA, CPUID_ECX, 0, 0x00000010, 3 },
{ X86_FEATURE_SGX1, CPUID_EAX, 0, 0x00000012, 0 },
{ X86_FEATURE_SGX2, CPUID_EAX, 1, 0x00000012, 0 },
+ { X86_FEATURE_SGX_EDECCSSA, CPUID_EAX, 11, 0x00000012, 0 },
{ X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 1ec20807de1e..68f8b18d2278 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -160,8 +160,8 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
return ret;
pginfo.addr = encl_page->desc & PAGE_MASK;
- pginfo.contents = (unsigned long)kmap_atomic(b.contents);
- pcmd_page = kmap_atomic(b.pcmd);
+ pginfo.contents = (unsigned long)kmap_local_page(b.contents);
+ pcmd_page = kmap_local_page(b.pcmd);
pginfo.metadata = (unsigned long)pcmd_page + b.pcmd_offset;
if (secs_page)
@@ -187,8 +187,8 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
*/
pcmd_page_empty = !memchr_inv(pcmd_page, 0, PAGE_SIZE);
- kunmap_atomic(pcmd_page);
- kunmap_atomic((void *)(unsigned long)pginfo.contents);
+ kunmap_local(pcmd_page);
+ kunmap_local((void *)(unsigned long)pginfo.contents);
get_page(b.pcmd);
sgx_encl_put_backing(&b);
@@ -197,10 +197,10 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
if (pcmd_page_empty && !reclaimer_writing_to_pcmd(encl, pcmd_first_page)) {
sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off));
- pcmd_page = kmap_atomic(b.pcmd);
+ pcmd_page = kmap_local_page(b.pcmd);
if (memchr_inv(pcmd_page, 0, PAGE_SIZE))
pr_warn("PCMD page not empty after truncate.\n");
- kunmap_atomic(pcmd_page);
+ kunmap_local(pcmd_page);
}
put_page(b.pcmd);
@@ -680,11 +680,15 @@ const struct vm_operations_struct sgx_vm_ops = {
void sgx_encl_release(struct kref *ref)
{
struct sgx_encl *encl = container_of(ref, struct sgx_encl, refcount);
+ unsigned long max_page_index = PFN_DOWN(encl->base + encl->size - 1);
struct sgx_va_page *va_page;
struct sgx_encl_page *entry;
- unsigned long index;
+ unsigned long count = 0;
+
+ XA_STATE(xas, &encl->page_array, PFN_DOWN(encl->base));
- xa_for_each(&encl->page_array, index, entry) {
+ xas_lock(&xas);
+ xas_for_each(&xas, entry, max_page_index) {
if (entry->epc_page) {
/*
* The page and its radix tree entry cannot be freed
@@ -699,9 +703,20 @@ void sgx_encl_release(struct kref *ref)
}
kfree(entry);
- /* Invoke scheduler to prevent soft lockups. */
- cond_resched();
+ /*
+ * Invoke scheduler on every XA_CHECK_SCHED iteration
+ * to prevent soft lockups.
+ */
+ if (!(++count % XA_CHECK_SCHED)) {
+ xas_pause(&xas);
+ xas_unlock(&xas);
+
+ cond_resched();
+
+ xas_lock(&xas);
+ }
}
+ xas_unlock(&xas);
xa_destroy(&encl->page_array);
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index ebe79d60619f..21ca0a831b70 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -111,7 +111,7 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
encl->base = secs->base;
encl->size = secs->size;
encl->attributes = secs->attributes;
- encl->attributes_mask = SGX_ATTR_DEBUG | SGX_ATTR_MODE64BIT | SGX_ATTR_KSS;
+ encl->attributes_mask = SGX_ATTR_UNPRIV_MASK;
/* Set only after completion, as encl->lock has not been taken. */
set_bit(SGX_ENCL_CREATED, &encl->flags);
@@ -221,11 +221,11 @@ static int __sgx_encl_add_page(struct sgx_encl *encl,
pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page);
pginfo.addr = encl_page->desc & PAGE_MASK;
pginfo.metadata = (unsigned long)secinfo;
- pginfo.contents = (unsigned long)kmap_atomic(src_page);
+ pginfo.contents = (unsigned long)kmap_local_page(src_page);
ret = __eadd(&pginfo, sgx_get_epc_virt_addr(epc_page));
- kunmap_atomic((void *)pginfo.contents);
+ kunmap_local((void *)pginfo.contents);
put_page(src_page);
return ret ? -EIO : 0;
@@ -356,6 +356,9 @@ static int sgx_validate_offset_length(struct sgx_encl *encl,
if (!length || !IS_ALIGNED(length, PAGE_SIZE))
return -EINVAL;
+ if (offset + length < offset)
+ return -EINVAL;
+
if (offset + length - PAGE_SIZE >= encl->size)
return -EINVAL;
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 0aad028f04d4..e5a37b6e9aa5 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -165,17 +165,17 @@ static int __sgx_encl_ewb(struct sgx_epc_page *epc_page, void *va_slot,
pginfo.addr = 0;
pginfo.secs = 0;
- pginfo.contents = (unsigned long)kmap_atomic(backing->contents);
- pginfo.metadata = (unsigned long)kmap_atomic(backing->pcmd) +
+ pginfo.contents = (unsigned long)kmap_local_page(backing->contents);
+ pginfo.metadata = (unsigned long)kmap_local_page(backing->pcmd) +
backing->pcmd_offset;
ret = __ewb(&pginfo, sgx_get_epc_virt_addr(epc_page), va_slot);
set_page_dirty(backing->pcmd);
set_page_dirty(backing->contents);
- kunmap_atomic((void *)(unsigned long)(pginfo.metadata -
+ kunmap_local((void *)(unsigned long)(pginfo.metadata -
backing->pcmd_offset));
- kunmap_atomic((void *)(unsigned long)pginfo.contents);
+ kunmap_local((void *)(unsigned long)pginfo.contents);
return ret;
}
diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c
index ec7bbac3a9f2..8009c8346d8f 100644
--- a/arch/x86/kernel/cpu/tsx.c
+++ b/arch/x86/kernel/cpu/tsx.c
@@ -58,24 +58,6 @@ static void tsx_enable(void)
wrmsrl(MSR_IA32_TSX_CTRL, tsx);
}
-static bool tsx_ctrl_is_supported(void)
-{
- u64 ia32_cap = x86_read_arch_cap_msr();
-
- /*
- * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this
- * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES.
- *
- * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a
- * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES
- * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get
- * MSR_IA32_TSX_CTRL support even after a microcode update. Thus,
- * tsx= cmdline requests will do nothing on CPUs without
- * MSR_IA32_TSX_CTRL support.
- */
- return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR);
-}
-
static enum tsx_ctrl_states x86_get_tsx_auto_mode(void)
{
if (boot_cpu_has_bug(X86_BUG_TAA))
@@ -135,7 +117,7 @@ static void tsx_clear_cpuid(void)
rdmsrl(MSR_TSX_FORCE_ABORT, msr);
msr |= MSR_TFA_TSX_CPUID_CLEAR;
wrmsrl(MSR_TSX_FORCE_ABORT, msr);
- } else if (tsx_ctrl_is_supported()) {
+ } else if (cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL)) {
rdmsrl(MSR_IA32_TSX_CTRL, msr);
msr |= TSX_CTRL_CPUID_CLEAR;
wrmsrl(MSR_IA32_TSX_CTRL, msr);
@@ -158,7 +140,8 @@ static void tsx_dev_mode_disable(void)
u64 mcu_opt_ctrl;
/* Check if RTM_ALLOW exists */
- if (!boot_cpu_has_bug(X86_BUG_TAA) || !tsx_ctrl_is_supported() ||
+ if (!boot_cpu_has_bug(X86_BUG_TAA) ||
+ !cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL) ||
!cpu_feature_enabled(X86_FEATURE_SRBDS_CTRL))
return;
@@ -191,7 +174,20 @@ void __init tsx_init(void)
return;
}
- if (!tsx_ctrl_is_supported()) {
+ /*
+ * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this
+ * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES.
+ *
+ * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a
+ * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES
+ * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get
+ * MSR_IA32_TSX_CTRL support even after a microcode update. Thus,
+ * tsx= cmdline requests will do nothing on CPUs without
+ * MSR_IA32_TSX_CTRL support.
+ */
+ if (x86_read_arch_cap_msr() & ARCH_CAP_TSX_CTRL_MSR) {
+ setup_force_cpu_cap(X86_FEATURE_MSR_TSX_CTRL);
+ } else {
tsx_ctrl_state = TSX_CTRL_NOT_SUPPORTED;
return;
}
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 5cd51f25f446..28da5dd83fc0 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -31,11 +31,6 @@ char __initdata cmd_line[COMMAND_LINE_SIZE];
int __initdata of_ioapic;
-void __init early_init_dt_add_memory_arch(u64 base, u64 size)
-{
- BUG();
-}
-
void __init add_dtb(u64 data)
{
initial_dtb = data + offsetof(struct setup_data, data);
@@ -167,7 +162,14 @@ static void __init dtb_lapic_setup(void)
return;
}
smp_found_config = 1;
- pic_mode = 1;
+ if (of_property_read_bool(dn, "intel,virtual-wire-mode")) {
+ pr_info("Virtual Wire compatibility mode.\n");
+ pic_mode = 0;
+ } else {
+ pr_info("IMCR and PIC compatibility mode.\n");
+ pic_mode = 1;
+ }
+
register_lapic_address(lapic_addr);
}
@@ -248,7 +250,7 @@ static void __init dtb_add_ioapic(struct device_node *dn)
ret = of_address_to_resource(dn, 0, &r);
if (ret) {
- printk(KERN_ERR "Can't obtain address from device node %pOF.\n", dn);
+ pr_err("Can't obtain address from device node %pOF.\n", dn);
return;
}
mp_register_ioapic(++ioapic_id, r.start, gsi_top, &cfg);
@@ -265,7 +267,7 @@ static void __init dtb_ioapic_setup(void)
of_ioapic = 1;
return;
}
- printk(KERN_ERR "Error: No information about IO-APIC in OF.\n");
+ pr_err("Error: No information about IO-APIC in OF.\n");
}
#else
static void __init dtb_ioapic_setup(void) {}
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 9417d5aa7305..16f9814c9be0 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -94,17 +94,7 @@ static inline unsigned long espfix_base_addr(unsigned int cpu)
static void init_espfix_random(void)
{
- unsigned long rand;
-
- /*
- * This is run before the entropy pools are initialized,
- * but this is hopefully better than nothing.
- */
- if (!arch_get_random_longs(&rand, 1)) {
- /* The constant is an arbitrary large prime */
- rand = rdtsc();
- rand *= 0xc345c6b72fd16123UL;
- }
+ unsigned long rand = get_random_long();
slot_random = rand % ESPFIX_STACKS_PER_PAGE;
page_random = (rand / ESPFIX_STACKS_PER_PAGE)
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 3b28c5b25e12..9baa89a8877d 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -391,8 +391,6 @@ int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf,
{
struct fpstate *kstate = gfpu->fpstate;
const union fpregs_state *ustate = buf;
- struct pkru_state *xpkru;
- int ret;
if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) {
if (ustate->xsave.header.xfeatures & ~XFEATURE_MASK_FPSSE)
@@ -406,16 +404,15 @@ int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf,
if (ustate->xsave.header.xfeatures & ~xcr0)
return -EINVAL;
- ret = copy_uabi_from_kernel_to_xstate(kstate, ustate);
- if (ret)
- return ret;
+ /*
+ * Nullify @vpkru to preserve its current value if PKRU's bit isn't set
+ * in the header. KVM's odd ABI is to leave PKRU untouched in this
+ * case (all other components are eventually re-initialized).
+ */
+ if (!(ustate->xsave.header.xfeatures & XFEATURE_MASK_PKRU))
+ vpkru = NULL;
- /* Retrieve PKRU if not in init state */
- if (kstate->regs.xsave.header.xfeatures & XFEATURE_MASK_PKRU) {
- xpkru = get_xsave_addr(&kstate->regs.xsave, XFEATURE_PKRU);
- *vpkru = xpkru->pkru;
- }
- return 0;
+ return copy_uabi_from_kernel_to_xstate(kstate, ustate, vpkru);
}
EXPORT_SYMBOL_GPL(fpu_copy_uabi_to_guest_fpstate);
#endif /* CONFIG_KVM */
@@ -605,9 +602,9 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal)
if (test_thread_flag(TIF_NEED_FPU_LOAD))
fpregs_restore_userregs();
save_fpregs_to_fpstate(dst_fpu);
+ fpregs_unlock();
if (!(clone_flags & CLONE_THREAD))
fpu_inherit_perms(dst_fpu);
- fpregs_unlock();
/*
* Children never inherit PASID state.
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 8946f89761cc..851eb13edc01 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -133,9 +133,6 @@ static void __init fpu__init_system_generic(void)
fpu__init_system_mxcsr();
}
-/* Get alignment of the TYPE. */
-#define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test)
-
/*
* Enforce that 'MEMBER' is the last field of 'TYPE'.
*
@@ -143,8 +140,8 @@ static void __init fpu__init_system_generic(void)
* because that's how C aligns structs.
*/
#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \
- BUILD_BUG_ON(sizeof(TYPE) != ALIGN(offsetofend(TYPE, MEMBER), \
- TYPE_ALIGN(TYPE)))
+ BUILD_BUG_ON(sizeof(TYPE) != \
+ ALIGN(offsetofend(TYPE, MEMBER), _Alignof(TYPE)))
/*
* We append the 'struct fpu' to the task_struct:
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 75ffaef8c299..6d056b68f4ed 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -167,7 +167,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
}
fpu_force_restore(fpu);
- ret = copy_uabi_from_kernel_to_xstate(fpu->fpstate, kbuf ?: tmpbuf);
+ ret = copy_uabi_from_kernel_to_xstate(fpu->fpstate, kbuf ?: tmpbuf, &target->thread.pkru);
out:
vfree(tmpbuf);
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 91d4b6de58ab..558076dbde5b 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -396,7 +396,7 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
fpregs = &fpu->fpstate->regs;
if (use_xsave() && !fx_only) {
- if (copy_sigframe_from_user_to_xstate(fpu->fpstate, buf_fx))
+ if (copy_sigframe_from_user_to_xstate(tsk, buf_fx))
return false;
} else {
if (__copy_from_user(&fpregs->fxsave, buf_fx,
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 59e543b95a3c..714166cc25f2 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -440,8 +440,8 @@ static void __init __xstate_dump_leaves(void)
}
}
-#define XSTATE_WARN_ON(x) do { \
- if (WARN_ONCE(x, "XSAVE consistency problem, dumping leaves")) { \
+#define XSTATE_WARN_ON(x, fmt, ...) do { \
+ if (WARN_ONCE(x, "XSAVE consistency problem: " fmt, ##__VA_ARGS__)) { \
__xstate_dump_leaves(); \
} \
} while (0)
@@ -554,8 +554,7 @@ static bool __init check_xstate_against_struct(int nr)
(nr >= XFEATURE_MAX) ||
(nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR) ||
((nr >= XFEATURE_RSRVD_COMP_11) && (nr <= XFEATURE_RSRVD_COMP_16))) {
- WARN_ONCE(1, "no structure for xstate: %d\n", nr);
- XSTATE_WARN_ON(1);
+ XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr);
return false;
}
return true;
@@ -598,12 +597,13 @@ static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
* XSAVES.
*/
if (!xsaves && xfeature_is_supervisor(i)) {
- XSTATE_WARN_ON(1);
+ XSTATE_WARN_ON(1, "Got supervisor feature %d, but XSAVES not advertised\n", i);
return false;
}
}
size = xstate_calculate_size(fpu_kernel_cfg.max_features, compacted);
- XSTATE_WARN_ON(size != kernel_size);
+ XSTATE_WARN_ON(size != kernel_size,
+ "size %u != kernel_size %u\n", size, kernel_size);
return size == kernel_size;
}
@@ -1200,8 +1200,36 @@ static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
}
+/**
+ * copy_uabi_to_xstate - Copy a UABI format buffer to the kernel xstate
+ * @fpstate: The fpstate buffer to copy to
+ * @kbuf: The UABI format buffer, if it comes from the kernel
+ * @ubuf: The UABI format buffer, if it comes from userspace
+ * @pkru: The location to write the PKRU value to
+ *
+ * Converts from the UABI format into the kernel internal hardware
+ * dependent format.
+ *
+ * This function ultimately has three different callers with distinct PKRU
+ * behavior.
+ * 1. When called from sigreturn the PKRU register will be restored from
+ * @fpstate via an XRSTOR. Correctly copying the UABI format buffer to
+ * @fpstate is sufficient to cover this case, but the caller will also
+ * pass a pointer to the thread_struct's pkru field in @pkru and updating
+ * it is harmless.
+ * 2. When called from ptrace the PKRU register will be restored from the
+ * thread_struct's pkru field. A pointer to that is passed in @pkru.
+ * The kernel will restore it manually, so the XRSTOR behavior that resets
+ * the PKRU register to the hardware init value (0) if the corresponding
+ * xfeatures bit is not set is emulated here.
+ * 3. When called from KVM the PKRU register will be restored from the vcpu's
+ * pkru field. A pointer to that is passed in @pkru. KVM hasn't used
+ * XRSTOR and hasn't had the PKRU resetting behavior described above. To
+ * preserve that KVM behavior, it passes NULL for @pkru if the xfeatures
+ * bit is not set.
+ */
static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
- const void __user *ubuf)
+ const void __user *ubuf, u32 *pkru)
{
struct xregs_state *xsave = &fpstate->regs.xsave;
unsigned int offset, size;
@@ -1250,6 +1278,20 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
}
}
+ if (hdr.xfeatures & XFEATURE_MASK_PKRU) {
+ struct pkru_state *xpkru;
+
+ xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU);
+ *pkru = xpkru->pkru;
+ } else {
+ /*
+ * KVM may pass NULL here to indicate that it does not need
+ * PKRU updated.
+ */
+ if (pkru)
+ *pkru = 0;
+ }
+
/*
* The state that came in from userspace was user-state only.
* Mask all the user states out of 'xfeatures':
@@ -1268,9 +1310,9 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
* Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S]
* format and copy to the target thread. Used by ptrace and KVM.
*/
-int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf)
+int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru)
{
- return copy_uabi_to_xstate(fpstate, kbuf, NULL);
+ return copy_uabi_to_xstate(fpstate, kbuf, NULL, pkru);
}
/*
@@ -1278,10 +1320,10 @@ int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf)
* XSAVE[S] format and copy to the target thread. This is called from the
* sigreturn() and rt_sigreturn() system calls.
*/
-int copy_sigframe_from_user_to_xstate(struct fpstate *fpstate,
+int copy_sigframe_from_user_to_xstate(struct task_struct *tsk,
const void __user *ubuf)
{
- return copy_uabi_to_xstate(fpstate, NULL, ubuf);
+ return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf, &tsk->thread.pkru);
}
static bool validate_independent_components(u64 mask)
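
A stand-alone sketch (GCC/Clang, user space; not kernel code) of the warn-once-with-format pattern that the reworked XSTATE_WARN_ON() above relies on: ##__VA_ARGS__ swallows the trailing comma, so a bare message with no arguments still compiles.

#include <stdio.h>

static int warned;

#define XSTATE_WARN_ON(x, fmt, ...) do {				\
	if ((x) && !warned) {						\
		warned = 1;						\
		fprintf(stderr, "XSAVE consistency problem: " fmt,	\
			##__VA_ARGS__);					\
	}								\
} while (0)

int main(void)
{
	unsigned int size = 832, kernel_size = 840;	/* made-up values */

	XSTATE_WARN_ON(size != kernel_size,
		       "size %u != kernel_size %u\n", size, kernel_size);
	return 0;
}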
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index 5ad47031383b..a4ecb04d8d64 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -46,8 +46,8 @@ extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
u32 pkru_val, enum xstate_copy_mode copy_mode);
extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
enum xstate_copy_mode mode);
-extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf);
-extern int copy_sigframe_from_user_to_xstate(struct fpstate *fpstate, const void __user *ubuf);
+extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru);
+extern int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void __user *ubuf);
extern void fpu__init_cpu_xstate(void);
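
The new u32 *pkru parameter threads the three caller behaviours described in the copy_uabi_to_xstate() kerneldoc above through one code path. A user-space model of just those rules (XFEATURE_MASK_PKRU matches the kernel's bit 9; everything else here is illustrative, not kernel code):

#include <stdint.h>
#include <stddef.h>

#define XFEATURE_MASK_PKRU	(1ULL << 9)

/* @pkru may only be NULL (the KVM case) when the PKRU bit is clear. */
static void model_pkru_update(uint64_t xfeatures, uint32_t buf_pkru,
			      uint32_t *pkru)
{
	if (xfeatures & XFEATURE_MASK_PKRU)
		*pkru = buf_pkru;	/* take the value from the UABI buffer */
	else if (pkru)
		*pkru = 0;		/* ptrace/sigreturn: emulate XRSTOR's init value */
	/* pkru == NULL: KVM, leave the previous guest PKRU untouched */
}

int main(void)
{
	uint32_t pkru = 0x55555554;

	model_pkru_update(0, 0, NULL);				/* KVM: left alone */
	model_pkru_update(0, 0, &pkru);				/* ptrace: reset to 0 */
	model_pkru_update(XFEATURE_MASK_PKRU, 0xa, &pkru);	/* taken from buffer */
	return pkru == 0xa ? 0 : 1;
}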
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 9b7acc9c7874..67c8ed99144b 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -261,16 +261,6 @@ SYM_FUNC_START(startup_32_smp)
addl $__PAGE_OFFSET, %esp
/*
- * start system 32-bit setup. We need to re-do some of the things done
- * in 16-bit mode for the "real" operations.
- */
- movl setup_once_ref,%eax
- andl %eax,%eax
- jz 1f # Did we do this already?
- call *%eax
-1:
-
-/*
* Check if it is 486
*/
movb $4,X86 # at least 486
@@ -331,18 +321,7 @@ SYM_FUNC_END(startup_32_smp)
#include "verify_cpu.S"
-/*
- * setup_once
- *
- * The setup work we only want to run on the BSP.
- *
- * Warning: %esi is live across this function.
- */
__INIT
-setup_once:
- andl $0,setup_once_ref /* Once is enough, thanks */
- RET
-
SYM_FUNC_START(early_idt_handler_array)
# 36(%esp) %eflags
# 32(%esp) %cs
@@ -458,7 +437,6 @@ SYM_DATA(early_recursion_flag, .long 0)
__REFDATA
.align 4
SYM_DATA(initial_code, .long i386_start_kernel)
-SYM_DATA(setup_once_ref, .long setup_once)
#ifdef CONFIG_PAGE_TABLE_ISOLATION
#define PGD_ALIGN (2 * PAGE_SIZE)
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 15aefa3f3e18..3aa5304200c5 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -407,7 +407,7 @@ struct legacy_pic null_legacy_pic = {
.make_irq = legacy_pic_uint_noop,
};
-struct legacy_pic default_legacy_pic = {
+static struct legacy_pic default_legacy_pic = {
.nr_legacy_irqs = NR_IRQS_LEGACY,
.chip = &i8259A_chip,
.mask = mask_8259A_irq,
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index a98687642dd0..d85a6980e263 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -251,14 +251,12 @@ int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
{
- const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
+ const Elf_Shdr *s, *alt = NULL, *locks = NULL,
*para = NULL, *orc = NULL, *orc_ip = NULL,
*retpolines = NULL, *returns = NULL, *ibt_endbr = NULL;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
- if (!strcmp(".text", secstrings + s->sh_name))
- text = s;
if (!strcmp(".altinstructions", secstrings + s->sh_name))
alt = s;
if (!strcmp(".smp_locks", secstrings + s->sh_name))
@@ -302,12 +300,13 @@ int module_finalize(const Elf_Ehdr *hdr,
void *iseg = (void *)ibt_endbr->sh_addr;
apply_ibt_endbr(iseg, iseg + ibt_endbr->sh_size);
}
- if (locks && text) {
+ if (locks) {
void *lseg = (void *)locks->sh_addr;
- void *tseg = (void *)text->sh_addr;
+ void *text = me->core_layout.base;
+ void *text_end = text + me->core_layout.text_size;
alternatives_smp_module_add(me, me->name,
lseg, lseg + locks->sh_size,
- tseg, tseg + text->sh_size);
+ text, text_end);
}
if (orc && orc_ip)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 62671ccf0404..40d156a31676 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -600,7 +600,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp,
}
if (updmsr)
- write_spec_ctrl_current(msr, false);
+ update_spec_ctrl_cond(msr);
}
static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 37c12fb92906..dfaa270a7cc9 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -44,16 +44,35 @@
#include "tls.h"
-enum x86_regset {
- REGSET_GENERAL,
- REGSET_FP,
- REGSET_XFP,
- REGSET_IOPERM64 = REGSET_XFP,
- REGSET_XSTATE,
- REGSET_TLS,
- REGSET_IOPERM32,
+enum x86_regset_32 {
+ REGSET32_GENERAL,
+ REGSET32_FP,
+ REGSET32_XFP,
+ REGSET32_XSTATE,
+ REGSET32_TLS,
+ REGSET32_IOPERM,
};
+enum x86_regset_64 {
+ REGSET64_GENERAL,
+ REGSET64_FP,
+ REGSET64_IOPERM,
+ REGSET64_XSTATE,
+};
+
+#define REGSET_GENERAL \
+({ \
+ BUILD_BUG_ON((int)REGSET32_GENERAL != (int)REGSET64_GENERAL); \
+ REGSET32_GENERAL; \
+})
+
+#define REGSET_FP \
+({ \
+ BUILD_BUG_ON((int)REGSET32_FP != (int)REGSET64_FP); \
+ REGSET32_FP; \
+})
+
+
struct pt_regs_offset {
const char *name;
int offset;
@@ -788,13 +807,13 @@ long arch_ptrace(struct task_struct *child, long request,
#ifdef CONFIG_X86_32
case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */
return copy_regset_to_user(child, &user_x86_32_view,
- REGSET_XFP,
+ REGSET32_XFP,
0, sizeof(struct user_fxsr_struct),
datap) ? -EIO : 0;
case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */
return copy_regset_from_user(child, &user_x86_32_view,
- REGSET_XFP,
+ REGSET32_XFP,
0, sizeof(struct user_fxsr_struct),
datap) ? -EIO : 0;
#endif
@@ -1086,13 +1105,13 @@ static long ia32_arch_ptrace(struct task_struct *child, compat_long_t request,
case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */
return copy_regset_to_user(child, &user_x86_32_view,
- REGSET_XFP, 0,
+ REGSET32_XFP, 0,
sizeof(struct user32_fxsr_struct),
datap);
case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */
return copy_regset_from_user(child, &user_x86_32_view,
- REGSET_XFP, 0,
+ REGSET32_XFP, 0,
sizeof(struct user32_fxsr_struct),
datap);
@@ -1215,29 +1234,38 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
#ifdef CONFIG_X86_64
static struct user_regset x86_64_regsets[] __ro_after_init = {
- [REGSET_GENERAL] = {
- .core_note_type = NT_PRSTATUS,
- .n = sizeof(struct user_regs_struct) / sizeof(long),
- .size = sizeof(long), .align = sizeof(long),
- .regset_get = genregs_get, .set = genregs_set
+ [REGSET64_GENERAL] = {
+ .core_note_type = NT_PRSTATUS,
+ .n = sizeof(struct user_regs_struct) / sizeof(long),
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .regset_get = genregs_get,
+ .set = genregs_set
},
- [REGSET_FP] = {
- .core_note_type = NT_PRFPREG,
- .n = sizeof(struct fxregs_state) / sizeof(long),
- .size = sizeof(long), .align = sizeof(long),
- .active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set
+ [REGSET64_FP] = {
+ .core_note_type = NT_PRFPREG,
+ .n = sizeof(struct fxregs_state) / sizeof(long),
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .active = regset_xregset_fpregs_active,
+ .regset_get = xfpregs_get,
+ .set = xfpregs_set
},
- [REGSET_XSTATE] = {
- .core_note_type = NT_X86_XSTATE,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = xstateregs_active, .regset_get = xstateregs_get,
- .set = xstateregs_set
+ [REGSET64_XSTATE] = {
+ .core_note_type = NT_X86_XSTATE,
+ .size = sizeof(u64),
+ .align = sizeof(u64),
+ .active = xstateregs_active,
+ .regset_get = xstateregs_get,
+ .set = xstateregs_set
},
- [REGSET_IOPERM64] = {
- .core_note_type = NT_386_IOPERM,
- .n = IO_BITMAP_LONGS,
- .size = sizeof(long), .align = sizeof(long),
- .active = ioperm_active, .regset_get = ioperm_get
+ [REGSET64_IOPERM] = {
+ .core_note_type = NT_386_IOPERM,
+ .n = IO_BITMAP_LONGS,
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .active = ioperm_active,
+ .regset_get = ioperm_get
},
};
@@ -1256,43 +1284,57 @@ static const struct user_regset_view user_x86_64_view = {
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
static struct user_regset x86_32_regsets[] __ro_after_init = {
- [REGSET_GENERAL] = {
- .core_note_type = NT_PRSTATUS,
- .n = sizeof(struct user_regs_struct32) / sizeof(u32),
- .size = sizeof(u32), .align = sizeof(u32),
- .regset_get = genregs32_get, .set = genregs32_set
+ [REGSET32_GENERAL] = {
+ .core_note_type = NT_PRSTATUS,
+ .n = sizeof(struct user_regs_struct32) / sizeof(u32),
+ .size = sizeof(u32),
+ .align = sizeof(u32),
+ .regset_get = genregs32_get,
+ .set = genregs32_set
},
- [REGSET_FP] = {
- .core_note_type = NT_PRFPREG,
- .n = sizeof(struct user_i387_ia32_struct) / sizeof(u32),
- .size = sizeof(u32), .align = sizeof(u32),
- .active = regset_fpregs_active, .regset_get = fpregs_get, .set = fpregs_set
+ [REGSET32_FP] = {
+ .core_note_type = NT_PRFPREG,
+ .n = sizeof(struct user_i387_ia32_struct) / sizeof(u32),
+ .size = sizeof(u32),
+ .align = sizeof(u32),
+ .active = regset_fpregs_active,
+ .regset_get = fpregs_get,
+ .set = fpregs_set
},
- [REGSET_XFP] = {
- .core_note_type = NT_PRXFPREG,
- .n = sizeof(struct fxregs_state) / sizeof(u32),
- .size = sizeof(u32), .align = sizeof(u32),
- .active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set
+ [REGSET32_XFP] = {
+ .core_note_type = NT_PRXFPREG,
+ .n = sizeof(struct fxregs_state) / sizeof(u32),
+ .size = sizeof(u32),
+ .align = sizeof(u32),
+ .active = regset_xregset_fpregs_active,
+ .regset_get = xfpregs_get,
+ .set = xfpregs_set
},
- [REGSET_XSTATE] = {
- .core_note_type = NT_X86_XSTATE,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = xstateregs_active, .regset_get = xstateregs_get,
- .set = xstateregs_set
+ [REGSET32_XSTATE] = {
+ .core_note_type = NT_X86_XSTATE,
+ .size = sizeof(u64),
+ .align = sizeof(u64),
+ .active = xstateregs_active,
+ .regset_get = xstateregs_get,
+ .set = xstateregs_set
},
- [REGSET_TLS] = {
- .core_note_type = NT_386_TLS,
- .n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN,
- .size = sizeof(struct user_desc),
- .align = sizeof(struct user_desc),
- .active = regset_tls_active,
- .regset_get = regset_tls_get, .set = regset_tls_set
+ [REGSET32_TLS] = {
+ .core_note_type = NT_386_TLS,
+ .n = GDT_ENTRY_TLS_ENTRIES,
+ .bias = GDT_ENTRY_TLS_MIN,
+ .size = sizeof(struct user_desc),
+ .align = sizeof(struct user_desc),
+ .active = regset_tls_active,
+ .regset_get = regset_tls_get,
+ .set = regset_tls_set
},
- [REGSET_IOPERM32] = {
- .core_note_type = NT_386_IOPERM,
- .n = IO_BITMAP_BYTES / sizeof(u32),
- .size = sizeof(u32), .align = sizeof(u32),
- .active = ioperm_active, .regset_get = ioperm_get
+ [REGSET32_IOPERM] = {
+ .core_note_type = NT_386_IOPERM,
+ .n = IO_BITMAP_BYTES / sizeof(u32),
+ .size = sizeof(u32),
+ .align = sizeof(u32),
+ .active = ioperm_active,
+ .regset_get = ioperm_get
},
};
@@ -1311,10 +1353,10 @@ u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask)
{
#ifdef CONFIG_X86_64
- x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64);
+ x86_64_regsets[REGSET64_XSTATE].n = size / sizeof(u64);
#endif
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
- x86_32_regsets[REGSET_XSTATE].n = size / sizeof(u64);
+ x86_32_regsets[REGSET32_XSTATE].n = size / sizeof(u64);
#endif
xstate_fx_sw_bytes[USER_XSTATE_XCR0_WORD] = xstate_mask;
}
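
The REGSET_GENERAL/REGSET_FP wrappers introduced above use a GNU statement expression so shared code gets a single index while the build breaks if the 32-bit and 64-bit tables ever drift apart. A stand-alone sketch of that pattern (hypothetical enums, user space, GCC/Clang only):

enum regset_32 { R32_GENERAL, R32_FP };
enum regset_64 { R64_GENERAL, R64_FP };

/* Minimal stand-in for the kernel's BUILD_BUG_ON(): a negative array size
 * on a true condition forces a compile error. */
#define BUILD_BUG_ON(x) ((void)sizeof(char[1 - 2 * !!(x)]))

#define REGSET_GENERAL_IDX					\
({								\
	BUILD_BUG_ON((int)R32_GENERAL != (int)R64_GENERAL);	\
	R32_GENERAL;						\
})

int main(void)
{
	return REGSET_GENERAL_IDX;	/* compiles only while the enums agree */
}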
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 9c7265b524c7..1504eb8d25aa 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -37,180 +37,27 @@
#include <asm/sighandling.h>
#include <asm/vm86.h>
-#ifdef CONFIG_X86_64
-#include <linux/compat.h>
-#include <asm/proto.h>
-#include <asm/ia32_unistd.h>
-#include <asm/fpu/xstate.h>
-#endif /* CONFIG_X86_64 */
-
#include <asm/syscall.h>
#include <asm/sigframe.h>
#include <asm/signal.h>
-#ifdef CONFIG_X86_64
-/*
- * If regs->ss will cause an IRET fault, change it. Otherwise leave it
- * alone. Using this generally makes no sense unless
- * user_64bit_mode(regs) would return true.
- */
-static void force_valid_ss(struct pt_regs *regs)
+static inline int is_ia32_compat_frame(struct ksignal *ksig)
{
- u32 ar;
- asm volatile ("lar %[old_ss], %[ar]\n\t"
- "jz 1f\n\t" /* If invalid: */
- "xorl %[ar], %[ar]\n\t" /* set ar = 0 */
- "1:"
- : [ar] "=r" (ar)
- : [old_ss] "rm" ((u16)regs->ss));
-
- /*
- * For a valid 64-bit user context, we need DPL 3, type
- * read-write data or read-write exp-down data, and S and P
- * set. We can't use VERW because VERW doesn't check the
- * P bit.
- */
- ar &= AR_DPL_MASK | AR_S | AR_P | AR_TYPE_MASK;
- if (ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA) &&
- ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA_EXPDOWN))
- regs->ss = __USER_DS;
+ return IS_ENABLED(CONFIG_IA32_EMULATION) &&
+ ksig->ka.sa.sa_flags & SA_IA32_ABI;
}
-# define CONTEXT_COPY_SIZE offsetof(struct sigcontext, reserved1)
-#else
-# define CONTEXT_COPY_SIZE sizeof(struct sigcontext)
-#endif
-static bool restore_sigcontext(struct pt_regs *regs,
- struct sigcontext __user *usc,
- unsigned long uc_flags)
+static inline int is_ia32_frame(struct ksignal *ksig)
{
- struct sigcontext sc;
-
- /* Always make any pending restarted system calls return -EINTR */
- current->restart_block.fn = do_no_restart_syscall;
-
- if (copy_from_user(&sc, usc, CONTEXT_COPY_SIZE))
- return false;
-
-#ifdef CONFIG_X86_32
- loadsegment(gs, sc.gs);
- regs->fs = sc.fs;
- regs->es = sc.es;
- regs->ds = sc.ds;
-#endif /* CONFIG_X86_32 */
-
- regs->bx = sc.bx;
- regs->cx = sc.cx;
- regs->dx = sc.dx;
- regs->si = sc.si;
- regs->di = sc.di;
- regs->bp = sc.bp;
- regs->ax = sc.ax;
- regs->sp = sc.sp;
- regs->ip = sc.ip;
-
-#ifdef CONFIG_X86_64
- regs->r8 = sc.r8;
- regs->r9 = sc.r9;
- regs->r10 = sc.r10;
- regs->r11 = sc.r11;
- regs->r12 = sc.r12;
- regs->r13 = sc.r13;
- regs->r14 = sc.r14;
- regs->r15 = sc.r15;
-#endif /* CONFIG_X86_64 */
-
- /* Get CS/SS and force CPL3 */
- regs->cs = sc.cs | 0x03;
- regs->ss = sc.ss | 0x03;
-
- regs->flags = (regs->flags & ~FIX_EFLAGS) | (sc.flags & FIX_EFLAGS);
- /* disable syscall checks */
- regs->orig_ax = -1;
-
-#ifdef CONFIG_X86_64
- /*
- * Fix up SS if needed for the benefit of old DOSEMU and
- * CRIU.
- */
- if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) && user_64bit_mode(regs)))
- force_valid_ss(regs);
-#endif
-
- return fpu__restore_sig((void __user *)sc.fpstate,
- IS_ENABLED(CONFIG_X86_32));
+ return IS_ENABLED(CONFIG_X86_32) || is_ia32_compat_frame(ksig);
}
-static __always_inline int
-__unsafe_setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
- struct pt_regs *regs, unsigned long mask)
+static inline int is_x32_frame(struct ksignal *ksig)
{
-#ifdef CONFIG_X86_32
- unsigned int gs;
- savesegment(gs, gs);
-
- unsafe_put_user(gs, (unsigned int __user *)&sc->gs, Efault);
- unsafe_put_user(regs->fs, (unsigned int __user *)&sc->fs, Efault);
- unsafe_put_user(regs->es, (unsigned int __user *)&sc->es, Efault);
- unsafe_put_user(regs->ds, (unsigned int __user *)&sc->ds, Efault);
-#endif /* CONFIG_X86_32 */
-
- unsafe_put_user(regs->di, &sc->di, Efault);
- unsafe_put_user(regs->si, &sc->si, Efault);
- unsafe_put_user(regs->bp, &sc->bp, Efault);
- unsafe_put_user(regs->sp, &sc->sp, Efault);
- unsafe_put_user(regs->bx, &sc->bx, Efault);
- unsafe_put_user(regs->dx, &sc->dx, Efault);
- unsafe_put_user(regs->cx, &sc->cx, Efault);
- unsafe_put_user(regs->ax, &sc->ax, Efault);
-#ifdef CONFIG_X86_64
- unsafe_put_user(regs->r8, &sc->r8, Efault);
- unsafe_put_user(regs->r9, &sc->r9, Efault);
- unsafe_put_user(regs->r10, &sc->r10, Efault);
- unsafe_put_user(regs->r11, &sc->r11, Efault);
- unsafe_put_user(regs->r12, &sc->r12, Efault);
- unsafe_put_user(regs->r13, &sc->r13, Efault);
- unsafe_put_user(regs->r14, &sc->r14, Efault);
- unsafe_put_user(regs->r15, &sc->r15, Efault);
-#endif /* CONFIG_X86_64 */
-
- unsafe_put_user(current->thread.trap_nr, &sc->trapno, Efault);
- unsafe_put_user(current->thread.error_code, &sc->err, Efault);
- unsafe_put_user(regs->ip, &sc->ip, Efault);
-#ifdef CONFIG_X86_32
- unsafe_put_user(regs->cs, (unsigned int __user *)&sc->cs, Efault);
- unsafe_put_user(regs->flags, &sc->flags, Efault);
- unsafe_put_user(regs->sp, &sc->sp_at_signal, Efault);
- unsafe_put_user(regs->ss, (unsigned int __user *)&sc->ss, Efault);
-#else /* !CONFIG_X86_32 */
- unsafe_put_user(regs->flags, &sc->flags, Efault);
- unsafe_put_user(regs->cs, &sc->cs, Efault);
- unsafe_put_user(0, &sc->gs, Efault);
- unsafe_put_user(0, &sc->fs, Efault);
- unsafe_put_user(regs->ss, &sc->ss, Efault);
-#endif /* CONFIG_X86_32 */
-
- unsafe_put_user(fpstate, (unsigned long __user *)&sc->fpstate, Efault);
-
- /* non-iBCS2 extensions.. */
- unsafe_put_user(mask, &sc->oldmask, Efault);
- unsafe_put_user(current->thread.cr2, &sc->cr2, Efault);
- return 0;
-Efault:
- return -EFAULT;
+ return IS_ENABLED(CONFIG_X86_X32_ABI) &&
+ ksig->ka.sa.sa_flags & SA_X32_ABI;
}
-#define unsafe_put_sigcontext(sc, fp, regs, set, label) \
-do { \
- if (__unsafe_setup_sigcontext(sc, fp, regs, set->sig[0])) \
- goto label; \
-} while(0);
-
-#define unsafe_put_sigmask(set, frame, label) \
- unsafe_put_user(*(__u64 *)(set), \
- (__u64 __user *)&(frame)->uc.uc_sigmask, \
- label)
-
/*
* Set up a signal frame.
*/
@@ -223,24 +70,12 @@ do { \
/*
* Determine which stack to use..
*/
-static unsigned long align_sigframe(unsigned long sp)
-{
-#ifdef CONFIG_X86_32
- /*
- * Align the stack pointer according to the i386 ABI,
- * i.e. so that on function entry ((sp + 4) & 15) == 0.
- */
- sp = ((sp + 4) & -FRAME_ALIGNMENT) - 4;
-#else /* !CONFIG_X86_32 */
- sp = round_down(sp, FRAME_ALIGNMENT) - 8;
-#endif
- return sp;
-}
-
-static void __user *
-get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
+void __user *
+get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size,
void __user **fpstate)
{
+ struct k_sigaction *ka = &ksig->ka;
+ int ia32_frame = is_ia32_frame(ksig);
/* Default to using normal stack */
bool nested_altstack = on_sig_stack(regs->sp);
bool entering_altstack = false;
@@ -249,7 +84,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
unsigned long buf_fx = 0;
/* redzone */
- if (IS_ENABLED(CONFIG_X86_64))
+ if (!ia32_frame)
sp -= 128;
/* This is the X/Open sanctioned signal stack switching. */
@@ -263,7 +98,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
sp = current->sas_ss_sp + current->sas_ss_size;
entering_altstack = true;
}
- } else if (IS_ENABLED(CONFIG_X86_32) &&
+ } else if (ia32_frame &&
!nested_altstack &&
regs->ss != __USER_DS &&
!(ka->sa.sa_flags & SA_RESTORER) &&
@@ -273,11 +108,19 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
entering_altstack = true;
}
- sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32),
- &buf_fx, &math_size);
+ sp = fpu__alloc_mathframe(sp, ia32_frame, &buf_fx, &math_size);
*fpstate = (void __user *)sp;
- sp = align_sigframe(sp - frame_size);
+ sp -= frame_size;
+
+ if (ia32_frame)
+ /*
+ * Align the stack pointer according to the i386 ABI,
+ * i.e. so that on function entry ((sp + 4) & 15) == 0.
+ */
+ sp = ((sp + 4) & -FRAME_ALIGNMENT) - 4;
+ else
+ sp = round_down(sp, FRAME_ALIGNMENT) - 8;
/*
* If we are on the alternate signal stack and would overflow it, don't.
@@ -300,391 +143,6 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
return (void __user *)sp;
}
-#ifdef CONFIG_X86_32
-static const struct {
- u16 poplmovl;
- u32 val;
- u16 int80;
-} __attribute__((packed)) retcode = {
- 0xb858, /* popl %eax; movl $..., %eax */
- __NR_sigreturn,
- 0x80cd, /* int $0x80 */
-};
-
-static const struct {
- u8 movl;
- u32 val;
- u16 int80;
- u8 pad;
-} __attribute__((packed)) rt_retcode = {
- 0xb8, /* movl $..., %eax */
- __NR_rt_sigreturn,
- 0x80cd, /* int $0x80 */
- 0
-};
-
-static int
-__setup_frame(int sig, struct ksignal *ksig, sigset_t *set,
- struct pt_regs *regs)
-{
- struct sigframe __user *frame;
- void __user *restorer;
- void __user *fp = NULL;
-
- frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fp);
-
- if (!user_access_begin(frame, sizeof(*frame)))
- return -EFAULT;
-
- unsafe_put_user(sig, &frame->sig, Efault);
- unsafe_put_sigcontext(&frame->sc, fp, regs, set, Efault);
- unsafe_put_user(set->sig[1], &frame->extramask[0], Efault);
- if (current->mm->context.vdso)
- restorer = current->mm->context.vdso +
- vdso_image_32.sym___kernel_sigreturn;
- else
- restorer = &frame->retcode;
- if (ksig->ka.sa.sa_flags & SA_RESTORER)
- restorer = ksig->ka.sa.sa_restorer;
-
- /* Set up to return from userspace. */
- unsafe_put_user(restorer, &frame->pretcode, Efault);
-
- /*
- * This is popl %eax ; movl $__NR_sigreturn, %eax ; int $0x80
- *
- * WE DO NOT USE IT ANY MORE! It's only left here for historical
- * reasons and because gdb uses it as a signature to notice
- * signal handler stack frames.
- */
- unsafe_put_user(*((u64 *)&retcode), (u64 *)frame->retcode, Efault);
- user_access_end();
-
- /* Set up registers for signal handler */
- regs->sp = (unsigned long)frame;
- regs->ip = (unsigned long)ksig->ka.sa.sa_handler;
- regs->ax = (unsigned long)sig;
- regs->dx = 0;
- regs->cx = 0;
-
- regs->ds = __USER_DS;
- regs->es = __USER_DS;
- regs->ss = __USER_DS;
- regs->cs = __USER_CS;
-
- return 0;
-
-Efault:
- user_access_end();
- return -EFAULT;
-}
-
-static int __setup_rt_frame(int sig, struct ksignal *ksig,
- sigset_t *set, struct pt_regs *regs)
-{
- struct rt_sigframe __user *frame;
- void __user *restorer;
- void __user *fp = NULL;
-
- frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fp);
-
- if (!user_access_begin(frame, sizeof(*frame)))
- return -EFAULT;
-
- unsafe_put_user(sig, &frame->sig, Efault);
- unsafe_put_user(&frame->info, &frame->pinfo, Efault);
- unsafe_put_user(&frame->uc, &frame->puc, Efault);
-
- /* Create the ucontext. */
- if (static_cpu_has(X86_FEATURE_XSAVE))
- unsafe_put_user(UC_FP_XSTATE, &frame->uc.uc_flags, Efault);
- else
- unsafe_put_user(0, &frame->uc.uc_flags, Efault);
- unsafe_put_user(0, &frame->uc.uc_link, Efault);
- unsafe_save_altstack(&frame->uc.uc_stack, regs->sp, Efault);
-
- /* Set up to return from userspace. */
- restorer = current->mm->context.vdso +
- vdso_image_32.sym___kernel_rt_sigreturn;
- if (ksig->ka.sa.sa_flags & SA_RESTORER)
- restorer = ksig->ka.sa.sa_restorer;
- unsafe_put_user(restorer, &frame->pretcode, Efault);
-
- /*
- * This is movl $__NR_rt_sigreturn, %ax ; int $0x80
- *
- * WE DO NOT USE IT ANY MORE! It's only left here for historical
- * reasons and because gdb uses it as a signature to notice
- * signal handler stack frames.
- */
- unsafe_put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode, Efault);
- unsafe_put_sigcontext(&frame->uc.uc_mcontext, fp, regs, set, Efault);
- unsafe_put_sigmask(set, frame, Efault);
- user_access_end();
-
- if (copy_siginfo_to_user(&frame->info, &ksig->info))
- return -EFAULT;
-
- /* Set up registers for signal handler */
- regs->sp = (unsigned long)frame;
- regs->ip = (unsigned long)ksig->ka.sa.sa_handler;
- regs->ax = (unsigned long)sig;
- regs->dx = (unsigned long)&frame->info;
- regs->cx = (unsigned long)&frame->uc;
-
- regs->ds = __USER_DS;
- regs->es = __USER_DS;
- regs->ss = __USER_DS;
- regs->cs = __USER_CS;
-
- return 0;
-Efault:
- user_access_end();
- return -EFAULT;
-}
-#else /* !CONFIG_X86_32 */
-static unsigned long frame_uc_flags(struct pt_regs *regs)
-{
- unsigned long flags;
-
- if (boot_cpu_has(X86_FEATURE_XSAVE))
- flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS;
- else
- flags = UC_SIGCONTEXT_SS;
-
- if (likely(user_64bit_mode(regs)))
- flags |= UC_STRICT_RESTORE_SS;
-
- return flags;
-}
-
-static int __setup_rt_frame(int sig, struct ksignal *ksig,
- sigset_t *set, struct pt_regs *regs)
-{
- struct rt_sigframe __user *frame;
- void __user *fp = NULL;
- unsigned long uc_flags;
-
- /* x86-64 should always use SA_RESTORER. */
- if (!(ksig->ka.sa.sa_flags & SA_RESTORER))
- return -EFAULT;
-
- frame = get_sigframe(&ksig->ka, regs, sizeof(struct rt_sigframe), &fp);
- uc_flags = frame_uc_flags(regs);
-
- if (!user_access_begin(frame, sizeof(*frame)))
- return -EFAULT;
-
- /* Create the ucontext. */
- unsafe_put_user(uc_flags, &frame->uc.uc_flags, Efault);
- unsafe_put_user(0, &frame->uc.uc_link, Efault);
- unsafe_save_altstack(&frame->uc.uc_stack, regs->sp, Efault);
-
- /* Set up to return from userspace. If provided, use a stub
- already in userspace. */
- unsafe_put_user(ksig->ka.sa.sa_restorer, &frame->pretcode, Efault);
- unsafe_put_sigcontext(&frame->uc.uc_mcontext, fp, regs, set, Efault);
- unsafe_put_sigmask(set, frame, Efault);
- user_access_end();
-
- if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
- if (copy_siginfo_to_user(&frame->info, &ksig->info))
- return -EFAULT;
- }
-
- /* Set up registers for signal handler */
- regs->di = sig;
- /* In case the signal handler was declared without prototypes */
- regs->ax = 0;
-
- /* This also works for non SA_SIGINFO handlers because they expect the
- next argument after the signal number on the stack. */
- regs->si = (unsigned long)&frame->info;
- regs->dx = (unsigned long)&frame->uc;
- regs->ip = (unsigned long) ksig->ka.sa.sa_handler;
-
- regs->sp = (unsigned long)frame;
-
- /*
- * Set up the CS and SS registers to run signal handlers in
- * 64-bit mode, even if the handler happens to be interrupting
- * 32-bit or 16-bit code.
- *
- * SS is subtle. In 64-bit mode, we don't need any particular
- * SS descriptor, but we do need SS to be valid. It's possible
- * that the old SS is entirely bogus -- this can happen if the
- * signal we're trying to deliver is #GP or #SS caused by a bad
- * SS value. We also have a compatibility issue here: DOSEMU
- * relies on the contents of the SS register indicating the
- * SS value at the time of the signal, even though that code in
- * DOSEMU predates sigreturn's ability to restore SS. (DOSEMU
- * avoids relying on sigreturn to restore SS; instead it uses
- * a trampoline.) So we do our best: if the old SS was valid,
- * we keep it. Otherwise we replace it.
- */
- regs->cs = __USER_CS;
-
- if (unlikely(regs->ss != __USER_DS))
- force_valid_ss(regs);
-
- return 0;
-
-Efault:
- user_access_end();
- return -EFAULT;
-}
-#endif /* CONFIG_X86_32 */
-
-#ifdef CONFIG_X86_X32_ABI
-static int x32_copy_siginfo_to_user(struct compat_siginfo __user *to,
- const struct kernel_siginfo *from)
-{
- struct compat_siginfo new;
-
- copy_siginfo_to_external32(&new, from);
- if (from->si_signo == SIGCHLD) {
- new._sifields._sigchld_x32._utime = from->si_utime;
- new._sifields._sigchld_x32._stime = from->si_stime;
- }
- if (copy_to_user(to, &new, sizeof(struct compat_siginfo)))
- return -EFAULT;
- return 0;
-}
-
-int copy_siginfo_to_user32(struct compat_siginfo __user *to,
- const struct kernel_siginfo *from)
-{
- if (in_x32_syscall())
- return x32_copy_siginfo_to_user(to, from);
- return __copy_siginfo_to_user32(to, from);
-}
-#endif /* CONFIG_X86_X32_ABI */
-
-static int x32_setup_rt_frame(struct ksignal *ksig,
- compat_sigset_t *set,
- struct pt_regs *regs)
-{
-#ifdef CONFIG_X86_X32_ABI
- struct rt_sigframe_x32 __user *frame;
- unsigned long uc_flags;
- void __user *restorer;
- void __user *fp = NULL;
-
- if (!(ksig->ka.sa.sa_flags & SA_RESTORER))
- return -EFAULT;
-
- frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fp);
-
- uc_flags = frame_uc_flags(regs);
-
- if (!user_access_begin(frame, sizeof(*frame)))
- return -EFAULT;
-
- /* Create the ucontext. */
- unsafe_put_user(uc_flags, &frame->uc.uc_flags, Efault);
- unsafe_put_user(0, &frame->uc.uc_link, Efault);
- unsafe_compat_save_altstack(&frame->uc.uc_stack, regs->sp, Efault);
- unsafe_put_user(0, &frame->uc.uc__pad0, Efault);
- restorer = ksig->ka.sa.sa_restorer;
- unsafe_put_user(restorer, (unsigned long __user *)&frame->pretcode, Efault);
- unsafe_put_sigcontext(&frame->uc.uc_mcontext, fp, regs, set, Efault);
- unsafe_put_sigmask(set, frame, Efault);
- user_access_end();
-
- if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
- if (x32_copy_siginfo_to_user(&frame->info, &ksig->info))
- return -EFAULT;
- }
-
- /* Set up registers for signal handler */
- regs->sp = (unsigned long) frame;
- regs->ip = (unsigned long) ksig->ka.sa.sa_handler;
-
- /* We use the x32 calling convention here... */
- regs->di = ksig->sig;
- regs->si = (unsigned long) &frame->info;
- regs->dx = (unsigned long) &frame->uc;
-
- loadsegment(ds, __USER_DS);
- loadsegment(es, __USER_DS);
-
- regs->cs = __USER_CS;
- regs->ss = __USER_DS;
-#endif /* CONFIG_X86_X32_ABI */
-
- return 0;
-#ifdef CONFIG_X86_X32_ABI
-Efault:
- user_access_end();
- return -EFAULT;
-#endif
-}
-
-/*
- * Do a signal return; undo the signal stack.
- */
-#ifdef CONFIG_X86_32
-SYSCALL_DEFINE0(sigreturn)
-{
- struct pt_regs *regs = current_pt_regs();
- struct sigframe __user *frame;
- sigset_t set;
-
- frame = (struct sigframe __user *)(regs->sp - 8);
-
- if (!access_ok(frame, sizeof(*frame)))
- goto badframe;
- if (__get_user(set.sig[0], &frame->sc.oldmask) ||
- __get_user(set.sig[1], &frame->extramask[0]))
- goto badframe;
-
- set_current_blocked(&set);
-
- /*
- * x86_32 has no uc_flags bits relevant to restore_sigcontext.
- * Save a few cycles by skipping the __get_user.
- */
- if (!restore_sigcontext(regs, &frame->sc, 0))
- goto badframe;
- return regs->ax;
-
-badframe:
- signal_fault(regs, frame, "sigreturn");
-
- return 0;
-}
-#endif /* CONFIG_X86_32 */
-
-SYSCALL_DEFINE0(rt_sigreturn)
-{
- struct pt_regs *regs = current_pt_regs();
- struct rt_sigframe __user *frame;
- sigset_t set;
- unsigned long uc_flags;
-
- frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
- if (!access_ok(frame, sizeof(*frame)))
- goto badframe;
- if (__get_user(*(__u64 *)&set, (__u64 __user *)&frame->uc.uc_sigmask))
- goto badframe;
- if (__get_user(uc_flags, &frame->uc.uc_flags))
- goto badframe;
-
- set_current_blocked(&set);
-
- if (!restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
- goto badframe;
-
- if (restore_altstack(&frame->uc.uc_stack))
- goto badframe;
-
- return regs->ax;
-
-badframe:
- signal_fault(regs, frame, "rt_sigreturn");
- return 0;
-}
-
/*
* There are four different struct types for signal frame: sigframe_ia32,
* rt_sigframe_ia32, rt_sigframe_x32, and rt_sigframe. Use the worst case
@@ -743,43 +201,22 @@ unsigned long get_sigframe_size(void)
return max_frame_size;
}
-static inline int is_ia32_compat_frame(struct ksignal *ksig)
-{
- return IS_ENABLED(CONFIG_IA32_EMULATION) &&
- ksig->ka.sa.sa_flags & SA_IA32_ABI;
-}
-
-static inline int is_ia32_frame(struct ksignal *ksig)
-{
- return IS_ENABLED(CONFIG_X86_32) || is_ia32_compat_frame(ksig);
-}
-
-static inline int is_x32_frame(struct ksignal *ksig)
-{
- return IS_ENABLED(CONFIG_X86_X32_ABI) &&
- ksig->ka.sa.sa_flags & SA_X32_ABI;
-}
-
static int
setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
{
- int usig = ksig->sig;
- sigset_t *set = sigmask_to_save();
- compat_sigset_t *cset = (compat_sigset_t *) set;
-
/* Perform fixup for the pre-signal frame. */
rseq_signal_deliver(ksig, regs);
/* Set up the stack frame */
if (is_ia32_frame(ksig)) {
if (ksig->ka.sa.sa_flags & SA_SIGINFO)
- return ia32_setup_rt_frame(usig, ksig, cset, regs);
+ return ia32_setup_rt_frame(ksig, regs);
else
- return ia32_setup_frame(usig, ksig, cset, regs);
+ return ia32_setup_frame(ksig, regs);
} else if (is_x32_frame(ksig)) {
- return x32_setup_rt_frame(ksig, cset, regs);
+ return x32_setup_rt_frame(ksig, regs);
} else {
- return __setup_rt_frame(ksig->sig, ksig, set, regs);
+ return x64_setup_rt_frame(ksig, regs);
}
}
@@ -969,36 +406,3 @@ bool sigaltstack_size_valid(size_t ss_size)
return true;
}
#endif /* CONFIG_DYNAMIC_SIGFRAME */
-
-#ifdef CONFIG_X86_X32_ABI
-COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn)
-{
- struct pt_regs *regs = current_pt_regs();
- struct rt_sigframe_x32 __user *frame;
- sigset_t set;
- unsigned long uc_flags;
-
- frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
-
- if (!access_ok(frame, sizeof(*frame)))
- goto badframe;
- if (__get_user(set.sig[0], (__u64 __user *)&frame->uc.uc_sigmask))
- goto badframe;
- if (__get_user(uc_flags, &frame->uc.uc_flags))
- goto badframe;
-
- set_current_blocked(&set);
-
- if (!restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
- goto badframe;
-
- if (compat_restore_altstack(&frame->uc.uc_stack))
- goto badframe;
-
- return regs->ax;
-
-badframe:
- signal_fault(regs, frame, "x32 rt_sigreturn");
- return 0;
-}
-#endif
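
get_sigframe() above now open-codes both stack alignment rules. A quick arithmetic check of the two formulas (user space, assuming FRAME_ALIGNMENT is 16; the starting sp is made up):

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned long sp = 0xbfff1234UL;

	/* i386 ABI: on handler entry (sp + 4) must be 16-byte aligned */
	unsigned long sp32 = ((sp + 4) & -16UL) - 4;
	assert(((sp32 + 4) & 15) == 0);

	/* x86-64 ABI: entry looks like "just after call", so sp % 16 == 8 */
	unsigned long sp64 = (sp & ~15UL) - 8;
	assert(sp64 % 16 == 8);

	printf("%#lx %#lx\n", sp32, sp64);
	return 0;
}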
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/kernel/signal_32.c
index c9c3859322fa..2553136cf39b 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/kernel/signal_32.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * linux/arch/x86_64/ia32/ia32_signal.c
- *
* Copyright (C) 1991, 1992 Linus Torvalds
*
* 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
@@ -26,7 +24,6 @@
#include <linux/uaccess.h>
#include <asm/fpu/signal.h>
#include <asm/ptrace.h>
-#include <asm/ia32_unistd.h>
#include <asm/user32.h>
#include <uapi/asm/sigcontext.h>
#include <asm/proto.h>
@@ -35,6 +32,9 @@
#include <asm/sighandling.h>
#include <asm/smap.h>
+#ifdef CONFIG_IA32_EMULATION
+#include <asm/ia32_unistd.h>
+
static inline void reload_segments(struct sigcontext_32 *sc)
{
unsigned int cur;
@@ -53,6 +53,21 @@ static inline void reload_segments(struct sigcontext_32 *sc)
loadsegment(es, sc->es | 0x03);
}
+#define sigset32_t compat_sigset_t
+#define restore_altstack32 compat_restore_altstack
+#define unsafe_save_altstack32 unsafe_compat_save_altstack
+
+#else
+
+#define sigset32_t sigset_t
+#define __NR_ia32_sigreturn __NR_sigreturn
+#define __NR_ia32_rt_sigreturn __NR_rt_sigreturn
+#define restore_altstack32 restore_altstack
+#define unsafe_save_altstack32 unsafe_save_altstack
+#define __copy_siginfo_to_user32 copy_siginfo_to_user
+
+#endif
+
/*
* Do a signal return; undo the signal stack.
*/
@@ -86,6 +101,7 @@ static bool ia32_restore_sigcontext(struct pt_regs *regs,
/* disable syscall checks */
regs->orig_ax = -1;
+#ifdef CONFIG_IA32_EMULATION
/*
* Reload fs and gs if they have changed in the signal
* handler. This does not handle long fs/gs base changes in
@@ -93,10 +109,17 @@ static bool ia32_restore_sigcontext(struct pt_regs *regs,
* normal case.
*/
reload_segments(&sc);
+#else
+ loadsegment(gs, sc.gs);
+ regs->fs = sc.fs;
+ regs->es = sc.es;
+ regs->ds = sc.ds;
+#endif
+
return fpu__restore_sig(compat_ptr(sc.fpstate), 1);
}
-COMPAT_SYSCALL_DEFINE0(sigreturn)
+SYSCALL32_DEFINE0(sigreturn)
{
struct pt_regs *regs = current_pt_regs();
struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
@@ -119,7 +142,7 @@ badframe:
return 0;
}
-COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
+SYSCALL32_DEFINE0(rt_sigreturn)
{
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe_ia32 __user *frame;
@@ -129,7 +152,7 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
if (!access_ok(frame, sizeof(*frame)))
goto badframe;
- if (__get_user(set.sig[0], (__u64 __user *)&frame->uc.uc_sigmask))
+ if (__get_user(*(__u64 *)&set, (__u64 __user *)&frame->uc.uc_sigmask))
goto badframe;
set_current_blocked(&set);
@@ -137,7 +160,7 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
if (!ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext))
goto badframe;
- if (compat_restore_altstack(&frame->uc.uc_stack))
+ if (restore_altstack32(&frame->uc.uc_stack))
goto badframe;
return regs->ax;
@@ -159,9 +182,15 @@ __unsafe_setup_sigcontext32(struct sigcontext_32 __user *sc,
struct pt_regs *regs, unsigned int mask)
{
unsafe_put_user(get_user_seg(gs), (unsigned int __user *)&sc->gs, Efault);
+#ifdef CONFIG_IA32_EMULATION
unsafe_put_user(get_user_seg(fs), (unsigned int __user *)&sc->fs, Efault);
unsafe_put_user(get_user_seg(ds), (unsigned int __user *)&sc->ds, Efault);
unsafe_put_user(get_user_seg(es), (unsigned int __user *)&sc->es, Efault);
+#else
+ unsafe_put_user(regs->fs, (unsigned int __user *)&sc->fs, Efault);
+ unsafe_put_user(regs->es, (unsigned int __user *)&sc->es, Efault);
+ unsafe_put_user(regs->ds, (unsigned int __user *)&sc->ds, Efault);
+#endif
unsafe_put_user(regs->di, &sc->di, Efault);
unsafe_put_user(regs->si, &sc->si, Efault);
@@ -196,43 +225,9 @@ do { \
goto label; \
} while(0)
-/*
- * Determine which stack to use..
- */
-static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
- size_t frame_size,
- void __user **fpstate)
-{
- unsigned long sp, fx_aligned, math_size;
-
- /* Default to using normal stack */
- sp = regs->sp;
-
- /* This is the X/Open sanctioned signal stack switching. */
- if (ksig->ka.sa.sa_flags & SA_ONSTACK)
- sp = sigsp(sp, ksig);
- /* This is the legacy signal stack switching. */
- else if (regs->ss != __USER32_DS &&
- !(ksig->ka.sa.sa_flags & SA_RESTORER) &&
- ksig->ka.sa.sa_restorer)
- sp = (unsigned long) ksig->ka.sa.sa_restorer;
-
- sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);
- *fpstate = (struct _fpstate_32 __user *) sp;
- if (!copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned,
- math_size))
- return (void __user *) -1L;
-
- sp -= frame_size;
- /* Align the stack pointer according to the i386 ABI,
- * i.e. so that on function entry ((sp + 4) & 15) == 0. */
- sp = ((sp + 4) & -16ul) - 4;
- return (void __user *) sp;
-}
-
-int ia32_setup_frame(int sig, struct ksignal *ksig,
- compat_sigset_t *set, struct pt_regs *regs)
+int ia32_setup_frame(struct ksignal *ksig, struct pt_regs *regs)
{
+ sigset32_t *set = (sigset32_t *) sigmask_to_save();
struct sigframe_ia32 __user *frame;
void __user *restorer;
void __user *fp = NULL;
@@ -264,7 +259,7 @@ int ia32_setup_frame(int sig, struct ksignal *ksig,
if (!user_access_begin(frame, sizeof(*frame)))
return -EFAULT;
- unsafe_put_user(sig, &frame->sig, Efault);
+ unsafe_put_user(ksig->sig, &frame->sig, Efault);
unsafe_put_sigcontext32(&frame->sc, fp, regs, set, Efault);
unsafe_put_user(set->sig[1], &frame->extramask[0], Efault);
unsafe_put_user(ptr_to_compat(restorer), &frame->pretcode, Efault);
@@ -280,15 +275,20 @@ int ia32_setup_frame(int sig, struct ksignal *ksig,
regs->ip = (unsigned long) ksig->ka.sa.sa_handler;
/* Make -mregparm=3 work */
- regs->ax = sig;
+ regs->ax = ksig->sig;
regs->dx = 0;
regs->cx = 0;
- loadsegment(ds, __USER32_DS);
- loadsegment(es, __USER32_DS);
+#ifdef CONFIG_IA32_EMULATION
+ loadsegment(ds, __USER_DS);
+ loadsegment(es, __USER_DS);
+#else
+ regs->ds = __USER_DS;
+ regs->es = __USER_DS;
+#endif
regs->cs = __USER32_CS;
- regs->ss = __USER32_DS;
+ regs->ss = __USER_DS;
return 0;
Efault:
@@ -296,9 +296,9 @@ Efault:
return -EFAULT;
}
-int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
- compat_sigset_t *set, struct pt_regs *regs)
+int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
{
+ sigset32_t *set = (sigset32_t *) sigmask_to_save();
struct rt_sigframe_ia32 __user *frame;
void __user *restorer;
void __user *fp = NULL;
@@ -321,7 +321,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
if (!user_access_begin(frame, sizeof(*frame)))
return -EFAULT;
- unsafe_put_user(sig, &frame->sig, Efault);
+ unsafe_put_user(ksig->sig, &frame->sig, Efault);
unsafe_put_user(ptr_to_compat(&frame->info), &frame->pinfo, Efault);
unsafe_put_user(ptr_to_compat(&frame->uc), &frame->puc, Efault);
@@ -331,7 +331,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
else
unsafe_put_user(0, &frame->uc.uc_flags, Efault);
unsafe_put_user(0, &frame->uc.uc_link, Efault);
- unsafe_compat_save_altstack(&frame->uc.uc_stack, regs->sp, Efault);
+ unsafe_save_altstack32(&frame->uc.uc_stack, regs->sp, Efault);
if (ksig->ka.sa.sa_flags & SA_RESTORER)
restorer = ksig->ka.sa.sa_restorer;
@@ -357,15 +357,20 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
regs->ip = (unsigned long) ksig->ka.sa.sa_handler;
/* Make -mregparm=3 work */
- regs->ax = sig;
+ regs->ax = ksig->sig;
regs->dx = (unsigned long) &frame->info;
regs->cx = (unsigned long) &frame->uc;
- loadsegment(ds, __USER32_DS);
- loadsegment(es, __USER32_DS);
+#ifdef CONFIG_IA32_EMULATION
+ loadsegment(ds, __USER_DS);
+ loadsegment(es, __USER_DS);
+#else
+ regs->ds = __USER_DS;
+ regs->es = __USER_DS;
+#endif
regs->cs = __USER32_CS;
- regs->ss = __USER32_DS;
+ regs->ss = __USER_DS;
return 0;
Efault:
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
new file mode 100644
index 000000000000..ff9c55064223
--- /dev/null
+++ b/arch/x86/kernel/signal_64.c
@@ -0,0 +1,383 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/uaccess.h>
+#include <linux/syscalls.h>
+
+#include <asm/ucontext.h>
+#include <asm/fpu/signal.h>
+#include <asm/sighandling.h>
+
+#include <asm/syscall.h>
+#include <asm/sigframe.h>
+#include <asm/signal.h>
+
+/*
+ * If regs->ss will cause an IRET fault, change it. Otherwise leave it
+ * alone. Using this generally makes no sense unless
+ * user_64bit_mode(regs) would return true.
+ */
+static void force_valid_ss(struct pt_regs *regs)
+{
+ u32 ar;
+ asm volatile ("lar %[old_ss], %[ar]\n\t"
+ "jz 1f\n\t" /* If invalid: */
+ "xorl %[ar], %[ar]\n\t" /* set ar = 0 */
+ "1:"
+ : [ar] "=r" (ar)
+ : [old_ss] "rm" ((u16)regs->ss));
+
+ /*
+ * For a valid 64-bit user context, we need DPL 3, type
+ * read-write data or read-write exp-down data, and S and P
+ * set. We can't use VERW because VERW doesn't check the
+ * P bit.
+ */
+ ar &= AR_DPL_MASK | AR_S | AR_P | AR_TYPE_MASK;
+ if (ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA) &&
+ ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA_EXPDOWN))
+ regs->ss = __USER_DS;
+}
+
+static bool restore_sigcontext(struct pt_regs *regs,
+ struct sigcontext __user *usc,
+ unsigned long uc_flags)
+{
+ struct sigcontext sc;
+
+ /* Always make any pending restarted system calls return -EINTR */
+ current->restart_block.fn = do_no_restart_syscall;
+
+ if (copy_from_user(&sc, usc, offsetof(struct sigcontext, reserved1)))
+ return false;
+
+ regs->bx = sc.bx;
+ regs->cx = sc.cx;
+ regs->dx = sc.dx;
+ regs->si = sc.si;
+ regs->di = sc.di;
+ regs->bp = sc.bp;
+ regs->ax = sc.ax;
+ regs->sp = sc.sp;
+ regs->ip = sc.ip;
+ regs->r8 = sc.r8;
+ regs->r9 = sc.r9;
+ regs->r10 = sc.r10;
+ regs->r11 = sc.r11;
+ regs->r12 = sc.r12;
+ regs->r13 = sc.r13;
+ regs->r14 = sc.r14;
+ regs->r15 = sc.r15;
+
+ /* Get CS/SS and force CPL3 */
+ regs->cs = sc.cs | 0x03;
+ regs->ss = sc.ss | 0x03;
+
+ regs->flags = (regs->flags & ~FIX_EFLAGS) | (sc.flags & FIX_EFLAGS);
+ /* disable syscall checks */
+ regs->orig_ax = -1;
+
+ /*
+ * Fix up SS if needed for the benefit of old DOSEMU and
+ * CRIU.
+ */
+ if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) && user_64bit_mode(regs)))
+ force_valid_ss(regs);
+
+ return fpu__restore_sig((void __user *)sc.fpstate, 0);
+}
+
+static __always_inline int
+__unsafe_setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
+ struct pt_regs *regs, unsigned long mask)
+{
+ unsafe_put_user(regs->di, &sc->di, Efault);
+ unsafe_put_user(regs->si, &sc->si, Efault);
+ unsafe_put_user(regs->bp, &sc->bp, Efault);
+ unsafe_put_user(regs->sp, &sc->sp, Efault);
+ unsafe_put_user(regs->bx, &sc->bx, Efault);
+ unsafe_put_user(regs->dx, &sc->dx, Efault);
+ unsafe_put_user(regs->cx, &sc->cx, Efault);
+ unsafe_put_user(regs->ax, &sc->ax, Efault);
+ unsafe_put_user(regs->r8, &sc->r8, Efault);
+ unsafe_put_user(regs->r9, &sc->r9, Efault);
+ unsafe_put_user(regs->r10, &sc->r10, Efault);
+ unsafe_put_user(regs->r11, &sc->r11, Efault);
+ unsafe_put_user(regs->r12, &sc->r12, Efault);
+ unsafe_put_user(regs->r13, &sc->r13, Efault);
+ unsafe_put_user(regs->r14, &sc->r14, Efault);
+ unsafe_put_user(regs->r15, &sc->r15, Efault);
+
+ unsafe_put_user(current->thread.trap_nr, &sc->trapno, Efault);
+ unsafe_put_user(current->thread.error_code, &sc->err, Efault);
+ unsafe_put_user(regs->ip, &sc->ip, Efault);
+ unsafe_put_user(regs->flags, &sc->flags, Efault);
+ unsafe_put_user(regs->cs, &sc->cs, Efault);
+ unsafe_put_user(0, &sc->gs, Efault);
+ unsafe_put_user(0, &sc->fs, Efault);
+ unsafe_put_user(regs->ss, &sc->ss, Efault);
+
+ unsafe_put_user(fpstate, (unsigned long __user *)&sc->fpstate, Efault);
+
+ /* non-iBCS2 extensions.. */
+ unsafe_put_user(mask, &sc->oldmask, Efault);
+ unsafe_put_user(current->thread.cr2, &sc->cr2, Efault);
+ return 0;
+Efault:
+ return -EFAULT;
+}
+
+#define unsafe_put_sigcontext(sc, fp, regs, set, label) \
+do { \
+ if (__unsafe_setup_sigcontext(sc, fp, regs, set->sig[0])) \
+ goto label; \
+} while(0);
+
+#define unsafe_put_sigmask(set, frame, label) \
+ unsafe_put_user(*(__u64 *)(set), \
+ (__u64 __user *)&(frame)->uc.uc_sigmask, \
+ label)
+
+static unsigned long frame_uc_flags(struct pt_regs *regs)
+{
+ unsigned long flags;
+
+ if (boot_cpu_has(X86_FEATURE_XSAVE))
+ flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS;
+ else
+ flags = UC_SIGCONTEXT_SS;
+
+ if (likely(user_64bit_mode(regs)))
+ flags |= UC_STRICT_RESTORE_SS;
+
+ return flags;
+}
+
+int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
+{
+ sigset_t *set = sigmask_to_save();
+ struct rt_sigframe __user *frame;
+ void __user *fp = NULL;
+ unsigned long uc_flags;
+
+ /* x86-64 should always use SA_RESTORER. */
+ if (!(ksig->ka.sa.sa_flags & SA_RESTORER))
+ return -EFAULT;
+
+ frame = get_sigframe(ksig, regs, sizeof(struct rt_sigframe), &fp);
+ uc_flags = frame_uc_flags(regs);
+
+ if (!user_access_begin(frame, sizeof(*frame)))
+ return -EFAULT;
+
+ /* Create the ucontext. */
+ unsafe_put_user(uc_flags, &frame->uc.uc_flags, Efault);
+ unsafe_put_user(0, &frame->uc.uc_link, Efault);
+ unsafe_save_altstack(&frame->uc.uc_stack, regs->sp, Efault);
+
+ /* Set up to return from userspace. If provided, use a stub
+ already in userspace. */
+ unsafe_put_user(ksig->ka.sa.sa_restorer, &frame->pretcode, Efault);
+ unsafe_put_sigcontext(&frame->uc.uc_mcontext, fp, regs, set, Efault);
+ unsafe_put_sigmask(set, frame, Efault);
+ user_access_end();
+
+ if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
+ if (copy_siginfo_to_user(&frame->info, &ksig->info))
+ return -EFAULT;
+ }
+
+ /* Set up registers for signal handler */
+ regs->di = ksig->sig;
+ /* In case the signal handler was declared without prototypes */
+ regs->ax = 0;
+
+ /* This also works for non SA_SIGINFO handlers because they expect the
+ next argument after the signal number on the stack. */
+ regs->si = (unsigned long)&frame->info;
+ regs->dx = (unsigned long)&frame->uc;
+ regs->ip = (unsigned long) ksig->ka.sa.sa_handler;
+
+ regs->sp = (unsigned long)frame;
+
+ /*
+ * Set up the CS and SS registers to run signal handlers in
+ * 64-bit mode, even if the handler happens to be interrupting
+ * 32-bit or 16-bit code.
+ *
+ * SS is subtle. In 64-bit mode, we don't need any particular
+ * SS descriptor, but we do need SS to be valid. It's possible
+ * that the old SS is entirely bogus -- this can happen if the
+ * signal we're trying to deliver is #GP or #SS caused by a bad
+ * SS value. We also have a compatibility issue here: DOSEMU
+ * relies on the contents of the SS register indicating the
+ * SS value at the time of the signal, even though that code in
+ * DOSEMU predates sigreturn's ability to restore SS. (DOSEMU
+ * avoids relying on sigreturn to restore SS; instead it uses
+ * a trampoline.) So we do our best: if the old SS was valid,
+ * we keep it. Otherwise we replace it.
+ */
+ regs->cs = __USER_CS;
+
+ if (unlikely(regs->ss != __USER_DS))
+ force_valid_ss(regs);
+
+ return 0;
+
+Efault:
+ user_access_end();
+ return -EFAULT;
+}
+
+/*
+ * Do a signal return; undo the signal stack.
+ */
+SYSCALL_DEFINE0(rt_sigreturn)
+{
+ struct pt_regs *regs = current_pt_regs();
+ struct rt_sigframe __user *frame;
+ sigset_t set;
+ unsigned long uc_flags;
+
+ frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
+ if (!access_ok(frame, sizeof(*frame)))
+ goto badframe;
+ if (__get_user(*(__u64 *)&set, (__u64 __user *)&frame->uc.uc_sigmask))
+ goto badframe;
+ if (__get_user(uc_flags, &frame->uc.uc_flags))
+ goto badframe;
+
+ set_current_blocked(&set);
+
+ if (!restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
+ goto badframe;
+
+ if (restore_altstack(&frame->uc.uc_stack))
+ goto badframe;
+
+ return regs->ax;
+
+badframe:
+ signal_fault(regs, frame, "rt_sigreturn");
+ return 0;
+}
+
+#ifdef CONFIG_X86_X32_ABI
+static int x32_copy_siginfo_to_user(struct compat_siginfo __user *to,
+ const struct kernel_siginfo *from)
+{
+ struct compat_siginfo new;
+
+ copy_siginfo_to_external32(&new, from);
+ if (from->si_signo == SIGCHLD) {
+ new._sifields._sigchld_x32._utime = from->si_utime;
+ new._sifields._sigchld_x32._stime = from->si_stime;
+ }
+ if (copy_to_user(to, &new, sizeof(struct compat_siginfo)))
+ return -EFAULT;
+ return 0;
+}
+
+int copy_siginfo_to_user32(struct compat_siginfo __user *to,
+ const struct kernel_siginfo *from)
+{
+ if (in_x32_syscall())
+ return x32_copy_siginfo_to_user(to, from);
+ return __copy_siginfo_to_user32(to, from);
+}
+
+int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
+{
+ compat_sigset_t *set = (compat_sigset_t *) sigmask_to_save();
+ struct rt_sigframe_x32 __user *frame;
+ unsigned long uc_flags;
+ void __user *restorer;
+ void __user *fp = NULL;
+
+ if (!(ksig->ka.sa.sa_flags & SA_RESTORER))
+ return -EFAULT;
+
+ frame = get_sigframe(ksig, regs, sizeof(*frame), &fp);
+
+ uc_flags = frame_uc_flags(regs);
+
+ if (!user_access_begin(frame, sizeof(*frame)))
+ return -EFAULT;
+
+ /* Create the ucontext. */
+ unsafe_put_user(uc_flags, &frame->uc.uc_flags, Efault);
+ unsafe_put_user(0, &frame->uc.uc_link, Efault);
+ unsafe_compat_save_altstack(&frame->uc.uc_stack, regs->sp, Efault);
+ unsafe_put_user(0, &frame->uc.uc__pad0, Efault);
+ restorer = ksig->ka.sa.sa_restorer;
+ unsafe_put_user(restorer, (unsigned long __user *)&frame->pretcode, Efault);
+ unsafe_put_sigcontext(&frame->uc.uc_mcontext, fp, regs, set, Efault);
+ unsafe_put_sigmask(set, frame, Efault);
+ user_access_end();
+
+ if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
+ if (x32_copy_siginfo_to_user(&frame->info, &ksig->info))
+ return -EFAULT;
+ }
+
+ /* Set up registers for signal handler */
+ regs->sp = (unsigned long) frame;
+ regs->ip = (unsigned long) ksig->ka.sa.sa_handler;
+
+ /* We use the x32 calling convention here... */
+ regs->di = ksig->sig;
+ regs->si = (unsigned long) &frame->info;
+ regs->dx = (unsigned long) &frame->uc;
+
+ loadsegment(ds, __USER_DS);
+ loadsegment(es, __USER_DS);
+
+ regs->cs = __USER_CS;
+ regs->ss = __USER_DS;
+
+ return 0;
+
+Efault:
+ user_access_end();
+ return -EFAULT;
+}
+
+COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn)
+{
+ struct pt_regs *regs = current_pt_regs();
+ struct rt_sigframe_x32 __user *frame;
+ sigset_t set;
+ unsigned long uc_flags;
+
+ frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
+
+ if (!access_ok(frame, sizeof(*frame)))
+ goto badframe;
+ if (__get_user(set.sig[0], (__u64 __user *)&frame->uc.uc_sigmask))
+ goto badframe;
+ if (__get_user(uc_flags, &frame->uc.uc_flags))
+ goto badframe;
+
+ set_current_blocked(&set);
+
+ if (!restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
+ goto badframe;
+
+ if (compat_restore_altstack(&frame->uc.uc_stack))
+ goto badframe;
+
+ return regs->ax;
+
+badframe:
+ signal_fault(regs, frame, "x32 rt_sigreturn");
+ return 0;
+}
+#endif /* CONFIG_X86_X32_ABI */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d3fdec706f1d..d1e1679f32cf 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -68,13 +68,13 @@
#ifdef CONFIG_X86_64
#include <asm/x86_init.h>
-#include <asm/proto.h>
#else
#include <asm/processor-flags.h>
#include <asm/setup.h>
-#include <asm/proto.h>
#endif
+#include <asm/proto.h>
+
DECLARE_BITMAP(system_vectors, NR_VECTORS);
static inline void cond_local_irq_enable(struct pt_regs *regs)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index cafacb2e58cc..a78e73da4a74 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -51,7 +51,7 @@ int tsc_clocksource_reliable;
static u32 art_to_tsc_numerator;
static u32 art_to_tsc_denominator;
static u64 art_to_tsc_offset;
-struct clocksource *art_related_clocksource;
+static struct clocksource *art_related_clocksource;
struct cyc2ns {
struct cyc2ns_data data[2]; /* 0 + 2*16 = 32 */
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index b63cf8f7745e..6c07f6daaa22 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -722,8 +722,9 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
switch (opc1) {
case 0xeb: /* jmp 8 */
case 0xe9: /* jmp 32 */
- case 0x90: /* prefix* + nop; same as jmp with .offs = 0 */
break;
+ case 0x90: /* prefix* + nop; same as jmp with .offs = 0 */
+ goto setup;
case 0xe8: /* call relative */
branch_clear_offset(auprobe, insn);
@@ -753,6 +754,7 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
return -ENOTSUPP;
}
+setup:
auprobe->branch.opc1 = opc1;
auprobe->branch.ilen = insn->length;
auprobe->branch.offs = insn->immediate.value;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 62bc7a01cecc..c92c49a0b35b 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -665,7 +665,7 @@ void kvm_set_cpu_caps(void)
);
kvm_cpu_cap_init_scattered(CPUID_12_EAX,
- SF(SGX1) | SF(SGX2)
+ SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA)
);
kvm_cpu_cap_mask(CPUID_8000_0001_ECX,
@@ -1047,9 +1047,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
* userspace. ATTRIBUTES.XFRM is not adjusted as userspace is
* expected to derive it from supported XCR0.
*/
- entry->eax &= SGX_ATTR_DEBUG | SGX_ATTR_MODE64BIT |
- SGX_ATTR_PROVISIONKEY | SGX_ATTR_EINITTOKENKEY |
- SGX_ATTR_KSS;
+ entry->eax &= SGX_ATTR_PRIV_MASK | SGX_ATTR_UNPRIV_MASK;
entry->ebx &= 0;
break;
/* Intel PT */
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 1ccb769f62af..b6f96d47e596 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2443,6 +2443,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
{
bool list_unstable, zapped_root = false;
+ lockdep_assert_held_write(&kvm->mmu_lock);
trace_kvm_mmu_prepare_zap_page(sp);
++kvm->stat.mmu_shadow_zapped;
*nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list);
@@ -4262,14 +4263,14 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
if (is_page_fault_stale(vcpu, fault, mmu_seq))
goto out_unlock;
- r = make_mmu_pages_available(vcpu);
- if (r)
- goto out_unlock;
-
- if (is_tdp_mmu_fault)
+ if (is_tdp_mmu_fault) {
r = kvm_tdp_mmu_map(vcpu, fault);
- else
+ } else {
+ r = make_mmu_pages_available(vcpu);
+ if (r)
+ goto out_unlock;
r = __direct_map(vcpu, fault);
+ }
out_unlock:
if (is_tdp_mmu_fault)
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index a19d473d0184..4e5b8444f161 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -23,6 +23,7 @@ enum kvm_only_cpuid_leafs {
/* Intel-defined SGX sub-features, CPUID level 0x12 (EAX). */
#define KVM_X86_FEATURE_SGX1 KVM_X86_FEATURE(CPUID_12_EAX, 0)
#define KVM_X86_FEATURE_SGX2 KVM_X86_FEATURE(CPUID_12_EAX, 1)
+#define KVM_X86_FEATURE_SGX_EDECCSSA KVM_X86_FEATURE(CPUID_12_EAX, 11)
struct cpuid_reg {
u32 function;
@@ -78,6 +79,8 @@ static __always_inline u32 __feature_translate(int x86_feature)
return KVM_X86_FEATURE_SGX1;
else if (x86_feature == X86_FEATURE_SGX2)
return KVM_X86_FEATURE_SGX2;
+ else if (x86_feature == X86_FEATURE_SGX_EDECCSSA)
+ return KVM_X86_FEATURE_SGX_EDECCSSA;
return x86_feature;
}
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 4c620999d230..995bc0f90759 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1091,6 +1091,12 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
static void nested_svm_triple_fault(struct kvm_vcpu *vcpu)
{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SHUTDOWN))
+ return;
+
+ kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu);
nested_svm_simple_vmexit(to_svm(vcpu), SVM_EXIT_SHUTDOWN);
}
@@ -1125,6 +1131,9 @@ void svm_free_nested(struct vcpu_svm *svm)
if (!svm->nested.initialized)
return;
+ if (WARN_ON_ONCE(svm->vmcb != svm->vmcb01.ptr))
+ svm_switch_vmcb(svm, &svm->vmcb01);
+
svm_vcpu_free_msrpm(svm->nested.msrpm);
svm->nested.msrpm = NULL;
@@ -1143,9 +1152,6 @@ void svm_free_nested(struct vcpu_svm *svm)
svm->nested.initialized = false;
}
-/*
- * Forcibly leave nested mode in order to be able to reset the VCPU later on.
- */
void svm_leave_nested(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9f88c8e6766e..ce362e88a567 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -346,12 +346,6 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
return 0;
}
-static int is_external_interrupt(u32 info)
-{
- info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
- return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
-}
-
static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -1438,6 +1432,7 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu)
*/
svm_clear_current_vmcb(svm->vmcb);
+ svm_leave_nested(vcpu);
svm_free_nested(svm);
sev_free_vcpu(vcpu);
@@ -2709,9 +2704,9 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr)
msr->data = 0;
switch (msr->index) {
- case MSR_F10H_DECFG:
- if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
- msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
+ case MSR_AMD64_DE_CFG:
+ if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC))
+ msr->data |= MSR_AMD64_DE_CFG_LFENCE_SERIALIZE;
break;
case MSR_IA32_PERF_CAPABILITIES:
return 0;
@@ -2812,7 +2807,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = 0x1E;
}
break;
- case MSR_F10H_DECFG:
+ case MSR_AMD64_DE_CFG:
msr_info->data = svm->msr_decfg;
break;
default:
@@ -3041,7 +3036,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_VM_IGNNE:
vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
break;
- case MSR_F10H_DECFG: {
+ case MSR_AMD64_DE_CFG: {
struct kvm_msr_entry msr_entry;
msr_entry.index = msr->index;
@@ -3425,15 +3420,6 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
return 0;
}
- if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
- exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
- exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
- exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
- printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
- "exit_code 0x%x\n",
- __func__, svm->vmcb->control.exit_int_info,
- exit_code);
-
if (exit_fastpath != EXIT_FASTPATH_NONE)
return 1;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 0c62352dda6a..5b0d4859e4b7 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -4854,6 +4854,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
static void nested_vmx_triple_fault(struct kvm_vcpu *vcpu)
{
+ kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu);
nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0);
}
@@ -6440,9 +6441,6 @@ out:
return kvm_state.size;
}
-/*
- * Forcibly leave nested mode in order to be able to reset the VCPU later on.
- */
void vmx_leave_nested(struct kvm_vcpu *vcpu)
{
if (is_guest_mode(vcpu)) {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ecea83f0da49..69227f77b201 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -628,6 +628,12 @@ static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vecto
ex->payload = payload;
}
+/* Forcibly leave nested mode in cases like a vCPU reset */
+static void kvm_leave_nested(struct kvm_vcpu *vcpu)
+{
+ kvm_x86_ops.nested_ops->leave_nested(vcpu);
+}
+
static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
unsigned nr, bool has_error, u32 error_code,
bool has_payload, unsigned long payload, bool reinject)
@@ -1557,7 +1563,7 @@ static const u32 msr_based_features_all[] = {
MSR_IA32_VMX_EPT_VPID_CAP,
MSR_IA32_VMX_VMFUNC,
- MSR_F10H_DECFG,
+ MSR_AMD64_DE_CFG,
MSR_IA32_UCODE_REV,
MSR_IA32_ARCH_CAPABILITIES,
MSR_IA32_PERF_CAPABILITIES,
@@ -5195,7 +5201,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
- kvm_x86_ops.nested_ops->leave_nested(vcpu);
+ kvm_leave_nested(vcpu);
kvm_smm_changed(vcpu, events->smi.smm);
}
@@ -9805,7 +9811,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
int kvm_check_nested_events(struct kvm_vcpu *vcpu)
{
- if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
+ if (kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
kvm_x86_ops.nested_ops->triple_fault(vcpu);
return 1;
}
@@ -10560,10 +10566,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
r = 0;
goto out;
}
- if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
- if (is_guest_mode(vcpu)) {
+ if (kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
+ if (is_guest_mode(vcpu))
kvm_x86_ops.nested_ops->triple_fault(vcpu);
- } else {
+
+ if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
vcpu->mmio_needed = 0;
r = 0;
@@ -11997,8 +12004,18 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
WARN_ON_ONCE(!init_event &&
(old_cr0 || kvm_read_cr3(vcpu) || kvm_read_cr4(vcpu)));
+ /*
+ * SVM doesn't unconditionally VM-Exit on INIT and SHUTDOWN, thus it's
+ * possible to INIT the vCPU while L2 is active. Force the vCPU back
+ * into L1 as EFER.SVME is cleared on INIT (along with all other EFER
+ * bits), i.e. virtualization is disabled.
+ */
+ if (is_guest_mode(vcpu))
+ kvm_leave_nested(vcpu);
+
kvm_lapic_reset(vcpu, init_event);
+ WARN_ON_ONCE(is_guest_mode(vcpu) || is_smm(vcpu));
vcpu->arch.hflags = 0;
vcpu->arch.smi_pending = 0;
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 2dae413bd62a..f3098c0e386a 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -954,6 +954,14 @@ static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result);
}
+static inline int max_evtchn_port(struct kvm *kvm)
+{
+ if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode)
+ return EVTCHN_2L_NR_CHANNELS;
+ else
+ return COMPAT_EVTCHN_2L_NR_CHANNELS;
+}
+
static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
evtchn_port_t *ports)
{
@@ -1042,6 +1050,10 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
*r = -EFAULT;
goto out;
}
+ if (ports[i] >= max_evtchn_port(vcpu->kvm)) {
+ *r = -EINVAL;
+ goto out;
+ }
}
if (sched_poll.nr_ports == 1)
@@ -1215,6 +1227,7 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
bool longmode;
u64 input, params[6], r = -ENOSYS;
bool handled = false;
+ u8 cpl;
input = (u64)kvm_register_read(vcpu, VCPU_REGS_RAX);
@@ -1242,9 +1255,17 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
params[5] = (u64)kvm_r9_read(vcpu);
}
#endif
+ cpl = static_call(kvm_x86_get_cpl)(vcpu);
trace_kvm_xen_hypercall(input, params[0], params[1], params[2],
params[3], params[4], params[5]);
+ /*
+ * Only allow hypercall acceleration for CPL0. The rare hypercalls that
+ * are permitted in guest userspace can be handled by the VMM.
+ */
+ if (unlikely(cpl > 0))
+ goto handle_in_userspace;
+
switch (input) {
case __HYPERVISOR_xen_version:
if (params[0] == XENVER_version && vcpu->kvm->arch.xen.xen_version) {
@@ -1279,10 +1300,11 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
if (handled)
return kvm_xen_hypercall_set_result(vcpu, r);
+handle_in_userspace:
vcpu->run->exit_reason = KVM_EXIT_XEN;
vcpu->run->xen.type = KVM_EXIT_XEN_HCALL;
vcpu->run->xen.u.hcall.longmode = longmode;
- vcpu->run->xen.u.hcall.cpl = static_call(kvm_x86_get_cpl)(vcpu);
+ vcpu->run->xen.u.hcall.cpl = cpl;
vcpu->run->xen.u.hcall.input = input;
vcpu->run->xen.u.hcall.params[0] = params[0];
vcpu->run->xen.u.hcall.params[1] = params[1];
@@ -1297,14 +1319,6 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
return 0;
}
-static inline int max_evtchn_port(struct kvm *kvm)
-{
- if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode)
- return EVTCHN_2L_NR_CHANNELS;
- else
- return COMPAT_EVTCHN_2L_NR_CHANNELS;
-}
-
static void kvm_xen_check_poller(struct kvm_vcpu *vcpu, int port)
{
int poll_evtchn = vcpu->arch.xen.poll_evtchn;
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 78c5bc654cff..6453fbaedb08 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -217,9 +217,15 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size,
* Mappings have to be page-aligned
*/
offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PHYSICAL_PAGE_MASK;
+ phys_addr &= PAGE_MASK;
size = PAGE_ALIGN(last_addr+1) - phys_addr;
+ /*
+ * Mask out any bits not part of the actual physical
+ * address, like memory encryption bits.
+ */
+ phys_addr &= PHYSICAL_PAGE_MASK;
+
retval = memtype_reserve(phys_addr, (u64)phys_addr + size,
pcm, &new_pcm);
if (retval) {
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 2e5a045731de..ef34ba21aa92 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -20,6 +20,7 @@
#include <linux/kernel.h>
#include <linux/cc_platform.h>
#include <linux/set_memory.h>
+#include <linux/memregion.h>
#include <asm/e820/api.h>
#include <asm/processor.h>
@@ -330,6 +331,23 @@ void arch_invalidate_pmem(void *addr, size_t size)
EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
#endif
+#ifdef CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
+bool cpu_cache_has_invalidate_memregion(void)
+{
+ return !cpu_feature_enabled(X86_FEATURE_HYPERVISOR);
+}
+EXPORT_SYMBOL_NS_GPL(cpu_cache_has_invalidate_memregion, DEVMEM);
+
+int cpu_cache_invalidate_memregion(int res_desc)
+{
+ if (WARN_ON_ONCE(!cpu_cache_has_invalidate_memregion()))
+ return -ENXIO;
+ wbinvd_on_all_cpus();
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cpu_cache_invalidate_memregion, DEVMEM);
+#endif
+
static void __cpa_flush_all(void *arg)
{
unsigned long cache = (unsigned long)arg;
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 00127abd89ee..99620428ad78 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -11,7 +11,6 @@
#include <linux/bpf.h>
#include <linux/memory.h>
#include <linux/sort.h>
-#include <linux/init.h>
#include <asm/extable.h>
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
@@ -389,18 +388,6 @@ out:
return ret;
}
-int __init bpf_arch_init_dispatcher_early(void *ip)
-{
- const u8 *nop_insn = x86_nops[5];
-
- if (is_endbr(*(u32 *)ip))
- ip += ENDBR_INSN_SIZE;
-
- if (memcmp(ip, nop_insn, X86_PATCH_SIZE))
- text_poke_early(ip, nop_insn, X86_PATCH_SIZE);
- return 0;
-}
-
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
void *old_addr, void *new_addr)
{
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index 994a229cb79f..68244a3422d1 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -183,13 +183,12 @@ err_sysfs:
return r;
}
-static int xo15_sci_remove(struct acpi_device *device)
+static void xo15_sci_remove(struct acpi_device *device)
{
acpi_disable_gpe(NULL, xo15_sci_gpe);
acpi_remove_gpe_handler(NULL, xo15_sci_gpe, xo15_sci_gpe_handler);
cancel_work_sync(&sci_work);
sysfs_remove_file(&device->dev.kobj, &lid_wake_on_close_attr.attr);
- return 0;
}
#ifdef CONFIG_PM_SLEEP
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index bb176c72891c..93ae33248f42 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -513,15 +513,23 @@ static int pm_cpu_check(const struct x86_cpu_id *c)
static void pm_save_spec_msr(void)
{
- u32 spec_msr_id[] = {
- MSR_IA32_SPEC_CTRL,
- MSR_IA32_TSX_CTRL,
- MSR_TSX_FORCE_ABORT,
- MSR_IA32_MCU_OPT_CTRL,
- MSR_AMD64_LS_CFG,
+ struct msr_enumeration {
+ u32 msr_no;
+ u32 feature;
+ } msr_enum[] = {
+ { MSR_IA32_SPEC_CTRL, X86_FEATURE_MSR_SPEC_CTRL },
+ { MSR_IA32_TSX_CTRL, X86_FEATURE_MSR_TSX_CTRL },
+ { MSR_TSX_FORCE_ABORT, X86_FEATURE_TSX_FORCE_ABORT },
+ { MSR_IA32_MCU_OPT_CTRL, X86_FEATURE_SRBDS_CTRL },
+ { MSR_AMD64_LS_CFG, X86_FEATURE_LS_CFG_SSBD },
+ { MSR_AMD64_DE_CFG, X86_FEATURE_LFENCE_RDTSC },
};
+ int i;
- msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id));
+ for (i = 0; i < ARRAY_SIZE(msr_enum); i++) {
+ if (boot_cpu_has(msr_enum[i].feature))
+ msr_build_context(&msr_enum[i].msr_no, 1);
+ }
}
static int pm_check_save_msr(void)
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 745420853a7c..1a2ba31635a5 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -23,6 +23,7 @@
#include <linux/start_kernel.h>
#include <linux/sched.h>
#include <linux/kprobes.h>
+#include <linux/kstrtox.h>
#include <linux/memblock.h>
#include <linux/export.h>
#include <linux/mm.h>
@@ -113,7 +114,7 @@ static __read_mostly bool xen_msr_safe = IS_ENABLED(CONFIG_XEN_PV_MSR_SAFE);
static int __init parse_xen_msr_safe(char *str)
{
if (str)
- return strtobool(str, &xen_msr_safe);
+ return kstrtobool(str, &xen_msr_safe);
return -EINVAL;
}
early_param("xen_msr_safe", parse_xen_msr_safe);
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 4f4309500559..8db26f10fb1d 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -7,6 +7,7 @@
#include <linux/init.h>
#include <linux/sched.h>
+#include <linux/kstrtox.h>
#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/memblock.h>
@@ -85,7 +86,7 @@ static void __init xen_parse_512gb(void)
arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit=");
if (!arg)
val = true;
- else if (strtobool(arg + strlen("xen_512gb_limit="), &val))
+ else if (kstrtobool(arg + strlen("xen_512gb_limit="), &val))
return;
xen_512gb_limit = val;
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index c3e1f9a7d43a..4b0d6fff88de 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -32,30 +32,30 @@ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
void xen_smp_intr_free(unsigned int cpu)
{
+ kfree(per_cpu(xen_resched_irq, cpu).name);
+ per_cpu(xen_resched_irq, cpu).name = NULL;
if (per_cpu(xen_resched_irq, cpu).irq >= 0) {
unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu).irq, NULL);
per_cpu(xen_resched_irq, cpu).irq = -1;
- kfree(per_cpu(xen_resched_irq, cpu).name);
- per_cpu(xen_resched_irq, cpu).name = NULL;
}
+ kfree(per_cpu(xen_callfunc_irq, cpu).name);
+ per_cpu(xen_callfunc_irq, cpu).name = NULL;
if (per_cpu(xen_callfunc_irq, cpu).irq >= 0) {
unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu).irq, NULL);
per_cpu(xen_callfunc_irq, cpu).irq = -1;
- kfree(per_cpu(xen_callfunc_irq, cpu).name);
- per_cpu(xen_callfunc_irq, cpu).name = NULL;
}
+ kfree(per_cpu(xen_debug_irq, cpu).name);
+ per_cpu(xen_debug_irq, cpu).name = NULL;
if (per_cpu(xen_debug_irq, cpu).irq >= 0) {
unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu).irq, NULL);
per_cpu(xen_debug_irq, cpu).irq = -1;
- kfree(per_cpu(xen_debug_irq, cpu).name);
- per_cpu(xen_debug_irq, cpu).name = NULL;
}
+ kfree(per_cpu(xen_callfuncsingle_irq, cpu).name);
+ per_cpu(xen_callfuncsingle_irq, cpu).name = NULL;
if (per_cpu(xen_callfuncsingle_irq, cpu).irq >= 0) {
unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu).irq,
NULL);
per_cpu(xen_callfuncsingle_irq, cpu).irq = -1;
- kfree(per_cpu(xen_callfuncsingle_irq, cpu).name);
- per_cpu(xen_callfuncsingle_irq, cpu).name = NULL;
}
}
@@ -65,6 +65,7 @@ int xen_smp_intr_init(unsigned int cpu)
char *resched_name, *callfunc_name, *debug_name;
resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
+ per_cpu(xen_resched_irq, cpu).name = resched_name;
rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
cpu,
xen_reschedule_interrupt,
@@ -74,9 +75,9 @@ int xen_smp_intr_init(unsigned int cpu)
if (rc < 0)
goto fail;
per_cpu(xen_resched_irq, cpu).irq = rc;
- per_cpu(xen_resched_irq, cpu).name = resched_name;
callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
+ per_cpu(xen_callfunc_irq, cpu).name = callfunc_name;
rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
cpu,
xen_call_function_interrupt,
@@ -86,10 +87,10 @@ int xen_smp_intr_init(unsigned int cpu)
if (rc < 0)
goto fail;
per_cpu(xen_callfunc_irq, cpu).irq = rc;
- per_cpu(xen_callfunc_irq, cpu).name = callfunc_name;
if (!xen_fifo_events) {
debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
+ per_cpu(xen_debug_irq, cpu).name = debug_name;
rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu,
xen_debug_interrupt,
IRQF_PERCPU | IRQF_NOBALANCING,
@@ -97,10 +98,10 @@ int xen_smp_intr_init(unsigned int cpu)
if (rc < 0)
goto fail;
per_cpu(xen_debug_irq, cpu).irq = rc;
- per_cpu(xen_debug_irq, cpu).name = debug_name;
}
callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
+ per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name;
rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
cpu,
xen_call_function_single_interrupt,
@@ -110,7 +111,6 @@ int xen_smp_intr_init(unsigned int cpu)
if (rc < 0)
goto fail;
per_cpu(xen_callfuncsingle_irq, cpu).irq = rc;
- per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name;
return 0;
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 480be82e9b7b..6175f2c5c822 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -97,18 +97,18 @@ asmlinkage __visible void cpu_bringup_and_idle(void)
void xen_smp_intr_free_pv(unsigned int cpu)
{
+ kfree(per_cpu(xen_irq_work, cpu).name);
+ per_cpu(xen_irq_work, cpu).name = NULL;
if (per_cpu(xen_irq_work, cpu).irq >= 0) {
unbind_from_irqhandler(per_cpu(xen_irq_work, cpu).irq, NULL);
per_cpu(xen_irq_work, cpu).irq = -1;
- kfree(per_cpu(xen_irq_work, cpu).name);
- per_cpu(xen_irq_work, cpu).name = NULL;
}
+ kfree(per_cpu(xen_pmu_irq, cpu).name);
+ per_cpu(xen_pmu_irq, cpu).name = NULL;
if (per_cpu(xen_pmu_irq, cpu).irq >= 0) {
unbind_from_irqhandler(per_cpu(xen_pmu_irq, cpu).irq, NULL);
per_cpu(xen_pmu_irq, cpu).irq = -1;
- kfree(per_cpu(xen_pmu_irq, cpu).name);
- per_cpu(xen_pmu_irq, cpu).name = NULL;
}
}
@@ -118,6 +118,7 @@ int xen_smp_intr_init_pv(unsigned int cpu)
char *callfunc_name, *pmu_name;
callfunc_name = kasprintf(GFP_KERNEL, "irqwork%d", cpu);
+ per_cpu(xen_irq_work, cpu).name = callfunc_name;
rc = bind_ipi_to_irqhandler(XEN_IRQ_WORK_VECTOR,
cpu,
xen_irq_work_interrupt,
@@ -127,10 +128,10 @@ int xen_smp_intr_init_pv(unsigned int cpu)
if (rc < 0)
goto fail;
per_cpu(xen_irq_work, cpu).irq = rc;
- per_cpu(xen_irq_work, cpu).name = callfunc_name;
if (is_xen_pmu) {
pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu);
+ per_cpu(xen_pmu_irq, cpu).name = pmu_name;
rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu,
xen_pmu_irq_handler,
IRQF_PERCPU|IRQF_NOBALANCING,
@@ -138,7 +139,6 @@ int xen_smp_intr_init_pv(unsigned int cpu)
if (rc < 0)
goto fail;
per_cpu(xen_pmu_irq, cpu).irq = rc;
- per_cpu(xen_pmu_irq, cpu).name = pmu_name;
}
return 0;
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 043c73dfd2c9..5c6fc16e4b92 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -75,6 +75,7 @@ void xen_init_lock_cpu(int cpu)
cpu, per_cpu(lock_kicker_irq, cpu));
name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
+ per_cpu(irq_name, cpu) = name;
irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
cpu,
dummy_handler,
@@ -85,7 +86,6 @@ void xen_init_lock_cpu(int cpu)
if (irq >= 0) {
disable_irq(irq); /* make sure it's never delivered */
per_cpu(lock_kicker_irq, cpu) = irq;
- per_cpu(irq_name, cpu) = name;
}
printk("cpu %d spinlock event irq %d\n", cpu, irq);
@@ -98,6 +98,8 @@ void xen_uninit_lock_cpu(int cpu)
if (!xen_pvspin)
return;
+ kfree(per_cpu(irq_name, cpu));
+ per_cpu(irq_name, cpu) = NULL;
/*
* When booting the kernel with 'mitigations=auto,nosmt', the secondary
* CPUs are not activated, and lock_kicker_irq is not initialized.
@@ -108,8 +110,6 @@ void xen_uninit_lock_cpu(int cpu)
unbind_from_irqhandler(irq, NULL);
per_cpu(lock_kicker_irq, cpu) = -1;
- kfree(per_cpu(irq_name, cpu));
- per_cpu(irq_name, cpu) = NULL;
}
PV_CALLEE_SAVE_REGS_THUNK(xen_vcpu_stolen);
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 6b4fdf6b9542..4a184f6e4e4d 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -262,10 +262,10 @@ SYM_CODE_START(xen_entry_SYSCALL_compat)
/*
* Neither Xen nor the kernel really knows what the old SS and
- * CS were. The kernel expects __USER32_DS and __USER32_CS, so
+ * CS were. The kernel expects __USER_DS and __USER32_CS, so
* report those values even though Xen will guess its own values.
*/
- movq $__USER32_DS, 4*8(%rsp)
+ movq $__USER_DS, 4*8(%rsp)
movq $__USER32_CS, 1*8(%rsp)
jmp entry_SYSCALL_compat_after_hwframe
@@ -284,10 +284,10 @@ SYM_CODE_START(xen_entry_SYSENTER_compat)
/*
* Neither Xen nor the kernel really knows what the old SS and
- * CS were. The kernel expects __USER32_DS and __USER32_CS, so
+ * CS were. The kernel expects __USER_DS and __USER32_CS, so
* report those values even though Xen will guess its own values.
*/
- movq $__USER32_DS, 4*8(%rsp)
+ movq $__USER_DS, 4*8(%rsp)
movq $__USER32_CS, 1*8(%rsp)
jmp entry_SYSENTER_compat_after_hwframe