summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/loongarch/include/asm/processor.h2
-rw-r--r--arch/loongarch/include/asm/ptrace.h4
-rw-r--r--arch/loongarch/kernel/head.S3
-rw-r--r--arch/loongarch/kernel/process.c4
-rw-r--r--arch/loongarch/kernel/switch.S2
-rw-r--r--arch/loongarch/net/bpf_jit.c31
-rw-r--r--arch/parisc/include/asm/hardware.h12
-rw-r--r--arch/parisc/include/uapi/asm/pdc.h36
-rw-r--r--arch/parisc/kernel/drivers.c14
-rw-r--r--arch/powerpc/Kconfig3
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-hash.h6
-rw-r--r--arch/powerpc/include/asm/syscalls.h7
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S7
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S14
-rw-r--r--arch/powerpc/kernel/interrupt.c14
-rw-r--r--arch/powerpc/kernel/interrupt_64.S13
-rw-r--r--arch/powerpc/kernel/sys_ppc32.c13
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl7
-rw-r--r--arch/powerpc/kvm/Kconfig4
-rw-r--r--arch/powerpc/lib/vmx-helper.c12
-rw-r--r--arch/powerpc/mm/book3s64/hash_native.c67
-rw-r--r--arch/powerpc/mm/book3s64/hash_pgtable.c8
-rw-r--r--arch/powerpc/mm/book3s64/hash_utils.c12
-rw-r--r--arch/powerpc/platforms/pseries/lparcfg.c11
-rw-r--r--arch/powerpc/platforms/pseries/vas.c83
-rw-r--r--arch/powerpc/platforms/pseries/vas.h6
-rw-r--r--arch/riscv/Kconfig17
-rw-r--r--arch/riscv/Makefile6
-rw-r--r--arch/riscv/include/asm/jump_label.h8
-rw-r--r--arch/riscv/include/asm/vdso/processor.h2
-rw-r--r--arch/riscv/kernel/cpu.c3
-rw-r--r--arch/riscv/mm/kasan_init.c7
-rw-r--r--arch/s390/boot/vmlinux.lds.S13
-rw-r--r--arch/s390/include/asm/futex.h3
-rw-r--r--arch/s390/kernel/perf_pai_ext.c1
-rw-r--r--arch/s390/lib/uaccess.c6
-rw-r--r--arch/s390/pci/pci_mmio.c8
-rw-r--r--arch/x86/crypto/polyval-clmulni_glue.c19
-rw-r--r--arch/x86/events/amd/ibs.c2
-rw-r--r--arch/x86/events/rapl.c4
-rw-r--r--arch/x86/include/asm/string_64.h11
-rw-r--r--arch/x86/include/asm/uaccess.h13
-rw-r--r--arch/x86/kvm/cpuid.c11
-rw-r--r--arch/x86/kvm/debugfs.c7
-rw-r--r--arch/x86/kvm/emulate.c108
-rw-r--r--arch/x86/kvm/vmx/vmx.c5
-rw-r--r--arch/x86/kvm/x86.c27
-rw-r--r--arch/x86/kvm/xen.c64
-rw-r--r--arch/x86/purgatory/Makefile1
49 files changed, 518 insertions, 223 deletions
diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h
index 6954dc5d24e9..7184f1dc61f2 100644
--- a/arch/loongarch/include/asm/processor.h
+++ b/arch/loongarch/include/asm/processor.h
@@ -191,7 +191,7 @@ static inline void flush_thread(void)
unsigned long __get_wchan(struct task_struct *p);
#define __KSTK_TOS(tsk) ((unsigned long)task_stack_page(tsk) + \
- THREAD_SIZE - 32 - sizeof(struct pt_regs))
+ THREAD_SIZE - sizeof(struct pt_regs))
#define task_pt_regs(tsk) ((struct pt_regs *)__KSTK_TOS(tsk))
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->csr_era)
#define KSTK_ESP(tsk) (task_pt_regs(tsk)->regs[3])
diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h
index 17838c6b7ccd..59c4608de91d 100644
--- a/arch/loongarch/include/asm/ptrace.h
+++ b/arch/loongarch/include/asm/ptrace.h
@@ -29,7 +29,7 @@ struct pt_regs {
unsigned long csr_euen;
unsigned long csr_ecfg;
unsigned long csr_estat;
- unsigned long __last[0];
+ unsigned long __last[];
} __aligned(8);
static inline int regs_irqs_disabled(struct pt_regs *regs)
@@ -133,7 +133,7 @@ static inline void die_if_kernel(const char *str, struct pt_regs *regs)
#define current_pt_regs() \
({ \
unsigned long sp = (unsigned long)__builtin_frame_address(0); \
- (struct pt_regs *)((sp | (THREAD_SIZE - 1)) + 1 - 32) - 1; \
+ (struct pt_regs *)((sp | (THREAD_SIZE - 1)) + 1) - 1; \
})
/* Helpers for working with the user stack pointer */
diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
index 97425779ce9f..84970e266658 100644
--- a/arch/loongarch/kernel/head.S
+++ b/arch/loongarch/kernel/head.S
@@ -84,10 +84,9 @@ SYM_CODE_START(kernel_entry) # kernel entry point
la.pcrel tp, init_thread_union
/* Set the SP after an empty pt_regs. */
- PTR_LI sp, (_THREAD_SIZE - 32 - PT_SIZE)
+ PTR_LI sp, (_THREAD_SIZE - PT_SIZE)
PTR_ADD sp, sp, tp
set_saved_sp sp, t0, t1
- PTR_ADDI sp, sp, -4 * SZREG # init stack pointer
bl start_kernel
ASM_BUG()
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index 1256e3582475..2526b68f1c0f 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -129,7 +129,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
unsigned long clone_flags = args->flags;
struct pt_regs *childregs, *regs = current_pt_regs();
- childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32;
+ childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
/* set up new TSS. */
childregs = (struct pt_regs *) childksp - 1;
@@ -236,7 +236,7 @@ bool in_task_stack(unsigned long stack, struct task_struct *task,
struct stack_info *info)
{
unsigned long begin = (unsigned long)task_stack_page(task);
- unsigned long end = begin + THREAD_SIZE - 32;
+ unsigned long end = begin + THREAD_SIZE;
if (stack < begin || stack >= end)
return false;
diff --git a/arch/loongarch/kernel/switch.S b/arch/loongarch/kernel/switch.S
index 43ebbc3990f7..202a163cb32f 100644
--- a/arch/loongarch/kernel/switch.S
+++ b/arch/loongarch/kernel/switch.S
@@ -26,7 +26,7 @@ SYM_FUNC_START(__switch_to)
move tp, a2
cpu_restore_nonscratch a1
- li.w t0, _THREAD_SIZE - 32
+ li.w t0, _THREAD_SIZE
PTR_ADD t0, t0, tp
set_saved_sp t0, t1, t2
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 43f0a98efe38..bdcd0c7719a9 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -279,6 +279,7 @@ static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
const u8 t1 = LOONGARCH_GPR_T1;
const u8 t2 = LOONGARCH_GPR_T2;
const u8 t3 = LOONGARCH_GPR_T3;
+ const u8 r0 = regmap[BPF_REG_0];
const u8 src = regmap[insn->src_reg];
const u8 dst = regmap[insn->dst_reg];
const s16 off = insn->off;
@@ -359,8 +360,6 @@ static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
break;
/* r0 = atomic_cmpxchg(dst + off, r0, src); */
case BPF_CMPXCHG:
- u8 r0 = regmap[BPF_REG_0];
-
move_reg(ctx, t2, r0);
if (isdw) {
emit_insn(ctx, lld, r0, t1, 0);
@@ -390,8 +389,11 @@ static bool is_signed_bpf_cond(u8 cond)
static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass)
{
- const bool is32 = BPF_CLASS(insn->code) == BPF_ALU ||
- BPF_CLASS(insn->code) == BPF_JMP32;
+ u8 tm = -1;
+ u64 func_addr;
+ bool func_addr_fixed;
+ int i = insn - ctx->prog->insnsi;
+ int ret, jmp_offset;
const u8 code = insn->code;
const u8 cond = BPF_OP(code);
const u8 t1 = LOONGARCH_GPR_T1;
@@ -400,8 +402,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
const u8 dst = regmap[insn->dst_reg];
const s16 off = insn->off;
const s32 imm = insn->imm;
- int jmp_offset;
- int i = insn - ctx->prog->insnsi;
+ const u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm;
+ const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || BPF_CLASS(insn->code) == BPF_JMP32;
switch (code) {
/* dst = src */
@@ -724,24 +726,23 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
case BPF_JMP32 | BPF_JSGE | BPF_K:
case BPF_JMP32 | BPF_JSLT | BPF_K:
case BPF_JMP32 | BPF_JSLE | BPF_K:
- u8 t7 = -1;
jmp_offset = bpf2la_offset(i, off, ctx);
if (imm) {
move_imm(ctx, t1, imm, false);
- t7 = t1;
+ tm = t1;
} else {
/* If imm is 0, simply use zero register. */
- t7 = LOONGARCH_GPR_ZERO;
+ tm = LOONGARCH_GPR_ZERO;
}
move_reg(ctx, t2, dst);
if (is_signed_bpf_cond(BPF_OP(code))) {
- emit_sext_32(ctx, t7, is32);
+ emit_sext_32(ctx, tm, is32);
emit_sext_32(ctx, t2, is32);
} else {
- emit_zext_32(ctx, t7, is32);
+ emit_zext_32(ctx, tm, is32);
emit_zext_32(ctx, t2, is32);
}
- if (emit_cond_jmp(ctx, cond, t2, t7, jmp_offset) < 0)
+ if (emit_cond_jmp(ctx, cond, t2, tm, jmp_offset) < 0)
goto toofar;
break;
@@ -775,10 +776,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
/* function call */
case BPF_JMP | BPF_CALL:
- int ret;
- u64 func_addr;
- bool func_addr_fixed;
-
mark_call(ctx);
ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
&func_addr, &func_addr_fixed);
@@ -811,8 +808,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
/* dst = imm64 */
case BPF_LD | BPF_IMM | BPF_DW:
- u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm;
-
move_imm(ctx, dst, imm64, is32);
return 1;
diff --git a/arch/parisc/include/asm/hardware.h b/arch/parisc/include/asm/hardware.h
index 9d3d7737c58b..a005ebc54779 100644
--- a/arch/parisc/include/asm/hardware.h
+++ b/arch/parisc/include/asm/hardware.h
@@ -10,12 +10,12 @@
#define SVERSION_ANY_ID PA_SVERSION_ANY_ID
struct hp_hardware {
- unsigned short hw_type:5; /* HPHW_xxx */
- unsigned short hversion;
- unsigned long sversion:28;
- unsigned short opt;
- const char name[80]; /* The hardware description */
-};
+ unsigned int hw_type:8; /* HPHW_xxx */
+ unsigned int hversion:12;
+ unsigned int sversion:12;
+ unsigned char opt;
+ unsigned char name[59]; /* The hardware description */
+} __packed;
struct parisc_device;
diff --git a/arch/parisc/include/uapi/asm/pdc.h b/arch/parisc/include/uapi/asm/pdc.h
index e794e143ec5f..7a90070136e8 100644
--- a/arch/parisc/include/uapi/asm/pdc.h
+++ b/arch/parisc/include/uapi/asm/pdc.h
@@ -363,20 +363,25 @@
#if !defined(__ASSEMBLY__)
-/* flags of the device_path */
+/* flags for hardware_path */
#define PF_AUTOBOOT 0x80
#define PF_AUTOSEARCH 0x40
#define PF_TIMER 0x0F
-struct device_path { /* page 1-69 */
- unsigned char flags; /* flags see above! */
- unsigned char bc[6]; /* bus converter routing info */
- unsigned char mod;
- unsigned int layers[6];/* device-specific layer-info */
-} __attribute__((aligned(8))) ;
+struct hardware_path {
+ unsigned char flags; /* see bit definitions below */
+ signed char bc[6]; /* Bus Converter routing info to a specific */
+ /* I/O adaptor (< 0 means none, > 63 resvd) */
+ signed char mod; /* fixed field of specified module */
+};
+
+struct pdc_module_path { /* page 1-69 */
+ struct hardware_path path;
+ unsigned int layers[6]; /* device-specific info (ctlr #, unit # ...) */
+} __attribute__((aligned(8)));
struct pz_device {
- struct device_path dp; /* see above */
+ struct pdc_module_path dp; /* see above */
/* struct iomod *hpa; */
unsigned int hpa; /* HPA base address */
/* char *spa; */
@@ -611,21 +616,6 @@ struct pdc_initiator { /* PDC_INITIATOR */
int mode;
};
-struct hardware_path {
- char flags; /* see bit definitions below */
- char bc[6]; /* Bus Converter routing info to a specific */
- /* I/O adaptor (< 0 means none, > 63 resvd) */
- char mod; /* fixed field of specified module */
-};
-
-/*
- * Device path specifications used by PDC.
- */
-struct pdc_module_path {
- struct hardware_path path;
- unsigned int layers[6]; /* device-specific info (ctlr #, unit # ...) */
-};
-
/* Only used on some pre-PA2.0 boxes */
struct pdc_memory_map { /* PDC_MEMORY_MAP */
unsigned long hpa; /* mod's register set address */
diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c
index d126e78e101a..e7ee0c0c91d3 100644
--- a/arch/parisc/kernel/drivers.c
+++ b/arch/parisc/kernel/drivers.c
@@ -882,15 +882,13 @@ void __init walk_central_bus(void)
&root);
}
-static void print_parisc_device(struct parisc_device *dev)
+static __init void print_parisc_device(struct parisc_device *dev)
{
- char hw_path[64];
- static int count;
+ static int count __initdata;
- print_pa_hwpath(dev, hw_path);
- pr_info("%d. %s at %pap [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }",
- ++count, dev->name, &(dev->hpa.start), hw_path, dev->id.hw_type,
- dev->id.hversion_rev, dev->id.hversion, dev->id.sversion);
+ pr_info("%d. %s at %pap { type:%d, hv:%#x, sv:%#x, rev:%#x }",
+ ++count, dev->name, &(dev->hpa.start), dev->id.hw_type,
+ dev->id.hversion, dev->id.sversion, dev->id.hversion_rev);
if (dev->num_addrs) {
int k;
@@ -1079,7 +1077,7 @@ static __init int qemu_print_iodc_data(struct device *lin_dev, void *data)
-static int print_one_device(struct device * dev, void * data)
+static __init int print_one_device(struct device * dev, void * data)
{
struct parisc_device * pdev = to_parisc_device(dev);
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 699df27b0e2f..2ca5418457ed 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -147,6 +147,7 @@ config PPC
select ARCH_MIGHT_HAVE_PC_SERIO
select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
+ select ARCH_SPLIT_ARG64 if PPC32
select ARCH_STACKWALK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx || 40x
@@ -285,7 +286,7 @@ config PPC
#
config PPC_LONG_DOUBLE_128
- depends on PPC64
+ depends on PPC64 && ALTIVEC
def_bool $(success,test "$(shell,echo __LONG_DOUBLE_128__ | $(CC) -E -P -)" = 1)
config PPC_BARRIER_NOSPEC
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
index fab8332fe1ad..751921f6db46 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
@@ -32,6 +32,11 @@ static inline void arch_enter_lazy_mmu_mode(void)
if (radix_enabled())
return;
+ /*
+ * apply_to_page_range can call us this preempt enabled when
+ * operating on kernel page tables.
+ */
+ preempt_disable();
batch = this_cpu_ptr(&ppc64_tlb_batch);
batch->active = 1;
}
@@ -47,6 +52,7 @@ static inline void arch_leave_lazy_mmu_mode(void)
if (batch->index)
__flush_tlb_pending(batch);
batch->active = 0;
+ preempt_enable();
}
#define arch_flush_lazy_mmu_mode() do {} while (0)
diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h
index a1142496cd58..6d51b007b59e 100644
--- a/arch/powerpc/include/asm/syscalls.h
+++ b/arch/powerpc/include/asm/syscalls.h
@@ -104,6 +104,13 @@ long sys_ppc_ftruncate64(unsigned int fd, u32 reg4,
unsigned long len1, unsigned long len2);
long sys_ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
size_t len, int advice);
+long sys_ppc_sync_file_range2(int fd, unsigned int flags,
+ unsigned int offset1,
+ unsigned int offset2,
+ unsigned int nbytes1,
+ unsigned int nbytes2);
+long sys_ppc_fallocate(int fd, int mode, u32 offset1, u32 offset2,
+ u32 len1, u32 len2);
#endif
#ifdef CONFIG_COMPAT
long compat_sys_mmap2(unsigned long addr, size_t len,
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 930e36099015..2f68fb2ee4fc 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -813,6 +813,13 @@ kernel_dbg_exc:
EXCEPTION_COMMON(0x260)
CHECK_NAPPING()
addi r3,r1,STACK_FRAME_OVERHEAD
+ /*
+ * XXX: Returning from performance_monitor_exception taken as a
+ * soft-NMI (Linux irqs disabled) may be risky to use interrupt_return
+ * and could cause bugs in return or elsewhere. That case should just
+ * restore registers and return. There is a workaround for one known
+ * problem in interrupt_exit_kernel_prepare().
+ */
bl performance_monitor_exception
b interrupt_return
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 5381a43e50fe..651c36b056bd 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -2357,9 +2357,21 @@ EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
EXC_COMMON_BEGIN(performance_monitor_common)
GEN_COMMON performance_monitor
addi r3,r1,STACK_FRAME_OVERHEAD
- bl performance_monitor_exception
+ lbz r4,PACAIRQSOFTMASK(r13)
+ cmpdi r4,IRQS_ENABLED
+ bne 1f
+ bl performance_monitor_exception_async
b interrupt_return_srr
+1:
+ bl performance_monitor_exception_nmi
+ /* Clear MSR_RI before setting SRR0 and SRR1. */
+ li r9,0
+ mtmsrd r9,1
+ kuap_kernel_restore r9, r10
+
+ EXCEPTION_RESTORE_REGS hsrr=0
+ RFI_TO_KERNEL
/**
* Interrupt 0xf20 - Vector Unavailable Interrupt.
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index f9db0a172401..fc6631a80527 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -374,10 +374,18 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
if (regs_is_unrecoverable(regs))
unrecoverable_exception(regs);
/*
- * CT_WARN_ON comes here via program_check_exception,
- * so avoid recursion.
+ * CT_WARN_ON comes here via program_check_exception, so avoid
+ * recursion.
+ *
+ * Skip the assertion on PMIs on 64e to work around a problem caused
+ * by NMI PMIs incorrectly taking this interrupt return path, it's
+ * possible for this to hit after interrupt exit to user switches
+ * context to user. See also the comment in the performance monitor
+ * handler in exceptions-64e.S
*/
- if (TRAP(regs) != INTERRUPT_PROGRAM)
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3E_64) &&
+ TRAP(regs) != INTERRUPT_PROGRAM &&
+ TRAP(regs) != INTERRUPT_PERFMON)
CT_WARN_ON(ct_state() == CONTEXT_USER);
kuap = kuap_get_and_assert_locked();
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index 978a173eb339..a019ed6fc839 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -532,15 +532,24 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
* Returning to soft-disabled context.
* Check if a MUST_HARD_MASK interrupt has become pending, in which
* case we need to disable MSR[EE] in the return context.
+ *
+ * The MSR[EE] check catches among other things the short incoherency
+ * in hard_irq_disable() between clearing MSR[EE] and setting
+ * PACA_IRQ_HARD_DIS.
*/
ld r12,_MSR(r1)
andi. r10,r12,MSR_EE
beq .Lfast_kernel_interrupt_return_\srr\() // EE already disabled
lbz r11,PACAIRQHAPPENED(r13)
andi. r10,r11,PACA_IRQ_MUST_HARD_MASK
- beq .Lfast_kernel_interrupt_return_\srr\() // No HARD_MASK pending
+ bne 1f // HARD_MASK is pending
+ // No HARD_MASK pending, clear possible HARD_DIS set by interrupt
+ andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
+ stb r11,PACAIRQHAPPENED(r13)
+ b .Lfast_kernel_interrupt_return_\srr\()
+
- /* Must clear MSR_EE from _MSR */
+1: /* Must clear MSR_EE from _MSR */
#ifdef CONFIG_PPC_BOOK3S
li r10,0
/* Clear valid before changing _MSR */
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index 1ab4a4d95aba..d451a8229223 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -112,7 +112,7 @@ PPC32_SYSCALL_DEFINE6(ppc32_fadvise64,
advice);
}
-COMPAT_SYSCALL_DEFINE6(ppc_sync_file_range2,
+PPC32_SYSCALL_DEFINE6(ppc_sync_file_range2,
int, fd, unsigned int, flags,
unsigned int, offset1, unsigned int, offset2,
unsigned int, nbytes1, unsigned int, nbytes2)
@@ -122,3 +122,14 @@ COMPAT_SYSCALL_DEFINE6(ppc_sync_file_range2,
return ksys_sync_file_range(fd, offset, nbytes, flags);
}
+
+#ifdef CONFIG_PPC32
+SYSCALL_DEFINE6(ppc_fallocate,
+ int, fd, int, mode,
+ u32, offset1, u32, offset2, u32, len1, u32, len2)
+{
+ return ksys_fallocate(fd, mode,
+ merge_64(offset1, offset2),
+ merge_64(len1, len2));
+}
+#endif
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index e9e0df4f9a61..a0be127475b1 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -394,8 +394,11 @@
305 common signalfd sys_signalfd compat_sys_signalfd
306 common timerfd_create sys_timerfd_create
307 common eventfd sys_eventfd
-308 common sync_file_range2 sys_sync_file_range2 compat_sys_ppc_sync_file_range2
-309 nospu fallocate sys_fallocate compat_sys_fallocate
+308 32 sync_file_range2 sys_ppc_sync_file_range2 compat_sys_ppc_sync_file_range2
+308 64 sync_file_range2 sys_sync_file_range2
+308 spu sync_file_range2 sys_sync_file_range2
+309 32 fallocate sys_ppc_fallocate compat_sys_fallocate
+309 64 fallocate sys_fallocate
310 nospu subpage_prot sys_subpage_prot
311 32 timerfd_settime sys_timerfd_settime32
311 64 timerfd_settime sys_timerfd_settime
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 61cdd782d3c5..a9f57dad6d91 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -51,6 +51,7 @@ config KVM_BOOK3S_HV_POSSIBLE
config KVM_BOOK3S_32
tristate "KVM support for PowerPC book3s_32 processors"
depends on PPC_BOOK3S_32 && !SMP && !PTE_64BIT
+ depends on !CONTEXT_TRACKING_USER
select KVM
select KVM_BOOK3S_32_HANDLER
select KVM_BOOK3S_PR_POSSIBLE
@@ -105,6 +106,7 @@ config KVM_BOOK3S_64_HV
config KVM_BOOK3S_64_PR
tristate "KVM support without using hypervisor mode in host"
depends on KVM_BOOK3S_64
+ depends on !CONTEXT_TRACKING_USER
select KVM_BOOK3S_PR_POSSIBLE
help
Support running guest kernels in virtual machines on processors
@@ -190,6 +192,7 @@ config KVM_EXIT_TIMING
config KVM_E500V2
bool "KVM support for PowerPC E500v2 processors"
depends on PPC_E500 && !PPC_E500MC
+ depends on !CONTEXT_TRACKING_USER
select KVM
select KVM_MMIO
select MMU_NOTIFIER
@@ -205,6 +208,7 @@ config KVM_E500V2
config KVM_E500MC
bool "KVM support for PowerPC E500MC/E5500/E6500 processors"
depends on PPC_E500MC
+ depends on !CONTEXT_TRACKING_USER
select KVM
select KVM_MMIO
select KVM_BOOKE_HV
diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c
index f76a50291fd7..d491da8d1838 100644
--- a/arch/powerpc/lib/vmx-helper.c
+++ b/arch/powerpc/lib/vmx-helper.c
@@ -36,7 +36,17 @@ int exit_vmx_usercopy(void)
{
disable_kernel_altivec();
pagefault_enable();
- preempt_enable();
+ preempt_enable_no_resched();
+ /*
+ * Must never explicitly call schedule (including preempt_enable())
+ * while in a kuap-unlocked user copy, because the AMR register will
+ * not be saved and restored across context switch. However preempt
+ * kernels need to be preempted as soon as possible if need_resched is
+ * set and we are preemptible. The hack here is to schedule a
+ * decrementer to fire here and reschedule for us if necessary.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT) && need_resched())
+ set_dec(1);
return 0;
}
diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c
index 623a7b7ab38b..9342e79870df 100644
--- a/arch/powerpc/mm/book3s64/hash_native.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -43,6 +43,29 @@
static DEFINE_RAW_SPINLOCK(native_tlbie_lock);
+#ifdef CONFIG_LOCKDEP
+static struct lockdep_map hpte_lock_map =
+ STATIC_LOCKDEP_MAP_INIT("hpte_lock", &hpte_lock_map);
+
+static void acquire_hpte_lock(void)
+{
+ lock_map_acquire(&hpte_lock_map);
+}
+
+static void release_hpte_lock(void)
+{
+ lock_map_release(&hpte_lock_map);
+}
+#else
+static void acquire_hpte_lock(void)
+{
+}
+
+static void release_hpte_lock(void)
+{
+}
+#endif
+
static inline unsigned long ___tlbie(unsigned long vpn, int psize,
int apsize, int ssize)
{
@@ -220,6 +243,7 @@ static inline void native_lock_hpte(struct hash_pte *hptep)
{
unsigned long *word = (unsigned long *)&hptep->v;
+ acquire_hpte_lock();
while (1) {
if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
break;
@@ -234,6 +258,7 @@ static inline void native_unlock_hpte(struct hash_pte *hptep)
{
unsigned long *word = (unsigned long *)&hptep->v;
+ release_hpte_lock();
clear_bit_unlock(HPTE_LOCK_BIT, word);
}
@@ -243,8 +268,11 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
{
struct hash_pte *hptep = htab_address + hpte_group;
unsigned long hpte_v, hpte_r;
+ unsigned long flags;
int i;
+ local_irq_save(flags);
+
if (!(vflags & HPTE_V_BOLTED)) {
DBG_LOW(" insert(group=%lx, vpn=%016lx, pa=%016lx,"
" rflags=%lx, vflags=%lx, psize=%d)\n",
@@ -263,8 +291,10 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
hptep++;
}
- if (i == HPTES_PER_GROUP)
+ if (i == HPTES_PER_GROUP) {
+ local_irq_restore(flags);
return -1;
+ }
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
@@ -286,10 +316,13 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
* Now set the first dword including the valid bit
* NOTE: this also unlocks the hpte
*/
+ release_hpte_lock();
hptep->v = cpu_to_be64(hpte_v);
__asm__ __volatile__ ("ptesync" : : : "memory");
+ local_irq_restore(flags);
+
return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}
@@ -327,6 +360,7 @@ static long native_hpte_remove(unsigned long hpte_group)
return -1;
/* Invalidate the hpte. NOTE: this also unlocks it */
+ release_hpte_lock();
hptep->v = 0;
return i;
@@ -339,6 +373,9 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
struct hash_pte *hptep = htab_address + slot;
unsigned long hpte_v, want_v;
int ret = 0, local = 0;
+ unsigned long irqflags;
+
+ local_irq_save(irqflags);
want_v = hpte_encode_avpn(vpn, bpsize, ssize);
@@ -382,6 +419,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
if (!(flags & HPTE_NOHPTE_UPDATE))
tlbie(vpn, bpsize, apsize, ssize, local);
+ local_irq_restore(irqflags);
+
return ret;
}
@@ -445,6 +484,9 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
unsigned long vsid;
long slot;
struct hash_pte *hptep;
+ unsigned long flags;
+
+ local_irq_save(flags);
vsid = get_kernel_vsid(ea, ssize);
vpn = hpt_vpn(ea, vsid, ssize);
@@ -463,6 +505,8 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
* actual page size will be same.
*/
tlbie(vpn, psize, psize, ssize, 0);
+
+ local_irq_restore(flags);
}
/*
@@ -476,6 +520,9 @@ static int native_hpte_removebolted(unsigned long ea, int psize, int ssize)
unsigned long vsid;
long slot;
struct hash_pte *hptep;
+ unsigned long flags;
+
+ local_irq_save(flags);
vsid = get_kernel_vsid(ea, ssize);
vpn = hpt_vpn(ea, vsid, ssize);
@@ -493,6 +540,9 @@ static int native_hpte_removebolted(unsigned long ea, int psize, int ssize)
/* Invalidate the TLB */
tlbie(vpn, psize, psize, ssize, 0);
+
+ local_irq_restore(flags);
+
return 0;
}
@@ -517,10 +567,11 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
/* recheck with locks held */
hpte_v = hpte_get_old_v(hptep);
- if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
+ if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
/* Invalidate the hpte. NOTE: this also unlocks it */
+ release_hpte_lock();
hptep->v = 0;
- else
+ } else
native_unlock_hpte(hptep);
}
/*
@@ -580,10 +631,8 @@ static void native_hugepage_invalidate(unsigned long vsid,
hpte_v = hpte_get_old_v(hptep);
if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
- /*
- * Invalidate the hpte. NOTE: this also unlocks it
- */
-
+ /* Invalidate the hpte. NOTE: this also unlocks it */
+ release_hpte_lock();
hptep->v = 0;
} else
native_unlock_hpte(hptep);
@@ -765,8 +814,10 @@ static void native_flush_hash_range(unsigned long number, int local)
if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
native_unlock_hpte(hptep);
- else
+ else {
+ release_hpte_lock();
hptep->v = 0;
+ }
} pte_iterate_hashed_end();
}
diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c
index 747492edb75a..51f48984abca 100644
--- a/arch/powerpc/mm/book3s64/hash_pgtable.c
+++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
@@ -404,7 +404,8 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage);
struct change_memory_parms {
unsigned long start, end, newpp;
- unsigned int step, nr_cpus, master_cpu;
+ unsigned int step, nr_cpus;
+ atomic_t master_cpu;
atomic_t cpu_counter;
};
@@ -478,7 +479,8 @@ static int change_memory_range_fn(void *data)
{
struct change_memory_parms *parms = data;
- if (parms->master_cpu != smp_processor_id())
+ // First CPU goes through, all others wait.
+ if (atomic_xchg(&parms->master_cpu, 1) == 1)
return chmem_secondary_loop(parms);
// Wait for all but one CPU (this one) to call-in
@@ -516,7 +518,7 @@ static bool hash__change_memory_range(unsigned long start, unsigned long end,
chmem_parms.end = end;
chmem_parms.step = step;
chmem_parms.newpp = newpp;
- chmem_parms.master_cpu = smp_processor_id();
+ atomic_set(&chmem_parms.master_cpu, 0);
cpus_read_lock();
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index df008edf7be0..6df4c6d38b66 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1981,7 +1981,7 @@ repeat:
}
#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
-static DEFINE_SPINLOCK(linear_map_hash_lock);
+static DEFINE_RAW_SPINLOCK(linear_map_hash_lock);
static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
{
@@ -2005,10 +2005,10 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
mmu_linear_psize, mmu_kernel_ssize);
BUG_ON (ret < 0);
- spin_lock(&linear_map_hash_lock);
+ raw_spin_lock(&linear_map_hash_lock);
BUG_ON(linear_map_hash_slots[lmi] & 0x80);
linear_map_hash_slots[lmi] = ret | 0x80;
- spin_unlock(&linear_map_hash_lock);
+ raw_spin_unlock(&linear_map_hash_lock);
}
static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
@@ -2018,14 +2018,14 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
- spin_lock(&linear_map_hash_lock);
+ raw_spin_lock(&linear_map_hash_lock);
if (!(linear_map_hash_slots[lmi] & 0x80)) {
- spin_unlock(&linear_map_hash_lock);
+ raw_spin_unlock(&linear_map_hash_lock);
return;
}
hidx = linear_map_hash_slots[lmi] & 0x7f;
linear_map_hash_slots[lmi] = 0;
- spin_unlock(&linear_map_hash_lock);
+ raw_spin_unlock(&linear_map_hash_lock);
if (hidx & _PTEIDX_SECONDARY)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index 507dc0b5987d..63fd925ccbb8 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -35,6 +35,7 @@
#include <asm/drmem.h>
#include "pseries.h"
+#include "vas.h" /* pseries_vas_dlpar_cpu() */
/*
* This isn't a module but we expose that to userspace
@@ -748,6 +749,16 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
return -EINVAL;
retval = update_ppp(new_entitled_ptr, NULL);
+
+ if (retval == H_SUCCESS || retval == H_CONSTRAINED) {
+ /*
+ * The hypervisor assigns VAS resources based
+ * on entitled capacity for shared mode.
+ * Reconfig VAS windows based on DLPAR CPU events.
+ */
+ if (pseries_vas_dlpar_cpu() != 0)
+ retval = H_HARDWARE;
+ }
} else if (!strcmp(kbuf, "capacity_weight")) {
char *endp;
*new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index 0e0524cbe20c..4ad6e510d405 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -200,17 +200,42 @@ static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
struct vas_user_win_ref *tsk_ref;
int rc;
- rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
- if (!rc) {
- tsk_ref = &txwin->vas_win.task_ref;
- vas_dump_crb(&crb);
- vas_update_csb(&crb, tsk_ref);
+ while (atomic_read(&txwin->pending_faults)) {
+ rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
+ if (!rc) {
+ tsk_ref = &txwin->vas_win.task_ref;
+ vas_dump_crb(&crb);
+ vas_update_csb(&crb, tsk_ref);
+ }
+ atomic_dec(&txwin->pending_faults);
}
return IRQ_HANDLED;
}
/*
+ * irq_default_primary_handler() can be used only with IRQF_ONESHOT
+ * which disables IRQ before executing the thread handler and enables
+ * it after. But this disabling interrupt sets the VAS IRQ OFF
+ * state in the hypervisor. If the NX generates fault interrupt
+ * during this window, the hypervisor will not deliver this
+ * interrupt to the LPAR. So use VAS specific IRQ handler instead
+ * of calling the default primary handler.
+ */
+static irqreturn_t pseries_vas_irq_handler(int irq, void *data)
+{
+ struct pseries_vas_window *txwin = data;
+
+ /*
+ * The thread hanlder will process this interrupt if it is
+ * already running.
+ */
+ atomic_inc(&txwin->pending_faults);
+
+ return IRQ_WAKE_THREAD;
+}
+
+/*
* Allocate window and setup IRQ mapping.
*/
static int allocate_setup_window(struct pseries_vas_window *txwin,
@@ -240,8 +265,9 @@ static int allocate_setup_window(struct pseries_vas_window *txwin,
goto out_irq;
}
- rc = request_threaded_irq(txwin->fault_virq, NULL,
- pseries_vas_fault_thread_fn, IRQF_ONESHOT,
+ rc = request_threaded_irq(txwin->fault_virq,
+ pseries_vas_irq_handler,
+ pseries_vas_fault_thread_fn, 0,
txwin->name, txwin);
if (rc) {
pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
@@ -826,6 +852,25 @@ int vas_reconfig_capabilties(u8 type, int new_nr_creds)
mutex_unlock(&vas_pseries_mutex);
return rc;
}
+
+int pseries_vas_dlpar_cpu(void)
+{
+ int new_nr_creds, rc;
+
+ rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
+ vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
+ (u64)virt_to_phys(&hv_cop_caps));
+ if (!rc) {
+ new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
+ rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE, new_nr_creds);
+ }
+
+ if (rc)
+ pr_err("Failed reconfig VAS capabilities with DLPAR\n");
+
+ return rc;
+}
+
/*
* Total number of default credits available (target_credits)
* in LPAR depends on number of cores configured. It varies based on
@@ -840,7 +885,15 @@ static int pseries_vas_notifier(struct notifier_block *nb,
struct of_reconfig_data *rd = data;
struct device_node *dn = rd->dn;
const __be32 *intserv = NULL;
- int new_nr_creds, len, rc = 0;
+ int len;
+
+ /*
+ * For shared CPU partition, the hypervisor assigns total credits
+ * based on entitled core capacity. So updating VAS windows will
+ * be called from lparcfg_write().
+ */
+ if (is_shared_processor())
+ return NOTIFY_OK;
if ((action == OF_RECONFIG_ATTACH_NODE) ||
(action == OF_RECONFIG_DETACH_NODE))
@@ -852,19 +905,7 @@ static int pseries_vas_notifier(struct notifier_block *nb,
if (!intserv)
return NOTIFY_OK;
- rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
- vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
- (u64)virt_to_phys(&hv_cop_caps));
- if (!rc) {
- new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
- rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE,
- new_nr_creds);
- }
-
- if (rc)
- pr_err("Failed reconfig VAS capabilities with DLPAR\n");
-
- return rc;
+ return pseries_vas_dlpar_cpu();
}
static struct notifier_block pseries_vas_nb = {
diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h
index 333ffa2f9f42..7115043ec488 100644
--- a/arch/powerpc/platforms/pseries/vas.h
+++ b/arch/powerpc/platforms/pseries/vas.h
@@ -132,6 +132,7 @@ struct pseries_vas_window {
u64 flags;
char *name;
int fault_virq;
+ atomic_t pending_faults; /* Number of pending faults */
};
int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps);
@@ -140,10 +141,15 @@ int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps);
#ifdef CONFIG_PPC_VAS
int vas_migration_handler(int action);
+int pseries_vas_dlpar_cpu(void);
#else
static inline int vas_migration_handler(int action)
{
return 0;
}
+static inline int pseries_vas_dlpar_cpu(void)
+{
+ return 0;
+}
#endif
#endif /* _VAS_H */
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 6b48a3ae9843..fa78595a6089 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -411,14 +411,16 @@ config RISCV_ISA_SVPBMT
If you don't know what to do here, say Y.
-config CC_HAS_ZICBOM
+config TOOLCHAIN_HAS_ZICBOM
bool
- default y if 64BIT && $(cc-option,-mabi=lp64 -march=rv64ima_zicbom)
- default y if 32BIT && $(cc-option,-mabi=ilp32 -march=rv32ima_zicbom)
+ default y
+ depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zicbom)
+ depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zicbom)
+ depends on LLD_VERSION >= 150000 || LD_VERSION >= 23800
config RISCV_ISA_ZICBOM
bool "Zicbom extension support for non-coherent DMA operation"
- depends on CC_HAS_ZICBOM
+ depends on TOOLCHAIN_HAS_ZICBOM
depends on !XIP_KERNEL && MMU
select RISCV_DMA_NONCOHERENT
select RISCV_ALTERNATIVE
@@ -433,6 +435,13 @@ config RISCV_ISA_ZICBOM
If you don't know what to do here, say Y.
+config TOOLCHAIN_HAS_ZIHINTPAUSE
+ bool
+ default y
+ depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zihintpause)
+ depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zihintpause)
+ depends on LLD_VERSION >= 150000 || LD_VERSION >= 23600
+
config FPU
bool "FPU support"
default y
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 1c8ec656e916..0d13b597cb55 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -59,12 +59,10 @@ toolchain-need-zicsr-zifencei := $(call cc-option-yn, -march=$(riscv-march-y)_zi
riscv-march-$(toolchain-need-zicsr-zifencei) := $(riscv-march-y)_zicsr_zifencei
# Check if the toolchain supports Zicbom extension
-toolchain-supports-zicbom := $(call cc-option-yn, -march=$(riscv-march-y)_zicbom)
-riscv-march-$(toolchain-supports-zicbom) := $(riscv-march-y)_zicbom
+riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZICBOM) := $(riscv-march-y)_zicbom
# Check if the toolchain supports Zihintpause extension
-toolchain-supports-zihintpause := $(call cc-option-yn, -march=$(riscv-march-y)_zihintpause)
-riscv-march-$(toolchain-supports-zihintpause) := $(riscv-march-y)_zihintpause
+riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause
KBUILD_CFLAGS += -march=$(subst fd,,$(riscv-march-y))
KBUILD_AFLAGS += -march=$(riscv-march-y)
diff --git a/arch/riscv/include/asm/jump_label.h b/arch/riscv/include/asm/jump_label.h
index 38af2ec7b9bf..6d58bbb5da46 100644
--- a/arch/riscv/include/asm/jump_label.h
+++ b/arch/riscv/include/asm/jump_label.h
@@ -14,8 +14,8 @@
#define JUMP_LABEL_NOP_SIZE 4
-static __always_inline bool arch_static_branch(struct static_key *key,
- bool branch)
+static __always_inline bool arch_static_branch(struct static_key * const key,
+ const bool branch)
{
asm_volatile_goto(
" .option push \n\t"
@@ -35,8 +35,8 @@ label:
return true;
}
-static __always_inline bool arch_static_branch_jump(struct static_key *key,
- bool branch)
+static __always_inline bool arch_static_branch_jump(struct static_key * const key,
+ const bool branch)
{
asm_volatile_goto(
" .option push \n\t"
diff --git a/arch/riscv/include/asm/vdso/processor.h b/arch/riscv/include/asm/vdso/processor.h
index 1e4f8b4aef79..fa70cfe507aa 100644
--- a/arch/riscv/include/asm/vdso/processor.h
+++ b/arch/riscv/include/asm/vdso/processor.h
@@ -21,7 +21,7 @@ static inline void cpu_relax(void)
* Reduce instruction retirement.
* This assumes the PC changes.
*/
-#ifdef __riscv_zihintpause
+#ifdef CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE
__asm__ __volatile__ ("pause");
#else
/* Encoding of the pause instruction */
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index fa427bdcf773..852ecccd8920 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -213,6 +213,9 @@ static void print_mmu(struct seq_file *f)
static void *c_start(struct seq_file *m, loff_t *pos)
{
+ if (*pos == nr_cpu_ids)
+ return NULL;
+
*pos = cpumask_next(*pos - 1, cpu_online_mask);
if ((*pos) < nr_cpu_ids)
return (void *)(uintptr_t)(1 + *pos);
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index a22e418dbd82..e1226709490f 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -113,6 +113,8 @@ static void __init kasan_populate_pud(pgd_t *pgd,
base_pud = pt_ops.get_pud_virt(pfn_to_phys(_pgd_pfn(*pgd)));
} else if (pgd_none(*pgd)) {
base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE);
+ memcpy(base_pud, (void *)kasan_early_shadow_pud,
+ sizeof(pud_t) * PTRS_PER_PUD);
} else {
base_pud = (pud_t *)pgd_page_vaddr(*pgd);
if (base_pud == lm_alias(kasan_early_shadow_pud)) {
@@ -173,8 +175,11 @@ static void __init kasan_populate_p4d(pgd_t *pgd,
base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgd)));
} else {
base_p4d = (p4d_t *)pgd_page_vaddr(*pgd);
- if (base_p4d == lm_alias(kasan_early_shadow_p4d))
+ if (base_p4d == lm_alias(kasan_early_shadow_p4d)) {
base_p4d = memblock_alloc(PTRS_PER_PUD * sizeof(p4d_t), PAGE_SIZE);
+ memcpy(base_p4d, (void *)kasan_early_shadow_p4d,
+ sizeof(p4d_t) * PTRS_PER_P4D);
+ }
}
p4dp = base_p4d + p4d_index(vaddr);
diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S
index af5c6860e0a1..fa9d33b01b85 100644
--- a/arch/s390/boot/vmlinux.lds.S
+++ b/arch/s390/boot/vmlinux.lds.S
@@ -102,8 +102,17 @@ SECTIONS
_compressed_start = .;
*(.vmlinux.bin.compressed)
_compressed_end = .;
- FILL(0xff);
- . = ALIGN(4096);
+ }
+
+#define SB_TRAILER_SIZE 32
+ /* Trailer needed for Secure Boot */
+ . += SB_TRAILER_SIZE; /* make sure .sb.trailer does not overwrite the previous section */
+ . = ALIGN(4096) - SB_TRAILER_SIZE;
+ .sb.trailer : {
+ QUAD(0)
+ QUAD(0)
+ QUAD(0)
+ QUAD(0x000000207a49504c)
}
_end = .;
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index e08c882dccaa..eaeaeb3ff0be 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -17,7 +17,8 @@
"3: jl 1b\n" \
" lhi %0,0\n" \
"4: sacf 768\n" \
- EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
+ EX_TABLE(0b,4b) EX_TABLE(1b,4b) \
+ EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
: "=d" (ret), "=&d" (oldval), "=&d" (newval), \
"=m" (*uaddr) \
: "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
index d5c7c1e30c17..74b53c531e0c 100644
--- a/arch/s390/kernel/perf_pai_ext.c
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -459,6 +459,7 @@ static int paiext_push_sample(void)
raw.frag.data = cpump->save;
raw.size = raw.frag.size;
data.raw = &raw;
+ data.sample_flags |= PERF_SAMPLE_RAW;
}
overflow = perf_event_overflow(event, &data, &regs);
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index 58033dfcb6d4..720036fb1924 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -157,7 +157,7 @@ unsigned long __clear_user(void __user *to, unsigned long size)
asm volatile(
" lr 0,%[spec]\n"
"0: mvcos 0(%1),0(%4),%0\n"
- " jz 4f\n"
+ "6: jz 4f\n"
"1: algr %0,%2\n"
" slgr %1,%2\n"
" j 0b\n"
@@ -167,11 +167,11 @@ unsigned long __clear_user(void __user *to, unsigned long size)
" clgr %0,%3\n" /* copy crosses next page boundary? */
" jnh 5f\n"
"3: mvcos 0(%1),0(%4),%3\n"
- " slgr %0,%3\n"
+ "7: slgr %0,%3\n"
" j 5f\n"
"4: slgr %0,%0\n"
"5:\n"
- EX_TABLE(0b,2b) EX_TABLE(3b,5b)
+ EX_TABLE(0b,2b) EX_TABLE(6b,2b) EX_TABLE(3b,5b) EX_TABLE(7b,5b)
: "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)
: "a" (empty_zero_page), [spec] "d" (spec.val)
: "cc", "memory", "0");
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
index 080c88620723..588089332931 100644
--- a/arch/s390/pci/pci_mmio.c
+++ b/arch/s390/pci/pci_mmio.c
@@ -64,7 +64,7 @@ static inline int __pcistg_mio_inuser(
asm volatile (
" sacf 256\n"
"0: llgc %[tmp],0(%[src])\n"
- " sllg %[val],%[val],8\n"
+ "4: sllg %[val],%[val],8\n"
" aghi %[src],1\n"
" ogr %[val],%[tmp]\n"
" brctg %[cnt],0b\n"
@@ -72,7 +72,7 @@ static inline int __pcistg_mio_inuser(
"2: ipm %[cc]\n"
" srl %[cc],28\n"
"3: sacf 768\n"
- EX_TABLE(0b, 3b) EX_TABLE(1b, 3b) EX_TABLE(2b, 3b)
+ EX_TABLE(0b, 3b) EX_TABLE(4b, 3b) EX_TABLE(1b, 3b) EX_TABLE(2b, 3b)
:
[src] "+a" (src), [cnt] "+d" (cnt),
[val] "+d" (val), [tmp] "=d" (tmp),
@@ -215,10 +215,10 @@ static inline int __pcilg_mio_inuser(
"2: ahi %[shift],-8\n"
" srlg %[tmp],%[val],0(%[shift])\n"
"3: stc %[tmp],0(%[dst])\n"
- " aghi %[dst],1\n"
+ "5: aghi %[dst],1\n"
" brctg %[cnt],2b\n"
"4: sacf 768\n"
- EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b)
+ EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b) EX_TABLE(5b, 4b)
:
[ioaddr_len] "+&d" (ioaddr_len.pair),
[cc] "+d" (cc), [val] "=d" (val),
diff --git a/arch/x86/crypto/polyval-clmulni_glue.c b/arch/x86/crypto/polyval-clmulni_glue.c
index b7664d018851..8fa58b0f3cb3 100644
--- a/arch/x86/crypto/polyval-clmulni_glue.c
+++ b/arch/x86/crypto/polyval-clmulni_glue.c
@@ -27,13 +27,17 @@
#include <asm/cpu_device_id.h>
#include <asm/simd.h>
+#define POLYVAL_ALIGN 16
+#define POLYVAL_ALIGN_ATTR __aligned(POLYVAL_ALIGN)
+#define POLYVAL_ALIGN_EXTRA ((POLYVAL_ALIGN - 1) & ~(CRYPTO_MINALIGN - 1))
+#define POLYVAL_CTX_SIZE (sizeof(struct polyval_tfm_ctx) + POLYVAL_ALIGN_EXTRA)
#define NUM_KEY_POWERS 8
struct polyval_tfm_ctx {
/*
* These powers must be in the order h^8, ..., h^1.
*/
- u8 key_powers[NUM_KEY_POWERS][POLYVAL_BLOCK_SIZE];
+ u8 key_powers[NUM_KEY_POWERS][POLYVAL_BLOCK_SIZE] POLYVAL_ALIGN_ATTR;
};
struct polyval_desc_ctx {
@@ -45,6 +49,11 @@ asmlinkage void clmul_polyval_update(const struct polyval_tfm_ctx *keys,
const u8 *in, size_t nblocks, u8 *accumulator);
asmlinkage void clmul_polyval_mul(u8 *op1, const u8 *op2);
+static inline struct polyval_tfm_ctx *polyval_tfm_ctx(struct crypto_shash *tfm)
+{
+ return PTR_ALIGN(crypto_shash_ctx(tfm), POLYVAL_ALIGN);
+}
+
static void internal_polyval_update(const struct polyval_tfm_ctx *keys,
const u8 *in, size_t nblocks, u8 *accumulator)
{
@@ -72,7 +81,7 @@ static void internal_polyval_mul(u8 *op1, const u8 *op2)
static int polyval_x86_setkey(struct crypto_shash *tfm,
const u8 *key, unsigned int keylen)
{
- struct polyval_tfm_ctx *tctx = crypto_shash_ctx(tfm);
+ struct polyval_tfm_ctx *tctx = polyval_tfm_ctx(tfm);
int i;
if (keylen != POLYVAL_BLOCK_SIZE)
@@ -102,7 +111,7 @@ static int polyval_x86_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
{
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
- const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ const struct polyval_tfm_ctx *tctx = polyval_tfm_ctx(desc->tfm);
u8 *pos;
unsigned int nblocks;
unsigned int n;
@@ -143,7 +152,7 @@ static int polyval_x86_update(struct shash_desc *desc,
static int polyval_x86_final(struct shash_desc *desc, u8 *dst)
{
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
- const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ const struct polyval_tfm_ctx *tctx = polyval_tfm_ctx(desc->tfm);
if (dctx->bytes) {
internal_polyval_mul(dctx->buffer,
@@ -167,7 +176,7 @@ static struct shash_alg polyval_alg = {
.cra_driver_name = "polyval-clmulni",
.cra_priority = 200,
.cra_blocksize = POLYVAL_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct polyval_tfm_ctx),
+ .cra_ctxsize = POLYVAL_CTX_SIZE,
.cra_module = THIS_MODULE,
},
};
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 3271735f0070..4cb710efbdd9 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -801,7 +801,7 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
/* Extension Memory */
if (ibs_caps & IBS_CAPS_ZEN4 &&
ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM) {
- data_src->mem_lvl_num = PERF_MEM_LVLNUM_EXTN_MEM;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_CXL;
if (op_data2->rmt_node) {
data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
/* IBS doesn't provide Remote socket detail */
diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
index 77e3a47af5ad..fea544e5842a 100644
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -806,7 +806,11 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &model_skl),
{},
};
MODULE_DEVICE_TABLE(x86cpu, rapl_model_match);
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 3b87d889b6e1..888731ccf1f6 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -10,10 +10,13 @@
/* Even with __builtin_ the compiler may decide to use the out of line
function. */
+#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY)
+#include <linux/kmsan_string.h>
+#endif
+
#define __HAVE_ARCH_MEMCPY 1
-#if defined(__SANITIZE_MEMORY__)
+#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY)
#undef memcpy
-void *__msan_memcpy(void *dst, const void *src, size_t size);
#define memcpy __msan_memcpy
#else
extern void *memcpy(void *to, const void *from, size_t len);
@@ -21,7 +24,7 @@ extern void *memcpy(void *to, const void *from, size_t len);
extern void *__memcpy(void *to, const void *from, size_t len);
#define __HAVE_ARCH_MEMSET
-#if defined(__SANITIZE_MEMORY__)
+#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY)
extern void *__msan_memset(void *s, int c, size_t n);
#undef memset
#define memset __msan_memset
@@ -67,7 +70,7 @@ static inline void *memset64(uint64_t *s, uint64_t v, size_t n)
}
#define __HAVE_ARCH_MEMMOVE
-#if defined(__SANITIZE_MEMORY__)
+#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY)
#undef memmove
void *__msan_memmove(void *dest, const void *src, size_t len);
#define memmove __msan_memmove
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 8bc614cfe21b..1cc756eafa44 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -254,24 +254,25 @@ extern void __put_user_nocheck_8(void);
#define __put_user_size(x, ptr, size, label) \
do { \
__typeof__(*(ptr)) __x = (x); /* eval x once */ \
- __chk_user_ptr(ptr); \
+ __typeof__(ptr) __ptr = (ptr); /* eval ptr once */ \
+ __chk_user_ptr(__ptr); \
switch (size) { \
case 1: \
- __put_user_goto(__x, ptr, "b", "iq", label); \
+ __put_user_goto(__x, __ptr, "b", "iq", label); \
break; \
case 2: \
- __put_user_goto(__x, ptr, "w", "ir", label); \
+ __put_user_goto(__x, __ptr, "w", "ir", label); \
break; \
case 4: \
- __put_user_goto(__x, ptr, "l", "ir", label); \
+ __put_user_goto(__x, __ptr, "l", "ir", label); \
break; \
case 8: \
- __put_user_goto_u64(__x, ptr, label); \
+ __put_user_goto_u64(__x, __ptr, label); \
break; \
default: \
__put_user_bad(); \
} \
- instrument_put_user(__x, ptr, size); \
+ instrument_put_user(__x, __ptr, size); \
} while (0)
#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7065462378e2..0810e93cbedc 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -1133,11 +1133,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->eax = max(entry->eax, 0x80000021);
break;
case 0x80000001:
+ entry->ebx &= ~GENMASK(27, 16);
cpuid_entry_override(entry, CPUID_8000_0001_EDX);
cpuid_entry_override(entry, CPUID_8000_0001_ECX);
break;
case 0x80000006:
- /* L2 cache and TLB: pass through host info. */
+ /* Drop reserved bits, pass host L2 cache and TLB info. */
+ entry->edx &= ~GENMASK(17, 16);
break;
case 0x80000007: /* Advanced power management */
/* invariant TSC is CPUID.80000007H:EDX[8] */
@@ -1167,6 +1169,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
g_phys_as = phys_as;
entry->eax = g_phys_as | (virt_as << 8);
+ entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8));
entry->edx = 0;
cpuid_entry_override(entry, CPUID_8000_0008_EBX);
break;
@@ -1186,6 +1189,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->ecx = entry->edx = 0;
break;
case 0x8000001a:
+ entry->eax &= GENMASK(2, 0);
+ entry->ebx = entry->ecx = entry->edx = 0;
+ break;
case 0x8000001e:
break;
case 0x8000001F:
@@ -1193,7 +1199,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
} else {
cpuid_entry_override(entry, CPUID_8000_001F_EAX);
-
+ /* Clear NumVMPL since KVM does not support VMPL. */
+ entry->ebx &= ~GENMASK(31, 12);
/*
* Enumerate '0' for "PA bits reduction", the adjusted
* MAXPHYADDR is enumerated directly (see 0x80000008).
diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
index cfed36aba2f7..c1390357126a 100644
--- a/arch/x86/kvm/debugfs.c
+++ b/arch/x86/kvm/debugfs.c
@@ -158,11 +158,16 @@ out:
static int kvm_mmu_rmaps_stat_open(struct inode *inode, struct file *file)
{
struct kvm *kvm = inode->i_private;
+ int r;
if (!kvm_get_kvm_safe(kvm))
return -ENOENT;
- return single_open(file, kvm_mmu_rmaps_stat_show, kvm);
+ r = single_open(file, kvm_mmu_rmaps_stat_show, kvm);
+ if (r < 0)
+ kvm_put_kvm(kvm);
+
+ return r;
}
static int kvm_mmu_rmaps_stat_release(struct inode *inode, struct file *file)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 3b27622d4642..4a43261d25a2 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -791,8 +791,7 @@ static int linearize(struct x86_emulate_ctxt *ctxt,
ctxt->mode, linear);
}
-static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
- enum x86emul_mode mode)
+static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
{
ulong linear;
int rc;
@@ -802,41 +801,71 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
if (ctxt->op_bytes != sizeof(unsigned long))
addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
- rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
+ rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear);
if (rc == X86EMUL_CONTINUE)
ctxt->_eip = addr.ea;
return rc;
}
+static inline int emulator_recalc_and_set_mode(struct x86_emulate_ctxt *ctxt)
+{
+ u64 efer;
+ struct desc_struct cs;
+ u16 selector;
+ u32 base3;
+
+ ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+
+ if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) {
+ /* Real mode. cpu must not have long mode active */
+ if (efer & EFER_LMA)
+ return X86EMUL_UNHANDLEABLE;
+ ctxt->mode = X86EMUL_MODE_REAL;
+ return X86EMUL_CONTINUE;
+ }
+
+ if (ctxt->eflags & X86_EFLAGS_VM) {
+ /* Protected/VM86 mode. cpu must not have long mode active */
+ if (efer & EFER_LMA)
+ return X86EMUL_UNHANDLEABLE;
+ ctxt->mode = X86EMUL_MODE_VM86;
+ return X86EMUL_CONTINUE;
+ }
+
+ if (!ctxt->ops->get_segment(ctxt, &selector, &cs, &base3, VCPU_SREG_CS))
+ return X86EMUL_UNHANDLEABLE;
+
+ if (efer & EFER_LMA) {
+ if (cs.l) {
+ /* Proper long mode */
+ ctxt->mode = X86EMUL_MODE_PROT64;
+ } else if (cs.d) {
+ /* 32 bit compatibility mode*/
+ ctxt->mode = X86EMUL_MODE_PROT32;
+ } else {
+ ctxt->mode = X86EMUL_MODE_PROT16;
+ }
+ } else {
+ /* Legacy 32 bit / 16 bit mode */
+ ctxt->mode = cs.d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
+ }
+
+ return X86EMUL_CONTINUE;
+}
+
static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
{
- return assign_eip(ctxt, dst, ctxt->mode);
+ return assign_eip(ctxt, dst);
}
-static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
- const struct desc_struct *cs_desc)
+static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst)
{
- enum x86emul_mode mode = ctxt->mode;
- int rc;
+ int rc = emulator_recalc_and_set_mode(ctxt);
-#ifdef CONFIG_X86_64
- if (ctxt->mode >= X86EMUL_MODE_PROT16) {
- if (cs_desc->l) {
- u64 efer = 0;
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
- ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
- if (efer & EFER_LMA)
- mode = X86EMUL_MODE_PROT64;
- } else
- mode = X86EMUL_MODE_PROT32; /* temporary value */
- }
-#endif
- if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
- mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
- rc = assign_eip(ctxt, dst, mode);
- if (rc == X86EMUL_CONTINUE)
- ctxt->mode = mode;
- return rc;
+ return assign_eip(ctxt, dst);
}
static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
@@ -2172,7 +2201,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
- rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
+ rc = assign_eip_far(ctxt, ctxt->src.val);
/* Error handling is not implemented. */
if (rc != X86EMUL_CONTINUE)
return X86EMUL_UNHANDLEABLE;
@@ -2250,7 +2279,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
&new_desc);
if (rc != X86EMUL_CONTINUE)
return rc;
- rc = assign_eip_far(ctxt, eip, &new_desc);
+ rc = assign_eip_far(ctxt, eip);
/* Error handling is not implemented. */
if (rc != X86EMUL_CONTINUE)
return X86EMUL_UNHANDLEABLE;
@@ -2432,7 +2461,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);
- for (i = 0; i < NR_EMULATOR_GPRS; i++)
+ for (i = 0; i < 8; i++)
*reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
val = GET_SMSTATE(u32, smstate, 0x7fcc);
@@ -2489,7 +2518,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
u16 selector;
int i, r;
- for (i = 0; i < NR_EMULATOR_GPRS; i++)
+ for (i = 0; i < 16; i++)
*reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
@@ -2633,7 +2662,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
* those side effects need to be explicitly handled for both success
* and shutdown.
*/
- return X86EMUL_CONTINUE;
+ return emulator_recalc_and_set_mode(ctxt);
emulate_shutdown:
ctxt->ops->triple_fault(ctxt);
@@ -2876,6 +2905,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
ctxt->_eip = rdx;
+ ctxt->mode = usermode;
*reg_write(ctxt, VCPU_REGS_RSP) = rcx;
return X86EMUL_CONTINUE;
@@ -3469,7 +3499,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
- rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
+ rc = assign_eip_far(ctxt, ctxt->src.val);
if (rc != X86EMUL_CONTINUE)
goto fail;
@@ -3611,11 +3641,25 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt)
static int em_cr_write(struct x86_emulate_ctxt *ctxt)
{
- if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
+ int cr_num = ctxt->modrm_reg;
+ int r;
+
+ if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val))
return emulate_gp(ctxt, 0);
/* Disable writeback. */
ctxt->dst.type = OP_NONE;
+
+ if (cr_num == 0) {
+ /*
+ * CR0 write might have updated CR0.PE and/or CR0.PG
+ * which can affect the cpu's execution mode.
+ */
+ r = emulator_recalc_and_set_mode(ctxt);
+ if (r != X86EMUL_CONTINUE)
+ return r;
+ }
+
return X86EMUL_CONTINUE;
}
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 9dba04b6b019..65f092e4a81b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -8263,6 +8263,11 @@ static __init int hardware_setup(void)
if (!cpu_has_virtual_nmis())
enable_vnmi = 0;
+#ifdef CONFIG_X86_SGX_KVM
+ if (!cpu_has_vmx_encls_vmexit())
+ enable_sgx = false;
+#endif
+
/*
* set_apic_access_page_addr() is used to reload apic access
* page upon invalidation. No need to do anything if not
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9cf1ba865562..521b433f978c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2315,11 +2315,11 @@ static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time,
/* we verify if the enable bit is set... */
if (system_time & 1) {
- kvm_gfn_to_pfn_cache_init(vcpu->kvm, &vcpu->arch.pv_time, vcpu,
- KVM_HOST_USES_PFN, system_time & ~1ULL,
- sizeof(struct pvclock_vcpu_time_info));
+ kvm_gpc_activate(vcpu->kvm, &vcpu->arch.pv_time, vcpu,
+ KVM_HOST_USES_PFN, system_time & ~1ULL,
+ sizeof(struct pvclock_vcpu_time_info));
} else {
- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.pv_time);
+ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.pv_time);
}
return;
@@ -3388,7 +3388,7 @@ static int kvm_pv_enable_async_pf_int(struct kvm_vcpu *vcpu, u64 data)
static void kvmclock_reset(struct kvm_vcpu *vcpu)
{
- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.pv_time);
+ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.pv_time);
vcpu->arch.time = 0;
}
@@ -10044,7 +10044,20 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
kvm_x86_ops.nested_ops->has_events(vcpu))
*req_immediate_exit = true;
- WARN_ON(kvm_is_exception_pending(vcpu));
+ /*
+ * KVM must never queue a new exception while injecting an event; KVM
+ * is done emulating and should only propagate the to-be-injected event
+ * to the VMCS/VMCB. Queueing a new exception can put the vCPU into an
+ * infinite loop as KVM will bail from VM-Enter to inject the pending
+ * exception and start the cycle all over.
+ *
+ * Exempt triple faults as they have special handling and won't put the
+ * vCPU into an infinite loop. Triple fault can be queued when running
+ * VMX without unrestricted guest, as that requires KVM to emulate Real
+ * Mode events (see kvm_inject_realmode_interrupt()).
+ */
+ WARN_ON_ONCE(vcpu->arch.exception.pending ||
+ vcpu->arch.exception_vmexit.pending);
return 0;
out:
@@ -11816,6 +11829,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
vcpu->arch.regs_avail = ~0;
vcpu->arch.regs_dirty = ~0;
+ kvm_gpc_init(&vcpu->arch.pv_time);
+
if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
else
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 93c628d3e3a9..2dae413bd62a 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -42,13 +42,13 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
int idx = srcu_read_lock(&kvm->srcu);
if (gfn == GPA_INVALID) {
- kvm_gfn_to_pfn_cache_destroy(kvm, gpc);
+ kvm_gpc_deactivate(kvm, gpc);
goto out;
}
do {
- ret = kvm_gfn_to_pfn_cache_init(kvm, gpc, NULL, KVM_HOST_USES_PFN,
- gpa, PAGE_SIZE);
+ ret = kvm_gpc_activate(kvm, gpc, NULL, KVM_HOST_USES_PFN, gpa,
+ PAGE_SIZE);
if (ret)
goto out;
@@ -554,15 +554,15 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
offsetof(struct compat_vcpu_info, time));
if (data->u.gpa == GPA_INVALID) {
- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
+ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
r = 0;
break;
}
- r = kvm_gfn_to_pfn_cache_init(vcpu->kvm,
- &vcpu->arch.xen.vcpu_info_cache,
- NULL, KVM_HOST_USES_PFN, data->u.gpa,
- sizeof(struct vcpu_info));
+ r = kvm_gpc_activate(vcpu->kvm,
+ &vcpu->arch.xen.vcpu_info_cache, NULL,
+ KVM_HOST_USES_PFN, data->u.gpa,
+ sizeof(struct vcpu_info));
if (!r)
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -570,16 +570,16 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
if (data->u.gpa == GPA_INVALID) {
- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
- &vcpu->arch.xen.vcpu_time_info_cache);
+ kvm_gpc_deactivate(vcpu->kvm,
+ &vcpu->arch.xen.vcpu_time_info_cache);
r = 0;
break;
}
- r = kvm_gfn_to_pfn_cache_init(vcpu->kvm,
- &vcpu->arch.xen.vcpu_time_info_cache,
- NULL, KVM_HOST_USES_PFN, data->u.gpa,
- sizeof(struct pvclock_vcpu_time_info));
+ r = kvm_gpc_activate(vcpu->kvm,
+ &vcpu->arch.xen.vcpu_time_info_cache,
+ NULL, KVM_HOST_USES_PFN, data->u.gpa,
+ sizeof(struct pvclock_vcpu_time_info));
if (!r)
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
break;
@@ -590,16 +590,15 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
break;
}
if (data->u.gpa == GPA_INVALID) {
- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
- &vcpu->arch.xen.runstate_cache);
+ kvm_gpc_deactivate(vcpu->kvm,
+ &vcpu->arch.xen.runstate_cache);
r = 0;
break;
}
- r = kvm_gfn_to_pfn_cache_init(vcpu->kvm,
- &vcpu->arch.xen.runstate_cache,
- NULL, KVM_HOST_USES_PFN, data->u.gpa,
- sizeof(struct vcpu_runstate_info));
+ r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache,
+ NULL, KVM_HOST_USES_PFN, data->u.gpa,
+ sizeof(struct vcpu_runstate_info));
break;
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
@@ -1667,18 +1666,18 @@ static int kvm_xen_eventfd_assign(struct kvm *kvm,
case EVTCHNSTAT_ipi:
/* IPI must map back to the same port# */
if (data->u.evtchn.deliver.port.port != data->u.evtchn.send_port)
- goto out; /* -EINVAL */
+ goto out_noeventfd; /* -EINVAL */
break;
case EVTCHNSTAT_interdomain:
if (data->u.evtchn.deliver.port.port) {
if (data->u.evtchn.deliver.port.port >= max_evtchn_port(kvm))
- goto out; /* -EINVAL */
+ goto out_noeventfd; /* -EINVAL */
} else {
eventfd = eventfd_ctx_fdget(data->u.evtchn.deliver.eventfd.fd);
if (IS_ERR(eventfd)) {
ret = PTR_ERR(eventfd);
- goto out;
+ goto out_noeventfd;
}
}
break;
@@ -1718,6 +1717,7 @@ static int kvm_xen_eventfd_assign(struct kvm *kvm,
out:
if (eventfd)
eventfd_ctx_put(eventfd);
+out_noeventfd:
kfree(evtchnfd);
return ret;
}
@@ -1816,7 +1816,12 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
{
vcpu->arch.xen.vcpu_id = vcpu->vcpu_idx;
vcpu->arch.xen.poll_evtchn = 0;
+
timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);
+
+ kvm_gpc_init(&vcpu->arch.xen.runstate_cache);
+ kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache);
+ kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache);
}
void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
@@ -1824,18 +1829,17 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
if (kvm_xen_timer_enabled(vcpu))
kvm_xen_stop_timer(vcpu);
- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
- &vcpu->arch.xen.runstate_cache);
- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
- &vcpu->arch.xen.vcpu_info_cache);
- kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
- &vcpu->arch.xen.vcpu_time_info_cache);
+ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate_cache);
+ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
+ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_time_info_cache);
+
del_timer_sync(&vcpu->arch.xen.poll_timer);
}
void kvm_xen_init_vm(struct kvm *kvm)
{
idr_init(&kvm->arch.xen.evtchn_ports);
+ kvm_gpc_init(&kvm->arch.xen.shinfo_cache);
}
void kvm_xen_destroy_vm(struct kvm *kvm)
@@ -1843,7 +1847,7 @@ void kvm_xen_destroy_vm(struct kvm *kvm)
struct evtchnfd *evtchnfd;
int i;
- kvm_gfn_to_pfn_cache_destroy(kvm, &kvm->arch.xen.shinfo_cache);
+ kvm_gpc_deactivate(kvm, &kvm->arch.xen.shinfo_cache);
idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) {
if (!evtchnfd->deliver.port.port)
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 58a200dc762d..17f09dc26381 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -26,6 +26,7 @@ GCOV_PROFILE := n
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
KCSAN_SANITIZE := n
+KMSAN_SANITIZE := n
KCOV_INSTRUMENT := n
# These are adjustments to the compiler flags used for objects that