diff options
Diffstat (limited to 'arch/s390')
40 files changed, 1286 insertions, 351 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 0105ce28e246..eaee7087886f 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -112,7 +112,6 @@ config S390 select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANTS_DYNAMIC_TASK_STRUCT - select ARCH_WANTS_PROT_NUMA_PROT_NONE select ARCH_WANTS_UBSAN_NO_NULL select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT @@ -540,6 +539,51 @@ config ARCH_RANDOM If unsure, say Y. +config KERNEL_NOBP + def_bool n + prompt "Enable modified branch prediction for the kernel by default" + help + If this option is selected the kernel will switch to a modified + branch prediction mode if the firmware interface is available. + The modified branch prediction mode improves the behaviour in + regard to speculative execution. + + With the option enabled the kernel parameter "nobp=0" or "nospec" + can be used to run the kernel in the normal branch prediction mode. + + With the option disabled the modified branch prediction mode is + enabled with the "nobp=1" kernel parameter. + + If unsure, say N. + +config EXPOLINE + def_bool n + prompt "Avoid speculative indirect branches in the kernel" + help + Compile the kernel with the expoline compiler options to guard + against kernel-to-user data leaks by avoiding speculative indirect + branches. + Requires a compiler with -mindirect-branch=thunk support for full + protection. The kernel may run slower. + + If unsure, say N. + +choice + prompt "Expoline default" + depends on EXPOLINE + default EXPOLINE_FULL + +config EXPOLINE_OFF + bool "spectre_v2=off" + +config EXPOLINE_MEDIUM + bool "spectre_v2=auto" + +config EXPOLINE_FULL + bool "spectre_v2=on" + +endchoice + endmenu menu "Memory setup" diff --git a/arch/s390/Makefile b/arch/s390/Makefile index fd691c4ff89e..2ced3239cb84 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -78,6 +78,16 @@ ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y) cflags-$(CONFIG_WARN_DYNAMIC_STACK) += -mwarn-dynamicstack endif +ifdef CONFIG_EXPOLINE + ifeq ($(call cc-option-yn,$(CC_FLAGS_MARCH) -mindirect-branch=thunk),y) + CC_FLAGS_EXPOLINE := -mindirect-branch=thunk + CC_FLAGS_EXPOLINE += -mfunction-return=thunk + CC_FLAGS_EXPOLINE += -mindirect-branch-table + export CC_FLAGS_EXPOLINE + cflags-y += $(CC_FLAGS_EXPOLINE) + endif +endif + ifdef CONFIG_FUNCTION_TRACER # make use of hotpatch feature if the compiler supports it cc_hotpatch := -mhotpatch=0,3 diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index 10432607a573..f9eddbca79d2 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -49,6 +49,30 @@ do { \ #define __smp_mb__before_atomic() barrier() #define __smp_mb__after_atomic() barrier() +/** + * array_index_mask_nospec - generate a mask for array_idx() that is + * ~0UL when the bounds check succeeds and 0 otherwise + * @index: array element index + * @size: number of elements in array + */ +#define array_index_mask_nospec array_index_mask_nospec +static inline unsigned long array_index_mask_nospec(unsigned long index, + unsigned long size) +{ + unsigned long mask; + + if (__builtin_constant_p(size) && size > 0) { + asm(" clgr %2,%1\n" + " slbgr %0,%0\n" + :"=d" (mask) : "d" (size-1), "d" (index) :"cc"); + return mask; + } + asm(" clgr %1,%2\n" + " slbgr %0,%0\n" + :"=d" (mask) : "d" (size), "d" (index) :"cc"); + return ~mask; +} + #include <asm-generic/barrier.h> #endif /* __ASM_BARRIER_H */ diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 31e400c1a1f3..86e5b2fdee3c 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -261,6 +261,11 @@ static inline void clear_bit_inv(unsigned long nr, volatile unsigned long *ptr) return clear_bit(nr ^ (BITS_PER_LONG - 1), ptr); } +static inline int test_and_clear_bit_inv(unsigned long nr, volatile unsigned long *ptr) +{ + return test_and_clear_bit(nr ^ (BITS_PER_LONG - 1), ptr); +} + static inline void __set_bit_inv(unsigned long nr, volatile unsigned long *ptr) { return __set_bit(nr ^ (BITS_PER_LONG - 1), ptr); diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h index a478eb61aaf7..fb56fa3283a2 100644 --- a/arch/s390/include/asm/css_chars.h +++ b/arch/s390/include/asm/css_chars.h @@ -20,7 +20,9 @@ struct css_general_char { u32 aif_tdd : 1; /* bit 56 */ u32 : 1; u32 qebsm : 1; /* bit 58 */ - u32 : 8; + u32 : 2; + u32 aiv : 1; /* bit 61 */ + u32 : 5; u32 aif_osa : 1; /* bit 67 */ u32 : 12; u32 eadm_rf : 1; /* bit 80 */ diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h index eb5323161f11..bb63b2afdf6f 100644 --- a/arch/s390/include/asm/eadm.h +++ b/arch/s390/include/asm/eadm.h @@ -4,7 +4,7 @@ #include <linux/types.h> #include <linux/device.h> -#include <linux/blkdev.h> +#include <linux/blk_types.h> struct arqb { u64 data; diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index fbe0c4be3cd8..99c8ce30b3cd 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -15,6 +15,24 @@ #define MAX_FACILITY_BIT (sizeof(((struct lowcore *)0)->stfle_fac_list) * 8) +static inline void __set_facility(unsigned long nr, void *facilities) +{ + unsigned char *ptr = (unsigned char *) facilities; + + if (nr >= MAX_FACILITY_BIT) + return; + ptr[nr >> 3] |= 0x80 >> (nr & 7); +} + +static inline void __clear_facility(unsigned long nr, void *facilities) +{ + unsigned char *ptr = (unsigned char *) facilities; + + if (nr >= MAX_FACILITY_BIT) + return; + ptr[nr >> 3] &= ~(0x80 >> (nr & 7)); +} + static inline int __test_facility(unsigned long nr, void *facilities) { unsigned char *ptr; diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index c1b0a9ac1dc8..afb0f08b8021 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -2,7 +2,7 @@ /* * definition for kernel virtual machines on s390 * - * Copyright IBM Corp. 2008, 2009 + * Copyright IBM Corp. 2008, 2018 * * Author(s): Carsten Otte <cotte@de.ibm.com> */ @@ -183,6 +183,7 @@ struct kvm_s390_sie_block { #define ECA_IB 0x40000000 #define ECA_SIGPI 0x10000000 #define ECA_MVPGI 0x01000000 +#define ECA_AIV 0x00200000 #define ECA_VX 0x00020000 #define ECA_PROTEXCI 0x00002000 #define ECA_SII 0x00000001 @@ -228,7 +229,9 @@ struct kvm_s390_sie_block { __u8 epdx; /* 0x0069 */ __u8 reserved6a[2]; /* 0x006a */ __u32 todpr; /* 0x006c */ - __u8 reserved70[16]; /* 0x0070 */ +#define GISA_FORMAT1 0x00000001 + __u32 gd; /* 0x0070 */ + __u8 reserved74[12]; /* 0x0074 */ __u64 mso; /* 0x0080 */ __u64 msl; /* 0x0088 */ psw_t gpsw; /* 0x0090 */ @@ -317,18 +320,30 @@ struct kvm_vcpu_stat { u64 deliver_program_int; u64 deliver_io_int; u64 exit_wait_state; + u64 instruction_epsw; + u64 instruction_gs; + u64 instruction_io_other; + u64 instruction_lpsw; + u64 instruction_lpswe; u64 instruction_pfmf; + u64 instruction_ptff; + u64 instruction_sck; + u64 instruction_sckpf; u64 instruction_stidp; u64 instruction_spx; u64 instruction_stpx; u64 instruction_stap; - u64 instruction_storage_key; + u64 instruction_iske; + u64 instruction_ri; + u64 instruction_rrbe; + u64 instruction_sske; u64 instruction_ipte_interlock; - u64 instruction_stsch; - u64 instruction_chsc; u64 instruction_stsi; u64 instruction_stfl; + u64 instruction_tb; + u64 instruction_tpi; u64 instruction_tprot; + u64 instruction_tsch; u64 instruction_sie; u64 instruction_essa; u64 instruction_sthyi; @@ -354,6 +369,7 @@ struct kvm_vcpu_stat { u64 diagnose_258; u64 diagnose_308; u64 diagnose_500; + u64 diagnose_other; }; #define PGM_OPERATION 0x01 @@ -410,35 +426,35 @@ struct kvm_vcpu_stat { #define PGM_PER 0x80 #define PGM_CRYPTO_OPERATION 0x119 -/* irq types in order of priority */ +/* irq types in ascend order of priorities */ enum irq_types { - IRQ_PEND_MCHK_EX = 0, - IRQ_PEND_SVC, - IRQ_PEND_PROG, - IRQ_PEND_MCHK_REP, - IRQ_PEND_EXT_IRQ_KEY, - IRQ_PEND_EXT_MALFUNC, - IRQ_PEND_EXT_EMERGENCY, - IRQ_PEND_EXT_EXTERNAL, - IRQ_PEND_EXT_CLOCK_COMP, - IRQ_PEND_EXT_CPU_TIMER, - IRQ_PEND_EXT_TIMING, - IRQ_PEND_EXT_SERVICE, - IRQ_PEND_EXT_HOST, - IRQ_PEND_PFAULT_INIT, - IRQ_PEND_PFAULT_DONE, - IRQ_PEND_VIRTIO, - IRQ_PEND_IO_ISC_0, - IRQ_PEND_IO_ISC_1, - IRQ_PEND_IO_ISC_2, - IRQ_PEND_IO_ISC_3, - IRQ_PEND_IO_ISC_4, - IRQ_PEND_IO_ISC_5, - IRQ_PEND_IO_ISC_6, - IRQ_PEND_IO_ISC_7, - IRQ_PEND_SIGP_STOP, + IRQ_PEND_SET_PREFIX = 0, IRQ_PEND_RESTART, - IRQ_PEND_SET_PREFIX, + IRQ_PEND_SIGP_STOP, + IRQ_PEND_IO_ISC_7, + IRQ_PEND_IO_ISC_6, + IRQ_PEND_IO_ISC_5, + IRQ_PEND_IO_ISC_4, + IRQ_PEND_IO_ISC_3, + IRQ_PEND_IO_ISC_2, + IRQ_PEND_IO_ISC_1, + IRQ_PEND_IO_ISC_0, + IRQ_PEND_VIRTIO, + IRQ_PEND_PFAULT_DONE, + IRQ_PEND_PFAULT_INIT, + IRQ_PEND_EXT_HOST, + IRQ_PEND_EXT_SERVICE, + IRQ_PEND_EXT_TIMING, + IRQ_PEND_EXT_CPU_TIMER, + IRQ_PEND_EXT_CLOCK_COMP, + IRQ_PEND_EXT_EXTERNAL, + IRQ_PEND_EXT_EMERGENCY, + IRQ_PEND_EXT_MALFUNC, + IRQ_PEND_EXT_IRQ_KEY, + IRQ_PEND_MCHK_REP, + IRQ_PEND_PROG, + IRQ_PEND_SVC, + IRQ_PEND_MCHK_EX, IRQ_PEND_COUNT }; @@ -516,9 +532,6 @@ struct kvm_s390_irq_payload { struct kvm_s390_local_interrupt { spinlock_t lock; - struct kvm_s390_float_interrupt *float_int; - struct swait_queue_head *wq; - atomic_t *cpuflags; DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS); struct kvm_s390_irq_payload irq; unsigned long pending_irqs; @@ -707,14 +720,50 @@ struct kvm_s390_crypto_cb { struct kvm_s390_apcb1 apcb1; /* 0x0080 */ }; +struct kvm_s390_gisa { + union { + struct { /* common to all formats */ + u32 next_alert; + u8 ipm; + u8 reserved01[2]; + u8 iam; + }; + struct { /* format 0 */ + u32 next_alert; + u8 ipm; + u8 reserved01; + u8 : 6; + u8 g : 1; + u8 c : 1; + u8 iam; + u8 reserved02[4]; + u32 airq_count; + } g0; + struct { /* format 1 */ + u32 next_alert; + u8 ipm; + u8 simm; + u8 nimm; + u8 iam; + u8 aism[8]; + u8 : 6; + u8 g : 1; + u8 c : 1; + u8 reserved03[11]; + u32 airq_count; + } g1; + }; +}; + /* - * sie_page2 has to be allocated as DMA because fac_list and crycb need - * 31bit addresses in the sie control block. + * sie_page2 has to be allocated as DMA because fac_list, crycb and + * gisa need 31bit addresses in the sie control block. */ struct sie_page2 { __u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */ struct kvm_s390_crypto_cb crycb; /* 0x0800 */ - u8 reserved900[0x1000 - 0x900]; /* 0x0900 */ + struct kvm_s390_gisa gisa; /* 0x0900 */ + u8 reserved920[0x1000 - 0x920]; /* 0x0920 */ }; struct kvm_s390_vsie { @@ -761,6 +810,7 @@ struct kvm_arch{ struct kvm_s390_migration_state *migration_state; /* subset of available cpu features enabled by user space */ DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); + struct kvm_s390_gisa *gisa; }; #define KVM_HVA_ERR_BAD (-1UL) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index ec6592e8ba36..5bc888841eaf 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -136,7 +136,11 @@ struct lowcore { __u64 vdso_per_cpu_data; /* 0x03b8 */ __u64 machine_flags; /* 0x03c0 */ __u64 gmap; /* 0x03c8 */ - __u8 pad_0x03d0[0x0e00-0x03d0]; /* 0x03d0 */ + __u8 pad_0x03d0[0x0400-0x03d0]; /* 0x03d0 */ + + /* br %r1 trampoline */ + __u16 br_r1_trampoline; /* 0x0400 */ + __u8 pad_0x0402[0x0e00-0x0402]; /* 0x0402 */ /* * 0xe00 contains the address of the IPL Parameter Information @@ -151,7 +155,8 @@ struct lowcore { __u8 pad_0x0e20[0x0f00-0x0e20]; /* 0x0e20 */ /* Extended facility list */ - __u64 stfle_fac_list[32]; /* 0x0f00 */ + __u64 stfle_fac_list[16]; /* 0x0f00 */ + __u64 alt_stfle_fac_list[16]; /* 0x0f80 */ __u8 pad_0x1000[0x11b0-0x1000]; /* 0x1000 */ /* Pointer to the machine check extended save area */ diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h new file mode 100644 index 000000000000..7df48e5cf36f --- /dev/null +++ b/arch/s390/include/asm/nospec-branch.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_EXPOLINE_H +#define _ASM_S390_EXPOLINE_H + +#ifndef __ASSEMBLY__ + +#include <linux/types.h> + +extern int nospec_call_disable; +extern int nospec_return_disable; + +void nospec_init_branches(void); +void nospec_call_revert(s32 *start, s32 *end); +void nospec_return_revert(s32 *start, s32 *end); + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_S390_EXPOLINE_H */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index bfbfad482289..7f2953c15c37 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -91,6 +91,7 @@ void cpu_detect_mhz_feature(void); extern const struct seq_operations cpuinfo_op; extern int sysctl_ieee_emulation_warnings; extern void execve_tail(void); +extern void __bpon(void); /* * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit. @@ -377,6 +378,9 @@ extern void memcpy_absolute(void *, void *, size_t); memcpy_absolute(&(dest), &__tmp, sizeof(__tmp)); \ } while (0) +extern int s390_isolate_bp(void); +extern int s390_isolate_bp_guest(void); + #endif /* __ASSEMBLY__ */ #endif /* __ASM_S390_PROCESSOR_H */ diff --git a/arch/s390/include/asm/runtime_instr.h b/arch/s390/include/asm/runtime_instr.h index 6b1540337ed6..0e1605538cd4 100644 --- a/arch/s390/include/asm/runtime_instr.h +++ b/arch/s390/include/asm/runtime_instr.h @@ -2,75 +2,10 @@ #ifndef _RUNTIME_INSTR_H #define _RUNTIME_INSTR_H -#define S390_RUNTIME_INSTR_START 0x1 -#define S390_RUNTIME_INSTR_STOP 0x2 - -struct runtime_instr_cb { - __u64 rca; - __u64 roa; - __u64 rla; - - __u32 v : 1; - __u32 s : 1; - __u32 k : 1; - __u32 h : 1; - __u32 a : 1; - __u32 reserved1 : 3; - __u32 ps : 1; - __u32 qs : 1; - __u32 pc : 1; - __u32 qc : 1; - __u32 reserved2 : 1; - __u32 g : 1; - __u32 u : 1; - __u32 l : 1; - __u32 key : 4; - __u32 reserved3 : 8; - __u32 t : 1; - __u32 rgs : 3; - - __u32 m : 4; - __u32 n : 1; - __u32 mae : 1; - __u32 reserved4 : 2; - __u32 c : 1; - __u32 r : 1; - __u32 b : 1; - __u32 j : 1; - __u32 e : 1; - __u32 x : 1; - __u32 reserved5 : 2; - __u32 bpxn : 1; - __u32 bpxt : 1; - __u32 bpti : 1; - __u32 bpni : 1; - __u32 reserved6 : 2; - - __u32 d : 1; - __u32 f : 1; - __u32 ic : 4; - __u32 dc : 4; - - __u64 reserved7; - __u64 sf; - __u64 rsic; - __u64 reserved8; -} __packed __aligned(8); +#include <uapi/asm/runtime_instr.h> extern struct runtime_instr_cb runtime_instr_empty_cb; -static inline void load_runtime_instr_cb(struct runtime_instr_cb *cb) -{ - asm volatile(".insn rsy,0xeb0000000060,0,0,%0" /* LRIC */ - : : "Q" (*cb)); -} - -static inline void store_runtime_instr_cb(struct runtime_instr_cb *cb) -{ - asm volatile(".insn rsy,0xeb0000000061,0,0,%0" /* STRIC */ - : "=Q" (*cb) : : "cc"); -} - static inline void save_ri_cb(struct runtime_instr_cb *cb_prev) { if (cb_prev) diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index d3c1a8a2e3ad..3cae9168f63c 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -77,6 +77,7 @@ struct sclp_info { unsigned char has_ibs : 1; unsigned char has_skey : 1; unsigned char has_kss : 1; + unsigned char has_gisaf : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index 25057c118d56..fe7b3f8f0791 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -21,7 +21,8 @@ struct sysinfo_1_1_1 { unsigned char :8; unsigned char ccr; unsigned char cai; - char reserved_0[28]; + char reserved_0[20]; + unsigned long lic; char manufacturer[16]; char type[4]; char reserved_1[12]; diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 25d6ec3aaddd..83ba57533ce6 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -58,6 +58,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */ #define TIF_PATCH_PENDING 5 /* pending live patching update */ #define TIF_PGSTE 6 /* New mm's will use 4K page tables */ +#define TIF_ISOLATE_BP 8 /* Run process with isolated BP */ +#define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */ #define TIF_31BIT 16 /* 32bit process */ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ @@ -78,6 +80,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #define _TIF_UPROBE _BITUL(TIF_UPROBE) #define _TIF_GUARDED_STORAGE _BITUL(TIF_GUARDED_STORAGE) #define _TIF_PATCH_PENDING _BITUL(TIF_PATCH_PENDING) +#define _TIF_ISOLATE_BP _BITUL(TIF_ISOLATE_BP) +#define _TIF_ISOLATE_BP_GUEST _BITUL(TIF_ISOLATE_BP_GUEST) #define _TIF_31BIT _BITUL(TIF_31BIT) #define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP) diff --git a/arch/s390/include/uapi/asm/runtime_instr.h b/arch/s390/include/uapi/asm/runtime_instr.h new file mode 100644 index 000000000000..45c9ec984e6b --- /dev/null +++ b/arch/s390/include/uapi/asm/runtime_instr.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _S390_UAPI_RUNTIME_INSTR_H +#define _S390_UAPI_RUNTIME_INSTR_H + +#include <linux/types.h> + +#define S390_RUNTIME_INSTR_START 0x1 +#define S390_RUNTIME_INSTR_STOP 0x2 + +struct runtime_instr_cb { + __u64 rca; + __u64 roa; + __u64 rla; + + __u32 v : 1; + __u32 s : 1; + __u32 k : 1; + __u32 h : 1; + __u32 a : 1; + __u32 reserved1 : 3; + __u32 ps : 1; + __u32 qs : 1; + __u32 pc : 1; + __u32 qc : 1; + __u32 reserved2 : 1; + __u32 g : 1; + __u32 u : 1; + __u32 l : 1; + __u32 key : 4; + __u32 reserved3 : 8; + __u32 t : 1; + __u32 rgs : 3; + + __u32 m : 4; + __u32 n : 1; + __u32 mae : 1; + __u32 reserved4 : 2; + __u32 c : 1; + __u32 r : 1; + __u32 b : 1; + __u32 j : 1; + __u32 e : 1; + __u32 x : 1; + __u32 reserved5 : 2; + __u32 bpxn : 1; + __u32 bpxt : 1; + __u32 bpti : 1; + __u32 bpni : 1; + __u32 reserved6 : 2; + + __u32 d : 1; + __u32 f : 1; + __u32 ic : 4; + __u32 dc : 4; + + __u64 reserved7; + __u64 sf; + __u64 rsic; + __u64 reserved8; +} __packed __aligned(8); + +static inline void load_runtime_instr_cb(struct runtime_instr_cb *cb) +{ + asm volatile(".insn rsy,0xeb0000000060,0,0,%0" /* LRIC */ + : : "Q" (*cb)); +} + +static inline void store_runtime_instr_cb(struct runtime_instr_cb *cb) +{ + asm volatile(".insn rsy,0xeb0000000061,0,0,%0" /* STRIC */ + : "=Q" (*cb) : : "cc"); +} + +#endif /* _S390_UAPI_RUNTIME_INSTR_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 909bce65cb2b..7f27e3da9709 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -29,6 +29,7 @@ UBSAN_SANITIZE_early.o := n # ifneq ($(CC_FLAGS_MARCH),-march=z900) CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH) +CFLAGS_REMOVE_als.o += $(CC_FLAGS_EXPOLINE) CFLAGS_als.o += -march=z900 AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH) AFLAGS_head.o += -march=z900 @@ -63,6 +64,9 @@ obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o extra-y += head.o head64.o vmlinux.lds +obj-$(CONFIG_EXPOLINE) += nospec-branch.o +CFLAGS_REMOVE_expoline.o += $(CC_FLAGS_EXPOLINE) + obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index 574e77622c04..22476135f738 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -15,6 +15,29 @@ static int __init disable_alternative_instructions(char *str) early_param("noaltinstr", disable_alternative_instructions); +static int __init nobp_setup_early(char *str) +{ + bool enabled; + int rc; + + rc = kstrtobool(str, &enabled); + if (rc) + return rc; + if (enabled && test_facility(82)) + __set_facility(82, S390_lowcore.alt_stfle_fac_list); + else + __clear_facility(82, S390_lowcore.alt_stfle_fac_list); + return 0; +} +early_param("nobp", nobp_setup_early); + +static int __init nospec_setup_early(char *str) +{ + __clear_facility(82, S390_lowcore.alt_stfle_fac_list); + return 0; +} +early_param("nospec", nospec_setup_early); + struct brcl_insn { u16 opc; s32 disp; @@ -75,7 +98,8 @@ static void __init_or_module __apply_alternatives(struct alt_instr *start, instr = (u8 *)&a->instr_offset + a->instr_offset; replacement = (u8 *)&a->repl_offset + a->repl_offset; - if (!test_facility(a->facility)) + if (!__test_facility(a->facility, + S390_lowcore.alt_stfle_fac_list)) continue; if (unlikely(a->instrlen % 2 || a->replacementlen % 2)) { diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 497a92047591..ac707a9f729e 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -193,6 +193,11 @@ static noinline __init void setup_facility_list(void) { stfle(S390_lowcore.stfle_fac_list, ARRAY_SIZE(S390_lowcore.stfle_fac_list)); + memcpy(S390_lowcore.alt_stfle_fac_list, + S390_lowcore.stfle_fac_list, + sizeof(S390_lowcore.alt_stfle_fac_list)); + if (!IS_ENABLED(CONFIG_KERNEL_NOBP)) + __clear_facility(82, S390_lowcore.alt_stfle_fac_list); } static __init void detect_diag9c(void) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 6cd444d25545..13a133a6015c 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -107,6 +107,7 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j 3f 1: UPDATE_VTIME %r14,%r15,\timer + BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP 2: lg %r15,__LC_ASYNC_STACK # load async stack 3: la %r11,STACK_FRAME_OVERHEAD(%r15) .endm @@ -159,6 +160,130 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) tm off+\addr, \mask .endm + .macro BPOFF + .pushsection .altinstr_replacement, "ax" +660: .long 0xb2e8c000 + .popsection +661: .long 0x47000000 + .pushsection .altinstructions, "a" + .long 661b - . + .long 660b - . + .word 82 + .byte 4 + .byte 4 + .popsection + .endm + + .macro BPON + .pushsection .altinstr_replacement, "ax" +662: .long 0xb2e8d000 + .popsection +663: .long 0x47000000 + .pushsection .altinstructions, "a" + .long 663b - . + .long 662b - . + .word 82 + .byte 4 + .byte 4 + .popsection + .endm + + .macro BPENTER tif_ptr,tif_mask + .pushsection .altinstr_replacement, "ax" +662: .word 0xc004, 0x0000, 0x0000 # 6 byte nop + .word 0xc004, 0x0000, 0x0000 # 6 byte nop + .popsection +664: TSTMSK \tif_ptr,\tif_mask + jz . + 8 + .long 0xb2e8d000 + .pushsection .altinstructions, "a" + .long 664b - . + .long 662b - . + .word 82 + .byte 12 + .byte 12 + .popsection + .endm + + .macro BPEXIT tif_ptr,tif_mask + TSTMSK \tif_ptr,\tif_mask + .pushsection .altinstr_replacement, "ax" +662: jnz . + 8 + .long 0xb2e8d000 + .popsection +664: jz . + 8 + .long 0xb2e8c000 + .pushsection .altinstructions, "a" + .long 664b - . + .long 662b - . + .word 82 + .byte 8 + .byte 8 + .popsection + .endm + +#ifdef CONFIG_EXPOLINE + + .macro GEN_BR_THUNK name,reg,tmp + .section .text.\name,"axG",@progbits,\name,comdat + .globl \name + .hidden \name + .type \name,@function +\name: + .cfi_startproc +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + exrl 0,0f +#else + larl \tmp,0f + ex 0,0(\tmp) +#endif + j . +0: br \reg + .cfi_endproc + .endm + + GEN_BR_THUNK __s390x_indirect_jump_r1use_r9,%r9,%r1 + GEN_BR_THUNK __s390x_indirect_jump_r1use_r14,%r14,%r1 + GEN_BR_THUNK __s390x_indirect_jump_r11use_r14,%r14,%r11 + + .macro BASR_R14_R9 +0: brasl %r14,__s390x_indirect_jump_r1use_r9 + .pushsection .s390_indirect_branches,"a",@progbits + .long 0b-. + .popsection + .endm + + .macro BR_R1USE_R14 +0: jg __s390x_indirect_jump_r1use_r14 + .pushsection .s390_indirect_branches,"a",@progbits + .long 0b-. + .popsection + .endm + + .macro BR_R11USE_R14 +0: jg __s390x_indirect_jump_r11use_r14 + .pushsection .s390_indirect_branches,"a",@progbits + .long 0b-. + .popsection + .endm + +#else /* CONFIG_EXPOLINE */ + + .macro BASR_R14_R9 + basr %r14,%r9 + .endm + + .macro BR_R1USE_R14 + br %r14 + .endm + + .macro BR_R11USE_R14 + br %r14 + .endm + +#endif /* CONFIG_EXPOLINE */ + + .section .kprobes.text, "ax" .Ldummy: /* @@ -171,6 +296,11 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) */ nop 0 +ENTRY(__bpon) + .globl __bpon + BPON + BR_R1USE_R14 + /* * Scheduler resume function, called by switch_to * gpr2 = (task_struct *) prev @@ -193,9 +323,9 @@ ENTRY(__switch_to) mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP - bzr %r14 + jz 0f .insn s,0xb2800000,__LC_LPP # set program parameter - br %r14 +0: BR_R1USE_R14 .L__critical_start: @@ -207,9 +337,11 @@ ENTRY(__switch_to) */ ENTRY(sie64a) stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers + lg %r12,__LC_CURRENT stg %r2,__SF_EMPTY(%r15) # save control block pointer stg %r3,__SF_EMPTY+8(%r15) # save guest register save area xc __SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # reason code = 0 + mvc __SF_EMPTY+24(8,%r15),__TI_flags(%r12) # copy thread flags TSTMSK __LC_CPU_FLAGS,_CIF_FPU # load guest fp/vx registers ? jno .Lsie_load_guest_gprs brasl %r14,load_fpu_regs # load guest fp/vx regs @@ -226,8 +358,12 @@ ENTRY(sie64a) jnz .Lsie_skip TSTMSK __LC_CPU_FLAGS,_CIF_FPU jo .Lsie_skip # exit if fp/vx regs changed + BPEXIT __SF_EMPTY+24(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) .Lsie_entry: sie 0(%r14) +.Lsie_exit: + BPOFF + BPENTER __SF_EMPTY+24(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) .Lsie_skip: ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce @@ -248,9 +384,15 @@ ENTRY(sie64a) sie_exit: lg %r14,__SF_EMPTY+8(%r15) # load guest register save area stmg %r0,%r13,0(%r14) # save guest gprs 0-13 + xgr %r0,%r0 # clear guest registers to + xgr %r1,%r1 # prevent speculative use + xgr %r2,%r2 + xgr %r3,%r3 + xgr %r4,%r4 + xgr %r5,%r5 lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers lg %r2,__SF_EMPTY+16(%r15) # return exit reason code - br %r14 + BR_R1USE_R14 .Lsie_fault: lghi %r14,-EFAULT stg %r14,__SF_EMPTY+16(%r15) # set exit reason code @@ -273,6 +415,7 @@ ENTRY(system_call) stpt __LC_SYNC_ENTER_TIMER .Lsysc_stmg: stmg %r8,%r15,__LC_SAVE_AREA_SYNC + BPOFF lg %r12,__LC_CURRENT lghi %r13,__TASK_thread lghi %r14,_PIF_SYSCALL @@ -281,7 +424,10 @@ ENTRY(system_call) la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs .Lsysc_vtime: UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER + BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP stmg %r0,%r7,__PT_R0(%r11) + # clear user controlled register to prevent speculative use + xgr %r0,%r0 mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC @@ -305,7 +451,7 @@ ENTRY(system_call) lgf %r9,0(%r8,%r10) # get system call add. TSTMSK __TI_flags(%r12),_TIF_TRACE jnz .Lsysc_tracesys - basr %r14,%r9 # call sys_xxxx + BASR_R14_R9 # call sys_xxxx stg %r2,__PT_R2(%r11) # store return value .Lsysc_return: @@ -317,6 +463,7 @@ ENTRY(system_call) jnz .Lsysc_work # check for work TSTMSK __LC_CPU_FLAGS,_CIF_WORK jnz .Lsysc_work + BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP .Lsysc_restore: lg %r14,__LC_VDSO_PER_CPU lmg %r0,%r10,__PT_R0(%r11) @@ -489,7 +636,7 @@ ENTRY(system_call) lmg %r3,%r7,__PT_R3(%r11) stg %r7,STACK_FRAME_OVERHEAD(%r15) lg %r2,__PT_ORIG_GPR2(%r11) - basr %r14,%r9 # call sys_xxx + BASR_R14_R9 # call sys_xxx stg %r2,__PT_R2(%r11) # store return value .Lsysc_tracenogo: TSTMSK __TI_flags(%r12),_TIF_TRACE @@ -513,7 +660,7 @@ ENTRY(ret_from_fork) lmg %r9,%r10,__PT_R9(%r11) # load gprs ENTRY(kernel_thread_starter) la %r2,0(%r10) - basr %r14,%r9 + BASR_R14_R9 j .Lsysc_tracenogo /* @@ -522,6 +669,7 @@ ENTRY(kernel_thread_starter) ENTRY(pgm_check_handler) stpt __LC_SYNC_ENTER_TIMER + BPOFF stmg %r8,%r15,__LC_SAVE_AREA_SYNC lg %r10,__LC_LAST_BREAK lg %r12,__LC_CURRENT @@ -550,6 +698,7 @@ ENTRY(pgm_check_handler) aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j 4f 2: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER + BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP lg %r15,__LC_KERNEL_STACK lgr %r14,%r12 aghi %r14,__TASK_thread # pointer to thread_struct @@ -561,6 +710,15 @@ ENTRY(pgm_check_handler) 4: lgr %r13,%r11 la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) + # clear user controlled registers to prevent speculative use + xgr %r0,%r0 + xgr %r1,%r1 + xgr %r2,%r2 + xgr %r3,%r3 + xgr %r4,%r4 + xgr %r5,%r5 + xgr %r6,%r6 + xgr %r7,%r7 mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC stmg %r8,%r9,__PT_PSW(%r11) mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC @@ -582,9 +740,9 @@ ENTRY(pgm_check_handler) nill %r10,0x007f sll %r10,2 je .Lpgm_return - lgf %r1,0(%r10,%r1) # load address of handler routine + lgf %r9,0(%r10,%r1) # load address of handler routine lgr %r2,%r11 # pass pointer to pt_regs - basr %r14,%r1 # branch to interrupt-handler + BASR_R14_R9 # branch to interrupt-handler .Lpgm_return: LOCKDEP_SYS_EXIT tm __PT_PSW+1(%r11),0x01 # returning to user ? @@ -620,12 +778,23 @@ ENTRY(pgm_check_handler) ENTRY(io_int_handler) STCK __LC_INT_CLOCK stpt __LC_ASYNC_ENTER_TIMER + BPOFF stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r12,__LC_CURRENT larl %r13,cleanup_critical lmg %r8,%r9,__LC_IO_OLD_PSW SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER stmg %r0,%r7,__PT_R0(%r11) + # clear user controlled registers to prevent speculative use + xgr %r0,%r0 + xgr %r1,%r1 + xgr %r2,%r2 + xgr %r3,%r3 + xgr %r4,%r4 + xgr %r5,%r5 + xgr %r6,%r6 + xgr %r7,%r7 + xgr %r10,%r10 mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID @@ -660,9 +829,13 @@ ENTRY(io_int_handler) lg %r14,__LC_VDSO_PER_CPU lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jno .Lio_exit_kernel + BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP .Lio_exit_timer: stpt __LC_EXIT_TIMER mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER +.Lio_exit_kernel: lmg %r11,%r15,__PT_R11(%r11) lpswe __LC_RETURN_PSW .Lio_done: @@ -833,12 +1006,23 @@ ENTRY(io_int_handler) ENTRY(ext_int_handler) STCK __LC_INT_CLOCK stpt __LC_ASYNC_ENTER_TIMER + BPOFF stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r12,__LC_CURRENT larl %r13,cleanup_critical lmg %r8,%r9,__LC_EXT_OLD_PSW SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER stmg %r0,%r7,__PT_R0(%r11) + # clear user controlled registers to prevent speculative use + xgr %r0,%r0 + xgr %r1,%r1 + xgr %r2,%r2 + xgr %r3,%r3 + xgr %r4,%r4 + xgr %r5,%r5 + xgr %r6,%r6 + xgr %r7,%r7 + xgr %r10,%r10 mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) lghi %r1,__LC_EXT_PARAMS2 @@ -871,11 +1055,12 @@ ENTRY(psw_idle) .Lpsw_idle_stcctm: #endif oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT + BPON STCK __CLOCK_IDLE_ENTER(%r2) stpt __TIMER_IDLE_ENTER(%r2) .Lpsw_idle_lpsw: lpswe __SF_EMPTY(%r15) - br %r14 + BR_R1USE_R14 .Lpsw_idle_end: /* @@ -889,7 +1074,7 @@ ENTRY(save_fpu_regs) lg %r2,__LC_CURRENT aghi %r2,__TASK_thread TSTMSK __LC_CPU_FLAGS,_CIF_FPU - bor %r14 + jo .Lsave_fpu_regs_exit stfpc __THREAD_FPU_fpc(%r2) lg %r3,__THREAD_FPU_regs(%r2) TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX @@ -916,7 +1101,8 @@ ENTRY(save_fpu_regs) std 15,120(%r3) .Lsave_fpu_regs_done: oi __LC_CPU_FLAGS+7,_CIF_FPU - br %r14 +.Lsave_fpu_regs_exit: + BR_R1USE_R14 .Lsave_fpu_regs_end: EXPORT_SYMBOL(save_fpu_regs) @@ -934,7 +1120,7 @@ load_fpu_regs: lg %r4,__LC_CURRENT aghi %r4,__TASK_thread TSTMSK __LC_CPU_FLAGS,_CIF_FPU - bnor %r14 + jno .Lload_fpu_regs_exit lfpc __THREAD_FPU_fpc(%r4) TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area @@ -961,7 +1147,8 @@ load_fpu_regs: ld 15,120(%r4) .Lload_fpu_regs_done: ni __LC_CPU_FLAGS+7,255-_CIF_FPU - br %r14 +.Lload_fpu_regs_exit: + BR_R1USE_R14 .Lload_fpu_regs_end: .L__critical_end: @@ -971,6 +1158,7 @@ load_fpu_regs: */ ENTRY(mcck_int_handler) STCK __LC_MCCK_CLOCK + BPOFF la %r1,4095 # validate r1 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # validate cpu timer sckc __LC_CLOCK_COMPARATOR # validate comparator @@ -1046,6 +1234,16 @@ ENTRY(mcck_int_handler) .Lmcck_skip: lghi %r14,__LC_GPREGS_SAVE_AREA+64 stmg %r0,%r7,__PT_R0(%r11) + # clear user controlled registers to prevent speculative use + xgr %r0,%r0 + xgr %r1,%r1 + xgr %r2,%r2 + xgr %r3,%r3 + xgr %r4,%r4 + xgr %r5,%r5 + xgr %r6,%r6 + xgr %r7,%r7 + xgr %r10,%r10 mvc __PT_R8(64,%r11),0(%r14) stmg %r8,%r9,__PT_PSW(%r11) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) @@ -1071,6 +1269,7 @@ ENTRY(mcck_int_handler) mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? jno 0f + BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP stpt __LC_EXIT_TIMER mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER 0: lmg %r11,%r15,__PT_R11(%r11) @@ -1166,7 +1365,7 @@ cleanup_critical: jl 0f clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end jl .Lcleanup_load_fpu_regs -0: br %r14 +0: BR_R11USE_R14 .align 8 .Lcleanup_table: @@ -1197,11 +1396,12 @@ cleanup_critical: clg %r9,BASED(.Lsie_crit_mcck_length) jh 1f oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST -1: lg %r9,__SF_EMPTY(%r15) # get control block pointer +1: BPENTER __SF_EMPTY+24(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) + lg %r9,__SF_EMPTY(%r15) # get control block pointer ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit - br %r14 + BR_R11USE_R14 #endif .Lcleanup_system_call: @@ -1254,7 +1454,7 @@ cleanup_critical: stg %r15,56(%r11) # r15 stack pointer # set new psw address and exit larl %r9,.Lsysc_do_svc - br %r14 + BR_R11USE_R14 .Lcleanup_system_call_insn: .quad system_call .quad .Lsysc_stmg @@ -1266,7 +1466,7 @@ cleanup_critical: .Lcleanup_sysc_tif: larl %r9,.Lsysc_tif - br %r14 + BR_R11USE_R14 .Lcleanup_sysc_restore: # check if stpt has been executed @@ -1283,14 +1483,14 @@ cleanup_critical: mvc 0(64,%r11),__PT_R8(%r9) lmg %r0,%r7,__PT_R0(%r9) 1: lmg %r8,%r9,__LC_RETURN_PSW - br %r14 + BR_R11USE_R14 .Lcleanup_sysc_restore_insn: .quad .Lsysc_exit_timer .quad .Lsysc_done - 4 .Lcleanup_io_tif: larl %r9,.Lio_tif - br %r14 + BR_R11USE_R14 .Lcleanup_io_restore: # check if stpt has been executed @@ -1304,7 +1504,7 @@ cleanup_critical: mvc 0(64,%r11),__PT_R8(%r9) lmg %r0,%r7,__PT_R0(%r9) 1: lmg %r8,%r9,__LC_RETURN_PSW - br %r14 + BR_R11USE_R14 .Lcleanup_io_restore_insn: .quad .Lio_exit_timer .quad .Lio_done - 4 @@ -1357,17 +1557,17 @@ cleanup_critical: # prepare return psw nihh %r8,0xfcfd # clear irq & wait state bits lg %r9,48(%r11) # return from psw_idle - br %r14 + BR_R11USE_R14 .Lcleanup_idle_insn: .quad .Lpsw_idle_lpsw .Lcleanup_save_fpu_regs: larl %r9,save_fpu_regs - br %r14 + BR_R11USE_R14 .Lcleanup_load_fpu_regs: larl %r9,load_fpu_regs - br %r14 + BR_R11USE_R14 /* * Integer constants @@ -1387,7 +1587,6 @@ cleanup_critical: .Lsie_crit_mcck_length: .quad .Lsie_skip - .Lsie_entry #endif - .section .rodata, "a" #define SYSCALL(esame,emu) .long esame .globl sys_call_table diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index da5cc3b469aa..34477c1aee6d 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -543,6 +543,7 @@ static struct kset *ipl_kset; static void __ipl_run(void *unused) { + __bpon(); diag308(DIAG308_LOAD_CLEAR, NULL); if (MACHINE_IS_VM) __cpcmd("IPL", NULL, 0, NULL); diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 943d13e90c98..60f60afa645c 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -281,7 +281,7 @@ static void kprobe_reenter_check(struct kprobe_ctlblk *kcb, struct kprobe *p) * is a BUG. The code path resides in the .kprobes.text * section and is executed with interrupts disabled. */ - printk(KERN_EMERG "Invalid kprobe detected at %p.\n", p->addr); + pr_err("Invalid kprobe detected.\n"); dump_kprobe(p); BUG(); } diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index b7abfad4fd7d..1fc6d1ff92d3 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -19,6 +19,8 @@ #include <linux/moduleloader.h> #include <linux/bug.h> #include <asm/alternative.h> +#include <asm/nospec-branch.h> +#include <asm/facility.h> #if 0 #define DEBUGP printk @@ -156,7 +158,11 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, me->arch.got_offset = me->core_layout.size; me->core_layout.size += me->arch.got_size; me->arch.plt_offset = me->core_layout.size; - me->core_layout.size += me->arch.plt_size; + if (me->arch.plt_size) { + if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_call_disable) + me->arch.plt_size += PLT_ENTRY_SIZE; + me->core_layout.size += me->arch.plt_size; + } return 0; } @@ -310,9 +316,21 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, unsigned int *ip; ip = me->core_layout.base + me->arch.plt_offset + info->plt_offset; - ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */ - ip[1] = 0x100a0004; - ip[2] = 0x07f10000; + ip[0] = 0x0d10e310; /* basr 1,0 */ + ip[1] = 0x100a0004; /* lg 1,10(1) */ + if (IS_ENABLED(CONFIG_EXPOLINE) && + !nospec_call_disable) { + unsigned int *ij; + ij = me->core_layout.base + + me->arch.plt_offset + + me->arch.plt_size - PLT_ENTRY_SIZE; + ip[2] = 0xa7f40000 + /* j __jump_r1 */ + (unsigned int)(u16) + (((unsigned long) ij - 8 - + (unsigned long) ip) / 2); + } else { + ip[2] = 0x07f10000; /* br %r1 */ + } ip[3] = (unsigned int) (val >> 32); ip[4] = (unsigned int) val; info->plt_initialized = 1; @@ -418,16 +436,42 @@ int module_finalize(const Elf_Ehdr *hdr, struct module *me) { const Elf_Shdr *s; - char *secstrings; + char *secstrings, *secname; + void *aseg; + + if (IS_ENABLED(CONFIG_EXPOLINE) && + !nospec_call_disable && me->arch.plt_size) { + unsigned int *ij; + + ij = me->core_layout.base + me->arch.plt_offset + + me->arch.plt_size - PLT_ENTRY_SIZE; + if (test_facility(35)) { + ij[0] = 0xc6000000; /* exrl %r0,.+10 */ + ij[1] = 0x0005a7f4; /* j . */ + ij[2] = 0x000007f1; /* br %r1 */ + } else { + ij[0] = 0x44000000 | (unsigned int) + offsetof(struct lowcore, br_r1_trampoline); + ij[1] = 0xa7f40000; /* j . */ + } + } secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { - if (!strcmp(".altinstructions", secstrings + s->sh_name)) { - /* patch .altinstructions */ - void *aseg = (void *)s->sh_addr; + aseg = (void *) s->sh_addr; + secname = secstrings + s->sh_name; + if (!strcmp(".altinstructions", secname)) + /* patch .altinstructions */ apply_alternatives(aseg, aseg + s->sh_size); - } + + if (IS_ENABLED(CONFIG_EXPOLINE) && + (!strcmp(".nospec_call_table", secname))) + nospec_call_revert(aseg, aseg + s->sh_size); + + if (IS_ENABLED(CONFIG_EXPOLINE) && + (!strcmp(".nospec_return_table", secname))) + nospec_return_revert(aseg, aseg + s->sh_size); } jump_label_apply_nops(me); diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c new file mode 100644 index 000000000000..69d7fcf48158 --- /dev/null +++ b/arch/s390/kernel/nospec-branch.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/module.h> +#include <asm/nospec-branch.h> + +int nospec_call_disable = IS_ENABLED(EXPOLINE_OFF); +int nospec_return_disable = !IS_ENABLED(EXPOLINE_FULL); + +static int __init nospectre_v2_setup_early(char *str) +{ + nospec_call_disable = 1; + nospec_return_disable = 1; + return 0; +} +early_param("nospectre_v2", nospectre_v2_setup_early); + +static int __init spectre_v2_setup_early(char *str) +{ + if (str && !strncmp(str, "on", 2)) { + nospec_call_disable = 0; + nospec_return_disable = 0; + } + if (str && !strncmp(str, "off", 3)) { + nospec_call_disable = 1; + nospec_return_disable = 1; + } + if (str && !strncmp(str, "auto", 4)) { + nospec_call_disable = 0; + nospec_return_disable = 1; + } + return 0; +} +early_param("spectre_v2", spectre_v2_setup_early); + +static void __init_or_module __nospec_revert(s32 *start, s32 *end) +{ + enum { BRCL_EXPOLINE, BRASL_EXPOLINE } type; + u8 *instr, *thunk, *br; + u8 insnbuf[6]; + s32 *epo; + + /* Second part of the instruction replace is always a nop */ + memcpy(insnbuf + 2, (char[]) { 0x47, 0x00, 0x00, 0x00 }, 4); + for (epo = start; epo < end; epo++) { + instr = (u8 *) epo + *epo; + if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x04) + type = BRCL_EXPOLINE; /* brcl instruction */ + else if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x05) + type = BRASL_EXPOLINE; /* brasl instruction */ + else + continue; + thunk = instr + (*(int *)(instr + 2)) * 2; + if (thunk[0] == 0xc6 && thunk[1] == 0x00) + /* exrl %r0,<target-br> */ + br = thunk + (*(int *)(thunk + 2)) * 2; + else if (thunk[0] == 0xc0 && (thunk[1] & 0x0f) == 0x00 && + thunk[6] == 0x44 && thunk[7] == 0x00 && + (thunk[8] & 0x0f) == 0x00 && thunk[9] == 0x00 && + (thunk[1] & 0xf0) == (thunk[8] & 0xf0)) + /* larl %rx,<target br> + ex %r0,0(%rx) */ + br = thunk + (*(int *)(thunk + 2)) * 2; + else + continue; + if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0) + continue; + switch (type) { + case BRCL_EXPOLINE: + /* brcl to thunk, replace with br + nop */ + insnbuf[0] = br[0]; + insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f); + break; + case BRASL_EXPOLINE: + /* brasl to thunk, replace with basr + nop */ + insnbuf[0] = 0x0d; + insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f); + break; + } + + s390_kernel_write(instr, insnbuf, 6); + } +} + +void __init_or_module nospec_call_revert(s32 *start, s32 *end) +{ + if (nospec_call_disable) + __nospec_revert(start, end); +} + +void __init_or_module nospec_return_revert(s32 *start, s32 *end) +{ + if (nospec_return_disable) + __nospec_revert(start, end); +} + +extern s32 __nospec_call_start[], __nospec_call_end[]; +extern s32 __nospec_return_start[], __nospec_return_end[]; +void __init nospec_init_branches(void) +{ + nospec_call_revert(__nospec_call_start, __nospec_call_end); + nospec_return_revert(__nospec_return_start, __nospec_return_end); +} diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index 94f90cefbffc..c5bc3f209652 100644 --- a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -226,7 +226,7 @@ CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_L4_SOURCED_WRITES, 0x00af); CPUMF_EVENT_ATTR(cf_z14, BCD_DFP_EXECUTION_SLOTS, 0x00e0); CPUMF_EVENT_ATTR(cf_z14, VX_BCD_EXECUTION_SLOTS, 0x00e1); CPUMF_EVENT_ATTR(cf_z14, DECIMAL_INSTRUCTIONS, 0x00e2); -CPUMF_EVENT_ATTR(cf_z14, LAST_HOST_TRANSLATIONS, 0x00e9); +CPUMF_EVENT_ATTR(cf_z14, LAST_HOST_TRANSLATIONS, 0x00e8); CPUMF_EVENT_ATTR(cf_z14, TX_NC_TABORT, 0x00f3); CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_NO_SPECIAL, 0x00f4); CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_SPECIAL, 0x00f5); diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 5362fd868d0d..6fe2e1875058 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -197,3 +197,21 @@ const struct seq_operations cpuinfo_op = { .stop = c_stop, .show = show_cpuinfo, }; + +int s390_isolate_bp(void) +{ + if (!test_facility(82)) + return -EOPNOTSUPP; + set_thread_flag(TIF_ISOLATE_BP); + return 0; +} +EXPORT_SYMBOL(s390_isolate_bp); + +int s390_isolate_bp_guest(void) +{ + if (!test_facility(82)) + return -EOPNOTSUPP; + set_thread_flag(TIF_ISOLATE_BP_GUEST); + return 0; +} +EXPORT_SYMBOL(s390_isolate_bp_guest); diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c index 09f5bf0d5c0c..125c7f6e8715 100644 --- a/arch/s390/kernel/runtime_instr.c +++ b/arch/s390/kernel/runtime_instr.c @@ -18,6 +18,8 @@ #include <asm/cpu_mf.h> #include <asm/irq.h> +#include "entry.h" + /* empty control block to disable RI by loading it */ struct runtime_instr_cb runtime_instr_empty_cb; @@ -59,7 +61,13 @@ static void init_runtime_instr_cb(struct runtime_instr_cb *cb) cb->v = 1; } -SYSCALL_DEFINE1(s390_runtime_instr, int, command) +/* + * The signum argument is unused. In older kernels it was used to + * specify a real-time signal. For backwards compatibility user space + * should pass a valid real-time signal number (the signum argument + * was checked in older kernels). + */ +SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum) { struct runtime_instr_cb *cb; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 793da97f9a6e..a6a91f01a17a 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -68,6 +68,7 @@ #include <asm/sysinfo.h> #include <asm/numa.h> #include <asm/alternative.h> +#include <asm/nospec-branch.h> #include "entry.h" /* @@ -340,7 +341,9 @@ static void __init setup_lowcore(void) lc->preempt_count = S390_lowcore.preempt_count; lc->stfl_fac_list = S390_lowcore.stfl_fac_list; memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, - MAX_FACILITY_BIT/8); + sizeof(lc->stfle_fac_list)); + memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list, + sizeof(lc->alt_stfle_fac_list)); nmi_alloc_boot_cpu(lc); vdso_alloc_boot_cpu(lc); lc->sync_enter_timer = S390_lowcore.sync_enter_timer; @@ -377,6 +380,7 @@ static void __init setup_lowcore(void) lc->spinlock_index = 0; arch_spin_lock_setup(0); #endif + lc->br_r1_trampoline = 0x07f1; /* br %r1 */ set_prefix((u32)(unsigned long) lc); lowcore_ptr[0] = lc; @@ -952,6 +956,8 @@ void __init setup_arch(char **cmdline_p) set_preferred_console(); apply_alternative_instructions(); + if (IS_ENABLED(CONFIG_EXPOLINE)) + nospec_init_branches(); /* Setup zfcpdump support */ setup_zfcpdump(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index a919b2f0141d..a4a9fe1934e9 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -214,6 +214,7 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) lc->cpu_nr = cpu; lc->spinlock_lockval = arch_spin_lockval(cpu); lc->spinlock_index = 0; + lc->br_r1_trampoline = 0x07f1; /* br %r1 */ if (nmi_alloc_per_cpu(lc)) goto out; if (vdso_alloc_per_cpu(lc)) @@ -266,7 +267,9 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) __ctl_store(lc->cregs_save_area, 0, 15); save_access_regs((unsigned int *) lc->access_regs_save_area); memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, - MAX_FACILITY_BIT/8); + sizeof(lc->stfle_fac_list)); + memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list, + sizeof(lc->alt_stfle_fac_list)); arch_spin_lock_setup(cpu); } @@ -317,6 +320,7 @@ static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *), mem_assign_absolute(lc->restart_fn, (unsigned long) func); mem_assign_absolute(lc->restart_data, (unsigned long) data); mem_assign_absolute(lc->restart_source, source_cpu); + __bpon(); asm volatile( "0: sigp 0,%0,%2 # sigp restart to target cpu\n" " brc 2,0b # busy, try again\n" @@ -901,6 +905,7 @@ void __cpu_die(unsigned int cpu) void __noreturn cpu_die(void) { idle_task_exit(); + __bpon(); pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0); for (;;) ; } diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index a441cba8d165..fc7e04c2195b 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -89,6 +89,8 @@ static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info) EBCASC(info->model_temp_cap, sizeof(info->model_temp_cap)); seq_printf(m, "Manufacturer: %-16.16s\n", info->manufacturer); seq_printf(m, "Type: %-4.4s\n", info->type); + if (info->lic) + seq_printf(m, "LIC Identifier: %016lx\n", info->lic); /* * Sigh: the model field has been renamed with System z9 * to model_capacity and a new model field has been added diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 608cf2987d19..08d12cfaf091 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -123,6 +123,20 @@ SECTIONS *(.altinstr_replacement) } + /* + * Table with the patch locations to undo expolines + */ + .nospec_call_table : { + __nospec_call_start = . ; + *(.s390_indirect*) + __nospec_call_end = . ; + } + .nospec_return_table : { + __nospec_return_start = . ; + *(.s390_return*) + __nospec_return_end = . ; + } + /* early.c uses stsi, which requires page aligned data. */ . = ALIGN(PAGE_SIZE); INIT_DATA_SECTION(0x100) diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 9a4594e0a1ff..a3dbd459cce9 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -23,6 +23,7 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_CPU_RELAX_INTERCEPT + select HAVE_KVM_VCPU_ASYNC_IOCTL select HAVE_KVM_EVENTFD select KVM_ASYNC_PF select KVM_ASYNC_PF_SYNC diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 89aa114a2cba..45634b3d2e0a 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -257,6 +257,7 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) case 0x500: return __diag_virtio_hypercall(vcpu); default: + vcpu->stat.diagnose_other++; return -EOPNOTSUPP; } } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 024ad8bcc516..aabf46f5f883 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -36,7 +36,7 @@ static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) { int c, scn; - if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND)) + if (!kvm_s390_test_cpuflags(vcpu, CPUSTAT_ECALL_PEND)) return 0; BUG_ON(!kvm_s390_use_sca_entries()); @@ -101,18 +101,17 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) /* another external call is pending */ return -EBUSY; } - atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); + kvm_s390_set_cpuflags(vcpu, CPUSTAT_ECALL_PEND); return 0; } static void sca_clear_ext_call(struct kvm_vcpu *vcpu) { - struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; int rc, expect; if (!kvm_s390_use_sca_entries()) return; - atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags); + kvm_s390_clear_cpuflags(vcpu, CPUSTAT_ECALL_PEND); read_lock(&vcpu->kvm->arch.sca_lock); if (vcpu->kvm->arch.use_esca) { struct esca_block *sca = vcpu->kvm->arch.sca; @@ -190,8 +189,8 @@ static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu) static inline int is_ioirq(unsigned long irq_type) { - return ((irq_type >= IRQ_PEND_IO_ISC_0) && - (irq_type <= IRQ_PEND_IO_ISC_7)); + return ((irq_type >= IRQ_PEND_IO_ISC_7) && + (irq_type <= IRQ_PEND_IO_ISC_0)); } static uint64_t isc_to_isc_bits(int isc) @@ -199,25 +198,59 @@ static uint64_t isc_to_isc_bits(int isc) return (0x80 >> isc) << 24; } +static inline u32 isc_to_int_word(u8 isc) +{ + return ((u32)isc << 27) | 0x80000000; +} + static inline u8 int_word_to_isc(u32 int_word) { return (int_word & 0x38000000) >> 27; } +/* + * To use atomic bitmap functions, we have to provide a bitmap address + * that is u64 aligned. However, the ipm might be u32 aligned. + * Therefore, we logically start the bitmap at the very beginning of the + * struct and fixup the bit number. + */ +#define IPM_BIT_OFFSET (offsetof(struct kvm_s390_gisa, ipm) * BITS_PER_BYTE) + +static inline void kvm_s390_gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) +{ + set_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); +} + +static inline u8 kvm_s390_gisa_get_ipm(struct kvm_s390_gisa *gisa) +{ + return READ_ONCE(gisa->ipm); +} + +static inline void kvm_s390_gisa_clear_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) +{ + clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); +} + +static inline int kvm_s390_gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) +{ + return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); +} + static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu) { return vcpu->kvm->arch.float_int.pending_irqs | - vcpu->arch.local_int.pending_irqs; + vcpu->arch.local_int.pending_irqs | + kvm_s390_gisa_get_ipm(vcpu->kvm->arch.gisa) << IRQ_PEND_IO_ISC_7; } static inline int isc_to_irq_type(unsigned long isc) { - return IRQ_PEND_IO_ISC_0 + isc; + return IRQ_PEND_IO_ISC_0 - isc; } static inline int irq_type_to_isc(unsigned long irq_type) { - return irq_type - IRQ_PEND_IO_ISC_0; + return IRQ_PEND_IO_ISC_0 - irq_type; } static unsigned long disable_iscs(struct kvm_vcpu *vcpu, @@ -278,20 +311,20 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu) static void __set_cpu_idle(struct kvm_vcpu *vcpu) { - atomic_or(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); - set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); + kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT); + set_bit(vcpu->vcpu_id, vcpu->kvm->arch.float_int.idle_mask); } static void __unset_cpu_idle(struct kvm_vcpu *vcpu) { - atomic_andnot(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); - clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); + kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT); + clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.float_int.idle_mask); } static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) { - atomic_andnot(CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT, - &vcpu->arch.sie_block->cpuflags); + kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IO_INT | CPUSTAT_EXT_INT | + CPUSTAT_STOP_INT); vcpu->arch.sie_block->lctl = 0x0000; vcpu->arch.sie_block->ictl &= ~(ICTL_LPSW | ICTL_STCTL | ICTL_PINT); @@ -302,17 +335,12 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) } } -static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) -{ - atomic_or(flag, &vcpu->arch.sie_block->cpuflags); -} - static void set_intercept_indicators_io(struct kvm_vcpu *vcpu) { if (!(pending_irqs(vcpu) & IRQ_PEND_IO_MASK)) return; else if (psw_ioint_disabled(vcpu)) - __set_cpuflag(vcpu, CPUSTAT_IO_INT); + kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT); else vcpu->arch.sie_block->lctl |= LCTL_CR6; } @@ -322,7 +350,7 @@ static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu) if (!(pending_irqs(vcpu) & IRQ_PEND_EXT_MASK)) return; if (psw_extint_disabled(vcpu)) - __set_cpuflag(vcpu, CPUSTAT_EXT_INT); + kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT); else vcpu->arch.sie_block->lctl |= LCTL_CR0; } @@ -340,7 +368,7 @@ static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu) static void set_intercept_indicators_stop(struct kvm_vcpu *vcpu) { if (kvm_s390_is_stop_irq_pending(vcpu)) - __set_cpuflag(vcpu, CPUSTAT_STOP_INT); + kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT); } /* Set interception request for non-deliverable interrupts */ @@ -897,18 +925,38 @@ static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu) return rc ? -EFAULT : 0; } +static int __do_deliver_io(struct kvm_vcpu *vcpu, struct kvm_s390_io_info *io) +{ + int rc; + + rc = put_guest_lc(vcpu, io->subchannel_id, (u16 *)__LC_SUBCHANNEL_ID); + rc |= put_guest_lc(vcpu, io->subchannel_nr, (u16 *)__LC_SUBCHANNEL_NR); + rc |= put_guest_lc(vcpu, io->io_int_parm, (u32 *)__LC_IO_INT_PARM); + rc |= put_guest_lc(vcpu, io->io_int_word, (u32 *)__LC_IO_INT_WORD); + rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + return rc ? -EFAULT : 0; +} + static int __must_check __deliver_io(struct kvm_vcpu *vcpu, unsigned long irq_type) { struct list_head *isc_list; struct kvm_s390_float_interrupt *fi; struct kvm_s390_interrupt_info *inti = NULL; + struct kvm_s390_io_info io; + u32 isc; int rc = 0; fi = &vcpu->kvm->arch.float_int; spin_lock(&fi->lock); - isc_list = &fi->lists[irq_type_to_isc(irq_type)]; + isc = irq_type_to_isc(irq_type); + isc_list = &fi->lists[isc]; inti = list_first_entry_or_null(isc_list, struct kvm_s390_interrupt_info, list); @@ -936,24 +984,31 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu, spin_unlock(&fi->lock); if (inti) { - rc = put_guest_lc(vcpu, inti->io.subchannel_id, - (u16 *)__LC_SUBCHANNEL_ID); - rc |= put_guest_lc(vcpu, inti->io.subchannel_nr, - (u16 *)__LC_SUBCHANNEL_NR); - rc |= put_guest_lc(vcpu, inti->io.io_int_parm, - (u32 *)__LC_IO_INT_PARM); - rc |= put_guest_lc(vcpu, inti->io.io_int_word, - (u32 *)__LC_IO_INT_WORD); - rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW, - &vcpu->arch.sie_block->gpsw, - sizeof(psw_t)); + rc = __do_deliver_io(vcpu, &(inti->io)); kfree(inti); + goto out; } - return rc ? -EFAULT : 0; + if (vcpu->kvm->arch.gisa && + kvm_s390_gisa_tac_ipm_gisc(vcpu->kvm->arch.gisa, isc)) { + /* + * in case an adapter interrupt was not delivered + * in SIE context KVM will handle the delivery + */ + VCPU_EVENT(vcpu, 4, "%s isc %u", "deliver: I/O (AI/gisa)", isc); + memset(&io, 0, sizeof(io)); + io.io_int_word = isc_to_int_word(isc); + vcpu->stat.deliver_io_int++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + KVM_S390_INT_IO(1, 0, 0, 0), + ((__u32)io.subchannel_id << 16) | + io.subchannel_nr, + ((__u64)io.io_int_parm << 32) | + io.io_int_word); + rc = __do_deliver_io(vcpu, &io); + } +out: + return rc; } typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu); @@ -1155,8 +1210,8 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs); while ((irqs = deliverable_irqs(vcpu)) && !rc) { - /* bits are in the order of interrupt priority */ - irq_type = find_first_bit(&irqs, IRQ_PEND_COUNT); + /* bits are in the reverse order of interrupt priority */ + irq_type = find_last_bit(&irqs, IRQ_PEND_COUNT); if (is_ioirq(irq_type)) { rc = __deliver_io(vcpu, irq_type); } else { @@ -1228,7 +1283,7 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) li->irq.ext = irq->u.ext; set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs); - atomic_or(CPUSTAT_EXT_INT, li->cpuflags); + kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT); return 0; } @@ -1253,7 +1308,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs)) return -EBUSY; *extcall = irq->u.extcall; - atomic_or(CPUSTAT_EXT_INT, li->cpuflags); + kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT); return 0; } @@ -1297,7 +1352,7 @@ static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) if (test_and_set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs)) return -EBUSY; stop->flags = irq->u.stop.flags; - __set_cpuflag(vcpu, CPUSTAT_STOP_INT); + kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT); return 0; } @@ -1329,7 +1384,7 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu, set_bit(irq->u.emerg.code, li->sigp_emerg_pending); set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs); - atomic_or(CPUSTAT_EXT_INT, li->cpuflags); + kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT); return 0; } @@ -1373,7 +1428,7 @@ static int __inject_ckc(struct kvm_vcpu *vcpu) 0, 0); set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); - atomic_or(CPUSTAT_EXT_INT, li->cpuflags); + kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT); return 0; } @@ -1386,7 +1441,7 @@ static int __inject_cpu_timer(struct kvm_vcpu *vcpu) 0, 0); set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs); - atomic_or(CPUSTAT_EXT_INT, li->cpuflags); + kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT); return 0; } @@ -1416,20 +1471,86 @@ static struct kvm_s390_interrupt_info *get_io_int(struct kvm *kvm, return NULL; } +static struct kvm_s390_interrupt_info *get_top_io_int(struct kvm *kvm, + u64 isc_mask, u32 schid) +{ + struct kvm_s390_interrupt_info *inti = NULL; + int isc; + + for (isc = 0; isc <= MAX_ISC && !inti; isc++) { + if (isc_mask & isc_to_isc_bits(isc)) + inti = get_io_int(kvm, isc, schid); + } + return inti; +} + +static int get_top_gisa_isc(struct kvm *kvm, u64 isc_mask, u32 schid) +{ + unsigned long active_mask; + int isc; + + if (schid) + goto out; + if (!kvm->arch.gisa) + goto out; + + active_mask = (isc_mask & kvm_s390_gisa_get_ipm(kvm->arch.gisa) << 24) << 32; + while (active_mask) { + isc = __fls(active_mask) ^ (BITS_PER_LONG - 1); + if (kvm_s390_gisa_tac_ipm_gisc(kvm->arch.gisa, isc)) + return isc; + clear_bit_inv(isc, &active_mask); + } +out: + return -EINVAL; +} + /* * Dequeue and return an I/O interrupt matching any of the interruption * subclasses as designated by the isc mask in cr6 and the schid (if != 0). + * Take into account the interrupts pending in the interrupt list and in GISA. + * + * Note that for a guest that does not enable I/O interrupts + * but relies on TPI, a flood of classic interrupts may starve + * out adapter interrupts on the same isc. Linux does not do + * that, and it is possible to work around the issue by configuring + * different iscs for classic and adapter interrupts in the guest, + * but we may want to revisit this in the future. */ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, u64 isc_mask, u32 schid) { - struct kvm_s390_interrupt_info *inti = NULL; + struct kvm_s390_interrupt_info *inti, *tmp_inti; int isc; - for (isc = 0; isc <= MAX_ISC && !inti; isc++) { - if (isc_mask & isc_to_isc_bits(isc)) - inti = get_io_int(kvm, isc, schid); + inti = get_top_io_int(kvm, isc_mask, schid); + + isc = get_top_gisa_isc(kvm, isc_mask, schid); + if (isc < 0) + /* no AI in GISA */ + goto out; + + if (!inti) + /* AI in GISA but no classical IO int */ + goto gisa_out; + + /* both types of interrupts present */ + if (int_word_to_isc(inti->io.io_int_word) <= isc) { + /* classical IO int with higher priority */ + kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc); + goto out; } +gisa_out: + tmp_inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (tmp_inti) { + tmp_inti->type = KVM_S390_INT_IO(1, 0, 0, 0); + tmp_inti->io.io_int_word = isc_to_int_word(isc); + if (inti) + kvm_s390_reinject_io_int(kvm, inti); + inti = tmp_inti; + } else + kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc); +out: return inti; } @@ -1517,6 +1638,15 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) struct list_head *list; int isc; + isc = int_word_to_isc(inti->io.io_int_word); + + if (kvm->arch.gisa && inti->type & KVM_S390_INT_IO_AI_MASK) { + VM_EVENT(kvm, 4, "%s isc %1u", "inject: I/O (AI/gisa)", isc); + kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc); + kfree(inti); + return 0; + } + fi = &kvm->arch.float_int; spin_lock(&fi->lock); if (fi->counters[FIRQ_CNTR_IO] >= KVM_S390_MAX_FLOAT_IRQS) { @@ -1532,7 +1662,6 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) inti->io.subchannel_id >> 8, inti->io.subchannel_id >> 1 & 0x3, inti->io.subchannel_nr); - isc = int_word_to_isc(inti->io.io_int_word); list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc]; list_add_tail(&inti->list, list); set_bit(isc_to_irq_type(isc), &fi->pending_irqs); @@ -1546,7 +1675,6 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) static void __floating_irq_kick(struct kvm *kvm, u64 type) { struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; - struct kvm_s390_local_interrupt *li; struct kvm_vcpu *dst_vcpu; int sigcpu, online_vcpus, nr_tries = 0; @@ -1568,20 +1696,17 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type) dst_vcpu = kvm_get_vcpu(kvm, sigcpu); /* make the VCPU drop out of the SIE, or wake it up if sleeping */ - li = &dst_vcpu->arch.local_int; - spin_lock(&li->lock); switch (type) { case KVM_S390_MCHK: - atomic_or(CPUSTAT_STOP_INT, li->cpuflags); + kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT); break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - atomic_or(CPUSTAT_IO_INT, li->cpuflags); + kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT); break; default: - atomic_or(CPUSTAT_EXT_INT, li->cpuflags); + kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_EXT_INT); break; } - spin_unlock(&li->lock); kvm_s390_vcpu_wakeup(dst_vcpu); } @@ -1820,6 +1945,7 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm) for (i = 0; i < FIRQ_MAX_COUNT; i++) fi->counters[i] = 0; spin_unlock(&fi->lock); + kvm_s390_gisa_clear(kvm); }; static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len) @@ -1847,6 +1973,22 @@ static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len) max_irqs = len / sizeof(struct kvm_s390_irq); + if (kvm->arch.gisa && + kvm_s390_gisa_get_ipm(kvm->arch.gisa)) { + for (i = 0; i <= MAX_ISC; i++) { + if (n == max_irqs) { + /* signal userspace to try again */ + ret = -ENOMEM; + goto out_nolock; + } + if (kvm_s390_gisa_tac_ipm_gisc(kvm->arch.gisa, i)) { + irq = (struct kvm_s390_irq *) &buf[n]; + irq->type = KVM_S390_INT_IO(1, 0, 0, 0); + irq->u.io.io_int_word = isc_to_int_word(i); + n++; + } + } + } fi = &kvm->arch.float_int; spin_lock(&fi->lock); for (i = 0; i < FIRQ_LIST_COUNT; i++) { @@ -1885,6 +2027,7 @@ static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len) out: spin_unlock(&fi->lock); +out_nolock: if (!ret && n > 0) { if (copy_to_user(usrbuf, buf, sizeof(struct kvm_s390_irq) * n)) ret = -EFAULT; @@ -2245,7 +2388,7 @@ static int kvm_s390_inject_airq(struct kvm *kvm, struct kvm_s390_interrupt s390int = { .type = KVM_S390_INT_IO(1, 0, 0, 0), .parm = 0, - .parm64 = (adapter->isc << 27) | 0x80000000, + .parm64 = isc_to_int_word(adapter->isc), }; int ret = 0; @@ -2687,3 +2830,28 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) return n; } + +void kvm_s390_gisa_clear(struct kvm *kvm) +{ + if (kvm->arch.gisa) { + memset(kvm->arch.gisa, 0, sizeof(struct kvm_s390_gisa)); + kvm->arch.gisa->next_alert = (u32)(u64)kvm->arch.gisa; + VM_EVENT(kvm, 3, "gisa 0x%pK cleared", kvm->arch.gisa); + } +} + +void kvm_s390_gisa_init(struct kvm *kvm) +{ + if (css_general_characteristics.aiv) { + kvm->arch.gisa = &kvm->arch.sie_page2->gisa; + VM_EVENT(kvm, 3, "gisa 0x%pK initialized", kvm->arch.gisa); + kvm_s390_gisa_clear(kvm); + } +} + +void kvm_s390_gisa_destroy(struct kvm *kvm) +{ + if (!kvm->arch.gisa) + return; + kvm->arch.gisa = NULL; +} diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 1371dff2b90d..ba4c7092335a 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2,7 +2,7 @@ /* * hosting IBM Z kernel virtual machines (s390x) * - * Copyright IBM Corp. 2008, 2017 + * Copyright IBM Corp. 2008, 2018 * * Author(s): Carsten Otte <cotte@de.ibm.com> * Christian Borntraeger <borntraeger@de.ibm.com> @@ -87,19 +87,31 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, { "deliver_program_interruption", VCPU_STAT(deliver_program_int) }, { "exit_wait_state", VCPU_STAT(exit_wait_state) }, + { "instruction_epsw", VCPU_STAT(instruction_epsw) }, + { "instruction_gs", VCPU_STAT(instruction_gs) }, + { "instruction_io_other", VCPU_STAT(instruction_io_other) }, + { "instruction_lpsw", VCPU_STAT(instruction_lpsw) }, + { "instruction_lpswe", VCPU_STAT(instruction_lpswe) }, { "instruction_pfmf", VCPU_STAT(instruction_pfmf) }, + { "instruction_ptff", VCPU_STAT(instruction_ptff) }, { "instruction_stidp", VCPU_STAT(instruction_stidp) }, + { "instruction_sck", VCPU_STAT(instruction_sck) }, + { "instruction_sckpf", VCPU_STAT(instruction_sckpf) }, { "instruction_spx", VCPU_STAT(instruction_spx) }, { "instruction_stpx", VCPU_STAT(instruction_stpx) }, { "instruction_stap", VCPU_STAT(instruction_stap) }, - { "instruction_storage_key", VCPU_STAT(instruction_storage_key) }, + { "instruction_iske", VCPU_STAT(instruction_iske) }, + { "instruction_ri", VCPU_STAT(instruction_ri) }, + { "instruction_rrbe", VCPU_STAT(instruction_rrbe) }, + { "instruction_sske", VCPU_STAT(instruction_sske) }, { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) }, - { "instruction_stsch", VCPU_STAT(instruction_stsch) }, - { "instruction_chsc", VCPU_STAT(instruction_chsc) }, { "instruction_essa", VCPU_STAT(instruction_essa) }, { "instruction_stsi", VCPU_STAT(instruction_stsi) }, { "instruction_stfl", VCPU_STAT(instruction_stfl) }, + { "instruction_tb", VCPU_STAT(instruction_tb) }, + { "instruction_tpi", VCPU_STAT(instruction_tpi) }, { "instruction_tprot", VCPU_STAT(instruction_tprot) }, + { "instruction_tsch", VCPU_STAT(instruction_tsch) }, { "instruction_sthyi", VCPU_STAT(instruction_sthyi) }, { "instruction_sie", VCPU_STAT(instruction_sie) }, { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, @@ -118,12 +130,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) }, { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) }, { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) }, - { "diagnose_10", VCPU_STAT(diagnose_10) }, - { "diagnose_44", VCPU_STAT(diagnose_44) }, - { "diagnose_9c", VCPU_STAT(diagnose_9c) }, - { "diagnose_258", VCPU_STAT(diagnose_258) }, - { "diagnose_308", VCPU_STAT(diagnose_308) }, - { "diagnose_500", VCPU_STAT(diagnose_500) }, + { "instruction_diag_10", VCPU_STAT(diagnose_10) }, + { "instruction_diag_44", VCPU_STAT(diagnose_44) }, + { "instruction_diag_9c", VCPU_STAT(diagnose_9c) }, + { "instruction_diag_258", VCPU_STAT(diagnose_258) }, + { "instruction_diag_308", VCPU_STAT(diagnose_308) }, + { "instruction_diag_500", VCPU_STAT(diagnose_500) }, + { "instruction_diag_other", VCPU_STAT(diagnose_other) }, { NULL } }; @@ -576,7 +589,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) case KVM_CAP_S390_GS: r = -EINVAL; mutex_lock(&kvm->lock); - if (atomic_read(&kvm->online_vcpus)) { + if (kvm->created_vcpus) { r = -EBUSY; } else if (test_facility(133)) { set_kvm_facility(kvm->arch.model.fac_mask, 133); @@ -1088,7 +1101,6 @@ static int kvm_s390_set_processor_feat(struct kvm *kvm, struct kvm_device_attr *attr) { struct kvm_s390_vm_cpu_feat data; - int ret = -EBUSY; if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data))) return -EFAULT; @@ -1098,13 +1110,18 @@ static int kvm_s390_set_processor_feat(struct kvm *kvm, return -EINVAL; mutex_lock(&kvm->lock); - if (!atomic_read(&kvm->online_vcpus)) { - bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat, - KVM_S390_VM_CPU_FEAT_NR_BITS); - ret = 0; + if (kvm->created_vcpus) { + mutex_unlock(&kvm->lock); + return -EBUSY; } + bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat, + KVM_S390_VM_CPU_FEAT_NR_BITS); mutex_unlock(&kvm->lock); - return ret; + VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx", + data.feat[0], + data.feat[1], + data.feat[2]); + return 0; } static int kvm_s390_set_processor_subfunc(struct kvm *kvm, @@ -1206,6 +1223,10 @@ static int kvm_s390_get_processor_feat(struct kvm *kvm, KVM_S390_VM_CPU_FEAT_NR_BITS); if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) return -EFAULT; + VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx", + data.feat[0], + data.feat[1], + data.feat[2]); return 0; } @@ -1219,6 +1240,10 @@ static int kvm_s390_get_machine_feat(struct kvm *kvm, KVM_S390_VM_CPU_FEAT_NR_BITS); if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) return -EFAULT; + VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx", + data.feat[0], + data.feat[1], + data.feat[2]); return 0; } @@ -1911,6 +1936,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!kvm->arch.dbf) goto out_err; + BUILD_BUG_ON(sizeof(struct sie_page2) != 4096); kvm->arch.sie_page2 = (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA); if (!kvm->arch.sie_page2) @@ -1981,6 +2007,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) spin_lock_init(&kvm->arch.start_stop_lock); kvm_s390_vsie_init(kvm); + kvm_s390_gisa_init(kvm); KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); return 0; @@ -2043,6 +2070,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_free_vcpus(kvm); sca_dispose(kvm); debug_unregister(kvm->arch.dbf); + kvm_s390_gisa_destroy(kvm); free_page((unsigned long)kvm->arch.sie_page2); if (!kvm_is_ucontrol(kvm)) gmap_remove(kvm->arch.gmap); @@ -2314,7 +2342,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { gmap_enable(vcpu->arch.enabled_gmap); - atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING); if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) __start_cpu_timer_accounting(vcpu); vcpu->cpu = cpu; @@ -2325,7 +2353,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) vcpu->cpu = -1; if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) __stop_cpu_timer_accounting(vcpu); - atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING); vcpu->arch.enabled_gmap = gmap_get_enabled(); gmap_disable(vcpu->arch.enabled_gmap); @@ -2422,9 +2450,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) CPUSTAT_STOPPED); if (test_kvm_facility(vcpu->kvm, 78)) - atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags); + kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2); else if (test_kvm_facility(vcpu->kvm, 8)) - atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags); + kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED); kvm_s390_vcpu_setup_model(vcpu); @@ -2456,12 +2484,17 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) if (test_kvm_facility(vcpu->kvm, 139)) vcpu->arch.sie_block->ecd |= ECD_MEF; + if (vcpu->arch.sie_block->gd) { + vcpu->arch.sie_block->eca |= ECA_AIV; + VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u", + vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id); + } vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) | SDNXC; vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; if (sclp.has_kss) - atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags); + kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS); else vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; @@ -2508,9 +2541,9 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block->icpua = id; spin_lock_init(&vcpu->arch.local_int.lock); - vcpu->arch.local_int.float_int = &kvm->arch.float_int; - vcpu->arch.local_int.wq = &vcpu->wq; - vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; + vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa; + if (vcpu->arch.sie_block->gd && sclp.has_gisaf) + vcpu->arch.sie_block->gd |= GISA_FORMAT1; seqcount_init(&vcpu->arch.cputm_seqcount); rc = kvm_vcpu_init(vcpu, kvm, id); @@ -2567,7 +2600,7 @@ static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) * return immediately. */ void exit_sie(struct kvm_vcpu *vcpu) { - atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags); + kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT); while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) cpu_relax(); } @@ -2720,47 +2753,70 @@ static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { + vcpu_load(vcpu); memcpy(&vcpu->run->s.regs.gprs, ®s->gprs, sizeof(regs->gprs)); + vcpu_put(vcpu); return 0; } int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { + vcpu_load(vcpu); memcpy(®s->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs)); + vcpu_put(vcpu); return 0; } int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { + vcpu_load(vcpu); + memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs)); memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); + + vcpu_put(vcpu); return 0; } int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { + vcpu_load(vcpu); + memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs)); memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); + + vcpu_put(vcpu); return 0; } int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { - if (test_fp_ctl(fpu->fpc)) - return -EINVAL; + int ret = 0; + + vcpu_load(vcpu); + + if (test_fp_ctl(fpu->fpc)) { + ret = -EINVAL; + goto out; + } vcpu->run->s.regs.fpc = fpu->fpc; if (MACHINE_HAS_VX) convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs, (freg_t *) fpu->fprs); else memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs)); - return 0; + +out: + vcpu_put(vcpu); + return ret; } int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { + vcpu_load(vcpu); + /* make sure we have the latest values */ save_fpu_regs(); if (MACHINE_HAS_VX) @@ -2769,6 +2825,8 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) else memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs)); fpu->fpc = vcpu->run->s.regs.fpc; + + vcpu_put(vcpu); return 0; } @@ -2800,41 +2858,56 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, { int rc = 0; + vcpu_load(vcpu); + vcpu->guest_debug = 0; kvm_s390_clear_bp_data(vcpu); - if (dbg->control & ~VALID_GUESTDBG_FLAGS) - return -EINVAL; - if (!sclp.has_gpere) - return -EINVAL; + if (dbg->control & ~VALID_GUESTDBG_FLAGS) { + rc = -EINVAL; + goto out; + } + if (!sclp.has_gpere) { + rc = -EINVAL; + goto out; + } if (dbg->control & KVM_GUESTDBG_ENABLE) { vcpu->guest_debug = dbg->control; /* enforce guest PER */ - atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags); + kvm_s390_set_cpuflags(vcpu, CPUSTAT_P); if (dbg->control & KVM_GUESTDBG_USE_HW_BP) rc = kvm_s390_import_bp_data(vcpu, dbg); } else { - atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags); + kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); vcpu->arch.guestdbg.last_bp = 0; } if (rc) { vcpu->guest_debug = 0; kvm_s390_clear_bp_data(vcpu); - atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags); + kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); } +out: + vcpu_put(vcpu); return rc; } int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { + int ret; + + vcpu_load(vcpu); + /* CHECK_STOP and LOAD are not supported yet */ - return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED : - KVM_MP_STATE_OPERATING; + ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED : + KVM_MP_STATE_OPERATING; + + vcpu_put(vcpu); + return ret; } int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, @@ -2842,6 +2915,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, { int rc = 0; + vcpu_load(vcpu); + /* user space knows about this interface - let it control the state */ vcpu->kvm->arch.user_cpu_state_ctrl = 1; @@ -2859,12 +2934,13 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, rc = -ENXIO; } + vcpu_put(vcpu); return rc; } static bool ibs_enabled(struct kvm_vcpu *vcpu) { - return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS; + return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS); } static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) @@ -2900,8 +2976,7 @@ retry: if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { if (!ibs_enabled(vcpu)) { trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); - atomic_or(CPUSTAT_IBS, - &vcpu->arch.sie_block->cpuflags); + kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS); } goto retry; } @@ -2909,8 +2984,7 @@ retry: if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) { if (ibs_enabled(vcpu)) { trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0); - atomic_andnot(CPUSTAT_IBS, - &vcpu->arch.sie_block->cpuflags); + kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS); } goto retry; } @@ -3390,9 +3464,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (kvm_run->immediate_exit) return -EINTR; + vcpu_load(vcpu); + if (guestdbg_exit_pending(vcpu)) { kvm_s390_prepare_debug_exit(vcpu); - return 0; + rc = 0; + goto out; } kvm_sigset_activate(vcpu); @@ -3402,7 +3479,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } else if (is_vcpu_stopped(vcpu)) { pr_err_ratelimited("can't run stopped vcpu %d\n", vcpu->vcpu_id); - return -EINVAL; + rc = -EINVAL; + goto out; } sync_regs(vcpu, kvm_run); @@ -3432,6 +3510,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_sigset_deactivate(vcpu); vcpu->stat.exit_userspace++; +out: + vcpu_put(vcpu); return rc; } @@ -3560,7 +3640,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) __disable_ibs_on_all_vcpus(vcpu->kvm); } - atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); + kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED); /* * Another VCPU might have used IBS while we were offline. * Let's play safe and flush the VCPU at startup. @@ -3586,7 +3666,7 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */ kvm_s390_clear_stop_irq(vcpu); - atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); + kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED); __disable_ibs_on_vcpu(vcpu); for (i = 0; i < online_vcpus; i++) { @@ -3693,36 +3773,45 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, return r; } -long kvm_arch_vcpu_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) +long kvm_arch_vcpu_async_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) { struct kvm_vcpu *vcpu = filp->private_data; void __user *argp = (void __user *)arg; - int idx; - long r; switch (ioctl) { case KVM_S390_IRQ: { struct kvm_s390_irq s390irq; - r = -EFAULT; if (copy_from_user(&s390irq, argp, sizeof(s390irq))) - break; - r = kvm_s390_inject_vcpu(vcpu, &s390irq); - break; + return -EFAULT; + return kvm_s390_inject_vcpu(vcpu, &s390irq); } case KVM_S390_INTERRUPT: { struct kvm_s390_interrupt s390int; struct kvm_s390_irq s390irq; - r = -EFAULT; if (copy_from_user(&s390int, argp, sizeof(s390int))) - break; + return -EFAULT; if (s390int_to_s390irq(&s390int, &s390irq)) return -EINVAL; - r = kvm_s390_inject_vcpu(vcpu, &s390irq); - break; + return kvm_s390_inject_vcpu(vcpu, &s390irq); + } } + return -ENOIOCTLCMD; +} + +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + int idx; + long r; + + vcpu_load(vcpu); + + switch (ioctl) { case KVM_S390_STORE_STATUS: idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvm_s390_vcpu_store_status(vcpu, arg); @@ -3847,6 +3936,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp, default: r = -ENOTTY; } + + vcpu_put(vcpu); return r; } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 5e46ba429bcb..bd31b37b0e6f 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -47,14 +47,29 @@ do { \ d_args); \ } while (0) +static inline void kvm_s390_set_cpuflags(struct kvm_vcpu *vcpu, u32 flags) +{ + atomic_or(flags, &vcpu->arch.sie_block->cpuflags); +} + +static inline void kvm_s390_clear_cpuflags(struct kvm_vcpu *vcpu, u32 flags) +{ + atomic_andnot(flags, &vcpu->arch.sie_block->cpuflags); +} + +static inline bool kvm_s390_test_cpuflags(struct kvm_vcpu *vcpu, u32 flags) +{ + return (atomic_read(&vcpu->arch.sie_block->cpuflags) & flags) == flags; +} + static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu) { - return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED; + return kvm_s390_test_cpuflags(vcpu, CPUSTAT_STOPPED); } static inline int is_vcpu_idle(struct kvm_vcpu *vcpu) { - return test_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); + return test_bit(vcpu->vcpu_id, vcpu->kvm->arch.float_int.idle_mask); } static inline int kvm_is_ucontrol(struct kvm *kvm) @@ -367,6 +382,9 @@ int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu, void __user *buf, int len); int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len); +void kvm_s390_gisa_init(struct kvm *kvm); +void kvm_s390_gisa_clear(struct kvm *kvm); +void kvm_s390_gisa_destroy(struct kvm *kvm); /* implemented in guestdbg.c */ void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 0714bfa56da0..c4c4e157c036 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -2,7 +2,7 @@ /* * handling privileged instructions * - * Copyright IBM Corp. 2008, 2013 + * Copyright IBM Corp. 2008, 2018 * * Author(s): Carsten Otte <cotte@de.ibm.com> * Christian Borntraeger <borntraeger@de.ibm.com> @@ -34,6 +34,8 @@ static int handle_ri(struct kvm_vcpu *vcpu) { + vcpu->stat.instruction_ri++; + if (test_kvm_facility(vcpu->kvm, 64)) { VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (lazy)"); vcpu->arch.sie_block->ecb3 |= ECB3_RI; @@ -53,6 +55,8 @@ int kvm_s390_handle_aa(struct kvm_vcpu *vcpu) static int handle_gs(struct kvm_vcpu *vcpu) { + vcpu->stat.instruction_gs++; + if (test_kvm_facility(vcpu->kvm, 133)) { VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (lazy)"); preempt_disable(); @@ -85,6 +89,8 @@ static int handle_set_clock(struct kvm_vcpu *vcpu) u8 ar; u64 op2, val; + vcpu->stat.instruction_sck++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); @@ -203,14 +209,14 @@ int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu) trace_kvm_s390_skey_related_inst(vcpu); if (!(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) && - !(atomic_read(&sie_block->cpuflags) & CPUSTAT_KSS)) + !kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS)) return rc; rc = s390_enable_skey(); VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc); if (!rc) { - if (atomic_read(&sie_block->cpuflags) & CPUSTAT_KSS) - atomic_andnot(CPUSTAT_KSS, &sie_block->cpuflags); + if (kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS)) + kvm_s390_clear_cpuflags(vcpu, CPUSTAT_KSS); else sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); @@ -222,7 +228,6 @@ static int try_handle_skey(struct kvm_vcpu *vcpu) { int rc; - vcpu->stat.instruction_storage_key++; rc = kvm_s390_skey_check_enable(vcpu); if (rc) return rc; @@ -242,6 +247,8 @@ static int handle_iske(struct kvm_vcpu *vcpu) int reg1, reg2; int rc; + vcpu->stat.instruction_iske++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); @@ -274,6 +281,8 @@ static int handle_rrbe(struct kvm_vcpu *vcpu) int reg1, reg2; int rc; + vcpu->stat.instruction_rrbe++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); @@ -312,6 +321,8 @@ static int handle_sske(struct kvm_vcpu *vcpu) int reg1, reg2; int rc; + vcpu->stat.instruction_sske++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); @@ -392,6 +403,8 @@ static int handle_test_block(struct kvm_vcpu *vcpu) gpa_t addr; int reg2; + vcpu->stat.instruction_tb++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); @@ -424,6 +437,8 @@ static int handle_tpi(struct kvm_vcpu *vcpu) u64 addr; u8 ar; + vcpu->stat.instruction_tpi++; + addr = kvm_s390_get_base_disp_s(vcpu, &ar); if (addr & 3) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -484,6 +499,8 @@ static int handle_tsch(struct kvm_vcpu *vcpu) struct kvm_s390_interrupt_info *inti = NULL; const u64 isc_mask = 0xffUL << 24; /* all iscs set */ + vcpu->stat.instruction_tsch++; + /* a valid schid has at least one bit set */ if (vcpu->run->s.regs.gprs[1]) inti = kvm_s390_get_io_int(vcpu->kvm, isc_mask, @@ -527,6 +544,7 @@ static int handle_io_inst(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->ipa == 0xb235) return handle_tsch(vcpu); /* Handle in userspace. */ + vcpu->stat.instruction_io_other++; return -EOPNOTSUPP; } else { /* @@ -592,6 +610,8 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) int rc; u8 ar; + vcpu->stat.instruction_lpsw++; + if (gpsw->mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); @@ -619,6 +639,8 @@ static int handle_lpswe(struct kvm_vcpu *vcpu) int rc; u8 ar; + vcpu->stat.instruction_lpswe++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); @@ -828,6 +850,8 @@ static int handle_epsw(struct kvm_vcpu *vcpu) { int reg1, reg2; + vcpu->stat.instruction_epsw++; + kvm_s390_get_regs_rre(vcpu, ®1, ®2); /* This basically extracts the mask half of the psw. */ @@ -1332,6 +1356,8 @@ static int handle_sckpf(struct kvm_vcpu *vcpu) { u32 value; + vcpu->stat.instruction_sckpf++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); @@ -1347,6 +1373,8 @@ static int handle_sckpf(struct kvm_vcpu *vcpu) static int handle_ptff(struct kvm_vcpu *vcpu) { + vcpu->stat.instruction_ptff++; + /* we don't emulate any control instructions yet */ kvm_s390_set_psw_cc(vcpu, 3); return 0; diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index c1f5cde2c878..683036c1c92a 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -20,22 +20,18 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu, u64 *reg) { - struct kvm_s390_local_interrupt *li; - int cpuflags; + const bool stopped = kvm_s390_test_cpuflags(dst_vcpu, CPUSTAT_STOPPED); int rc; int ext_call_pending; - li = &dst_vcpu->arch.local_int; - - cpuflags = atomic_read(li->cpuflags); ext_call_pending = kvm_s390_ext_call_pending(dst_vcpu); - if (!(cpuflags & CPUSTAT_STOPPED) && !ext_call_pending) + if (!stopped && !ext_call_pending) rc = SIGP_CC_ORDER_CODE_ACCEPTED; else { *reg &= 0xffffffff00000000UL; if (ext_call_pending) *reg |= SIGP_STATUS_EXT_CALL_PENDING; - if (cpuflags & CPUSTAT_STOPPED) + if (stopped) *reg |= SIGP_STATUS_STOPPED; rc = SIGP_CC_STATUS_STORED; } @@ -208,11 +204,9 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu, u32 addr, u64 *reg) { - int flags; int rc; - flags = atomic_read(dst_vcpu->arch.local_int.cpuflags); - if (!(flags & CPUSTAT_STOPPED)) { + if (!kvm_s390_test_cpuflags(dst_vcpu, CPUSTAT_STOPPED)) { *reg &= 0xffffffff00000000UL; *reg |= SIGP_STATUS_INCORRECT_STATE; return SIGP_CC_STATUS_STORED; @@ -231,7 +225,6 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, static int __sigp_sense_running(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu, u64 *reg) { - struct kvm_s390_local_interrupt *li; int rc; if (!test_kvm_facility(vcpu->kvm, 9)) { @@ -240,8 +233,7 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, return SIGP_CC_STATUS_STORED; } - li = &dst_vcpu->arch.local_int; - if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) { + if (kvm_s390_test_cpuflags(dst_vcpu, CPUSTAT_RUNNING)) { /* running */ rc = SIGP_CC_ORDER_CODE_ACCEPTED; } else { diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 751348348477..ec772700ff96 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -28,13 +28,23 @@ struct vsie_page { * the same offset as that in struct sie_page! */ struct mcck_volatile_info mcck_info; /* 0x0200 */ - /* the pinned originial scb */ + /* + * The pinned original scb. Be aware that other VCPUs can modify + * it while we read from it. Values that are used for conditions or + * are reused conditionally, should be accessed via READ_ONCE. + */ struct kvm_s390_sie_block *scb_o; /* 0x0218 */ /* the shadow gmap in use by the vsie_page */ struct gmap *gmap; /* 0x0220 */ /* address of the last reported fault to guest2 */ unsigned long fault_addr; /* 0x0228 */ - __u8 reserved[0x0700 - 0x0230]; /* 0x0230 */ + /* calculated guest addresses of satellite control blocks */ + gpa_t sca_gpa; /* 0x0230 */ + gpa_t itdba_gpa; /* 0x0238 */ + gpa_t gvrd_gpa; /* 0x0240 */ + gpa_t riccbd_gpa; /* 0x0248 */ + gpa_t sdnx_gpa; /* 0x0250 */ + __u8 reserved[0x0700 - 0x0258]; /* 0x0258 */ struct kvm_s390_crypto_cb crycb; /* 0x0700 */ __u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */ }; @@ -140,12 +150,13 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) { struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; - u32 crycb_addr = scb_o->crycbd & 0x7ffffff8U; + const uint32_t crycbd_o = READ_ONCE(scb_o->crycbd); + const u32 crycb_addr = crycbd_o & 0x7ffffff8U; unsigned long *b1, *b2; u8 ecb3_flags; scb_s->crycbd = 0; - if (!(scb_o->crycbd & vcpu->arch.sie_block->crycbd & CRYCB_FORMAT1)) + if (!(crycbd_o & vcpu->arch.sie_block->crycbd & CRYCB_FORMAT1)) return 0; /* format-1 is supported with message-security-assist extension 3 */ if (!test_kvm_facility(vcpu->kvm, 76)) @@ -183,12 +194,15 @@ static void prepare_ibc(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) { struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; + /* READ_ONCE does not work on bitfields - use a temporary variable */ + const uint32_t __new_ibc = scb_o->ibc; + const uint32_t new_ibc = READ_ONCE(__new_ibc) & 0x0fffU; __u64 min_ibc = (sclp.ibc >> 16) & 0x0fffU; scb_s->ibc = 0; /* ibc installed in g2 and requested for g3 */ - if (vcpu->kvm->arch.model.ibc && (scb_o->ibc & 0x0fffU)) { - scb_s->ibc = scb_o->ibc & 0x0fffU; + if (vcpu->kvm->arch.model.ibc && new_ibc) { + scb_s->ibc = new_ibc; /* takte care of the minimum ibc level of the machine */ if (scb_s->ibc < min_ibc) scb_s->ibc = min_ibc; @@ -259,6 +273,10 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) { struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; + /* READ_ONCE does not work on bitfields - use a temporary variable */ + const uint32_t __new_prefix = scb_o->prefix; + const uint32_t new_prefix = READ_ONCE(__new_prefix); + const bool wants_tx = READ_ONCE(scb_o->ecb) & ECB_TE; bool had_tx = scb_s->ecb & ECB_TE; unsigned long new_mso = 0; int rc; @@ -306,14 +324,14 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) scb_s->icpua = scb_o->icpua; if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_SM)) - new_mso = scb_o->mso & 0xfffffffffff00000UL; + new_mso = READ_ONCE(scb_o->mso) & 0xfffffffffff00000UL; /* if the hva of the prefix changes, we have to remap the prefix */ - if (scb_s->mso != new_mso || scb_s->prefix != scb_o->prefix) + if (scb_s->mso != new_mso || scb_s->prefix != new_prefix) prefix_unmapped(vsie_page); /* SIE will do mso/msl validity and exception checks for us */ scb_s->msl = scb_o->msl & 0xfffffffffff00000UL; scb_s->mso = new_mso; - scb_s->prefix = scb_o->prefix; + scb_s->prefix = new_prefix; /* We have to definetly flush the tlb if this scb never ran */ if (scb_s->ihcpu != 0xffffU) @@ -325,11 +343,11 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP)) scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT; /* transactional execution */ - if (test_kvm_facility(vcpu->kvm, 73)) { + if (test_kvm_facility(vcpu->kvm, 73) && wants_tx) { /* remap the prefix is tx is toggled on */ - if ((scb_o->ecb & ECB_TE) && !had_tx) + if (!had_tx) prefix_unmapped(vsie_page); - scb_s->ecb |= scb_o->ecb & ECB_TE; + scb_s->ecb |= ECB_TE; } /* branch prediction */ if (test_kvm_facility(vcpu->kvm, 82)) @@ -473,46 +491,42 @@ static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa) /* unpin all blocks previously pinned by pin_blocks(), marking them dirty */ static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) { - struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; hpa_t hpa; - gpa_t gpa; hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol; if (hpa) { - gpa = scb_o->scaol & ~0xfUL; - if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO)) - gpa |= (u64) scb_o->scaoh << 32; - unpin_guest_page(vcpu->kvm, gpa, hpa); + unpin_guest_page(vcpu->kvm, vsie_page->sca_gpa, hpa); + vsie_page->sca_gpa = 0; scb_s->scaol = 0; scb_s->scaoh = 0; } hpa = scb_s->itdba; if (hpa) { - gpa = scb_o->itdba & ~0xffUL; - unpin_guest_page(vcpu->kvm, gpa, hpa); + unpin_guest_page(vcpu->kvm, vsie_page->itdba_gpa, hpa); + vsie_page->itdba_gpa = 0; scb_s->itdba = 0; } hpa = scb_s->gvrd; if (hpa) { - gpa = scb_o->gvrd & ~0x1ffUL; - unpin_guest_page(vcpu->kvm, gpa, hpa); + unpin_guest_page(vcpu->kvm, vsie_page->gvrd_gpa, hpa); + vsie_page->gvrd_gpa = 0; scb_s->gvrd = 0; } hpa = scb_s->riccbd; if (hpa) { - gpa = scb_o->riccbd & ~0x3fUL; - unpin_guest_page(vcpu->kvm, gpa, hpa); + unpin_guest_page(vcpu->kvm, vsie_page->riccbd_gpa, hpa); + vsie_page->riccbd_gpa = 0; scb_s->riccbd = 0; } hpa = scb_s->sdnxo; if (hpa) { - gpa = scb_o->sdnxo; - unpin_guest_page(vcpu->kvm, gpa, hpa); + unpin_guest_page(vcpu->kvm, vsie_page->sdnx_gpa, hpa); + vsie_page->sdnx_gpa = 0; scb_s->sdnxo = 0; } } @@ -539,9 +553,9 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) gpa_t gpa; int rc = 0; - gpa = scb_o->scaol & ~0xfUL; + gpa = READ_ONCE(scb_o->scaol) & ~0xfUL; if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO)) - gpa |= (u64) scb_o->scaoh << 32; + gpa |= (u64) READ_ONCE(scb_o->scaoh) << 32; if (gpa) { if (!(gpa & ~0x1fffUL)) rc = set_validity_icpt(scb_s, 0x0038U); @@ -557,11 +571,12 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) } if (rc) goto unpin; + vsie_page->sca_gpa = gpa; scb_s->scaoh = (u32)((u64)hpa >> 32); scb_s->scaol = (u32)(u64)hpa; } - gpa = scb_o->itdba & ~0xffUL; + gpa = READ_ONCE(scb_o->itdba) & ~0xffUL; if (gpa && (scb_s->ecb & ECB_TE)) { if (!(gpa & ~0x1fffU)) { rc = set_validity_icpt(scb_s, 0x0080U); @@ -573,10 +588,11 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) rc = set_validity_icpt(scb_s, 0x0080U); goto unpin; } + vsie_page->itdba_gpa = gpa; scb_s->itdba = hpa; } - gpa = scb_o->gvrd & ~0x1ffUL; + gpa = READ_ONCE(scb_o->gvrd) & ~0x1ffUL; if (gpa && (scb_s->eca & ECA_VX) && !(scb_s->ecd & ECD_HOSTREGMGMT)) { if (!(gpa & ~0x1fffUL)) { rc = set_validity_icpt(scb_s, 0x1310U); @@ -591,10 +607,11 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) rc = set_validity_icpt(scb_s, 0x1310U); goto unpin; } + vsie_page->gvrd_gpa = gpa; scb_s->gvrd = hpa; } - gpa = scb_o->riccbd & ~0x3fUL; + gpa = READ_ONCE(scb_o->riccbd) & ~0x3fUL; if (gpa && (scb_s->ecb3 & ECB3_RI)) { if (!(gpa & ~0x1fffUL)) { rc = set_validity_icpt(scb_s, 0x0043U); @@ -607,13 +624,14 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) goto unpin; } /* Validity 0x0044 will be checked by SIE */ + vsie_page->riccbd_gpa = gpa; scb_s->riccbd = hpa; } if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) { unsigned long sdnxc; - gpa = scb_o->sdnxo & ~0xfUL; - sdnxc = scb_o->sdnxo & 0xfUL; + gpa = READ_ONCE(scb_o->sdnxo) & ~0xfUL; + sdnxc = READ_ONCE(scb_o->sdnxo) & 0xfUL; if (!gpa || !(gpa & ~0x1fffUL)) { rc = set_validity_icpt(scb_s, 0x10b0U); goto unpin; @@ -634,6 +652,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) rc = set_validity_icpt(scb_s, 0x10b0U); goto unpin; } + vsie_page->sdnx_gpa = gpa; scb_s->sdnxo = hpa | sdnxc; } return 0; @@ -778,7 +797,7 @@ static void retry_vsie_icpt(struct vsie_page *vsie_page) static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) { struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; - __u32 fac = vsie_page->scb_o->fac & 0x7ffffff8U; + __u32 fac = READ_ONCE(vsie_page->scb_o->fac) & 0x7ffffff8U; if (fac && test_kvm_facility(vcpu->kvm, 7)) { retry_vsie_icpt(vsie_page); @@ -904,7 +923,7 @@ static void register_shadow_scb(struct kvm_vcpu *vcpu, * External calls have to lead to a kick of the vcpu and * therefore the vsie -> Simulate Wait state. */ - atomic_or(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); + kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT); /* * We have to adjust the g3 epoch by the g2 epoch. The epoch will * automatically be adjusted on tod clock changes via kvm_sync_clock. @@ -926,7 +945,7 @@ static void register_shadow_scb(struct kvm_vcpu *vcpu, */ static void unregister_shadow_scb(struct kvm_vcpu *vcpu) { - atomic_andnot(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); + kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT); WRITE_ONCE(vcpu->arch.vsie_block, NULL); } diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 05d459b638f5..2c55a2b9d6c6 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -815,27 +815,17 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, * @ptl: pointer to the spinlock pointer * * Returns a pointer to the locked pte for a guest address, or NULL - * - * Note: Can also be called for shadow gmaps. */ static pte_t *gmap_pte_op_walk(struct gmap *gmap, unsigned long gaddr, spinlock_t **ptl) { unsigned long *table; - if (gmap_is_shadow(gmap)) - spin_lock(&gmap->guest_table_lock); + BUG_ON(gmap_is_shadow(gmap)); /* Walk the gmap page table, lock and get pte pointer */ table = gmap_table_walk(gmap, gaddr, 1); /* get segment pointer */ - if (!table || *table & _SEGMENT_ENTRY_INVALID) { - if (gmap_is_shadow(gmap)) - spin_unlock(&gmap->guest_table_lock); + if (!table || *table & _SEGMENT_ENTRY_INVALID) return NULL; - } - if (gmap_is_shadow(gmap)) { - *ptl = &gmap->guest_table_lock; - return pte_offset_map((pmd_t *) table, gaddr); - } return pte_alloc_map_lock(gmap->mm, (pmd_t *) table, gaddr, ptl); } @@ -889,8 +879,6 @@ static void gmap_pte_op_end(spinlock_t *ptl) * -EFAULT if gaddr is invalid (or mapping for shadows is missing). * * Called with sg->mm->mmap_sem in read. - * - * Note: Can also be called for shadow gmaps. */ static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr, unsigned long len, int prot, unsigned long bits) @@ -900,6 +888,7 @@ static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr, pte_t *ptep; int rc; + BUG_ON(gmap_is_shadow(gmap)); while (len) { rc = -EAGAIN; ptep = gmap_pte_op_walk(gmap, gaddr, &ptl); @@ -960,7 +949,8 @@ EXPORT_SYMBOL_GPL(gmap_mprotect_notify); * @val: pointer to the unsigned long value to return * * Returns 0 if the value was read, -ENOMEM if out of memory and -EFAULT - * if reading using the virtual address failed. + * if reading using the virtual address failed. -EINVAL if called on a gmap + * shadow. * * Called with gmap->mm->mmap_sem in read. */ @@ -971,6 +961,9 @@ int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val) pte_t *ptep, pte; int rc; + if (gmap_is_shadow(gmap)) + return -EINVAL; + while (1) { rc = -EAGAIN; ptep = gmap_pte_op_walk(gmap, gaddr, &ptl); @@ -1028,18 +1021,17 @@ static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr, } /** - * gmap_protect_rmap - modify access rights to memory and create an rmap + * gmap_protect_rmap - restrict access rights to memory (RO) and create an rmap * @sg: pointer to the shadow guest address space structure * @raddr: rmap address in the shadow gmap * @paddr: address in the parent guest address space * @len: length of the memory area to protect - * @prot: indicates access rights: none, read-only or read-write * * Returns 0 if successfully protected and the rmap was created, -ENOMEM * if out of memory and -EFAULT if paddr is invalid. */ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr, - unsigned long paddr, unsigned long len, int prot) + unsigned long paddr, unsigned long len) { struct gmap *parent; struct gmap_rmap *rmap; @@ -1067,7 +1059,7 @@ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr, ptep = gmap_pte_op_walk(parent, paddr, &ptl); if (ptep) { spin_lock(&sg->guest_table_lock); - rc = ptep_force_prot(parent->mm, paddr, ptep, prot, + rc = ptep_force_prot(parent->mm, paddr, ptep, PROT_READ, PGSTE_VSIE_BIT); if (!rc) gmap_insert_rmap(sg, vmaddr, rmap); @@ -1077,7 +1069,7 @@ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr, radix_tree_preload_end(); if (rc) { kfree(rmap); - rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot); + rc = gmap_pte_op_fixup(parent, paddr, vmaddr, PROT_READ); if (rc) return rc; continue; @@ -1616,7 +1608,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, origin = r2t & _REGION_ENTRY_ORIGIN; offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE; len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset; - rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ); + rc = gmap_protect_rmap(sg, raddr, origin + offset, len); spin_lock(&sg->guest_table_lock); if (!rc) { table = gmap_table_walk(sg, saddr, 4); @@ -1699,7 +1691,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, origin = r3t & _REGION_ENTRY_ORIGIN; offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE; len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset; - rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ); + rc = gmap_protect_rmap(sg, raddr, origin + offset, len); spin_lock(&sg->guest_table_lock); if (!rc) { table = gmap_table_walk(sg, saddr, 3); @@ -1783,7 +1775,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, origin = sgt & _REGION_ENTRY_ORIGIN; offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE; len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset; - rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ); + rc = gmap_protect_rmap(sg, raddr, origin + offset, len); spin_lock(&sg->guest_table_lock); if (!rc) { table = gmap_table_walk(sg, saddr, 2); @@ -1902,7 +1894,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, /* Make pgt read-only in parent gmap page table (not the pgste) */ raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT; origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK; - rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE, PROT_READ); + rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE); spin_lock(&sg->guest_table_lock); if (!rc) { table = gmap_table_walk(sg, saddr, 1); @@ -2005,7 +1997,7 @@ EXPORT_SYMBOL_GPL(gmap_shadow_page); * Called with sg->parent->shadow_lock. */ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr, - unsigned long gaddr, pte_t *pte) + unsigned long gaddr) { struct gmap_rmap *rmap, *rnext, *head; unsigned long start, end, bits, raddr; @@ -2090,7 +2082,7 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, spin_lock(&gmap->shadow_lock); list_for_each_entry_safe(sg, next, &gmap->children, list) - gmap_shadow_notify(sg, vmaddr, gaddr, pte); + gmap_shadow_notify(sg, vmaddr, gaddr); spin_unlock(&gmap->shadow_lock); } if (bits & PGSTE_IN_BIT) |