summaryrefslogtreecommitdiff
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig46
-rw-r--r--arch/s390/Makefile10
-rw-r--r--arch/s390/include/asm/barrier.h24
-rw-r--r--arch/s390/include/asm/bitops.h5
-rw-r--r--arch/s390/include/asm/css_chars.h4
-rw-r--r--arch/s390/include/asm/eadm.h2
-rw-r--r--arch/s390/include/asm/facility.h18
-rw-r--r--arch/s390/include/asm/kvm_host.h126
-rw-r--r--arch/s390/include/asm/lowcore.h9
-rw-r--r--arch/s390/include/asm/nospec-branch.h18
-rw-r--r--arch/s390/include/asm/processor.h4
-rw-r--r--arch/s390/include/asm/runtime_instr.h67
-rw-r--r--arch/s390/include/asm/sclp.h1
-rw-r--r--arch/s390/include/asm/sysinfo.h3
-rw-r--r--arch/s390/include/asm/thread_info.h4
-rw-r--r--arch/s390/include/uapi/asm/runtime_instr.h74
-rw-r--r--arch/s390/kernel/Makefile4
-rw-r--r--arch/s390/kernel/alternative.c26
-rw-r--r--arch/s390/kernel/early.c5
-rw-r--r--arch/s390/kernel/entry.S249
-rw-r--r--arch/s390/kernel/ipl.c1
-rw-r--r--arch/s390/kernel/kprobes.c2
-rw-r--r--arch/s390/kernel/module.c62
-rw-r--r--arch/s390/kernel/nospec-branch.c100
-rw-r--r--arch/s390/kernel/perf_cpum_cf_events.c2
-rw-r--r--arch/s390/kernel/processor.c18
-rw-r--r--arch/s390/kernel/runtime_instr.c10
-rw-r--r--arch/s390/kernel/setup.c8
-rw-r--r--arch/s390/kernel/smp.c7
-rw-r--r--arch/s390/kernel/sysinfo.c2
-rw-r--r--arch/s390/kernel/vmlinux.lds.S14
-rw-r--r--arch/s390/kvm/Kconfig1
-rw-r--r--arch/s390/kvm/diag.c1
-rw-r--r--arch/s390/kvm/interrupt.c288
-rw-r--r--arch/s390/kvm/kvm-s390.c209
-rw-r--r--arch/s390/kvm/kvm-s390.h22
-rw-r--r--arch/s390/kvm/priv.c38
-rw-r--r--arch/s390/kvm/sigp.c18
-rw-r--r--arch/s390/kvm/vsie.c91
-rw-r--r--arch/s390/mm/gmap.c44
40 files changed, 1286 insertions, 351 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 0105ce28e246..eaee7087886f 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -112,7 +112,6 @@ config S390
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
- select ARCH_WANTS_PROT_NUMA_PROT_NONE
select ARCH_WANTS_UBSAN_NO_NULL
select ARCH_WANT_IPC_PARSE_VERSION
select BUILDTIME_EXTABLE_SORT
@@ -540,6 +539,51 @@ config ARCH_RANDOM
If unsure, say Y.
+config KERNEL_NOBP
+ def_bool n
+ prompt "Enable modified branch prediction for the kernel by default"
+ help
+ If this option is selected the kernel will switch to a modified
+ branch prediction mode if the firmware interface is available.
+ The modified branch prediction mode improves the behaviour in
+ regard to speculative execution.
+
+ With the option enabled the kernel parameter "nobp=0" or "nospec"
+ can be used to run the kernel in the normal branch prediction mode.
+
+ With the option disabled the modified branch prediction mode is
+ enabled with the "nobp=1" kernel parameter.
+
+ If unsure, say N.
+
+config EXPOLINE
+ def_bool n
+ prompt "Avoid speculative indirect branches in the kernel"
+ help
+ Compile the kernel with the expoline compiler options to guard
+ against kernel-to-user data leaks by avoiding speculative indirect
+ branches.
+ Requires a compiler with -mindirect-branch=thunk support for full
+ protection. The kernel may run slower.
+
+ If unsure, say N.
+
+choice
+ prompt "Expoline default"
+ depends on EXPOLINE
+ default EXPOLINE_FULL
+
+config EXPOLINE_OFF
+ bool "spectre_v2=off"
+
+config EXPOLINE_MEDIUM
+ bool "spectre_v2=auto"
+
+config EXPOLINE_FULL
+ bool "spectre_v2=on"
+
+endchoice
+
endmenu
menu "Memory setup"
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index fd691c4ff89e..2ced3239cb84 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -78,6 +78,16 @@ ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y)
cflags-$(CONFIG_WARN_DYNAMIC_STACK) += -mwarn-dynamicstack
endif
+ifdef CONFIG_EXPOLINE
+ ifeq ($(call cc-option-yn,$(CC_FLAGS_MARCH) -mindirect-branch=thunk),y)
+ CC_FLAGS_EXPOLINE := -mindirect-branch=thunk
+ CC_FLAGS_EXPOLINE += -mfunction-return=thunk
+ CC_FLAGS_EXPOLINE += -mindirect-branch-table
+ export CC_FLAGS_EXPOLINE
+ cflags-y += $(CC_FLAGS_EXPOLINE)
+ endif
+endif
+
ifdef CONFIG_FUNCTION_TRACER
# make use of hotpatch feature if the compiler supports it
cc_hotpatch := -mhotpatch=0,3
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 10432607a573..f9eddbca79d2 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -49,6 +49,30 @@ do { \
#define __smp_mb__before_atomic() barrier()
#define __smp_mb__after_atomic() barrier()
+/**
+ * array_index_mask_nospec - generate a mask for array_idx() that is
+ * ~0UL when the bounds check succeeds and 0 otherwise
+ * @index: array element index
+ * @size: number of elements in array
+ */
+#define array_index_mask_nospec array_index_mask_nospec
+static inline unsigned long array_index_mask_nospec(unsigned long index,
+ unsigned long size)
+{
+ unsigned long mask;
+
+ if (__builtin_constant_p(size) && size > 0) {
+ asm(" clgr %2,%1\n"
+ " slbgr %0,%0\n"
+ :"=d" (mask) : "d" (size-1), "d" (index) :"cc");
+ return mask;
+ }
+ asm(" clgr %1,%2\n"
+ " slbgr %0,%0\n"
+ :"=d" (mask) : "d" (size), "d" (index) :"cc");
+ return ~mask;
+}
+
#include <asm-generic/barrier.h>
#endif /* __ASM_BARRIER_H */
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 31e400c1a1f3..86e5b2fdee3c 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -261,6 +261,11 @@ static inline void clear_bit_inv(unsigned long nr, volatile unsigned long *ptr)
return clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
}
+static inline int test_and_clear_bit_inv(unsigned long nr, volatile unsigned long *ptr)
+{
+ return test_and_clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
static inline void __set_bit_inv(unsigned long nr, volatile unsigned long *ptr)
{
return __set_bit(nr ^ (BITS_PER_LONG - 1), ptr);
diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
index a478eb61aaf7..fb56fa3283a2 100644
--- a/arch/s390/include/asm/css_chars.h
+++ b/arch/s390/include/asm/css_chars.h
@@ -20,7 +20,9 @@ struct css_general_char {
u32 aif_tdd : 1; /* bit 56 */
u32 : 1;
u32 qebsm : 1; /* bit 58 */
- u32 : 8;
+ u32 : 2;
+ u32 aiv : 1; /* bit 61 */
+ u32 : 5;
u32 aif_osa : 1; /* bit 67 */
u32 : 12;
u32 eadm_rf : 1; /* bit 80 */
diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h
index eb5323161f11..bb63b2afdf6f 100644
--- a/arch/s390/include/asm/eadm.h
+++ b/arch/s390/include/asm/eadm.h
@@ -4,7 +4,7 @@
#include <linux/types.h>
#include <linux/device.h>
-#include <linux/blkdev.h>
+#include <linux/blk_types.h>
struct arqb {
u64 data;
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h
index fbe0c4be3cd8..99c8ce30b3cd 100644
--- a/arch/s390/include/asm/facility.h
+++ b/arch/s390/include/asm/facility.h
@@ -15,6 +15,24 @@
#define MAX_FACILITY_BIT (sizeof(((struct lowcore *)0)->stfle_fac_list) * 8)
+static inline void __set_facility(unsigned long nr, void *facilities)
+{
+ unsigned char *ptr = (unsigned char *) facilities;
+
+ if (nr >= MAX_FACILITY_BIT)
+ return;
+ ptr[nr >> 3] |= 0x80 >> (nr & 7);
+}
+
+static inline void __clear_facility(unsigned long nr, void *facilities)
+{
+ unsigned char *ptr = (unsigned char *) facilities;
+
+ if (nr >= MAX_FACILITY_BIT)
+ return;
+ ptr[nr >> 3] &= ~(0x80 >> (nr & 7));
+}
+
static inline int __test_facility(unsigned long nr, void *facilities)
{
unsigned char *ptr;
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index c1b0a9ac1dc8..afb0f08b8021 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -2,7 +2,7 @@
/*
* definition for kernel virtual machines on s390
*
- * Copyright IBM Corp. 2008, 2009
+ * Copyright IBM Corp. 2008, 2018
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
*/
@@ -183,6 +183,7 @@ struct kvm_s390_sie_block {
#define ECA_IB 0x40000000
#define ECA_SIGPI 0x10000000
#define ECA_MVPGI 0x01000000
+#define ECA_AIV 0x00200000
#define ECA_VX 0x00020000
#define ECA_PROTEXCI 0x00002000
#define ECA_SII 0x00000001
@@ -228,7 +229,9 @@ struct kvm_s390_sie_block {
__u8 epdx; /* 0x0069 */
__u8 reserved6a[2]; /* 0x006a */
__u32 todpr; /* 0x006c */
- __u8 reserved70[16]; /* 0x0070 */
+#define GISA_FORMAT1 0x00000001
+ __u32 gd; /* 0x0070 */
+ __u8 reserved74[12]; /* 0x0074 */
__u64 mso; /* 0x0080 */
__u64 msl; /* 0x0088 */
psw_t gpsw; /* 0x0090 */
@@ -317,18 +320,30 @@ struct kvm_vcpu_stat {
u64 deliver_program_int;
u64 deliver_io_int;
u64 exit_wait_state;
+ u64 instruction_epsw;
+ u64 instruction_gs;
+ u64 instruction_io_other;
+ u64 instruction_lpsw;
+ u64 instruction_lpswe;
u64 instruction_pfmf;
+ u64 instruction_ptff;
+ u64 instruction_sck;
+ u64 instruction_sckpf;
u64 instruction_stidp;
u64 instruction_spx;
u64 instruction_stpx;
u64 instruction_stap;
- u64 instruction_storage_key;
+ u64 instruction_iske;
+ u64 instruction_ri;
+ u64 instruction_rrbe;
+ u64 instruction_sske;
u64 instruction_ipte_interlock;
- u64 instruction_stsch;
- u64 instruction_chsc;
u64 instruction_stsi;
u64 instruction_stfl;
+ u64 instruction_tb;
+ u64 instruction_tpi;
u64 instruction_tprot;
+ u64 instruction_tsch;
u64 instruction_sie;
u64 instruction_essa;
u64 instruction_sthyi;
@@ -354,6 +369,7 @@ struct kvm_vcpu_stat {
u64 diagnose_258;
u64 diagnose_308;
u64 diagnose_500;
+ u64 diagnose_other;
};
#define PGM_OPERATION 0x01
@@ -410,35 +426,35 @@ struct kvm_vcpu_stat {
#define PGM_PER 0x80
#define PGM_CRYPTO_OPERATION 0x119
-/* irq types in order of priority */
+/* irq types in ascend order of priorities */
enum irq_types {
- IRQ_PEND_MCHK_EX = 0,
- IRQ_PEND_SVC,
- IRQ_PEND_PROG,
- IRQ_PEND_MCHK_REP,
- IRQ_PEND_EXT_IRQ_KEY,
- IRQ_PEND_EXT_MALFUNC,
- IRQ_PEND_EXT_EMERGENCY,
- IRQ_PEND_EXT_EXTERNAL,
- IRQ_PEND_EXT_CLOCK_COMP,
- IRQ_PEND_EXT_CPU_TIMER,
- IRQ_PEND_EXT_TIMING,
- IRQ_PEND_EXT_SERVICE,
- IRQ_PEND_EXT_HOST,
- IRQ_PEND_PFAULT_INIT,
- IRQ_PEND_PFAULT_DONE,
- IRQ_PEND_VIRTIO,
- IRQ_PEND_IO_ISC_0,
- IRQ_PEND_IO_ISC_1,
- IRQ_PEND_IO_ISC_2,
- IRQ_PEND_IO_ISC_3,
- IRQ_PEND_IO_ISC_4,
- IRQ_PEND_IO_ISC_5,
- IRQ_PEND_IO_ISC_6,
- IRQ_PEND_IO_ISC_7,
- IRQ_PEND_SIGP_STOP,
+ IRQ_PEND_SET_PREFIX = 0,
IRQ_PEND_RESTART,
- IRQ_PEND_SET_PREFIX,
+ IRQ_PEND_SIGP_STOP,
+ IRQ_PEND_IO_ISC_7,
+ IRQ_PEND_IO_ISC_6,
+ IRQ_PEND_IO_ISC_5,
+ IRQ_PEND_IO_ISC_4,
+ IRQ_PEND_IO_ISC_3,
+ IRQ_PEND_IO_ISC_2,
+ IRQ_PEND_IO_ISC_1,
+ IRQ_PEND_IO_ISC_0,
+ IRQ_PEND_VIRTIO,
+ IRQ_PEND_PFAULT_DONE,
+ IRQ_PEND_PFAULT_INIT,
+ IRQ_PEND_EXT_HOST,
+ IRQ_PEND_EXT_SERVICE,
+ IRQ_PEND_EXT_TIMING,
+ IRQ_PEND_EXT_CPU_TIMER,
+ IRQ_PEND_EXT_CLOCK_COMP,
+ IRQ_PEND_EXT_EXTERNAL,
+ IRQ_PEND_EXT_EMERGENCY,
+ IRQ_PEND_EXT_MALFUNC,
+ IRQ_PEND_EXT_IRQ_KEY,
+ IRQ_PEND_MCHK_REP,
+ IRQ_PEND_PROG,
+ IRQ_PEND_SVC,
+ IRQ_PEND_MCHK_EX,
IRQ_PEND_COUNT
};
@@ -516,9 +532,6 @@ struct kvm_s390_irq_payload {
struct kvm_s390_local_interrupt {
spinlock_t lock;
- struct kvm_s390_float_interrupt *float_int;
- struct swait_queue_head *wq;
- atomic_t *cpuflags;
DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
struct kvm_s390_irq_payload irq;
unsigned long pending_irqs;
@@ -707,14 +720,50 @@ struct kvm_s390_crypto_cb {
struct kvm_s390_apcb1 apcb1; /* 0x0080 */
};
+struct kvm_s390_gisa {
+ union {
+ struct { /* common to all formats */
+ u32 next_alert;
+ u8 ipm;
+ u8 reserved01[2];
+ u8 iam;
+ };
+ struct { /* format 0 */
+ u32 next_alert;
+ u8 ipm;
+ u8 reserved01;
+ u8 : 6;
+ u8 g : 1;
+ u8 c : 1;
+ u8 iam;
+ u8 reserved02[4];
+ u32 airq_count;
+ } g0;
+ struct { /* format 1 */
+ u32 next_alert;
+ u8 ipm;
+ u8 simm;
+ u8 nimm;
+ u8 iam;
+ u8 aism[8];
+ u8 : 6;
+ u8 g : 1;
+ u8 c : 1;
+ u8 reserved03[11];
+ u32 airq_count;
+ } g1;
+ };
+};
+
/*
- * sie_page2 has to be allocated as DMA because fac_list and crycb need
- * 31bit addresses in the sie control block.
+ * sie_page2 has to be allocated as DMA because fac_list, crycb and
+ * gisa need 31bit addresses in the sie control block.
*/
struct sie_page2 {
__u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */
struct kvm_s390_crypto_cb crycb; /* 0x0800 */
- u8 reserved900[0x1000 - 0x900]; /* 0x0900 */
+ struct kvm_s390_gisa gisa; /* 0x0900 */
+ u8 reserved920[0x1000 - 0x920]; /* 0x0920 */
};
struct kvm_s390_vsie {
@@ -761,6 +810,7 @@ struct kvm_arch{
struct kvm_s390_migration_state *migration_state;
/* subset of available cpu features enabled by user space */
DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
+ struct kvm_s390_gisa *gisa;
};
#define KVM_HVA_ERR_BAD (-1UL)
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index ec6592e8ba36..5bc888841eaf 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -136,7 +136,11 @@ struct lowcore {
__u64 vdso_per_cpu_data; /* 0x03b8 */
__u64 machine_flags; /* 0x03c0 */
__u64 gmap; /* 0x03c8 */
- __u8 pad_0x03d0[0x0e00-0x03d0]; /* 0x03d0 */
+ __u8 pad_0x03d0[0x0400-0x03d0]; /* 0x03d0 */
+
+ /* br %r1 trampoline */
+ __u16 br_r1_trampoline; /* 0x0400 */
+ __u8 pad_0x0402[0x0e00-0x0402]; /* 0x0402 */
/*
* 0xe00 contains the address of the IPL Parameter Information
@@ -151,7 +155,8 @@ struct lowcore {
__u8 pad_0x0e20[0x0f00-0x0e20]; /* 0x0e20 */
/* Extended facility list */
- __u64 stfle_fac_list[32]; /* 0x0f00 */
+ __u64 stfle_fac_list[16]; /* 0x0f00 */
+ __u64 alt_stfle_fac_list[16]; /* 0x0f80 */
__u8 pad_0x1000[0x11b0-0x1000]; /* 0x1000 */
/* Pointer to the machine check extended save area */
diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h
new file mode 100644
index 000000000000..7df48e5cf36f
--- /dev/null
+++ b/arch/s390/include/asm/nospec-branch.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_EXPOLINE_H
+#define _ASM_S390_EXPOLINE_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+extern int nospec_call_disable;
+extern int nospec_return_disable;
+
+void nospec_init_branches(void);
+void nospec_call_revert(s32 *start, s32 *end);
+void nospec_return_revert(s32 *start, s32 *end);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_S390_EXPOLINE_H */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index bfbfad482289..7f2953c15c37 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -91,6 +91,7 @@ void cpu_detect_mhz_feature(void);
extern const struct seq_operations cpuinfo_op;
extern int sysctl_ieee_emulation_warnings;
extern void execve_tail(void);
+extern void __bpon(void);
/*
* User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit.
@@ -377,6 +378,9 @@ extern void memcpy_absolute(void *, void *, size_t);
memcpy_absolute(&(dest), &__tmp, sizeof(__tmp)); \
} while (0)
+extern int s390_isolate_bp(void);
+extern int s390_isolate_bp_guest(void);
+
#endif /* __ASSEMBLY__ */
#endif /* __ASM_S390_PROCESSOR_H */
diff --git a/arch/s390/include/asm/runtime_instr.h b/arch/s390/include/asm/runtime_instr.h
index 6b1540337ed6..0e1605538cd4 100644
--- a/arch/s390/include/asm/runtime_instr.h
+++ b/arch/s390/include/asm/runtime_instr.h
@@ -2,75 +2,10 @@
#ifndef _RUNTIME_INSTR_H
#define _RUNTIME_INSTR_H
-#define S390_RUNTIME_INSTR_START 0x1
-#define S390_RUNTIME_INSTR_STOP 0x2
-
-struct runtime_instr_cb {
- __u64 rca;
- __u64 roa;
- __u64 rla;
-
- __u32 v : 1;
- __u32 s : 1;
- __u32 k : 1;
- __u32 h : 1;
- __u32 a : 1;
- __u32 reserved1 : 3;
- __u32 ps : 1;
- __u32 qs : 1;
- __u32 pc : 1;
- __u32 qc : 1;
- __u32 reserved2 : 1;
- __u32 g : 1;
- __u32 u : 1;
- __u32 l : 1;
- __u32 key : 4;
- __u32 reserved3 : 8;
- __u32 t : 1;
- __u32 rgs : 3;
-
- __u32 m : 4;
- __u32 n : 1;
- __u32 mae : 1;
- __u32 reserved4 : 2;
- __u32 c : 1;
- __u32 r : 1;
- __u32 b : 1;
- __u32 j : 1;
- __u32 e : 1;
- __u32 x : 1;
- __u32 reserved5 : 2;
- __u32 bpxn : 1;
- __u32 bpxt : 1;
- __u32 bpti : 1;
- __u32 bpni : 1;
- __u32 reserved6 : 2;
-
- __u32 d : 1;
- __u32 f : 1;
- __u32 ic : 4;
- __u32 dc : 4;
-
- __u64 reserved7;
- __u64 sf;
- __u64 rsic;
- __u64 reserved8;
-} __packed __aligned(8);
+#include <uapi/asm/runtime_instr.h>
extern struct runtime_instr_cb runtime_instr_empty_cb;
-static inline void load_runtime_instr_cb(struct runtime_instr_cb *cb)
-{
- asm volatile(".insn rsy,0xeb0000000060,0,0,%0" /* LRIC */
- : : "Q" (*cb));
-}
-
-static inline void store_runtime_instr_cb(struct runtime_instr_cb *cb)
-{
- asm volatile(".insn rsy,0xeb0000000061,0,0,%0" /* STRIC */
- : "=Q" (*cb) : : "cc");
-}
-
static inline void save_ri_cb(struct runtime_instr_cb *cb_prev)
{
if (cb_prev)
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index d3c1a8a2e3ad..3cae9168f63c 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -77,6 +77,7 @@ struct sclp_info {
unsigned char has_ibs : 1;
unsigned char has_skey : 1;
unsigned char has_kss : 1;
+ unsigned char has_gisaf : 1;
unsigned int ibc;
unsigned int mtid;
unsigned int mtid_cp;
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
index 25057c118d56..fe7b3f8f0791 100644
--- a/arch/s390/include/asm/sysinfo.h
+++ b/arch/s390/include/asm/sysinfo.h
@@ -21,7 +21,8 @@ struct sysinfo_1_1_1 {
unsigned char :8;
unsigned char ccr;
unsigned char cai;
- char reserved_0[28];
+ char reserved_0[20];
+ unsigned long lic;
char manufacturer[16];
char type[4];
char reserved_1[12];
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 25d6ec3aaddd..83ba57533ce6 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -58,6 +58,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
#define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */
#define TIF_PATCH_PENDING 5 /* pending live patching update */
#define TIF_PGSTE 6 /* New mm's will use 4K page tables */
+#define TIF_ISOLATE_BP 8 /* Run process with isolated BP */
+#define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */
#define TIF_31BIT 16 /* 32bit process */
#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
@@ -78,6 +80,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
#define _TIF_UPROBE _BITUL(TIF_UPROBE)
#define _TIF_GUARDED_STORAGE _BITUL(TIF_GUARDED_STORAGE)
#define _TIF_PATCH_PENDING _BITUL(TIF_PATCH_PENDING)
+#define _TIF_ISOLATE_BP _BITUL(TIF_ISOLATE_BP)
+#define _TIF_ISOLATE_BP_GUEST _BITUL(TIF_ISOLATE_BP_GUEST)
#define _TIF_31BIT _BITUL(TIF_31BIT)
#define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP)
diff --git a/arch/s390/include/uapi/asm/runtime_instr.h b/arch/s390/include/uapi/asm/runtime_instr.h
new file mode 100644
index 000000000000..45c9ec984e6b
--- /dev/null
+++ b/arch/s390/include/uapi/asm/runtime_instr.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _S390_UAPI_RUNTIME_INSTR_H
+#define _S390_UAPI_RUNTIME_INSTR_H
+
+#include <linux/types.h>
+
+#define S390_RUNTIME_INSTR_START 0x1
+#define S390_RUNTIME_INSTR_STOP 0x2
+
+struct runtime_instr_cb {
+ __u64 rca;
+ __u64 roa;
+ __u64 rla;
+
+ __u32 v : 1;
+ __u32 s : 1;
+ __u32 k : 1;
+ __u32 h : 1;
+ __u32 a : 1;
+ __u32 reserved1 : 3;
+ __u32 ps : 1;
+ __u32 qs : 1;
+ __u32 pc : 1;
+ __u32 qc : 1;
+ __u32 reserved2 : 1;
+ __u32 g : 1;
+ __u32 u : 1;
+ __u32 l : 1;
+ __u32 key : 4;
+ __u32 reserved3 : 8;
+ __u32 t : 1;
+ __u32 rgs : 3;
+
+ __u32 m : 4;
+ __u32 n : 1;
+ __u32 mae : 1;
+ __u32 reserved4 : 2;
+ __u32 c : 1;
+ __u32 r : 1;
+ __u32 b : 1;
+ __u32 j : 1;
+ __u32 e : 1;
+ __u32 x : 1;
+ __u32 reserved5 : 2;
+ __u32 bpxn : 1;
+ __u32 bpxt : 1;
+ __u32 bpti : 1;
+ __u32 bpni : 1;
+ __u32 reserved6 : 2;
+
+ __u32 d : 1;
+ __u32 f : 1;
+ __u32 ic : 4;
+ __u32 dc : 4;
+
+ __u64 reserved7;
+ __u64 sf;
+ __u64 rsic;
+ __u64 reserved8;
+} __packed __aligned(8);
+
+static inline void load_runtime_instr_cb(struct runtime_instr_cb *cb)
+{
+ asm volatile(".insn rsy,0xeb0000000060,0,0,%0" /* LRIC */
+ : : "Q" (*cb));
+}
+
+static inline void store_runtime_instr_cb(struct runtime_instr_cb *cb)
+{
+ asm volatile(".insn rsy,0xeb0000000061,0,0,%0" /* STRIC */
+ : "=Q" (*cb) : : "cc");
+}
+
+#endif /* _S390_UAPI_RUNTIME_INSTR_H */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 909bce65cb2b..7f27e3da9709 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -29,6 +29,7 @@ UBSAN_SANITIZE_early.o := n
#
ifneq ($(CC_FLAGS_MARCH),-march=z900)
CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH)
+CFLAGS_REMOVE_als.o += $(CC_FLAGS_EXPOLINE)
CFLAGS_als.o += -march=z900
AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH)
AFLAGS_head.o += -march=z900
@@ -63,6 +64,9 @@ obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
extra-y += head.o head64.o vmlinux.lds
+obj-$(CONFIG_EXPOLINE) += nospec-branch.o
+CFLAGS_REMOVE_expoline.o += $(CC_FLAGS_EXPOLINE)
+
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o
diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c
index 574e77622c04..22476135f738 100644
--- a/arch/s390/kernel/alternative.c
+++ b/arch/s390/kernel/alternative.c
@@ -15,6 +15,29 @@ static int __init disable_alternative_instructions(char *str)
early_param("noaltinstr", disable_alternative_instructions);
+static int __init nobp_setup_early(char *str)
+{
+ bool enabled;
+ int rc;
+
+ rc = kstrtobool(str, &enabled);
+ if (rc)
+ return rc;
+ if (enabled && test_facility(82))
+ __set_facility(82, S390_lowcore.alt_stfle_fac_list);
+ else
+ __clear_facility(82, S390_lowcore.alt_stfle_fac_list);
+ return 0;
+}
+early_param("nobp", nobp_setup_early);
+
+static int __init nospec_setup_early(char *str)
+{
+ __clear_facility(82, S390_lowcore.alt_stfle_fac_list);
+ return 0;
+}
+early_param("nospec", nospec_setup_early);
+
struct brcl_insn {
u16 opc;
s32 disp;
@@ -75,7 +98,8 @@ static void __init_or_module __apply_alternatives(struct alt_instr *start,
instr = (u8 *)&a->instr_offset + a->instr_offset;
replacement = (u8 *)&a->repl_offset + a->repl_offset;
- if (!test_facility(a->facility))
+ if (!__test_facility(a->facility,
+ S390_lowcore.alt_stfle_fac_list))
continue;
if (unlikely(a->instrlen % 2 || a->replacementlen % 2)) {
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 497a92047591..ac707a9f729e 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -193,6 +193,11 @@ static noinline __init void setup_facility_list(void)
{
stfle(S390_lowcore.stfle_fac_list,
ARRAY_SIZE(S390_lowcore.stfle_fac_list));
+ memcpy(S390_lowcore.alt_stfle_fac_list,
+ S390_lowcore.stfle_fac_list,
+ sizeof(S390_lowcore.alt_stfle_fac_list));
+ if (!IS_ENABLED(CONFIG_KERNEL_NOBP))
+ __clear_facility(82, S390_lowcore.alt_stfle_fac_list);
}
static __init void detect_diag9c(void)
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 6cd444d25545..13a133a6015c 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -107,6 +107,7 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j 3f
1: UPDATE_VTIME %r14,%r15,\timer
+ BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
2: lg %r15,__LC_ASYNC_STACK # load async stack
3: la %r11,STACK_FRAME_OVERHEAD(%r15)
.endm
@@ -159,6 +160,130 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)
tm off+\addr, \mask
.endm
+ .macro BPOFF
+ .pushsection .altinstr_replacement, "ax"
+660: .long 0xb2e8c000
+ .popsection
+661: .long 0x47000000
+ .pushsection .altinstructions, "a"
+ .long 661b - .
+ .long 660b - .
+ .word 82
+ .byte 4
+ .byte 4
+ .popsection
+ .endm
+
+ .macro BPON
+ .pushsection .altinstr_replacement, "ax"
+662: .long 0xb2e8d000
+ .popsection
+663: .long 0x47000000
+ .pushsection .altinstructions, "a"
+ .long 663b - .
+ .long 662b - .
+ .word 82
+ .byte 4
+ .byte 4
+ .popsection
+ .endm
+
+ .macro BPENTER tif_ptr,tif_mask
+ .pushsection .altinstr_replacement, "ax"
+662: .word 0xc004, 0x0000, 0x0000 # 6 byte nop
+ .word 0xc004, 0x0000, 0x0000 # 6 byte nop
+ .popsection
+664: TSTMSK \tif_ptr,\tif_mask
+ jz . + 8
+ .long 0xb2e8d000
+ .pushsection .altinstructions, "a"
+ .long 664b - .
+ .long 662b - .
+ .word 82
+ .byte 12
+ .byte 12
+ .popsection
+ .endm
+
+ .macro BPEXIT tif_ptr,tif_mask
+ TSTMSK \tif_ptr,\tif_mask
+ .pushsection .altinstr_replacement, "ax"
+662: jnz . + 8
+ .long 0xb2e8d000
+ .popsection
+664: jz . + 8
+ .long 0xb2e8c000
+ .pushsection .altinstructions, "a"
+ .long 664b - .
+ .long 662b - .
+ .word 82
+ .byte 8
+ .byte 8
+ .popsection
+ .endm
+
+#ifdef CONFIG_EXPOLINE
+
+ .macro GEN_BR_THUNK name,reg,tmp
+ .section .text.\name,"axG",@progbits,\name,comdat
+ .globl \name
+ .hidden \name
+ .type \name,@function
+\name:
+ .cfi_startproc
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+ exrl 0,0f
+#else
+ larl \tmp,0f
+ ex 0,0(\tmp)
+#endif
+ j .
+0: br \reg
+ .cfi_endproc
+ .endm
+
+ GEN_BR_THUNK __s390x_indirect_jump_r1use_r9,%r9,%r1
+ GEN_BR_THUNK __s390x_indirect_jump_r1use_r14,%r14,%r1
+ GEN_BR_THUNK __s390x_indirect_jump_r11use_r14,%r14,%r11
+
+ .macro BASR_R14_R9
+0: brasl %r14,__s390x_indirect_jump_r1use_r9
+ .pushsection .s390_indirect_branches,"a",@progbits
+ .long 0b-.
+ .popsection
+ .endm
+
+ .macro BR_R1USE_R14
+0: jg __s390x_indirect_jump_r1use_r14
+ .pushsection .s390_indirect_branches,"a",@progbits
+ .long 0b-.
+ .popsection
+ .endm
+
+ .macro BR_R11USE_R14
+0: jg __s390x_indirect_jump_r11use_r14
+ .pushsection .s390_indirect_branches,"a",@progbits
+ .long 0b-.
+ .popsection
+ .endm
+
+#else /* CONFIG_EXPOLINE */
+
+ .macro BASR_R14_R9
+ basr %r14,%r9
+ .endm
+
+ .macro BR_R1USE_R14
+ br %r14
+ .endm
+
+ .macro BR_R11USE_R14
+ br %r14
+ .endm
+
+#endif /* CONFIG_EXPOLINE */
+
+
.section .kprobes.text, "ax"
.Ldummy:
/*
@@ -171,6 +296,11 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)
*/
nop 0
+ENTRY(__bpon)
+ .globl __bpon
+ BPON
+ BR_R1USE_R14
+
/*
* Scheduler resume function, called by switch_to
* gpr2 = (task_struct *) prev
@@ -193,9 +323,9 @@ ENTRY(__switch_to)
mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next
lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP
- bzr %r14
+ jz 0f
.insn s,0xb2800000,__LC_LPP # set program parameter
- br %r14
+0: BR_R1USE_R14
.L__critical_start:
@@ -207,9 +337,11 @@ ENTRY(__switch_to)
*/
ENTRY(sie64a)
stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers
+ lg %r12,__LC_CURRENT
stg %r2,__SF_EMPTY(%r15) # save control block pointer
stg %r3,__SF_EMPTY+8(%r15) # save guest register save area
xc __SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # reason code = 0
+ mvc __SF_EMPTY+24(8,%r15),__TI_flags(%r12) # copy thread flags
TSTMSK __LC_CPU_FLAGS,_CIF_FPU # load guest fp/vx registers ?
jno .Lsie_load_guest_gprs
brasl %r14,load_fpu_regs # load guest fp/vx regs
@@ -226,8 +358,12 @@ ENTRY(sie64a)
jnz .Lsie_skip
TSTMSK __LC_CPU_FLAGS,_CIF_FPU
jo .Lsie_skip # exit if fp/vx regs changed
+ BPEXIT __SF_EMPTY+24(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
.Lsie_entry:
sie 0(%r14)
+.Lsie_exit:
+ BPOFF
+ BPENTER __SF_EMPTY+24(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
.Lsie_skip:
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
@@ -248,9 +384,15 @@ ENTRY(sie64a)
sie_exit:
lg %r14,__SF_EMPTY+8(%r15) # load guest register save area
stmg %r0,%r13,0(%r14) # save guest gprs 0-13
+ xgr %r0,%r0 # clear guest registers to
+ xgr %r1,%r1 # prevent speculative use
+ xgr %r2,%r2
+ xgr %r3,%r3
+ xgr %r4,%r4
+ xgr %r5,%r5
lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
lg %r2,__SF_EMPTY+16(%r15) # return exit reason code
- br %r14
+ BR_R1USE_R14
.Lsie_fault:
lghi %r14,-EFAULT
stg %r14,__SF_EMPTY+16(%r15) # set exit reason code
@@ -273,6 +415,7 @@ ENTRY(system_call)
stpt __LC_SYNC_ENTER_TIMER
.Lsysc_stmg:
stmg %r8,%r15,__LC_SAVE_AREA_SYNC
+ BPOFF
lg %r12,__LC_CURRENT
lghi %r13,__TASK_thread
lghi %r14,_PIF_SYSCALL
@@ -281,7 +424,10 @@ ENTRY(system_call)
la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
.Lsysc_vtime:
UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER
+ BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
stmg %r0,%r7,__PT_R0(%r11)
+ # clear user controlled register to prevent speculative use
+ xgr %r0,%r0
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW
mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC
@@ -305,7 +451,7 @@ ENTRY(system_call)
lgf %r9,0(%r8,%r10) # get system call add.
TSTMSK __TI_flags(%r12),_TIF_TRACE
jnz .Lsysc_tracesys
- basr %r14,%r9 # call sys_xxxx
+ BASR_R14_R9 # call sys_xxxx
stg %r2,__PT_R2(%r11) # store return value
.Lsysc_return:
@@ -317,6 +463,7 @@ ENTRY(system_call)
jnz .Lsysc_work # check for work
TSTMSK __LC_CPU_FLAGS,_CIF_WORK
jnz .Lsysc_work
+ BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
.Lsysc_restore:
lg %r14,__LC_VDSO_PER_CPU
lmg %r0,%r10,__PT_R0(%r11)
@@ -489,7 +636,7 @@ ENTRY(system_call)
lmg %r3,%r7,__PT_R3(%r11)
stg %r7,STACK_FRAME_OVERHEAD(%r15)
lg %r2,__PT_ORIG_GPR2(%r11)
- basr %r14,%r9 # call sys_xxx
+ BASR_R14_R9 # call sys_xxx
stg %r2,__PT_R2(%r11) # store return value
.Lsysc_tracenogo:
TSTMSK __TI_flags(%r12),_TIF_TRACE
@@ -513,7 +660,7 @@ ENTRY(ret_from_fork)
lmg %r9,%r10,__PT_R9(%r11) # load gprs
ENTRY(kernel_thread_starter)
la %r2,0(%r10)
- basr %r14,%r9
+ BASR_R14_R9
j .Lsysc_tracenogo
/*
@@ -522,6 +669,7 @@ ENTRY(kernel_thread_starter)
ENTRY(pgm_check_handler)
stpt __LC_SYNC_ENTER_TIMER
+ BPOFF
stmg %r8,%r15,__LC_SAVE_AREA_SYNC
lg %r10,__LC_LAST_BREAK
lg %r12,__LC_CURRENT
@@ -550,6 +698,7 @@ ENTRY(pgm_check_handler)
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j 4f
2: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
+ BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
lg %r15,__LC_KERNEL_STACK
lgr %r14,%r12
aghi %r14,__TASK_thread # pointer to thread_struct
@@ -561,6 +710,15 @@ ENTRY(pgm_check_handler)
4: lgr %r13,%r11
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
+ # clear user controlled registers to prevent speculative use
+ xgr %r0,%r0
+ xgr %r1,%r1
+ xgr %r2,%r2
+ xgr %r3,%r3
+ xgr %r4,%r4
+ xgr %r5,%r5
+ xgr %r6,%r6
+ xgr %r7,%r7
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
stmg %r8,%r9,__PT_PSW(%r11)
mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC
@@ -582,9 +740,9 @@ ENTRY(pgm_check_handler)
nill %r10,0x007f
sll %r10,2
je .Lpgm_return
- lgf %r1,0(%r10,%r1) # load address of handler routine
+ lgf %r9,0(%r10,%r1) # load address of handler routine
lgr %r2,%r11 # pass pointer to pt_regs
- basr %r14,%r1 # branch to interrupt-handler
+ BASR_R14_R9 # branch to interrupt-handler
.Lpgm_return:
LOCKDEP_SYS_EXIT
tm __PT_PSW+1(%r11),0x01 # returning to user ?
@@ -620,12 +778,23 @@ ENTRY(pgm_check_handler)
ENTRY(io_int_handler)
STCK __LC_INT_CLOCK
stpt __LC_ASYNC_ENTER_TIMER
+ BPOFF
stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
lg %r12,__LC_CURRENT
larl %r13,cleanup_critical
lmg %r8,%r9,__LC_IO_OLD_PSW
SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
stmg %r0,%r7,__PT_R0(%r11)
+ # clear user controlled registers to prevent speculative use
+ xgr %r0,%r0
+ xgr %r1,%r1
+ xgr %r2,%r2
+ xgr %r3,%r3
+ xgr %r4,%r4
+ xgr %r5,%r5
+ xgr %r6,%r6
+ xgr %r7,%r7
+ xgr %r10,%r10
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
stmg %r8,%r9,__PT_PSW(%r11)
mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
@@ -660,9 +829,13 @@ ENTRY(io_int_handler)
lg %r14,__LC_VDSO_PER_CPU
lmg %r0,%r10,__PT_R0(%r11)
mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
+ tm __PT_PSW+1(%r11),0x01 # returning to user ?
+ jno .Lio_exit_kernel
+ BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
.Lio_exit_timer:
stpt __LC_EXIT_TIMER
mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
+.Lio_exit_kernel:
lmg %r11,%r15,__PT_R11(%r11)
lpswe __LC_RETURN_PSW
.Lio_done:
@@ -833,12 +1006,23 @@ ENTRY(io_int_handler)
ENTRY(ext_int_handler)
STCK __LC_INT_CLOCK
stpt __LC_ASYNC_ENTER_TIMER
+ BPOFF
stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
lg %r12,__LC_CURRENT
larl %r13,cleanup_critical
lmg %r8,%r9,__LC_EXT_OLD_PSW
SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
stmg %r0,%r7,__PT_R0(%r11)
+ # clear user controlled registers to prevent speculative use
+ xgr %r0,%r0
+ xgr %r1,%r1
+ xgr %r2,%r2
+ xgr %r3,%r3
+ xgr %r4,%r4
+ xgr %r5,%r5
+ xgr %r6,%r6
+ xgr %r7,%r7
+ xgr %r10,%r10
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
stmg %r8,%r9,__PT_PSW(%r11)
lghi %r1,__LC_EXT_PARAMS2
@@ -871,11 +1055,12 @@ ENTRY(psw_idle)
.Lpsw_idle_stcctm:
#endif
oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT
+ BPON
STCK __CLOCK_IDLE_ENTER(%r2)
stpt __TIMER_IDLE_ENTER(%r2)
.Lpsw_idle_lpsw:
lpswe __SF_EMPTY(%r15)
- br %r14
+ BR_R1USE_R14
.Lpsw_idle_end:
/*
@@ -889,7 +1074,7 @@ ENTRY(save_fpu_regs)
lg %r2,__LC_CURRENT
aghi %r2,__TASK_thread
TSTMSK __LC_CPU_FLAGS,_CIF_FPU
- bor %r14
+ jo .Lsave_fpu_regs_exit
stfpc __THREAD_FPU_fpc(%r2)
lg %r3,__THREAD_FPU_regs(%r2)
TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX
@@ -916,7 +1101,8 @@ ENTRY(save_fpu_regs)
std 15,120(%r3)
.Lsave_fpu_regs_done:
oi __LC_CPU_FLAGS+7,_CIF_FPU
- br %r14
+.Lsave_fpu_regs_exit:
+ BR_R1USE_R14
.Lsave_fpu_regs_end:
EXPORT_SYMBOL(save_fpu_regs)
@@ -934,7 +1120,7 @@ load_fpu_regs:
lg %r4,__LC_CURRENT
aghi %r4,__TASK_thread
TSTMSK __LC_CPU_FLAGS,_CIF_FPU
- bnor %r14
+ jno .Lload_fpu_regs_exit
lfpc __THREAD_FPU_fpc(%r4)
TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX
lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area
@@ -961,7 +1147,8 @@ load_fpu_regs:
ld 15,120(%r4)
.Lload_fpu_regs_done:
ni __LC_CPU_FLAGS+7,255-_CIF_FPU
- br %r14
+.Lload_fpu_regs_exit:
+ BR_R1USE_R14
.Lload_fpu_regs_end:
.L__critical_end:
@@ -971,6 +1158,7 @@ load_fpu_regs:
*/
ENTRY(mcck_int_handler)
STCK __LC_MCCK_CLOCK
+ BPOFF
la %r1,4095 # validate r1
spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # validate cpu timer
sckc __LC_CLOCK_COMPARATOR # validate comparator
@@ -1046,6 +1234,16 @@ ENTRY(mcck_int_handler)
.Lmcck_skip:
lghi %r14,__LC_GPREGS_SAVE_AREA+64
stmg %r0,%r7,__PT_R0(%r11)
+ # clear user controlled registers to prevent speculative use
+ xgr %r0,%r0
+ xgr %r1,%r1
+ xgr %r2,%r2
+ xgr %r3,%r3
+ xgr %r4,%r4
+ xgr %r5,%r5
+ xgr %r6,%r6
+ xgr %r7,%r7
+ xgr %r10,%r10
mvc __PT_R8(64,%r11),0(%r14)
stmg %r8,%r9,__PT_PSW(%r11)
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
@@ -1071,6 +1269,7 @@ ENTRY(mcck_int_handler)
mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
jno 0f
+ BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
stpt __LC_EXIT_TIMER
mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
0: lmg %r11,%r15,__PT_R11(%r11)
@@ -1166,7 +1365,7 @@ cleanup_critical:
jl 0f
clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end
jl .Lcleanup_load_fpu_regs
-0: br %r14
+0: BR_R11USE_R14
.align 8
.Lcleanup_table:
@@ -1197,11 +1396,12 @@ cleanup_critical:
clg %r9,BASED(.Lsie_crit_mcck_length)
jh 1f
oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
-1: lg %r9,__SF_EMPTY(%r15) # get control block pointer
+1: BPENTER __SF_EMPTY+24(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
+ lg %r9,__SF_EMPTY(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
larl %r9,sie_exit # skip forward to sie_exit
- br %r14
+ BR_R11USE_R14
#endif
.Lcleanup_system_call:
@@ -1254,7 +1454,7 @@ cleanup_critical:
stg %r15,56(%r11) # r15 stack pointer
# set new psw address and exit
larl %r9,.Lsysc_do_svc
- br %r14
+ BR_R11USE_R14
.Lcleanup_system_call_insn:
.quad system_call
.quad .Lsysc_stmg
@@ -1266,7 +1466,7 @@ cleanup_critical:
.Lcleanup_sysc_tif:
larl %r9,.Lsysc_tif
- br %r14
+ BR_R11USE_R14
.Lcleanup_sysc_restore:
# check if stpt has been executed
@@ -1283,14 +1483,14 @@ cleanup_critical:
mvc 0(64,%r11),__PT_R8(%r9)
lmg %r0,%r7,__PT_R0(%r9)
1: lmg %r8,%r9,__LC_RETURN_PSW
- br %r14
+ BR_R11USE_R14
.Lcleanup_sysc_restore_insn:
.quad .Lsysc_exit_timer
.quad .Lsysc_done - 4
.Lcleanup_io_tif:
larl %r9,.Lio_tif
- br %r14
+ BR_R11USE_R14
.Lcleanup_io_restore:
# check if stpt has been executed
@@ -1304,7 +1504,7 @@ cleanup_critical:
mvc 0(64,%r11),__PT_R8(%r9)
lmg %r0,%r7,__PT_R0(%r9)
1: lmg %r8,%r9,__LC_RETURN_PSW
- br %r14
+ BR_R11USE_R14
.Lcleanup_io_restore_insn:
.quad .Lio_exit_timer
.quad .Lio_done - 4
@@ -1357,17 +1557,17 @@ cleanup_critical:
# prepare return psw
nihh %r8,0xfcfd # clear irq & wait state bits
lg %r9,48(%r11) # return from psw_idle
- br %r14
+ BR_R11USE_R14
.Lcleanup_idle_insn:
.quad .Lpsw_idle_lpsw
.Lcleanup_save_fpu_regs:
larl %r9,save_fpu_regs
- br %r14
+ BR_R11USE_R14
.Lcleanup_load_fpu_regs:
larl %r9,load_fpu_regs
- br %r14
+ BR_R11USE_R14
/*
* Integer constants
@@ -1387,7 +1587,6 @@ cleanup_critical:
.Lsie_crit_mcck_length:
.quad .Lsie_skip - .Lsie_entry
#endif
-
.section .rodata, "a"
#define SYSCALL(esame,emu) .long esame
.globl sys_call_table
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index da5cc3b469aa..34477c1aee6d 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -543,6 +543,7 @@ static struct kset *ipl_kset;
static void __ipl_run(void *unused)
{
+ __bpon();
diag308(DIAG308_LOAD_CLEAR, NULL);
if (MACHINE_IS_VM)
__cpcmd("IPL", NULL, 0, NULL);
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 943d13e90c98..60f60afa645c 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -281,7 +281,7 @@ static void kprobe_reenter_check(struct kprobe_ctlblk *kcb, struct kprobe *p)
* is a BUG. The code path resides in the .kprobes.text
* section and is executed with interrupts disabled.
*/
- printk(KERN_EMERG "Invalid kprobe detected at %p.\n", p->addr);
+ pr_err("Invalid kprobe detected.\n");
dump_kprobe(p);
BUG();
}
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index b7abfad4fd7d..1fc6d1ff92d3 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -19,6 +19,8 @@
#include <linux/moduleloader.h>
#include <linux/bug.h>
#include <asm/alternative.h>
+#include <asm/nospec-branch.h>
+#include <asm/facility.h>
#if 0
#define DEBUGP printk
@@ -156,7 +158,11 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
me->arch.got_offset = me->core_layout.size;
me->core_layout.size += me->arch.got_size;
me->arch.plt_offset = me->core_layout.size;
- me->core_layout.size += me->arch.plt_size;
+ if (me->arch.plt_size) {
+ if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_call_disable)
+ me->arch.plt_size += PLT_ENTRY_SIZE;
+ me->core_layout.size += me->arch.plt_size;
+ }
return 0;
}
@@ -310,9 +316,21 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
unsigned int *ip;
ip = me->core_layout.base + me->arch.plt_offset +
info->plt_offset;
- ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */
- ip[1] = 0x100a0004;
- ip[2] = 0x07f10000;
+ ip[0] = 0x0d10e310; /* basr 1,0 */
+ ip[1] = 0x100a0004; /* lg 1,10(1) */
+ if (IS_ENABLED(CONFIG_EXPOLINE) &&
+ !nospec_call_disable) {
+ unsigned int *ij;
+ ij = me->core_layout.base +
+ me->arch.plt_offset +
+ me->arch.plt_size - PLT_ENTRY_SIZE;
+ ip[2] = 0xa7f40000 + /* j __jump_r1 */
+ (unsigned int)(u16)
+ (((unsigned long) ij - 8 -
+ (unsigned long) ip) / 2);
+ } else {
+ ip[2] = 0x07f10000; /* br %r1 */
+ }
ip[3] = (unsigned int) (val >> 32);
ip[4] = (unsigned int) val;
info->plt_initialized = 1;
@@ -418,16 +436,42 @@ int module_finalize(const Elf_Ehdr *hdr,
struct module *me)
{
const Elf_Shdr *s;
- char *secstrings;
+ char *secstrings, *secname;
+ void *aseg;
+
+ if (IS_ENABLED(CONFIG_EXPOLINE) &&
+ !nospec_call_disable && me->arch.plt_size) {
+ unsigned int *ij;
+
+ ij = me->core_layout.base + me->arch.plt_offset +
+ me->arch.plt_size - PLT_ENTRY_SIZE;
+ if (test_facility(35)) {
+ ij[0] = 0xc6000000; /* exrl %r0,.+10 */
+ ij[1] = 0x0005a7f4; /* j . */
+ ij[2] = 0x000007f1; /* br %r1 */
+ } else {
+ ij[0] = 0x44000000 | (unsigned int)
+ offsetof(struct lowcore, br_r1_trampoline);
+ ij[1] = 0xa7f40000; /* j . */
+ }
+ }
secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
- if (!strcmp(".altinstructions", secstrings + s->sh_name)) {
- /* patch .altinstructions */
- void *aseg = (void *)s->sh_addr;
+ aseg = (void *) s->sh_addr;
+ secname = secstrings + s->sh_name;
+ if (!strcmp(".altinstructions", secname))
+ /* patch .altinstructions */
apply_alternatives(aseg, aseg + s->sh_size);
- }
+
+ if (IS_ENABLED(CONFIG_EXPOLINE) &&
+ (!strcmp(".nospec_call_table", secname)))
+ nospec_call_revert(aseg, aseg + s->sh_size);
+
+ if (IS_ENABLED(CONFIG_EXPOLINE) &&
+ (!strcmp(".nospec_return_table", secname)))
+ nospec_return_revert(aseg, aseg + s->sh_size);
}
jump_label_apply_nops(me);
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
new file mode 100644
index 000000000000..69d7fcf48158
--- /dev/null
+++ b/arch/s390/kernel/nospec-branch.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/module.h>
+#include <asm/nospec-branch.h>
+
+int nospec_call_disable = IS_ENABLED(EXPOLINE_OFF);
+int nospec_return_disable = !IS_ENABLED(EXPOLINE_FULL);
+
+static int __init nospectre_v2_setup_early(char *str)
+{
+ nospec_call_disable = 1;
+ nospec_return_disable = 1;
+ return 0;
+}
+early_param("nospectre_v2", nospectre_v2_setup_early);
+
+static int __init spectre_v2_setup_early(char *str)
+{
+ if (str && !strncmp(str, "on", 2)) {
+ nospec_call_disable = 0;
+ nospec_return_disable = 0;
+ }
+ if (str && !strncmp(str, "off", 3)) {
+ nospec_call_disable = 1;
+ nospec_return_disable = 1;
+ }
+ if (str && !strncmp(str, "auto", 4)) {
+ nospec_call_disable = 0;
+ nospec_return_disable = 1;
+ }
+ return 0;
+}
+early_param("spectre_v2", spectre_v2_setup_early);
+
+static void __init_or_module __nospec_revert(s32 *start, s32 *end)
+{
+ enum { BRCL_EXPOLINE, BRASL_EXPOLINE } type;
+ u8 *instr, *thunk, *br;
+ u8 insnbuf[6];
+ s32 *epo;
+
+ /* Second part of the instruction replace is always a nop */
+ memcpy(insnbuf + 2, (char[]) { 0x47, 0x00, 0x00, 0x00 }, 4);
+ for (epo = start; epo < end; epo++) {
+ instr = (u8 *) epo + *epo;
+ if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x04)
+ type = BRCL_EXPOLINE; /* brcl instruction */
+ else if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x05)
+ type = BRASL_EXPOLINE; /* brasl instruction */
+ else
+ continue;
+ thunk = instr + (*(int *)(instr + 2)) * 2;
+ if (thunk[0] == 0xc6 && thunk[1] == 0x00)
+ /* exrl %r0,<target-br> */
+ br = thunk + (*(int *)(thunk + 2)) * 2;
+ else if (thunk[0] == 0xc0 && (thunk[1] & 0x0f) == 0x00 &&
+ thunk[6] == 0x44 && thunk[7] == 0x00 &&
+ (thunk[8] & 0x0f) == 0x00 && thunk[9] == 0x00 &&
+ (thunk[1] & 0xf0) == (thunk[8] & 0xf0))
+ /* larl %rx,<target br> + ex %r0,0(%rx) */
+ br = thunk + (*(int *)(thunk + 2)) * 2;
+ else
+ continue;
+ if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0)
+ continue;
+ switch (type) {
+ case BRCL_EXPOLINE:
+ /* brcl to thunk, replace with br + nop */
+ insnbuf[0] = br[0];
+ insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f);
+ break;
+ case BRASL_EXPOLINE:
+ /* brasl to thunk, replace with basr + nop */
+ insnbuf[0] = 0x0d;
+ insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f);
+ break;
+ }
+
+ s390_kernel_write(instr, insnbuf, 6);
+ }
+}
+
+void __init_or_module nospec_call_revert(s32 *start, s32 *end)
+{
+ if (nospec_call_disable)
+ __nospec_revert(start, end);
+}
+
+void __init_or_module nospec_return_revert(s32 *start, s32 *end)
+{
+ if (nospec_return_disable)
+ __nospec_revert(start, end);
+}
+
+extern s32 __nospec_call_start[], __nospec_call_end[];
+extern s32 __nospec_return_start[], __nospec_return_end[];
+void __init nospec_init_branches(void)
+{
+ nospec_call_revert(__nospec_call_start, __nospec_call_end);
+ nospec_return_revert(__nospec_return_start, __nospec_return_end);
+}
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
index 94f90cefbffc..c5bc3f209652 100644
--- a/arch/s390/kernel/perf_cpum_cf_events.c
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -226,7 +226,7 @@ CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_L4_SOURCED_WRITES, 0x00af);
CPUMF_EVENT_ATTR(cf_z14, BCD_DFP_EXECUTION_SLOTS, 0x00e0);
CPUMF_EVENT_ATTR(cf_z14, VX_BCD_EXECUTION_SLOTS, 0x00e1);
CPUMF_EVENT_ATTR(cf_z14, DECIMAL_INSTRUCTIONS, 0x00e2);
-CPUMF_EVENT_ATTR(cf_z14, LAST_HOST_TRANSLATIONS, 0x00e9);
+CPUMF_EVENT_ATTR(cf_z14, LAST_HOST_TRANSLATIONS, 0x00e8);
CPUMF_EVENT_ATTR(cf_z14, TX_NC_TABORT, 0x00f3);
CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_NO_SPECIAL, 0x00f4);
CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_SPECIAL, 0x00f5);
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 5362fd868d0d..6fe2e1875058 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -197,3 +197,21 @@ const struct seq_operations cpuinfo_op = {
.stop = c_stop,
.show = show_cpuinfo,
};
+
+int s390_isolate_bp(void)
+{
+ if (!test_facility(82))
+ return -EOPNOTSUPP;
+ set_thread_flag(TIF_ISOLATE_BP);
+ return 0;
+}
+EXPORT_SYMBOL(s390_isolate_bp);
+
+int s390_isolate_bp_guest(void)
+{
+ if (!test_facility(82))
+ return -EOPNOTSUPP;
+ set_thread_flag(TIF_ISOLATE_BP_GUEST);
+ return 0;
+}
+EXPORT_SYMBOL(s390_isolate_bp_guest);
diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c
index 09f5bf0d5c0c..125c7f6e8715 100644
--- a/arch/s390/kernel/runtime_instr.c
+++ b/arch/s390/kernel/runtime_instr.c
@@ -18,6 +18,8 @@
#include <asm/cpu_mf.h>
#include <asm/irq.h>
+#include "entry.h"
+
/* empty control block to disable RI by loading it */
struct runtime_instr_cb runtime_instr_empty_cb;
@@ -59,7 +61,13 @@ static void init_runtime_instr_cb(struct runtime_instr_cb *cb)
cb->v = 1;
}
-SYSCALL_DEFINE1(s390_runtime_instr, int, command)
+/*
+ * The signum argument is unused. In older kernels it was used to
+ * specify a real-time signal. For backwards compatibility user space
+ * should pass a valid real-time signal number (the signum argument
+ * was checked in older kernels).
+ */
+SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum)
{
struct runtime_instr_cb *cb;
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 793da97f9a6e..a6a91f01a17a 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -68,6 +68,7 @@
#include <asm/sysinfo.h>
#include <asm/numa.h>
#include <asm/alternative.h>
+#include <asm/nospec-branch.h>
#include "entry.h"
/*
@@ -340,7 +341,9 @@ static void __init setup_lowcore(void)
lc->preempt_count = S390_lowcore.preempt_count;
lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
- MAX_FACILITY_BIT/8);
+ sizeof(lc->stfle_fac_list));
+ memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list,
+ sizeof(lc->alt_stfle_fac_list));
nmi_alloc_boot_cpu(lc);
vdso_alloc_boot_cpu(lc);
lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
@@ -377,6 +380,7 @@ static void __init setup_lowcore(void)
lc->spinlock_index = 0;
arch_spin_lock_setup(0);
#endif
+ lc->br_r1_trampoline = 0x07f1; /* br %r1 */
set_prefix((u32)(unsigned long) lc);
lowcore_ptr[0] = lc;
@@ -952,6 +956,8 @@ void __init setup_arch(char **cmdline_p)
set_preferred_console();
apply_alternative_instructions();
+ if (IS_ENABLED(CONFIG_EXPOLINE))
+ nospec_init_branches();
/* Setup zfcpdump support */
setup_zfcpdump();
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index a919b2f0141d..a4a9fe1934e9 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -214,6 +214,7 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
lc->cpu_nr = cpu;
lc->spinlock_lockval = arch_spin_lockval(cpu);
lc->spinlock_index = 0;
+ lc->br_r1_trampoline = 0x07f1; /* br %r1 */
if (nmi_alloc_per_cpu(lc))
goto out;
if (vdso_alloc_per_cpu(lc))
@@ -266,7 +267,9 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
__ctl_store(lc->cregs_save_area, 0, 15);
save_access_regs((unsigned int *) lc->access_regs_save_area);
memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
- MAX_FACILITY_BIT/8);
+ sizeof(lc->stfle_fac_list));
+ memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list,
+ sizeof(lc->alt_stfle_fac_list));
arch_spin_lock_setup(cpu);
}
@@ -317,6 +320,7 @@ static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *),
mem_assign_absolute(lc->restart_fn, (unsigned long) func);
mem_assign_absolute(lc->restart_data, (unsigned long) data);
mem_assign_absolute(lc->restart_source, source_cpu);
+ __bpon();
asm volatile(
"0: sigp 0,%0,%2 # sigp restart to target cpu\n"
" brc 2,0b # busy, try again\n"
@@ -901,6 +905,7 @@ void __cpu_die(unsigned int cpu)
void __noreturn cpu_die(void)
{
idle_task_exit();
+ __bpon();
pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0);
for (;;) ;
}
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index a441cba8d165..fc7e04c2195b 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -89,6 +89,8 @@ static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info)
EBCASC(info->model_temp_cap, sizeof(info->model_temp_cap));
seq_printf(m, "Manufacturer: %-16.16s\n", info->manufacturer);
seq_printf(m, "Type: %-4.4s\n", info->type);
+ if (info->lic)
+ seq_printf(m, "LIC Identifier: %016lx\n", info->lic);
/*
* Sigh: the model field has been renamed with System z9
* to model_capacity and a new model field has been added
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 608cf2987d19..08d12cfaf091 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -123,6 +123,20 @@ SECTIONS
*(.altinstr_replacement)
}
+ /*
+ * Table with the patch locations to undo expolines
+ */
+ .nospec_call_table : {
+ __nospec_call_start = . ;
+ *(.s390_indirect*)
+ __nospec_call_end = . ;
+ }
+ .nospec_return_table : {
+ __nospec_return_start = . ;
+ *(.s390_return*)
+ __nospec_return_end = . ;
+ }
+
/* early.c uses stsi, which requires page aligned data. */
. = ALIGN(PAGE_SIZE);
INIT_DATA_SECTION(0x100)
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 9a4594e0a1ff..a3dbd459cce9 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -23,6 +23,7 @@ config KVM
select PREEMPT_NOTIFIERS
select ANON_INODES
select HAVE_KVM_CPU_RELAX_INTERCEPT
+ select HAVE_KVM_VCPU_ASYNC_IOCTL
select HAVE_KVM_EVENTFD
select KVM_ASYNC_PF
select KVM_ASYNC_PF_SYNC
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 89aa114a2cba..45634b3d2e0a 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -257,6 +257,7 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
case 0x500:
return __diag_virtio_hypercall(vcpu);
default:
+ vcpu->stat.diagnose_other++;
return -EOPNOTSUPP;
}
}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 024ad8bcc516..aabf46f5f883 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -36,7 +36,7 @@ static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id)
{
int c, scn;
- if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND))
+ if (!kvm_s390_test_cpuflags(vcpu, CPUSTAT_ECALL_PEND))
return 0;
BUG_ON(!kvm_s390_use_sca_entries());
@@ -101,18 +101,17 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
/* another external call is pending */
return -EBUSY;
}
- atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_ECALL_PEND);
return 0;
}
static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
{
- struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
int rc, expect;
if (!kvm_s390_use_sca_entries())
return;
- atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_ECALL_PEND);
read_lock(&vcpu->kvm->arch.sca_lock);
if (vcpu->kvm->arch.use_esca) {
struct esca_block *sca = vcpu->kvm->arch.sca;
@@ -190,8 +189,8 @@ static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu)
static inline int is_ioirq(unsigned long irq_type)
{
- return ((irq_type >= IRQ_PEND_IO_ISC_0) &&
- (irq_type <= IRQ_PEND_IO_ISC_7));
+ return ((irq_type >= IRQ_PEND_IO_ISC_7) &&
+ (irq_type <= IRQ_PEND_IO_ISC_0));
}
static uint64_t isc_to_isc_bits(int isc)
@@ -199,25 +198,59 @@ static uint64_t isc_to_isc_bits(int isc)
return (0x80 >> isc) << 24;
}
+static inline u32 isc_to_int_word(u8 isc)
+{
+ return ((u32)isc << 27) | 0x80000000;
+}
+
static inline u8 int_word_to_isc(u32 int_word)
{
return (int_word & 0x38000000) >> 27;
}
+/*
+ * To use atomic bitmap functions, we have to provide a bitmap address
+ * that is u64 aligned. However, the ipm might be u32 aligned.
+ * Therefore, we logically start the bitmap at the very beginning of the
+ * struct and fixup the bit number.
+ */
+#define IPM_BIT_OFFSET (offsetof(struct kvm_s390_gisa, ipm) * BITS_PER_BYTE)
+
+static inline void kvm_s390_gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
+{
+ set_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
+}
+
+static inline u8 kvm_s390_gisa_get_ipm(struct kvm_s390_gisa *gisa)
+{
+ return READ_ONCE(gisa->ipm);
+}
+
+static inline void kvm_s390_gisa_clear_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
+{
+ clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
+}
+
+static inline int kvm_s390_gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
+{
+ return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
+}
+
static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
{
return vcpu->kvm->arch.float_int.pending_irqs |
- vcpu->arch.local_int.pending_irqs;
+ vcpu->arch.local_int.pending_irqs |
+ kvm_s390_gisa_get_ipm(vcpu->kvm->arch.gisa) << IRQ_PEND_IO_ISC_7;
}
static inline int isc_to_irq_type(unsigned long isc)
{
- return IRQ_PEND_IO_ISC_0 + isc;
+ return IRQ_PEND_IO_ISC_0 - isc;
}
static inline int irq_type_to_isc(unsigned long irq_type)
{
- return irq_type - IRQ_PEND_IO_ISC_0;
+ return IRQ_PEND_IO_ISC_0 - irq_type;
}
static unsigned long disable_iscs(struct kvm_vcpu *vcpu,
@@ -278,20 +311,20 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
static void __set_cpu_idle(struct kvm_vcpu *vcpu)
{
- atomic_or(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
- set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
+ set_bit(vcpu->vcpu_id, vcpu->kvm->arch.float_int.idle_mask);
}
static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
{
- atomic_andnot(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
- clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
+ clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.float_int.idle_mask);
}
static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
{
- atomic_andnot(CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
- &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IO_INT | CPUSTAT_EXT_INT |
+ CPUSTAT_STOP_INT);
vcpu->arch.sie_block->lctl = 0x0000;
vcpu->arch.sie_block->ictl &= ~(ICTL_LPSW | ICTL_STCTL | ICTL_PINT);
@@ -302,17 +335,12 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
}
}
-static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
-{
- atomic_or(flag, &vcpu->arch.sie_block->cpuflags);
-}
-
static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
{
if (!(pending_irqs(vcpu) & IRQ_PEND_IO_MASK))
return;
else if (psw_ioint_disabled(vcpu))
- __set_cpuflag(vcpu, CPUSTAT_IO_INT);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT);
else
vcpu->arch.sie_block->lctl |= LCTL_CR6;
}
@@ -322,7 +350,7 @@ static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
if (!(pending_irqs(vcpu) & IRQ_PEND_EXT_MASK))
return;
if (psw_extint_disabled(vcpu))
- __set_cpuflag(vcpu, CPUSTAT_EXT_INT);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
else
vcpu->arch.sie_block->lctl |= LCTL_CR0;
}
@@ -340,7 +368,7 @@ static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu)
static void set_intercept_indicators_stop(struct kvm_vcpu *vcpu)
{
if (kvm_s390_is_stop_irq_pending(vcpu))
- __set_cpuflag(vcpu, CPUSTAT_STOP_INT);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
}
/* Set interception request for non-deliverable interrupts */
@@ -897,18 +925,38 @@ static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu)
return rc ? -EFAULT : 0;
}
+static int __do_deliver_io(struct kvm_vcpu *vcpu, struct kvm_s390_io_info *io)
+{
+ int rc;
+
+ rc = put_guest_lc(vcpu, io->subchannel_id, (u16 *)__LC_SUBCHANNEL_ID);
+ rc |= put_guest_lc(vcpu, io->subchannel_nr, (u16 *)__LC_SUBCHANNEL_NR);
+ rc |= put_guest_lc(vcpu, io->io_int_parm, (u32 *)__LC_IO_INT_PARM);
+ rc |= put_guest_lc(vcpu, io->io_int_word, (u32 *)__LC_IO_INT_WORD);
+ rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ return rc ? -EFAULT : 0;
+}
+
static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
unsigned long irq_type)
{
struct list_head *isc_list;
struct kvm_s390_float_interrupt *fi;
struct kvm_s390_interrupt_info *inti = NULL;
+ struct kvm_s390_io_info io;
+ u32 isc;
int rc = 0;
fi = &vcpu->kvm->arch.float_int;
spin_lock(&fi->lock);
- isc_list = &fi->lists[irq_type_to_isc(irq_type)];
+ isc = irq_type_to_isc(irq_type);
+ isc_list = &fi->lists[isc];
inti = list_first_entry_or_null(isc_list,
struct kvm_s390_interrupt_info,
list);
@@ -936,24 +984,31 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
spin_unlock(&fi->lock);
if (inti) {
- rc = put_guest_lc(vcpu, inti->io.subchannel_id,
- (u16 *)__LC_SUBCHANNEL_ID);
- rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
- (u16 *)__LC_SUBCHANNEL_NR);
- rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
- (u32 *)__LC_IO_INT_PARM);
- rc |= put_guest_lc(vcpu, inti->io.io_int_word,
- (u32 *)__LC_IO_INT_WORD);
- rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
- &vcpu->arch.sie_block->gpsw,
- sizeof(psw_t));
- rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
- &vcpu->arch.sie_block->gpsw,
- sizeof(psw_t));
+ rc = __do_deliver_io(vcpu, &(inti->io));
kfree(inti);
+ goto out;
}
- return rc ? -EFAULT : 0;
+ if (vcpu->kvm->arch.gisa &&
+ kvm_s390_gisa_tac_ipm_gisc(vcpu->kvm->arch.gisa, isc)) {
+ /*
+ * in case an adapter interrupt was not delivered
+ * in SIE context KVM will handle the delivery
+ */
+ VCPU_EVENT(vcpu, 4, "%s isc %u", "deliver: I/O (AI/gisa)", isc);
+ memset(&io, 0, sizeof(io));
+ io.io_int_word = isc_to_int_word(isc);
+ vcpu->stat.deliver_io_int++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+ KVM_S390_INT_IO(1, 0, 0, 0),
+ ((__u32)io.subchannel_id << 16) |
+ io.subchannel_nr,
+ ((__u64)io.io_int_parm << 32) |
+ io.io_int_word);
+ rc = __do_deliver_io(vcpu, &io);
+ }
+out:
+ return rc;
}
typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu);
@@ -1155,8 +1210,8 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
while ((irqs = deliverable_irqs(vcpu)) && !rc) {
- /* bits are in the order of interrupt priority */
- irq_type = find_first_bit(&irqs, IRQ_PEND_COUNT);
+ /* bits are in the reverse order of interrupt priority */
+ irq_type = find_last_bit(&irqs, IRQ_PEND_COUNT);
if (is_ioirq(irq_type)) {
rc = __deliver_io(vcpu, irq_type);
} else {
@@ -1228,7 +1283,7 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
li->irq.ext = irq->u.ext;
set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
- atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
return 0;
}
@@ -1253,7 +1308,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
return -EBUSY;
*extcall = irq->u.extcall;
- atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
return 0;
}
@@ -1297,7 +1352,7 @@ static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
if (test_and_set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs))
return -EBUSY;
stop->flags = irq->u.stop.flags;
- __set_cpuflag(vcpu, CPUSTAT_STOP_INT);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
return 0;
}
@@ -1329,7 +1384,7 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
set_bit(irq->u.emerg.code, li->sigp_emerg_pending);
set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
- atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
return 0;
}
@@ -1373,7 +1428,7 @@ static int __inject_ckc(struct kvm_vcpu *vcpu)
0, 0);
set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
- atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
return 0;
}
@@ -1386,7 +1441,7 @@ static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
0, 0);
set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
- atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
return 0;
}
@@ -1416,20 +1471,86 @@ static struct kvm_s390_interrupt_info *get_io_int(struct kvm *kvm,
return NULL;
}
+static struct kvm_s390_interrupt_info *get_top_io_int(struct kvm *kvm,
+ u64 isc_mask, u32 schid)
+{
+ struct kvm_s390_interrupt_info *inti = NULL;
+ int isc;
+
+ for (isc = 0; isc <= MAX_ISC && !inti; isc++) {
+ if (isc_mask & isc_to_isc_bits(isc))
+ inti = get_io_int(kvm, isc, schid);
+ }
+ return inti;
+}
+
+static int get_top_gisa_isc(struct kvm *kvm, u64 isc_mask, u32 schid)
+{
+ unsigned long active_mask;
+ int isc;
+
+ if (schid)
+ goto out;
+ if (!kvm->arch.gisa)
+ goto out;
+
+ active_mask = (isc_mask & kvm_s390_gisa_get_ipm(kvm->arch.gisa) << 24) << 32;
+ while (active_mask) {
+ isc = __fls(active_mask) ^ (BITS_PER_LONG - 1);
+ if (kvm_s390_gisa_tac_ipm_gisc(kvm->arch.gisa, isc))
+ return isc;
+ clear_bit_inv(isc, &active_mask);
+ }
+out:
+ return -EINVAL;
+}
+
/*
* Dequeue and return an I/O interrupt matching any of the interruption
* subclasses as designated by the isc mask in cr6 and the schid (if != 0).
+ * Take into account the interrupts pending in the interrupt list and in GISA.
+ *
+ * Note that for a guest that does not enable I/O interrupts
+ * but relies on TPI, a flood of classic interrupts may starve
+ * out adapter interrupts on the same isc. Linux does not do
+ * that, and it is possible to work around the issue by configuring
+ * different iscs for classic and adapter interrupts in the guest,
+ * but we may want to revisit this in the future.
*/
struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
u64 isc_mask, u32 schid)
{
- struct kvm_s390_interrupt_info *inti = NULL;
+ struct kvm_s390_interrupt_info *inti, *tmp_inti;
int isc;
- for (isc = 0; isc <= MAX_ISC && !inti; isc++) {
- if (isc_mask & isc_to_isc_bits(isc))
- inti = get_io_int(kvm, isc, schid);
+ inti = get_top_io_int(kvm, isc_mask, schid);
+
+ isc = get_top_gisa_isc(kvm, isc_mask, schid);
+ if (isc < 0)
+ /* no AI in GISA */
+ goto out;
+
+ if (!inti)
+ /* AI in GISA but no classical IO int */
+ goto gisa_out;
+
+ /* both types of interrupts present */
+ if (int_word_to_isc(inti->io.io_int_word) <= isc) {
+ /* classical IO int with higher priority */
+ kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc);
+ goto out;
}
+gisa_out:
+ tmp_inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+ if (tmp_inti) {
+ tmp_inti->type = KVM_S390_INT_IO(1, 0, 0, 0);
+ tmp_inti->io.io_int_word = isc_to_int_word(isc);
+ if (inti)
+ kvm_s390_reinject_io_int(kvm, inti);
+ inti = tmp_inti;
+ } else
+ kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc);
+out:
return inti;
}
@@ -1517,6 +1638,15 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
struct list_head *list;
int isc;
+ isc = int_word_to_isc(inti->io.io_int_word);
+
+ if (kvm->arch.gisa && inti->type & KVM_S390_INT_IO_AI_MASK) {
+ VM_EVENT(kvm, 4, "%s isc %1u", "inject: I/O (AI/gisa)", isc);
+ kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc);
+ kfree(inti);
+ return 0;
+ }
+
fi = &kvm->arch.float_int;
spin_lock(&fi->lock);
if (fi->counters[FIRQ_CNTR_IO] >= KVM_S390_MAX_FLOAT_IRQS) {
@@ -1532,7 +1662,6 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
inti->io.subchannel_id >> 8,
inti->io.subchannel_id >> 1 & 0x3,
inti->io.subchannel_nr);
- isc = int_word_to_isc(inti->io.io_int_word);
list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
list_add_tail(&inti->list, list);
set_bit(isc_to_irq_type(isc), &fi->pending_irqs);
@@ -1546,7 +1675,6 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
static void __floating_irq_kick(struct kvm *kvm, u64 type)
{
struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
- struct kvm_s390_local_interrupt *li;
struct kvm_vcpu *dst_vcpu;
int sigcpu, online_vcpus, nr_tries = 0;
@@ -1568,20 +1696,17 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type)
dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
/* make the VCPU drop out of the SIE, or wake it up if sleeping */
- li = &dst_vcpu->arch.local_int;
- spin_lock(&li->lock);
switch (type) {
case KVM_S390_MCHK:
- atomic_or(CPUSTAT_STOP_INT, li->cpuflags);
+ kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT);
break;
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
- atomic_or(CPUSTAT_IO_INT, li->cpuflags);
+ kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);
break;
default:
- atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
+ kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_EXT_INT);
break;
}
- spin_unlock(&li->lock);
kvm_s390_vcpu_wakeup(dst_vcpu);
}
@@ -1820,6 +1945,7 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm)
for (i = 0; i < FIRQ_MAX_COUNT; i++)
fi->counters[i] = 0;
spin_unlock(&fi->lock);
+ kvm_s390_gisa_clear(kvm);
};
static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
@@ -1847,6 +1973,22 @@ static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
max_irqs = len / sizeof(struct kvm_s390_irq);
+ if (kvm->arch.gisa &&
+ kvm_s390_gisa_get_ipm(kvm->arch.gisa)) {
+ for (i = 0; i <= MAX_ISC; i++) {
+ if (n == max_irqs) {
+ /* signal userspace to try again */
+ ret = -ENOMEM;
+ goto out_nolock;
+ }
+ if (kvm_s390_gisa_tac_ipm_gisc(kvm->arch.gisa, i)) {
+ irq = (struct kvm_s390_irq *) &buf[n];
+ irq->type = KVM_S390_INT_IO(1, 0, 0, 0);
+ irq->u.io.io_int_word = isc_to_int_word(i);
+ n++;
+ }
+ }
+ }
fi = &kvm->arch.float_int;
spin_lock(&fi->lock);
for (i = 0; i < FIRQ_LIST_COUNT; i++) {
@@ -1885,6 +2027,7 @@ static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
out:
spin_unlock(&fi->lock);
+out_nolock:
if (!ret && n > 0) {
if (copy_to_user(usrbuf, buf, sizeof(struct kvm_s390_irq) * n))
ret = -EFAULT;
@@ -2245,7 +2388,7 @@ static int kvm_s390_inject_airq(struct kvm *kvm,
struct kvm_s390_interrupt s390int = {
.type = KVM_S390_INT_IO(1, 0, 0, 0),
.parm = 0,
- .parm64 = (adapter->isc << 27) | 0x80000000,
+ .parm64 = isc_to_int_word(adapter->isc),
};
int ret = 0;
@@ -2687,3 +2830,28 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
return n;
}
+
+void kvm_s390_gisa_clear(struct kvm *kvm)
+{
+ if (kvm->arch.gisa) {
+ memset(kvm->arch.gisa, 0, sizeof(struct kvm_s390_gisa));
+ kvm->arch.gisa->next_alert = (u32)(u64)kvm->arch.gisa;
+ VM_EVENT(kvm, 3, "gisa 0x%pK cleared", kvm->arch.gisa);
+ }
+}
+
+void kvm_s390_gisa_init(struct kvm *kvm)
+{
+ if (css_general_characteristics.aiv) {
+ kvm->arch.gisa = &kvm->arch.sie_page2->gisa;
+ VM_EVENT(kvm, 3, "gisa 0x%pK initialized", kvm->arch.gisa);
+ kvm_s390_gisa_clear(kvm);
+ }
+}
+
+void kvm_s390_gisa_destroy(struct kvm *kvm)
+{
+ if (!kvm->arch.gisa)
+ return;
+ kvm->arch.gisa = NULL;
+}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 1371dff2b90d..ba4c7092335a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2,7 +2,7 @@
/*
* hosting IBM Z kernel virtual machines (s390x)
*
- * Copyright IBM Corp. 2008, 2017
+ * Copyright IBM Corp. 2008, 2018
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -87,19 +87,31 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
+ { "instruction_epsw", VCPU_STAT(instruction_epsw) },
+ { "instruction_gs", VCPU_STAT(instruction_gs) },
+ { "instruction_io_other", VCPU_STAT(instruction_io_other) },
+ { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
+ { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
+ { "instruction_ptff", VCPU_STAT(instruction_ptff) },
{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
+ { "instruction_sck", VCPU_STAT(instruction_sck) },
+ { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
{ "instruction_spx", VCPU_STAT(instruction_spx) },
{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
{ "instruction_stap", VCPU_STAT(instruction_stap) },
- { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
+ { "instruction_iske", VCPU_STAT(instruction_iske) },
+ { "instruction_ri", VCPU_STAT(instruction_ri) },
+ { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
+ { "instruction_sske", VCPU_STAT(instruction_sske) },
{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
- { "instruction_stsch", VCPU_STAT(instruction_stsch) },
- { "instruction_chsc", VCPU_STAT(instruction_chsc) },
{ "instruction_essa", VCPU_STAT(instruction_essa) },
{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
+ { "instruction_tb", VCPU_STAT(instruction_tb) },
+ { "instruction_tpi", VCPU_STAT(instruction_tpi) },
{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
+ { "instruction_tsch", VCPU_STAT(instruction_tsch) },
{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
{ "instruction_sie", VCPU_STAT(instruction_sie) },
{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
@@ -118,12 +130,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
- { "diagnose_10", VCPU_STAT(diagnose_10) },
- { "diagnose_44", VCPU_STAT(diagnose_44) },
- { "diagnose_9c", VCPU_STAT(diagnose_9c) },
- { "diagnose_258", VCPU_STAT(diagnose_258) },
- { "diagnose_308", VCPU_STAT(diagnose_308) },
- { "diagnose_500", VCPU_STAT(diagnose_500) },
+ { "instruction_diag_10", VCPU_STAT(diagnose_10) },
+ { "instruction_diag_44", VCPU_STAT(diagnose_44) },
+ { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
+ { "instruction_diag_258", VCPU_STAT(diagnose_258) },
+ { "instruction_diag_308", VCPU_STAT(diagnose_308) },
+ { "instruction_diag_500", VCPU_STAT(diagnose_500) },
+ { "instruction_diag_other", VCPU_STAT(diagnose_other) },
{ NULL }
};
@@ -576,7 +589,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
case KVM_CAP_S390_GS:
r = -EINVAL;
mutex_lock(&kvm->lock);
- if (atomic_read(&kvm->online_vcpus)) {
+ if (kvm->created_vcpus) {
r = -EBUSY;
} else if (test_facility(133)) {
set_kvm_facility(kvm->arch.model.fac_mask, 133);
@@ -1088,7 +1101,6 @@ static int kvm_s390_set_processor_feat(struct kvm *kvm,
struct kvm_device_attr *attr)
{
struct kvm_s390_vm_cpu_feat data;
- int ret = -EBUSY;
if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
return -EFAULT;
@@ -1098,13 +1110,18 @@ static int kvm_s390_set_processor_feat(struct kvm *kvm,
return -EINVAL;
mutex_lock(&kvm->lock);
- if (!atomic_read(&kvm->online_vcpus)) {
- bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
- KVM_S390_VM_CPU_FEAT_NR_BITS);
- ret = 0;
+ if (kvm->created_vcpus) {
+ mutex_unlock(&kvm->lock);
+ return -EBUSY;
}
+ bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
+ KVM_S390_VM_CPU_FEAT_NR_BITS);
mutex_unlock(&kvm->lock);
- return ret;
+ VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
+ data.feat[0],
+ data.feat[1],
+ data.feat[2]);
+ return 0;
}
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
@@ -1206,6 +1223,10 @@ static int kvm_s390_get_processor_feat(struct kvm *kvm,
KVM_S390_VM_CPU_FEAT_NR_BITS);
if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
return -EFAULT;
+ VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
+ data.feat[0],
+ data.feat[1],
+ data.feat[2]);
return 0;
}
@@ -1219,6 +1240,10 @@ static int kvm_s390_get_machine_feat(struct kvm *kvm,
KVM_S390_VM_CPU_FEAT_NR_BITS);
if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
return -EFAULT;
+ VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
+ data.feat[0],
+ data.feat[1],
+ data.feat[2]);
return 0;
}
@@ -1911,6 +1936,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (!kvm->arch.dbf)
goto out_err;
+ BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
kvm->arch.sie_page2 =
(struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
if (!kvm->arch.sie_page2)
@@ -1981,6 +2007,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
spin_lock_init(&kvm->arch.start_stop_lock);
kvm_s390_vsie_init(kvm);
+ kvm_s390_gisa_init(kvm);
KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
return 0;
@@ -2043,6 +2070,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_free_vcpus(kvm);
sca_dispose(kvm);
debug_unregister(kvm->arch.dbf);
+ kvm_s390_gisa_destroy(kvm);
free_page((unsigned long)kvm->arch.sie_page2);
if (!kvm_is_ucontrol(kvm))
gmap_remove(kvm->arch.gmap);
@@ -2314,7 +2342,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
gmap_enable(vcpu->arch.enabled_gmap);
- atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
__start_cpu_timer_accounting(vcpu);
vcpu->cpu = cpu;
@@ -2325,7 +2353,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
vcpu->cpu = -1;
if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
__stop_cpu_timer_accounting(vcpu);
- atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
vcpu->arch.enabled_gmap = gmap_get_enabled();
gmap_disable(vcpu->arch.enabled_gmap);
@@ -2422,9 +2450,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
CPUSTAT_STOPPED);
if (test_kvm_facility(vcpu->kvm, 78))
- atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
else if (test_kvm_facility(vcpu->kvm, 8))
- atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
kvm_s390_vcpu_setup_model(vcpu);
@@ -2456,12 +2484,17 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
if (test_kvm_facility(vcpu->kvm, 139))
vcpu->arch.sie_block->ecd |= ECD_MEF;
+ if (vcpu->arch.sie_block->gd) {
+ vcpu->arch.sie_block->eca |= ECA_AIV;
+ VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
+ vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
+ }
vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
| SDNXC;
vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
if (sclp.has_kss)
- atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
else
vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
@@ -2508,9 +2541,9 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
vcpu->arch.sie_block->icpua = id;
spin_lock_init(&vcpu->arch.local_int.lock);
- vcpu->arch.local_int.float_int = &kvm->arch.float_int;
- vcpu->arch.local_int.wq = &vcpu->wq;
- vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
+ vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
+ if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
+ vcpu->arch.sie_block->gd |= GISA_FORMAT1;
seqcount_init(&vcpu->arch.cputm_seqcount);
rc = kvm_vcpu_init(vcpu, kvm, id);
@@ -2567,7 +2600,7 @@ static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
* return immediately. */
void exit_sie(struct kvm_vcpu *vcpu)
{
- atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
cpu_relax();
}
@@ -2720,47 +2753,70 @@ static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
+ vcpu_load(vcpu);
memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
+ vcpu_load(vcpu);
memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
+ vcpu_load(vcpu);
+
memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
+
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
+ vcpu_load(vcpu);
+
memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
+
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- if (test_fp_ctl(fpu->fpc))
- return -EINVAL;
+ int ret = 0;
+
+ vcpu_load(vcpu);
+
+ if (test_fp_ctl(fpu->fpc)) {
+ ret = -EINVAL;
+ goto out;
+ }
vcpu->run->s.regs.fpc = fpu->fpc;
if (MACHINE_HAS_VX)
convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
(freg_t *) fpu->fprs);
else
memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
- return 0;
+
+out:
+ vcpu_put(vcpu);
+ return ret;
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
+ vcpu_load(vcpu);
+
/* make sure we have the latest values */
save_fpu_regs();
if (MACHINE_HAS_VX)
@@ -2769,6 +2825,8 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
else
memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
fpu->fpc = vcpu->run->s.regs.fpc;
+
+ vcpu_put(vcpu);
return 0;
}
@@ -2800,41 +2858,56 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
{
int rc = 0;
+ vcpu_load(vcpu);
+
vcpu->guest_debug = 0;
kvm_s390_clear_bp_data(vcpu);
- if (dbg->control & ~VALID_GUESTDBG_FLAGS)
- return -EINVAL;
- if (!sclp.has_gpere)
- return -EINVAL;
+ if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
+ rc = -EINVAL;
+ goto out;
+ }
+ if (!sclp.has_gpere) {
+ rc = -EINVAL;
+ goto out;
+ }
if (dbg->control & KVM_GUESTDBG_ENABLE) {
vcpu->guest_debug = dbg->control;
/* enforce guest PER */
- atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
rc = kvm_s390_import_bp_data(vcpu, dbg);
} else {
- atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
vcpu->arch.guestdbg.last_bp = 0;
}
if (rc) {
vcpu->guest_debug = 0;
kvm_s390_clear_bp_data(vcpu);
- atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
}
+out:
+ vcpu_put(vcpu);
return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
+ int ret;
+
+ vcpu_load(vcpu);
+
/* CHECK_STOP and LOAD are not supported yet */
- return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
- KVM_MP_STATE_OPERATING;
+ ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
+ KVM_MP_STATE_OPERATING;
+
+ vcpu_put(vcpu);
+ return ret;
}
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
@@ -2842,6 +2915,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
{
int rc = 0;
+ vcpu_load(vcpu);
+
/* user space knows about this interface - let it control the state */
vcpu->kvm->arch.user_cpu_state_ctrl = 1;
@@ -2859,12 +2934,13 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
rc = -ENXIO;
}
+ vcpu_put(vcpu);
return rc;
}
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
- return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
+ return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
@@ -2900,8 +2976,7 @@ retry:
if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
if (!ibs_enabled(vcpu)) {
trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
- atomic_or(CPUSTAT_IBS,
- &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
}
goto retry;
}
@@ -2909,8 +2984,7 @@ retry:
if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
if (ibs_enabled(vcpu)) {
trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
- atomic_andnot(CPUSTAT_IBS,
- &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
}
goto retry;
}
@@ -3390,9 +3464,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (kvm_run->immediate_exit)
return -EINTR;
+ vcpu_load(vcpu);
+
if (guestdbg_exit_pending(vcpu)) {
kvm_s390_prepare_debug_exit(vcpu);
- return 0;
+ rc = 0;
+ goto out;
}
kvm_sigset_activate(vcpu);
@@ -3402,7 +3479,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
} else if (is_vcpu_stopped(vcpu)) {
pr_err_ratelimited("can't run stopped vcpu %d\n",
vcpu->vcpu_id);
- return -EINVAL;
+ rc = -EINVAL;
+ goto out;
}
sync_regs(vcpu, kvm_run);
@@ -3432,6 +3510,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_sigset_deactivate(vcpu);
vcpu->stat.exit_userspace++;
+out:
+ vcpu_put(vcpu);
return rc;
}
@@ -3560,7 +3640,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
__disable_ibs_on_all_vcpus(vcpu->kvm);
}
- atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
/*
* Another VCPU might have used IBS while we were offline.
* Let's play safe and flush the VCPU at startup.
@@ -3586,7 +3666,7 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
kvm_s390_clear_stop_irq(vcpu);
- atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
__disable_ibs_on_vcpu(vcpu);
for (i = 0; i < online_vcpus; i++) {
@@ -3693,36 +3773,45 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
return r;
}
-long kvm_arch_vcpu_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+long kvm_arch_vcpu_async_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
{
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
- int idx;
- long r;
switch (ioctl) {
case KVM_S390_IRQ: {
struct kvm_s390_irq s390irq;
- r = -EFAULT;
if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
- break;
- r = kvm_s390_inject_vcpu(vcpu, &s390irq);
- break;
+ return -EFAULT;
+ return kvm_s390_inject_vcpu(vcpu, &s390irq);
}
case KVM_S390_INTERRUPT: {
struct kvm_s390_interrupt s390int;
struct kvm_s390_irq s390irq;
- r = -EFAULT;
if (copy_from_user(&s390int, argp, sizeof(s390int)))
- break;
+ return -EFAULT;
if (s390int_to_s390irq(&s390int, &s390irq))
return -EINVAL;
- r = kvm_s390_inject_vcpu(vcpu, &s390irq);
- break;
+ return kvm_s390_inject_vcpu(vcpu, &s390irq);
+ }
}
+ return -ENOIOCTLCMD;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ int idx;
+ long r;
+
+ vcpu_load(vcpu);
+
+ switch (ioctl) {
case KVM_S390_STORE_STATUS:
idx = srcu_read_lock(&vcpu->kvm->srcu);
r = kvm_s390_vcpu_store_status(vcpu, arg);
@@ -3847,6 +3936,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
default:
r = -ENOTTY;
}
+
+ vcpu_put(vcpu);
return r;
}
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 5e46ba429bcb..bd31b37b0e6f 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -47,14 +47,29 @@ do { \
d_args); \
} while (0)
+static inline void kvm_s390_set_cpuflags(struct kvm_vcpu *vcpu, u32 flags)
+{
+ atomic_or(flags, &vcpu->arch.sie_block->cpuflags);
+}
+
+static inline void kvm_s390_clear_cpuflags(struct kvm_vcpu *vcpu, u32 flags)
+{
+ atomic_andnot(flags, &vcpu->arch.sie_block->cpuflags);
+}
+
+static inline bool kvm_s390_test_cpuflags(struct kvm_vcpu *vcpu, u32 flags)
+{
+ return (atomic_read(&vcpu->arch.sie_block->cpuflags) & flags) == flags;
+}
+
static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
{
- return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED;
+ return kvm_s390_test_cpuflags(vcpu, CPUSTAT_STOPPED);
}
static inline int is_vcpu_idle(struct kvm_vcpu *vcpu)
{
- return test_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
+ return test_bit(vcpu->vcpu_id, vcpu->kvm->arch.float_int.idle_mask);
}
static inline int kvm_is_ucontrol(struct kvm *kvm)
@@ -367,6 +382,9 @@ int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu,
void __user *buf, int len);
int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu,
__u8 __user *buf, int len);
+void kvm_s390_gisa_init(struct kvm *kvm);
+void kvm_s390_gisa_clear(struct kvm *kvm);
+void kvm_s390_gisa_destroy(struct kvm *kvm);
/* implemented in guestdbg.c */
void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 0714bfa56da0..c4c4e157c036 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -2,7 +2,7 @@
/*
* handling privileged instructions
*
- * Copyright IBM Corp. 2008, 2013
+ * Copyright IBM Corp. 2008, 2018
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -34,6 +34,8 @@
static int handle_ri(struct kvm_vcpu *vcpu)
{
+ vcpu->stat.instruction_ri++;
+
if (test_kvm_facility(vcpu->kvm, 64)) {
VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (lazy)");
vcpu->arch.sie_block->ecb3 |= ECB3_RI;
@@ -53,6 +55,8 @@ int kvm_s390_handle_aa(struct kvm_vcpu *vcpu)
static int handle_gs(struct kvm_vcpu *vcpu)
{
+ vcpu->stat.instruction_gs++;
+
if (test_kvm_facility(vcpu->kvm, 133)) {
VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (lazy)");
preempt_disable();
@@ -85,6 +89,8 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
u8 ar;
u64 op2, val;
+ vcpu->stat.instruction_sck++;
+
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -203,14 +209,14 @@ int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
trace_kvm_s390_skey_related_inst(vcpu);
if (!(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) &&
- !(atomic_read(&sie_block->cpuflags) & CPUSTAT_KSS))
+ !kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
return rc;
rc = s390_enable_skey();
VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc);
if (!rc) {
- if (atomic_read(&sie_block->cpuflags) & CPUSTAT_KSS)
- atomic_andnot(CPUSTAT_KSS, &sie_block->cpuflags);
+ if (kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_KSS);
else
sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE |
ICTL_RRBE);
@@ -222,7 +228,6 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
{
int rc;
- vcpu->stat.instruction_storage_key++;
rc = kvm_s390_skey_check_enable(vcpu);
if (rc)
return rc;
@@ -242,6 +247,8 @@ static int handle_iske(struct kvm_vcpu *vcpu)
int reg1, reg2;
int rc;
+ vcpu->stat.instruction_iske++;
+
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -274,6 +281,8 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
int reg1, reg2;
int rc;
+ vcpu->stat.instruction_rrbe++;
+
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -312,6 +321,8 @@ static int handle_sske(struct kvm_vcpu *vcpu)
int reg1, reg2;
int rc;
+ vcpu->stat.instruction_sske++;
+
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -392,6 +403,8 @@ static int handle_test_block(struct kvm_vcpu *vcpu)
gpa_t addr;
int reg2;
+ vcpu->stat.instruction_tb++;
+
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -424,6 +437,8 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
u64 addr;
u8 ar;
+ vcpu->stat.instruction_tpi++;
+
addr = kvm_s390_get_base_disp_s(vcpu, &ar);
if (addr & 3)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -484,6 +499,8 @@ static int handle_tsch(struct kvm_vcpu *vcpu)
struct kvm_s390_interrupt_info *inti = NULL;
const u64 isc_mask = 0xffUL << 24; /* all iscs set */
+ vcpu->stat.instruction_tsch++;
+
/* a valid schid has at least one bit set */
if (vcpu->run->s.regs.gprs[1])
inti = kvm_s390_get_io_int(vcpu->kvm, isc_mask,
@@ -527,6 +544,7 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
if (vcpu->arch.sie_block->ipa == 0xb235)
return handle_tsch(vcpu);
/* Handle in userspace. */
+ vcpu->stat.instruction_io_other++;
return -EOPNOTSUPP;
} else {
/*
@@ -592,6 +610,8 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
int rc;
u8 ar;
+ vcpu->stat.instruction_lpsw++;
+
if (gpsw->mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -619,6 +639,8 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
int rc;
u8 ar;
+ vcpu->stat.instruction_lpswe++;
+
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -828,6 +850,8 @@ static int handle_epsw(struct kvm_vcpu *vcpu)
{
int reg1, reg2;
+ vcpu->stat.instruction_epsw++;
+
kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
/* This basically extracts the mask half of the psw. */
@@ -1332,6 +1356,8 @@ static int handle_sckpf(struct kvm_vcpu *vcpu)
{
u32 value;
+ vcpu->stat.instruction_sckpf++;
+
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -1347,6 +1373,8 @@ static int handle_sckpf(struct kvm_vcpu *vcpu)
static int handle_ptff(struct kvm_vcpu *vcpu)
{
+ vcpu->stat.instruction_ptff++;
+
/* we don't emulate any control instructions yet */
kvm_s390_set_psw_cc(vcpu, 3);
return 0;
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index c1f5cde2c878..683036c1c92a 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -20,22 +20,18 @@
static int __sigp_sense(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
u64 *reg)
{
- struct kvm_s390_local_interrupt *li;
- int cpuflags;
+ const bool stopped = kvm_s390_test_cpuflags(dst_vcpu, CPUSTAT_STOPPED);
int rc;
int ext_call_pending;
- li = &dst_vcpu->arch.local_int;
-
- cpuflags = atomic_read(li->cpuflags);
ext_call_pending = kvm_s390_ext_call_pending(dst_vcpu);
- if (!(cpuflags & CPUSTAT_STOPPED) && !ext_call_pending)
+ if (!stopped && !ext_call_pending)
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
else {
*reg &= 0xffffffff00000000UL;
if (ext_call_pending)
*reg |= SIGP_STATUS_EXT_CALL_PENDING;
- if (cpuflags & CPUSTAT_STOPPED)
+ if (stopped)
*reg |= SIGP_STATUS_STOPPED;
rc = SIGP_CC_STATUS_STORED;
}
@@ -208,11 +204,9 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu,
struct kvm_vcpu *dst_vcpu,
u32 addr, u64 *reg)
{
- int flags;
int rc;
- flags = atomic_read(dst_vcpu->arch.local_int.cpuflags);
- if (!(flags & CPUSTAT_STOPPED)) {
+ if (!kvm_s390_test_cpuflags(dst_vcpu, CPUSTAT_STOPPED)) {
*reg &= 0xffffffff00000000UL;
*reg |= SIGP_STATUS_INCORRECT_STATE;
return SIGP_CC_STATUS_STORED;
@@ -231,7 +225,6 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu,
static int __sigp_sense_running(struct kvm_vcpu *vcpu,
struct kvm_vcpu *dst_vcpu, u64 *reg)
{
- struct kvm_s390_local_interrupt *li;
int rc;
if (!test_kvm_facility(vcpu->kvm, 9)) {
@@ -240,8 +233,7 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu,
return SIGP_CC_STATUS_STORED;
}
- li = &dst_vcpu->arch.local_int;
- if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
+ if (kvm_s390_test_cpuflags(dst_vcpu, CPUSTAT_RUNNING)) {
/* running */
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
} else {
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 751348348477..ec772700ff96 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -28,13 +28,23 @@ struct vsie_page {
* the same offset as that in struct sie_page!
*/
struct mcck_volatile_info mcck_info; /* 0x0200 */
- /* the pinned originial scb */
+ /*
+ * The pinned original scb. Be aware that other VCPUs can modify
+ * it while we read from it. Values that are used for conditions or
+ * are reused conditionally, should be accessed via READ_ONCE.
+ */
struct kvm_s390_sie_block *scb_o; /* 0x0218 */
/* the shadow gmap in use by the vsie_page */
struct gmap *gmap; /* 0x0220 */
/* address of the last reported fault to guest2 */
unsigned long fault_addr; /* 0x0228 */
- __u8 reserved[0x0700 - 0x0230]; /* 0x0230 */
+ /* calculated guest addresses of satellite control blocks */
+ gpa_t sca_gpa; /* 0x0230 */
+ gpa_t itdba_gpa; /* 0x0238 */
+ gpa_t gvrd_gpa; /* 0x0240 */
+ gpa_t riccbd_gpa; /* 0x0248 */
+ gpa_t sdnx_gpa; /* 0x0250 */
+ __u8 reserved[0x0700 - 0x0258]; /* 0x0258 */
struct kvm_s390_crypto_cb crycb; /* 0x0700 */
__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */
};
@@ -140,12 +150,13 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
- u32 crycb_addr = scb_o->crycbd & 0x7ffffff8U;
+ const uint32_t crycbd_o = READ_ONCE(scb_o->crycbd);
+ const u32 crycb_addr = crycbd_o & 0x7ffffff8U;
unsigned long *b1, *b2;
u8 ecb3_flags;
scb_s->crycbd = 0;
- if (!(scb_o->crycbd & vcpu->arch.sie_block->crycbd & CRYCB_FORMAT1))
+ if (!(crycbd_o & vcpu->arch.sie_block->crycbd & CRYCB_FORMAT1))
return 0;
/* format-1 is supported with message-security-assist extension 3 */
if (!test_kvm_facility(vcpu->kvm, 76))
@@ -183,12 +194,15 @@ static void prepare_ibc(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+ /* READ_ONCE does not work on bitfields - use a temporary variable */
+ const uint32_t __new_ibc = scb_o->ibc;
+ const uint32_t new_ibc = READ_ONCE(__new_ibc) & 0x0fffU;
__u64 min_ibc = (sclp.ibc >> 16) & 0x0fffU;
scb_s->ibc = 0;
/* ibc installed in g2 and requested for g3 */
- if (vcpu->kvm->arch.model.ibc && (scb_o->ibc & 0x0fffU)) {
- scb_s->ibc = scb_o->ibc & 0x0fffU;
+ if (vcpu->kvm->arch.model.ibc && new_ibc) {
+ scb_s->ibc = new_ibc;
/* takte care of the minimum ibc level of the machine */
if (scb_s->ibc < min_ibc)
scb_s->ibc = min_ibc;
@@ -259,6 +273,10 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+ /* READ_ONCE does not work on bitfields - use a temporary variable */
+ const uint32_t __new_prefix = scb_o->prefix;
+ const uint32_t new_prefix = READ_ONCE(__new_prefix);
+ const bool wants_tx = READ_ONCE(scb_o->ecb) & ECB_TE;
bool had_tx = scb_s->ecb & ECB_TE;
unsigned long new_mso = 0;
int rc;
@@ -306,14 +324,14 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
scb_s->icpua = scb_o->icpua;
if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_SM))
- new_mso = scb_o->mso & 0xfffffffffff00000UL;
+ new_mso = READ_ONCE(scb_o->mso) & 0xfffffffffff00000UL;
/* if the hva of the prefix changes, we have to remap the prefix */
- if (scb_s->mso != new_mso || scb_s->prefix != scb_o->prefix)
+ if (scb_s->mso != new_mso || scb_s->prefix != new_prefix)
prefix_unmapped(vsie_page);
/* SIE will do mso/msl validity and exception checks for us */
scb_s->msl = scb_o->msl & 0xfffffffffff00000UL;
scb_s->mso = new_mso;
- scb_s->prefix = scb_o->prefix;
+ scb_s->prefix = new_prefix;
/* We have to definetly flush the tlb if this scb never ran */
if (scb_s->ihcpu != 0xffffU)
@@ -325,11 +343,11 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP))
scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT;
/* transactional execution */
- if (test_kvm_facility(vcpu->kvm, 73)) {
+ if (test_kvm_facility(vcpu->kvm, 73) && wants_tx) {
/* remap the prefix is tx is toggled on */
- if ((scb_o->ecb & ECB_TE) && !had_tx)
+ if (!had_tx)
prefix_unmapped(vsie_page);
- scb_s->ecb |= scb_o->ecb & ECB_TE;
+ scb_s->ecb |= ECB_TE;
}
/* branch prediction */
if (test_kvm_facility(vcpu->kvm, 82))
@@ -473,46 +491,42 @@ static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
/* unpin all blocks previously pinned by pin_blocks(), marking them dirty */
static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
- struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
hpa_t hpa;
- gpa_t gpa;
hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol;
if (hpa) {
- gpa = scb_o->scaol & ~0xfUL;
- if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO))
- gpa |= (u64) scb_o->scaoh << 32;
- unpin_guest_page(vcpu->kvm, gpa, hpa);
+ unpin_guest_page(vcpu->kvm, vsie_page->sca_gpa, hpa);
+ vsie_page->sca_gpa = 0;
scb_s->scaol = 0;
scb_s->scaoh = 0;
}
hpa = scb_s->itdba;
if (hpa) {
- gpa = scb_o->itdba & ~0xffUL;
- unpin_guest_page(vcpu->kvm, gpa, hpa);
+ unpin_guest_page(vcpu->kvm, vsie_page->itdba_gpa, hpa);
+ vsie_page->itdba_gpa = 0;
scb_s->itdba = 0;
}
hpa = scb_s->gvrd;
if (hpa) {
- gpa = scb_o->gvrd & ~0x1ffUL;
- unpin_guest_page(vcpu->kvm, gpa, hpa);
+ unpin_guest_page(vcpu->kvm, vsie_page->gvrd_gpa, hpa);
+ vsie_page->gvrd_gpa = 0;
scb_s->gvrd = 0;
}
hpa = scb_s->riccbd;
if (hpa) {
- gpa = scb_o->riccbd & ~0x3fUL;
- unpin_guest_page(vcpu->kvm, gpa, hpa);
+ unpin_guest_page(vcpu->kvm, vsie_page->riccbd_gpa, hpa);
+ vsie_page->riccbd_gpa = 0;
scb_s->riccbd = 0;
}
hpa = scb_s->sdnxo;
if (hpa) {
- gpa = scb_o->sdnxo;
- unpin_guest_page(vcpu->kvm, gpa, hpa);
+ unpin_guest_page(vcpu->kvm, vsie_page->sdnx_gpa, hpa);
+ vsie_page->sdnx_gpa = 0;
scb_s->sdnxo = 0;
}
}
@@ -539,9 +553,9 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
gpa_t gpa;
int rc = 0;
- gpa = scb_o->scaol & ~0xfUL;
+ gpa = READ_ONCE(scb_o->scaol) & ~0xfUL;
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO))
- gpa |= (u64) scb_o->scaoh << 32;
+ gpa |= (u64) READ_ONCE(scb_o->scaoh) << 32;
if (gpa) {
if (!(gpa & ~0x1fffUL))
rc = set_validity_icpt(scb_s, 0x0038U);
@@ -557,11 +571,12 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
}
if (rc)
goto unpin;
+ vsie_page->sca_gpa = gpa;
scb_s->scaoh = (u32)((u64)hpa >> 32);
scb_s->scaol = (u32)(u64)hpa;
}
- gpa = scb_o->itdba & ~0xffUL;
+ gpa = READ_ONCE(scb_o->itdba) & ~0xffUL;
if (gpa && (scb_s->ecb & ECB_TE)) {
if (!(gpa & ~0x1fffU)) {
rc = set_validity_icpt(scb_s, 0x0080U);
@@ -573,10 +588,11 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
rc = set_validity_icpt(scb_s, 0x0080U);
goto unpin;
}
+ vsie_page->itdba_gpa = gpa;
scb_s->itdba = hpa;
}
- gpa = scb_o->gvrd & ~0x1ffUL;
+ gpa = READ_ONCE(scb_o->gvrd) & ~0x1ffUL;
if (gpa && (scb_s->eca & ECA_VX) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
if (!(gpa & ~0x1fffUL)) {
rc = set_validity_icpt(scb_s, 0x1310U);
@@ -591,10 +607,11 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
rc = set_validity_icpt(scb_s, 0x1310U);
goto unpin;
}
+ vsie_page->gvrd_gpa = gpa;
scb_s->gvrd = hpa;
}
- gpa = scb_o->riccbd & ~0x3fUL;
+ gpa = READ_ONCE(scb_o->riccbd) & ~0x3fUL;
if (gpa && (scb_s->ecb3 & ECB3_RI)) {
if (!(gpa & ~0x1fffUL)) {
rc = set_validity_icpt(scb_s, 0x0043U);
@@ -607,13 +624,14 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
goto unpin;
}
/* Validity 0x0044 will be checked by SIE */
+ vsie_page->riccbd_gpa = gpa;
scb_s->riccbd = hpa;
}
if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
unsigned long sdnxc;
- gpa = scb_o->sdnxo & ~0xfUL;
- sdnxc = scb_o->sdnxo & 0xfUL;
+ gpa = READ_ONCE(scb_o->sdnxo) & ~0xfUL;
+ sdnxc = READ_ONCE(scb_o->sdnxo) & 0xfUL;
if (!gpa || !(gpa & ~0x1fffUL)) {
rc = set_validity_icpt(scb_s, 0x10b0U);
goto unpin;
@@ -634,6 +652,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
rc = set_validity_icpt(scb_s, 0x10b0U);
goto unpin;
}
+ vsie_page->sdnx_gpa = gpa;
scb_s->sdnxo = hpa | sdnxc;
}
return 0;
@@ -778,7 +797,7 @@ static void retry_vsie_icpt(struct vsie_page *vsie_page)
static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
- __u32 fac = vsie_page->scb_o->fac & 0x7ffffff8U;
+ __u32 fac = READ_ONCE(vsie_page->scb_o->fac) & 0x7ffffff8U;
if (fac && test_kvm_facility(vcpu->kvm, 7)) {
retry_vsie_icpt(vsie_page);
@@ -904,7 +923,7 @@ static void register_shadow_scb(struct kvm_vcpu *vcpu,
* External calls have to lead to a kick of the vcpu and
* therefore the vsie -> Simulate Wait state.
*/
- atomic_or(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
/*
* We have to adjust the g3 epoch by the g2 epoch. The epoch will
* automatically be adjusted on tod clock changes via kvm_sync_clock.
@@ -926,7 +945,7 @@ static void register_shadow_scb(struct kvm_vcpu *vcpu,
*/
static void unregister_shadow_scb(struct kvm_vcpu *vcpu)
{
- atomic_andnot(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
WRITE_ONCE(vcpu->arch.vsie_block, NULL);
}
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 05d459b638f5..2c55a2b9d6c6 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -815,27 +815,17 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
* @ptl: pointer to the spinlock pointer
*
* Returns a pointer to the locked pte for a guest address, or NULL
- *
- * Note: Can also be called for shadow gmaps.
*/
static pte_t *gmap_pte_op_walk(struct gmap *gmap, unsigned long gaddr,
spinlock_t **ptl)
{
unsigned long *table;
- if (gmap_is_shadow(gmap))
- spin_lock(&gmap->guest_table_lock);
+ BUG_ON(gmap_is_shadow(gmap));
/* Walk the gmap page table, lock and get pte pointer */
table = gmap_table_walk(gmap, gaddr, 1); /* get segment pointer */
- if (!table || *table & _SEGMENT_ENTRY_INVALID) {
- if (gmap_is_shadow(gmap))
- spin_unlock(&gmap->guest_table_lock);
+ if (!table || *table & _SEGMENT_ENTRY_INVALID)
return NULL;
- }
- if (gmap_is_shadow(gmap)) {
- *ptl = &gmap->guest_table_lock;
- return pte_offset_map((pmd_t *) table, gaddr);
- }
return pte_alloc_map_lock(gmap->mm, (pmd_t *) table, gaddr, ptl);
}
@@ -889,8 +879,6 @@ static void gmap_pte_op_end(spinlock_t *ptl)
* -EFAULT if gaddr is invalid (or mapping for shadows is missing).
*
* Called with sg->mm->mmap_sem in read.
- *
- * Note: Can also be called for shadow gmaps.
*/
static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
unsigned long len, int prot, unsigned long bits)
@@ -900,6 +888,7 @@ static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
pte_t *ptep;
int rc;
+ BUG_ON(gmap_is_shadow(gmap));
while (len) {
rc = -EAGAIN;
ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
@@ -960,7 +949,8 @@ EXPORT_SYMBOL_GPL(gmap_mprotect_notify);
* @val: pointer to the unsigned long value to return
*
* Returns 0 if the value was read, -ENOMEM if out of memory and -EFAULT
- * if reading using the virtual address failed.
+ * if reading using the virtual address failed. -EINVAL if called on a gmap
+ * shadow.
*
* Called with gmap->mm->mmap_sem in read.
*/
@@ -971,6 +961,9 @@ int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
pte_t *ptep, pte;
int rc;
+ if (gmap_is_shadow(gmap))
+ return -EINVAL;
+
while (1) {
rc = -EAGAIN;
ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
@@ -1028,18 +1021,17 @@ static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
}
/**
- * gmap_protect_rmap - modify access rights to memory and create an rmap
+ * gmap_protect_rmap - restrict access rights to memory (RO) and create an rmap
* @sg: pointer to the shadow guest address space structure
* @raddr: rmap address in the shadow gmap
* @paddr: address in the parent guest address space
* @len: length of the memory area to protect
- * @prot: indicates access rights: none, read-only or read-write
*
* Returns 0 if successfully protected and the rmap was created, -ENOMEM
* if out of memory and -EFAULT if paddr is invalid.
*/
static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
- unsigned long paddr, unsigned long len, int prot)
+ unsigned long paddr, unsigned long len)
{
struct gmap *parent;
struct gmap_rmap *rmap;
@@ -1067,7 +1059,7 @@ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
ptep = gmap_pte_op_walk(parent, paddr, &ptl);
if (ptep) {
spin_lock(&sg->guest_table_lock);
- rc = ptep_force_prot(parent->mm, paddr, ptep, prot,
+ rc = ptep_force_prot(parent->mm, paddr, ptep, PROT_READ,
PGSTE_VSIE_BIT);
if (!rc)
gmap_insert_rmap(sg, vmaddr, rmap);
@@ -1077,7 +1069,7 @@ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
radix_tree_preload_end();
if (rc) {
kfree(rmap);
- rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot);
+ rc = gmap_pte_op_fixup(parent, paddr, vmaddr, PROT_READ);
if (rc)
return rc;
continue;
@@ -1616,7 +1608,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
origin = r2t & _REGION_ENTRY_ORIGIN;
offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
- rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
+ rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
spin_lock(&sg->guest_table_lock);
if (!rc) {
table = gmap_table_walk(sg, saddr, 4);
@@ -1699,7 +1691,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
origin = r3t & _REGION_ENTRY_ORIGIN;
offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
- rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
+ rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
spin_lock(&sg->guest_table_lock);
if (!rc) {
table = gmap_table_walk(sg, saddr, 3);
@@ -1783,7 +1775,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
origin = sgt & _REGION_ENTRY_ORIGIN;
offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
- rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
+ rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
spin_lock(&sg->guest_table_lock);
if (!rc) {
table = gmap_table_walk(sg, saddr, 2);
@@ -1902,7 +1894,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
/* Make pgt read-only in parent gmap page table (not the pgste) */
raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT;
origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK;
- rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE, PROT_READ);
+ rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE);
spin_lock(&sg->guest_table_lock);
if (!rc) {
table = gmap_table_walk(sg, saddr, 1);
@@ -2005,7 +1997,7 @@ EXPORT_SYMBOL_GPL(gmap_shadow_page);
* Called with sg->parent->shadow_lock.
*/
static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
- unsigned long gaddr, pte_t *pte)
+ unsigned long gaddr)
{
struct gmap_rmap *rmap, *rnext, *head;
unsigned long start, end, bits, raddr;
@@ -2090,7 +2082,7 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
spin_lock(&gmap->shadow_lock);
list_for_each_entry_safe(sg, next,
&gmap->children, list)
- gmap_shadow_notify(sg, vmaddr, gaddr, pte);
+ gmap_shadow_notify(sg, vmaddr, gaddr);
spin_unlock(&gmap->shadow_lock);
}
if (bits & PGSTE_IN_BIT)