summaryrefslogtreecommitdiff
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/include/asm/ctl_reg.h4
-rw-r--r--arch/s390/include/asm/debug.h3
-rw-r--r--arch/s390/include/asm/dis.h2
-rw-r--r--arch/s390/include/asm/kprobes.h20
-rw-r--r--arch/s390/include/asm/kvm_host.h44
-rw-r--r--arch/s390/include/asm/nmi.h13
-rw-r--r--arch/s390/include/asm/processor.h2
-rw-r--r--arch/s390/include/asm/sysinfo.h2
-rw-r--r--arch/s390/include/uapi/asm/kvm.h12
-rw-r--r--arch/s390/kernel/asm-offsets.c3
-rw-r--r--arch/s390/kernel/debug.c8
-rw-r--r--arch/s390/kernel/entry.S34
-rw-r--r--arch/s390/kernel/ftrace.c4
-rw-r--r--arch/s390/kernel/nmi.c84
-rw-r--r--arch/s390/kernel/vmlinux.lds.S8
-rw-r--r--arch/s390/kvm/gaccess.c43
-rw-r--r--arch/s390/kvm/interrupt.c91
-rw-r--r--arch/s390/kvm/kvm-s390.c372
-rw-r--r--arch/s390/kvm/kvm-s390.h2
-rw-r--r--arch/s390/kvm/priv.c103
-rw-r--r--arch/s390/kvm/vsie.c25
-rw-r--r--arch/s390/lib/probes.c1
-rw-r--r--arch/s390/lib/uaccess.c4
23 files changed, 816 insertions, 68 deletions
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index d0441ad2a990..e508dff92535 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -59,7 +59,9 @@ union ctlreg0 {
unsigned long lap : 1; /* Low-address-protection control */
unsigned long : 4;
unsigned long edat : 1; /* Enhanced-DAT-enablement control */
- unsigned long : 4;
+ unsigned long : 2;
+ unsigned long iep : 1; /* Instruction-Execution-Protection */
+ unsigned long : 1;
unsigned long afp : 1; /* AFP-register control */
unsigned long vx : 1; /* Vector enablement control */
unsigned long : 7;
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
index 0206c8052328..df7b54ea956d 100644
--- a/arch/s390/include/asm/debug.h
+++ b/arch/s390/include/asm/debug.h
@@ -10,6 +10,7 @@
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/time.h>
+#include <linux/refcount.h>
#include <uapi/asm/debug.h>
#define DEBUG_MAX_LEVEL 6 /* debug levels range from 0 to 6 */
@@ -31,7 +32,7 @@ struct debug_view;
typedef struct debug_info {
struct debug_info* next;
struct debug_info* prev;
- atomic_t ref_count;
+ refcount_t ref_count;
spinlock_t lock;
int level;
int nr_areas;
diff --git a/arch/s390/include/asm/dis.h b/arch/s390/include/asm/dis.h
index 60323c21938b..37f617dfbede 100644
--- a/arch/s390/include/asm/dis.h
+++ b/arch/s390/include/asm/dis.h
@@ -40,6 +40,8 @@ static inline int insn_length(unsigned char code)
return ((((int) code + 64) >> 7) + 1) << 1;
}
+struct pt_regs;
+
void show_code(struct pt_regs *regs);
void print_fn_code(unsigned char *code, unsigned long len);
int insn_to_mnemonic(unsigned char *instruction, char *buf, unsigned int len);
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
index 1293c4066cfc..28792ef82c83 100644
--- a/arch/s390/include/asm/kprobes.h
+++ b/arch/s390/include/asm/kprobes.h
@@ -27,12 +27,21 @@
* 2005-Dec Used as a template for s390 by Mike Grundy
* <grundym@us.ibm.com>
*/
+#include <linux/types.h>
#include <asm-generic/kprobes.h>
#define BREAKPOINT_INSTRUCTION 0x0002
+#define FIXUP_PSW_NORMAL 0x08
+#define FIXUP_BRANCH_NOT_TAKEN 0x04
+#define FIXUP_RETURN_REGISTER 0x02
+#define FIXUP_NOT_REQUIRED 0x01
+
+int probe_is_prohibited_opcode(u16 *insn);
+int probe_get_fixup_type(u16 *insn);
+int probe_is_insn_relative_long(u16 *insn);
+
#ifdef CONFIG_KPROBES
-#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/sched/task_stack.h>
@@ -56,11 +65,6 @@ typedef u16 kprobe_opcode_t;
#define KPROBE_SWAP_INST 0x10
-#define FIXUP_PSW_NORMAL 0x08
-#define FIXUP_BRANCH_NOT_TAKEN 0x04
-#define FIXUP_RETURN_REGISTER 0x02
-#define FIXUP_NOT_REQUIRED 0x01
-
/* Architecture specific copy of original instruction */
struct arch_specific_insn {
/* copy of original instruction */
@@ -90,10 +94,6 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
int kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data);
-int probe_is_prohibited_opcode(u16 *insn);
-int probe_get_fixup_type(u16 *insn);
-int probe_is_insn_relative_long(u16 *insn);
-
#define flush_insn_slot(p) do { } while (0)
#endif /* CONFIG_KPROBES */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 9c3bd94204ac..495aedbaf447 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -45,6 +45,8 @@
#define KVM_REQ_ENABLE_IBS KVM_ARCH_REQ(0)
#define KVM_REQ_DISABLE_IBS KVM_ARCH_REQ(1)
#define KVM_REQ_ICPT_OPEREXC KVM_ARCH_REQ(2)
+#define KVM_REQ_START_MIGRATION KVM_ARCH_REQ(3)
+#define KVM_REQ_STOP_MIGRATION KVM_ARCH_REQ(4)
#define SIGP_CTRL_C 0x80
#define SIGP_CTRL_SCN_MASK 0x3f
@@ -56,7 +58,7 @@ union bsca_sigp_ctrl {
__u8 r : 1;
__u8 scn : 6;
};
-} __packed;
+};
union esca_sigp_ctrl {
__u16 value;
@@ -65,14 +67,14 @@ union esca_sigp_ctrl {
__u8 reserved: 7;
__u8 scn;
};
-} __packed;
+};
struct esca_entry {
union esca_sigp_ctrl sigp_ctrl;
__u16 reserved1[3];
__u64 sda;
__u64 reserved2[6];
-} __packed;
+};
struct bsca_entry {
__u8 reserved0;
@@ -80,7 +82,7 @@ struct bsca_entry {
__u16 reserved[3];
__u64 sda;
__u64 reserved2[2];
-} __attribute__((packed));
+};
union ipte_control {
unsigned long val;
@@ -97,7 +99,7 @@ struct bsca_block {
__u64 mcn;
__u64 reserved2;
struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
-} __attribute__((packed));
+};
struct esca_block {
union ipte_control ipte_control;
@@ -105,7 +107,21 @@ struct esca_block {
__u64 mcn[4];
__u64 reserved2[20];
struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
-} __packed;
+};
+
+/*
+ * This struct is used to store some machine check info from lowcore
+ * for machine checks that happen while the guest is running.
+ * This info in host's lowcore might be overwritten by a second machine
+ * check from host when host is in the machine check's high-level handling.
+ * The size is 24 bytes.
+ */
+struct mcck_volatile_info {
+ __u64 mcic;
+ __u64 failing_storage_address;
+ __u32 ext_damage_code;
+ __u32 reserved;
+};
#define CPUSTAT_STOPPED 0x80000000
#define CPUSTAT_WAIT 0x10000000
@@ -260,14 +276,15 @@ struct kvm_s390_sie_block {
struct kvm_s390_itdb {
__u8 data[256];
-} __packed;
+};
struct sie_page {
struct kvm_s390_sie_block sie_block;
- __u8 reserved200[1024]; /* 0x0200 */
+ struct mcck_volatile_info mcck_info; /* 0x0200 */
+ __u8 reserved218[1000]; /* 0x0218 */
struct kvm_s390_itdb itdb; /* 0x0600 */
__u8 reserved700[2304]; /* 0x0700 */
-} __packed;
+};
struct kvm_vcpu_stat {
u64 exit_userspace;
@@ -681,7 +698,7 @@ struct sie_page2 {
__u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */
struct kvm_s390_crypto_cb crycb; /* 0x0800 */
u8 reserved900[0x1000 - 0x900]; /* 0x0900 */
-} __packed;
+};
struct kvm_s390_vsie {
struct mutex mutex;
@@ -691,6 +708,12 @@ struct kvm_s390_vsie {
struct page *pages[KVM_MAX_VCPUS];
};
+struct kvm_s390_migration_state {
+ unsigned long bitmap_size; /* in bits (number of guest pages) */
+ atomic64_t dirty_pages; /* number of dirty pages */
+ unsigned long *pgste_bitmap;
+};
+
struct kvm_arch{
void *sca;
int use_esca;
@@ -718,6 +741,7 @@ struct kvm_arch{
struct kvm_s390_crypto crypto;
struct kvm_s390_vsie vsie;
u64 epoch;
+ struct kvm_s390_migration_state *migration_state;
/* subset of available cpu features enabled by user space */
DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
};
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index e3e8895f5d3e..9d91cf3e427f 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -14,11 +14,24 @@
#include <linux/const.h>
#include <linux/types.h>
+#define MCIC_SUBCLASS_MASK (1ULL<<63 | 1ULL<<62 | 1ULL<<61 | \
+ 1ULL<<59 | 1ULL<<58 | 1ULL<<56 | \
+ 1ULL<<55 | 1ULL<<54 | 1ULL<<53 | \
+ 1ULL<<52 | 1ULL<<47 | 1ULL<<46 | \
+ 1ULL<<45 | 1ULL<<44)
#define MCCK_CODE_SYSTEM_DAMAGE _BITUL(63)
+#define MCCK_CODE_EXT_DAMAGE _BITUL(63 - 5)
+#define MCCK_CODE_CP _BITUL(63 - 9)
#define MCCK_CODE_CPU_TIMER_VALID _BITUL(63 - 46)
#define MCCK_CODE_PSW_MWP_VALID _BITUL(63 - 20)
#define MCCK_CODE_PSW_IA_VALID _BITUL(63 - 23)
+#define MCCK_CR14_CR_PENDING_SUB_MASK (1 << 28)
+#define MCCK_CR14_RECOVERY_SUB_MASK (1 << 27)
+#define MCCK_CR14_DEGRAD_SUB_MASK (1 << 26)
+#define MCCK_CR14_EXT_DAMAGE_SUB_MASK (1 << 25)
+#define MCCK_CR14_WARN_SUB_MASK (1 << 24)
+
#ifndef __ASSEMBLY__
union mci {
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 60d395fdc864..5b1b247dfbca 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -20,6 +20,7 @@
#define CIF_FPU 4 /* restore FPU registers */
#define CIF_IGNORE_IRQ 5 /* ignore interrupt (for udelay) */
#define CIF_ENABLED_WAIT 6 /* in enabled wait state */
+#define CIF_MCCK_GUEST 7 /* machine check happening in guest */
#define _CIF_MCCK_PENDING _BITUL(CIF_MCCK_PENDING)
#define _CIF_ASCE_PRIMARY _BITUL(CIF_ASCE_PRIMARY)
@@ -28,6 +29,7 @@
#define _CIF_FPU _BITUL(CIF_FPU)
#define _CIF_IGNORE_IRQ _BITUL(CIF_IGNORE_IRQ)
#define _CIF_ENABLED_WAIT _BITUL(CIF_ENABLED_WAIT)
+#define _CIF_MCCK_GUEST _BITUL(CIF_MCCK_GUEST)
#ifndef __ASSEMBLY__
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
index 73bff45ced55..e784bed6ed7f 100644
--- a/arch/s390/include/asm/sysinfo.h
+++ b/arch/s390/include/asm/sysinfo.h
@@ -146,7 +146,7 @@ extern int topology_max_mnest;
* Returns the maximum nesting level supported by the cpu topology code.
* The current maximum level is 4 which is the drawer level.
*/
-static inline int topology_mnest_limit(void)
+static inline unsigned char topology_mnest_limit(void)
{
return min(topology_max_mnest, 4);
}
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 3dd2a1d308dd..69d09c39bbcd 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -28,6 +28,7 @@
#define KVM_DEV_FLIC_CLEAR_IO_IRQ 8
#define KVM_DEV_FLIC_AISM 9
#define KVM_DEV_FLIC_AIRQ_INJECT 10
+#define KVM_DEV_FLIC_AISM_ALL 11
/*
* We can have up to 4*64k pending subchannels + 8 adapter interrupts,
* as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
@@ -53,6 +54,11 @@ struct kvm_s390_ais_req {
__u16 mode;
};
+struct kvm_s390_ais_all {
+ __u8 simm;
+ __u8 nimm;
+};
+
#define KVM_S390_IO_ADAPTER_MASK 1
#define KVM_S390_IO_ADAPTER_MAP 2
#define KVM_S390_IO_ADAPTER_UNMAP 3
@@ -70,6 +76,7 @@ struct kvm_s390_io_adapter_req {
#define KVM_S390_VM_TOD 1
#define KVM_S390_VM_CRYPTO 2
#define KVM_S390_VM_CPU_MODEL 3
+#define KVM_S390_VM_MIGRATION 4
/* kvm attributes for mem_ctrl */
#define KVM_S390_VM_MEM_ENABLE_CMMA 0
@@ -151,6 +158,11 @@ struct kvm_s390_vm_cpu_subfunc {
#define KVM_S390_VM_CRYPTO_DISABLE_AES_KW 2
#define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW 3
+/* kvm attributes for migration mode */
+#define KVM_S390_VM_MIGRATION_STOP 0
+#define KVM_S390_VM_MIGRATION_START 1
+#define KVM_S390_VM_MIGRATION_STATUS 2
+
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
/* general purpose regs for s390 */
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 6bb29633e1f1..b65c414b6c0e 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -58,6 +58,9 @@ int main(void)
OFFSET(__SF_BACKCHAIN, stack_frame, back_chain);
OFFSET(__SF_GPRS, stack_frame, gprs);
OFFSET(__SF_EMPTY, stack_frame, empty1);
+ OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[0]);
+ OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[1]);
+ OFFSET(__SF_SIE_REASON, stack_frame, empty1[2]);
BLANK();
/* timeval/timezone offsets for use by vdso */
OFFSET(__VDSO_UPD_COUNT, vdso_data, tb_update_count);
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 530226b6cb19..86b3e74f569e 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -277,7 +277,7 @@ debug_info_alloc(const char *name, int pages_per_area, int nr_areas,
memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *));
memset(rc->debugfs_entries, 0 ,DEBUG_MAX_VIEWS *
sizeof(struct dentry*));
- atomic_set(&(rc->ref_count), 0);
+ refcount_set(&(rc->ref_count), 0);
return rc;
@@ -361,7 +361,7 @@ debug_info_create(const char *name, int pages_per_area, int nr_areas,
debug_area_last = rc;
rc->next = NULL;
- debug_info_get(rc);
+ refcount_set(&rc->ref_count, 1);
out:
return rc;
}
@@ -416,7 +416,7 @@ static void
debug_info_get(debug_info_t * db_info)
{
if (db_info)
- atomic_inc(&db_info->ref_count);
+ refcount_inc(&db_info->ref_count);
}
/*
@@ -431,7 +431,7 @@ debug_info_put(debug_info_t *db_info)
if (!db_info)
return;
- if (atomic_dec_and_test(&db_info->ref_count)) {
+ if (refcount_dec_and_test(&db_info->ref_count)) {
for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
if (!db_info->views[i])
continue;
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index a5f5d3bb3dbc..3b4b158382ea 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -225,6 +225,7 @@ ENTRY(sie64a)
jnz .Lsie_skip
TSTMSK __LC_CPU_FLAGS,_CIF_FPU
jo .Lsie_skip # exit if fp/vx regs changed
+.Lsie_entry:
sie 0(%r14)
.Lsie_skip:
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
@@ -312,6 +313,7 @@ ENTRY(system_call)
lg %r14,__LC_VDSO_PER_CPU
lmg %r0,%r10,__PT_R0(%r11)
mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
+.Lsysc_exit_timer:
stpt __LC_EXIT_TIMER
mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
lmg %r11,%r15,__PT_R11(%r11)
@@ -623,6 +625,7 @@ ENTRY(io_int_handler)
lg %r14,__LC_VDSO_PER_CPU
lmg %r0,%r10,__PT_R0(%r11)
mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
+.Lio_exit_timer:
stpt __LC_EXIT_TIMER
mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
lmg %r11,%r15,__PT_R11(%r11)
@@ -1102,7 +1105,13 @@ cleanup_critical:
.quad .Lsie_done
.Lcleanup_sie:
- lg %r9,__SF_EMPTY(%r15) # get control block pointer
+ cghi %r11,__LC_SAVE_AREA_ASYNC #Is this in normal interrupt?
+ je 1f
+ slg %r9,BASED(.Lsie_crit_mcck_start)
+ clg %r9,BASED(.Lsie_crit_mcck_length)
+ jh 1f
+ oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
+1: lg %r9,__SF_EMPTY(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
larl %r9,sie_exit # skip forward to sie_exit
@@ -1174,15 +1183,23 @@ cleanup_critical:
br %r14
.Lcleanup_sysc_restore:
+ # check if stpt has been executed
clg %r9,BASED(.Lcleanup_sysc_restore_insn)
+ jh 0f
+ mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
+ cghi %r11,__LC_SAVE_AREA_ASYNC
je 0f
+ mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER
+0: clg %r9,BASED(.Lcleanup_sysc_restore_insn+8)
+ je 1f
lg %r9,24(%r11) # get saved pointer to pt_regs
mvc __LC_RETURN_PSW(16),__PT_PSW(%r9)
mvc 0(64,%r11),__PT_R8(%r9)
lmg %r0,%r7,__PT_R0(%r9)
-0: lmg %r8,%r9,__LC_RETURN_PSW
+1: lmg %r8,%r9,__LC_RETURN_PSW
br %r14
.Lcleanup_sysc_restore_insn:
+ .quad .Lsysc_exit_timer
.quad .Lsysc_done - 4
.Lcleanup_io_tif:
@@ -1190,15 +1207,20 @@ cleanup_critical:
br %r14
.Lcleanup_io_restore:
+ # check if stpt has been executed
clg %r9,BASED(.Lcleanup_io_restore_insn)
- je 0f
+ jh 0f
+ mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER
+0: clg %r9,BASED(.Lcleanup_io_restore_insn+8)
+ je 1f
lg %r9,24(%r11) # get saved r11 pointer to pt_regs
mvc __LC_RETURN_PSW(16),__PT_PSW(%r9)
mvc 0(64,%r11),__PT_R8(%r9)
lmg %r0,%r7,__PT_R0(%r9)
-0: lmg %r8,%r9,__LC_RETURN_PSW
+1: lmg %r8,%r9,__LC_RETURN_PSW
br %r14
.Lcleanup_io_restore_insn:
+ .quad .Lio_exit_timer
.quad .Lio_done - 4
.Lcleanup_idle:
@@ -1274,6 +1296,10 @@ cleanup_critical:
.quad .Lsie_gmap
.Lsie_critical_length:
.quad .Lsie_done - .Lsie_gmap
+.Lsie_crit_mcck_start:
+ .quad .Lsie_entry
+.Lsie_crit_mcck_length:
+ .quad .Lsie_skip - .Lsie_entry
#endif
.section .rodata, "a"
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 27477f34cc0a..d03a6d12c4bd 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -173,6 +173,8 @@ int __init ftrace_dyn_arch_init(void)
return 0;
}
+#ifdef CONFIG_MODULES
+
static int __init ftrace_plt_init(void)
{
unsigned int *ip;
@@ -191,6 +193,8 @@ static int __init ftrace_plt_init(void)
}
device_initcall(ftrace_plt_init);
+#endif /* CONFIG_MODULES */
+
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
* Hook the return address and push it in the stack of return addresses
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 985589523970..31d03a84126c 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -25,6 +25,8 @@
#include <asm/crw.h>
#include <asm/switch_to.h>
#include <asm/ctl_reg.h>
+#include <asm/asm-offsets.h>
+#include <linux/kvm_host.h>
struct mcck_struct {
unsigned int kill_task : 1;
@@ -274,12 +276,39 @@ static int notrace s390_validate_registers(union mci mci, int umode)
return kill_task;
}
+/*
+ * Backup the guest's machine check info to its description block
+ */
+static void notrace s390_backup_mcck_info(struct pt_regs *regs)
+{
+ struct mcck_volatile_info *mcck_backup;
+ struct sie_page *sie_page;
+
+ /* r14 contains the sie block, which was set in sie64a */
+ struct kvm_s390_sie_block *sie_block =
+ (struct kvm_s390_sie_block *) regs->gprs[14];
+
+ if (sie_block == NULL)
+ /* Something's seriously wrong, stop system. */
+ s390_handle_damage();
+
+ sie_page = container_of(sie_block, struct sie_page, sie_block);
+ mcck_backup = &sie_page->mcck_info;
+ mcck_backup->mcic = S390_lowcore.mcck_interruption_code &
+ ~(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE);
+ mcck_backup->ext_damage_code = S390_lowcore.external_damage_code;
+ mcck_backup->failing_storage_address
+ = S390_lowcore.failing_storage_address;
+}
+
#define MAX_IPD_COUNT 29
#define MAX_IPD_TIME (5 * 60 * USEC_PER_SEC) /* 5 minutes */
#define ED_STP_ISLAND 6 /* External damage STP island check */
#define ED_STP_SYNC 7 /* External damage STP sync check */
+#define MCCK_CODE_NO_GUEST (MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE)
+
/*
* machine check handler.
*/
@@ -291,6 +320,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
struct mcck_struct *mcck;
unsigned long long tmp;
union mci mci;
+ unsigned long mcck_dam_code;
nmi_enter();
inc_irq_stat(NMI_NMI);
@@ -301,7 +331,13 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
/* System damage -> stopping machine */
s390_handle_damage();
}
- if (mci.pd) {
+
+ /*
+ * Reinject the instruction processing damages' machine checks
+ * including Delayed Access Exception into the guest
+ * instead of damaging the host if they happen in the guest.
+ */
+ if (mci.pd && !test_cpu_flag(CIF_MCCK_GUEST)) {
if (mci.b) {
/* Processing backup -> verify if we can survive this */
u64 z_mcic, o_mcic, t_mcic;
@@ -345,6 +381,14 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
mcck->mcck_code = mci.val;
set_cpu_flag(CIF_MCCK_PENDING);
}
+
+ /*
+ * Backup the machine check's info if it happens when the guest
+ * is running.
+ */
+ if (test_cpu_flag(CIF_MCCK_GUEST))
+ s390_backup_mcck_info(regs);
+
if (mci.cd) {
/* Timing facility damage */
s390_handle_damage();
@@ -358,15 +402,22 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
if (mcck->stp_queue)
set_cpu_flag(CIF_MCCK_PENDING);
}
- if (mci.se)
- /* Storage error uncorrected */
- s390_handle_damage();
- if (mci.ke)
- /* Storage key-error uncorrected */
- s390_handle_damage();
- if (mci.ds && mci.fa)
- /* Storage degradation */
- s390_handle_damage();
+
+ /*
+ * Reinject storage related machine checks into the guest if they
+ * happen when the guest is running.
+ */
+ if (!test_cpu_flag(CIF_MCCK_GUEST)) {
+ if (mci.se)
+ /* Storage error uncorrected */
+ s390_handle_damage();
+ if (mci.ke)
+ /* Storage key-error uncorrected */
+ s390_handle_damage();
+ if (mci.ds && mci.fa)
+ /* Storage degradation */
+ s390_handle_damage();
+ }
if (mci.cp) {
/* Channel report word pending */
mcck->channel_report = 1;
@@ -377,6 +428,19 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
mcck->warning = 1;
set_cpu_flag(CIF_MCCK_PENDING);
}
+
+ /*
+ * If there are only Channel Report Pending and External Damage
+ * machine checks, they will not be reinjected into the guest
+ * because they refer to host conditions only.
+ */
+ mcck_dam_code = (mci.val & MCIC_SUBCLASS_MASK);
+ if (test_cpu_flag(CIF_MCCK_GUEST) &&
+ (mcck_dam_code & MCCK_CODE_NO_GUEST) != mcck_dam_code) {
+ /* Set exit reason code for host's later handling */
+ *((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR;
+ }
+ clear_cpu_flag(CIF_MCCK_GUEST);
nmi_exit();
}
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 72307f108c40..6e2c42bd1c3b 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -31,8 +31,14 @@ SECTIONS
{
. = 0x00000000;
.text : {
- _text = .; /* Text and read-only data */
+ /* Text and read-only data */
HEAD_TEXT
+ /*
+ * E.g. perf doesn't like symbols starting at address zero,
+ * therefore skip the initial PSW and channel program located
+ * at address zero and let _text start at 0x200.
+ */
+ _text = 0x200;
TEXT_TEXT
SCHED_TEXT
CPUIDLE_TEXT
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 9da243d94cc3..17e3a4e71bc9 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -89,7 +89,7 @@ struct region3_table_entry_fc1 {
unsigned long f : 1; /* Fetch-Protection Bit */
unsigned long fc : 1; /* Format-Control */
unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long co : 1; /* Change-Recording Override */
+ unsigned long iep: 1; /* Instruction-Execution-Protection */
unsigned long : 2;
unsigned long i : 1; /* Region-Invalid Bit */
unsigned long cr : 1; /* Common-Region Bit */
@@ -131,7 +131,7 @@ struct segment_entry_fc1 {
unsigned long f : 1; /* Fetch-Protection Bit */
unsigned long fc : 1; /* Format-Control */
unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long co : 1; /* Change-Recording Override */
+ unsigned long iep: 1; /* Instruction-Execution-Protection */
unsigned long : 2;
unsigned long i : 1; /* Segment-Invalid Bit */
unsigned long cs : 1; /* Common-Segment Bit */
@@ -168,7 +168,8 @@ union page_table_entry {
unsigned long z : 1; /* Zero Bit */
unsigned long i : 1; /* Page-Invalid Bit */
unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long : 9;
+ unsigned long iep: 1; /* Instruction-Execution-Protection */
+ unsigned long : 8;
};
};
@@ -241,7 +242,7 @@ struct ale {
unsigned long asteo : 25; /* ASN-Second-Table-Entry Origin */
unsigned long : 6;
unsigned long astesn : 32; /* ASTE Sequence Number */
-} __packed;
+};
struct aste {
unsigned long i : 1; /* ASX-Invalid Bit */
@@ -257,7 +258,7 @@ struct aste {
unsigned long ald : 32;
unsigned long astesn : 32;
/* .. more fields there */
-} __packed;
+};
int ipte_lock_held(struct kvm_vcpu *vcpu)
{
@@ -485,6 +486,7 @@ enum prot_type {
PROT_TYPE_KEYC = 1,
PROT_TYPE_ALC = 2,
PROT_TYPE_DAT = 3,
+ PROT_TYPE_IEP = 4,
};
static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
@@ -500,6 +502,9 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
switch (code) {
case PGM_PROTECTION:
switch (prot) {
+ case PROT_TYPE_IEP:
+ tec->b61 = 1;
+ /* FALL THROUGH */
case PROT_TYPE_LA:
tec->b56 = 1;
break;
@@ -591,6 +596,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
* @gpa: points to where guest physical (absolute) address should be stored
* @asce: effective asce
* @mode: indicates the access mode to be used
+ * @prot: returns the type for protection exceptions
*
* Translate a guest virtual address into a guest absolute address by means
* of dynamic address translation as specified by the architecture.
@@ -606,19 +612,21 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
*/
static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
unsigned long *gpa, const union asce asce,
- enum gacc_mode mode)
+ enum gacc_mode mode, enum prot_type *prot)
{
union vaddress vaddr = {.addr = gva};
union raddress raddr = {.addr = gva};
union page_table_entry pte;
int dat_protection = 0;
+ int iep_protection = 0;
union ctlreg0 ctlreg0;
unsigned long ptr;
- int edat1, edat2;
+ int edat1, edat2, iep;
ctlreg0.val = vcpu->arch.sie_block->gcr[0];
edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
+ iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
if (asce.r)
goto real_address;
ptr = asce.origin * 4096;
@@ -702,6 +710,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
return PGM_TRANSLATION_SPEC;
if (rtte.fc && edat2) {
dat_protection |= rtte.fc1.p;
+ iep_protection = rtte.fc1.iep;
raddr.rfaa = rtte.fc1.rfaa;
goto absolute_address;
}
@@ -729,6 +738,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
return PGM_TRANSLATION_SPEC;
if (ste.fc && edat1) {
dat_protection |= ste.fc1.p;
+ iep_protection = ste.fc1.iep;
raddr.sfaa = ste.fc1.sfaa;
goto absolute_address;
}
@@ -745,12 +755,19 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
if (pte.z)
return PGM_TRANSLATION_SPEC;
dat_protection |= pte.p;
+ iep_protection = pte.iep;
raddr.pfra = pte.pfra;
real_address:
raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
absolute_address:
- if (mode == GACC_STORE && dat_protection)
+ if (mode == GACC_STORE && dat_protection) {
+ *prot = PROT_TYPE_DAT;
return PGM_PROTECTION;
+ }
+ if (mode == GACC_IFETCH && iep_protection && iep) {
+ *prot = PROT_TYPE_IEP;
+ return PGM_PROTECTION;
+ }
if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
return PGM_ADDRESSING;
*gpa = raddr.addr;
@@ -782,6 +799,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
int lap_enabled, rc = 0;
+ enum prot_type prot;
lap_enabled = low_address_protection_enabled(vcpu, asce);
while (nr_pages) {
@@ -791,7 +809,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
PROT_TYPE_LA);
ga &= PAGE_MASK;
if (psw_bits(*psw).t) {
- rc = guest_translate(vcpu, ga, pages, asce, mode);
+ rc = guest_translate(vcpu, ga, pages, asce, mode, &prot);
if (rc < 0)
return rc;
} else {
@@ -800,7 +818,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
rc = PGM_ADDRESSING;
}
if (rc)
- return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_DAT);
+ return trans_exc(vcpu, rc, ga, ar, mode, prot);
ga += PAGE_SIZE;
pages++;
nr_pages--;
@@ -886,6 +904,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
unsigned long *gpa, enum gacc_mode mode)
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ enum prot_type prot;
union asce asce;
int rc;
@@ -900,9 +919,9 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
}
if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */
- rc = guest_translate(vcpu, gva, gpa, asce, mode);
+ rc = guest_translate(vcpu, gva, gpa, asce, mode, &prot);
if (rc > 0)
- return trans_exc(vcpu, rc, gva, 0, mode, PROT_TYPE_DAT);
+ return trans_exc(vcpu, rc, gva, 0, mode, prot);
} else {
*gpa = kvm_s390_real_to_abs(vcpu, gva);
if (kvm_is_error_gpa(vcpu->kvm, *gpa))
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index caf15c8a8948..f2c78fc1852d 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -251,8 +251,13 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
__clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask);
if (psw_mchk_disabled(vcpu))
active_mask &= ~IRQ_PEND_MCHK_MASK;
+ /*
+ * Check both floating and local interrupt's cr14 because
+ * bit IRQ_PEND_MCHK_REP could be set in both cases.
+ */
if (!(vcpu->arch.sie_block->gcr[14] &
- vcpu->kvm->arch.float_int.mchk.cr14))
+ (vcpu->kvm->arch.float_int.mchk.cr14 |
+ vcpu->arch.local_int.irq.mchk.cr14)))
__clear_bit(IRQ_PEND_MCHK_REP, &active_mask);
/*
@@ -1876,6 +1881,28 @@ out:
return ret < 0 ? ret : n;
}
+static int flic_ais_mode_get_all(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+ struct kvm_s390_ais_all ais;
+
+ if (attr->attr < sizeof(ais))
+ return -EINVAL;
+
+ if (!test_kvm_facility(kvm, 72))
+ return -ENOTSUPP;
+
+ mutex_lock(&fi->ais_lock);
+ ais.simm = fi->simm;
+ ais.nimm = fi->nimm;
+ mutex_unlock(&fi->ais_lock);
+
+ if (copy_to_user((void __user *)attr->addr, &ais, sizeof(ais)))
+ return -EFAULT;
+
+ return 0;
+}
+
static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
int r;
@@ -1885,6 +1912,9 @@ static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
r = get_all_floating_irqs(dev->kvm, (u8 __user *) attr->addr,
attr->attr);
break;
+ case KVM_DEV_FLIC_AISM_ALL:
+ r = flic_ais_mode_get_all(dev->kvm, attr);
+ break;
default:
r = -EINVAL;
}
@@ -2235,6 +2265,25 @@ static int flic_inject_airq(struct kvm *kvm, struct kvm_device_attr *attr)
return kvm_s390_inject_airq(kvm, adapter);
}
+static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+ struct kvm_s390_ais_all ais;
+
+ if (!test_kvm_facility(kvm, 72))
+ return -ENOTSUPP;
+
+ if (copy_from_user(&ais, (void __user *)attr->addr, sizeof(ais)))
+ return -EFAULT;
+
+ mutex_lock(&fi->ais_lock);
+ fi->simm = ais.simm;
+ fi->nimm = ais.nimm;
+ mutex_unlock(&fi->ais_lock);
+
+ return 0;
+}
+
static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
int r = 0;
@@ -2277,6 +2326,9 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
case KVM_DEV_FLIC_AIRQ_INJECT:
r = flic_inject_airq(dev->kvm, attr);
break;
+ case KVM_DEV_FLIC_AISM_ALL:
+ r = flic_ais_mode_set_all(dev->kvm, attr);
+ break;
default:
r = -EINVAL;
}
@@ -2298,6 +2350,7 @@ static int flic_has_attr(struct kvm_device *dev,
case KVM_DEV_FLIC_CLEAR_IO_IRQ:
case KVM_DEV_FLIC_AISM:
case KVM_DEV_FLIC_AIRQ_INJECT:
+ case KVM_DEV_FLIC_AISM_ALL:
return 0;
}
return -ENXIO;
@@ -2415,6 +2468,42 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
return ret;
}
+/*
+ * Inject the machine check to the guest.
+ */
+void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
+ struct mcck_volatile_info *mcck_info)
+{
+ struct kvm_s390_interrupt_info inti;
+ struct kvm_s390_irq irq;
+ struct kvm_s390_mchk_info *mchk;
+ union mci mci;
+ __u64 cr14 = 0; /* upper bits are not used */
+
+ mci.val = mcck_info->mcic;
+ if (mci.sr)
+ cr14 |= MCCK_CR14_RECOVERY_SUB_MASK;
+ if (mci.dg)
+ cr14 |= MCCK_CR14_DEGRAD_SUB_MASK;
+ if (mci.w)
+ cr14 |= MCCK_CR14_WARN_SUB_MASK;
+
+ mchk = mci.ck ? &inti.mchk : &irq.u.mchk;
+ mchk->cr14 = cr14;
+ mchk->mcic = mcck_info->mcic;
+ mchk->ext_damage_code = mcck_info->ext_damage_code;
+ mchk->failing_storage_address = mcck_info->failing_storage_address;
+ if (mci.ck) {
+ /* Inject the floating machine check */
+ inti.type = KVM_S390_MCHK;
+ WARN_ON_ONCE(__inject_vm(vcpu->kvm, &inti));
+ } else {
+ /* Inject the machine check to specified vcpu */
+ irq.type = KVM_S390_MCHK;
+ WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
+ }
+}
+
int kvm_set_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ad41e0fa3a21..ef6419654c16 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -30,6 +30,7 @@
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
+#include <linux/string.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
@@ -386,6 +387,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_SKEYS:
case KVM_CAP_S390_IRQ_STATE:
case KVM_CAP_S390_USER_INSTR0:
+ case KVM_CAP_S390_CMMA_MIGRATION:
case KVM_CAP_S390_AIS:
r = 1;
break;
@@ -750,6 +752,129 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
return 0;
}
+static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
+{
+ int cx;
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(cx, vcpu, kvm)
+ kvm_s390_sync_request(req, vcpu);
+}
+
+/*
+ * Must be called with kvm->srcu held to avoid races on memslots, and with
+ * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
+ */
+static int kvm_s390_vm_start_migration(struct kvm *kvm)
+{
+ struct kvm_s390_migration_state *mgs;
+ struct kvm_memory_slot *ms;
+ /* should be the only one */
+ struct kvm_memslots *slots;
+ unsigned long ram_pages;
+ int slotnr;
+
+ /* migration mode already enabled */
+ if (kvm->arch.migration_state)
+ return 0;
+
+ slots = kvm_memslots(kvm);
+ if (!slots || !slots->used_slots)
+ return -EINVAL;
+
+ mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
+ if (!mgs)
+ return -ENOMEM;
+ kvm->arch.migration_state = mgs;
+
+ if (kvm->arch.use_cmma) {
+ /*
+ * Get the last slot. They should be sorted by base_gfn, so the
+ * last slot is also the one at the end of the address space.
+ * We have verified above that at least one slot is present.
+ */
+ ms = slots->memslots + slots->used_slots - 1;
+ /* round up so we only use full longs */
+ ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
+ /* allocate enough bytes to store all the bits */
+ mgs->pgste_bitmap = vmalloc(ram_pages / 8);
+ if (!mgs->pgste_bitmap) {
+ kfree(mgs);
+ kvm->arch.migration_state = NULL;
+ return -ENOMEM;
+ }
+
+ mgs->bitmap_size = ram_pages;
+ atomic64_set(&mgs->dirty_pages, ram_pages);
+ /* mark all the pages in active slots as dirty */
+ for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
+ ms = slots->memslots + slotnr;
+ bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
+ }
+
+ kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
+ }
+ return 0;
+}
+
+/*
+ * Must be called with kvm->lock to avoid races with ourselves and
+ * kvm_s390_vm_start_migration.
+ */
+static int kvm_s390_vm_stop_migration(struct kvm *kvm)
+{
+ struct kvm_s390_migration_state *mgs;
+
+ /* migration mode already disabled */
+ if (!kvm->arch.migration_state)
+ return 0;
+ mgs = kvm->arch.migration_state;
+ kvm->arch.migration_state = NULL;
+
+ if (kvm->arch.use_cmma) {
+ kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
+ vfree(mgs->pgste_bitmap);
+ }
+ kfree(mgs);
+ return 0;
+}
+
+static int kvm_s390_vm_set_migration(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ int idx, res = -ENXIO;
+
+ mutex_lock(&kvm->lock);
+ switch (attr->attr) {
+ case KVM_S390_VM_MIGRATION_START:
+ idx = srcu_read_lock(&kvm->srcu);
+ res = kvm_s390_vm_start_migration(kvm);
+ srcu_read_unlock(&kvm->srcu, idx);
+ break;
+ case KVM_S390_VM_MIGRATION_STOP:
+ res = kvm_s390_vm_stop_migration(kvm);
+ break;
+ default:
+ break;
+ }
+ mutex_unlock(&kvm->lock);
+
+ return res;
+}
+
+static int kvm_s390_vm_get_migration(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ u64 mig = (kvm->arch.migration_state != NULL);
+
+ if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
+ return -ENXIO;
+
+ if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
+ return -EFAULT;
+ return 0;
+}
+
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
u8 gtod_high;
@@ -1090,6 +1215,9 @@ static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_CRYPTO:
ret = kvm_s390_vm_set_crypto(kvm, attr);
break;
+ case KVM_S390_VM_MIGRATION:
+ ret = kvm_s390_vm_set_migration(kvm, attr);
+ break;
default:
ret = -ENXIO;
break;
@@ -1112,6 +1240,9 @@ static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_CPU_MODEL:
ret = kvm_s390_get_cpu_model(kvm, attr);
break;
+ case KVM_S390_VM_MIGRATION:
+ ret = kvm_s390_vm_get_migration(kvm, attr);
+ break;
default:
ret = -ENXIO;
break;
@@ -1179,6 +1310,9 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
break;
}
break;
+ case KVM_S390_VM_MIGRATION:
+ ret = 0;
+ break;
default:
ret = -ENXIO;
break;
@@ -1286,6 +1420,182 @@ out:
return r;
}
+/*
+ * Base address and length must be sent at the start of each block, therefore
+ * it's cheaper to send some clean data, as long as it's less than the size of
+ * two longs.
+ */
+#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
+/* for consistency */
+#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
+
+/*
+ * This function searches for the next page with dirty CMMA attributes, and
+ * saves the attributes in the buffer up to either the end of the buffer or
+ * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
+ * no trailing clean bytes are saved.
+ * In case no dirty bits were found, or if CMMA was not enabled or used, the
+ * output buffer will indicate 0 as length.
+ */
+static int kvm_s390_get_cmma_bits(struct kvm *kvm,
+ struct kvm_s390_cmma_log *args)
+{
+ struct kvm_s390_migration_state *s = kvm->arch.migration_state;
+ unsigned long bufsize, hva, pgstev, i, next, cur;
+ int srcu_idx, peek, r = 0, rr;
+ u8 *res;
+
+ cur = args->start_gfn;
+ i = next = pgstev = 0;
+
+ if (unlikely(!kvm->arch.use_cmma))
+ return -ENXIO;
+ /* Invalid/unsupported flags were specified */
+ if (args->flags & ~KVM_S390_CMMA_PEEK)
+ return -EINVAL;
+ /* Migration mode query, and we are not doing a migration */
+ peek = !!(args->flags & KVM_S390_CMMA_PEEK);
+ if (!peek && !s)
+ return -EINVAL;
+ /* CMMA is disabled or was not used, or the buffer has length zero */
+ bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
+ if (!bufsize || !kvm->mm->context.use_cmma) {
+ memset(args, 0, sizeof(*args));
+ return 0;
+ }
+
+ if (!peek) {
+ /* We are not peeking, and there are no dirty pages */
+ if (!atomic64_read(&s->dirty_pages)) {
+ memset(args, 0, sizeof(*args));
+ return 0;
+ }
+ cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
+ args->start_gfn);
+ if (cur >= s->bitmap_size) /* nothing found, loop back */
+ cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
+ if (cur >= s->bitmap_size) { /* again! (very unlikely) */
+ memset(args, 0, sizeof(*args));
+ return 0;
+ }
+ next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
+ }
+
+ res = vmalloc(bufsize);
+ if (!res)
+ return -ENOMEM;
+
+ args->start_gfn = cur;
+
+ down_read(&kvm->mm->mmap_sem);
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ while (i < bufsize) {
+ hva = gfn_to_hva(kvm, cur);
+ if (kvm_is_error_hva(hva)) {
+ r = -EFAULT;
+ break;
+ }
+ /* decrement only if we actually flipped the bit to 0 */
+ if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
+ atomic64_dec(&s->dirty_pages);
+ r = get_pgste(kvm->mm, hva, &pgstev);
+ if (r < 0)
+ pgstev = 0;
+ /* save the value */
+ res[i++] = (pgstev >> 24) & 0x3;
+ /*
+ * if the next bit is too far away, stop.
+ * if we reached the previous "next", find the next one
+ */
+ if (!peek) {
+ if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
+ break;
+ if (cur == next)
+ next = find_next_bit(s->pgste_bitmap,
+ s->bitmap_size, cur + 1);
+ /* reached the end of the bitmap or of the buffer, stop */
+ if ((next >= s->bitmap_size) ||
+ (next >= args->start_gfn + bufsize))
+ break;
+ }
+ cur++;
+ }
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ up_read(&kvm->mm->mmap_sem);
+ args->count = i;
+ args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
+
+ rr = copy_to_user((void __user *)args->values, res, args->count);
+ if (rr)
+ r = -EFAULT;
+
+ vfree(res);
+ return r;
+}
+
+/*
+ * This function sets the CMMA attributes for the given pages. If the input
+ * buffer has zero length, no action is taken, otherwise the attributes are
+ * set and the mm->context.use_cmma flag is set.
+ */
+static int kvm_s390_set_cmma_bits(struct kvm *kvm,
+ const struct kvm_s390_cmma_log *args)
+{
+ unsigned long hva, mask, pgstev, i;
+ uint8_t *bits;
+ int srcu_idx, r = 0;
+
+ mask = args->mask;
+
+ if (!kvm->arch.use_cmma)
+ return -ENXIO;
+ /* invalid/unsupported flags */
+ if (args->flags != 0)
+ return -EINVAL;
+ /* Enforce sane limit on memory allocation */
+ if (args->count > KVM_S390_CMMA_SIZE_MAX)
+ return -EINVAL;
+ /* Nothing to do */
+ if (args->count == 0)
+ return 0;
+
+ bits = vmalloc(sizeof(*bits) * args->count);
+ if (!bits)
+ return -ENOMEM;
+
+ r = copy_from_user(bits, (void __user *)args->values, args->count);
+ if (r) {
+ r = -EFAULT;
+ goto out;
+ }
+
+ down_read(&kvm->mm->mmap_sem);
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ for (i = 0; i < args->count; i++) {
+ hva = gfn_to_hva(kvm, args->start_gfn + i);
+ if (kvm_is_error_hva(hva)) {
+ r = -EFAULT;
+ break;
+ }
+
+ pgstev = bits[i];
+ pgstev = pgstev << 24;
+ mask &= _PGSTE_GPS_USAGE_MASK;
+ set_pgste_bits(kvm->mm, hva, mask, pgstev);
+ }
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ up_read(&kvm->mm->mmap_sem);
+
+ if (!kvm->mm->context.use_cmma) {
+ down_write(&kvm->mm->mmap_sem);
+ kvm->mm->context.use_cmma = 1;
+ up_write(&kvm->mm->mmap_sem);
+ }
+out:
+ vfree(bits);
+ return r;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -1364,6 +1674,29 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_s390_set_skeys(kvm, &args);
break;
}
+ case KVM_S390_GET_CMMA_BITS: {
+ struct kvm_s390_cmma_log args;
+
+ r = -EFAULT;
+ if (copy_from_user(&args, argp, sizeof(args)))
+ break;
+ r = kvm_s390_get_cmma_bits(kvm, &args);
+ if (!r) {
+ r = copy_to_user(argp, &args, sizeof(args));
+ if (r)
+ r = -EFAULT;
+ }
+ break;
+ }
+ case KVM_S390_SET_CMMA_BITS: {
+ struct kvm_s390_cmma_log args;
+
+ r = -EFAULT;
+ if (copy_from_user(&args, argp, sizeof(args)))
+ break;
+ r = kvm_s390_set_cmma_bits(kvm, &args);
+ break;
+ }
default:
r = -ENOTTY;
}
@@ -1633,6 +1966,10 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_s390_destroy_adapters(kvm);
kvm_s390_clear_float_irqs(kvm);
kvm_s390_vsie_destroy(kvm);
+ if (kvm->arch.migration_state) {
+ vfree(kvm->arch.migration_state->pgste_bitmap);
+ kfree(kvm->arch.migration_state);
+ }
KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
@@ -1977,7 +2314,6 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
if (!vcpu->arch.sie_block->cbrlo)
return -ENOMEM;
- vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
return 0;
}
@@ -2069,6 +2405,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
if (!vcpu)
goto out;
+ BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
if (!sie_page)
goto out_free_cpu;
@@ -2489,6 +2826,27 @@ retry:
goto retry;
}
+ if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
+ /*
+ * Disable CMMA virtualization; we will emulate the ESSA
+ * instruction manually, in order to provide additional
+ * functionalities needed for live migration.
+ */
+ vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
+ goto retry;
+ }
+
+ if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
+ /*
+ * Re-enable CMMA virtualization if CMMA is available and
+ * was used.
+ */
+ if ((vcpu->kvm->arch.use_cmma) &&
+ (vcpu->kvm->mm->context.use_cmma))
+ vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
+ goto retry;
+ }
+
/* nothing to do, just clear the request */
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
@@ -2683,6 +3041,9 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
+ struct mcck_volatile_info *mcck_info;
+ struct sie_page *sie_page;
+
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
vcpu->arch.sie_block->icptcode);
trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
@@ -2693,6 +3054,15 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
+ if (exit_reason == -EINTR) {
+ VCPU_EVENT(vcpu, 3, "%s", "machine check");
+ sie_page = container_of(vcpu->arch.sie_block,
+ struct sie_page, sie_block);
+ mcck_info = &sie_page->mcck_info;
+ kvm_s390_reinject_machine_check(vcpu, mcck_info);
+ return 0;
+ }
+
if (vcpu->arch.sie_block->icptcode > 0) {
int rc = kvm_handle_sie_intercept(vcpu);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 55f5c8457d6d..6fedc8bc7a37 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -397,4 +397,6 @@ static inline int kvm_s390_use_sca_entries(void)
*/
return sclp.has_sigpif;
}
+void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
+ struct mcck_volatile_info *mcck_info);
#endif
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index c03106c428cf..a226c459809b 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -24,6 +24,7 @@
#include <asm/ebcdic.h>
#include <asm/sysinfo.h>
#include <asm/pgtable.h>
+#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/io.h>
@@ -949,13 +950,72 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
return 0;
}
+static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
+{
+ struct kvm_s390_migration_state *ms = vcpu->kvm->arch.migration_state;
+ int r1, r2, nappended, entries;
+ unsigned long gfn, hva, res, pgstev, ptev;
+ unsigned long *cbrlo;
+
+ /*
+ * We don't need to set SD.FPF.SK to 1 here, because if we have a
+ * machine check here we either handle it or crash
+ */
+
+ kvm_s390_get_regs_rre(vcpu, &r1, &r2);
+ gfn = vcpu->run->s.regs.gprs[r2] >> PAGE_SHIFT;
+ hva = gfn_to_hva(vcpu->kvm, gfn);
+ entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
+
+ if (kvm_is_error_hva(hva))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ nappended = pgste_perform_essa(vcpu->kvm->mm, hva, orc, &ptev, &pgstev);
+ if (nappended < 0) {
+ res = orc ? 0x10 : 0;
+ vcpu->run->s.regs.gprs[r1] = res; /* Exception Indication */
+ return 0;
+ }
+ res = (pgstev & _PGSTE_GPS_USAGE_MASK) >> 22;
+ /*
+ * Set the block-content state part of the result. 0 means resident, so
+ * nothing to do if the page is valid. 2 is for preserved pages
+ * (non-present and non-zero), and 3 for zero pages (non-present and
+ * zero).
+ */
+ if (ptev & _PAGE_INVALID) {
+ res |= 2;
+ if (pgstev & _PGSTE_GPS_ZERO)
+ res |= 1;
+ }
+ vcpu->run->s.regs.gprs[r1] = res;
+ /*
+ * It is possible that all the normal 511 slots were full, in which case
+ * we will now write in the 512th slot, which is reserved for host use.
+ * In both cases we let the normal essa handling code process all the
+ * slots, including the reserved one, if needed.
+ */
+ if (nappended > 0) {
+ cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo & PAGE_MASK);
+ cbrlo[entries] = gfn << PAGE_SHIFT;
+ }
+
+ if (orc) {
+ /* increment only if we are really flipping the bit to 1 */
+ if (!test_and_set_bit(gfn, ms->pgste_bitmap))
+ atomic64_inc(&ms->dirty_pages);
+ }
+
+ return nappended;
+}
+
static int handle_essa(struct kvm_vcpu *vcpu)
{
/* entries expected to be 1FF */
int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
unsigned long *cbrlo;
struct gmap *gmap;
- int i;
+ int i, orc;
VCPU_EVENT(vcpu, 4, "ESSA: release %d pages", entries);
gmap = vcpu->arch.gmap;
@@ -965,12 +1025,45 @@ static int handle_essa(struct kvm_vcpu *vcpu)
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
-
- if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6)
+ /* Check for invalid operation request code */
+ orc = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
+ if (orc > ESSA_MAX)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- /* Retry the ESSA instruction */
- kvm_s390_retry_instr(vcpu);
+ if (likely(!vcpu->kvm->arch.migration_state)) {
+ /*
+ * CMMA is enabled in the KVM settings, but is disabled in
+ * the SIE block and in the mm_context, and we are not doing
+ * a migration. Enable CMMA in the mm_context.
+ * Since we need to take a write lock to write to the context
+ * to avoid races with storage keys handling, we check if the
+ * value really needs to be written to; if the value is
+ * already correct, we do nothing and avoid the lock.
+ */
+ if (vcpu->kvm->mm->context.use_cmma == 0) {
+ down_write(&vcpu->kvm->mm->mmap_sem);
+ vcpu->kvm->mm->context.use_cmma = 1;
+ up_write(&vcpu->kvm->mm->mmap_sem);
+ }
+ /*
+ * If we are here, we are supposed to have CMMA enabled in
+ * the SIE block. Enabling CMMA works on a per-CPU basis,
+ * while the context use_cmma flag is per process.
+ * It's possible that the context flag is enabled and the
+ * SIE flag is not, so we set the flag always; if it was
+ * already set, nothing changes, otherwise we enable it
+ * on this CPU too.
+ */
+ vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
+ /* Retry the ESSA instruction */
+ kvm_s390_retry_instr(vcpu);
+ } else {
+ /* Account for the possible extra cbrl entry */
+ i = do_essa(vcpu, orc);
+ if (i < 0)
+ return i;
+ entries += i;
+ }
vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
down_read(&gmap->mm->mmap_sem);
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 4719ecb9ab42..715c19c45d9a 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -26,16 +26,21 @@
struct vsie_page {
struct kvm_s390_sie_block scb_s; /* 0x0000 */
+ /*
+ * the backup info for machine check. ensure it's at
+ * the same offset as that in struct sie_page!
+ */
+ struct mcck_volatile_info mcck_info; /* 0x0200 */
/* the pinned originial scb */
- struct kvm_s390_sie_block *scb_o; /* 0x0200 */
+ struct kvm_s390_sie_block *scb_o; /* 0x0218 */
/* the shadow gmap in use by the vsie_page */
- struct gmap *gmap; /* 0x0208 */
+ struct gmap *gmap; /* 0x0220 */
/* address of the last reported fault to guest2 */
- unsigned long fault_addr; /* 0x0210 */
- __u8 reserved[0x0700 - 0x0218]; /* 0x0218 */
+ unsigned long fault_addr; /* 0x0228 */
+ __u8 reserved[0x0700 - 0x0230]; /* 0x0230 */
struct kvm_s390_crypto_cb crycb; /* 0x0700 */
__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */
-} __packed;
+};
/* trigger a validity icpt for the given scb */
static int set_validity_icpt(struct kvm_s390_sie_block *scb,
@@ -801,6 +806,8 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+ struct mcck_volatile_info *mcck_info;
+ struct sie_page *sie_page;
int rc;
handle_last_fault(vcpu, vsie_page);
@@ -822,6 +829,14 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
local_irq_enable();
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ if (rc == -EINTR) {
+ VCPU_EVENT(vcpu, 3, "%s", "machine check");
+ sie_page = container_of(scb_s, struct sie_page, sie_block);
+ mcck_info = &sie_page->mcck_info;
+ kvm_s390_reinject_machine_check(vcpu, mcck_info);
+ return 0;
+ }
+
if (rc > 0)
rc = 0; /* we could still have an icpt */
else if (rc == -EFAULT)
diff --git a/arch/s390/lib/probes.c b/arch/s390/lib/probes.c
index ae90e1ae3607..1963ddbf4ab3 100644
--- a/arch/s390/lib/probes.c
+++ b/arch/s390/lib/probes.c
@@ -4,6 +4,7 @@
* Copyright IBM Corp. 2014
*/
+#include <linux/errno.h>
#include <asm/kprobes.h>
#include <asm/dis.h>
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index 1e5bb2b86c42..b3bd3f23b8e8 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -337,8 +337,8 @@ long __strncpy_from_user(char *dst, const char __user *src, long size)
return 0;
done = 0;
do {
- offset = (size_t)src & ~PAGE_MASK;
- len = min(size - done, PAGE_SIZE - offset);
+ offset = (size_t)src & (L1_CACHE_BYTES - 1);
+ len = min(size - done, L1_CACHE_BYTES - offset);
if (copy_from_user(dst, src, len))
return -EFAULT;
len_str = strnlen(dst, len);