diff options
Diffstat (limited to 'arch/s390')
-rw-r--r-- | arch/s390/Kconfig | 1 | ||||
-rw-r--r-- | arch/s390/configs/debug_defconfig | 3 | ||||
-rw-r--r-- | arch/s390/configs/defconfig | 3 | ||||
-rw-r--r-- | arch/s390/include/asm/cpu_mcf.h | 2 | ||||
-rw-r--r-- | arch/s390/include/asm/entry-common.h | 10 | ||||
-rw-r--r-- | arch/s390/include/asm/kvm_host.h | 5 | ||||
-rw-r--r-- | arch/s390/include/asm/pci.h | 2 | ||||
-rw-r--r-- | arch/s390/include/asm/smp.h | 1 | ||||
-rw-r--r-- | arch/s390/include/asm/vdso/gettimeofday.h | 3 | ||||
-rw-r--r-- | arch/s390/kernel/perf_cpum_cf.c | 12 | ||||
-rw-r--r-- | arch/s390/kernel/perf_cpum_cf_common.c | 46 | ||||
-rw-r--r-- | arch/s390/kernel/perf_cpum_cf_diag.c | 54 | ||||
-rw-r--r-- | arch/s390/kernel/setup.c | 4 | ||||
-rw-r--r-- | arch/s390/kernel/smp.c | 1 | ||||
-rw-r--r-- | arch/s390/kernel/syscall.c | 1 | ||||
-rw-r--r-- | arch/s390/kernel/syscalls/syscall.tbl | 3 | ||||
-rw-r--r-- | arch/s390/kernel/traps.c | 2 | ||||
-rw-r--r-- | arch/s390/kvm/diag.c | 31 | ||||
-rw-r--r-- | arch/s390/kvm/gaccess.c | 30 | ||||
-rw-r--r-- | arch/s390/kvm/gaccess.h | 60 | ||||
-rw-r--r-- | arch/s390/kvm/kvm-s390.c | 15 | ||||
-rw-r--r-- | arch/s390/kvm/kvm-s390.h | 8 | ||||
-rw-r--r-- | arch/s390/kvm/vsie.c | 109 | ||||
-rw-r--r-- | arch/s390/pci/pci.c | 10 | ||||
-rw-r--r-- | arch/s390/pci/pci_event.c | 28 |
25 files changed, 332 insertions, 112 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index d72989591223..b4c7c34069f8 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -140,6 +140,7 @@ config S390 select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN select HAVE_ARCH_KASAN_VMALLOC + select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY select HAVE_ARCH_TRACEHOOK diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 6422618a4f75..86afcc6b56bf 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -387,6 +387,7 @@ CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m CONFIG_PCI=y +CONFIG_PCI_IOV=y # CONFIG_PCIEASPM is not set CONFIG_PCI_DEBUG=y CONFIG_HOTPLUG_PCI=y @@ -548,7 +549,7 @@ CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set CONFIG_LEGACY_PTY_COUNT=0 -CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_CONSOLE=m CONFIG_HW_RANDOM_VIRTIO=m CONFIG_RAW_DRIVER=m CONFIG_HANGCHECK_TIMER=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 371a529546aa..71b49ea5b058 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -377,6 +377,7 @@ CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m CONFIG_PCI=y +CONFIG_PCI_IOV=y # CONFIG_PCIEASPM is not set CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y @@ -540,7 +541,7 @@ CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set CONFIG_LEGACY_PTY_COUNT=0 -CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_CONSOLE=m CONFIG_HW_RANDOM_VIRTIO=m CONFIG_RAW_DRIVER=m CONFIG_HANGCHECK_TIMER=m diff --git a/arch/s390/include/asm/cpu_mcf.h b/arch/s390/include/asm/cpu_mcf.h index 649b9fc60685..3e4cbcb7c4cc 100644 --- a/arch/s390/include/asm/cpu_mcf.h +++ b/arch/s390/include/asm/cpu_mcf.h @@ -123,4 +123,6 @@ static inline int stccm_avail(void) return test_facility(142); } +size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset, + struct cpumf_ctr_info *info); #endif /* _ASM_S390_CPU_MCF_H */ diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h index 9cceb26ed63f..baa8005090c3 100644 --- a/arch/s390/include/asm/entry-common.h +++ b/arch/s390/include/asm/entry-common.h @@ -4,9 +4,11 @@ #include <linux/sched.h> #include <linux/audit.h> +#include <linux/randomize_kstack.h> #include <linux/tracehook.h> #include <linux/processor.h> #include <linux/uaccess.h> +#include <asm/timex.h> #include <asm/fpu/api.h> #define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_GUARDED_STORAGE | _TIF_PER_TRAP) @@ -48,6 +50,14 @@ static __always_inline void arch_exit_to_user_mode(void) #define arch_exit_to_user_mode arch_exit_to_user_mode +static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, + unsigned long ti_work) +{ + choose_random_kstack_offset(get_tod_clock_fast() & 0xff); +} + +#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare + static inline bool on_thread_stack(void) { return !(((unsigned long)(current->stack) ^ current_stack_pointer()) & ~(THREAD_SIZE - 1)); diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 6bcfc5614bbc..8925f3969478 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -454,6 +454,7 @@ struct kvm_vcpu_stat { u64 diagnose_44; u64 diagnose_9c; u64 diagnose_9c_ignored; + u64 diagnose_9c_forward; u64 diagnose_258; u64 diagnose_308; u64 diagnose_500; @@ -700,6 +701,10 @@ struct kvm_hw_bp_info_arch { #define guestdbg_exit_pending(vcpu) (guestdbg_enabled(vcpu) && \ (vcpu->guest_debug & KVM_GUESTDBG_EXIT_PENDING)) +#define KVM_GUESTDBG_VALID_MASK \ + (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP |\ + KVM_GUESTDBG_USE_HW_BP | KVM_GUESTDBG_EXIT_PENDING) + struct kvm_guestdbg_info_arch { unsigned long cr0; unsigned long cr9; diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 35c2af9371a9..10b67f8aab99 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -204,7 +204,7 @@ extern unsigned int s390_pci_no_rid; struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state); int zpci_enable_device(struct zpci_dev *); int zpci_disable_device(struct zpci_dev *); -int zpci_configure_device(struct zpci_dev *zdev, u32 fh); +int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh); int zpci_deconfigure_device(struct zpci_dev *zdev); int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 01e360004481..e317fd4866c1 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -63,5 +63,6 @@ extern void __noreturn cpu_die(void); extern void __cpu_die(unsigned int cpu); extern int __cpu_disable(void); extern void schedule_mcck_handler(void); +void notrace smp_yield_cpu(int cpu); #endif /* __ASM_SMP_H */ diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h index ed89ef742530..383c53c3dddd 100644 --- a/arch/s390/include/asm/vdso/gettimeofday.h +++ b/arch/s390/include/asm/vdso/gettimeofday.h @@ -68,7 +68,8 @@ long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts) } #ifdef CONFIG_TIME_NS -static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) +static __always_inline +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) { return _timens_data; } diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index b3beef64d3d4..31a605bcbc6e 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -230,9 +230,7 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type) /* No support for kernel space counters only */ } else if (!attr->exclude_kernel && attr->exclude_user) { return -EOPNOTSUPP; - - /* Count user and kernel space */ - } else { + } else { /* Count user and kernel space */ if (ev >= ARRAY_SIZE(cpumf_generic_events_basic)) return -EOPNOTSUPP; ev = cpumf_generic_events_basic[ev]; @@ -402,12 +400,12 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags) */ if (!atomic_dec_return(&cpuhw->ctr_set[hwc->config_base])) ctr_set_stop(&cpuhw->state, hwc->config_base); - event->hw.state |= PERF_HES_STOPPED; + hwc->state |= PERF_HES_STOPPED; } if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { hw_perf_event_update(event); - event->hw.state |= PERF_HES_UPTODATE; + hwc->state |= PERF_HES_UPTODATE; } } @@ -430,8 +428,6 @@ static int cpumf_pmu_add(struct perf_event *event, int flags) if (flags & PERF_EF_START) cpumf_pmu_start(event, PERF_EF_RELOAD); - perf_event_update_userpage(event); - return 0; } @@ -451,8 +447,6 @@ static void cpumf_pmu_del(struct perf_event *event, int flags) */ if (!atomic_read(&cpuhw->ctr_set[event->hw.config_base])) ctr_set_disable(&cpuhw->state, event->hw.config_base); - - perf_event_update_userpage(event); } /* diff --git a/arch/s390/kernel/perf_cpum_cf_common.c b/arch/s390/kernel/perf_cpum_cf_common.c index 3bced89caffb..6d53215c8484 100644 --- a/arch/s390/kernel/perf_cpum_cf_common.c +++ b/arch/s390/kernel/perf_cpum_cf_common.c @@ -170,6 +170,52 @@ static int cpum_cf_offline_cpu(unsigned int cpu) return cpum_cf_setup(cpu, PMC_RELEASE); } +/* Return the maximum possible counter set size (in number of 8 byte counters) + * depending on type and model number. + */ +size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset, + struct cpumf_ctr_info *info) +{ + size_t ctrset_size = 0; + + switch (ctrset) { + case CPUMF_CTR_SET_BASIC: + if (info->cfvn >= 1) + ctrset_size = 6; + break; + case CPUMF_CTR_SET_USER: + if (info->cfvn == 1) + ctrset_size = 6; + else if (info->cfvn >= 3) + ctrset_size = 2; + break; + case CPUMF_CTR_SET_CRYPTO: + if (info->csvn >= 1 && info->csvn <= 5) + ctrset_size = 16; + else if (info->csvn == 6) + ctrset_size = 20; + break; + case CPUMF_CTR_SET_EXT: + if (info->csvn == 1) + ctrset_size = 32; + else if (info->csvn == 2) + ctrset_size = 48; + else if (info->csvn >= 3 && info->csvn <= 5) + ctrset_size = 128; + else if (info->csvn == 6) + ctrset_size = 160; + break; + case CPUMF_CTR_SET_MT_DIAG: + if (info->csvn > 3) + ctrset_size = 48; + break; + case CPUMF_CTR_SET_MAX: + break; + } + + return ctrset_size; +} + static int __init cpum_cf_init(void) { int rc; diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c index 2e3e7edbe3a0..08c985c1097c 100644 --- a/arch/s390/kernel/perf_cpum_cf_diag.c +++ b/arch/s390/kernel/perf_cpum_cf_diag.c @@ -316,52 +316,6 @@ static void cf_diag_read(struct perf_event *event) debug_sprintf_event(cf_diag_dbg, 5, "%s event %p\n", __func__, event); } -/* Return the maximum possible counter set size (in number of 8 byte counters) - * depending on type and model number. - */ -static size_t cf_diag_ctrset_size(enum cpumf_ctr_set ctrset, - struct cpumf_ctr_info *info) -{ - size_t ctrset_size = 0; - - switch (ctrset) { - case CPUMF_CTR_SET_BASIC: - if (info->cfvn >= 1) - ctrset_size = 6; - break; - case CPUMF_CTR_SET_USER: - if (info->cfvn == 1) - ctrset_size = 6; - else if (info->cfvn >= 3) - ctrset_size = 2; - break; - case CPUMF_CTR_SET_CRYPTO: - if (info->csvn >= 1 && info->csvn <= 5) - ctrset_size = 16; - else if (info->csvn == 6) - ctrset_size = 20; - break; - case CPUMF_CTR_SET_EXT: - if (info->csvn == 1) - ctrset_size = 32; - else if (info->csvn == 2) - ctrset_size = 48; - else if (info->csvn >= 3 && info->csvn <= 5) - ctrset_size = 128; - else if (info->csvn == 6) - ctrset_size = 160; - break; - case CPUMF_CTR_SET_MT_DIAG: - if (info->csvn > 3) - ctrset_size = 48; - break; - case CPUMF_CTR_SET_MAX: - break; - } - - return ctrset_size; -} - /* Calculate memory needed to store all counter sets together with header and * trailer data. This is independend of the counter set authorization which * can vary depending on the configuration. @@ -372,7 +326,7 @@ static size_t cf_diag_ctrset_maxsize(struct cpumf_ctr_info *info) enum cpumf_ctr_set i; for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { - size_t size = cf_diag_ctrset_size(i, info); + size_t size = cpum_cf_ctrset_size(i, info); if (size) max_size += size * sizeof(u64) + @@ -405,7 +359,7 @@ static size_t cf_diag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset, ctrdata->def = CF_DIAG_CTRSET_DEF; ctrdata->set = ctrset; ctrdata->res1 = 0; - ctrset_size = cf_diag_ctrset_size(ctrset, &cpuhw->info); + ctrset_size = cpum_cf_ctrset_size(ctrset, &cpuhw->info); if (ctrset_size) { /* Save data */ need = ctrset_size * sizeof(u64) + sizeof(*ctrdata); @@ -845,7 +799,7 @@ static void cf_diag_cpu_read(void *parm) if (!(p->sets & cpumf_ctr_ctl[set])) continue; /* Counter set not in list */ - set_size = cf_diag_ctrset_size(set, &cpuhw->info); + set_size = cpum_cf_ctrset_size(set, &cpuhw->info); space = sizeof(csd->data) - csd->used; space = cf_diag_cpuset_read(sp, set, set_size, space); if (space) { @@ -975,7 +929,7 @@ static size_t cf_diag_needspace(unsigned int sets) for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { if (!(sets & cpumf_ctr_ctl[i])) continue; - bytes += cf_diag_ctrset_size(i, &cpuhw->info) * sizeof(u64) + + bytes += cpum_cf_ctrset_size(i, &cpuhw->info) * sizeof(u64) + sizeof(((struct s390_ctrset_setdata *)0)->set) + sizeof(((struct s390_ctrset_setdata *)0)->no_cnts); } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 72134f9f6ff5..5aab59ad5688 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -937,9 +937,9 @@ static int __init setup_hwcaps(void) if (MACHINE_HAS_VX) { elf_hwcap |= HWCAP_S390_VXRS; if (test_facility(134)) - elf_hwcap |= HWCAP_S390_VXRS_EXT; - if (test_facility(135)) elf_hwcap |= HWCAP_S390_VXRS_BCD; + if (test_facility(135)) + elf_hwcap |= HWCAP_S390_VXRS_EXT; if (test_facility(148)) elf_hwcap |= HWCAP_S390_VXRS_EXT2; if (test_facility(152)) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 58c8afa3da65..2fec2b80d35d 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -429,6 +429,7 @@ void notrace smp_yield_cpu(int cpu) asm volatile("diag %0,0,0x9c" : : "d" (pcpu_devices[cpu].address)); } +EXPORT_SYMBOL_GPL(smp_yield_cpu); /* * Send cpus emergency shutdown signal. This gives the cpus the diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c index bc8e650e377d..4e5cc7d2364e 100644 --- a/arch/s390/kernel/syscall.c +++ b/arch/s390/kernel/syscall.c @@ -142,6 +142,7 @@ void do_syscall(struct pt_regs *regs) void noinstr __do_syscall(struct pt_regs *regs, int per_trap) { + add_random_kstack_offset(); enter_from_user_mode(regs); memcpy(®s->gprs[8], S390_lowcore.save_area_sync, 8 * sizeof(unsigned long)); diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index a421905c36e8..7e4a2aba366d 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -446,3 +446,6 @@ 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr sys_mount_setattr 443 common quotactl_path sys_quotactl_path sys_quotactl_path +444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset +445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule +446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 63021d484626..8dd23c703718 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -17,6 +17,7 @@ #include "asm/ptrace.h" #include <linux/kprobes.h> #include <linux/kdebug.h> +#include <linux/randomize_kstack.h> #include <linux/extable.h> #include <linux/ptrace.h> #include <linux/sched.h> @@ -301,6 +302,7 @@ void noinstr __do_pgm_check(struct pt_regs *regs) unsigned int trapnr, syscall_redirect = 0; irqentry_state_t state; + add_random_kstack_offset(); regs->int_code = *(u32 *)&S390_lowcore.pgm_ilc; regs->int_parm_long = S390_lowcore.trans_exc_code; diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 5b8ec1c447e1..02c146f9e5cd 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -150,6 +150,19 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu) return 0; } +static int forward_cnt; +static unsigned long cur_slice; + +static int diag9c_forwarding_overrun(void) +{ + /* Reset the count on a new slice */ + if (time_after(jiffies, cur_slice)) { + cur_slice = jiffies; + forward_cnt = diag9c_forwarding_hz / HZ; + } + return forward_cnt-- <= 0 ? 1 : 0; +} + static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) { struct kvm_vcpu *tcpu; @@ -167,9 +180,21 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) if (!tcpu) goto no_yield; - /* target already running */ - if (READ_ONCE(tcpu->cpu) >= 0) - goto no_yield; + /* target guest VCPU already running */ + if (READ_ONCE(tcpu->cpu) >= 0) { + if (!diag9c_forwarding_hz || diag9c_forwarding_overrun()) + goto no_yield; + + /* target host CPU already running */ + if (!vcpu_is_preempted(tcpu->cpu)) + goto no_yield; + smp_yield_cpu(tcpu->cpu); + VCPU_EVENT(vcpu, 5, + "diag time slice end directed to %d: yield forwarded", + tid); + vcpu->stat.diagnose_9c_forward++; + return 0; + } if (kvm_vcpu_yield_to(tcpu) <= 0) goto no_yield; diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 6d6b57059493..b9f85b2dc053 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -976,7 +976,9 @@ int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra) * kvm_s390_shadow_tables - walk the guest page table and create shadow tables * @sg: pointer to the shadow guest address space structure * @saddr: faulting address in the shadow gmap - * @pgt: pointer to the page table address result + * @pgt: pointer to the beginning of the page table for the given address if + * successful (return value 0), or to the first invalid DAT entry in + * case of exceptions (return value > 0) * @fake: pgt references contiguous guest memory block, not a pgtable */ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, @@ -1034,6 +1036,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, rfte.val = ptr; goto shadow_r2t; } + *pgt = ptr + vaddr.rfx * 8; rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val); if (rc) return rc; @@ -1060,6 +1063,7 @@ shadow_r2t: rste.val = ptr; goto shadow_r3t; } + *pgt = ptr + vaddr.rsx * 8; rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val); if (rc) return rc; @@ -1087,6 +1091,7 @@ shadow_r3t: rtte.val = ptr; goto shadow_sgt; } + *pgt = ptr + vaddr.rtx * 8; rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val); if (rc) return rc; @@ -1123,6 +1128,7 @@ shadow_sgt: ste.val = ptr; goto shadow_pgt; } + *pgt = ptr + vaddr.sx * 8; rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val); if (rc) return rc; @@ -1157,6 +1163,8 @@ shadow_pgt: * @vcpu: virtual cpu * @sg: pointer to the shadow guest address space structure * @saddr: faulting address in the shadow gmap + * @datptr: will contain the address of the faulting DAT table entry, or of + * the valid leaf, plus some flags * * Returns: - 0 if the shadow fault was successfully resolved * - > 0 (pgm exception code) on exceptions while faulting @@ -1165,11 +1173,11 @@ shadow_pgt: * - -ENOMEM if out of memory */ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, - unsigned long saddr) + unsigned long saddr, unsigned long *datptr) { union vaddress vaddr; union page_table_entry pte; - unsigned long pgt; + unsigned long pgt = 0; int dat_protection, fake; int rc; @@ -1191,8 +1199,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, pte.val = pgt + vaddr.px * PAGE_SIZE; goto shadow_page; } - if (!rc) - rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val); + + switch (rc) { + case PGM_SEGMENT_TRANSLATION: + case PGM_REGION_THIRD_TRANS: + case PGM_REGION_SECOND_TRANS: + case PGM_REGION_FIRST_TRANS: + pgt |= PEI_NOT_PTE; + break; + case 0: + pgt += vaddr.px * 8; + rc = gmap_read_table(sg->parent, pgt, &pte.val); + } + if (datptr) + *datptr = pgt | dat_protection * PEI_DAT_PROT; if (!rc && pte.i) rc = PGM_PAGE_TRANSLATION; if (!rc && pte.z) diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index f4c51756c462..7c72a5e3449f 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -18,17 +18,14 @@ /** * kvm_s390_real_to_abs - convert guest real address to guest absolute address - * @vcpu - guest virtual cpu + * @prefix - guest prefix * @gra - guest real address * * Returns the guest absolute address that corresponds to the passed guest real - * address @gra of a virtual guest cpu by applying its prefix. + * address @gra of by applying the given prefix. */ -static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu, - unsigned long gra) +static inline unsigned long _kvm_s390_real_to_abs(u32 prefix, unsigned long gra) { - unsigned long prefix = kvm_s390_get_prefix(vcpu); - if (gra < 2 * PAGE_SIZE) gra += prefix; else if (gra >= prefix && gra < prefix + 2 * PAGE_SIZE) @@ -37,6 +34,43 @@ static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu, } /** + * kvm_s390_real_to_abs - convert guest real address to guest absolute address + * @vcpu - guest virtual cpu + * @gra - guest real address + * + * Returns the guest absolute address that corresponds to the passed guest real + * address @gra of a virtual guest cpu by applying its prefix. + */ +static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu, + unsigned long gra) +{ + return _kvm_s390_real_to_abs(kvm_s390_get_prefix(vcpu), gra); +} + +/** + * _kvm_s390_logical_to_effective - convert guest logical to effective address + * @psw: psw of the guest + * @ga: guest logical address + * + * Convert a guest logical address to an effective address by applying the + * rules of the addressing mode defined by bits 31 and 32 of the given PSW + * (extendended/basic addressing mode). + * + * Depending on the addressing mode, the upper 40 bits (24 bit addressing + * mode), 33 bits (31 bit addressing mode) or no bits (64 bit addressing + * mode) of @ga will be zeroed and the remaining bits will be returned. + */ +static inline unsigned long _kvm_s390_logical_to_effective(psw_t *psw, + unsigned long ga) +{ + if (psw_bits(*psw).eaba == PSW_BITS_AMODE_64BIT) + return ga; + if (psw_bits(*psw).eaba == PSW_BITS_AMODE_31BIT) + return ga & ((1UL << 31) - 1); + return ga & ((1UL << 24) - 1); +} + +/** * kvm_s390_logical_to_effective - convert guest logical to effective address * @vcpu: guest virtual cpu * @ga: guest logical address @@ -52,13 +86,7 @@ static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu, static inline unsigned long kvm_s390_logical_to_effective(struct kvm_vcpu *vcpu, unsigned long ga) { - psw_t *psw = &vcpu->arch.sie_block->gpsw; - - if (psw_bits(*psw).eaba == PSW_BITS_AMODE_64BIT) - return ga; - if (psw_bits(*psw).eaba == PSW_BITS_AMODE_31BIT) - return ga & ((1UL << 31) - 1); - return ga & ((1UL << 24) - 1); + return _kvm_s390_logical_to_effective(&vcpu->arch.sie_block->gpsw, ga); } /* @@ -359,7 +387,11 @@ void ipte_unlock(struct kvm_vcpu *vcpu); int ipte_lock_held(struct kvm_vcpu *vcpu); int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra); +/* MVPG PEI indication bits */ +#define PEI_DAT_PROT 2 +#define PEI_NOT_PTE 4 + int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow, - unsigned long saddr); + unsigned long saddr, unsigned long *datptr); #endif /* __KVM_S390_GACCESS_H */ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2f09e9d7dc95..1296fc10f80c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -158,6 +158,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { VCPU_STAT("instruction_diag_44", diagnose_44), VCPU_STAT("instruction_diag_9c", diagnose_9c), VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored), + VCPU_STAT("diag_9c_forward", diagnose_9c_forward), VCPU_STAT("instruction_diag_258", diagnose_258), VCPU_STAT("instruction_diag_308", diagnose_308), VCPU_STAT("instruction_diag_500", diagnose_500), @@ -185,6 +186,11 @@ static bool use_gisa = true; module_param(use_gisa, bool, 0644); MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it."); +/* maximum diag9c forwarding per second */ +unsigned int diag9c_forwarding_hz; +module_param(diag9c_forwarding_hz, uint, 0644); +MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off"); + /* * For now we handle at most 16 double words as this is what the s390 base * kernel handles and stores in the prefix page. If we ever need to go beyond @@ -544,6 +550,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_DIAG318: r = 1; break; + case KVM_CAP_SET_GUEST_DEBUG2: + r = KVM_GUESTDBG_VALID_MASK; + break; case KVM_CAP_S390_HPAGE_1M: r = 0; if (hpage && !kvm_is_ucontrol(kvm)) @@ -4307,16 +4316,16 @@ static void store_regs_fmt2(struct kvm_vcpu *vcpu) kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val; if (MACHINE_HAS_GS) { + preempt_disable(); __ctl_set_bit(2, 4); if (vcpu->arch.gs_enabled) save_gs_cb(current->thread.gs_cb); - preempt_disable(); current->thread.gs_cb = vcpu->arch.host_gscb; restore_gs_cb(vcpu->arch.host_gscb); - preempt_enable(); if (!vcpu->arch.host_gscb) __ctl_clear_bit(2, 4); vcpu->arch.host_gscb = NULL; + preempt_enable(); } /* SIE will save etoken directly into SDNX and therefore kvm_run */ } @@ -4542,7 +4551,7 @@ int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) /* * As we are starting a second VCPU, we have to disable * the IBS facility on all VCPUs to remove potentially - * oustanding ENABLE requests. + * outstanding ENABLE requests. */ __disable_ibs_on_all_vcpus(vcpu->kvm); } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 79dcd647b378..9fad25109b0d 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -471,4 +471,12 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu, * @kvm: the KVM guest */ void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm); + +/** + * diag9c_forwarding_hz + * + * Set the maximum number of diag9c forwarding per second + */ +extern unsigned int diag9c_forwarding_hz; + #endif diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index bd803e091918..4002a24bc43a 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -417,11 +417,6 @@ static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) memcpy((void *)((u64)scb_o + 0xc0), (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0); break; - case ICPT_PARTEXEC: - /* MVPG only */ - memcpy((void *)((u64)scb_o + 0xc0), - (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0); - break; } if (scb_s->ihcpu != 0xffffU) @@ -620,10 +615,10 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) /* with mso/msl, the prefix lies at offset *mso* */ prefix += scb_s->mso; - rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix); + rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL); if (!rc && (scb_s->ecb & ECB_TE)) rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, - prefix + PAGE_SIZE); + prefix + PAGE_SIZE, NULL); /* * We don't have to mprotect, we will be called for all unshadows. * SIE will detect if protection applies and trigger a validity. @@ -914,7 +909,7 @@ static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) current->thread.gmap_addr, 1); rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, - current->thread.gmap_addr); + current->thread.gmap_addr, NULL); if (rc > 0) { rc = inject_fault(vcpu, rc, current->thread.gmap_addr, @@ -936,7 +931,7 @@ static void handle_last_fault(struct kvm_vcpu *vcpu, { if (vsie_page->fault_addr) kvm_s390_shadow_fault(vcpu, vsie_page->gmap, - vsie_page->fault_addr); + vsie_page->fault_addr, NULL); vsie_page->fault_addr = 0; } @@ -984,6 +979,98 @@ static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) } /* + * Get a register for a nested guest. + * @vcpu the vcpu of the guest + * @vsie_page the vsie_page for the nested guest + * @reg the register number, the upper 4 bits are ignored. + * returns: the value of the register. + */ +static u64 vsie_get_register(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, u8 reg) +{ + /* no need to validate the parameter and/or perform error handling */ + reg &= 0xf; + switch (reg) { + case 15: + return vsie_page->scb_s.gg15; + case 14: + return vsie_page->scb_s.gg14; + default: + return vcpu->run->s.regs.gprs[reg]; + } +} + +static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) +{ + struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; + unsigned long pei_dest, pei_src, src, dest, mask, prefix; + u64 *pei_block = &vsie_page->scb_o->mcic; + int edat, rc_dest, rc_src; + union ctlreg0 cr0; + + cr0.val = vcpu->arch.sie_block->gcr[0]; + edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8); + mask = _kvm_s390_logical_to_effective(&scb_s->gpsw, PAGE_MASK); + prefix = scb_s->prefix << GUEST_PREFIX_SHIFT; + + dest = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 20) & mask; + dest = _kvm_s390_real_to_abs(prefix, dest) + scb_s->mso; + src = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 16) & mask; + src = _kvm_s390_real_to_abs(prefix, src) + scb_s->mso; + + rc_dest = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, dest, &pei_dest); + rc_src = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, src, &pei_src); + /* + * Either everything went well, or something non-critical went wrong + * e.g. because of a race. In either case, simply retry. + */ + if (rc_dest == -EAGAIN || rc_src == -EAGAIN || (!rc_dest && !rc_src)) { + retry_vsie_icpt(vsie_page); + return -EAGAIN; + } + /* Something more serious went wrong, propagate the error */ + if (rc_dest < 0) + return rc_dest; + if (rc_src < 0) + return rc_src; + + /* The only possible suppressing exception: just deliver it */ + if (rc_dest == PGM_TRANSLATION_SPEC || rc_src == PGM_TRANSLATION_SPEC) { + clear_vsie_icpt(vsie_page); + rc_dest = kvm_s390_inject_program_int(vcpu, PGM_TRANSLATION_SPEC); + WARN_ON_ONCE(rc_dest); + return 1; + } + + /* + * Forward the PEI intercept to the guest if it was a page fault, or + * also for segment and region table faults if EDAT applies. + */ + if (edat) { + rc_dest = rc_dest == PGM_ASCE_TYPE ? rc_dest : 0; + rc_src = rc_src == PGM_ASCE_TYPE ? rc_src : 0; + } else { + rc_dest = rc_dest != PGM_PAGE_TRANSLATION ? rc_dest : 0; + rc_src = rc_src != PGM_PAGE_TRANSLATION ? rc_src : 0; + } + if (!rc_dest && !rc_src) { + pei_block[0] = pei_dest; + pei_block[1] = pei_src; + return 1; + } + + retry_vsie_icpt(vsie_page); + + /* + * The host has edat, and the guest does not, or it was an ASCE type + * exception. The host needs to inject the appropriate DAT interrupts + * into the guest. + */ + if (rc_dest) + return inject_fault(vcpu, rc_dest, dest, 1); + return inject_fault(vcpu, rc_src, src, 0); +} + +/* * Run the vsie on a shadow scb and a shadow gmap, without any further * sanity checks, handling SIE faults. * @@ -1071,6 +1158,10 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) if ((scb_s->ipa & 0xf000) != 0xf000) scb_s->ipa += 0x1000; break; + case ICPT_PARTEXEC: + if (scb_s->ipa == 0xb254) + rc = vsie_handle_mvpg(vcpu, vsie_page); + break; } return rc; } diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index c01b6dbac7cf..b0993e05affe 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -738,17 +738,19 @@ error: } /** - * zpci_configure_device() - Configure a zpci_dev + * zpci_scan_configured_device() - Scan a freshly configured zpci_dev * @zdev: The zpci_dev to be configured * @fh: The general function handle supplied by the platform * * Given a device in the configuration state Configured, enables, scans and - * adds it to the common code PCI subsystem. If any failure occurs, the - * zpci_dev is left disabled. + * adds it to the common code PCI subsystem if possible. If the PCI device is + * parked because we can not yet create a PCI bus because we have not seen + * function 0, it is ignored but will be scanned once function 0 appears. + * If any failure occurs, the zpci_dev is left disabled. * * Return: 0 on success, or an error code otherwise */ -int zpci_configure_device(struct zpci_dev *zdev, u32 fh) +int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh) { int rc; diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 1178b48a66df..cd447b96b4b1 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -76,8 +76,6 @@ void zpci_event_error(void *data) static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh) { - enum zpci_state state; - zdev->fh = fh; /* Give the driver a hint that the function is * already unusable. @@ -88,15 +86,12 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh) */ zpci_disable_device(zdev); zdev->state = ZPCI_FN_STATE_STANDBY; - if (!clp_get_state(zdev->fid, &state) && - state == ZPCI_FN_STATE_RESERVED) { - zpci_zdev_put(zdev); - } } static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); + enum zpci_state state; zpci_err("avail CCDF:\n"); zpci_err_hex(ccdf, sizeof(*ccdf)); @@ -113,7 +108,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) break; zdev->state = ZPCI_FN_STATE_CONFIGURED; } - zpci_configure_device(zdev, ccdf->fh); + zpci_scan_configured_device(zdev, ccdf->fh); break; case 0x0302: /* Reserved -> Standby */ if (!zdev) @@ -123,13 +118,28 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) break; case 0x0303: /* Deconfiguration requested */ if (zdev) { + /* The event may have been queued before we confirgured + * the device. + */ + if (zdev->state != ZPCI_FN_STATE_CONFIGURED) + break; zdev->fh = ccdf->fh; zpci_deconfigure_device(zdev); } break; case 0x0304: /* Configured -> Standby|Reserved */ - if (zdev) - zpci_event_hard_deconfigured(zdev, ccdf->fh); + if (zdev) { + /* The event may have been queued before we confirgured + * the device.: + */ + if (zdev->state == ZPCI_FN_STATE_CONFIGURED) + zpci_event_hard_deconfigured(zdev, ccdf->fh); + /* The 0x0304 event may immediately reserve the device */ + if (!clp_get_state(zdev->fid, &state) && + state == ZPCI_FN_STATE_RESERVED) { + zpci_zdev_put(zdev); + } + } break; case 0x0306: /* 0x308 or 0x302 for multiple devices */ zpci_remove_reserved_devices(); |