summaryrefslogtreecommitdiff
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig74
-rw-r--r--arch/s390/appldata/appldata_os.c2
-rw-r--r--arch/s390/boot/.gitignore1
-rw-r--r--arch/s390/boot/Makefile2
-rw-r--r--arch/s390/boot/compressed/.gitignore1
-rw-r--r--arch/s390/boot/install.sh17
-rw-r--r--arch/s390/boot/uv.c18
-rw-r--r--arch/s390/configs/debug_defconfig1
-rw-r--r--arch/s390/configs/defconfig1
-rw-r--r--arch/s390/crypto/aes_s390.c3
-rw-r--r--arch/s390/include/asm/Kbuild15
-rw-r--r--arch/s390/include/asm/futex.h2
-rw-r--r--arch/s390/include/asm/gmap.h6
-rw-r--r--arch/s390/include/asm/hw_irq.h1
-rw-r--r--arch/s390/include/asm/ipl.h1
-rw-r--r--arch/s390/include/asm/kvm_host.h117
-rw-r--r--arch/s390/include/asm/lowcore.h4
-rw-r--r--arch/s390/include/asm/mmu.h4
-rw-r--r--arch/s390/include/asm/mmu_context.h45
-rw-r--r--arch/s390/include/asm/numa.h13
-rw-r--r--arch/s390/include/asm/page.h26
-rw-r--r--arch/s390/include/asm/pci.h6
-rw-r--r--arch/s390/include/asm/pgalloc.h39
-rw-r--r--arch/s390/include/asm/pgtable.h35
-rw-r--r--arch/s390/include/asm/processor.h10
-rw-r--r--arch/s390/include/asm/qdio.h25
-rw-r--r--arch/s390/include/asm/setup.h7
-rw-r--r--arch/s390/include/asm/smp.h1
-rw-r--r--arch/s390/include/asm/topology.h15
-rw-r--r--arch/s390/include/asm/uv.h251
-rw-r--r--arch/s390/kernel/.gitignore1
-rw-r--r--arch/s390/kernel/Makefile4
-rw-r--r--arch/s390/kernel/asm-offsets.c2
-rw-r--r--arch/s390/kernel/diag.c4
-rw-r--r--arch/s390/kernel/entry.S65
-rw-r--r--arch/s390/kernel/entry.h2
-rw-r--r--arch/s390/kernel/ipl.c73
-rw-r--r--arch/s390/kernel/irq.c26
-rw-r--r--arch/s390/kernel/machine_kexec.c31
-rw-r--r--arch/s390/kernel/perf_cpum_cf_events.c123
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c44
-rw-r--r--arch/s390/kernel/pgm_check.S4
-rw-r--r--arch/s390/kernel/process.c1
-rw-r--r--arch/s390/kernel/processor.c34
-rw-r--r--arch/s390/kernel/setup.c13
-rw-r--r--arch/s390/kernel/signal.c4
-rw-r--r--arch/s390/kernel/smp.c17
-rw-r--r--arch/s390/kernel/suspend.c240
-rw-r--r--arch/s390/kernel/swsusp.S276
-rw-r--r--arch/s390/kernel/topology.c34
-rw-r--r--arch/s390/kernel/trace.c2
-rw-r--r--arch/s390/kernel/traps.c2
-rw-r--r--arch/s390/kernel/uv.c415
-rw-r--r--arch/s390/kernel/vdso64/.gitignore1
-rw-r--r--arch/s390/kvm/Kconfig4
-rw-r--r--arch/s390/kvm/Makefile2
-rw-r--r--arch/s390/kvm/diag.c6
-rw-r--r--arch/s390/kvm/gaccess.c23
-rw-r--r--arch/s390/kvm/intercept.c123
-rw-r--r--arch/s390/kvm/interrupt.c401
-rw-r--r--arch/s390/kvm/kvm-s390.c600
-rw-r--r--arch/s390/kvm/kvm-s390.h51
-rw-r--r--arch/s390/kvm/priv.c13
-rw-r--r--arch/s390/kvm/pv.c303
-rw-r--r--arch/s390/kvm/vsie.c1
-rw-r--r--arch/s390/mm/cmm.c46
-rw-r--r--arch/s390/mm/fault.c109
-rw-r--r--arch/s390/mm/gmap.c79
-rw-r--r--arch/s390/mm/hugetlbpage.c11
-rw-r--r--arch/s390/mm/init.c9
-rw-r--r--arch/s390/mm/mmap.c40
-rw-r--r--arch/s390/mm/pageattr.c16
-rw-r--r--arch/s390/mm/pgalloc.c108
-rw-r--r--arch/s390/mm/vmem.c4
-rw-r--r--arch/s390/numa/Makefile2
-rw-r--r--arch/s390/numa/mode_emu.c577
-rw-r--r--arch/s390/numa/numa.c147
-rw-r--r--arch/s390/numa/numa_mode.h25
-rw-r--r--arch/s390/numa/toptree.c351
-rw-r--r--arch/s390/numa/toptree.h61
-rw-r--r--arch/s390/pci/pci.c83
-rw-r--r--arch/s390/pci/pci_clp.c2
-rw-r--r--arch/s390/pci/pci_irq.c5
-rw-r--r--arch/s390/purgatory/.gitignore1
-rw-r--r--arch/s390/tools/.gitignore1
85 files changed, 2799 insertions, 2566 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8abe77536d9d..2167bce993ff 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -102,13 +102,13 @@ config S390
select ARCH_INLINE_WRITE_UNLOCK_IRQ
select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
select ARCH_KEEP_MEMBLOCK
- select ARCH_SAVE_PAGE_KEYS if HIBERNATION
select ARCH_STACKWALK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
+ select ARCH_WANT_DEFAULT_BPF_JIT
select ARCH_WANT_IPC_PARSE_VERSION
select BUILDTIME_TABLE_SORT
select CLONE_BACKWARDS2
@@ -195,6 +195,7 @@ config S390
select ARCH_HAS_FORCE_DMA_UNENCRYPTED
select SWIOTLB
select GENERIC_ALLOCATOR
+ imply IMA_SECURE_AND_OR_TRUSTED_BOOT
config SCHED_OMIT_FRAME_POINTER
@@ -450,14 +451,6 @@ config NR_CPUS
config HOTPLUG_CPU
def_bool y
-# Some NUMA nodes have memory ranges that span
-# other nodes. Even though a pfn is valid and
-# between a node's start and end pfns, it may not
-# reside on that node. See memmap_init_zone()
-# for details. <- They meant memory holes!
-config NODES_SPAN_OTHER_NODES
- def_bool NUMA
-
config NUMA
bool "NUMA support"
depends on SCHED_TOPOLOGY
@@ -467,58 +460,9 @@ config NUMA
This option adds NUMA support to the kernel.
- An operation mode can be selected by appending
- numa=<method> to the kernel command line.
-
- The default behaviour is identical to appending numa=plain to
- the command line. This will create just one node with all
- available memory and all CPUs in it.
-
config NODES_SHIFT
- int "Maximum NUMA nodes (as a power of 2)"
- range 1 10
- depends on NUMA
- default "4"
- help
- Specify the maximum number of NUMA nodes available on the target
- system. Increases memory reserved to accommodate various tables.
-
-menu "Select NUMA modes"
- depends on NUMA
-
-config NUMA_EMU
- bool "NUMA emulation"
- default y
- help
- Numa emulation mode will split the available system memory into
- equal chunks which then are distributed over the configured number
- of nodes in a round-robin manner.
-
- The number of fake nodes is limited by the number of available memory
- chunks (i.e. memory size / fake size) and the number of supported
- nodes in the kernel.
-
- The CPUs are assigned to the nodes in a way that partially respects
- the original machine topology (if supported by the machine).
- Fair distribution of the CPUs is not guaranteed.
-
-config EMU_SIZE
- hex "NUMA emulation memory chunk size"
- default 0x10000000
- range 0x400000 0x100000000
- depends on NUMA_EMU
- help
- Select the default size by which the memory is chopped and then
- assigned to emulated NUMA nodes.
-
- This can be overridden by specifying
-
- emu_size=<n>
-
- on the kernel command line where also suffixes K, M, G, and T are
- supported.
-
-endmenu
+ int
+ default "1"
config SCHED_SMT
def_bool n
@@ -866,15 +810,6 @@ config SECCOMP
If unsure, say Y.
-menu "Power Management"
-
-config ARCH_HIBERNATION_POSSIBLE
- def_bool y
-
-source "kernel/power/Kconfig"
-
-endmenu
-
config CCW
def_bool y
@@ -1009,7 +944,6 @@ config S390_GUEST
select TTY
select VIRTUALIZATION
select VIRTIO
- select VIRTIO_CONSOLE
help
Enabling this option adds support for virtio based paravirtual device
drivers on s390.
diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c
index 54f375627532..8bf46d705957 100644
--- a/arch/s390/appldata/appldata_os.c
+++ b/arch/s390/appldata/appldata_os.c
@@ -75,7 +75,7 @@ struct appldata_os_data {
(waiting for I/O) */
/* per cpu data */
- struct appldata_os_per_cpu os_cpu[0];
+ struct appldata_os_per_cpu os_cpu[];
} __attribute__((packed));
static struct appldata_os_data *appldata_os_data;
diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore
index 16ff906e4610..b265bfede188 100644
--- a/arch/s390/boot/.gitignore
+++ b/arch/s390/boot/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
image
bzImage
section_cmp.*
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index 0ff9261c915e..45b33b83de08 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -37,7 +37,7 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
obj-y += version.o pgm_check_info.o ctype.o text_dma.o
-obj-$(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) += uv.o
+obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
obj-$(CONFIG_RELOCATABLE) += machine_kexec_reloc.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
targets := bzImage startup.a section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y)
diff --git a/arch/s390/boot/compressed/.gitignore b/arch/s390/boot/compressed/.gitignore
index e72fcd7ecebb..765a08f1bd77 100644
--- a/arch/s390/boot/compressed/.gitignore
+++ b/arch/s390/boot/compressed/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
vmlinux
vmlinux.lds
diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh
index bed227f267ae..515b27a996b3 100644
--- a/arch/s390/boot/install.sh
+++ b/arch/s390/boot/install.sh
@@ -21,15 +21,10 @@
if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
-# Default install - same as make zlilo
+echo "Warning: '${INSTALLKERNEL}' command not available - additional " \
+ "bootloader config required" >&2
+if [ -f $4/vmlinuz-$1 ]; then mv $4/vmlinuz-$1 $4/vmlinuz-$1.old; fi
+if [ -f $4/System.map-$1 ]; then mv $4/System.map-$1 $4/System.map-$1.old; fi
-if [ -f $4/vmlinuz ]; then
- mv $4/vmlinuz $4/vmlinuz.old
-fi
-
-if [ -f $4/System.map ]; then
- mv $4/System.map $4/System.old
-fi
-
-cat $2 > $4/vmlinuz
-cp $3 $4/System.map
+cat $2 > $4/vmlinuz-$1
+cp $3 $4/System.map-$1
diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c
index 3f501159ee9f..f887a479cdc7 100644
--- a/arch/s390/boot/uv.c
+++ b/arch/s390/boot/uv.c
@@ -3,7 +3,11 @@
#include <asm/facility.h>
#include <asm/sections.h>
+/* will be used in arch/s390/kernel/uv.c */
+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
int __bootdata_preserved(prot_virt_guest);
+#endif
+struct uv_info __bootdata_preserved(uv_info);
void uv_query_info(void)
{
@@ -19,7 +23,21 @@ void uv_query_info(void)
if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != 0x100)
return;
+ if (IS_ENABLED(CONFIG_KVM)) {
+ memcpy(uv_info.inst_calls_list, uvcb.inst_calls_list, sizeof(uv_info.inst_calls_list));
+ uv_info.uv_base_stor_len = uvcb.uv_base_stor_len;
+ uv_info.guest_base_stor_len = uvcb.conf_base_phys_stor_len;
+ uv_info.guest_virt_base_stor_len = uvcb.conf_base_virt_stor_len;
+ uv_info.guest_virt_var_stor_len = uvcb.conf_virt_var_stor_len;
+ uv_info.guest_cpu_stor_len = uvcb.cpu_stor_len;
+ uv_info.max_sec_stor_addr = ALIGN(uvcb.max_guest_stor_addr, PAGE_SIZE);
+ uv_info.max_num_sec_conf = uvcb.max_num_sec_conf;
+ uv_info.max_guest_cpus = uvcb.max_guest_cpus;
+ }
+
+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) &&
test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list))
prot_virt_guest = 1;
+#endif
}
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 0c86ba19fa2b..46038bc58c9e 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -532,6 +532,7 @@ CONFIG_INPUT_EVDEV=y
# CONFIG_SERIO is not set
CONFIG_LEGACY_PTY_COUNT=0
CONFIG_NULL_TTY=m
+CONFIG_VIRTIO_CONSOLE=y
CONFIG_HW_RANDOM_VIRTIO=m
CONFIG_RAW_DRIVER=m
CONFIG_HANGCHECK_TIMER=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 6b27d861a9a3..7cd0648c1f4e 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -528,6 +528,7 @@ CONFIG_INPUT_EVDEV=y
# CONFIG_SERIO is not set
CONFIG_LEGACY_PTY_COUNT=0
CONFIG_NULL_TTY=m
+CONFIG_VIRTIO_CONSOLE=y
CONFIG_HW_RANDOM_VIRTIO=m
CONFIG_RAW_DRIVER=m
CONFIG_HANGCHECK_TIMER=m
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 1c23d84a9097..73044634d342 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -342,6 +342,7 @@ static int cbc_aes_crypt(struct skcipher_request *req, unsigned long modifier)
memcpy(walk.iv, param.iv, AES_BLOCK_SIZE);
ret = skcipher_walk_done(&walk, nbytes - n);
}
+ memzero_explicit(&param, sizeof(param));
return ret;
}
@@ -470,6 +471,8 @@ static int xts_aes_crypt(struct skcipher_request *req, unsigned long modifier)
walk.dst.virt.addr, walk.src.virt.addr, n);
ret = skcipher_walk_done(&walk, nbytes - n);
}
+ memzero_explicit(&pcc_param, sizeof(pcc_param));
+ memzero_explicit(&xts_param, sizeof(xts_param));
return ret;
}
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 1832ae6442ef..83f6e85de7bc 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -5,21 +5,6 @@ generated-y += syscall_table.h
generated-y += unistd_nr.h
generic-y += asm-offsets.h
-generic-y += cacheflush.h
-generic-y += device.h
-generic-y += dma-mapping.h
-generic-y += div64.h
-generic-y += emergency-restart.h
generic-y += export.h
-generic-y += fb.h
-generic-y += irq_regs.h
-generic-y += irq_work.h
-generic-y += kmap_types.h
-generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
-generic-y += mm-arch-hooks.h
-generic-y += mmiowb.h
-generic-y += trace_clock.h
-generic-y += unaligned.h
-generic-y += word-at-a-time.h
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index 5e97a4353147..26f9144562c9 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -29,7 +29,6 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
mm_segment_t old_fs;
old_fs = enable_sacf_uaccess();
- pagefault_disable();
switch (op) {
case FUTEX_OP_SET:
__futex_atomic_op("lr %2,%5\n",
@@ -54,7 +53,6 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
default:
ret = -ENOSYS;
}
- pagefault_enable();
disable_sacf_uaccess(old_fs);
if (!ret)
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index 37f96b6f0e61..a816fb4734b8 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -9,6 +9,7 @@
#ifndef _ASM_S390_GMAP_H
#define _ASM_S390_GMAP_H
+#include <linux/radix-tree.h>
#include <linux/refcount.h>
/* Generic bits for GMAP notification on DAT table entry changes. */
@@ -31,6 +32,7 @@
* @table: pointer to the page directory
* @asce: address space control element for gmap page table
* @pfault_enabled: defines if pfaults are applicable for the guest
+ * @guest_handle: protected virtual machine handle for the ultravisor
* @host_to_rmap: radix tree with gmap_rmap lists
* @children: list of shadow gmap structures
* @pt_list: list of all page tables used in the shadow guest address space
@@ -54,6 +56,8 @@ struct gmap {
unsigned long asce_end;
void *private;
bool pfault_enabled;
+ /* only set for protected virtual machines */
+ unsigned long guest_handle;
/* Additional data for shadow guest address spaces */
struct radix_tree_root host_to_rmap;
struct list_head children;
@@ -144,4 +148,6 @@ int gmap_mprotect_notify(struct gmap *, unsigned long start,
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
unsigned long gaddr, unsigned long vmaddr);
+int gmap_mark_unmergeable(void);
+void s390_reset_acc(struct mm_struct *mm);
#endif /* _ASM_S390_GMAP_H */
diff --git a/arch/s390/include/asm/hw_irq.h b/arch/s390/include/asm/hw_irq.h
index adae176757ae..9078b5b6b837 100644
--- a/arch/s390/include/asm/hw_irq.h
+++ b/arch/s390/include/asm/hw_irq.h
@@ -7,6 +7,5 @@
void __init init_airq_interrupts(void);
void __init init_cio_interrupts(void);
-void __init init_ext_interrupts(void);
#endif
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index 084e71b7272a..b63bd66404b8 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -119,6 +119,7 @@ enum diag308_subcode {
DIAG308_LOAD_NORMAL_DUMP = 4,
DIAG308_SET = 5,
DIAG308_STORE = 6,
+ DIAG308_LOAD_NORMAL = 7,
};
enum diag308_rc {
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 1726224e7772..d6bcd34f3ec3 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -127,6 +127,12 @@ struct mcck_volatile_info {
#define CR14_INITIAL_MASK (CR14_UNUSED_32 | CR14_UNUSED_33 | \
CR14_EXTERNAL_DAMAGE_SUBMASK)
+#define SIDAD_SIZE_MASK 0xff
+#define sida_origin(sie_block) \
+ ((sie_block)->sidad & PAGE_MASK)
+#define sida_size(sie_block) \
+ ((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE)
+
#define CPUSTAT_STOPPED 0x80000000
#define CPUSTAT_WAIT 0x10000000
#define CPUSTAT_ECALL_PEND 0x08000000
@@ -160,7 +166,13 @@ struct kvm_s390_sie_block {
__u8 reserved08[4]; /* 0x0008 */
#define PROG_IN_SIE (1<<0)
__u32 prog0c; /* 0x000c */
- __u8 reserved10[16]; /* 0x0010 */
+ union {
+ __u8 reserved10[16]; /* 0x0010 */
+ struct {
+ __u64 pv_handle_cpu;
+ __u64 pv_handle_config;
+ };
+ };
#define PROG_BLOCK_SIE (1<<0)
#define PROG_REQUEST (1<<1)
atomic_t prog20; /* 0x0020 */
@@ -209,10 +221,23 @@ struct kvm_s390_sie_block {
#define ICPT_PARTEXEC 0x38
#define ICPT_IOINST 0x40
#define ICPT_KSS 0x5c
+#define ICPT_MCHKREQ 0x60
+#define ICPT_INT_ENABLE 0x64
+#define ICPT_PV_INSTR 0x68
+#define ICPT_PV_NOTIFY 0x6c
+#define ICPT_PV_PREF 0x70
__u8 icptcode; /* 0x0050 */
__u8 icptstatus; /* 0x0051 */
__u16 ihcpu; /* 0x0052 */
- __u8 reserved54[2]; /* 0x0054 */
+ __u8 reserved54; /* 0x0054 */
+#define IICTL_CODE_NONE 0x00
+#define IICTL_CODE_MCHK 0x01
+#define IICTL_CODE_EXT 0x02
+#define IICTL_CODE_IO 0x03
+#define IICTL_CODE_RESTART 0x04
+#define IICTL_CODE_SPECIFICATION 0x10
+#define IICTL_CODE_OPERAND 0x11
+ __u8 iictl; /* 0x0055 */
__u16 ipa; /* 0x0056 */
__u32 ipb; /* 0x0058 */
__u32 scaoh; /* 0x005c */
@@ -233,7 +258,7 @@ struct kvm_s390_sie_block {
#define ECB3_RI 0x01
__u8 ecb3; /* 0x0063 */
__u32 scaol; /* 0x0064 */
- __u8 reserved68; /* 0x0068 */
+ __u8 sdf; /* 0x0068 */
__u8 epdx; /* 0x0069 */
__u8 reserved6a[2]; /* 0x006a */
__u32 todpr; /* 0x006c */
@@ -249,31 +274,58 @@ struct kvm_s390_sie_block {
#define HPID_KVM 0x4
#define HPID_VSIE 0x5
__u8 hpid; /* 0x00b8 */
- __u8 reservedb9[11]; /* 0x00b9 */
- __u16 extcpuaddr; /* 0x00c4 */
- __u16 eic; /* 0x00c6 */
+ __u8 reservedb9[7]; /* 0x00b9 */
+ union {
+ struct {
+ __u32 eiparams; /* 0x00c0 */
+ __u16 extcpuaddr; /* 0x00c4 */
+ __u16 eic; /* 0x00c6 */
+ };
+ __u64 mcic; /* 0x00c0 */
+ } __packed;
__u32 reservedc8; /* 0x00c8 */
- __u16 pgmilc; /* 0x00cc */
- __u16 iprcc; /* 0x00ce */
- __u32 dxc; /* 0x00d0 */
- __u16 mcn; /* 0x00d4 */
- __u8 perc; /* 0x00d6 */
- __u8 peratmid; /* 0x00d7 */
+ union {
+ struct {
+ __u16 pgmilc; /* 0x00cc */
+ __u16 iprcc; /* 0x00ce */
+ };
+ __u32 edc; /* 0x00cc */
+ } __packed;
+ union {
+ struct {
+ __u32 dxc; /* 0x00d0 */
+ __u16 mcn; /* 0x00d4 */
+ __u8 perc; /* 0x00d6 */
+ __u8 peratmid; /* 0x00d7 */
+ };
+ __u64 faddr; /* 0x00d0 */
+ } __packed;
__u64 peraddr; /* 0x00d8 */
__u8 eai; /* 0x00e0 */
__u8 peraid; /* 0x00e1 */
__u8 oai; /* 0x00e2 */
__u8 armid; /* 0x00e3 */
__u8 reservede4[4]; /* 0x00e4 */
- __u64 tecmc; /* 0x00e8 */
- __u8 reservedf0[12]; /* 0x00f0 */
+ union {
+ __u64 tecmc; /* 0x00e8 */
+ struct {
+ __u16 subchannel_id; /* 0x00e8 */
+ __u16 subchannel_nr; /* 0x00ea */
+ __u32 io_int_parm; /* 0x00ec */
+ __u32 io_int_word; /* 0x00f0 */
+ };
+ } __packed;
+ __u8 reservedf4[8]; /* 0x00f4 */
#define CRYCB_FORMAT_MASK 0x00000003
#define CRYCB_FORMAT0 0x00000000
#define CRYCB_FORMAT1 0x00000001
#define CRYCB_FORMAT2 0x00000003
__u32 crycbd; /* 0x00fc */
__u64 gcr[16]; /* 0x0100 */
- __u64 gbea; /* 0x0180 */
+ union {
+ __u64 gbea; /* 0x0180 */
+ __u64 sidad;
+ };
__u8 reserved188[8]; /* 0x0188 */
__u64 sdnxo; /* 0x0190 */
__u8 reserved198[8]; /* 0x0198 */
@@ -292,7 +344,7 @@ struct kvm_s390_sie_block {
__u64 itdba; /* 0x01e8 */
__u64 riccbd; /* 0x01f0 */
__u64 gvrd; /* 0x01f8 */
-} __attribute__((packed));
+} __packed __aligned(512);
struct kvm_s390_itdb {
__u8 data[256];
@@ -301,7 +353,9 @@ struct kvm_s390_itdb {
struct sie_page {
struct kvm_s390_sie_block sie_block;
struct mcck_volatile_info mcck_info; /* 0x0200 */
- __u8 reserved218[1000]; /* 0x0218 */
+ __u8 reserved218[360]; /* 0x0218 */
+ __u64 pv_grregs[16]; /* 0x0380 */
+ __u8 reserved400[512]; /* 0x0400 */
struct kvm_s390_itdb itdb; /* 0x0600 */
__u8 reserved700[2304]; /* 0x0700 */
};
@@ -476,6 +530,7 @@ enum irq_types {
IRQ_PEND_PFAULT_INIT,
IRQ_PEND_EXT_HOST,
IRQ_PEND_EXT_SERVICE,
+ IRQ_PEND_EXT_SERVICE_EV,
IRQ_PEND_EXT_TIMING,
IRQ_PEND_EXT_CPU_TIMER,
IRQ_PEND_EXT_CLOCK_COMP,
@@ -520,6 +575,7 @@ enum irq_types {
(1UL << IRQ_PEND_EXT_TIMING) | \
(1UL << IRQ_PEND_EXT_HOST) | \
(1UL << IRQ_PEND_EXT_SERVICE) | \
+ (1UL << IRQ_PEND_EXT_SERVICE_EV) | \
(1UL << IRQ_PEND_VIRTIO) | \
(1UL << IRQ_PEND_PFAULT_INIT) | \
(1UL << IRQ_PEND_PFAULT_DONE))
@@ -536,6 +592,13 @@ enum irq_types {
#define IRQ_PEND_MCHK_MASK ((1UL << IRQ_PEND_MCHK_REP) | \
(1UL << IRQ_PEND_MCHK_EX))
+#define IRQ_PEND_EXT_II_MASK ((1UL << IRQ_PEND_EXT_CPU_TIMER) | \
+ (1UL << IRQ_PEND_EXT_CLOCK_COMP) | \
+ (1UL << IRQ_PEND_EXT_EMERGENCY) | \
+ (1UL << IRQ_PEND_EXT_EXTERNAL) | \
+ (1UL << IRQ_PEND_EXT_SERVICE) | \
+ (1UL << IRQ_PEND_EXT_SERVICE_EV))
+
struct kvm_s390_interrupt_info {
struct list_head list;
u64 type;
@@ -594,6 +657,7 @@ struct kvm_s390_local_interrupt {
struct kvm_s390_float_interrupt {
unsigned long pending_irqs;
+ unsigned long masked_irqs;
spinlock_t lock;
struct list_head lists[FIRQ_LIST_COUNT];
int counters[FIRQ_MAX_COUNT];
@@ -645,6 +709,11 @@ struct kvm_guestdbg_info_arch {
unsigned long last_bp;
};
+struct kvm_s390_pv_vcpu {
+ u64 handle;
+ unsigned long stor_base;
+};
+
struct kvm_vcpu_arch {
struct kvm_s390_sie_block *sie_block;
/* if vsie is active, currently executed shadow sie control block */
@@ -673,6 +742,7 @@ struct kvm_vcpu_arch {
__u64 cputm_start;
bool gs_enabled;
bool skey_enabled;
+ struct kvm_s390_pv_vcpu pv;
};
struct kvm_vm_stat {
@@ -701,9 +771,6 @@ struct s390_io_adapter {
bool masked;
bool swap;
bool suppressible;
- struct rw_semaphore maps_lock;
- struct list_head maps;
- atomic_t nr_maps;
};
#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
@@ -846,6 +913,13 @@ struct kvm_s390_gisa_interrupt {
DECLARE_BITMAP(kicked_mask, KVM_MAX_VCPUS);
};
+struct kvm_s390_pv {
+ u64 handle;
+ u64 guest_len;
+ unsigned long stor_base;
+ void *stor_var;
+};
+
struct kvm_arch{
void *sca;
int use_esca;
@@ -881,6 +955,7 @@ struct kvm_arch{
DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS);
struct kvm_s390_gisa_interrupt gisa_int;
+ struct kvm_s390_pv pv;
};
#define KVM_HVA_ERR_BAD (-1UL)
@@ -921,7 +996,7 @@ static inline void kvm_arch_hardware_disable(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_free_memslot(struct kvm *kvm,
- struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+ struct kvm_memory_slot *slot) {}
static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 237ee0c4169f..612ed3c6d581 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -141,7 +141,9 @@ struct lowcore {
/* br %r1 trampoline */
__u16 br_r1_trampoline; /* 0x0400 */
- __u8 pad_0x0402[0x0e00-0x0402]; /* 0x0402 */
+ __u32 return_lpswe; /* 0x0402 */
+ __u32 return_mcck_lpswe; /* 0x0406 */
+ __u8 pad_0x040a[0x0e00-0x040a]; /* 0x040a */
/*
* 0xe00 contains the address of the IPL Parameter Information
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index bcfb6371086f..e12ff0f29d1a 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -16,6 +16,8 @@ typedef struct {
unsigned long asce;
unsigned long asce_limit;
unsigned long vdso_base;
+ /* The mmu context belongs to a secure guest. */
+ atomic_t is_protected;
/*
* The following bitfields need a down_write on the mm
* semaphore when they are written to. As they are only
@@ -32,8 +34,6 @@ typedef struct {
unsigned int uses_cmm:1;
/* The gmaps associated with this context are allowed to use huge pages. */
unsigned int allow_gmap_hpage_1m:1;
- /* The mmu context is for compat task */
- unsigned int compat_mm:1;
} mm_context_t;
#define INIT_MM_CONTEXT(name) \
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 8d04e6f3f796..c9f3d8a52756 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -18,14 +18,16 @@
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
{
+ unsigned long asce_type, init_entry;
+
spin_lock_init(&mm->context.lock);
INIT_LIST_HEAD(&mm->context.pgtable_list);
INIT_LIST_HEAD(&mm->context.gmap_list);
cpumask_clear(&mm->context.cpu_attach_mask);
atomic_set(&mm->context.flush_count, 0);
+ atomic_set(&mm->context.is_protected, 0);
mm->context.gmap_asce = 0;
mm->context.flush_mm = 0;
- mm->context.compat_mm = test_thread_flag(TIF_31BIT);
#ifdef CONFIG_PGSTE
mm->context.alloc_pgste = page_table_allocate_pgste ||
test_thread_flag(TIF_PGSTE) ||
@@ -36,33 +38,34 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.allow_gmap_hpage_1m = 0;
#endif
switch (mm->context.asce_limit) {
- case _REGION2_SIZE:
+ default:
/*
- * forked 3-level task, fall through to set new asce with new
- * mm->pgd
+ * context created by exec, the value of asce_limit can
+ * only be zero in this case
*/
- case 0:
- /* context created by exec, set asce limit to 4TB */
- mm->context.asce_limit = STACK_TOP_MAX;
- mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
+ VM_BUG_ON(mm->context.asce_limit);
+ /* continue as 3-level task */
+ mm->context.asce_limit = _REGION2_SIZE;
+ fallthrough;
+ case _REGION2_SIZE:
+ /* forked 3-level task */
+ init_entry = _REGION3_ENTRY_EMPTY;
+ asce_type = _ASCE_TYPE_REGION3;
break;
- case -PAGE_SIZE:
- /* forked 5-level task, set new asce with new_mm->pgd */
- mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | _ASCE_TYPE_REGION1;
+ case TASK_SIZE_MAX:
+ /* forked 5-level task */
+ init_entry = _REGION1_ENTRY_EMPTY;
+ asce_type = _ASCE_TYPE_REGION1;
break;
case _REGION1_SIZE:
- /* forked 4-level task, set new asce with new mm->pgd */
- mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+ /* forked 4-level task */
+ init_entry = _REGION2_ENTRY_EMPTY;
+ asce_type = _ASCE_TYPE_REGION2;
break;
- case _REGION3_SIZE:
- /* forked 2-level compat task, set new asce with new mm->pgd */
- mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
}
- crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | asce_type;
+ crst_table_init((unsigned long *) mm->pgd, init_entry);
return 0;
}
diff --git a/arch/s390/include/asm/numa.h b/arch/s390/include/asm/numa.h
index 35f8cbe7e5bb..23cd5d1b734b 100644
--- a/arch/s390/include/asm/numa.h
+++ b/arch/s390/include/asm/numa.h
@@ -13,24 +13,13 @@
#ifdef CONFIG_NUMA
#include <linux/numa.h>
-#include <linux/cpumask.h>
void numa_setup(void);
-int numa_pfn_to_nid(unsigned long pfn);
-int __node_distance(int a, int b);
-void numa_update_cpu_topology(void);
-
-extern cpumask_t node_to_cpumask_map[MAX_NUMNODES];
-extern int numa_debug_enabled;
#else
static inline void numa_setup(void) { }
-static inline void numa_update_cpu_topology(void) { }
-static inline int numa_pfn_to_nid(unsigned long pfn)
-{
- return 0;
-}
#endif /* CONFIG_NUMA */
+
#endif /* _ASM_S390_NUMA_H */
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 1019efd85b9d..cc98f9b78fd4 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -153,6 +153,11 @@ static inline int devmem_is_allowed(unsigned long pfn)
#define HAVE_ARCH_FREE_PAGE
#define HAVE_ARCH_ALLOC_PAGE
+#if IS_ENABLED(CONFIG_PGSTE)
+int arch_make_page_accessible(struct page *page);
+#define HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
+#endif
+
#endif /* !__ASSEMBLY__ */
#define __PAGE_OFFSET 0x0UL
@@ -161,23 +166,22 @@ static inline int devmem_is_allowed(unsigned long pfn)
#define __pa(x) ((unsigned long)(x))
#define __va(x) ((void *)(unsigned long)(x))
-#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
-#define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT)
+#define phys_to_pfn(phys) ((phys) >> PAGE_SHIFT)
+#define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT)
+
+#define phys_to_page(phys) pfn_to_page(phys_to_pfn(phys))
+#define page_to_phys(page) pfn_to_phys(page_to_pfn(page))
+
+#define pfn_to_virt(pfn) __va(pfn_to_phys(pfn))
+#define virt_to_pfn(kaddr) (phys_to_pfn(__pa(kaddr)))
#define pfn_to_kaddr(pfn) pfn_to_virt(pfn)
#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
#define page_to_virt(page) pfn_to_virt(page_to_pfn(page))
-#define phys_to_pfn(kaddr) ((kaddr) >> PAGE_SHIFT)
-#define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT)
-
-#define phys_to_page(kaddr) pfn_to_page(phys_to_pfn(kaddr))
-#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
-
-#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr))
-#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \
- VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index b05187ce5dbd..7485ee561fec 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -5,6 +5,7 @@
#include <linux/pci.h>
#include <linux/mutex.h>
#include <linux/iommu.h>
+#include <linux/pci_hotplug.h>
#include <asm-generic/pci.h>
#include <asm/pci_clp.h>
#include <asm/pci_debug.h>
@@ -25,6 +26,7 @@ int pci_proc_domain(struct pci_bus *);
#define ZPCI_NR_DMA_SPACES 1
#define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS
+#define ZPCI_DOMAIN_BITMAP_SIZE (1 << 16)
/* PCI Function Controls */
#define ZPCI_FC_FN_ENABLED 0x80
@@ -96,6 +98,7 @@ struct s390_domain;
struct zpci_dev {
struct pci_bus *bus;
struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */
+ struct hotplug_slot hotplug_slot;
enum zpci_state state;
u32 fid; /* function ID, used by sclp */
@@ -186,6 +189,9 @@ int clp_enable_fh(struct zpci_dev *, u8);
int clp_disable_fh(struct zpci_dev *);
int clp_get_state(u32 fid, enum zpci_state *state);
+/* UID */
+void update_uid_checking(bool new);
+
/* IOMMU Interface */
int zpci_init_iommu(struct zpci_dev *zdev);
void zpci_destroy_iommu(struct zpci_dev *zdev);
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 77606c4acd58..74a352f8c0d1 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -34,19 +34,21 @@ static inline void crst_table_init(unsigned long *crst, unsigned long entry)
memset64((u64 *)crst, entry, _CRST_ENTRIES);
}
-static inline unsigned long pgd_entry_type(struct mm_struct *mm)
+int crst_table_upgrade(struct mm_struct *mm, unsigned long limit);
+
+static inline unsigned long check_asce_limit(struct mm_struct *mm, unsigned long addr,
+ unsigned long len)
{
- if (mm_pmd_folded(mm))
- return _SEGMENT_ENTRY_EMPTY;
- if (mm_pud_folded(mm))
- return _REGION3_ENTRY_EMPTY;
- if (mm_p4d_folded(mm))
- return _REGION2_ENTRY_EMPTY;
- return _REGION1_ENTRY_EMPTY;
-}
+ int rc;
-int crst_table_upgrade(struct mm_struct *mm, unsigned long limit);
-void crst_table_downgrade(struct mm_struct *);
+ if (addr + len > mm->context.asce_limit &&
+ addr + len <= TASK_SIZE) {
+ rc = crst_table_upgrade(mm, addr + len);
+ if (rc)
+ return (unsigned long) rc;
+ }
+ return addr;
+}
static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address)
{
@@ -116,24 +118,11 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- unsigned long *table = crst_table_alloc(mm);
-
- if (!table)
- return NULL;
- if (mm->context.asce_limit == _REGION3_SIZE) {
- /* Forking a compat process with 2 page table levels */
- if (!pgtable_pmd_page_ctor(virt_to_page(table))) {
- crst_table_free(mm, table);
- return NULL;
- }
- }
- return (pgd_t *) table;
+ return (pgd_t *) crst_table_alloc(mm);
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
- if (mm->context.asce_limit == _REGION3_SIZE)
- pgtable_pmd_page_dtor(virt_to_page(pgd));
crst_table_free(mm, (unsigned long *) pgd);
}
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 6d7c3b7e9281..6076c8c912d2 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -19,6 +19,7 @@
#include <linux/atomic.h>
#include <asm/bug.h>
#include <asm/page.h>
+#include <asm/uv.h>
extern pgd_t swapper_pg_dir[];
extern void paging_init(void);
@@ -520,6 +521,15 @@ static inline int mm_has_pgste(struct mm_struct *mm)
return 0;
}
+static inline int mm_is_protected(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+ if (unlikely(atomic_read(&mm->context.is_protected)))
+ return 1;
+#endif
+ return 0;
+}
+
static inline int mm_alloc_pgste(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
@@ -1067,7 +1077,12 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
- return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+ pte_t res;
+
+ res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+ if (mm_is_protected(mm) && pte_present(res))
+ uv_convert_from_secure(pte_val(res) & PAGE_MASK);
+ return res;
}
#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
@@ -1079,7 +1094,12 @@ void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
- return ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
+ pte_t res;
+
+ res = ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
+ if (mm_is_protected(vma->vm_mm) && pte_present(res))
+ uv_convert_from_secure(pte_val(res) & PAGE_MASK);
+ return res;
}
/*
@@ -1094,12 +1114,17 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
unsigned long addr,
pte_t *ptep, int full)
{
+ pte_t res;
+
if (full) {
- pte_t pte = *ptep;
+ res = *ptep;
*ptep = __pte(_PAGE_INVALID);
- return pte;
+ } else {
+ res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
}
- return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+ if (mm_is_protected(mm) && pte_present(res))
+ uv_convert_from_secure(pte_val(res) & PAGE_MASK);
+ return res;
}
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index aadb3d0e2adc..555d148ccf32 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -92,15 +92,15 @@ extern void __bpon(void);
*/
#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_31BIT) ? \
- (1UL << 31) : -PAGE_SIZE)
+ _REGION3_SIZE : TASK_SIZE_MAX)
#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \
- (1UL << 30) : (1UL << 41))
+ (_REGION3_SIZE >> 1) : (_REGION2_SIZE >> 1))
#define TASK_SIZE TASK_SIZE_OF(current)
#define TASK_SIZE_MAX (-PAGE_SIZE)
#define STACK_TOP (test_thread_flag(TIF_31BIT) ? \
- (1UL << 31) : (1UL << 42))
-#define STACK_TOP_MAX (1UL << 42)
+ _REGION3_SIZE : _REGION2_SIZE)
+#define STACK_TOP_MAX _REGION2_SIZE
#define HAVE_ARCH_PICK_MMAP_LAYOUT
@@ -161,6 +161,7 @@ typedef struct thread_struct thread_struct;
#define INIT_THREAD { \
.ksp = sizeof(init_stack) + (unsigned long) &init_stack, \
.fpu.regs = (void *) init_task.thread.fpu.fprs, \
+ .last_break = 1, \
}
/*
@@ -177,7 +178,6 @@ typedef struct thread_struct thread_struct;
regs->psw.mask = PSW_USER_BITS | PSW_MASK_BA; \
regs->psw.addr = new_psw; \
regs->gprs[15] = new_stackp; \
- crst_table_downgrade(current->mm); \
execve_tail(); \
} while (0)
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 1e3517b0518b..86a3796e9be8 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -325,7 +325,6 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int,
/**
* struct qdio_initialize - qdio initialization data
- * @cdev: associated ccw device
* @q_format: queue format
* @qdr_ac: feature flags to set
* @adapter_name: name for the adapter
@@ -338,15 +337,14 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int,
* @no_output_qs: number of output queues
* @input_handler: handler to be called for input queues
* @output_handler: handler to be called for output queues
- * @queue_start_poll_array: polling handlers (one per input queue or NULL)
+ * @irq_poll: Data IRQ polling handler (NULL when not supported)
* @scan_threshold: # of in-use buffers that triggers scan on output queue
* @int_parm: interruption parameter
- * @input_sbal_addr_array: address of no_input_qs * 128 pointers
- * @output_sbal_addr_array: address of no_output_qs * 128 pointers
+ * @input_sbal_addr_array: per-queue array, each element points to 128 SBALs
+ * @output_sbal_addr_array: per-queue array, each element points to 128 SBALs
* @output_sbal_state_array: no_output_qs * 128 state info (for CQ or NULL)
*/
struct qdio_initialize {
- struct ccw_device *cdev;
unsigned char q_format;
unsigned char qdr_ac;
unsigned char adapter_name[8];
@@ -359,12 +357,11 @@ struct qdio_initialize {
unsigned int no_output_qs;
qdio_handler_t *input_handler;
qdio_handler_t *output_handler;
- void (**queue_start_poll_array) (struct ccw_device *, int,
- unsigned long);
+ void (*irq_poll)(struct ccw_device *cdev, unsigned long data);
unsigned int scan_threshold;
unsigned long int_parm;
- struct qdio_buffer **input_sbal_addr_array;
- struct qdio_buffer **output_sbal_addr_array;
+ struct qdio_buffer ***input_sbal_addr_array;
+ struct qdio_buffer ***output_sbal_addr_array;
struct qdio_outbuf_state *output_sbal_state_array;
};
@@ -409,14 +406,16 @@ int qdio_alloc_buffers(struct qdio_buffer **buf, unsigned int count);
void qdio_free_buffers(struct qdio_buffer **buf, unsigned int count);
void qdio_reset_buffers(struct qdio_buffer **buf, unsigned int count);
-extern int qdio_allocate(struct qdio_initialize *);
-extern int qdio_establish(struct qdio_initialize *);
+extern int qdio_allocate(struct ccw_device *cdev, unsigned int no_input_qs,
+ unsigned int no_output_qs);
+extern int qdio_establish(struct ccw_device *cdev,
+ struct qdio_initialize *init_data);
extern int qdio_activate(struct ccw_device *);
extern void qdio_release_aob(struct qaob *);
extern int do_QDIO(struct ccw_device *, unsigned int, int, unsigned int,
unsigned int);
-extern int qdio_start_irq(struct ccw_device *, int);
-extern int qdio_stop_irq(struct ccw_device *, int);
+extern int qdio_start_irq(struct ccw_device *cdev);
+extern int qdio_stop_irq(struct ccw_device *cdev);
extern int qdio_get_next_buffers(struct ccw_device *, int, int *, int *);
extern int qdio_inspect_queue(struct ccw_device *cdev, unsigned int nr,
bool is_input, unsigned int *bufnr,
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index b241ddb67caf..534f212753d6 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -8,6 +8,7 @@
#include <linux/bits.h>
#include <uapi/asm/setup.h>
+#include <linux/build_bug.h>
#define EP_OFFSET 0x10008
#define EP_STRING "S390EP"
@@ -162,6 +163,12 @@ static inline unsigned long kaslr_offset(void)
return __kaslr_offset;
}
+static inline u32 gen_lpswe(unsigned long addr)
+{
+ BUILD_BUG_ON(addr > 0xfff);
+ return 0xb2b20000 | addr;
+}
+
#else /* __ASSEMBLY__ */
#define IPL_DEVICE (IPL_DEVICE_OFFSET)
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index b157a81fb977..231a51e870fe 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -34,6 +34,7 @@ extern int smp_vcpu_scheduled(int cpu);
extern void smp_yield_cpu(int cpu);
extern void smp_cpu_set_polarization(int cpu, int val);
extern int smp_cpu_get_polarization(int cpu);
+extern int smp_cpu_get_cpu_address(int cpu);
extern void smp_fill_possible_mask(void);
extern void smp_detect_cpus(void);
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index cca406fdbe51..fbb507504a3b 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -16,8 +16,8 @@ struct cpu_topology_s390 {
unsigned short socket_id;
unsigned short book_id;
unsigned short drawer_id;
- unsigned short node_id;
unsigned short dedicated : 1;
+ int booted_cores;
cpumask_t thread_mask;
cpumask_t core_mask;
cpumask_t book_mask;
@@ -25,7 +25,6 @@ struct cpu_topology_s390 {
};
extern struct cpu_topology_s390 cpu_topology[NR_CPUS];
-extern cpumask_t cpus_with_topology;
#define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id)
#define topology_thread_id(cpu) (cpu_topology[cpu].thread_id)
@@ -37,6 +36,7 @@ extern cpumask_t cpus_with_topology;
#define topology_drawer_id(cpu) (cpu_topology[cpu].drawer_id)
#define topology_drawer_cpumask(cpu) (&cpu_topology[cpu].drawer_mask)
#define topology_cpu_dedicated(cpu) (cpu_topology[cpu].dedicated)
+#define topology_booted_cores(cpu) (cpu_topology[cpu].booted_cores)
#define mc_capable() 1
@@ -45,6 +45,7 @@ int topology_cpu_init(struct cpu *);
int topology_set_cpu_management(int fc);
void topology_schedule_update(void);
void store_topology(struct sysinfo_15_1_x *info);
+void update_cpu_masks(void);
void topology_expect_change(void);
const struct cpumask *cpu_coregroup_mask(int cpu);
@@ -54,6 +55,8 @@ static inline void topology_init_early(void) { }
static inline void topology_schedule_update(void) { }
static inline int topology_cpu_init(struct cpu *cpu) { return 0; }
static inline int topology_cpu_dedicated(int cpu_nr) { return 0; }
+static inline int topology_booted_cores(int cpu_nr) { return 1; }
+static inline void update_cpu_masks(void) { }
static inline void topology_expect_change(void) { }
#endif /* CONFIG_SCHED_TOPOLOGY */
@@ -71,19 +74,23 @@ static inline void topology_expect_change(void) { }
#define cpu_to_node cpu_to_node
static inline int cpu_to_node(int cpu)
{
- return cpu_topology[cpu].node_id;
+ return 0;
}
/* Returns a pointer to the cpumask of CPUs on node 'node'. */
#define cpumask_of_node cpumask_of_node
static inline const struct cpumask *cpumask_of_node(int node)
{
- return &node_to_cpumask_map[node];
+ return cpu_possible_mask;
}
#define pcibus_to_node(bus) __pcibus_to_node(bus)
#define node_distance(a, b) __node_distance(a, b)
+static inline int __node_distance(int a, int b)
+{
+ return 0;
+}
#else /* !CONFIG_NUMA */
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index 4093a2856929..cff4b4c99b75 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -14,23 +14,62 @@
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/bug.h>
+#include <linux/sched.h>
#include <asm/page.h>
+#include <asm/gmap.h>
#define UVC_RC_EXECUTED 0x0001
#define UVC_RC_INV_CMD 0x0002
#define UVC_RC_INV_STATE 0x0003
#define UVC_RC_INV_LEN 0x0005
#define UVC_RC_NO_RESUME 0x0007
+#define UVC_RC_NEED_DESTROY 0x8000
#define UVC_CMD_QUI 0x0001
+#define UVC_CMD_INIT_UV 0x000f
+#define UVC_CMD_CREATE_SEC_CONF 0x0100
+#define UVC_CMD_DESTROY_SEC_CONF 0x0101
+#define UVC_CMD_CREATE_SEC_CPU 0x0120
+#define UVC_CMD_DESTROY_SEC_CPU 0x0121
+#define UVC_CMD_CONV_TO_SEC_STOR 0x0200
+#define UVC_CMD_CONV_FROM_SEC_STOR 0x0201
+#define UVC_CMD_SET_SEC_CONF_PARAMS 0x0300
+#define UVC_CMD_UNPACK_IMG 0x0301
+#define UVC_CMD_VERIFY_IMG 0x0302
+#define UVC_CMD_CPU_RESET 0x0310
+#define UVC_CMD_CPU_RESET_INITIAL 0x0311
+#define UVC_CMD_PREPARE_RESET 0x0320
+#define UVC_CMD_CPU_RESET_CLEAR 0x0321
+#define UVC_CMD_CPU_SET_STATE 0x0330
+#define UVC_CMD_SET_UNSHARE_ALL 0x0340
+#define UVC_CMD_PIN_PAGE_SHARED 0x0341
+#define UVC_CMD_UNPIN_PAGE_SHARED 0x0342
#define UVC_CMD_SET_SHARED_ACCESS 0x1000
#define UVC_CMD_REMOVE_SHARED_ACCESS 0x1001
/* Bits in installed uv calls */
enum uv_cmds_inst {
BIT_UVC_CMD_QUI = 0,
+ BIT_UVC_CMD_INIT_UV = 1,
+ BIT_UVC_CMD_CREATE_SEC_CONF = 2,
+ BIT_UVC_CMD_DESTROY_SEC_CONF = 3,
+ BIT_UVC_CMD_CREATE_SEC_CPU = 4,
+ BIT_UVC_CMD_DESTROY_SEC_CPU = 5,
+ BIT_UVC_CMD_CONV_TO_SEC_STOR = 6,
+ BIT_UVC_CMD_CONV_FROM_SEC_STOR = 7,
BIT_UVC_CMD_SET_SHARED_ACCESS = 8,
BIT_UVC_CMD_REMOVE_SHARED_ACCESS = 9,
+ BIT_UVC_CMD_SET_SEC_PARMS = 11,
+ BIT_UVC_CMD_UNPACK_IMG = 13,
+ BIT_UVC_CMD_VERIFY_IMG = 14,
+ BIT_UVC_CMD_CPU_RESET = 15,
+ BIT_UVC_CMD_CPU_RESET_INITIAL = 16,
+ BIT_UVC_CMD_CPU_SET_STATE = 17,
+ BIT_UVC_CMD_PREPARE_RESET = 18,
+ BIT_UVC_CMD_CPU_PERFORM_CLEAR_RESET = 19,
+ BIT_UVC_CMD_UNSHARE_ALL = 20,
+ BIT_UVC_CMD_PIN_PAGE_SHARED = 21,
+ BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22,
};
struct uv_cb_header {
@@ -40,13 +79,127 @@ struct uv_cb_header {
u16 rrc; /* Return Reason Code */
} __packed __aligned(8);
+/* Query Ultravisor Information */
struct uv_cb_qui {
struct uv_cb_header header;
u64 reserved08;
u64 inst_calls_list[4];
- u64 reserved30[15];
+ u64 reserved30[2];
+ u64 uv_base_stor_len;
+ u64 reserved48;
+ u64 conf_base_phys_stor_len;
+ u64 conf_base_virt_stor_len;
+ u64 conf_virt_var_stor_len;
+ u64 cpu_stor_len;
+ u32 reserved70[3];
+ u32 max_num_sec_conf;
+ u64 max_guest_stor_addr;
+ u8 reserved88[158 - 136];
+ u16 max_guest_cpus;
+ u8 reserveda0[200 - 160];
} __packed __aligned(8);
+/* Initialize Ultravisor */
+struct uv_cb_init {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 stor_origin;
+ u64 stor_len;
+ u64 reserved28[4];
+} __packed __aligned(8);
+
+/* Create Guest Configuration */
+struct uv_cb_cgc {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 guest_handle;
+ u64 conf_base_stor_origin;
+ u64 conf_virt_stor_origin;
+ u64 reserved30;
+ u64 guest_stor_origin;
+ u64 guest_stor_len;
+ u64 guest_sca;
+ u64 guest_asce;
+ u64 reserved58[5];
+} __packed __aligned(8);
+
+/* Create Secure CPU */
+struct uv_cb_csc {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 cpu_handle;
+ u64 guest_handle;
+ u64 stor_origin;
+ u8 reserved30[6];
+ u16 num;
+ u64 state_origin;
+ u64 reserved40[4];
+} __packed __aligned(8);
+
+/* Convert to Secure */
+struct uv_cb_cts {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 guest_handle;
+ u64 gaddr;
+} __packed __aligned(8);
+
+/* Convert from Secure / Pin Page Shared */
+struct uv_cb_cfs {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 paddr;
+} __packed __aligned(8);
+
+/* Set Secure Config Parameter */
+struct uv_cb_ssc {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 guest_handle;
+ u64 sec_header_origin;
+ u32 sec_header_len;
+ u32 reserved2c;
+ u64 reserved30[4];
+} __packed __aligned(8);
+
+/* Unpack */
+struct uv_cb_unp {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 guest_handle;
+ u64 gaddr;
+ u64 tweak[2];
+ u64 reserved38[3];
+} __packed __aligned(8);
+
+#define PV_CPU_STATE_OPR 1
+#define PV_CPU_STATE_STP 2
+#define PV_CPU_STATE_CHKSTP 3
+#define PV_CPU_STATE_OPR_LOAD 5
+
+struct uv_cb_cpu_set_state {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 cpu_handle;
+ u8 reserved20[7];
+ u8 state;
+ u64 reserved28[5];
+};
+
+/*
+ * A common UV call struct for calls that take no payload
+ * Examples:
+ * Destroy cpu/config
+ * Verify
+ */
+struct uv_cb_nodata {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 handle;
+ u64 reserved20[4];
+} __packed __aligned(8);
+
+/* Set Shared Access */
struct uv_cb_share {
struct uv_cb_header header;
u64 reserved08[3];
@@ -54,21 +207,76 @@ struct uv_cb_share {
u64 reserved28;
} __packed __aligned(8);
-static inline int uv_call(unsigned long r1, unsigned long r2)
+static inline int __uv_call(unsigned long r1, unsigned long r2)
{
int cc;
asm volatile(
- "0: .insn rrf,0xB9A40000,%[r1],%[r2],0,0\n"
- " brc 3,0b\n"
- " ipm %[cc]\n"
- " srl %[cc],28\n"
+ " .insn rrf,0xB9A40000,%[r1],%[r2],0,0\n"
+ " ipm %[cc]\n"
+ " srl %[cc],28\n"
: [cc] "=d" (cc)
: [r1] "a" (r1), [r2] "a" (r2)
: "memory", "cc");
return cc;
}
+static inline int uv_call(unsigned long r1, unsigned long r2)
+{
+ int cc;
+
+ do {
+ cc = __uv_call(r1, r2);
+ } while (cc > 1);
+ return cc;
+}
+
+/* Low level uv_call that avoids stalls for long running busy conditions */
+static inline int uv_call_sched(unsigned long r1, unsigned long r2)
+{
+ int cc;
+
+ do {
+ cc = __uv_call(r1, r2);
+ cond_resched();
+ } while (cc > 1);
+ return cc;
+}
+
+/*
+ * special variant of uv_call that only transports the cpu or guest
+ * handle and the command, like destroy or verify.
+ */
+static inline int uv_cmd_nodata(u64 handle, u16 cmd, u16 *rc, u16 *rrc)
+{
+ struct uv_cb_nodata uvcb = {
+ .header.cmd = cmd,
+ .header.len = sizeof(uvcb),
+ .handle = handle,
+ };
+ int cc;
+
+ WARN(!handle, "No handle provided to Ultravisor call cmd %x\n", cmd);
+ cc = uv_call_sched(0, (u64)&uvcb);
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+ return cc ? -EINVAL : 0;
+}
+
+struct uv_info {
+ unsigned long inst_calls_list[4];
+ unsigned long uv_base_stor_len;
+ unsigned long guest_base_stor_len;
+ unsigned long guest_virt_base_stor_len;
+ unsigned long guest_virt_var_stor_len;
+ unsigned long guest_cpu_stor_len;
+ unsigned long max_sec_stor_addr;
+ unsigned int max_num_sec_conf;
+ unsigned short max_guest_cpus;
+};
+
+extern struct uv_info uv_info;
+
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
extern int prot_virt_guest;
@@ -121,11 +329,40 @@ static inline int uv_remove_shared(unsigned long addr)
return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS);
}
-void uv_query_info(void);
#else
#define is_prot_virt_guest() 0
static inline int uv_set_shared(unsigned long addr) { return 0; }
static inline int uv_remove_shared(unsigned long addr) { return 0; }
+#endif
+
+#if IS_ENABLED(CONFIG_KVM)
+extern int prot_virt_host;
+
+static inline int is_prot_virt_host(void)
+{
+ return prot_virt_host;
+}
+
+int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
+int uv_convert_from_secure(unsigned long paddr);
+int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
+
+void setup_uv(void);
+void adjust_to_uv_max(unsigned long *vmax);
+#else
+#define is_prot_virt_host() 0
+static inline void setup_uv(void) {}
+static inline void adjust_to_uv_max(unsigned long *vmax) {}
+
+static inline int uv_convert_from_secure(unsigned long paddr)
+{
+ return 0;
+}
+#endif
+
+#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
+void uv_query_info(void);
+#else
static inline void uv_query_info(void) {}
#endif
diff --git a/arch/s390/kernel/.gitignore b/arch/s390/kernel/.gitignore
index c5f676c3c224..bbb90f92d051 100644
--- a/arch/s390/kernel/.gitignore
+++ b/arch/s390/kernel/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
vmlinux.lds
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 2b1203cf7be6..75f26d775027 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -54,7 +54,6 @@ CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE)
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o
-obj-$(CONFIG_HIBERNATION) += suspend.o swsusp.o
obj-$(CONFIG_AUDIT) += audit.o
compat-obj-$(CONFIG_AUDIT) += compat_audit.o
obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o
@@ -70,7 +69,7 @@ obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o
obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o
-obj-$(CONFIG_IMA) += ima_arch.o
+obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf_common.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o
@@ -78,6 +77,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_diag.o
obj-$(CONFIG_TRACEPOINTS) += trace.o
+obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
# vdso
obj-y += vdso64/
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index ce33406cfe83..e80f0e6f5972 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -124,6 +124,8 @@ int main(void)
OFFSET(__LC_EXT_DAMAGE_CODE, lowcore, external_damage_code);
OFFSET(__LC_MCCK_FAIL_STOR_ADDR, lowcore, failing_storage_address);
OFFSET(__LC_LAST_BREAK, lowcore, breaking_event_addr);
+ OFFSET(__LC_RETURN_LPSWE, lowcore, return_lpswe);
+ OFFSET(__LC_RETURN_MCCK_LPSWE, lowcore, return_mcck_lpswe);
OFFSET(__LC_RST_OLD_PSW, lowcore, restart_old_psw);
OFFSET(__LC_EXT_OLD_PSW, lowcore, external_old_psw);
OFFSET(__LC_SVC_OLD_PSW, lowcore, svc_old_psw);
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index e9dac9a24d3f..ccba63aaeb47 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -84,7 +84,7 @@ static int show_diag_stat(struct seq_file *m, void *v)
static void *show_diag_stat_start(struct seq_file *m, loff_t *pos)
{
- return *pos <= nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL;
+ return *pos <= NR_DIAG_STAT ? (void *)((unsigned long) *pos + 1) : NULL;
}
static void *show_diag_stat_next(struct seq_file *m, void *v, loff_t *pos)
@@ -133,7 +133,7 @@ void diag_stat_inc(enum diag_stat_enum nr)
}
EXPORT_SYMBOL(diag_stat_inc);
-void diag_stat_inc_norecursion(enum diag_stat_enum nr)
+void notrace diag_stat_inc_norecursion(enum diag_stat_enum nr)
{
this_cpu_inc(diag_stat.counter[nr]);
trace_s390_diagnose_norecursion(diag_map[nr].code);
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 9205add8481d..3ae64914bd14 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -115,26 +115,29 @@ _LPP_OFFSET = __LC_LPP
.macro SWITCH_ASYNC savearea,timer
tmhh %r8,0x0001 # interrupting from user ?
- jnz 1f
+ jnz 2f
lgr %r14,%r9
+ cghi %r14,__LC_RETURN_LPSWE
+ je 0f
slg %r14,BASED(.Lcritical_start)
clg %r14,BASED(.Lcritical_length)
- jhe 0f
+ jhe 1f
+0:
lghi %r11,\savearea # inside critical section, do cleanup
brasl %r14,cleanup_critical
tmhh %r8,0x0001 # retest problem state after cleanup
- jnz 1f
-0: lg %r14,__LC_ASYNC_STACK # are we already on the target stack?
+ jnz 2f
+1: lg %r14,__LC_ASYNC_STACK # are we already on the target stack?
slgr %r14,%r15
srag %r14,%r14,STACK_SHIFT
- jnz 2f
+ jnz 3f
CHECK_STACK \savearea
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- j 3f
-1: UPDATE_VTIME %r14,%r15,\timer
+ j 4f
+2: UPDATE_VTIME %r14,%r15,\timer
BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
-2: lg %r15,__LC_ASYNC_STACK # load async stack
-3: la %r11,STACK_FRAME_OVERHEAD(%r15)
+3: lg %r15,__LC_ASYNC_STACK # load async stack
+4: la %r11,STACK_FRAME_OVERHEAD(%r15)
.endm
.macro UPDATE_VTIME w1,w2,enter_timer
@@ -401,7 +404,7 @@ ENTRY(system_call)
stpt __LC_EXIT_TIMER
mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
lmg %r11,%r15,__PT_R11(%r11)
- lpswe __LC_RETURN_PSW
+ b __LC_RETURN_LPSWE(%r0)
.Lsysc_done:
#
@@ -608,43 +611,50 @@ ENTRY(pgm_check_handler)
BPOFF
stmg %r8,%r15,__LC_SAVE_AREA_SYNC
lg %r10,__LC_LAST_BREAK
- lg %r12,__LC_CURRENT
+ srag %r11,%r10,12
+ jnz 0f
+ /* if __LC_LAST_BREAK is < 4096, it contains one of
+ * the lpswe addresses in lowcore. Set it to 1 (initial state)
+ * to prevent leaking that address to userspace.
+ */
+ lghi %r10,1
+0: lg %r12,__LC_CURRENT
lghi %r11,0
larl %r13,cleanup_critical
lmg %r8,%r9,__LC_PGM_OLD_PSW
tmhh %r8,0x0001 # test problem state bit
- jnz 2f # -> fault in user space
+ jnz 3f # -> fault in user space
#if IS_ENABLED(CONFIG_KVM)
# cleanup critical section for program checks in sie64a
lgr %r14,%r9
slg %r14,BASED(.Lsie_critical_start)
clg %r14,BASED(.Lsie_critical_length)
- jhe 0f
+ jhe 1f
lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
larl %r9,sie_exit # skip forward to sie_exit
lghi %r11,_PIF_GUEST_FAULT
#endif
-0: tmhh %r8,0x4000 # PER bit set in old PSW ?
- jnz 1f # -> enabled, can't be a double fault
+1: tmhh %r8,0x4000 # PER bit set in old PSW ?
+ jnz 2f # -> enabled, can't be a double fault
tm __LC_PGM_ILC+3,0x80 # check for per exception
jnz .Lpgm_svcper # -> single stepped svc
-1: CHECK_STACK __LC_SAVE_AREA_SYNC
+2: CHECK_STACK __LC_SAVE_AREA_SYNC
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- # CHECK_VMAP_STACK branches to stack_overflow or 4f
- CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f
-2: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
+ # CHECK_VMAP_STACK branches to stack_overflow or 5f
+ CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,5f
+3: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
lg %r15,__LC_KERNEL_STACK
lgr %r14,%r12
aghi %r14,__TASK_thread # pointer to thread_struct
lghi %r13,__LC_PGM_TDB
tm __LC_PGM_ILC+2,0x02 # check for transaction abort
- jz 3f
+ jz 4f
mvc __THREAD_trap_tdb(256,%r14),0(%r13)
-3: stg %r10,__THREAD_last_break(%r14)
-4: lgr %r13,%r11
+4: stg %r10,__THREAD_last_break(%r14)
+5: lgr %r13,%r11
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
# clear user controlled registers to prevent speculative use
@@ -663,14 +673,14 @@ ENTRY(pgm_check_handler)
stg %r13,__PT_FLAGS(%r11)
stg %r10,__PT_ARGS(%r11)
tm __LC_PGM_ILC+3,0x80 # check for per exception
- jz 5f
+ jz 6f
tmhh %r8,0x0001 # kernel per event ?
jz .Lpgm_kprobe
oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP
mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS
mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE
mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
-5: REENABLE_IRQS
+6: REENABLE_IRQS
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
larl %r1,pgm_check_table
llgh %r10,__PT_INT_CODE+2(%r11)
@@ -775,7 +785,7 @@ ENTRY(io_int_handler)
mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
.Lio_exit_kernel:
lmg %r11,%r15,__PT_R11(%r11)
- lpswe __LC_RETURN_PSW
+ b __LC_RETURN_LPSWE(%r0)
.Lio_done:
#
@@ -1214,7 +1224,7 @@ ENTRY(mcck_int_handler)
stpt __LC_EXIT_TIMER
mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
0: lmg %r11,%r15,__PT_R11(%r11)
- lpswe __LC_RETURN_MCCK_PSW
+ b __LC_RETURN_MCCK_LPSWE
.Lmcck_panic:
lg %r15,__LC_NODAT_STACK
@@ -1271,6 +1281,8 @@ ENDPROC(stack_overflow)
#endif
ENTRY(cleanup_critical)
+ cghi %r9,__LC_RETURN_LPSWE
+ je .Lcleanup_lpswe
#if IS_ENABLED(CONFIG_KVM)
clg %r9,BASED(.Lcleanup_table_sie) # .Lsie_gmap
jl 0f
@@ -1424,6 +1436,7 @@ ENDPROC(cleanup_critical)
mvc __LC_RETURN_PSW(16),__PT_PSW(%r9)
mvc 0(64,%r11),__PT_R8(%r9)
lmg %r0,%r7,__PT_R0(%r9)
+.Lcleanup_lpswe:
1: lmg %r8,%r9,__LC_RETURN_PSW
BR_EX %r14,%r11
.Lcleanup_sysc_restore_insn:
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 1d3927e01a5f..faca269d5f27 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -24,6 +24,8 @@ asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
void do_protection_exception(struct pt_regs *regs);
void do_dat_exception(struct pt_regs *regs);
+void do_secure_storage_access(struct pt_regs *regs);
+void do_non_secure_storage_access(struct pt_regs *regs);
void addressing_exception(struct pt_regs *regs);
void data_exception(struct pt_regs *regs);
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 6837affc19e8..4a71061974fd 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -144,6 +144,9 @@ static struct ipl_parameter_block *dump_block_ccw;
static struct sclp_ipl_info sclp_ipl_info;
+static bool reipl_fcp_clear;
+static bool reipl_ccw_clear;
+
static inline int __diag308(unsigned long subcode, void *addr)
{
register unsigned long _addr asm("0") = (unsigned long) addr;
@@ -691,6 +694,21 @@ static struct kobj_attribute sys_reipl_fcp_loadparm_attr =
__ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_fcp_loadparm_show,
reipl_fcp_loadparm_store);
+static ssize_t reipl_fcp_clear_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sprintf(page, "%u\n", reipl_fcp_clear);
+}
+
+static ssize_t reipl_fcp_clear_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ if (strtobool(buf, &reipl_fcp_clear) < 0)
+ return -EINVAL;
+ return len;
+}
+
static struct attribute *reipl_fcp_attrs[] = {
&sys_reipl_fcp_device_attr.attr,
&sys_reipl_fcp_wwpn_attr.attr,
@@ -706,6 +724,9 @@ static struct attribute_group reipl_fcp_attr_group = {
.bin_attrs = reipl_fcp_bin_attrs,
};
+static struct kobj_attribute sys_reipl_fcp_clear_attr =
+ __ATTR(clear, 0644, reipl_fcp_clear_show, reipl_fcp_clear_store);
+
/* CCW reipl device attributes */
DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw);
@@ -741,16 +762,36 @@ static struct kobj_attribute sys_reipl_ccw_loadparm_attr =
__ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_ccw_loadparm_show,
reipl_ccw_loadparm_store);
+static ssize_t reipl_ccw_clear_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sprintf(page, "%u\n", reipl_ccw_clear);
+}
+
+static ssize_t reipl_ccw_clear_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ if (strtobool(buf, &reipl_ccw_clear) < 0)
+ return -EINVAL;
+ return len;
+}
+
+static struct kobj_attribute sys_reipl_ccw_clear_attr =
+ __ATTR(clear, 0644, reipl_ccw_clear_show, reipl_ccw_clear_store);
+
static struct attribute *reipl_ccw_attrs_vm[] = {
&sys_reipl_ccw_device_attr.attr,
&sys_reipl_ccw_loadparm_attr.attr,
&sys_reipl_ccw_vmparm_attr.attr,
+ &sys_reipl_ccw_clear_attr.attr,
NULL,
};
static struct attribute *reipl_ccw_attrs_lpar[] = {
&sys_reipl_ccw_device_attr.attr,
&sys_reipl_ccw_loadparm_attr.attr,
+ &sys_reipl_ccw_clear_attr.attr,
NULL,
};
@@ -892,11 +933,17 @@ static void __reipl_run(void *unused)
switch (reipl_type) {
case IPL_TYPE_CCW:
diag308(DIAG308_SET, reipl_block_ccw);
- diag308(DIAG308_LOAD_CLEAR, NULL);
+ if (reipl_ccw_clear)
+ diag308(DIAG308_LOAD_CLEAR, NULL);
+ else
+ diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
break;
case IPL_TYPE_FCP:
diag308(DIAG308_SET, reipl_block_fcp);
- diag308(DIAG308_LOAD_CLEAR, NULL);
+ if (reipl_fcp_clear)
+ diag308(DIAG308_LOAD_CLEAR, NULL);
+ else
+ diag308(DIAG308_LOAD_NORMAL, NULL);
break;
case IPL_TYPE_NSS:
diag308(DIAG308_SET, reipl_block_nss);
@@ -1008,11 +1055,16 @@ static int __init reipl_fcp_init(void)
}
rc = sysfs_create_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
- if (rc) {
- kset_unregister(reipl_fcp_kset);
- free_page((unsigned long) reipl_block_fcp);
- return rc;
- }
+ if (rc)
+ goto out1;
+
+ if (test_facility(141)) {
+ rc = sysfs_create_file(&reipl_fcp_kset->kobj,
+ &sys_reipl_fcp_clear_attr.attr);
+ if (rc)
+ goto out2;
+ } else
+ reipl_fcp_clear = true;
if (ipl_info.type == IPL_TYPE_FCP) {
memcpy(reipl_block_fcp, &ipl_block, sizeof(ipl_block));
@@ -1032,6 +1084,13 @@ static int __init reipl_fcp_init(void)
}
reipl_capabilities |= IPL_TYPE_FCP;
return 0;
+
+out2:
+ sysfs_remove_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
+out1:
+ kset_unregister(reipl_fcp_kset);
+ free_page((unsigned long) reipl_block_fcp);
+ return rc;
}
static int __init reipl_type_init(void)
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 8371855042dc..3514420f0259 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -95,14 +95,6 @@ static const struct irq_class irqclass_sub_desc[] = {
{.irq = CPU_RST, .name = "RST", .desc = "[CPU] CPU Restart"},
};
-void __init init_IRQ(void)
-{
- BUILD_BUG_ON(ARRAY_SIZE(irqclass_sub_desc) != NR_ARCH_IRQS);
- init_cio_interrupts();
- init_airq_interrupts();
- init_ext_interrupts();
-}
-
void do_IRQ(struct pt_regs *regs, int irq)
{
struct pt_regs *old_regs;
@@ -294,12 +286,7 @@ static irqreturn_t do_ext_interrupt(int irq, void *dummy)
return IRQ_HANDLED;
}
-static struct irqaction external_interrupt = {
- .name = "EXT",
- .handler = do_ext_interrupt,
-};
-
-void __init init_ext_interrupts(void)
+static void __init init_ext_interrupts(void)
{
int idx;
@@ -308,7 +295,16 @@ void __init init_ext_interrupts(void)
irq_set_chip_and_handler(EXT_INTERRUPT,
&dummy_irq_chip, handle_percpu_irq);
- setup_irq(EXT_INTERRUPT, &external_interrupt);
+ if (request_irq(EXT_INTERRUPT, do_ext_interrupt, 0, "EXT", NULL))
+ panic("Failed to register EXT interrupt\n");
+}
+
+void __init init_IRQ(void)
+{
+ BUILD_BUG_ON(ARRAY_SIZE(irqclass_sub_desc) != NR_ARCH_IRQS);
+ init_cio_interrupts();
+ init_airq_interrupts();
+ init_ext_interrupts();
}
static DEFINE_SPINLOCK(irq_subclass_lock);
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index cb8b1cc285c9..3a854cb5a4c6 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -14,7 +14,6 @@
#include <linux/reboot.h>
#include <linux/ftrace.h>
#include <linux/debug_locks.h>
-#include <linux/suspend.h>
#include <asm/cio.h>
#include <asm/setup.h>
#include <asm/pgtable.h>
@@ -39,36 +38,6 @@ extern const unsigned long long relocate_kernel_len;
#ifdef CONFIG_CRASH_DUMP
/*
- * PM notifier callback for kdump
- */
-static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action,
- void *ptr)
-{
- switch (action) {
- case PM_SUSPEND_PREPARE:
- case PM_HIBERNATION_PREPARE:
- if (kexec_crash_image)
- arch_kexec_unprotect_crashkres();
- break;
- case PM_POST_SUSPEND:
- case PM_POST_HIBERNATION:
- if (kexec_crash_image)
- arch_kexec_protect_crashkres();
- break;
- default:
- return NOTIFY_DONE;
- }
- return NOTIFY_OK;
-}
-
-static int __init machine_kdump_pm_init(void)
-{
- pm_notifier(machine_kdump_pm_cb, 0);
- return 0;
-}
-arch_initcall(machine_kdump_pm_init);
-
-/*
* Reset the system, copy boot CPU registers to absolute zero,
* and jump to the kdump image
*/
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
index 8b33e03e47b8..1e3df52b2b65 100644
--- a/arch/s390/kernel/perf_cpum_cf_events.c
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -238,6 +238,64 @@ CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_SPECIAL, 0x00f5);
CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
+CPUMF_EVENT_ATTR(cf_z15, L1D_RO_EXCL_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z15, DTLB2_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z15, DTLB2_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z15, DTLB2_HPAGE_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z15, DTLB2_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z15, L1D_L2D_SOURCED_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z15, ITLB2_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z15, ITLB2_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z15, L1I_L2I_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z15, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z15, TLB2_CRSTE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z15, TLB2_ENGINES_BUSY, 0x008b);
+CPUMF_EVENT_ATTR(cf_z15, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z15, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z15, L1C_TLB2_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_MEMORY_SOURCED_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0092);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x0095);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES, 0x0096);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x0097);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x0098);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x009b);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x009c);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L4_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_RO, 0x009e);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_MEMORY_SOURCED_WRITES, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L4_SOURCED_WRITES, 0x00af);
+CPUMF_EVENT_ATTR(cf_z15, BCD_DFP_EXECUTION_SLOTS, 0x00e0);
+CPUMF_EVENT_ATTR(cf_z15, VX_BCD_EXECUTION_SLOTS, 0x00e1);
+CPUMF_EVENT_ATTR(cf_z15, DECIMAL_INSTRUCTIONS, 0x00e2);
+CPUMF_EVENT_ATTR(cf_z15, LAST_HOST_TRANSLATIONS, 0x00e8);
+CPUMF_EVENT_ATTR(cf_z15, TX_NC_TABORT, 0x00f3);
+CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_NO_SPECIAL, 0x00f4);
+CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_SPECIAL, 0x00f5);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_ACCESS, 0x00f7);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CYCLES, 0x00fc);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CCERROR, 0x00109);
+CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
+
static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = {
CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES),
CPUMF_EVENT_PTR(cf_fvn1, INSTRUCTIONS),
@@ -516,6 +574,67 @@ static struct attribute *cpumcf_z14_pmu_event_attr[] __initdata = {
NULL,
};
+static struct attribute *cpumcf_z15_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z15, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, DTLB2_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, DTLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z15, DTLB2_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, DTLB2_GPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_L2D_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, ITLB2_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, ITLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_L2I_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, TLB2_ENGINES_BUSY),
+ CPUMF_EVENT_PTR(cf_z15, TX_C_TEND),
+ CPUMF_EVENT_PTR(cf_z15, TX_NC_TEND),
+ CPUMF_EVENT_PTR(cf_z15, L1C_TLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z15, L1D_ONDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_RO),
+ CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z15, L1I_ONDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, BCD_DFP_EXECUTION_SLOTS),
+ CPUMF_EVENT_PTR(cf_z15, VX_BCD_EXECUTION_SLOTS),
+ CPUMF_EVENT_PTR(cf_z15, DECIMAL_INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf_z15, LAST_HOST_TRANSLATIONS),
+ CPUMF_EVENT_PTR(cf_z15, TX_NC_TABORT),
+ CPUMF_EVENT_PTR(cf_z15, TX_C_TABORT_NO_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z15, TX_C_TABORT_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z15, DFLT_ACCESS),
+ CPUMF_EVENT_PTR(cf_z15, DFLT_CYCLES),
+ CPUMF_EVENT_PTR(cf_z15, DFLT_CC),
+ CPUMF_EVENT_PTR(cf_z15, DFLT_CCERROR),
+ CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+ CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+ NULL,
+};
+
/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
static struct attribute_group cpumcf_pmu_events_group = {
@@ -624,9 +743,11 @@ __init const struct attribute_group **cpumf_cf_event_group(void)
break;
case 0x3906:
case 0x3907:
+ model = cpumcf_z14_pmu_event_attr;
+ break;
case 0x8561:
case 0x8562:
- model = cpumcf_z14_pmu_event_attr;
+ model = cpumcf_z15_pmu_event_attr;
break;
default:
model = none;
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index b095b1c78987..85a711d783eb 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -372,28 +372,33 @@ static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
{
- unsigned long n_sdb, freq, factor;
+ unsigned long n_sdb, freq;
size_t sample_size;
/* Calculate sampling buffers using 4K pages
*
- * 1. Determine the sample data size which depends on the used
- * sampling functions, for example, basic-sampling or
- * basic-sampling with diagnostic-sampling.
+ * 1. The sampling size is 32 bytes for basic sampling. This size
+ * is the same for all machine types. Diagnostic
+ * sampling uses auxlilary data buffer setup which provides the
+ * memory for SDBs using linux common code auxiliary trace
+ * setup.
*
- * 2. Use the sampling frequency as input. The sampling buffer is
- * designed for almost one second. This can be adjusted through
- * the "factor" variable.
- * In any case, alloc_sampling_buffer() sets the Alert Request
+ * 2. Function alloc_sampling_buffer() sets the Alert Request
* Control indicator to trigger a measurement-alert to harvest
- * sample-data-blocks (sdb).
+ * sample-data-blocks (SDB). This is done per SDB. This
+ * measurement alert interrupt fires quick enough to handle
+ * one SDB, on very high frequency and work loads there might
+ * be 2 to 3 SBDs available for sample processing.
+ * Currently there is no need for setup alert request on every
+ * n-th page. This is counterproductive as one IRQ triggers
+ * a very high number of samples to be processed at one IRQ.
*
- * 3. Compute the number of sample-data-blocks and ensure a minimum
- * of CPUM_SF_MIN_SDB. Also ensure the upper limit does not
- * exceed a "calculated" maximum. The symbolic maximum is
- * designed for basic-sampling only and needs to be increased if
- * diagnostic-sampling is active.
- * See also the remarks for these symbolic constants.
+ * 3. Use the sampling frequency as input.
+ * Compute the number of SDBs and ensure a minimum
+ * of CPUM_SF_MIN_SDB. Depending on frequency add some more
+ * SDBs to handle a higher sampling rate.
+ * Use a minimum of CPUM_SF_MIN_SDB and allow for 100 samples
+ * (one SDB) for every 10000 HZ frequency increment.
*
* 4. Compute the number of sample-data-block-tables (SDBT) and
* ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up
@@ -401,10 +406,7 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
*/
sample_size = sizeof(struct hws_basic_entry);
freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
- factor = 1;
- n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size));
- if (n_sdb < CPUM_SF_MIN_SDB)
- n_sdb = CPUM_SF_MIN_SDB;
+ n_sdb = CPUM_SF_MIN_SDB + DIV_ROUND_UP(freq, 10000);
/* If there is already a sampling buffer allocated, it is very likely
* that the sampling facility is enabled too. If the event to be
@@ -1576,6 +1578,7 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
unsigned long range = 0, size;
unsigned long long overflow = 0;
struct perf_output_handle *handle = &cpuhw->handle;
+ unsigned long num_sdb;
aux = perf_get_aux(handle);
if (WARN_ON_ONCE(!aux))
@@ -1587,13 +1590,14 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
size >> PAGE_SHIFT);
perf_aux_output_end(handle, size);
+ num_sdb = aux->sfb.num_sdb;
while (!done) {
/* Get an output handle */
aux = perf_aux_output_begin(handle, cpuhw->event);
if (handle->size == 0) {
pr_err("The AUX buffer with %lu pages for the "
"diagnostic-sampling mode is full\n",
- aux->sfb.num_sdb);
+ num_sdb);
debug_sprintf_event(sfdbg, 1,
"%s: AUX buffer used up\n",
__func__);
diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S
index eee3a482195a..2c27907a5ffc 100644
--- a/arch/s390/kernel/pgm_check.S
+++ b/arch/s390/kernel/pgm_check.S
@@ -78,8 +78,8 @@ PGM_CHECK(do_dat_exception) /* 39 */
PGM_CHECK(do_dat_exception) /* 3a */
PGM_CHECK(do_dat_exception) /* 3b */
PGM_CHECK_DEFAULT /* 3c */
-PGM_CHECK_DEFAULT /* 3d */
-PGM_CHECK_DEFAULT /* 3e */
+PGM_CHECK(do_secure_storage_access) /* 3d */
+PGM_CHECK(do_non_secure_storage_access) /* 3e */
PGM_CHECK_DEFAULT /* 3f */
PGM_CHECK(monitor_event_exception) /* 40 */
PGM_CHECK_DEFAULT /* 41 */
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 6ccef5f29761..eb6e23ad15a2 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -106,6 +106,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp,
p->thread.system_timer = 0;
p->thread.hardirq_timer = 0;
p->thread.softirq_timer = 0;
+ p->thread.last_break = 1;
frame->sf.back_chain = 0;
/* new return point is ret_from_fork */
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 6ebc2117c66c..c92d04f876cb 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -151,10 +151,35 @@ static void show_cpu_summary(struct seq_file *m, void *v)
}
}
+static void show_cpu_topology(struct seq_file *m, unsigned long n)
+{
+#ifdef CONFIG_SCHED_TOPOLOGY
+ seq_printf(m, "physical id : %d\n", topology_physical_package_id(n));
+ seq_printf(m, "core id : %d\n", topology_core_id(n));
+ seq_printf(m, "book id : %d\n", topology_book_id(n));
+ seq_printf(m, "drawer id : %d\n", topology_drawer_id(n));
+ seq_printf(m, "dedicated : %d\n", topology_cpu_dedicated(n));
+ seq_printf(m, "address : %d\n", smp_cpu_get_cpu_address(n));
+ seq_printf(m, "siblings : %d\n", cpumask_weight(topology_core_cpumask(n)));
+ seq_printf(m, "cpu cores : %d\n", topology_booted_cores(n));
+#endif /* CONFIG_SCHED_TOPOLOGY */
+}
+
+static void show_cpu_ids(struct seq_file *m, unsigned long n)
+{
+ struct cpuid *id = &per_cpu(cpu_info.cpu_id, n);
+
+ seq_printf(m, "version : %02X\n", id->version);
+ seq_printf(m, "identification : %06X\n", id->ident);
+ seq_printf(m, "machine : %04X\n", id->machine);
+}
+
static void show_cpu_mhz(struct seq_file *m, unsigned long n)
{
struct cpu_info *c = per_cpu_ptr(&cpu_info, n);
+ if (!machine_has_cpu_mhz)
+ return;
seq_printf(m, "cpu MHz dynamic : %d\n", c->cpu_mhz_dynamic);
seq_printf(m, "cpu MHz static : %d\n", c->cpu_mhz_static);
}
@@ -165,12 +190,13 @@ static void show_cpu_mhz(struct seq_file *m, unsigned long n)
static int show_cpuinfo(struct seq_file *m, void *v)
{
unsigned long n = (unsigned long) v - 1;
+ unsigned long first = cpumask_first(cpu_online_mask);
- if (!n)
+ if (n == first)
show_cpu_summary(m, v);
- if (!machine_has_cpu_mhz)
- return 0;
seq_printf(m, "\ncpu number : %ld\n", n);
+ show_cpu_topology(m, n);
+ show_cpu_ids(m, n);
show_cpu_mhz(m, n);
return 0;
}
@@ -179,6 +205,8 @@ static inline void *c_update(loff_t *pos)
{
if (*pos)
*pos = cpumask_next(*pos - 1, cpu_online_mask);
+ else
+ *pos = cpumask_first(cpu_online_mask);
return *pos < nr_cpu_ids ? (void *)*pos + 1 : NULL;
}
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index b2c2f75860e8..36445dd40fdb 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -73,6 +73,7 @@
#include <asm/nospec-branch.h>
#include <asm/mem_detect.h>
#include <asm/uv.h>
+#include <asm/asm-offsets.h>
#include "entry.h"
/*
@@ -92,10 +93,6 @@ char elf_platform[ELF_PLATFORM_SIZE];
unsigned long int_hwcap = 0;
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
-int __bootdata_preserved(prot_virt_guest);
-#endif
-
int __bootdata(noexec_disabled);
int __bootdata(memory_end_set);
unsigned long __bootdata(memory_end);
@@ -450,6 +447,8 @@ static void __init setup_lowcore_dat_off(void)
lc->spinlock_index = 0;
arch_spin_lock_setup(0);
lc->br_r1_trampoline = 0x07f1; /* br %r1 */
+ lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
+ lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
set_prefix((u32)(unsigned long) lc);
lowcore_ptr[0] = lc;
@@ -564,6 +563,9 @@ static void __init setup_memory_end(void)
vmax = _REGION1_SIZE; /* 4-level kernel page table */
}
+ if (is_prot_virt_host())
+ adjust_to_uv_max(&vmax);
+
/* module area is at the end of the kernel address space. */
MODULES_END = vmax;
MODULES_VADDR = MODULES_END - MODULES_LEN;
@@ -790,6 +792,7 @@ static void __init memblock_add_mem_detect_info(void)
memblock_physmem_add(start, end - start);
}
memblock_set_bottom_up(false);
+ memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
memblock_dump_all();
}
@@ -1138,6 +1141,8 @@ void __init setup_arch(char **cmdline_p)
*/
memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT));
+ if (is_prot_virt_host())
+ setup_uv();
setup_memory_end();
setup_memory();
dma_contiguous_reserve(memory_end);
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index e6fca5498e1f..b295090e2ce6 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -487,7 +487,7 @@ void do_signal(struct pt_regs *regs)
regs->gprs[2] = -EINTR;
break;
}
- /* fallthrough */
+ fallthrough;
case -ERESTARTNOINTR:
regs->gprs[2] = regs->orig_gpr2;
regs->psw.addr =
@@ -514,7 +514,7 @@ void do_signal(struct pt_regs *regs)
case -ERESTART_RESTARTBLOCK:
/* Restart with sys_restart_syscall */
regs->int_code = __NR_restart_syscall;
- /* fallthrough */
+ fallthrough;
case -ERESTARTNOHAND:
case -ERESTARTSYS:
case -ERESTARTNOINTR:
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index a08bd2522dd9..10dbb12eb14d 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -212,6 +212,8 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
lc->spinlock_lockval = arch_spin_lockval(cpu);
lc->spinlock_index = 0;
lc->br_r1_trampoline = 0x07f1; /* br %r1 */
+ lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
+ lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
if (nmi_alloc_per_cpu(lc))
goto out_async;
if (vdso_alloc_per_cpu(lc))
@@ -401,7 +403,7 @@ int smp_find_processor_id(u16 address)
return -1;
}
-bool arch_vcpu_is_preempted(int cpu)
+bool notrace arch_vcpu_is_preempted(int cpu)
{
if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu))
return false;
@@ -411,7 +413,7 @@ bool arch_vcpu_is_preempted(int cpu)
}
EXPORT_SYMBOL(arch_vcpu_is_preempted);
-void smp_yield_cpu(int cpu)
+void notrace smp_yield_cpu(int cpu)
{
if (!MACHINE_HAS_DIAG9C)
return;
@@ -701,6 +703,11 @@ int smp_cpu_get_polarization(int cpu)
return pcpu_devices[cpu].polarization;
}
+int smp_cpu_get_cpu_address(int cpu)
+{
+ return pcpu_devices[cpu].address;
+}
+
static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
{
static int use_sigp_detection;
@@ -851,12 +858,13 @@ static void smp_init_secondary(void)
init_cpu_timer();
vtime_init();
pfault_init();
- notify_cpu_starting(smp_processor_id());
+ notify_cpu_starting(cpu);
if (topology_cpu_dedicated(cpu))
set_cpu_flag(CIF_DEDICATED_CPU);
else
clear_cpu_flag(CIF_DEDICATED_CPU);
- set_cpu_online(smp_processor_id(), true);
+ set_cpu_online(cpu, true);
+ update_cpu_masks();
inc_irq_stat(CPU_RST);
local_irq_enable();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
@@ -928,6 +936,7 @@ int __cpu_disable(void)
/* Handle possible pending IPIs */
smp_handle_ext_call();
set_cpu_online(smp_processor_id(), false);
+ update_cpu_masks();
/* Disable pseudo page faults on this cpu. */
pfault_fini();
/* Disable interrupt sources via control register. */
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
deleted file mode 100644
index 75b7b307946e..000000000000
--- a/arch/s390/kernel/suspend.c
+++ /dev/null
@@ -1,240 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Suspend support specific for s390.
- *
- * Copyright IBM Corp. 2009
- *
- * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
- */
-
-#include <linux/pfn.h>
-#include <linux/suspend.h>
-#include <linux/mm.h>
-#include <linux/pci.h>
-#include <asm/ctl_reg.h>
-#include <asm/ipl.h>
-#include <asm/cio.h>
-#include <asm/sections.h>
-#include "entry.h"
-
-/*
- * The restore of the saved pages in an hibernation image will set
- * the change and referenced bits in the storage key for each page.
- * Overindication of the referenced bits after an hibernation cycle
- * does not cause any harm but the overindication of the change bits
- * would cause trouble.
- * Use the ARCH_SAVE_PAGE_KEYS hooks to save the storage key of each
- * page to the most significant byte of the associated page frame
- * number in the hibernation image.
- */
-
-/*
- * Key storage is allocated as a linked list of pages.
- * The size of the keys array is (PAGE_SIZE - sizeof(long))
- */
-struct page_key_data {
- struct page_key_data *next;
- unsigned char data[];
-};
-
-#define PAGE_KEY_DATA_SIZE (PAGE_SIZE - sizeof(struct page_key_data *))
-
-static struct page_key_data *page_key_data;
-static struct page_key_data *page_key_rp, *page_key_wp;
-static unsigned long page_key_rx, page_key_wx;
-unsigned long suspend_zero_pages;
-
-/*
- * For each page in the hibernation image one additional byte is
- * stored in the most significant byte of the page frame number.
- * On suspend no additional memory is required but on resume the
- * keys need to be memorized until the page data has been restored.
- * Only then can the storage keys be set to their old state.
- */
-unsigned long page_key_additional_pages(unsigned long pages)
-{
- return DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE);
-}
-
-/*
- * Free page_key_data list of arrays.
- */
-void page_key_free(void)
-{
- struct page_key_data *pkd;
-
- while (page_key_data) {
- pkd = page_key_data;
- page_key_data = pkd->next;
- free_page((unsigned long) pkd);
- }
-}
-
-/*
- * Allocate page_key_data list of arrays with enough room to store
- * one byte for each page in the hibernation image.
- */
-int page_key_alloc(unsigned long pages)
-{
- struct page_key_data *pk;
- unsigned long size;
-
- size = DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE);
- while (size--) {
- pk = (struct page_key_data *) get_zeroed_page(GFP_KERNEL);
- if (!pk) {
- page_key_free();
- return -ENOMEM;
- }
- pk->next = page_key_data;
- page_key_data = pk;
- }
- page_key_rp = page_key_wp = page_key_data;
- page_key_rx = page_key_wx = 0;
- return 0;
-}
-
-/*
- * Save the storage key into the upper 8 bits of the page frame number.
- */
-void page_key_read(unsigned long *pfn)
-{
- struct page *page;
- unsigned long addr;
- unsigned char key;
-
- page = pfn_to_page(*pfn);
- addr = (unsigned long) page_address(page);
- key = (unsigned char) page_get_storage_key(addr) & 0x7f;
- if (arch_test_page_nodat(page))
- key |= 0x80;
- *(unsigned char *) pfn = key;
-}
-
-/*
- * Extract the storage key from the upper 8 bits of the page frame number
- * and store it in the page_key_data list of arrays.
- */
-void page_key_memorize(unsigned long *pfn)
-{
- page_key_wp->data[page_key_wx] = *(unsigned char *) pfn;
- *(unsigned char *) pfn = 0;
- if (++page_key_wx < PAGE_KEY_DATA_SIZE)
- return;
- page_key_wp = page_key_wp->next;
- page_key_wx = 0;
-}
-
-/*
- * Get the next key from the page_key_data list of arrays and set the
- * storage key of the page referred by @address. If @address refers to
- * a "safe" page the swsusp_arch_resume code will transfer the storage
- * key from the buffer page to the original page.
- */
-void page_key_write(void *address)
-{
- struct page *page;
- unsigned char key;
-
- key = page_key_rp->data[page_key_rx];
- page_set_storage_key((unsigned long) address, key & 0x7f, 0);
- page = virt_to_page(address);
- if (key & 0x80)
- arch_set_page_nodat(page, 0);
- else
- arch_set_page_dat(page, 0);
- if (++page_key_rx >= PAGE_KEY_DATA_SIZE)
- return;
- page_key_rp = page_key_rp->next;
- page_key_rx = 0;
-}
-
-int pfn_is_nosave(unsigned long pfn)
-{
- unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin));
- unsigned long nosave_end_pfn = PFN_DOWN(__pa(&__nosave_end));
- unsigned long end_rodata_pfn = PFN_DOWN(__pa(__end_rodata)) - 1;
- unsigned long stext_pfn = PFN_DOWN(__pa(_stext));
-
- /* Always save lowcore pages (LC protection might be enabled). */
- if (pfn <= LC_PAGES)
- return 0;
- if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn)
- return 1;
- /* Skip memory holes and read-only pages (DCSS, ...). */
- if (pfn >= stext_pfn && pfn <= end_rodata_pfn)
- return 0;
- if (tprot(PFN_PHYS(pfn)))
- return 1;
- return 0;
-}
-
-/*
- * PM notifier callback for suspend
- */
-static int suspend_pm_cb(struct notifier_block *nb, unsigned long action,
- void *ptr)
-{
- switch (action) {
- case PM_SUSPEND_PREPARE:
- case PM_HIBERNATION_PREPARE:
- suspend_zero_pages = __get_free_pages(GFP_KERNEL, LC_ORDER);
- if (!suspend_zero_pages)
- return NOTIFY_BAD;
- break;
- case PM_POST_SUSPEND:
- case PM_POST_HIBERNATION:
- free_pages(suspend_zero_pages, LC_ORDER);
- break;
- default:
- return NOTIFY_DONE;
- }
- return NOTIFY_OK;
-}
-
-static int __init suspend_pm_init(void)
-{
- pm_notifier(suspend_pm_cb, 0);
- return 0;
-}
-arch_initcall(suspend_pm_init);
-
-void save_processor_state(void)
-{
- /* swsusp_arch_suspend() actually saves all cpu register contents.
- * Machine checks must be disabled since swsusp_arch_suspend() stores
- * register contents to their lowcore save areas. That's the same
- * place where register contents on machine checks would be saved.
- * To avoid register corruption disable machine checks.
- * We must also disable machine checks in the new psw mask for
- * program checks, since swsusp_arch_suspend() may generate program
- * checks. Disabling machine checks for all other new psw masks is
- * just paranoia.
- */
- local_mcck_disable();
- /* Disable lowcore protection */
- __ctl_clear_bit(0,28);
- S390_lowcore.external_new_psw.mask &= ~PSW_MASK_MCHECK;
- S390_lowcore.svc_new_psw.mask &= ~PSW_MASK_MCHECK;
- S390_lowcore.io_new_psw.mask &= ~PSW_MASK_MCHECK;
- S390_lowcore.program_new_psw.mask &= ~PSW_MASK_MCHECK;
-}
-
-void restore_processor_state(void)
-{
- S390_lowcore.external_new_psw.mask |= PSW_MASK_MCHECK;
- S390_lowcore.svc_new_psw.mask |= PSW_MASK_MCHECK;
- S390_lowcore.io_new_psw.mask |= PSW_MASK_MCHECK;
- S390_lowcore.program_new_psw.mask |= PSW_MASK_MCHECK;
- /* Enable lowcore protection */
- __ctl_set_bit(0,28);
- local_mcck_enable();
-}
-
-/* Called at the end of swsusp_arch_resume */
-void s390_early_resume(void)
-{
- lgr_info_log();
- channel_subsystem_reinit();
- zpci_rescan();
-}
diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S
deleted file mode 100644
index a7baf0b5f818..000000000000
--- a/arch/s390/kernel/swsusp.S
+++ /dev/null
@@ -1,276 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * S390 64-bit swsusp implementation
- *
- * Copyright IBM Corp. 2009
- *
- * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
- * Michael Holzheu <holzheu@linux.vnet.ibm.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/page.h>
-#include <asm/ptrace.h>
-#include <asm/thread_info.h>
-#include <asm/asm-offsets.h>
-#include <asm/nospec-insn.h>
-#include <asm/sigp.h>
-
-/*
- * Save register context in absolute 0 lowcore and call swsusp_save() to
- * create in-memory kernel image. The context is saved in the designated
- * "store status" memory locations (see POP).
- * We return from this function twice. The first time during the suspend to
- * disk process. The second time via the swsusp_arch_resume() function
- * (see below) in the resume process.
- * This function runs with disabled interrupts.
- */
- GEN_BR_THUNK %r14
-
- .section .text
-ENTRY(swsusp_arch_suspend)
- lg %r1,__LC_NODAT_STACK
- stmg %r6,%r15,__SF_GPRS(%r1)
- aghi %r1,-STACK_FRAME_OVERHEAD
- stg %r15,__SF_BACKCHAIN(%r1)
- lgr %r15,%r1
-
- /* Store FPU registers */
- brasl %r14,save_fpu_regs
-
- /* Deactivate DAT */
- stnsm __SF_EMPTY(%r15),0xfb
-
- /* Store prefix register on stack */
- stpx __SF_EMPTY(%r15)
-
- /* Save prefix register contents for lowcore copy */
- llgf %r10,__SF_EMPTY(%r15)
-
- /* Get pointer to save area */
- lghi %r1,0x1000
-
- /* Save CPU address */
- stap __LC_EXT_CPU_ADDR(%r0)
-
- /* Store registers */
- mvc 0x318(4,%r1),__SF_EMPTY(%r15) /* move prefix to lowcore */
- stam %a0,%a15,0x340(%r1) /* store access registers */
- stctg %c0,%c15,0x380(%r1) /* store control registers */
- stmg %r0,%r15,0x280(%r1) /* store general registers */
-
- stpt 0x328(%r1) /* store timer */
- stck __SF_EMPTY(%r15) /* store clock */
- stckc 0x330(%r1) /* store clock comparator */
-
- /* Update cputime accounting before going to sleep */
- lg %r0,__LC_LAST_UPDATE_TIMER
- slg %r0,0x328(%r1)
- alg %r0,__LC_SYSTEM_TIMER
- stg %r0,__LC_SYSTEM_TIMER
- mvc __LC_LAST_UPDATE_TIMER(8),0x328(%r1)
- lg %r0,__LC_LAST_UPDATE_CLOCK
- slg %r0,__SF_EMPTY(%r15)
- alg %r0,__LC_STEAL_TIMER
- stg %r0,__LC_STEAL_TIMER
- mvc __LC_LAST_UPDATE_CLOCK(8),__SF_EMPTY(%r15)
-
- /* Activate DAT */
- stosm __SF_EMPTY(%r15),0x04
-
- /* Set prefix page to zero */
- xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15)
- spx __SF_EMPTY(%r15)
-
- /* Save absolute zero pages */
- larl %r2,suspend_zero_pages
- lg %r2,0(%r2)
- lghi %r4,0
- lghi %r3,2*PAGE_SIZE
- lghi %r5,2*PAGE_SIZE
-1: mvcle %r2,%r4,0
- jo 1b
-
- /* Copy lowcore to absolute zero lowcore */
- lghi %r2,0
- lgr %r4,%r10
- lghi %r3,2*PAGE_SIZE
- lghi %r5,2*PAGE_SIZE
-1: mvcle %r2,%r4,0
- jo 1b
-
- /* Save image */
- brasl %r14,swsusp_save
-
- /* Restore prefix register and return */
- lghi %r1,0x1000
- spx 0x318(%r1)
- lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
- lghi %r2,0
- BR_EX %r14
-ENDPROC(swsusp_arch_suspend)
-
-/*
- * Restore saved memory image to correct place and restore register context.
- * Then we return to the function that called swsusp_arch_suspend().
- * swsusp_arch_resume() runs with disabled interrupts.
- */
-ENTRY(swsusp_arch_resume)
- stmg %r6,%r15,__SF_GPRS(%r15)
- lgr %r1,%r15
- aghi %r15,-STACK_FRAME_OVERHEAD
- stg %r1,__SF_BACKCHAIN(%r15)
-
- /* Make all free pages stable */
- lghi %r2,1
- brasl %r14,arch_set_page_states
-
- /* Set prefix page to zero */
- xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15)
- spx __SF_EMPTY(%r15)
-
- /* Deactivate DAT */
- stnsm __SF_EMPTY(%r15),0xfb
-
- /* Restore saved image */
- larl %r1,restore_pblist
- lg %r1,0(%r1)
- ltgr %r1,%r1
- jz 2f
-0:
- lg %r2,8(%r1)
- lg %r4,0(%r1)
- iske %r0,%r4
- lghi %r3,PAGE_SIZE
- lghi %r5,PAGE_SIZE
-1:
- mvcle %r2,%r4,0
- jo 1b
- lg %r2,8(%r1)
- sske %r0,%r2
- lg %r1,16(%r1)
- ltgr %r1,%r1
- jnz 0b
-2:
- ptlb /* flush tlb */
-
- /* Reset System */
- larl %r1,.Lnew_pgm_check_psw
- epsw %r2,%r3
- stm %r2,%r3,0(%r1)
- mvc __LC_PGM_NEW_PSW(16,%r0),0(%r1)
- larl %r1,__swsusp_reset_dma
- lg %r1,0(%r1)
- BASR_EX %r14,%r1
- larl %r1,smp_cpu_mt_shift
- icm %r1,15,0(%r1)
- jz smt_done
- llgfr %r1,%r1
-smt_loop:
- sigp %r1,%r0,SIGP_SET_MULTI_THREADING
- brc 8,smt_done /* accepted */
- brc 2,smt_loop /* busy, try again */
-smt_done:
- larl %r1,.Lnew_pgm_check_psw
- lpswe 0(%r1)
-pgm_check_entry:
-
- /* Switch to original suspend CPU */
- larl %r1,.Lresume_cpu /* Resume CPU address: r2 */
- stap 0(%r1)
- llgh %r2,0(%r1)
- llgh %r1,__LC_EXT_CPU_ADDR(%r0) /* Suspend CPU address: r1 */
- cgr %r1,%r2
- je restore_registers /* r1 = r2 -> nothing to do */
- larl %r4,.Lrestart_suspend_psw /* Set new restart PSW */
- mvc __LC_RST_NEW_PSW(16,%r0),0(%r4)
-3:
- sigp %r9,%r1,SIGP_INITIAL_CPU_RESET /* sigp initial cpu reset */
- brc 8,4f /* accepted */
- brc 2,3b /* busy, try again */
-
- /* Suspend CPU not available -> panic */
- larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD
- larl %r2,.Lpanic_string
- brasl %r14,sclp_early_printk_force
- larl %r3,.Ldisabled_wait_31
- lpsw 0(%r3)
-4:
- /* Switch to suspend CPU */
- sigp %r9,%r1,SIGP_RESTART /* sigp restart to suspend CPU */
- brc 2,4b /* busy, try again */
-5:
- sigp %r9,%r2,SIGP_STOP /* sigp stop to current resume CPU */
- brc 2,5b /* busy, try again */
-6: j 6b
-
-restart_suspend:
- larl %r1,.Lresume_cpu
- llgh %r2,0(%r1)
-7:
- sigp %r9,%r2,SIGP_SENSE /* sigp sense, wait for resume CPU */
- brc 8,7b /* accepted, status 0, still running */
- brc 2,7b /* busy, try again */
- tmll %r9,0x40 /* Test if resume CPU is stopped */
- jz 7b
-
-restore_registers:
- /* Restore registers */
- lghi %r13,0x1000 /* %r1 = pointer to save area */
-
- /* Ignore time spent in suspended state. */
- llgf %r1,0x318(%r13)
- stck __LC_LAST_UPDATE_CLOCK(%r1)
- spt 0x328(%r13) /* reprogram timer */
- //sckc 0x330(%r13) /* set clock comparator */
-
- lctlg %c0,%c15,0x380(%r13) /* load control registers */
- lam %a0,%a15,0x340(%r13) /* load access registers */
-
- /* Load old stack */
- lg %r15,0x2f8(%r13)
-
- /* Save prefix register */
- mvc __SF_EMPTY(4,%r15),0x318(%r13)
-
- /* Restore absolute zero pages */
- lghi %r2,0
- larl %r4,suspend_zero_pages
- lg %r4,0(%r4)
- lghi %r3,2*PAGE_SIZE
- lghi %r5,2*PAGE_SIZE
-1: mvcle %r2,%r4,0
- jo 1b
-
- /* Restore prefix register */
- spx __SF_EMPTY(%r15)
-
- /* Activate DAT */
- stosm __SF_EMPTY(%r15),0x04
-
- /* Make all free pages unstable */
- lghi %r2,0
- brasl %r14,arch_set_page_states
-
- /* Call arch specific early resume code */
- brasl %r14,s390_early_resume
-
- /* Return 0 */
- lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
- lghi %r2,0
- BR_EX %r14
-ENDPROC(swsusp_arch_resume)
-
- .section .data..nosave,"aw",@progbits
- .align 8
-.Ldisabled_wait_31:
- .long 0x000a0000,0x00000000
-.Lpanic_string:
- .asciz "Resume not possible because suspend CPU is no longer available\n"
- .align 8
-.Lrestart_suspend_psw:
- .quad 0x0000000180000000,restart_suspend
-.Lnew_pgm_check_psw:
- .quad 0,pgm_check_entry
-.Lresume_cpu:
- .byte 0,0
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 3627953007ed..5f70cefc13e4 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -26,7 +26,6 @@
#include <linux/nodemask.h>
#include <linux/node.h>
#include <asm/sysinfo.h>
-#include <asm/numa.h>
#define PTF_HORIZONTAL (0UL)
#define PTF_VERTICAL (1UL)
@@ -63,8 +62,6 @@ static struct mask_info drawer_info;
struct cpu_topology_s390 cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);
-cpumask_t cpus_with_topology;
-
static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
{
cpumask_t mask;
@@ -86,11 +83,12 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
cpumask_copy(&mask, cpu_present_mask);
break;
default:
- /* fallthrough */
+ fallthrough;
case TOPOLOGY_MODE_SINGLE:
cpumask_copy(&mask, cpumask_of(cpu));
break;
}
+ cpumask_and(&mask, &mask, cpu_online_mask);
return mask;
}
@@ -106,6 +104,7 @@ static cpumask_t cpu_thread_map(unsigned int cpu)
for (i = 0; i <= smp_cpu_mtid; i++)
if (cpu_present(cpu + i))
cpumask_set_cpu(cpu + i, &mask);
+ cpumask_and(&mask, &mask, cpu_online_mask);
return mask;
}
@@ -138,7 +137,6 @@ static void add_cpus_to_mask(struct topology_core *tl_core,
cpumask_set_cpu(lcpu + i, &drawer->mask);
cpumask_set_cpu(lcpu + i, &book->mask);
cpumask_set_cpu(lcpu + i, &socket->mask);
- cpumask_set_cpu(lcpu + i, &cpus_with_topology);
smp_cpu_set_polarization(lcpu + i, tl_core->pp);
}
}
@@ -245,10 +243,10 @@ int topology_set_cpu_management(int fc)
return rc;
}
-static void update_cpu_masks(void)
+void update_cpu_masks(void)
{
- struct cpu_topology_s390 *topo;
- int cpu, id;
+ struct cpu_topology_s390 *topo, *topo_package, *topo_sibling;
+ int cpu, sibling, pkg_first, smt_first, id;
for_each_possible_cpu(cpu) {
topo = &cpu_topology[cpu];
@@ -256,6 +254,7 @@ static void update_cpu_masks(void)
topo->core_mask = cpu_group_map(&socket_info, cpu);
topo->book_mask = cpu_group_map(&book_info, cpu);
topo->drawer_mask = cpu_group_map(&drawer_info, cpu);
+ topo->booted_cores = 0;
if (topology_mode != TOPOLOGY_MODE_HW) {
id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
topo->thread_id = cpu;
@@ -263,11 +262,23 @@ static void update_cpu_masks(void)
topo->socket_id = id;
topo->book_id = id;
topo->drawer_id = id;
- if (cpu_present(cpu))
- cpumask_set_cpu(cpu, &cpus_with_topology);
}
}
- numa_update_cpu_topology();
+ for_each_online_cpu(cpu) {
+ topo = &cpu_topology[cpu];
+ pkg_first = cpumask_first(&topo->core_mask);
+ topo_package = &cpu_topology[pkg_first];
+ if (cpu == pkg_first) {
+ for_each_cpu(sibling, &topo->core_mask) {
+ topo_sibling = &cpu_topology[sibling];
+ smt_first = cpumask_first(&topo_sibling->thread_mask);
+ if (sibling == smt_first)
+ topo_package->booted_cores++;
+ }
+ } else {
+ topo->booted_cores = topo_package->booted_cores;
+ }
+ }
}
void store_topology(struct sysinfo_15_1_x *info)
@@ -289,7 +300,6 @@ static int __arch_update_cpu_topology(void)
int rc = 0;
mutex_lock(&smp_cpu_state_mutex);
- cpumask_clear(&cpus_with_topology);
if (MACHINE_HAS_TOPOLOGY) {
rc = 1;
store_topology(info);
diff --git a/arch/s390/kernel/trace.c b/arch/s390/kernel/trace.c
index 490b52e85014..11a669f3cc93 100644
--- a/arch/s390/kernel/trace.c
+++ b/arch/s390/kernel/trace.c
@@ -14,7 +14,7 @@ EXPORT_TRACEPOINT_SYMBOL(s390_diagnose);
static DEFINE_PER_CPU(unsigned int, diagnose_trace_depth);
-void trace_s390_diagnose_norecursion(int diag_nr)
+void notrace trace_s390_diagnose_norecursion(int diag_nr)
{
unsigned long flags;
unsigned int *depth;
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index dc75588d7894..ff9cc4c3290e 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -271,7 +271,7 @@ void kernel_stack_overflow(struct pt_regs *regs)
}
NOKPROBE_SYMBOL(kernel_stack_overflow);
-static void test_monitor_call(void)
+static void __init test_monitor_call(void)
{
int val = 1;
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
new file mode 100644
index 000000000000..4c0677fc8904
--- /dev/null
+++ b/arch/s390/kernel/uv.c
@@ -0,0 +1,415 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common Ultravisor functions and initialization
+ *
+ * Copyright IBM Corp. 2019, 2020
+ */
+#define KMSG_COMPONENT "prot_virt"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/sizes.h>
+#include <linux/bitmap.h>
+#include <linux/memblock.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <asm/facility.h>
+#include <asm/sections.h>
+#include <asm/uv.h>
+
+/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */
+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
+int __bootdata_preserved(prot_virt_guest);
+#endif
+
+struct uv_info __bootdata_preserved(uv_info);
+
+#if IS_ENABLED(CONFIG_KVM)
+int prot_virt_host;
+EXPORT_SYMBOL(prot_virt_host);
+EXPORT_SYMBOL(uv_info);
+
+static int __init prot_virt_setup(char *val)
+{
+ bool enabled;
+ int rc;
+
+ rc = kstrtobool(val, &enabled);
+ if (!rc && enabled)
+ prot_virt_host = 1;
+
+ if (is_prot_virt_guest() && prot_virt_host) {
+ prot_virt_host = 0;
+ pr_warn("Protected virtualization not available in protected guests.");
+ }
+
+ if (prot_virt_host && !test_facility(158)) {
+ prot_virt_host = 0;
+ pr_warn("Protected virtualization not supported by the hardware.");
+ }
+
+ return rc;
+}
+early_param("prot_virt", prot_virt_setup);
+
+static int __init uv_init(unsigned long stor_base, unsigned long stor_len)
+{
+ struct uv_cb_init uvcb = {
+ .header.cmd = UVC_CMD_INIT_UV,
+ .header.len = sizeof(uvcb),
+ .stor_origin = stor_base,
+ .stor_len = stor_len,
+ };
+
+ if (uv_call(0, (uint64_t)&uvcb)) {
+ pr_err("Ultravisor init failed with rc: 0x%x rrc: 0%x\n",
+ uvcb.header.rc, uvcb.header.rrc);
+ return -1;
+ }
+ return 0;
+}
+
+void __init setup_uv(void)
+{
+ unsigned long uv_stor_base;
+
+ uv_stor_base = (unsigned long)memblock_alloc_try_nid(
+ uv_info.uv_base_stor_len, SZ_1M, SZ_2G,
+ MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE);
+ if (!uv_stor_base) {
+ pr_warn("Failed to reserve %lu bytes for ultravisor base storage\n",
+ uv_info.uv_base_stor_len);
+ goto fail;
+ }
+
+ if (uv_init(uv_stor_base, uv_info.uv_base_stor_len)) {
+ memblock_free(uv_stor_base, uv_info.uv_base_stor_len);
+ goto fail;
+ }
+
+ pr_info("Reserving %luMB as ultravisor base storage\n",
+ uv_info.uv_base_stor_len >> 20);
+ return;
+fail:
+ pr_info("Disabling support for protected virtualization");
+ prot_virt_host = 0;
+}
+
+void adjust_to_uv_max(unsigned long *vmax)
+{
+ *vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr);
+}
+
+/*
+ * Requests the Ultravisor to pin the page in the shared state. This will
+ * cause an intercept when the guest attempts to unshare the pinned page.
+ */
+static int uv_pin_shared(unsigned long paddr)
+{
+ struct uv_cb_cfs uvcb = {
+ .header.cmd = UVC_CMD_PIN_PAGE_SHARED,
+ .header.len = sizeof(uvcb),
+ .paddr = paddr,
+ };
+
+ if (uv_call(0, (u64)&uvcb))
+ return -EINVAL;
+ return 0;
+}
+
+/*
+ * Requests the Ultravisor to encrypt a guest page and make it
+ * accessible to the host for paging (export).
+ *
+ * @paddr: Absolute host address of page to be exported
+ */
+int uv_convert_from_secure(unsigned long paddr)
+{
+ struct uv_cb_cfs uvcb = {
+ .header.cmd = UVC_CMD_CONV_FROM_SEC_STOR,
+ .header.len = sizeof(uvcb),
+ .paddr = paddr
+ };
+
+ if (uv_call(0, (u64)&uvcb))
+ return -EINVAL;
+ return 0;
+}
+
+/*
+ * Calculate the expected ref_count for a page that would otherwise have no
+ * further pins. This was cribbed from similar functions in other places in
+ * the kernel, but with some slight modifications. We know that a secure
+ * page can not be a huge page for example.
+ */
+static int expected_page_refs(struct page *page)
+{
+ int res;
+
+ res = page_mapcount(page);
+ if (PageSwapCache(page)) {
+ res++;
+ } else if (page_mapping(page)) {
+ res++;
+ if (page_has_private(page))
+ res++;
+ }
+ return res;
+}
+
+static int make_secure_pte(pte_t *ptep, unsigned long addr,
+ struct page *exp_page, struct uv_cb_header *uvcb)
+{
+ pte_t entry = READ_ONCE(*ptep);
+ struct page *page;
+ int expected, rc = 0;
+
+ if (!pte_present(entry))
+ return -ENXIO;
+ if (pte_val(entry) & _PAGE_INVALID)
+ return -ENXIO;
+
+ page = pte_page(entry);
+ if (page != exp_page)
+ return -ENXIO;
+ if (PageWriteback(page))
+ return -EAGAIN;
+ expected = expected_page_refs(page);
+ if (!page_ref_freeze(page, expected))
+ return -EBUSY;
+ set_bit(PG_arch_1, &page->flags);
+ rc = uv_call(0, (u64)uvcb);
+ page_ref_unfreeze(page, expected);
+ /* Return -ENXIO if the page was not mapped, -EINVAL otherwise */
+ if (rc)
+ rc = uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
+ return rc;
+}
+
+/*
+ * Requests the Ultravisor to make a page accessible to a guest.
+ * If it's brought in the first time, it will be cleared. If
+ * it has been exported before, it will be decrypted and integrity
+ * checked.
+ */
+int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
+{
+ struct vm_area_struct *vma;
+ bool local_drain = false;
+ spinlock_t *ptelock;
+ unsigned long uaddr;
+ struct page *page;
+ pte_t *ptep;
+ int rc;
+
+again:
+ rc = -EFAULT;
+ down_read(&gmap->mm->mmap_sem);
+
+ uaddr = __gmap_translate(gmap, gaddr);
+ if (IS_ERR_VALUE(uaddr))
+ goto out;
+ vma = find_vma(gmap->mm, uaddr);
+ if (!vma)
+ goto out;
+ /*
+ * Secure pages cannot be huge and userspace should not combine both.
+ * In case userspace does it anyway this will result in an -EFAULT for
+ * the unpack. The guest is thus never reaching secure mode. If
+ * userspace is playing dirty tricky with mapping huge pages later
+ * on this will result in a segmentation fault.
+ */
+ if (is_vm_hugetlb_page(vma))
+ goto out;
+
+ rc = -ENXIO;
+ page = follow_page(vma, uaddr, FOLL_WRITE);
+ if (IS_ERR_OR_NULL(page))
+ goto out;
+
+ lock_page(page);
+ ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
+ rc = make_secure_pte(ptep, uaddr, page, uvcb);
+ pte_unmap_unlock(ptep, ptelock);
+ unlock_page(page);
+out:
+ up_read(&gmap->mm->mmap_sem);
+
+ if (rc == -EAGAIN) {
+ wait_on_page_writeback(page);
+ } else if (rc == -EBUSY) {
+ /*
+ * If we have tried a local drain and the page refcount
+ * still does not match our expected safe value, try with a
+ * system wide drain. This is needed if the pagevecs holding
+ * the page are on a different CPU.
+ */
+ if (local_drain) {
+ lru_add_drain_all();
+ /* We give up here, and let the caller try again */
+ return -EAGAIN;
+ }
+ /*
+ * We are here if the page refcount does not match the
+ * expected safe value. The main culprits are usually
+ * pagevecs. With lru_add_drain() we drain the pagevecs
+ * on the local CPU so that hopefully the refcount will
+ * reach the expected safe value.
+ */
+ lru_add_drain();
+ local_drain = true;
+ /* And now we try again immediately after draining */
+ goto again;
+ } else if (rc == -ENXIO) {
+ if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE))
+ return -EFAULT;
+ return -EAGAIN;
+ }
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_make_secure);
+
+int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
+{
+ struct uv_cb_cts uvcb = {
+ .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
+ .header.len = sizeof(uvcb),
+ .guest_handle = gmap->guest_handle,
+ .gaddr = gaddr,
+ };
+
+ return gmap_make_secure(gmap, gaddr, &uvcb);
+}
+EXPORT_SYMBOL_GPL(gmap_convert_to_secure);
+
+/*
+ * To be called with the page locked or with an extra reference! This will
+ * prevent gmap_make_secure from touching the page concurrently. Having 2
+ * parallel make_page_accessible is fine, as the UV calls will become a
+ * no-op if the page is already exported.
+ */
+int arch_make_page_accessible(struct page *page)
+{
+ int rc = 0;
+
+ /* Hugepage cannot be protected, so nothing to do */
+ if (PageHuge(page))
+ return 0;
+
+ /*
+ * PG_arch_1 is used in 3 places:
+ * 1. for kernel page tables during early boot
+ * 2. for storage keys of huge pages and KVM
+ * 3. As an indication that this page might be secure. This can
+ * overindicate, e.g. we set the bit before calling
+ * convert_to_secure.
+ * As secure pages are never huge, all 3 variants can co-exists.
+ */
+ if (!test_bit(PG_arch_1, &page->flags))
+ return 0;
+
+ rc = uv_pin_shared(page_to_phys(page));
+ if (!rc) {
+ clear_bit(PG_arch_1, &page->flags);
+ return 0;
+ }
+
+ rc = uv_convert_from_secure(page_to_phys(page));
+ if (!rc) {
+ clear_bit(PG_arch_1, &page->flags);
+ return 0;
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(arch_make_page_accessible);
+
+#endif
+
+#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
+static ssize_t uv_query_facilities(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n",
+ uv_info.inst_calls_list[0],
+ uv_info.inst_calls_list[1],
+ uv_info.inst_calls_list[2],
+ uv_info.inst_calls_list[3]);
+}
+
+static struct kobj_attribute uv_query_facilities_attr =
+ __ATTR(facilities, 0444, uv_query_facilities, NULL);
+
+static ssize_t uv_query_max_guest_cpus(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%d\n",
+ uv_info.max_guest_cpus);
+}
+
+static struct kobj_attribute uv_query_max_guest_cpus_attr =
+ __ATTR(max_cpus, 0444, uv_query_max_guest_cpus, NULL);
+
+static ssize_t uv_query_max_guest_vms(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%d\n",
+ uv_info.max_num_sec_conf);
+}
+
+static struct kobj_attribute uv_query_max_guest_vms_attr =
+ __ATTR(max_guests, 0444, uv_query_max_guest_vms, NULL);
+
+static ssize_t uv_query_max_guest_addr(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%lx\n",
+ uv_info.max_sec_stor_addr);
+}
+
+static struct kobj_attribute uv_query_max_guest_addr_attr =
+ __ATTR(max_address, 0444, uv_query_max_guest_addr, NULL);
+
+static struct attribute *uv_query_attrs[] = {
+ &uv_query_facilities_attr.attr,
+ &uv_query_max_guest_cpus_attr.attr,
+ &uv_query_max_guest_vms_attr.attr,
+ &uv_query_max_guest_addr_attr.attr,
+ NULL,
+};
+
+static struct attribute_group uv_query_attr_group = {
+ .attrs = uv_query_attrs,
+};
+
+static struct kset *uv_query_kset;
+static struct kobject *uv_kobj;
+
+static int __init uv_info_init(void)
+{
+ int rc = -ENOMEM;
+
+ if (!test_facility(158))
+ return 0;
+
+ uv_kobj = kobject_create_and_add("uv", firmware_kobj);
+ if (!uv_kobj)
+ return -ENOMEM;
+
+ uv_query_kset = kset_create_and_add("query", NULL, uv_kobj);
+ if (!uv_query_kset)
+ goto out_kobj;
+
+ rc = sysfs_create_group(&uv_query_kset->kobj, &uv_query_attr_group);
+ if (!rc)
+ return 0;
+
+ kset_unregister(uv_query_kset);
+out_kobj:
+ kobject_del(uv_kobj);
+ kobject_put(uv_kobj);
+ return rc;
+}
+device_initcall(uv_info_init);
+#endif
diff --git a/arch/s390/kernel/vdso64/.gitignore b/arch/s390/kernel/vdso64/.gitignore
index 3fd18cf9fec2..4ec80685fecc 100644
--- a/arch/s390/kernel/vdso64/.gitignore
+++ b/arch/s390/kernel/vdso64/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
vdso64.lds
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index d3db3d7ed077..def3b60f1fe8 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -55,8 +55,4 @@ config KVM_S390_UCONTROL
If unsure, say N.
-# OK, it's a little counter-intuitive to do this, but it puts it neatly under
-# the virtualization menu.
-source "drivers/vhost/Kconfig"
-
endif # VIRTUALIZATION
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 05ee90a5ea08..12decca22e7c 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -9,6 +9,6 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqch
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-objs += diag.o gaccess.o guestdbg.o vsie.o
+kvm-objs += diag.o gaccess.o guestdbg.o vsie.o pv.o
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 3fb54ec2cf3e..563429dece03 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -2,7 +2,7 @@
/*
* handling diagnose instructions
*
- * Copyright IBM Corp. 2008, 2011
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -201,6 +201,10 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
+ /*
+ * no need to check the return value of vcpu_stop as it can only have
+ * an error for protvirt, but protvirt means user cpu state
+ */
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
kvm_s390_vcpu_stop(vcpu);
vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 07d30ffcfa41..47a67a958107 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -505,7 +505,7 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
switch (prot) {
case PROT_TYPE_IEP:
tec->b61 = 1;
- /* FALL THROUGH */
+ fallthrough;
case PROT_TYPE_LA:
tec->b56 = 1;
break;
@@ -514,12 +514,12 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
break;
case PROT_TYPE_ALC:
tec->b60 = 1;
- /* FALL THROUGH */
+ fallthrough;
case PROT_TYPE_DAT:
tec->b61 = 1;
break;
}
- /* FALL THROUGH */
+ fallthrough;
case PGM_ASCE_TYPE:
case PGM_PAGE_TRANSLATION:
case PGM_REGION_FIRST_TRANS:
@@ -534,7 +534,7 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
tec->addr = gva >> PAGE_SHIFT;
tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
- /* FALL THROUGH */
+ fallthrough;
case PGM_ALEN_TRANSLATION:
case PGM_ALE_SEQUENCE:
case PGM_ASTE_VALIDITY:
@@ -677,7 +677,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
dat_protection |= rfte.p;
ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8;
}
- /* fallthrough */
+ fallthrough;
case ASCE_TYPE_REGION2: {
union region2_table_entry rste;
@@ -695,7 +695,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
dat_protection |= rste.p;
ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8;
}
- /* fallthrough */
+ fallthrough;
case ASCE_TYPE_REGION3: {
union region3_table_entry rtte;
@@ -723,7 +723,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
dat_protection |= rtte.fc0.p;
ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8;
}
- /* fallthrough */
+ fallthrough;
case ASCE_TYPE_SEGMENT: {
union segment_table_entry ste;
@@ -1050,7 +1050,8 @@ shadow_r2t:
rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
if (rc)
return rc;
- } /* fallthrough */
+ }
+ fallthrough;
case ASCE_TYPE_REGION2: {
union region2_table_entry rste;
@@ -1076,7 +1077,8 @@ shadow_r3t:
rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
if (rc)
return rc;
- } /* fallthrough */
+ }
+ fallthrough;
case ASCE_TYPE_REGION3: {
union region3_table_entry rtte;
@@ -1111,7 +1113,8 @@ shadow_sgt:
rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
if (rc)
return rc;
- } /* fallthrough */
+ }
+ fallthrough;
case ASCE_TYPE_SEGMENT: {
union segment_table_entry ste;
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index a389fa85cca2..e7a7c499a73f 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -2,7 +2,7 @@
/*
* in-kernel handling for sie intercepts
*
- * Copyright IBM Corp. 2008, 2014
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -12,10 +12,10 @@
#include <linux/errno.h>
#include <linux/pagemap.h>
-#include <asm/kvm_host.h>
#include <asm/asm-offsets.h>
#include <asm/irq.h>
#include <asm/sysinfo.h>
+#include <asm/uv.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -79,6 +79,10 @@ static int handle_stop(struct kvm_vcpu *vcpu)
return rc;
}
+ /*
+ * no need to check the return value of vcpu_stop as it can only have
+ * an error for protvirt, but protvirt means user cpu state
+ */
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
kvm_s390_vcpu_stop(vcpu);
return -EOPNOTSUPP;
@@ -231,6 +235,13 @@ static int handle_prog(struct kvm_vcpu *vcpu)
vcpu->stat.exit_program_interruption++;
+ /*
+ * Intercept 8 indicates a loop of specification exceptions
+ * for protected guests.
+ */
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ return -EOPNOTSUPP;
+
if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
rc = kvm_s390_handle_per_event(vcpu);
if (rc)
@@ -384,7 +395,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
goto out;
}
- if (addr & ~PAGE_MASK)
+ if (!kvm_s390_pv_cpu_is_protected(vcpu) && (addr & ~PAGE_MASK))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
sctns = (void *)get_zeroed_page(GFP_KERNEL);
@@ -395,10 +406,15 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
out:
if (!cc) {
- r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
- if (r) {
- free_page((unsigned long)sctns);
- return kvm_s390_inject_prog_cond(vcpu, r);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ memcpy((void *)(sida_origin(vcpu->arch.sie_block)),
+ sctns, PAGE_SIZE);
+ } else {
+ r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
+ if (r) {
+ free_page((unsigned long)sctns);
+ return kvm_s390_inject_prog_cond(vcpu, r);
+ }
}
}
@@ -444,6 +460,77 @@ static int handle_operexc(struct kvm_vcpu *vcpu)
return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
}
+static int handle_pv_spx(struct kvm_vcpu *vcpu)
+{
+ u32 pref = *(u32 *)vcpu->arch.sie_block->sidad;
+
+ kvm_s390_set_prefix(vcpu, pref);
+ trace_kvm_s390_handle_prefix(vcpu, 1, pref);
+ return 0;
+}
+
+static int handle_pv_sclp(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+
+ spin_lock(&fi->lock);
+ /*
+ * 2 cases:
+ * a: an sccb answering interrupt was already pending or in flight.
+ * As the sccb value is not known we can simply set some value to
+ * trigger delivery of a saved SCCB. UV will then use its saved
+ * copy of the SCCB value.
+ * b: an error SCCB interrupt needs to be injected so we also inject
+ * a fake SCCB address. Firmware will use the proper one.
+ * This makes sure, that both errors and real sccb returns will only
+ * be delivered after a notification intercept (instruction has
+ * finished) but not after others.
+ */
+ fi->srv_signal.ext_params |= 0x43000;
+ set_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+ clear_bit(IRQ_PEND_EXT_SERVICE, &fi->masked_irqs);
+ spin_unlock(&fi->lock);
+ return 0;
+}
+
+static int handle_pv_uvc(struct kvm_vcpu *vcpu)
+{
+ struct uv_cb_share *guest_uvcb = (void *)vcpu->arch.sie_block->sidad;
+ struct uv_cb_cts uvcb = {
+ .header.cmd = UVC_CMD_UNPIN_PAGE_SHARED,
+ .header.len = sizeof(uvcb),
+ .guest_handle = kvm_s390_pv_get_handle(vcpu->kvm),
+ .gaddr = guest_uvcb->paddr,
+ };
+ int rc;
+
+ if (guest_uvcb->header.cmd != UVC_CMD_REMOVE_SHARED_ACCESS) {
+ WARN_ONCE(1, "Unexpected notification intercept for UVC 0x%x\n",
+ guest_uvcb->header.cmd);
+ return 0;
+ }
+ rc = gmap_make_secure(vcpu->arch.gmap, uvcb.gaddr, &uvcb);
+ /*
+ * If the unpin did not succeed, the guest will exit again for the UVC
+ * and we will retry the unpin.
+ */
+ if (rc == -EINVAL)
+ return 0;
+ return rc;
+}
+
+static int handle_pv_notification(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.sie_block->ipa == 0xb210)
+ return handle_pv_spx(vcpu);
+ if (vcpu->arch.sie_block->ipa == 0xb220)
+ return handle_pv_sclp(vcpu);
+ if (vcpu->arch.sie_block->ipa == 0xb9a4)
+ return handle_pv_uvc(vcpu);
+
+ return handle_instruction(vcpu);
+}
+
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
{
int rc, per_rc = 0;
@@ -480,6 +567,28 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
case ICPT_KSS:
rc = kvm_s390_skey_check_enable(vcpu);
break;
+ case ICPT_MCHKREQ:
+ case ICPT_INT_ENABLE:
+ /*
+ * PSW bit 13 or a CR (0, 6, 14) changed and we might
+ * now be able to deliver interrupts. The pre-run code
+ * will take care of this.
+ */
+ rc = 0;
+ break;
+ case ICPT_PV_INSTR:
+ rc = handle_instruction(vcpu);
+ break;
+ case ICPT_PV_NOTIFY:
+ rc = handle_pv_notification(vcpu);
+ break;
+ case ICPT_PV_PREF:
+ rc = 0;
+ gmap_convert_to_secure(vcpu->arch.gmap,
+ kvm_s390_get_prefix(vcpu));
+ gmap_convert_to_secure(vcpu->arch.gmap,
+ kvm_s390_get_prefix(vcpu) + PAGE_SIZE);
+ break;
default:
return -EOPNOTSUPP;
}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index c06c89d370a7..bfb481134994 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2,7 +2,7 @@
/*
* handling kvm guest interrupts
*
- * Copyright IBM Corp. 2008, 2015
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
*/
@@ -324,8 +324,11 @@ static inline int gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
static inline unsigned long pending_irqs_no_gisa(struct kvm_vcpu *vcpu)
{
- return vcpu->kvm->arch.float_int.pending_irqs |
- vcpu->arch.local_int.pending_irqs;
+ unsigned long pending = vcpu->kvm->arch.float_int.pending_irqs |
+ vcpu->arch.local_int.pending_irqs;
+
+ pending &= ~vcpu->kvm->arch.float_int.masked_irqs;
+ return pending;
}
static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
@@ -383,10 +386,18 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
__clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask);
if (!(vcpu->arch.sie_block->gcr[0] & CR0_CPU_TIMER_SUBMASK))
__clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask);
- if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
+ if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) {
__clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask);
+ __clear_bit(IRQ_PEND_EXT_SERVICE_EV, &active_mask);
+ }
if (psw_mchk_disabled(vcpu))
active_mask &= ~IRQ_PEND_MCHK_MASK;
+ /* PV guest cpus can have a single interruption injected at a time. */
+ if (kvm_s390_pv_cpu_get_handle(vcpu) &&
+ vcpu->arch.sie_block->iictl != IICTL_CODE_NONE)
+ active_mask &= ~(IRQ_PEND_EXT_II_MASK |
+ IRQ_PEND_IO_MASK |
+ IRQ_PEND_MCHK_MASK);
/*
* Check both floating and local interrupt's cr14 because
* bit IRQ_PEND_MCHK_REP could be set in both cases.
@@ -479,19 +490,23 @@ static void set_intercept_indicators(struct kvm_vcpu *vcpu)
static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- int rc;
+ int rc = 0;
vcpu->stat.deliver_cputm++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
0, 0);
-
- rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
- (u16 *)__LC_EXT_INT_CODE);
- rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
- rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+ vcpu->arch.sie_block->eic = EXT_IRQ_CPU_TIMER;
+ } else {
+ rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
+ (u16 *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ }
clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
return rc ? -EFAULT : 0;
}
@@ -499,19 +514,23 @@ static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- int rc;
+ int rc = 0;
vcpu->stat.deliver_ckc++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
0, 0);
-
- rc = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP,
- (u16 __user *)__LC_EXT_INT_CODE);
- rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
- rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+ vcpu->arch.sie_block->eic = EXT_IRQ_CLK_COMP;
+ } else {
+ rc = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP,
+ (u16 __user *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ }
clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
return rc ? -EFAULT : 0;
}
@@ -553,6 +572,20 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
union mci mci;
int rc;
+ /*
+ * All other possible payload for a machine check (e.g. the register
+ * contents in the save area) will be handled by the ultravisor, as
+ * the hypervisor does not not have the needed information for
+ * protected guests.
+ */
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_MCHK;
+ vcpu->arch.sie_block->mcic = mchk->mcic;
+ vcpu->arch.sie_block->faddr = mchk->failing_storage_address;
+ vcpu->arch.sie_block->edc = mchk->ext_damage_code;
+ return 0;
+ }
+
mci.val = mchk->mcic;
/* take care of lazy register loading */
save_fpu_regs();
@@ -696,17 +729,21 @@ static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- int rc;
+ int rc = 0;
VCPU_EVENT(vcpu, 3, "%s", "deliver: cpu restart");
vcpu->stat.deliver_restart_signal++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
- rc = write_guest_lc(vcpu,
- offsetof(struct lowcore, restart_old_psw),
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= read_guest_lc(vcpu, offsetof(struct lowcore, restart_psw),
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_RESTART;
+ } else {
+ rc = write_guest_lc(vcpu,
+ offsetof(struct lowcore, restart_old_psw),
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, offsetof(struct lowcore, restart_psw),
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ }
clear_bit(IRQ_PEND_RESTART, &li->pending_irqs);
return rc ? -EFAULT : 0;
}
@@ -748,6 +785,12 @@ static int __must_check __deliver_emergency_signal(struct kvm_vcpu *vcpu)
vcpu->stat.deliver_emergency_signal++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
cpu_addr, 0);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+ vcpu->arch.sie_block->eic = EXT_IRQ_EMERGENCY_SIG;
+ vcpu->arch.sie_block->extcpuaddr = cpu_addr;
+ return 0;
+ }
rc = put_guest_lc(vcpu, EXT_IRQ_EMERGENCY_SIG,
(u16 *)__LC_EXT_INT_CODE);
@@ -776,6 +819,12 @@ static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu)
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
KVM_S390_INT_EXTERNAL_CALL,
extcall.code, 0);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+ vcpu->arch.sie_block->eic = EXT_IRQ_EXTERNAL_CALL;
+ vcpu->arch.sie_block->extcpuaddr = extcall.code;
+ return 0;
+ }
rc = put_guest_lc(vcpu, EXT_IRQ_EXTERNAL_CALL,
(u16 *)__LC_EXT_INT_CODE);
@@ -787,6 +836,21 @@ static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu)
return rc ? -EFAULT : 0;
}
+static int __deliver_prog_pv(struct kvm_vcpu *vcpu, u16 code)
+{
+ switch (code) {
+ case PGM_SPECIFICATION:
+ vcpu->arch.sie_block->iictl = IICTL_CODE_SPECIFICATION;
+ break;
+ case PGM_OPERAND:
+ vcpu->arch.sie_block->iictl = IICTL_CODE_OPERAND;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -807,6 +871,10 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
pgm_info.code, 0);
+ /* PER is handled by the ultravisor */
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ return __deliver_prog_pv(vcpu, pgm_info.code & ~PGM_PER);
+
switch (pgm_info.code & ~PGM_PER) {
case PGM_AFX_TRANSLATION:
case PGM_ASX_TRANSLATION:
@@ -818,7 +886,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
case PGM_PRIMARY_AUTHORITY:
case PGM_SECONDARY_AUTHORITY:
nullifying = true;
- /* fall through */
+ fallthrough;
case PGM_SPACE_SWITCH:
rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
(u64 *)__LC_TRANS_EXC_CODE);
@@ -902,20 +970,49 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
return rc ? -EFAULT : 0;
}
+#define SCCB_MASK 0xFFFFFFF8
+#define SCCB_EVENT_PENDING 0x3
+
+static int write_sclp(struct kvm_vcpu *vcpu, u32 parm)
+{
+ int rc;
+
+ if (kvm_s390_pv_cpu_get_handle(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+ vcpu->arch.sie_block->eic = EXT_IRQ_SERVICE_SIG;
+ vcpu->arch.sie_block->eiparams = parm;
+ return 0;
+ }
+
+ rc = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= put_guest_lc(vcpu, parm,
+ (u32 *)__LC_EXT_PARAMS);
+
+ return rc ? -EFAULT : 0;
+}
+
static int __must_check __deliver_service(struct kvm_vcpu *vcpu)
{
struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
struct kvm_s390_ext_info ext;
- int rc = 0;
spin_lock(&fi->lock);
- if (!(test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs))) {
+ if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->masked_irqs) ||
+ !(test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs))) {
spin_unlock(&fi->lock);
return 0;
}
ext = fi->srv_signal;
memset(&fi->srv_signal, 0, sizeof(ext));
clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+ clear_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs);
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ set_bit(IRQ_PEND_EXT_SERVICE, &fi->masked_irqs);
spin_unlock(&fi->lock);
VCPU_EVENT(vcpu, 4, "deliver: sclp parameter 0x%x",
@@ -924,16 +1021,31 @@ static int __must_check __deliver_service(struct kvm_vcpu *vcpu)
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
ext.ext_params, 0);
- rc = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE);
- rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
- rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= put_guest_lc(vcpu, ext.ext_params,
- (u32 *)__LC_EXT_PARAMS);
+ return write_sclp(vcpu, ext.ext_params);
+}
- return rc ? -EFAULT : 0;
+static int __must_check __deliver_service_ev(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+ struct kvm_s390_ext_info ext;
+
+ spin_lock(&fi->lock);
+ if (!(test_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs))) {
+ spin_unlock(&fi->lock);
+ return 0;
+ }
+ ext = fi->srv_signal;
+ /* only clear the event bit */
+ fi->srv_signal.ext_params &= ~SCCB_EVENT_PENDING;
+ clear_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs);
+ spin_unlock(&fi->lock);
+
+ VCPU_EVENT(vcpu, 4, "%s", "deliver: sclp parameter event");
+ vcpu->stat.deliver_service_signal++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
+ ext.ext_params, 0);
+
+ return write_sclp(vcpu, SCCB_EVENT_PENDING);
}
static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
@@ -1028,6 +1140,15 @@ static int __do_deliver_io(struct kvm_vcpu *vcpu, struct kvm_s390_io_info *io)
{
int rc;
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_IO;
+ vcpu->arch.sie_block->subchannel_id = io->subchannel_id;
+ vcpu->arch.sie_block->subchannel_nr = io->subchannel_nr;
+ vcpu->arch.sie_block->io_int_parm = io->io_int_parm;
+ vcpu->arch.sie_block->io_int_word = io->io_int_word;
+ return 0;
+ }
+
rc = put_guest_lc(vcpu, io->subchannel_id, (u16 *)__LC_SUBCHANNEL_ID);
rc |= put_guest_lc(vcpu, io->subchannel_nr, (u16 *)__LC_SUBCHANNEL_NR);
rc |= put_guest_lc(vcpu, io->io_int_parm, (u32 *)__LC_IO_INT_PARM);
@@ -1329,6 +1450,9 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
case IRQ_PEND_EXT_SERVICE:
rc = __deliver_service(vcpu);
break;
+ case IRQ_PEND_EXT_SERVICE_EV:
+ rc = __deliver_service_ev(vcpu);
+ break;
case IRQ_PEND_PFAULT_DONE:
rc = __deliver_pfault_done(vcpu);
break;
@@ -1421,7 +1545,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
if (kvm_get_vcpu_by_id(vcpu->kvm, src_id) == NULL)
return -EINVAL;
- if (sclp.has_sigpif)
+ if (sclp.has_sigpif && !kvm_s390_pv_cpu_get_handle(vcpu))
return sca_inject_ext_call(vcpu, src_id);
if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
@@ -1681,9 +1805,6 @@ out:
return inti;
}
-#define SCCB_MASK 0xFFFFFFF8
-#define SCCB_EVENT_PENDING 0x3
-
static int __inject_service(struct kvm *kvm,
struct kvm_s390_interrupt_info *inti)
{
@@ -1692,6 +1813,11 @@ static int __inject_service(struct kvm *kvm,
kvm->stat.inject_service_signal++;
spin_lock(&fi->lock);
fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_EVENT_PENDING;
+
+ /* We always allow events, track them separately from the sccb ints */
+ if (fi->srv_signal.ext_params & SCCB_EVENT_PENDING)
+ set_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs);
+
/*
* Early versions of the QEMU s390 bios will inject several
* service interrupts after another without handling a
@@ -1773,7 +1899,14 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
kvm->stat.inject_io++;
isc = int_word_to_isc(inti->io.io_int_word);
- if (gi->origin && inti->type & KVM_S390_INT_IO_AI_MASK) {
+ /*
+ * Do not make use of gisa in protected mode. We do not use the lock
+ * checking variant as this is just a performance optimization and we
+ * do not hold the lock here. This is ok as the code will pick
+ * interrupts from both "lists" for delivery.
+ */
+ if (!kvm_s390_pv_get_handle(kvm) &&
+ gi->origin && inti->type & KVM_S390_INT_IO_AI_MASK) {
VM_EVENT(kvm, 4, "%s isc %1u", "inject: I/O (AI/gisa)", isc);
gisa_set_ipm_gisc(gi->origin, isc);
kfree(inti);
@@ -1834,7 +1967,8 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type)
break;
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
if (!(type & KVM_S390_INT_IO_AI_MASK &&
- kvm->arch.gisa_int.origin))
+ kvm->arch.gisa_int.origin) ||
+ kvm_s390_pv_cpu_get_handle(dst_vcpu))
kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);
break;
default:
@@ -2080,6 +2214,10 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm)
struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
int i;
+ mutex_lock(&kvm->lock);
+ if (!kvm_s390_pv_is_protected(kvm))
+ fi->masked_irqs = 0;
+ mutex_unlock(&kvm->lock);
spin_lock(&fi->lock);
fi->pending_irqs = 0;
memset(&fi->srv_signal, 0, sizeof(fi->srv_signal));
@@ -2146,7 +2284,8 @@ static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
n++;
}
}
- if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs)) {
+ if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs) ||
+ test_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs)) {
if (n == max_irqs) {
/* signal userspace to try again */
ret = -ENOMEM;
@@ -2327,9 +2466,6 @@ static int register_io_adapter(struct kvm_device *dev,
if (!adapter)
return -ENOMEM;
- INIT_LIST_HEAD(&adapter->maps);
- init_rwsem(&adapter->maps_lock);
- atomic_set(&adapter->nr_maps, 0);
adapter->id = adapter_info.id;
adapter->isc = adapter_info.isc;
adapter->maskable = adapter_info.maskable;
@@ -2354,87 +2490,12 @@ int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked)
return ret;
}
-static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
-{
- struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
- struct s390_map_info *map;
- int ret;
-
- if (!adapter || !addr)
- return -EINVAL;
-
- map = kzalloc(sizeof(*map), GFP_KERNEL);
- if (!map) {
- ret = -ENOMEM;
- goto out;
- }
- INIT_LIST_HEAD(&map->list);
- map->guest_addr = addr;
- map->addr = gmap_translate(kvm->arch.gmap, addr);
- if (map->addr == -EFAULT) {
- ret = -EFAULT;
- goto out;
- }
- ret = get_user_pages_fast(map->addr, 1, FOLL_WRITE, &map->page);
- if (ret < 0)
- goto out;
- BUG_ON(ret != 1);
- down_write(&adapter->maps_lock);
- if (atomic_inc_return(&adapter->nr_maps) < MAX_S390_ADAPTER_MAPS) {
- list_add_tail(&map->list, &adapter->maps);
- ret = 0;
- } else {
- put_page(map->page);
- ret = -EINVAL;
- }
- up_write(&adapter->maps_lock);
-out:
- if (ret)
- kfree(map);
- return ret;
-}
-
-static int kvm_s390_adapter_unmap(struct kvm *kvm, unsigned int id, __u64 addr)
-{
- struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
- struct s390_map_info *map, *tmp;
- int found = 0;
-
- if (!adapter || !addr)
- return -EINVAL;
-
- down_write(&adapter->maps_lock);
- list_for_each_entry_safe(map, tmp, &adapter->maps, list) {
- if (map->guest_addr == addr) {
- found = 1;
- atomic_dec(&adapter->nr_maps);
- list_del(&map->list);
- put_page(map->page);
- kfree(map);
- break;
- }
- }
- up_write(&adapter->maps_lock);
-
- return found ? 0 : -EINVAL;
-}
-
void kvm_s390_destroy_adapters(struct kvm *kvm)
{
int i;
- struct s390_map_info *map, *tmp;
- for (i = 0; i < MAX_S390_IO_ADAPTERS; i++) {
- if (!kvm->arch.adapters[i])
- continue;
- list_for_each_entry_safe(map, tmp,
- &kvm->arch.adapters[i]->maps, list) {
- list_del(&map->list);
- put_page(map->page);
- kfree(map);
- }
+ for (i = 0; i < MAX_S390_IO_ADAPTERS; i++)
kfree(kvm->arch.adapters[i]);
- }
}
static int modify_io_adapter(struct kvm_device *dev,
@@ -2456,11 +2517,14 @@ static int modify_io_adapter(struct kvm_device *dev,
if (ret > 0)
ret = 0;
break;
+ /*
+ * The following operations are no longer needed and therefore no-ops.
+ * The gpa to hva translation is done when an IRQ route is set up. The
+ * set_irq code uses get_user_pages_remote() to do the actual write.
+ */
case KVM_S390_IO_ADAPTER_MAP:
- ret = kvm_s390_adapter_map(dev->kvm, req.id, req.addr);
- break;
case KVM_S390_IO_ADAPTER_UNMAP:
- ret = kvm_s390_adapter_unmap(dev->kvm, req.id, req.addr);
+ ret = 0;
break;
default:
ret = -EINVAL;
@@ -2699,19 +2763,15 @@ static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap)
return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit;
}
-static struct s390_map_info *get_map_info(struct s390_io_adapter *adapter,
- u64 addr)
+static struct page *get_map_page(struct kvm *kvm, u64 uaddr)
{
- struct s390_map_info *map;
+ struct page *page = NULL;
- if (!adapter)
- return NULL;
-
- list_for_each_entry(map, &adapter->maps, list) {
- if (map->guest_addr == addr)
- return map;
- }
- return NULL;
+ down_read(&kvm->mm->mmap_sem);
+ get_user_pages_remote(NULL, kvm->mm, uaddr, 1, FOLL_WRITE,
+ &page, NULL, NULL);
+ up_read(&kvm->mm->mmap_sem);
+ return page;
}
static int adapter_indicators_set(struct kvm *kvm,
@@ -2720,30 +2780,35 @@ static int adapter_indicators_set(struct kvm *kvm,
{
unsigned long bit;
int summary_set, idx;
- struct s390_map_info *info;
+ struct page *ind_page, *summary_page;
void *map;
- info = get_map_info(adapter, adapter_int->ind_addr);
- if (!info)
+ ind_page = get_map_page(kvm, adapter_int->ind_addr);
+ if (!ind_page)
return -1;
- map = page_address(info->page);
- bit = get_ind_bit(info->addr, adapter_int->ind_offset, adapter->swap);
- set_bit(bit, map);
- idx = srcu_read_lock(&kvm->srcu);
- mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
- set_page_dirty_lock(info->page);
- info = get_map_info(adapter, adapter_int->summary_addr);
- if (!info) {
- srcu_read_unlock(&kvm->srcu, idx);
+ summary_page = get_map_page(kvm, adapter_int->summary_addr);
+ if (!summary_page) {
+ put_page(ind_page);
return -1;
}
- map = page_address(info->page);
- bit = get_ind_bit(info->addr, adapter_int->summary_offset,
- adapter->swap);
+
+ idx = srcu_read_lock(&kvm->srcu);
+ map = page_address(ind_page);
+ bit = get_ind_bit(adapter_int->ind_addr,
+ adapter_int->ind_offset, adapter->swap);
+ set_bit(bit, map);
+ mark_page_dirty(kvm, adapter_int->ind_addr >> PAGE_SHIFT);
+ set_page_dirty_lock(ind_page);
+ map = page_address(summary_page);
+ bit = get_ind_bit(adapter_int->summary_addr,
+ adapter_int->summary_offset, adapter->swap);
summary_set = test_and_set_bit(bit, map);
- mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
- set_page_dirty_lock(info->page);
+ mark_page_dirty(kvm, adapter_int->summary_addr >> PAGE_SHIFT);
+ set_page_dirty_lock(summary_page);
srcu_read_unlock(&kvm->srcu, idx);
+
+ put_page(ind_page);
+ put_page(summary_page);
return summary_set ? 0 : 1;
}
@@ -2765,9 +2830,7 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
adapter = get_io_adapter(kvm, e->adapter.adapter_id);
if (!adapter)
return -1;
- down_read(&adapter->maps_lock);
ret = adapter_indicators_set(kvm, adapter, &e->adapter);
- up_read(&adapter->maps_lock);
if ((ret > 0) && !adapter->masked) {
ret = kvm_s390_inject_airq(kvm, adapter);
if (ret == 0)
@@ -2818,23 +2881,27 @@ int kvm_set_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
- int ret;
+ u64 uaddr;
switch (ue->type) {
+ /* we store the userspace addresses instead of the guest addresses */
case KVM_IRQ_ROUTING_S390_ADAPTER:
e->set = set_adapter_int;
- e->adapter.summary_addr = ue->u.adapter.summary_addr;
- e->adapter.ind_addr = ue->u.adapter.ind_addr;
+ uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr);
+ if (uaddr == -EFAULT)
+ return -EFAULT;
+ e->adapter.summary_addr = uaddr;
+ uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.ind_addr);
+ if (uaddr == -EFAULT)
+ return -EFAULT;
+ e->adapter.ind_addr = uaddr;
e->adapter.summary_offset = ue->u.adapter.summary_offset;
e->adapter.ind_offset = ue->u.adapter.ind_offset;
e->adapter.adapter_id = ue->u.adapter.adapter_id;
- ret = 0;
- break;
+ return 0;
default:
- ret = -EINVAL;
+ return -EINVAL;
}
-
- return ret;
}
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index c2e6d4ba4e23..5dcf9ff12828 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2,7 +2,7 @@
/*
* hosting IBM Z kernel virtual machines (s390x)
*
- * Copyright IBM Corp. 2008, 2018
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -44,6 +44,7 @@
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
+#include <asm/uv.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -184,6 +185,11 @@ static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
+/* if set to true, the GISA will be initialized and used if available */
+static bool use_gisa = true;
+module_param(use_gisa, bool, 0644);
+MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
+
/*
* For now we handle at most 16 double words as this is what the s390 base
* kernel handles and stores in the prefix page. If we ever need to go beyond
@@ -220,6 +226,7 @@ static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
+debug_info_t *kvm_s390_dbf_uv;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
@@ -228,13 +235,15 @@ int kvm_arch_hardware_enable(void)
return 0;
}
-int kvm_arch_check_processor_compat(void)
+int kvm_arch_check_processor_compat(void *opaque)
{
return 0;
}
+/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
unsigned long end);
+static int sca_switch_to_extended(struct kvm *kvm);
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
@@ -293,7 +302,7 @@ static struct notifier_block kvm_clock_notifier = {
.notifier_call = kvm_clock_sync,
};
-int kvm_arch_hardware_setup(void)
+int kvm_arch_hardware_setup(void *opaque)
{
gmap_notifier.notifier_call = kvm_gmap_notifier;
gmap_register_pte_notifier(&gmap_notifier);
@@ -460,7 +469,12 @@ int kvm_arch_init(void *opaque)
if (!kvm_s390_dbf)
return -ENOMEM;
- if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view))
+ kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
+ if (!kvm_s390_dbf_uv)
+ goto out;
+
+ if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
+ debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
goto out;
kvm_s390_cpu_feat_init();
@@ -487,6 +501,7 @@ void kvm_arch_exit(void)
{
kvm_s390_gib_destroy();
debug_unregister(kvm_s390_dbf);
+ debug_unregister(kvm_s390_dbf_uv);
}
/* Section: device related */
@@ -564,14 +579,16 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_BPB:
r = test_facility(82);
break;
+ case KVM_CAP_S390_PROTECTED:
+ r = is_prot_virt_host();
+ break;
default:
r = 0;
}
return r;
}
-static void kvm_s390_sync_dirty_log(struct kvm *kvm,
- struct kvm_memory_slot *memslot)
+void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
int i;
gfn_t cur_gfn, last_gfn;
@@ -612,9 +629,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
{
int r;
unsigned long n;
- struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int is_dirty = 0;
+ int is_dirty;
if (kvm_is_ucontrol(kvm))
return -EINVAL;
@@ -625,14 +641,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
if (log->slot >= KVM_USER_MEM_SLOTS)
goto out;
- slots = kvm_memslots(kvm);
- memslot = id_to_memslot(slots, log->slot);
- r = -ENOENT;
- if (!memslot->dirty_bitmap)
- goto out;
-
- kvm_s390_sync_dirty_log(kvm, memslot);
- r = kvm_get_dirty_log(kvm, log, &is_dirty);
+ r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
if (r)
goto out;
@@ -1930,6 +1939,9 @@ static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
start = slot + 1;
}
+ if (start >= slots->used_slots)
+ return slots->used_slots - 1;
+
if (gfn >= memslots[start].base_gfn &&
gfn < memslots[start].base_gfn + memslots[start].npages) {
atomic_set(&slots->lru_slot, start);
@@ -1993,6 +2005,9 @@ static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
struct kvm_memslots *slots = kvm_memslots(kvm);
struct kvm_memory_slot *ms;
+ if (unlikely(!slots->used_slots))
+ return 0;
+
cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
ms = gfn_to_memslot(kvm, cur_gfn);
args->count = 0;
@@ -2158,6 +2173,194 @@ out:
return r;
}
+static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
+{
+ struct kvm_vcpu *vcpu;
+ u16 rc, rrc;
+ int ret = 0;
+ int i;
+
+ /*
+ * We ignore failures and try to destroy as many CPUs as possible.
+ * At the same time we must not free the assigned resources when
+ * this fails, as the ultravisor has still access to that memory.
+ * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
+ * behind.
+ * We want to return the first failure rc and rrc, though.
+ */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ mutex_lock(&vcpu->mutex);
+ if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
+ *rcp = rc;
+ *rrcp = rrc;
+ ret = -EIO;
+ }
+ mutex_unlock(&vcpu->mutex);
+ }
+ return ret;
+}
+
+static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+ int i, r = 0;
+ u16 dummy;
+
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ mutex_lock(&vcpu->mutex);
+ r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
+ mutex_unlock(&vcpu->mutex);
+ if (r)
+ break;
+ }
+ if (r)
+ kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
+ return r;
+}
+
+static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
+{
+ int r = 0;
+ u16 dummy;
+ void __user *argp = (void __user *)cmd->data;
+
+ switch (cmd->cmd) {
+ case KVM_PV_ENABLE: {
+ r = -EINVAL;
+ if (kvm_s390_pv_is_protected(kvm))
+ break;
+
+ /*
+ * FMT 4 SIE needs esca. As we never switch back to bsca from
+ * esca, we need no cleanup in the error cases below
+ */
+ r = sca_switch_to_extended(kvm);
+ if (r)
+ break;
+
+ down_write(&current->mm->mmap_sem);
+ r = gmap_mark_unmergeable();
+ up_write(&current->mm->mmap_sem);
+ if (r)
+ break;
+
+ r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
+ if (r)
+ break;
+
+ r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
+ if (r)
+ kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
+
+ /* we need to block service interrupts from now on */
+ set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
+ break;
+ }
+ case KVM_PV_DISABLE: {
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
+ /*
+ * If a CPU could not be destroyed, destroy VM will also fail.
+ * There is no point in trying to destroy it. Instead return
+ * the rc and rrc from the first CPU that failed destroying.
+ */
+ if (r)
+ break;
+ r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
+
+ /* no need to block service interrupts any more */
+ clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
+ break;
+ }
+ case KVM_PV_SET_SEC_PARMS: {
+ struct kvm_s390_pv_sec_parm parms = {};
+ void *hdr;
+
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = -EFAULT;
+ if (copy_from_user(&parms, argp, sizeof(parms)))
+ break;
+
+ /* Currently restricted to 8KB */
+ r = -EINVAL;
+ if (parms.length > PAGE_SIZE * 2)
+ break;
+
+ r = -ENOMEM;
+ hdr = vmalloc(parms.length);
+ if (!hdr)
+ break;
+
+ r = -EFAULT;
+ if (!copy_from_user(hdr, (void __user *)parms.origin,
+ parms.length))
+ r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
+ &cmd->rc, &cmd->rrc);
+
+ vfree(hdr);
+ break;
+ }
+ case KVM_PV_UNPACK: {
+ struct kvm_s390_pv_unp unp = {};
+
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = -EFAULT;
+ if (copy_from_user(&unp, argp, sizeof(unp)))
+ break;
+
+ r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
+ &cmd->rc, &cmd->rrc);
+ break;
+ }
+ case KVM_PV_VERIFY: {
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+ UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
+ cmd->rrc);
+ break;
+ }
+ case KVM_PV_PREP_RESET: {
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+ UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
+ cmd->rc, cmd->rrc);
+ break;
+ }
+ case KVM_PV_UNSHARE_ALL: {
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+ UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
+ cmd->rc, cmd->rrc);
+ break;
+ }
+ default:
+ r = -ENOTTY;
+ }
+ return r;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -2255,6 +2458,33 @@ long kvm_arch_vm_ioctl(struct file *filp,
mutex_unlock(&kvm->slots_lock);
break;
}
+ case KVM_S390_PV_COMMAND: {
+ struct kvm_pv_cmd args;
+
+ /* protvirt means user sigp */
+ kvm->arch.user_cpu_state_ctrl = 1;
+ r = 0;
+ if (!is_prot_virt_host()) {
+ r = -EINVAL;
+ break;
+ }
+ if (copy_from_user(&args, argp, sizeof(args))) {
+ r = -EFAULT;
+ break;
+ }
+ if (args.flags) {
+ r = -EINVAL;
+ break;
+ }
+ mutex_lock(&kvm->lock);
+ r = kvm_s390_handle_pv(kvm, &args);
+ mutex_unlock(&kvm->lock);
+ if (copy_to_user(argp, &args, sizeof(args))) {
+ r = -EFAULT;
+ break;
+ }
+ break;
+ }
default:
r = -ENOTTY;
}
@@ -2504,7 +2734,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.use_skf = sclp.has_skey;
spin_lock_init(&kvm->arch.start_stop_lock);
kvm_s390_vsie_init(kvm);
- kvm_s390_gisa_init(kvm);
+ if (use_gisa)
+ kvm_s390_gisa_init(kvm);
KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
return 0;
@@ -2518,6 +2749,8 @@ out_err:
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
+ u16 rc, rrc;
+
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
kvm_s390_clear_local_irqs(vcpu);
@@ -2530,6 +2763,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
if (vcpu->kvm->arch.use_cmma)
kvm_s390_vcpu_unsetup_cmma(vcpu);
+ /* We can not hold the vcpu mutex here, we are already dying */
+ if (kvm_s390_pv_cpu_get_handle(vcpu))
+ kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
free_page((unsigned long)(vcpu->arch.sie_block));
}
@@ -2551,10 +2787,20 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
+ u16 rc, rrc;
+
kvm_free_vcpus(kvm);
sca_dispose(kvm);
- debug_unregister(kvm->arch.dbf);
kvm_s390_gisa_destroy(kvm);
+ /*
+ * We are already at the end of life and kvm->lock is not taken.
+ * This is ok as the file descriptor is closed by now and nobody
+ * can mess with the pv state. To avoid lockdep_assert_held from
+ * complaining we do not use kvm_s390_pv_is_protected.
+ */
+ if (kvm_s390_pv_get_handle(kvm))
+ kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
+ debug_unregister(kvm->arch.dbf);
free_page((unsigned long)kvm->arch.sie_page2);
if (!kvm_is_ucontrol(kvm))
gmap_remove(kvm->arch.gmap);
@@ -2650,6 +2896,9 @@ static int sca_switch_to_extended(struct kvm *kvm)
unsigned int vcpu_idx;
u32 scaol, scaoh;
+ if (kvm->arch.use_esca)
+ return 0;
+
new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
if (!new_sca)
return -ENOMEM;
@@ -2901,6 +3150,7 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
{
int rc = 0;
+ u16 uvrc, uvrrc;
atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
CPUSTAT_SM |
@@ -2968,6 +3218,14 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
kvm_s390_vcpu_crypto_setup(vcpu);
+ mutex_lock(&vcpu->kvm->lock);
+ if (kvm_s390_pv_is_protected(vcpu->kvm)) {
+ rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
+ if (rc)
+ kvm_s390_vcpu_unsetup_cmma(vcpu);
+ }
+ mutex_unlock(&vcpu->kvm->lock);
+
return rc;
}
@@ -3277,7 +3535,6 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
kvm_s390_set_prefix(vcpu, 0);
kvm_s390_set_cpu_timer(vcpu, 0);
vcpu->arch.sie_block->ckc = 0;
- vcpu->arch.sie_block->todpr = 0;
memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
@@ -3295,9 +3552,17 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
vcpu->run->s.regs.pp = 0;
vcpu->run->s.regs.gbea = 1;
vcpu->run->s.regs.fpc = 0;
- vcpu->arch.sie_block->gbea = 1;
- vcpu->arch.sie_block->pp = 0;
- vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
+ /*
+ * Do not reset these registers in the protected case, as some of
+ * them are overlayed and they are not accessible in this case
+ * anyway.
+ */
+ if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->gbea = 1;
+ vcpu->arch.sie_block->pp = 0;
+ vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
+ vcpu->arch.sie_block->todpr = 0;
+ }
}
static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
@@ -3487,14 +3752,20 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
switch (mp_state->mp_state) {
case KVM_MP_STATE_STOPPED:
- kvm_s390_vcpu_stop(vcpu);
+ rc = kvm_s390_vcpu_stop(vcpu);
break;
case KVM_MP_STATE_OPERATING:
- kvm_s390_vcpu_start(vcpu);
+ rc = kvm_s390_vcpu_start(vcpu);
break;
case KVM_MP_STATE_LOAD:
+ if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
+ rc = -ENXIO;
+ break;
+ }
+ rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
+ break;
case KVM_MP_STATE_CHECK_STOP:
- /* fall through - CHECK_STOP and LOAD are not supported yet */
+ fallthrough; /* CHECK_STOP and LOAD are not supported yet */
default:
rc = -ENXIO;
}
@@ -3844,9 +4115,11 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
return vcpu_post_run_fault_in_sie(vcpu);
}
+#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
int rc, exit_reason;
+ struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
/*
* We try to hold kvm->srcu during most of vcpu_run (except when run-
@@ -3868,8 +4141,28 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
guest_enter_irqoff();
__disable_cpu_timer_accounting(vcpu);
local_irq_enable();
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ memcpy(sie_page->pv_grregs,
+ vcpu->run->s.regs.gprs,
+ sizeof(sie_page->pv_grregs));
+ }
exit_reason = sie64a(vcpu->arch.sie_block,
vcpu->run->s.regs.gprs);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ memcpy(vcpu->run->s.regs.gprs,
+ sie_page->pv_grregs,
+ sizeof(sie_page->pv_grregs));
+ /*
+ * We're not allowed to inject interrupts on intercepts
+ * that leave the guest state in an "in-between" state
+ * where the next SIE entry will do a continuation.
+ * Fence interrupts in our "internal" PSW.
+ */
+ if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
+ vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
+ vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
+ }
+ }
local_irq_disable();
__enable_cpu_timer_accounting(vcpu);
guest_exit_irqoff();
@@ -3883,7 +4176,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
return rc;
}
-static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
struct runtime_instr_cb *riccb;
struct gs_cb *gscb;
@@ -3892,16 +4185,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
- if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
- kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
- if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
- memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
- /* some control register changes require a tlb flush */
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
- }
if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
- kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
- vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
@@ -3942,6 +4226,36 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
}
+ if (MACHINE_HAS_GS) {
+ preempt_disable();
+ __ctl_set_bit(2, 4);
+ if (current->thread.gs_cb) {
+ vcpu->arch.host_gscb = current->thread.gs_cb;
+ save_gs_cb(vcpu->arch.host_gscb);
+ }
+ if (vcpu->arch.gs_enabled) {
+ current->thread.gs_cb = (struct gs_cb *)
+ &vcpu->run->s.regs.gscb;
+ restore_gs_cb(current->thread.gs_cb);
+ }
+ preempt_enable();
+ }
+ /* SIE will load etoken directly from SDNX and therefore kvm_run */
+}
+
+static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
+ kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
+ memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
+ /* some control register changes require a tlb flush */
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ }
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
+ kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
+ vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
+ }
save_access_regs(vcpu->arch.host_acrs);
restore_access_regs(vcpu->run->s.regs.acrs);
/* save host (userspace) fprs/vrs */
@@ -3956,23 +4270,47 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (test_fp_ctl(current->thread.fpu.fpc))
/* User space provided an invalid FPC, let's clear it */
current->thread.fpu.fpc = 0;
+
+ /* Sync fmt2 only data */
+ if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
+ sync_regs_fmt2(vcpu, kvm_run);
+ } else {
+ /*
+ * In several places we have to modify our internal view to
+ * not do things that are disallowed by the ultravisor. For
+ * example we must not inject interrupts after specific exits
+ * (e.g. 112 prefix page not secure). We do this by turning
+ * off the machine check, external and I/O interrupt bits
+ * of our PSW copy. To avoid getting validity intercepts, we
+ * do only accept the condition code from userspace.
+ */
+ vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
+ vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
+ PSW_MASK_CC;
+ }
+
+ kvm_run->kvm_dirty_regs = 0;
+}
+
+static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
+ kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
+ kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
+ kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
if (MACHINE_HAS_GS) {
- preempt_disable();
__ctl_set_bit(2, 4);
- if (current->thread.gs_cb) {
- vcpu->arch.host_gscb = current->thread.gs_cb;
- save_gs_cb(vcpu->arch.host_gscb);
- }
- if (vcpu->arch.gs_enabled) {
- current->thread.gs_cb = (struct gs_cb *)
- &vcpu->run->s.regs.gscb;
- restore_gs_cb(current->thread.gs_cb);
- }
+ if (vcpu->arch.gs_enabled)
+ save_gs_cb(current->thread.gs_cb);
+ preempt_disable();
+ current->thread.gs_cb = vcpu->arch.host_gscb;
+ restore_gs_cb(vcpu->arch.host_gscb);
preempt_enable();
+ if (!vcpu->arch.host_gscb)
+ __ctl_clear_bit(2, 4);
+ vcpu->arch.host_gscb = NULL;
}
- /* SIE will load etoken directly from SDNX and therefore kvm_run */
-
- kvm_run->kvm_dirty_regs = 0;
+ /* SIE will save etoken directly into SDNX and therefore kvm_run */
}
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -3983,13 +4321,9 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
- kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
- kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
- kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
kvm_run->s.regs.pft = vcpu->arch.pfault_token;
kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
- kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
save_access_regs(vcpu->run->s.regs.acrs);
restore_access_regs(vcpu->arch.host_acrs);
/* Save guest register state */
@@ -3998,19 +4332,8 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
/* Restore will be done lazily at return */
current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
- if (MACHINE_HAS_GS) {
- __ctl_set_bit(2, 4);
- if (vcpu->arch.gs_enabled)
- save_gs_cb(current->thread.gs_cb);
- preempt_disable();
- current->thread.gs_cb = vcpu->arch.host_gscb;
- restore_gs_cb(vcpu->arch.host_gscb);
- preempt_enable();
- if (!vcpu->arch.host_gscb)
- __ctl_clear_bit(2, 4);
- vcpu->arch.host_gscb = NULL;
- }
- /* SIE will save etoken directly into SDNX and therefore kvm_run */
+ if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
+ store_regs_fmt2(vcpu, kvm_run);
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -4034,6 +4357,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_sigset_activate(vcpu);
+ /*
+ * no need to check the return value of vcpu_start as it can only have
+ * an error for protvirt, but protvirt means user cpu state
+ */
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
kvm_s390_vcpu_start(vcpu);
} else if (is_vcpu_stopped(vcpu)) {
@@ -4171,18 +4498,27 @@ static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
-void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
+int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
- int i, online_vcpus, started_vcpus = 0;
+ int i, online_vcpus, r = 0, started_vcpus = 0;
if (!is_vcpu_stopped(vcpu))
- return;
+ return 0;
trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
/* Only one cpu at a time may enter/leave the STOPPED state. */
spin_lock(&vcpu->kvm->arch.start_stop_lock);
online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+ /* Let's tell the UV that we want to change into the operating state */
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
+ if (r) {
+ spin_unlock(&vcpu->kvm->arch.start_stop_lock);
+ return r;
+ }
+ }
+
for (i = 0; i < online_vcpus; i++) {
if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
started_vcpus++;
@@ -4202,27 +4538,43 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
/*
+ * The real PSW might have changed due to a RESTART interpreted by the
+ * ultravisor. We block all interrupts and let the next sie exit
+ * refresh our view.
+ */
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
+ /*
* Another VCPU might have used IBS while we were offline.
* Let's play safe and flush the VCPU at startup.
*/
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
spin_unlock(&vcpu->kvm->arch.start_stop_lock);
- return;
+ return 0;
}
-void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
+int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
- int i, online_vcpus, started_vcpus = 0;
+ int i, online_vcpus, r = 0, started_vcpus = 0;
struct kvm_vcpu *started_vcpu = NULL;
if (is_vcpu_stopped(vcpu))
- return;
+ return 0;
trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
/* Only one cpu at a time may enter/leave the STOPPED state. */
spin_lock(&vcpu->kvm->arch.start_stop_lock);
online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+ /* Let's tell the UV that we want to change into the stopped state */
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
+ if (r) {
+ spin_unlock(&vcpu->kvm->arch.start_stop_lock);
+ return r;
+ }
+ }
+
/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
kvm_s390_clear_stop_irq(vcpu);
@@ -4245,7 +4597,7 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
}
spin_unlock(&vcpu->kvm->arch.start_stop_lock);
- return;
+ return 0;
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
@@ -4272,12 +4624,40 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return r;
}
+static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
+ struct kvm_s390_mem_op *mop)
+{
+ void __user *uaddr = (void __user *)mop->buf;
+ int r = 0;
+
+ if (mop->flags || !mop->size)
+ return -EINVAL;
+ if (mop->size + mop->sida_offset < mop->size)
+ return -EINVAL;
+ if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
+ return -E2BIG;
+
+ switch (mop->op) {
+ case KVM_S390_MEMOP_SIDA_READ:
+ if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
+ mop->sida_offset), mop->size))
+ r = -EFAULT;
+
+ break;
+ case KVM_S390_MEMOP_SIDA_WRITE:
+ if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
+ mop->sida_offset), uaddr, mop->size))
+ r = -EFAULT;
+ break;
+ }
+ return r;
+}
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
struct kvm_s390_mem_op *mop)
{
void __user *uaddr = (void __user *)mop->buf;
void *tmpbuf = NULL;
- int r, srcu_idx;
+ int r = 0;
const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
| KVM_S390_MEMOP_F_CHECK_ONLY;
@@ -4287,14 +4667,15 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
if (mop->size > MEM_OP_MAX_SIZE)
return -E2BIG;
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ return -EINVAL;
+
if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
tmpbuf = vmalloc(mop->size);
if (!tmpbuf)
return -ENOMEM;
}
- srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-
switch (mop->op) {
case KVM_S390_MEMOP_LOGICAL_READ:
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
@@ -4320,12 +4701,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
}
r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
break;
- default:
- r = -EINVAL;
}
- srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
-
if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
@@ -4333,6 +4710,31 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
return r;
}
+static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
+ struct kvm_s390_mem_op *mop)
+{
+ int r, srcu_idx;
+
+ srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ switch (mop->op) {
+ case KVM_S390_MEMOP_LOGICAL_READ:
+ case KVM_S390_MEMOP_LOGICAL_WRITE:
+ r = kvm_s390_guest_mem_op(vcpu, mop);
+ break;
+ case KVM_S390_MEMOP_SIDA_READ:
+ case KVM_S390_MEMOP_SIDA_WRITE:
+ /* we are locked against sida going away by the vcpu->mutex */
+ r = kvm_s390_guest_sida_op(vcpu, mop);
+ break;
+ default:
+ r = -EINVAL;
+ }
+
+ srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+ return r;
+}
+
long kvm_arch_vcpu_async_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -4368,6 +4770,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
void __user *argp = (void __user *)arg;
int idx;
long r;
+ u16 rc, rrc;
vcpu_load(vcpu);
@@ -4389,18 +4792,40 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
case KVM_S390_CLEAR_RESET:
r = 0;
kvm_arch_vcpu_ioctl_clear_reset(vcpu);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+ UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
+ VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
+ rc, rrc);
+ }
break;
case KVM_S390_INITIAL_RESET:
r = 0;
kvm_arch_vcpu_ioctl_initial_reset(vcpu);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+ UVC_CMD_CPU_RESET_INITIAL,
+ &rc, &rrc);
+ VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
+ rc, rrc);
+ }
break;
case KVM_S390_NORMAL_RESET:
r = 0;
kvm_arch_vcpu_ioctl_normal_reset(vcpu);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+ UVC_CMD_CPU_RESET, &rc, &rrc);
+ VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
+ rc, rrc);
+ }
break;
case KVM_SET_ONE_REG:
case KVM_GET_ONE_REG: {
struct kvm_one_reg reg;
+ r = -EINVAL;
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ break;
r = -EFAULT;
if (copy_from_user(&reg, argp, sizeof(reg)))
break;
@@ -4463,7 +4888,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_s390_mem_op mem_op;
if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
- r = kvm_s390_guest_mem_op(vcpu, &mem_op);
+ r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
else
r = -EFAULT;
break;
@@ -4523,12 +4948,6 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
-int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
- unsigned long npages)
-{
- return 0;
-}
-
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
@@ -4549,12 +4968,15 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
return -EINVAL;
+ /* When we are protected, we should not change the memory slots */
+ if (kvm_s390_pv_get_handle(kvm))
+ return -EINVAL;
return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
@@ -4570,7 +4992,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
old->npages * PAGE_SIZE);
if (rc)
break;
- /* FALLTHROUGH */
+ fallthrough;
case KVM_MR_CREATE:
rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
mem->guest_phys_addr, mem->memory_size);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 6d9448dbd052..79dcd647b378 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -2,7 +2,7 @@
/*
* definition for kvm on s390
*
- * Copyright IBM Corp. 2008, 2009
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -15,6 +15,7 @@
#include <linux/hrtimer.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
+#include <linux/lockdep.h>
#include <asm/facility.h>
#include <asm/processor.h>
#include <asm/sclp.h>
@@ -25,6 +26,17 @@
#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
extern debug_info_t *kvm_s390_dbf;
+extern debug_info_t *kvm_s390_dbf_uv;
+
+#define KVM_UV_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
+do { \
+ debug_sprintf_event((d_kvm)->arch.dbf, d_loglevel, d_string "\n", \
+ d_args); \
+ debug_sprintf_event(kvm_s390_dbf_uv, d_loglevel, \
+ "%d: " d_string "\n", (d_kvm)->userspace_pid, \
+ d_args); \
+} while (0)
+
#define KVM_EVENT(d_loglevel, d_string, d_args...)\
do { \
debug_sprintf_event(kvm_s390_dbf, d_loglevel, d_string "\n", \
@@ -196,6 +208,39 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
return kvm->arch.user_cpu_state_ctrl != 0;
}
+/* implemented in pv.c */
+int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
+int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
+int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
+int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
+int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
+ u16 *rrc);
+int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
+ unsigned long tweak, u16 *rc, u16 *rrc);
+int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state);
+
+static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm)
+{
+ return kvm->arch.pv.handle;
+}
+
+static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.pv.handle;
+}
+
+static inline bool kvm_s390_pv_is_protected(struct kvm *kvm)
+{
+ lockdep_assert_held(&kvm->lock);
+ return !!kvm_s390_pv_get_handle(kvm);
+}
+
+static inline bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
+{
+ lockdep_assert_held(&vcpu->mutex);
+ return !!kvm_s390_pv_cpu_get_handle(vcpu);
+}
+
/* implemented in interrupt.c */
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
@@ -286,8 +331,8 @@ void kvm_s390_set_tod_clock(struct kvm *kvm,
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
-void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
-void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
+int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
+int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu);
bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index ed52ffa8d5d4..69a824f9ef0b 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -2,7 +2,7 @@
/*
* handling privileged instructions
*
- * Copyright IBM Corp. 2008, 2018
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -872,7 +872,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
- if (operand2 & 0xfff)
+ if (!kvm_s390_pv_cpu_is_protected(vcpu) && (operand2 & 0xfff))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
switch (fc) {
@@ -893,8 +893,13 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
handle_stsi_3_2_2(vcpu, (void *) mem);
break;
}
-
- rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ memcpy((void *)sida_origin(vcpu->arch.sie_block), (void *)mem,
+ PAGE_SIZE);
+ rc = 0;
+ } else {
+ rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE);
+ }
if (rc) {
rc = kvm_s390_inject_prog_cond(vcpu, rc);
goto out;
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
new file mode 100644
index 000000000000..63e330109b63
--- /dev/null
+++ b/arch/s390/kvm/pv.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hosting Protected Virtual Machines
+ *
+ * Copyright IBM Corp. 2019, 2020
+ * Author(s): Janosch Frank <frankja@linux.ibm.com>
+ */
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/pagemap.h>
+#include <linux/sched/signal.h>
+#include <asm/pgalloc.h>
+#include <asm/gmap.h>
+#include <asm/uv.h>
+#include <asm/mman.h>
+#include "kvm-s390.h"
+
+int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
+{
+ int cc = 0;
+
+ if (kvm_s390_pv_cpu_get_handle(vcpu)) {
+ cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+ UVC_CMD_DESTROY_SEC_CPU, rc, rrc);
+
+ KVM_UV_EVENT(vcpu->kvm, 3,
+ "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
+ vcpu->vcpu_id, *rc, *rrc);
+ WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x",
+ *rc, *rrc);
+ }
+ /* Intended memory leak for something that should never happen. */
+ if (!cc)
+ free_pages(vcpu->arch.pv.stor_base,
+ get_order(uv_info.guest_cpu_stor_len));
+
+ free_page(sida_origin(vcpu->arch.sie_block));
+ vcpu->arch.sie_block->pv_handle_cpu = 0;
+ vcpu->arch.sie_block->pv_handle_config = 0;
+ memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
+ vcpu->arch.sie_block->sdf = 0;
+ /*
+ * The sidad field (for sdf == 2) is now the gbea field (for sdf == 0).
+ * Use the reset value of gbea to avoid leaking the kernel pointer of
+ * the just freed sida.
+ */
+ vcpu->arch.sie_block->gbea = 1;
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+
+ return cc ? EIO : 0;
+}
+
+int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
+{
+ struct uv_cb_csc uvcb = {
+ .header.cmd = UVC_CMD_CREATE_SEC_CPU,
+ .header.len = sizeof(uvcb),
+ };
+ int cc;
+
+ if (kvm_s390_pv_cpu_get_handle(vcpu))
+ return -EINVAL;
+
+ vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL,
+ get_order(uv_info.guest_cpu_stor_len));
+ if (!vcpu->arch.pv.stor_base)
+ return -ENOMEM;
+
+ /* Input */
+ uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
+ uvcb.num = vcpu->arch.sie_block->icpua;
+ uvcb.state_origin = (u64)vcpu->arch.sie_block;
+ uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base;
+
+ /* Alloc Secure Instruction Data Area Designation */
+ vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL | __GFP_ZERO);
+ if (!vcpu->arch.sie_block->sidad) {
+ free_pages(vcpu->arch.pv.stor_base,
+ get_order(uv_info.guest_cpu_stor_len));
+ return -ENOMEM;
+ }
+
+ cc = uv_call(0, (u64)&uvcb);
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+ KVM_UV_EVENT(vcpu->kvm, 3,
+ "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x",
+ vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,
+ uvcb.header.rrc);
+
+ if (cc) {
+ u16 dummy;
+
+ kvm_s390_pv_destroy_cpu(vcpu, &dummy, &dummy);
+ return -EIO;
+ }
+
+ /* Output */
+ vcpu->arch.pv.handle = uvcb.cpu_handle;
+ vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle;
+ vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_get_handle(vcpu->kvm);
+ vcpu->arch.sie_block->sdf = 2;
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ return 0;
+}
+
+/* only free resources when the destroy was successful */
+static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
+{
+ vfree(kvm->arch.pv.stor_var);
+ free_pages(kvm->arch.pv.stor_base,
+ get_order(uv_info.guest_base_stor_len));
+ memset(&kvm->arch.pv, 0, sizeof(kvm->arch.pv));
+}
+
+static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
+{
+ unsigned long base = uv_info.guest_base_stor_len;
+ unsigned long virt = uv_info.guest_virt_var_stor_len;
+ unsigned long npages = 0, vlen = 0;
+ struct kvm_memory_slot *memslot;
+
+ kvm->arch.pv.stor_var = NULL;
+ kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL, get_order(base));
+ if (!kvm->arch.pv.stor_base)
+ return -ENOMEM;
+
+ /*
+ * Calculate current guest storage for allocation of the
+ * variable storage, which is based on the length in MB.
+ *
+ * Slots are sorted by GFN
+ */
+ mutex_lock(&kvm->slots_lock);
+ memslot = kvm_memslots(kvm)->memslots;
+ npages = memslot->base_gfn + memslot->npages;
+ mutex_unlock(&kvm->slots_lock);
+
+ kvm->arch.pv.guest_len = npages * PAGE_SIZE;
+
+ /* Allocate variable storage */
+ vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
+ vlen += uv_info.guest_virt_base_stor_len;
+ kvm->arch.pv.stor_var = vzalloc(vlen);
+ if (!kvm->arch.pv.stor_var)
+ goto out_err;
+ return 0;
+
+out_err:
+ kvm_s390_pv_dealloc_vm(kvm);
+ return -ENOMEM;
+}
+
+/* this should not fail, but if it does, we must not free the donated memory */
+int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+ int cc;
+
+ /* make all pages accessible before destroying the guest */
+ s390_reset_acc(kvm->mm);
+
+ cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+ UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
+ WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
+ atomic_set(&kvm->mm->context.is_protected, 0);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
+ WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);
+ /* Inteded memory leak on "impossible" error */
+ if (!cc)
+ kvm_s390_pv_dealloc_vm(kvm);
+ return cc ? -EIO : 0;
+}
+
+int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+ struct uv_cb_cgc uvcb = {
+ .header.cmd = UVC_CMD_CREATE_SEC_CONF,
+ .header.len = sizeof(uvcb)
+ };
+ int cc, ret;
+ u16 dummy;
+
+ ret = kvm_s390_pv_alloc_vm(kvm);
+ if (ret)
+ return ret;
+
+ /* Inputs */
+ uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
+ uvcb.guest_stor_len = kvm->arch.pv.guest_len;
+ uvcb.guest_asce = kvm->arch.gmap->asce;
+ uvcb.guest_sca = (unsigned long)kvm->arch.sca;
+ uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base;
+ uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
+
+ cc = uv_call(0, (u64)&uvcb);
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x",
+ uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc);
+
+ /* Outputs */
+ kvm->arch.pv.handle = uvcb.guest_handle;
+
+ if (cc) {
+ if (uvcb.header.rc & UVC_RC_NEED_DESTROY)
+ kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
+ else
+ kvm_s390_pv_dealloc_vm(kvm);
+ return -EIO;
+ }
+ kvm->arch.gmap->guest_handle = uvcb.guest_handle;
+ atomic_set(&kvm->mm->context.is_protected, 1);
+ return 0;
+}
+
+int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
+ u16 *rrc)
+{
+ struct uv_cb_ssc uvcb = {
+ .header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
+ .header.len = sizeof(uvcb),
+ .sec_header_origin = (u64)hdr,
+ .sec_header_len = length,
+ .guest_handle = kvm_s390_pv_get_handle(kvm),
+ };
+ int cc = uv_call(0, (u64)&uvcb);
+
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
+ *rc, *rrc);
+ return cc ? -EINVAL : 0;
+}
+
+static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
+ u64 offset, u16 *rc, u16 *rrc)
+{
+ struct uv_cb_unp uvcb = {
+ .header.cmd = UVC_CMD_UNPACK_IMG,
+ .header.len = sizeof(uvcb),
+ .guest_handle = kvm_s390_pv_get_handle(kvm),
+ .gaddr = addr,
+ .tweak[0] = tweak,
+ .tweak[1] = offset,
+ };
+ int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);
+
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+
+ if (ret && ret != -EAGAIN)
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
+ uvcb.gaddr, *rc, *rrc);
+ return ret;
+}
+
+int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
+ unsigned long tweak, u16 *rc, u16 *rrc)
+{
+ u64 offset = 0;
+ int ret = 0;
+
+ if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK)
+ return -EINVAL;
+
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
+ addr, size);
+
+ while (offset < size) {
+ ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
+ if (ret == -EAGAIN) {
+ cond_resched();
+ if (fatal_signal_pending(current))
+ break;
+ continue;
+ }
+ if (ret)
+ break;
+ addr += PAGE_SIZE;
+ offset += PAGE_SIZE;
+ }
+ if (!ret)
+ KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
+ return ret;
+}
+
+int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
+{
+ struct uv_cb_cpu_set_state uvcb = {
+ .header.cmd = UVC_CMD_CPU_SET_STATE,
+ .header.len = sizeof(uvcb),
+ .cpu_handle = kvm_s390_pv_cpu_get_handle(vcpu),
+ .state = state,
+ };
+ int cc;
+
+ cc = uv_call(0, (u64)&uvcb);
+ KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT SET CPU %d STATE %d rc %x rrc %x",
+ vcpu->vcpu_id, state, uvcb.header.rc, uvcb.header.rrc);
+ if (cc)
+ return -EINVAL;
+ return 0;
+}
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 076090f9e666..4f6c22d72072 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -1202,6 +1202,7 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
scb_s->iprcc = PGM_ADDRESSING;
scb_s->pgmilc = 4;
scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, 4);
+ rc = 1;
}
return rc;
}
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index a51c892f14f3..ae989b740376 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -19,7 +19,6 @@
#include <linux/swap.h>
#include <linux/kthread.h>
#include <linux/oom.h>
-#include <linux/suspend.h>
#include <linux/uaccess.h>
#include <asm/pgalloc.h>
@@ -49,7 +48,6 @@ static volatile long cmm_pages_target;
static volatile long cmm_timed_pages_target;
static long cmm_timeout_pages;
static long cmm_timeout_seconds;
-static int cmm_suspended;
static struct cmm_page_array *cmm_page_list;
static struct cmm_page_array *cmm_timed_page_list;
@@ -151,9 +149,9 @@ static int cmm_thread(void *dummy)
while (1) {
rc = wait_event_interruptible(cmm_thread_wait,
- (!cmm_suspended && (cmm_pages != cmm_pages_target ||
- cmm_timed_pages != cmm_timed_pages_target)) ||
- kthread_should_stop());
+ cmm_pages != cmm_pages_target ||
+ cmm_timed_pages != cmm_timed_pages_target ||
+ kthread_should_stop());
if (kthread_should_stop() || rc == -ERESTARTSYS) {
cmm_pages_target = cmm_pages;
cmm_timed_pages_target = cmm_timed_pages;
@@ -390,38 +388,6 @@ static void cmm_smsg_target(const char *from, char *msg)
static struct ctl_table_header *cmm_sysctl_header;
-static int cmm_suspend(void)
-{
- cmm_suspended = 1;
- cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list);
- cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list);
- return 0;
-}
-
-static int cmm_resume(void)
-{
- cmm_suspended = 0;
- cmm_kick_thread();
- return 0;
-}
-
-static int cmm_power_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- switch (event) {
- case PM_POST_HIBERNATION:
- return cmm_resume();
- case PM_HIBERNATION_PREPARE:
- return cmm_suspend();
- default:
- return NOTIFY_DONE;
- }
-}
-
-static struct notifier_block cmm_power_notifier = {
- .notifier_call = cmm_power_event,
-};
-
static int __init cmm_init(void)
{
int rc = -ENOMEM;
@@ -446,16 +412,11 @@ static int __init cmm_init(void)
rc = register_oom_notifier(&cmm_oom_nb);
if (rc < 0)
goto out_oom_notify;
- rc = register_pm_notifier(&cmm_power_notifier);
- if (rc)
- goto out_pm;
cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
if (!IS_ERR(cmm_thread_ptr))
return 0;
rc = PTR_ERR(cmm_thread_ptr);
- unregister_pm_notifier(&cmm_power_notifier);
-out_pm:
unregister_oom_notifier(&cmm_oom_nb);
out_oom_notify:
#ifdef CONFIG_CMM_IUCV
@@ -475,7 +436,6 @@ static void __exit cmm_exit(void)
#ifdef CONFIG_CMM_IUCV
smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target);
#endif
- unregister_pm_notifier(&cmm_power_notifier);
unregister_oom_notifier(&cmm_oom_nb);
kthread_stop(cmm_thread_ptr);
del_timer_sync(&cmm_timer);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 7b0bb475c166..dedc28be27ab 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -38,17 +38,18 @@
#include <asm/irq.h>
#include <asm/mmu_context.h>
#include <asm/facility.h>
+#include <asm/uv.h>
#include "../kernel/entry.h"
#define __FAIL_ADDR_MASK -4096L
#define __SUBCODE_MASK 0x0600
#define __PF_RES_FIELD 0x8000000000000000ULL
-#define VM_FAULT_BADCONTEXT 0x010000
-#define VM_FAULT_BADMAP 0x020000
-#define VM_FAULT_BADACCESS 0x040000
-#define VM_FAULT_SIGNAL 0x080000
-#define VM_FAULT_PFAULT 0x100000
+#define VM_FAULT_BADCONTEXT ((__force vm_fault_t) 0x010000)
+#define VM_FAULT_BADMAP ((__force vm_fault_t) 0x020000)
+#define VM_FAULT_BADACCESS ((__force vm_fault_t) 0x040000)
+#define VM_FAULT_SIGNAL ((__force vm_fault_t) 0x080000)
+#define VM_FAULT_PFAULT ((__force vm_fault_t) 0x100000)
enum fault_type {
KERNEL_FAULT,
@@ -122,7 +123,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
if (*table & _REGION_ENTRY_INVALID)
goto out;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- /* fallthrough */
+ fallthrough;
case _ASCE_TYPE_REGION2:
table += (address & _REGION2_INDEX) >> _REGION2_SHIFT;
if (bad_address(table))
@@ -131,7 +132,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
if (*table & _REGION_ENTRY_INVALID)
goto out;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- /* fallthrough */
+ fallthrough;
case _ASCE_TYPE_REGION3:
table += (address & _REGION3_INDEX) >> _REGION3_SHIFT;
if (bad_address(table))
@@ -140,7 +141,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
goto out;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- /* fallthrough */
+ fallthrough;
case _ASCE_TYPE_SEGMENT:
table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
if (bad_address(table))
@@ -327,7 +328,7 @@ static noinline void do_fault_error(struct pt_regs *regs, int access,
case VM_FAULT_BADACCESS:
if (access == VM_EXEC && signal_return(regs) == 0)
break;
- /* fallthrough */
+ fallthrough;
case VM_FAULT_BADMAP:
/* Bad memory access. Check if it is kernel or user space. */
if (user_mode(regs)) {
@@ -337,9 +338,8 @@ static noinline void do_fault_error(struct pt_regs *regs, int access,
do_sigsegv(regs, si_code);
break;
}
- /* fallthrough */
+ fallthrough;
case VM_FAULT_BADCONTEXT:
- /* fallthrough */
case VM_FAULT_PFAULT:
do_no_context(regs);
break;
@@ -429,7 +429,7 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
address = trans_exc_code & __FAIL_ADDR_MASK;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
- flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+ flags = FAULT_FLAG_DEFAULT;
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
@@ -480,8 +480,7 @@ retry:
* the fault.
*/
fault = handle_mm_fault(vma, address, flags);
- /* No reason to continue if interrupted by SIGKILL. */
- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
+ if (fault_signal_pending(fault, regs)) {
fault = VM_FAULT_SIGNAL;
if (flags & FAULT_FLAG_RETRY_NOWAIT)
goto out_up;
@@ -514,10 +513,7 @@ retry:
fault = VM_FAULT_PFAULT;
goto out_up;
}
- /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
- * of starvation. */
- flags &= ~(FAULT_FLAG_ALLOW_RETRY |
- FAULT_FLAG_RETRY_NOWAIT);
+ flags &= ~FAULT_FLAG_RETRY_NOWAIT;
flags |= FAULT_FLAG_TRIED;
down_read(&mm->mmap_sem);
goto retry;
@@ -584,7 +580,7 @@ void do_dat_exception(struct pt_regs *regs)
int access;
vm_fault_t fault;
- access = VM_READ | VM_EXEC | VM_WRITE;
+ access = VM_ACCESS_FLAGS;
fault = do_exception(regs, access);
if (unlikely(fault))
do_fault_error(regs, access, fault);
@@ -816,3 +812,78 @@ out_extint:
early_initcall(pfault_irq_init);
#endif /* CONFIG_PFAULT */
+
+#if IS_ENABLED(CONFIG_PGSTE)
+void do_secure_storage_access(struct pt_regs *regs)
+{
+ unsigned long addr = regs->int_parm_long & __FAIL_ADDR_MASK;
+ struct vm_area_struct *vma;
+ struct mm_struct *mm;
+ struct page *page;
+ int rc;
+
+ switch (get_fault_type(regs)) {
+ case USER_FAULT:
+ mm = current->mm;
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, addr);
+ if (!vma) {
+ up_read(&mm->mmap_sem);
+ do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+ break;
+ }
+ page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET);
+ if (IS_ERR_OR_NULL(page)) {
+ up_read(&mm->mmap_sem);
+ break;
+ }
+ if (arch_make_page_accessible(page))
+ send_sig(SIGSEGV, current, 0);
+ put_page(page);
+ up_read(&mm->mmap_sem);
+ break;
+ case KERNEL_FAULT:
+ page = phys_to_page(addr);
+ if (unlikely(!try_get_page(page)))
+ break;
+ rc = arch_make_page_accessible(page);
+ put_page(page);
+ if (rc)
+ BUG();
+ break;
+ case VDSO_FAULT:
+ case GMAP_FAULT:
+ default:
+ do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+ WARN_ON_ONCE(1);
+ }
+}
+NOKPROBE_SYMBOL(do_secure_storage_access);
+
+void do_non_secure_storage_access(struct pt_regs *regs)
+{
+ unsigned long gaddr = regs->int_parm_long & __FAIL_ADDR_MASK;
+ struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
+
+ if (get_fault_type(regs) != GMAP_FAULT) {
+ do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ if (gmap_convert_to_secure(gmap, gaddr) == -EINVAL)
+ send_sig(SIGSEGV, current, 0);
+}
+NOKPROBE_SYMBOL(do_non_secure_storage_access);
+
+#else
+void do_secure_storage_access(struct pt_regs *regs)
+{
+ default_trap_handler(regs);
+}
+
+void do_non_secure_storage_access(struct pt_regs *regs)
+{
+ default_trap_handler(regs);
+}
+#endif
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index edcdca97e85e..1a95d8809cc3 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -787,14 +787,18 @@ static void gmap_call_notifier(struct gmap *gmap, unsigned long start,
static inline unsigned long *gmap_table_walk(struct gmap *gmap,
unsigned long gaddr, int level)
{
+ const int asce_type = gmap->asce & _ASCE_TYPE_MASK;
unsigned long *table;
if ((gmap->asce & _ASCE_TYPE_MASK) + 4 < (level * 4))
return NULL;
if (gmap_is_shadow(gmap) && gmap->removed)
return NULL;
- if (gaddr & (-1UL << (31 + ((gmap->asce & _ASCE_TYPE_MASK) >> 2)*11)))
+
+ if (asce_type != _ASCE_TYPE_REGION1 &&
+ gaddr & (-1UL << (31 + (asce_type >> 2) * 11)))
return NULL;
+
table = gmap->table;
switch (gmap->asce & _ASCE_TYPE_MASK) {
case _ASCE_TYPE_REGION1:
@@ -804,7 +808,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
if (*table & _REGION_ENTRY_INVALID)
return NULL;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- /* Fallthrough */
+ fallthrough;
case _ASCE_TYPE_REGION2:
table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
if (level == 3)
@@ -812,7 +816,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
if (*table & _REGION_ENTRY_INVALID)
return NULL;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- /* Fallthrough */
+ fallthrough;
case _ASCE_TYPE_REGION3:
table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
if (level == 2)
@@ -820,7 +824,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
if (*table & _REGION_ENTRY_INVALID)
return NULL;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- /* Fallthrough */
+ fallthrough;
case _ASCE_TYPE_SEGMENT:
table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
if (level == 1)
@@ -1840,6 +1844,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
goto out_free;
} else if (*table & _REGION_ENTRY_ORIGIN) {
rc = -EAGAIN; /* Race with shadow */
+ goto out_free;
}
crst_table_init(s_r3t, _REGION3_ENTRY_EMPTY);
/* mark as invalid as long as the parent table is not protected */
@@ -2548,6 +2553,23 @@ int s390_enable_sie(void)
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
+int gmap_mark_unmergeable(void)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ int ret;
+
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
+ MADV_UNMERGEABLE, &vma->vm_flags);
+ if (ret)
+ return ret;
+ }
+ mm->def_flags &= ~VM_MERGEABLE;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gmap_mark_unmergeable);
+
/*
* Enable storage key handling from now on and initialize the storage
* keys with the default key.
@@ -2593,7 +2615,6 @@ static const struct mm_walk_ops enable_skey_walk_ops = {
int s390_enable_skey(void)
{
struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
int rc = 0;
down_write(&mm->mmap_sem);
@@ -2601,16 +2622,11 @@ int s390_enable_skey(void)
goto out_up;
mm->context.uses_skeys = 1;
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
- if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
- MADV_UNMERGEABLE, &vma->vm_flags)) {
- mm->context.uses_skeys = 0;
- rc = -ENOMEM;
- goto out_up;
- }
+ rc = gmap_mark_unmergeable();
+ if (rc) {
+ mm->context.uses_skeys = 0;
+ goto out_up;
}
- mm->def_flags &= ~VM_MERGEABLE;
-
walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL);
out_up:
@@ -2640,3 +2656,38 @@ void s390_reset_cmma(struct mm_struct *mm)
up_write(&mm->mmap_sem);
}
EXPORT_SYMBOL_GPL(s390_reset_cmma);
+
+/*
+ * make inaccessible pages accessible again
+ */
+static int __s390_reset_acc(pte_t *ptep, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pte_t pte = READ_ONCE(*ptep);
+
+ if (pte_present(pte))
+ WARN_ON_ONCE(uv_convert_from_secure(pte_val(pte) & PAGE_MASK));
+ return 0;
+}
+
+static const struct mm_walk_ops reset_acc_walk_ops = {
+ .pte_entry = __s390_reset_acc,
+};
+
+#include <linux/sched/mm.h>
+void s390_reset_acc(struct mm_struct *mm)
+{
+ /*
+ * we might be called during
+ * reset: we walk the pages and clear
+ * close of all kvm file descriptors: we walk the pages and clear
+ * exit of process on fd closure: vma already gone, do nothing
+ */
+ if (!mmget_not_zero(mm))
+ return;
+ down_read(&mm->mmap_sem);
+ walk_page_range(mm, 0, TASK_SIZE, &reset_acc_walk_ops, NULL);
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+}
+EXPORT_SYMBOL_GPL(s390_reset_acc);
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 5674710a4841..f01daddcbc5e 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -326,7 +326,6 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
struct hstate *h = hstate_file(file);
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- int rc;
if (len & ~huge_page_mask(h))
return -EINVAL;
@@ -353,15 +352,9 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
else
addr = hugetlb_get_unmapped_area_topdown(file, addr, len,
pgoff, flags);
- if (addr & ~PAGE_MASK)
+ if (offset_in_page(addr))
return addr;
check_asce_limit:
- if (addr + len > current->mm->context.asce_limit &&
- addr + len <= TASK_SIZE) {
- rc = crst_table_upgrade(mm, addr + len);
- if (rc)
- return (unsigned long) rc;
- }
- return addr;
+ return check_asce_limit(mm, addr, len);
}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index ac44bd76db4b..87b2d024e75a 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -268,20 +268,23 @@ device_initcall(s390_cma_mem_init);
#endif /* CONFIG_CMA */
int arch_add_memory(int nid, u64 start, u64 size,
- struct mhp_restrictions *restrictions)
+ struct mhp_params *params)
{
unsigned long start_pfn = PFN_DOWN(start);
unsigned long size_pages = PFN_DOWN(size);
int rc;
- if (WARN_ON_ONCE(restrictions->altmap))
+ if (WARN_ON_ONCE(params->altmap))
+ return -EINVAL;
+
+ if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot))
return -EINVAL;
rc = vmem_add_mapping(start, size);
if (rc)
return rc;
- rc = __add_pages(nid, start_pfn, size_pages, restrictions);
+ rc = __add_pages(nid, start_pfn, size_pages, params);
if (rc)
vmem_remove_mapping(start, size);
return rc;
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index cbc718ba6d78..1b78f630a9ca 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -72,14 +72,13 @@ static inline unsigned long mmap_base(unsigned long rnd,
return PAGE_ALIGN(STACK_TOP - gap - rnd);
}
-unsigned long
-arch_get_unmapped_area(struct file *filp, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
+unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct vm_unmapped_area_info info;
- int rc;
if (len > TASK_SIZE - mmap_min_addr)
return -ENOMEM;
@@ -105,30 +104,20 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
addr = vm_unmapped_area(&info);
- if (addr & ~PAGE_MASK)
+ if (offset_in_page(addr))
return addr;
check_asce_limit:
- if (addr + len > current->mm->context.asce_limit &&
- addr + len <= TASK_SIZE) {
- rc = crst_table_upgrade(mm, addr + len);
- if (rc)
- return (unsigned long) rc;
- }
-
- return addr;
+ return check_asce_limit(mm, addr, len);
}
-unsigned long
-arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
- const unsigned long len, const unsigned long pgoff,
- const unsigned long flags)
+unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
{
struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
- unsigned long addr = addr0;
struct vm_unmapped_area_info info;
- int rc;
/* requested length too big for entire address space */
if (len > TASK_SIZE - mmap_min_addr)
@@ -163,25 +152,18 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
* can happen with large stack limits and large mmap()
* allocations.
*/
- if (addr & ~PAGE_MASK) {
+ if (offset_in_page(addr)) {
VM_BUG_ON(addr != -ENOMEM);
info.flags = 0;
info.low_limit = TASK_UNMAPPED_BASE;
info.high_limit = TASK_SIZE;
addr = vm_unmapped_area(&info);
- if (addr & ~PAGE_MASK)
+ if (offset_in_page(addr))
return addr;
}
check_asce_limit:
- if (addr + len > current->mm->context.asce_limit &&
- addr + len <= TASK_SIZE) {
- rc = crst_table_upgrade(mm, addr + len);
- if (rc)
- return (unsigned long) rc;
- }
-
- return addr;
+ return check_asce_limit(mm, addr, len);
}
/*
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index f8c6faab41f4..e22c06d5f206 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -367,20 +367,4 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
}
}
-#ifdef CONFIG_HIBERNATION
-bool kernel_page_present(struct page *page)
-{
- unsigned long addr;
- int cc;
-
- addr = page_to_phys(page);
- asm volatile(
- " lra %1,0(%1)\n"
- " ipm %0\n"
- " srl %0,28"
- : "=d" (cc), "+a" (addr) : : "cc");
- return cc == 0;
-}
-#endif /* CONFIG_HIBERNATION */
-
#endif /* CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 46071be897ab..fff169d64711 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -89,67 +89,65 @@ static void __crst_table_upgrade(void *arg)
int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
{
- unsigned long *table, *pgd;
- int rc, notify;
+ unsigned long *pgd = NULL, *p4d = NULL, *__pgd;
+ unsigned long asce_limit = mm->context.asce_limit;
/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
- VM_BUG_ON(mm->context.asce_limit < _REGION2_SIZE);
- rc = 0;
- notify = 0;
- while (mm->context.asce_limit < end) {
- table = crst_table_alloc(mm);
- if (!table) {
- rc = -ENOMEM;
- break;
- }
- spin_lock_bh(&mm->page_table_lock);
- pgd = (unsigned long *) mm->pgd;
- if (mm->context.asce_limit == _REGION2_SIZE) {
- crst_table_init(table, _REGION2_ENTRY_EMPTY);
- p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd);
- mm->pgd = (pgd_t *) table;
- mm->context.asce_limit = _REGION1_SIZE;
- mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
- mm_inc_nr_puds(mm);
- } else {
- crst_table_init(table, _REGION1_ENTRY_EMPTY);
- pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);
- mm->pgd = (pgd_t *) table;
- mm->context.asce_limit = -PAGE_SIZE;
- mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | _ASCE_TYPE_REGION1;
- }
- notify = 1;
- spin_unlock_bh(&mm->page_table_lock);
- }
- if (notify)
- on_each_cpu(__crst_table_upgrade, mm, 0);
- return rc;
-}
+ VM_BUG_ON(asce_limit < _REGION2_SIZE);
-void crst_table_downgrade(struct mm_struct *mm)
-{
- pgd_t *pgd;
+ if (end <= asce_limit)
+ return 0;
- /* downgrade should only happen from 3 to 2 levels (compat only) */
- VM_BUG_ON(mm->context.asce_limit != _REGION2_SIZE);
+ if (asce_limit == _REGION2_SIZE) {
+ p4d = crst_table_alloc(mm);
+ if (unlikely(!p4d))
+ goto err_p4d;
+ crst_table_init(p4d, _REGION2_ENTRY_EMPTY);
+ }
+ if (end > _REGION1_SIZE) {
+ pgd = crst_table_alloc(mm);
+ if (unlikely(!pgd))
+ goto err_pgd;
+ crst_table_init(pgd, _REGION1_ENTRY_EMPTY);
+ }
- if (current->active_mm == mm) {
- clear_user_asce();
- __tlb_flush_mm(mm);
+ spin_lock_bh(&mm->page_table_lock);
+
+ /*
+ * This routine gets called with mmap_sem lock held and there is
+ * no reason to optimize for the case of otherwise. However, if
+ * that would ever change, the below check will let us know.
+ */
+ VM_BUG_ON(asce_limit != mm->context.asce_limit);
+
+ if (p4d) {
+ __pgd = (unsigned long *) mm->pgd;
+ p4d_populate(mm, (p4d_t *) p4d, (pud_t *) __pgd);
+ mm->pgd = (pgd_t *) p4d;
+ mm->context.asce_limit = _REGION1_SIZE;
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+ mm_inc_nr_puds(mm);
+ }
+ if (pgd) {
+ __pgd = (unsigned long *) mm->pgd;
+ pgd_populate(mm, (pgd_t *) pgd, (p4d_t *) __pgd);
+ mm->pgd = (pgd_t *) pgd;
+ mm->context.asce_limit = TASK_SIZE_MAX;
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_REGION1;
}
- pgd = mm->pgd;
- mm_dec_nr_pmds(mm);
- mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
- mm->context.asce_limit = _REGION3_SIZE;
- mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
- crst_table_free(mm, (unsigned long *) pgd);
+ spin_unlock_bh(&mm->page_table_lock);
+
+ on_each_cpu(__crst_table_upgrade, mm, 0);
+
+ return 0;
- if (current->active_mm == mm)
- set_user_asce(mm);
+err_pgd:
+ crst_table_free(mm, p4d);
+err_p4d:
+ return -ENOMEM;
}
static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
@@ -316,7 +314,7 @@ void __tlb_remove_table(void *_table)
mask >>= 24;
if (mask != 0)
break;
- /* fallthrough */
+ fallthrough;
case 3: /* 4K page table with pgstes */
if (mask & 3)
atomic_xor_bits(&page->_refcount, 3 << 24);
@@ -541,7 +539,7 @@ void base_asce_free(unsigned long asce)
base_region2_walk(table, 0, _REGION1_SIZE, 0);
break;
case _ASCE_TYPE_REGION1:
- base_region1_walk(table, 0, -_PAGE_SIZE, 0);
+ base_region1_walk(table, 0, TASK_SIZE_MAX, 0);
break;
}
base_crst_free(table);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index b403fa14847d..f810930aff42 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -415,6 +415,10 @@ void __init vmem_map_init(void)
SET_MEMORY_RO | SET_MEMORY_X);
__set_memory(__stext_dma, (__etext_dma - __stext_dma) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
+
+ /* we need lowcore executable for our LPSWE instructions */
+ set_memory_x(0, 1);
+
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);
}
diff --git a/arch/s390/numa/Makefile b/arch/s390/numa/Makefile
index 66c2dff74895..c89d26f4f77d 100644
--- a/arch/s390/numa/Makefile
+++ b/arch/s390/numa/Makefile
@@ -1,4 +1,2 @@
# SPDX-License-Identifier: GPL-2.0
obj-y += numa.o
-obj-y += toptree.o
-obj-$(CONFIG_NUMA_EMU) += mode_emu.o
diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c
deleted file mode 100644
index 72d742bb2d17..000000000000
--- a/arch/s390/numa/mode_emu.c
+++ /dev/null
@@ -1,577 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NUMA support for s390
- *
- * NUMA emulation (aka fake NUMA) distributes the available memory to nodes
- * without using real topology information about the physical memory of the
- * machine.
- *
- * It distributes the available CPUs to nodes while respecting the original
- * machine topology information. This is done by trying to avoid to separate
- * CPUs which reside on the same book or even on the same MC.
- *
- * Because the current Linux scheduler code requires a stable cpu to node
- * mapping, cores are pinned to nodes when the first CPU thread is set online.
- *
- * Copyright IBM Corp. 2015
- */
-
-#define KMSG_COMPONENT "numa_emu"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/cpumask.h>
-#include <linux/memblock.h>
-#include <linux/node.h>
-#include <linux/memory.h>
-#include <linux/slab.h>
-#include <asm/smp.h>
-#include <asm/topology.h>
-#include "numa_mode.h"
-#include "toptree.h"
-
-/* Distances between the different system components */
-#define DIST_EMPTY 0
-#define DIST_CORE 1
-#define DIST_MC 2
-#define DIST_BOOK 3
-#define DIST_DRAWER 4
-#define DIST_MAX 5
-
-/* Node distance reported to common code */
-#define EMU_NODE_DIST 10
-
-/* Node ID for free (not yet pinned) cores */
-#define NODE_ID_FREE -1
-
-/* Different levels of toptree */
-enum toptree_level {CORE, MC, BOOK, DRAWER, NODE, TOPOLOGY};
-
-/* The two toptree IDs */
-enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA};
-
-/* Number of NUMA nodes */
-static int emu_nodes = 1;
-/* NUMA stripe size */
-static unsigned long emu_size;
-
-/*
- * Node to core pinning information updates are protected by
- * "sched_domains_mutex".
- */
-static struct {
- s32 to_node_id[CONFIG_NR_CPUS]; /* Pinned core to node mapping */
- int total; /* Total number of pinned cores */
- int per_node_target; /* Cores per node without extra cores */
- int per_node[MAX_NUMNODES]; /* Number of cores pinned to node */
-} *emu_cores;
-
-/*
- * Pin a core to a node
- */
-static void pin_core_to_node(int core_id, int node_id)
-{
- if (emu_cores->to_node_id[core_id] == NODE_ID_FREE) {
- emu_cores->per_node[node_id]++;
- emu_cores->to_node_id[core_id] = node_id;
- emu_cores->total++;
- } else {
- WARN_ON(emu_cores->to_node_id[core_id] != node_id);
- }
-}
-
-/*
- * Number of pinned cores of a node
- */
-static int cores_pinned(struct toptree *node)
-{
- return emu_cores->per_node[node->id];
-}
-
-/*
- * ID of the node where the core is pinned (or NODE_ID_FREE)
- */
-static int core_pinned_to_node_id(struct toptree *core)
-{
- return emu_cores->to_node_id[core->id];
-}
-
-/*
- * Number of cores in the tree that are not yet pinned
- */
-static int cores_free(struct toptree *tree)
-{
- struct toptree *core;
- int count = 0;
-
- toptree_for_each(core, tree, CORE) {
- if (core_pinned_to_node_id(core) == NODE_ID_FREE)
- count++;
- }
- return count;
-}
-
-/*
- * Return node of core
- */
-static struct toptree *core_node(struct toptree *core)
-{
- return core->parent->parent->parent->parent;
-}
-
-/*
- * Return drawer of core
- */
-static struct toptree *core_drawer(struct toptree *core)
-{
- return core->parent->parent->parent;
-}
-
-/*
- * Return book of core
- */
-static struct toptree *core_book(struct toptree *core)
-{
- return core->parent->parent;
-}
-
-/*
- * Return mc of core
- */
-static struct toptree *core_mc(struct toptree *core)
-{
- return core->parent;
-}
-
-/*
- * Distance between two cores
- */
-static int dist_core_to_core(struct toptree *core1, struct toptree *core2)
-{
- if (core_drawer(core1)->id != core_drawer(core2)->id)
- return DIST_DRAWER;
- if (core_book(core1)->id != core_book(core2)->id)
- return DIST_BOOK;
- if (core_mc(core1)->id != core_mc(core2)->id)
- return DIST_MC;
- /* Same core or sibling on same MC */
- return DIST_CORE;
-}
-
-/*
- * Distance of a node to a core
- */
-static int dist_node_to_core(struct toptree *node, struct toptree *core)
-{
- struct toptree *core_node;
- int dist_min = DIST_MAX;
-
- toptree_for_each(core_node, node, CORE)
- dist_min = min(dist_min, dist_core_to_core(core_node, core));
- return dist_min == DIST_MAX ? DIST_EMPTY : dist_min;
-}
-
-/*
- * Unify will delete empty nodes, therefore recreate nodes.
- */
-static void toptree_unify_tree(struct toptree *tree)
-{
- int nid;
-
- toptree_unify(tree);
- for (nid = 0; nid < emu_nodes; nid++)
- toptree_get_child(tree, nid);
-}
-
-/*
- * Find the best/nearest node for a given core and ensure that no node
- * gets more than "emu_cores->per_node_target + extra" cores.
- */
-static struct toptree *node_for_core(struct toptree *numa, struct toptree *core,
- int extra)
-{
- struct toptree *node, *node_best = NULL;
- int dist_cur, dist_best, cores_target;
-
- cores_target = emu_cores->per_node_target + extra;
- dist_best = DIST_MAX;
- node_best = NULL;
- toptree_for_each(node, numa, NODE) {
- /* Already pinned cores must use their nodes */
- if (core_pinned_to_node_id(core) == node->id) {
- node_best = node;
- break;
- }
- /* Skip nodes that already have enough cores */
- if (cores_pinned(node) >= cores_target)
- continue;
- dist_cur = dist_node_to_core(node, core);
- if (dist_cur < dist_best) {
- dist_best = dist_cur;
- node_best = node;
- }
- }
- return node_best;
-}
-
-/*
- * Find the best node for each core with respect to "extra" core count
- */
-static void toptree_to_numa_single(struct toptree *numa, struct toptree *phys,
- int extra)
-{
- struct toptree *node, *core, *tmp;
-
- toptree_for_each_safe(core, tmp, phys, CORE) {
- node = node_for_core(numa, core, extra);
- if (!node)
- return;
- toptree_move(core, node);
- pin_core_to_node(core->id, node->id);
- }
-}
-
-/*
- * Move structures of given level to specified NUMA node
- */
-static void move_level_to_numa_node(struct toptree *node, struct toptree *phys,
- enum toptree_level level, bool perfect)
-{
- int cores_free, cores_target = emu_cores->per_node_target;
- struct toptree *cur, *tmp;
-
- toptree_for_each_safe(cur, tmp, phys, level) {
- cores_free = cores_target - toptree_count(node, CORE);
- if (perfect) {
- if (cores_free == toptree_count(cur, CORE))
- toptree_move(cur, node);
- } else {
- if (cores_free >= toptree_count(cur, CORE))
- toptree_move(cur, node);
- }
- }
-}
-
-/*
- * Move structures of a given level to NUMA nodes. If "perfect" is specified
- * move only perfectly fitting structures. Otherwise move also smaller
- * than needed structures.
- */
-static void move_level_to_numa(struct toptree *numa, struct toptree *phys,
- enum toptree_level level, bool perfect)
-{
- struct toptree *node;
-
- toptree_for_each(node, numa, NODE)
- move_level_to_numa_node(node, phys, level, perfect);
-}
-
-/*
- * For the first run try to move the big structures
- */
-static void toptree_to_numa_first(struct toptree *numa, struct toptree *phys)
-{
- struct toptree *core;
-
- /* Always try to move perfectly fitting structures first */
- move_level_to_numa(numa, phys, DRAWER, true);
- move_level_to_numa(numa, phys, DRAWER, false);
- move_level_to_numa(numa, phys, BOOK, true);
- move_level_to_numa(numa, phys, BOOK, false);
- move_level_to_numa(numa, phys, MC, true);
- move_level_to_numa(numa, phys, MC, false);
- /* Now pin all the moved cores */
- toptree_for_each(core, numa, CORE)
- pin_core_to_node(core->id, core_node(core)->id);
-}
-
-/*
- * Allocate new topology and create required nodes
- */
-static struct toptree *toptree_new(int id, int nodes)
-{
- struct toptree *tree;
- int nid;
-
- tree = toptree_alloc(TOPOLOGY, id);
- if (!tree)
- goto fail;
- for (nid = 0; nid < nodes; nid++) {
- if (!toptree_get_child(tree, nid))
- goto fail;
- }
- return tree;
-fail:
- panic("NUMA emulation could not allocate topology");
-}
-
-/*
- * Allocate and initialize core to node mapping
- */
-static void __ref create_core_to_node_map(void)
-{
- int i;
-
- emu_cores = memblock_alloc(sizeof(*emu_cores), 8);
- if (!emu_cores)
- panic("%s: Failed to allocate %zu bytes align=0x%x\n",
- __func__, sizeof(*emu_cores), 8);
- for (i = 0; i < ARRAY_SIZE(emu_cores->to_node_id); i++)
- emu_cores->to_node_id[i] = NODE_ID_FREE;
-}
-
-/*
- * Move cores from physical topology into NUMA target topology
- * and try to keep as much of the physical topology as possible.
- */
-static struct toptree *toptree_to_numa(struct toptree *phys)
-{
- static int first = 1;
- struct toptree *numa;
- int cores_total;
-
- cores_total = emu_cores->total + cores_free(phys);
- emu_cores->per_node_target = cores_total / emu_nodes;
- numa = toptree_new(TOPTREE_ID_NUMA, emu_nodes);
- if (first) {
- toptree_to_numa_first(numa, phys);
- first = 0;
- }
- toptree_to_numa_single(numa, phys, 0);
- toptree_to_numa_single(numa, phys, 1);
- toptree_unify_tree(numa);
-
- WARN_ON(cpumask_weight(&phys->mask));
- return numa;
-}
-
-/*
- * Create a toptree out of the physical topology that we got from the hypervisor
- */
-static struct toptree *toptree_from_topology(void)
-{
- struct toptree *phys, *node, *drawer, *book, *mc, *core;
- struct cpu_topology_s390 *top;
- int cpu;
-
- phys = toptree_new(TOPTREE_ID_PHYS, 1);
-
- for_each_cpu(cpu, &cpus_with_topology) {
- top = &cpu_topology[cpu];
- node = toptree_get_child(phys, 0);
- drawer = toptree_get_child(node, top->drawer_id);
- book = toptree_get_child(drawer, top->book_id);
- mc = toptree_get_child(book, top->socket_id);
- core = toptree_get_child(mc, smp_get_base_cpu(cpu));
- if (!drawer || !book || !mc || !core)
- panic("NUMA emulation could not allocate memory");
- cpumask_set_cpu(cpu, &core->mask);
- toptree_update_mask(mc);
- }
- return phys;
-}
-
-/*
- * Add toptree core to topology and create correct CPU masks
- */
-static void topology_add_core(struct toptree *core)
-{
- struct cpu_topology_s390 *top;
- int cpu;
-
- for_each_cpu(cpu, &core->mask) {
- top = &cpu_topology[cpu];
- cpumask_copy(&top->thread_mask, &core->mask);
- cpumask_copy(&top->core_mask, &core_mc(core)->mask);
- cpumask_copy(&top->book_mask, &core_book(core)->mask);
- cpumask_copy(&top->drawer_mask, &core_drawer(core)->mask);
- cpumask_set_cpu(cpu, &node_to_cpumask_map[core_node(core)->id]);
- top->node_id = core_node(core)->id;
- }
-}
-
-/*
- * Apply toptree to topology and create CPU masks
- */
-static void toptree_to_topology(struct toptree *numa)
-{
- struct toptree *core;
- int i;
-
- /* Clear all node masks */
- for (i = 0; i < MAX_NUMNODES; i++)
- cpumask_clear(&node_to_cpumask_map[i]);
-
- /* Rebuild all masks */
- toptree_for_each(core, numa, CORE)
- topology_add_core(core);
-}
-
-/*
- * Show the node to core mapping
- */
-static void print_node_to_core_map(void)
-{
- int nid, cid;
-
- if (!numa_debug_enabled)
- return;
- printk(KERN_DEBUG "NUMA node to core mapping\n");
- for (nid = 0; nid < emu_nodes; nid++) {
- printk(KERN_DEBUG " node %3d: ", nid);
- for (cid = 0; cid < ARRAY_SIZE(emu_cores->to_node_id); cid++) {
- if (emu_cores->to_node_id[cid] == nid)
- printk(KERN_CONT "%d ", cid);
- }
- printk(KERN_CONT "\n");
- }
-}
-
-static void pin_all_possible_cpus(void)
-{
- int core_id, node_id, cpu;
- static int initialized;
-
- if (initialized)
- return;
- print_node_to_core_map();
- node_id = 0;
- for_each_possible_cpu(cpu) {
- core_id = smp_get_base_cpu(cpu);
- if (emu_cores->to_node_id[core_id] != NODE_ID_FREE)
- continue;
- pin_core_to_node(core_id, node_id);
- cpu_topology[cpu].node_id = node_id;
- node_id = (node_id + 1) % emu_nodes;
- }
- print_node_to_core_map();
- initialized = 1;
-}
-
-/*
- * Transfer physical topology into a NUMA topology and modify CPU masks
- * according to the NUMA topology.
- *
- * Must be called with "sched_domains_mutex" lock held.
- */
-static void emu_update_cpu_topology(void)
-{
- struct toptree *phys, *numa;
-
- if (emu_cores == NULL)
- create_core_to_node_map();
- phys = toptree_from_topology();
- numa = toptree_to_numa(phys);
- toptree_free(phys);
- toptree_to_topology(numa);
- toptree_free(numa);
- pin_all_possible_cpus();
-}
-
-/*
- * If emu_size is not set, use CONFIG_EMU_SIZE. Then round to minimum
- * alignment (needed for memory hotplug).
- */
-static unsigned long emu_setup_size_adjust(unsigned long size)
-{
- unsigned long size_new;
-
- size = size ? : CONFIG_EMU_SIZE;
- size_new = roundup(size, memory_block_size_bytes());
- if (size_new == size)
- return size;
- pr_warn("Increasing memory stripe size from %ld MB to %ld MB\n",
- size >> 20, size_new >> 20);
- return size_new;
-}
-
-/*
- * If we have not enough memory for the specified nodes, reduce the node count.
- */
-static int emu_setup_nodes_adjust(int nodes)
-{
- int nodes_max;
-
- nodes_max = memblock.memory.total_size / emu_size;
- nodes_max = max(nodes_max, 1);
- if (nodes_max >= nodes)
- return nodes;
- pr_warn("Not enough memory for %d nodes, reducing node count\n", nodes);
- return nodes_max;
-}
-
-/*
- * Early emu setup
- */
-static void emu_setup(void)
-{
- int nid;
-
- emu_size = emu_setup_size_adjust(emu_size);
- emu_nodes = emu_setup_nodes_adjust(emu_nodes);
- for (nid = 0; nid < emu_nodes; nid++)
- node_set(nid, node_possible_map);
- pr_info("Creating %d nodes with memory stripe size %ld MB\n",
- emu_nodes, emu_size >> 20);
-}
-
-/*
- * Return node id for given page number
- */
-static int emu_pfn_to_nid(unsigned long pfn)
-{
- return (pfn / (emu_size >> PAGE_SHIFT)) % emu_nodes;
-}
-
-/*
- * Return stripe size
- */
-static unsigned long emu_align(void)
-{
- return emu_size;
-}
-
-/*
- * Return distance between two nodes
- */
-static int emu_distance(int node1, int node2)
-{
- return (node1 != node2) * EMU_NODE_DIST;
-}
-
-/*
- * Define callbacks for generic s390 NUMA infrastructure
- */
-const struct numa_mode numa_mode_emu = {
- .name = "emu",
- .setup = emu_setup,
- .update_cpu_topology = emu_update_cpu_topology,
- .__pfn_to_nid = emu_pfn_to_nid,
- .align = emu_align,
- .distance = emu_distance,
-};
-
-/*
- * Kernel parameter: emu_nodes=<n>
- */
-static int __init early_parse_emu_nodes(char *p)
-{
- int count;
-
- if (!p || kstrtoint(p, 0, &count) != 0 || count <= 0)
- return 0;
- emu_nodes = min(count, MAX_NUMNODES);
- return 0;
-}
-early_param("emu_nodes", early_parse_emu_nodes);
-
-/*
- * Kernel parameter: emu_size=[<n>[k|M|G|T]]
- */
-static int __init early_parse_emu_size(char *p)
-{
- if (p)
- emu_size = memparse(p, NULL);
- return 0;
-}
-early_param("emu_size", early_parse_emu_size);
diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c
index d2910fa834c8..51c5a9f6e525 100644
--- a/arch/s390/numa/numa.c
+++ b/arch/s390/numa/numa.c
@@ -7,165 +7,36 @@
* Copyright IBM Corp. 2015
*/
-#define KMSG_COMPONENT "numa"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
#include <linux/kernel.h>
#include <linux/mmzone.h>
#include <linux/cpumask.h>
#include <linux/memblock.h>
-#include <linux/slab.h>
#include <linux/node.h>
-
#include <asm/numa.h>
-#include "numa_mode.h"
-pg_data_t *node_data[MAX_NUMNODES];
+struct pglist_data *node_data[MAX_NUMNODES];
EXPORT_SYMBOL(node_data);
-cpumask_t node_to_cpumask_map[MAX_NUMNODES];
-EXPORT_SYMBOL(node_to_cpumask_map);
-
-static void plain_setup(void)
-{
- node_set(0, node_possible_map);
-}
-
-const struct numa_mode numa_mode_plain = {
- .name = "plain",
- .setup = plain_setup,
-};
-
-static const struct numa_mode *mode = &numa_mode_plain;
-
-int numa_pfn_to_nid(unsigned long pfn)
-{
- return mode->__pfn_to_nid ? mode->__pfn_to_nid(pfn) : 0;
-}
-
-void numa_update_cpu_topology(void)
-{
- if (mode->update_cpu_topology)
- mode->update_cpu_topology();
-}
-
-int __node_distance(int a, int b)
-{
- return mode->distance ? mode->distance(a, b) : 0;
-}
-EXPORT_SYMBOL(__node_distance);
-
-int numa_debug_enabled;
-
-/*
- * numa_setup_memory() - Assign bootmem to nodes
- *
- * The memory is first added to memblock without any respect to nodes.
- * This is fixed before remaining memblock memory is handed over to the
- * buddy allocator.
- * An important side effect is that large bootmem allocations might easily
- * cross node boundaries, which can be needed for large allocations with
- * smaller memory stripes in each node (i.e. when using NUMA emulation).
- *
- * Memory defines nodes:
- * Therefore this routine also sets the nodes online with memory.
- */
-static void __init numa_setup_memory(void)
+void __init numa_setup(void)
{
- unsigned long cur_base, align, end_of_dram;
- int nid = 0;
-
- end_of_dram = memblock_end_of_DRAM();
- align = mode->align ? mode->align() : ULONG_MAX;
-
- /*
- * Step through all available memory and assign it to the nodes
- * indicated by the mode implementation.
- * All nodes which are seen here will be set online.
- */
- cur_base = 0;
- do {
- nid = numa_pfn_to_nid(PFN_DOWN(cur_base));
- node_set_online(nid);
- memblock_set_node(cur_base, align, &memblock.memory, nid);
- cur_base += align;
- } while (cur_base < end_of_dram);
+ int nid;
- /* Allocate and fill out node_data */
+ nodes_clear(node_possible_map);
+ node_set(0, node_possible_map);
+ node_set_online(0);
for (nid = 0; nid < MAX_NUMNODES; nid++) {
NODE_DATA(nid) = memblock_alloc(sizeof(pg_data_t), 8);
if (!NODE_DATA(nid))
panic("%s: Failed to allocate %zu bytes align=0x%x\n",
__func__, sizeof(pg_data_t), 8);
}
-
- for_each_online_node(nid) {
- unsigned long start_pfn, end_pfn;
- unsigned long t_start, t_end;
- int i;
-
- start_pfn = ULONG_MAX;
- end_pfn = 0;
- for_each_mem_pfn_range(i, nid, &t_start, &t_end, NULL) {
- if (t_start < start_pfn)
- start_pfn = t_start;
- if (t_end > end_pfn)
- end_pfn = t_end;
- }
- NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
- NODE_DATA(nid)->node_id = nid;
- }
-}
-
-/*
- * numa_setup() - Earliest initialization
- *
- * Assign the mode and call the mode's setup routine.
- */
-void __init numa_setup(void)
-{
- pr_info("NUMA mode: %s\n", mode->name);
- nodes_clear(node_possible_map);
- /* Initially attach all possible CPUs to node 0. */
- cpumask_copy(&node_to_cpumask_map[0], cpu_possible_mask);
- if (mode->setup)
- mode->setup();
- numa_setup_memory();
- memblock_dump_all();
+ NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT;
+ NODE_DATA(0)->node_id = 0;
}
-/*
- * numa_init_late() - Initialization initcall
- *
- * Register NUMA nodes.
- */
static int __init numa_init_late(void)
{
- int nid;
-
- for_each_online_node(nid)
- register_one_node(nid);
+ register_one_node(0);
return 0;
}
arch_initcall(numa_init_late);
-
-static int __init parse_debug(char *parm)
-{
- numa_debug_enabled = 1;
- return 0;
-}
-early_param("numa_debug", parse_debug);
-
-static int __init parse_numa(char *parm)
-{
- if (!parm)
- return 1;
- if (strcmp(parm, numa_mode_plain.name) == 0)
- mode = &numa_mode_plain;
-#ifdef CONFIG_NUMA_EMU
- if (strcmp(parm, numa_mode_emu.name) == 0)
- mode = &numa_mode_emu;
-#endif
- return 0;
-}
-early_param("numa", parse_numa);
diff --git a/arch/s390/numa/numa_mode.h b/arch/s390/numa/numa_mode.h
deleted file mode 100644
index dfd3e2784081..000000000000
--- a/arch/s390/numa/numa_mode.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * NUMA support for s390
- *
- * Define declarations used for communication between NUMA mode
- * implementations and NUMA core functionality.
- *
- * Copyright IBM Corp. 2015
- */
-#ifndef __S390_NUMA_MODE_H
-#define __S390_NUMA_MODE_H
-
-struct numa_mode {
- char *name; /* Name of mode */
- void (*setup)(void); /* Initizalize mode */
- void (*update_cpu_topology)(void); /* Called by topology code */
- int (*__pfn_to_nid)(unsigned long pfn); /* PFN to node ID */
- unsigned long (*align)(void); /* Minimum node alignment */
- int (*distance)(int a, int b); /* Distance between two nodes */
-};
-
-extern const struct numa_mode numa_mode_plain;
-extern const struct numa_mode numa_mode_emu;
-
-#endif /* __S390_NUMA_MODE_H */
diff --git a/arch/s390/numa/toptree.c b/arch/s390/numa/toptree.c
deleted file mode 100644
index 71a608cd4f61..000000000000
--- a/arch/s390/numa/toptree.c
+++ /dev/null
@@ -1,351 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NUMA support for s390
- *
- * A tree structure used for machine topology mangling
- *
- * Copyright IBM Corp. 2015
- */
-
-#include <linux/kernel.h>
-#include <linux/memblock.h>
-#include <linux/cpumask.h>
-#include <linux/list.h>
-#include <linux/list_sort.h>
-#include <linux/slab.h>
-#include <asm/numa.h>
-
-#include "toptree.h"
-
-/**
- * toptree_alloc - Allocate and initialize a new tree node.
- * @level: The node's vertical level; level 0 contains the leaves.
- * @id: ID number, explicitly not unique beyond scope of node's siblings
- *
- * Allocate a new tree node and initialize it.
- *
- * RETURNS:
- * Pointer to the new tree node or NULL on error
- */
-struct toptree __ref *toptree_alloc(int level, int id)
-{
- struct toptree *res;
-
- if (slab_is_available())
- res = kzalloc(sizeof(*res), GFP_KERNEL);
- else
- res = memblock_alloc(sizeof(*res), 8);
- if (!res)
- return res;
-
- INIT_LIST_HEAD(&res->children);
- INIT_LIST_HEAD(&res->sibling);
- cpumask_clear(&res->mask);
- res->level = level;
- res->id = id;
- return res;
-}
-
-/**
- * toptree_remove - Remove a tree node from a tree
- * @cand: Pointer to the node to remove
- *
- * The node is detached from its parent node. The parent node's
- * masks will be updated to reflect the loss of the child.
- */
-static void toptree_remove(struct toptree *cand)
-{
- struct toptree *oldparent;
-
- list_del_init(&cand->sibling);
- oldparent = cand->parent;
- cand->parent = NULL;
- toptree_update_mask(oldparent);
-}
-
-/**
- * toptree_free - discard a tree node
- * @cand: Pointer to the tree node to discard
- *
- * Checks if @cand is attached to a parent node. Detaches it
- * cleanly using toptree_remove. Possible children are freed
- * recursively. In the end @cand itself is freed.
- */
-void __ref toptree_free(struct toptree *cand)
-{
- struct toptree *child, *tmp;
-
- if (cand->parent)
- toptree_remove(cand);
- toptree_for_each_child_safe(child, tmp, cand)
- toptree_free(child);
- if (slab_is_available())
- kfree(cand);
- else
- memblock_free_early((unsigned long)cand, sizeof(*cand));
-}
-
-/**
- * toptree_update_mask - Update node bitmasks
- * @cand: Pointer to a tree node
- *
- * The node's cpumask will be updated by combining all children's
- * masks. Then toptree_update_mask is called recursively for the
- * parent if applicable.
- *
- * NOTE:
- * This must not be called on leaves. If called on a leaf, its
- * CPU mask is cleared and lost.
- */
-void toptree_update_mask(struct toptree *cand)
-{
- struct toptree *child;
-
- cpumask_clear(&cand->mask);
- list_for_each_entry(child, &cand->children, sibling)
- cpumask_or(&cand->mask, &cand->mask, &child->mask);
- if (cand->parent)
- toptree_update_mask(cand->parent);
-}
-
-/**
- * toptree_insert - Insert a tree node into tree
- * @cand: Pointer to the node to insert
- * @target: Pointer to the node to which @cand will added as a child
- *
- * Insert a tree node into a tree. Masks will be updated automatically.
- *
- * RETURNS:
- * 0 on success, -1 if NULL is passed as argument or the node levels
- * don't fit.
- */
-static int toptree_insert(struct toptree *cand, struct toptree *target)
-{
- if (!cand || !target)
- return -1;
- if (target->level != (cand->level + 1))
- return -1;
- list_add_tail(&cand->sibling, &target->children);
- cand->parent = target;
- toptree_update_mask(target);
- return 0;
-}
-
-/**
- * toptree_move_children - Move all child nodes of a node to a new place
- * @cand: Pointer to the node whose children are to be moved
- * @target: Pointer to the node to which @cand's children will be attached
- *
- * Take all child nodes of @cand and move them using toptree_move.
- */
-static void toptree_move_children(struct toptree *cand, struct toptree *target)
-{
- struct toptree *child, *tmp;
-
- toptree_for_each_child_safe(child, tmp, cand)
- toptree_move(child, target);
-}
-
-/**
- * toptree_unify - Merge children with same ID
- * @cand: Pointer to node whose direct children should be made unique
- *
- * When mangling the tree it is possible that a node has two or more children
- * which have the same ID. This routine merges these children into one and
- * moves all children of the merged nodes into the unified node.
- */
-void toptree_unify(struct toptree *cand)
-{
- struct toptree *child, *tmp, *cand_copy;
-
- /* Threads cannot be split, cores are not split */
- if (cand->level < 2)
- return;
-
- cand_copy = toptree_alloc(cand->level, 0);
- toptree_for_each_child_safe(child, tmp, cand) {
- struct toptree *tmpchild;
-
- if (!cpumask_empty(&child->mask)) {
- tmpchild = toptree_get_child(cand_copy, child->id);
- toptree_move_children(child, tmpchild);
- }
- toptree_free(child);
- }
- toptree_move_children(cand_copy, cand);
- toptree_free(cand_copy);
-
- toptree_for_each_child(child, cand)
- toptree_unify(child);
-}
-
-/**
- * toptree_move - Move a node to another context
- * @cand: Pointer to the node to move
- * @target: Pointer to the node where @cand should go
- *
- * In the easiest case @cand is exactly on the level below @target
- * and will be immediately moved to the target.
- *
- * If @target's level is not the direct parent level of @cand,
- * nodes for the missing levels are created and put between
- * @cand and @target. The "stacking" nodes' IDs are taken from
- * @cand's parents.
- *
- * After this it is likely to have redundant nodes in the tree
- * which are addressed by means of toptree_unify.
- */
-void toptree_move(struct toptree *cand, struct toptree *target)
-{
- struct toptree *stack_target, *real_insert_point, *ptr, *tmp;
-
- if (cand->level + 1 == target->level) {
- toptree_remove(cand);
- toptree_insert(cand, target);
- return;
- }
-
- real_insert_point = NULL;
- ptr = cand;
- stack_target = NULL;
-
- do {
- tmp = stack_target;
- stack_target = toptree_alloc(ptr->level + 1,
- ptr->parent->id);
- toptree_insert(tmp, stack_target);
- if (!real_insert_point)
- real_insert_point = stack_target;
- ptr = ptr->parent;
- } while (stack_target->level < (target->level - 1));
-
- toptree_remove(cand);
- toptree_insert(cand, real_insert_point);
- toptree_insert(stack_target, target);
-}
-
-/**
- * toptree_get_child - Access a tree node's child by its ID
- * @cand: Pointer to tree node whose child is to access
- * @id: The desired child's ID
- *
- * @cand's children are searched for a child with matching ID.
- * If no match can be found, a new child with the desired ID
- * is created and returned.
- */
-struct toptree *toptree_get_child(struct toptree *cand, int id)
-{
- struct toptree *child;
-
- toptree_for_each_child(child, cand)
- if (child->id == id)
- return child;
- child = toptree_alloc(cand->level-1, id);
- toptree_insert(child, cand);
- return child;
-}
-
-/**
- * toptree_first - Find the first descendant on specified level
- * @context: Pointer to tree node whose descendants are to be used
- * @level: The level of interest
- *
- * RETURNS:
- * @context's first descendant on the specified level, or NULL
- * if there is no matching descendant
- */
-struct toptree *toptree_first(struct toptree *context, int level)
-{
- struct toptree *child, *tmp;
-
- if (context->level == level)
- return context;
-
- if (!list_empty(&context->children)) {
- list_for_each_entry(child, &context->children, sibling) {
- tmp = toptree_first(child, level);
- if (tmp)
- return tmp;
- }
- }
- return NULL;
-}
-
-/**
- * toptree_next_sibling - Return next sibling
- * @cur: Pointer to a tree node
- *
- * RETURNS:
- * If @cur has a parent and is not the last in the parent's children list,
- * the next sibling is returned. Or NULL when there are no siblings left.
- */
-static struct toptree *toptree_next_sibling(struct toptree *cur)
-{
- if (cur->parent == NULL)
- return NULL;
-
- if (cur == list_last_entry(&cur->parent->children,
- struct toptree, sibling))
- return NULL;
- return (struct toptree *) list_next_entry(cur, sibling);
-}
-
-/**
- * toptree_next - Tree traversal function
- * @cur: Pointer to current element
- * @context: Pointer to the root node of the tree or subtree to
- * be traversed.
- * @level: The level of interest.
- *
- * RETURNS:
- * Pointer to the next node on level @level
- * or NULL when there is no next node.
- */
-struct toptree *toptree_next(struct toptree *cur, struct toptree *context,
- int level)
-{
- struct toptree *cur_context, *tmp;
-
- if (!cur)
- return NULL;
-
- if (context->level == level)
- return NULL;
-
- tmp = toptree_next_sibling(cur);
- if (tmp != NULL)
- return tmp;
-
- cur_context = cur;
- while (cur_context->level < context->level - 1) {
- /* Step up */
- cur_context = cur_context->parent;
- /* Step aside */
- tmp = toptree_next_sibling(cur_context);
- if (tmp != NULL) {
- /* Step down */
- tmp = toptree_first(tmp, level);
- if (tmp != NULL)
- return tmp;
- }
- }
- return NULL;
-}
-
-/**
- * toptree_count - Count descendants on specified level
- * @context: Pointer to node whose descendants are to be considered
- * @level: Only descendants on the specified level will be counted
- *
- * RETURNS:
- * Number of descendants on the specified level
- */
-int toptree_count(struct toptree *context, int level)
-{
- struct toptree *cur;
- int cnt = 0;
-
- toptree_for_each(cur, context, level)
- cnt++;
- return cnt;
-}
diff --git a/arch/s390/numa/toptree.h b/arch/s390/numa/toptree.h
deleted file mode 100644
index 5246371ec713..000000000000
--- a/arch/s390/numa/toptree.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * NUMA support for s390
- *
- * A tree structure used for machine topology mangling
- *
- * Copyright IBM Corp. 2015
- */
-#ifndef S390_TOPTREE_H
-#define S390_TOPTREE_H
-
-#include <linux/cpumask.h>
-#include <linux/list.h>
-
-struct toptree {
- int level;
- int id;
- cpumask_t mask;
- struct toptree *parent;
- struct list_head sibling;
- struct list_head children;
-};
-
-struct toptree *toptree_alloc(int level, int id);
-void toptree_free(struct toptree *cand);
-void toptree_update_mask(struct toptree *cand);
-void toptree_unify(struct toptree *cand);
-struct toptree *toptree_get_child(struct toptree *cand, int id);
-void toptree_move(struct toptree *cand, struct toptree *target);
-int toptree_count(struct toptree *context, int level);
-
-struct toptree *toptree_first(struct toptree *context, int level);
-struct toptree *toptree_next(struct toptree *cur, struct toptree *context,
- int level);
-
-#define toptree_for_each_child(child, ptree) \
- list_for_each_entry(child, &ptree->children, sibling)
-
-#define toptree_for_each_child_safe(child, ptmp, ptree) \
- list_for_each_entry_safe(child, ptmp, &ptree->children, sibling)
-
-#define toptree_is_last(ptree) \
- ((ptree->parent == NULL) || \
- (ptree->parent->children.prev == &ptree->sibling))
-
-#define toptree_for_each(ptree, cont, ttype) \
- for (ptree = toptree_first(cont, ttype); \
- ptree != NULL; \
- ptree = toptree_next(ptree, cont, ttype))
-
-#define toptree_for_each_safe(ptree, tmp, cont, ttype) \
- for (ptree = toptree_first(cont, ttype), \
- tmp = toptree_next(ptree, cont, ttype); \
- ptree != NULL; \
- ptree = tmp, \
- tmp = toptree_next(ptree, cont, ttype))
-
-#define toptree_for_each_sibling(ptree, start) \
- toptree_for_each(ptree, start->parent, start->level)
-
-#endif /* S390_TOPTREE_H */
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 60716d18ce5a..94ca121933de 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -40,8 +40,9 @@
static LIST_HEAD(zpci_list);
static DEFINE_SPINLOCK(zpci_list_lock);
-static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES);
+static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE);
static DEFINE_SPINLOCK(zpci_domain_lock);
+static unsigned int zpci_num_domains_allocated;
#define ZPCI_IOMAP_ENTRIES \
min(((unsigned long) ZPCI_NR_DEVICES * PCI_STD_NUM_BARS / 2), \
@@ -607,57 +608,25 @@ void pcibios_disable_device(struct pci_dev *pdev)
zpci_debug_exit_device(zdev);
}
-#ifdef CONFIG_HIBERNATE_CALLBACKS
-static int zpci_restore(struct device *dev)
-{
- struct pci_dev *pdev = to_pci_dev(dev);
- struct zpci_dev *zdev = to_zpci(pdev);
- int ret = 0;
-
- if (zdev->state != ZPCI_FN_STATE_ONLINE)
- goto out;
-
- ret = clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES);
- if (ret)
- goto out;
-
- zpci_map_resources(pdev);
- zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- (u64) zdev->dma_table);
-
-out:
- return ret;
-}
-
-static int zpci_freeze(struct device *dev)
-{
- struct pci_dev *pdev = to_pci_dev(dev);
- struct zpci_dev *zdev = to_zpci(pdev);
-
- if (zdev->state != ZPCI_FN_STATE_ONLINE)
- return 0;
-
- zpci_unregister_ioat(zdev, 0);
- zpci_unmap_resources(pdev);
- return clp_disable_fh(zdev);
-}
-
-struct dev_pm_ops pcibios_pm_ops = {
- .thaw_noirq = zpci_restore,
- .freeze_noirq = zpci_freeze,
- .restore_noirq = zpci_restore,
- .poweroff_noirq = zpci_freeze,
-};
-#endif /* CONFIG_HIBERNATE_CALLBACKS */
-
static int zpci_alloc_domain(struct zpci_dev *zdev)
{
+ spin_lock(&zpci_domain_lock);
+ if (zpci_num_domains_allocated > (ZPCI_NR_DEVICES - 1)) {
+ spin_unlock(&zpci_domain_lock);
+ pr_err("Adding PCI function %08x failed because the configured limit of %d is reached\n",
+ zdev->fid, ZPCI_NR_DEVICES);
+ return -ENOSPC;
+ }
+
if (zpci_unique_uid) {
zdev->domain = (u16) zdev->uid;
- if (zdev->domain >= ZPCI_NR_DEVICES)
- return 0;
+ if (zdev->domain == 0) {
+ pr_warn("UID checking is active but no UID is set for PCI function %08x, so automatic domain allocation is used instead\n",
+ zdev->fid);
+ update_uid_checking(false);
+ goto auto_allocate;
+ }
- spin_lock(&zpci_domain_lock);
if (test_bit(zdev->domain, zpci_domain)) {
spin_unlock(&zpci_domain_lock);
pr_err("Adding PCI function %08x failed because domain %04x is already assigned\n",
@@ -665,30 +634,28 @@ static int zpci_alloc_domain(struct zpci_dev *zdev)
return -EEXIST;
}
set_bit(zdev->domain, zpci_domain);
+ zpci_num_domains_allocated++;
spin_unlock(&zpci_domain_lock);
return 0;
}
-
- spin_lock(&zpci_domain_lock);
+auto_allocate:
+ /*
+ * We can always auto allocate domains below ZPCI_NR_DEVICES.
+ * There is either a free domain or we have reached the maximum in
+ * which case we would have bailed earlier.
+ */
zdev->domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES);
- if (zdev->domain == ZPCI_NR_DEVICES) {
- spin_unlock(&zpci_domain_lock);
- pr_err("Adding PCI function %08x failed because the configured limit of %d is reached\n",
- zdev->fid, ZPCI_NR_DEVICES);
- return -ENOSPC;
- }
set_bit(zdev->domain, zpci_domain);
+ zpci_num_domains_allocated++;
spin_unlock(&zpci_domain_lock);
return 0;
}
static void zpci_free_domain(struct zpci_dev *zdev)
{
- if (zdev->domain >= ZPCI_NR_DEVICES)
- return;
-
spin_lock(&zpci_domain_lock);
clear_bit(zdev->domain, zpci_domain);
+ zpci_num_domains_allocated--;
spin_unlock(&zpci_domain_lock);
}
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 0d3d8f170ea4..ea794ae755ae 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -24,7 +24,7 @@
bool zpci_unique_uid;
-static void update_uid_checking(bool new)
+void update_uid_checking(bool new)
{
if (zpci_unique_uid != new)
zpci_dbg(1, "uid checking:%d\n", new);
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index fbe97ab2e228..743f257cf2cb 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -115,7 +115,6 @@ static struct irq_chip zpci_irq_chip = {
.name = "PCI-MSI",
.irq_unmask = pci_msi_unmask_irq,
.irq_mask = pci_msi_mask_irq,
- .irq_set_affinity = zpci_set_irq_affinity,
};
static void zpci_handle_cpu_local_irq(bool rescan)
@@ -276,7 +275,9 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
rc = -EIO;
if (hwirq - bit >= msi_vecs)
break;
- irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE, msi->affinity);
+ irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE,
+ (irq_delivery == DIRECTED) ?
+ msi->affinity : NULL);
if (irq < 0)
return -ENOMEM;
rc = irq_set_msi_desc(irq, msi);
diff --git a/arch/s390/purgatory/.gitignore b/arch/s390/purgatory/.gitignore
index c82157f46b18..97ca52779457 100644
--- a/arch/s390/purgatory/.gitignore
+++ b/arch/s390/purgatory/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
purgatory
purgatory.chk
purgatory.lds
diff --git a/arch/s390/tools/.gitignore b/arch/s390/tools/.gitignore
index 71bd6f8eebaf..ea62f37b79ef 100644
--- a/arch/s390/tools/.gitignore
+++ b/arch/s390/tools/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
gen_facilities
gen_opcode_table