diff options
Diffstat (limited to 'arch/s390')
115 files changed, 3051 insertions, 2599 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index b4c7c34069f8..a0e2130f0100 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -2,9 +2,6 @@ config MMU def_bool y -config ZONE_DMA - def_bool y - config CPU_BIG_ENDIAN def_bool y @@ -62,7 +59,7 @@ config S390 select ARCH_BINFMT_ELF_STATE select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM select ARCH_ENABLE_MEMORY_HOTREMOVE - select ARCH_ENABLE_SPLIT_PMD_PTLOCK + select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX select ARCH_HAS_DEVMEM_IS_ALLOWED @@ -117,6 +114,7 @@ config S390 select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANTS_DYNAMIC_TASK_STRUCT + select ARCH_WANTS_NO_INSTR select ARCH_WANT_DEFAULT_BPF_JIT select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_TABLE_SORT @@ -164,7 +162,7 @@ config S390 select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_GCC_PLUGINS select HAVE_GENERIC_VDSO - select HAVE_IRQ_EXIT_ON_IRQ_STACK + select HAVE_IOREMAP_PROT if PCI select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 @@ -172,6 +170,7 @@ config S390 select HAVE_KERNEL_LZO select HAVE_KERNEL_UNCOMPRESSED select HAVE_KERNEL_XZ + select HAVE_KERNEL_ZSTD select HAVE_KPROBES select HAVE_KPROBES_ON_FTRACE select HAVE_KRETPROBES @@ -210,6 +209,7 @@ config S390 select THREAD_INFO_IN_TASK select TTY select VIRT_CPU_ACCOUNTING + select ZONE_DMA # Note: keep the above list sorted alphabetically config SCHED_OMIT_FRAME_POINTER @@ -437,6 +437,7 @@ config COMPAT select COMPAT_OLD_SIGACTION select HAVE_UID16 depends on MULTIUSER + depends on !CC_IS_CLANG help Select this option if you want to enable your system kernel to handle system-calls from ELF binaries for 31 bit ESA. This option @@ -475,7 +476,7 @@ config NUMA config NODES_SHIFT int - depends on NEED_MULTIPLE_NODES + depends on NUMA default "1" config SCHED_SMT @@ -768,7 +769,7 @@ config VFIO_CCW config VFIO_AP def_tristate n prompt "VFIO support for AP devices" - depends on S390_AP_IOMMU && VFIO_MDEV_DEVICE && KVM + depends on S390_AP_IOMMU && VFIO_MDEV && KVM depends on ZCRYPT help This driver grants access to Adjunct Processor (AP) devices @@ -853,7 +854,7 @@ config CMM_IUCV config APPLDATA_BASE def_bool n prompt "Linux - VM Monitor Stream, base infrastructure" - depends on PROC_FS + depends on PROC_SYSCTL help This provides a kernel interface for creating and updating z/VM APPLDATA monitor records. The monitor records are updated at certain time diff --git a/arch/s390/Makefile b/arch/s390/Makefile index e443ed9947bd..1e3172877982 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -28,6 +28,7 @@ KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables KBUILD_CFLAGS_DECOMPRESSOR += -ffreestanding +KBUILD_CFLAGS_DECOMPRESSOR += -fno-stack-protector KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,)) @@ -128,9 +129,6 @@ OBJCOPYFLAGS := -O binary head-y := arch/s390/kernel/head64.o -# See arch/s390/Kbuild for content of core part of the kernel -core-y += arch/s390/ - libs-y += arch/s390/lib/ drivers-y += drivers/s390/ @@ -165,6 +163,19 @@ archheaders: archprepare: $(Q)$(MAKE) $(build)=$(syscalls) kapi $(Q)$(MAKE) $(build)=$(tools) kapi +ifeq ($(KBUILD_EXTMOD),) +# We need to generate vdso-offsets.h before compiling certain files in kernel/. +# In order to do that, we should use the archprepare target, but we can't since +# asm-offsets.h is included in some files used to generate vdso-offsets.h, and +# asm-offsets.h is built in prepare0, for which archprepare is a dependency. +# Therefore we need to generate the header after prepare0 has been made, hence +# this hack. +prepare: vdso_prepare +vdso_prepare: prepare0 + $(Q)$(MAKE) $(build)=arch/s390/kernel/vdso64 include/generated/vdso64-offsets.h + $(if $(CONFIG_COMPAT),$(Q)$(MAKE) \ + $(build)=arch/s390/kernel/vdso32 include/generated/vdso32-offsets.h) +endif # Don't use tabs in echo arguments define archhelp diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c index ff6801d401c4..47c48fbfb563 100644 --- a/arch/s390/boot/als.c +++ b/arch/s390/boot/als.c @@ -68,7 +68,7 @@ void print_missing_facilities(void) first = 1; for (i = 0; i < ARRAY_SIZE(als); i++) { - val = ~S390_lowcore.stfle_fac_list[i] & als[i]; + val = ~stfle_fac_list[i] & als[i]; for (j = 0; j < BITS_PER_LONG; j++) { if (!(val & (1UL << (BITS_PER_LONG - 1 - j)))) continue; @@ -106,9 +106,9 @@ void verify_facilities(void) { int i; - __stfle(S390_lowcore.stfle_fac_list, ARRAY_SIZE(S390_lowcore.stfle_fac_list)); + __stfle(stfle_fac_list, ARRAY_SIZE(stfle_fac_list)); for (i = 0; i < ARRAY_SIZE(als); i++) { - if ((S390_lowcore.stfle_fac_list[i] & als[i]) != als[i]) + if ((stfle_fac_list[i] & als[i]) != als[i]) facility_mismatch(); } } diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 8b50967f5804..ae04e1c93764 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -24,6 +24,7 @@ void __printf(1, 2) decompressor_printk(const char *fmt, ...); extern const char kernel_version[]; extern unsigned long memory_limit; +extern unsigned long vmalloc_size; extern int vmalloc_size_set; extern int kaslr_enabled; diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index de18dab518bb..660c799d875d 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -14,6 +14,7 @@ obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o obj-all := $(obj-y) piggy.o syms.o targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 +targets += vmlinux.bin.zst targets += info.bin syms.bin vmlinux.syms $(obj-all) KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) @@ -33,7 +34,7 @@ $(obj)/vmlinux.syms: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OB quiet_cmd_dumpsyms = DUMPSYMS $< define cmd_dumpsyms - $(NM) -n -S --format=bsd "$<" | $(PERL) -ne '/(\w+)\s+(\w+)\s+[tT]\s+(\w+)/ and printf "%x %x %s\0",hex $$1,hex $$2,$$3' > "$@" + $(NM) -n -S --format=bsd "$<" | sed -nE 's/^0*([0-9a-fA-F]+) 0*([0-9a-fA-F]+) [tT] ([^ ]*)$$/\1 \2 \3/p' | tr '\n' '\0' > "$@" endef $(obj)/syms.bin: $(obj)/vmlinux.syms FORCE @@ -63,6 +64,7 @@ suffix-$(CONFIG_KERNEL_LZ4) := .lz4 suffix-$(CONFIG_KERNEL_LZMA) := .lzma suffix-$(CONFIG_KERNEL_LZO) := .lzo suffix-$(CONFIG_KERNEL_XZ) := .xz +suffix-$(CONFIG_KERNEL_ZSTD) := .zst $(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE $(call if_changed,gzip) @@ -76,6 +78,8 @@ $(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE $(call if_changed,lzo) $(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE $(call if_changed,xzkern) +$(obj)/vmlinux.bin.zst: $(vmlinux.bin.all-y) FORCE + $(call if_changed,zstd22) OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.bin.compressed $(obj)/piggy.o: $(obj)/vmlinux.bin$(suffix-y) FORCE diff --git a/arch/s390/boot/compressed/decompressor.c b/arch/s390/boot/compressed/decompressor.c index 3061b11c4d27..37a4a8d33c6c 100644 --- a/arch/s390/boot/compressed/decompressor.c +++ b/arch/s390/boot/compressed/decompressor.c @@ -28,8 +28,10 @@ extern char _end[]; extern unsigned char _compressed_start[]; extern unsigned char _compressed_end[]; -#ifdef CONFIG_HAVE_KERNEL_BZIP2 +#ifdef CONFIG_KERNEL_BZIP2 #define BOOT_HEAP_SIZE 0x400000 +#elif CONFIG_KERNEL_ZSTD +#define BOOT_HEAP_SIZE 0x30000 #else #define BOOT_HEAP_SIZE 0x10000 #endif @@ -61,6 +63,10 @@ static unsigned long free_mem_end_ptr = (unsigned long) _end + BOOT_HEAP_SIZE; #include "../../../../lib/decompress_unxz.c" #endif +#ifdef CONFIG_KERNEL_ZSTD +#include "../../../../lib/decompress_unzstd.c" +#endif + #define decompress_offset ALIGN((unsigned long)_end + BOOT_HEAP_SIZE, PAGE_SIZE) unsigned long mem_safe_offset(void) diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index dacb7813f982..51693cfb65c2 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -401,6 +401,7 @@ SYM_CODE_END(startup_pgm_check_handler) # Must be keept in sync with struct parmarea in setup.h # .org PARMAREA +SYM_DATA_START(parmarea) .quad 0 # IPL_DEVICE .quad 0 # INITRD_START .quad 0 # INITRD_SIZE @@ -411,6 +412,8 @@ SYM_CODE_END(startup_pgm_check_handler) .org COMMAND_LINE .byte "root=/dev/ram0 ro" .byte 0 + .org PARMAREA+__PARMAREA_SIZE +SYM_DATA_END(parmarea) .org EARLY_SCCB_OFFSET .fill 4096 diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index d372a45fe10e..0f84c072625e 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -12,39 +12,44 @@ #include "boot.h" char __bootdata(early_command_line)[COMMAND_LINE_SIZE]; +int __bootdata(noexec_disabled); + +unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL; struct ipl_parameter_block __bootdata_preserved(ipl_block); int __bootdata_preserved(ipl_block_valid); -unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL; - -unsigned long __bootdata(vmalloc_size) = VMALLOC_DEFAULT_SIZE; -int __bootdata(noexec_disabled); +unsigned long vmalloc_size = VMALLOC_DEFAULT_SIZE; unsigned long memory_limit; int vmalloc_size_set; int kaslr_enabled; static inline int __diag308(unsigned long subcode, void *addr) { - register unsigned long _addr asm("0") = (unsigned long)addr; - register unsigned long _rc asm("1") = 0; unsigned long reg1, reg2; - psw_t old = S390_lowcore.program_new_psw; + union register_pair r1; + psw_t old; + r1.even = (unsigned long) addr; + r1.odd = 0; asm volatile( - " epsw %0,%1\n" - " st %0,%[psw_pgm]\n" - " st %1,%[psw_pgm]+4\n" - " larl %0,1f\n" - " stg %0,%[psw_pgm]+8\n" - " diag %[addr],%[subcode],0x308\n" - "1: nopr %%r7\n" - : "=&d" (reg1), "=&a" (reg2), - [psw_pgm] "=Q" (S390_lowcore.program_new_psw), - [addr] "+d" (_addr), "+d" (_rc) - : [subcode] "d" (subcode) + " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" + " epsw %[reg1],%[reg2]\n" + " st %[reg1],0(%[psw_pgm])\n" + " st %[reg2],4(%[psw_pgm])\n" + " larl %[reg1],1f\n" + " stg %[reg1],8(%[psw_pgm])\n" + " diag %[r1],%[subcode],0x308\n" + "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" + : [r1] "+&d" (r1.pair), + [reg1] "=&d" (reg1), + [reg2] "=&a" (reg2), + "+Q" (S390_lowcore.program_new_psw), + "=Q" (old) + : [subcode] "d" (subcode), + [psw_old] "a" (&old), + [psw_pgm] "a" (&S390_lowcore.program_new_psw) : "cc", "memory"); - S390_lowcore.program_new_psw = old; - return _rc; + return r1.odd; } void store_ipl_parmblock(void) @@ -165,12 +170,12 @@ static inline int has_ebcdic_char(const char *str) void setup_boot_command_line(void) { - COMMAND_LINE[ARCH_COMMAND_LINE_SIZE - 1] = 0; + parmarea.command_line[ARCH_COMMAND_LINE_SIZE - 1] = 0; /* convert arch command line to ascii if necessary */ - if (has_ebcdic_char(COMMAND_LINE)) - EBCASC(COMMAND_LINE, ARCH_COMMAND_LINE_SIZE); + if (has_ebcdic_char(parmarea.command_line)) + EBCASC(parmarea.command_line, ARCH_COMMAND_LINE_SIZE); /* copy arch command line */ - strcpy(early_command_line, strim(COMMAND_LINE)); + strcpy(early_command_line, strim(parmarea.command_line)); /* append IPL PARM data to the boot command line */ if (!is_prot_virt_guest() && ipl_block_valid) @@ -180,9 +185,9 @@ void setup_boot_command_line(void) static void modify_facility(unsigned long nr, bool clear) { if (clear) - __clear_facility(nr, S390_lowcore.stfle_fac_list); + __clear_facility(nr, stfle_fac_list); else - __set_facility(nr, S390_lowcore.stfle_fac_list); + __set_facility(nr, stfle_fac_list); } static void check_cleared_facilities(void) @@ -191,7 +196,7 @@ static void check_cleared_facilities(void) int i; for (i = 0; i < ARRAY_SIZE(als); i++) { - if ((S390_lowcore.stfle_fac_list[i] & als[i]) != als[i]) { + if ((stfle_fac_list[i] & als[i]) != als[i]) { sclp_early_printk("Warning: The Linux kernel requires facilities cleared via command line option\n"); print_missing_facilities(); break; diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c index 40168e59abd3..4e17adbde495 100644 --- a/arch/s390/boot/mem_detect.c +++ b/arch/s390/boot/mem_detect.c @@ -64,30 +64,37 @@ void add_mem_detect_block(u64 start, u64 end) static int __diag260(unsigned long rx1, unsigned long rx2) { - register unsigned long _rx1 asm("2") = rx1; - register unsigned long _rx2 asm("3") = rx2; - register unsigned long _ry asm("4") = 0x10; /* storage configuration */ - int rc = -1; /* fail */ - unsigned long reg1, reg2; - psw_t old = S390_lowcore.program_new_psw; - + unsigned long reg1, reg2, ry; + union register_pair rx; + psw_t old; + int rc; + + rx.even = rx1; + rx.odd = rx2; + ry = 0x10; /* storage configuration */ + rc = -1; /* fail */ asm volatile( - " epsw %0,%1\n" - " st %0,%[psw_pgm]\n" - " st %1,%[psw_pgm]+4\n" - " larl %0,1f\n" - " stg %0,%[psw_pgm]+8\n" + " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" + " epsw %[reg1],%[reg2]\n" + " st %[reg1],0(%[psw_pgm])\n" + " st %[reg2],4(%[psw_pgm])\n" + " larl %[reg1],1f\n" + " stg %[reg1],8(%[psw_pgm])\n" " diag %[rx],%[ry],0x260\n" " ipm %[rc]\n" " srl %[rc],28\n" - "1:\n" - : "=&d" (reg1), "=&a" (reg2), - [psw_pgm] "=Q" (S390_lowcore.program_new_psw), - [rc] "+&d" (rc), [ry] "+d" (_ry) - : [rx] "d" (_rx1), "d" (_rx2) + "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" + : [reg1] "=&d" (reg1), + [reg2] "=&a" (reg2), + [rc] "+&d" (rc), + [ry] "+&d" (ry), + "+Q" (S390_lowcore.program_new_psw), + "=Q" (old) + : [rx] "d" (rx.pair), + [psw_old] "a" (&old), + [psw_pgm] "a" (&S390_lowcore.program_new_psw) : "cc", "memory"); - S390_lowcore.program_new_psw = old; - return rc == 0 ? _ry : -1; + return rc == 0 ? ry : -1; } static int diag260(void) @@ -111,24 +118,30 @@ static int diag260(void) static int tprot(unsigned long addr) { - unsigned long pgm_addr; + unsigned long reg1, reg2; int rc = -EFAULT; - psw_t old = S390_lowcore.program_new_psw; + psw_t old; - S390_lowcore.program_new_psw.mask = __extract_psw(); asm volatile( - " larl %[pgm_addr],1f\n" - " stg %[pgm_addr],%[psw_pgm_addr]\n" + " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" + " epsw %[reg1],%[reg2]\n" + " st %[reg1],0(%[psw_pgm])\n" + " st %[reg2],4(%[psw_pgm])\n" + " larl %[reg1],1f\n" + " stg %[reg1],8(%[psw_pgm])\n" " tprot 0(%[addr]),0\n" " ipm %[rc]\n" " srl %[rc],28\n" - "1:\n" - : [pgm_addr] "=&d"(pgm_addr), - [psw_pgm_addr] "=Q"(S390_lowcore.program_new_psw.addr), - [rc] "+&d"(rc) - : [addr] "a"(addr) + "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" + : [reg1] "=&d" (reg1), + [reg2] "=&a" (reg2), + [rc] "+&d" (rc), + "=Q" (S390_lowcore.program_new_psw.addr), + "=Q" (old) + : [psw_old] "a" (&old), + [psw_pgm] "a" (&S390_lowcore.program_new_psw), + [addr] "a" (addr) : "cc", "memory"); - S390_lowcore.program_new_psw = old; return rc; } diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 05f8eefa3dcf..d0cf21641e3a 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -5,6 +5,7 @@ #include <asm/sections.h> #include <asm/cpu_mf.h> #include <asm/setup.h> +#include <asm/kasan.h> #include <asm/kexec.h> #include <asm/sclp.h> #include <asm/diag.h> @@ -15,7 +16,17 @@ extern char __boot_data_start[], __boot_data_end[]; extern char __boot_data_preserved_start[], __boot_data_preserved_end[]; unsigned long __bootdata_preserved(__kaslr_offset); +unsigned long __bootdata_preserved(VMALLOC_START); +unsigned long __bootdata_preserved(VMALLOC_END); +struct page *__bootdata_preserved(vmemmap); +unsigned long __bootdata_preserved(vmemmap_size); +unsigned long __bootdata_preserved(MODULES_VADDR); +unsigned long __bootdata_preserved(MODULES_END); unsigned long __bootdata(ident_map_size); +int __bootdata(is_full_image) = 1; + +u64 __bootdata_preserved(stfle_fac_list[16]); +u64 __bootdata_preserved(alt_stfle_fac_list[16]); /* * Some code and data needs to stay below 2 GB, even when the kernel would be @@ -169,6 +180,86 @@ static void setup_ident_map_size(unsigned long max_physmem_end) #endif } +static void setup_kernel_memory_layout(void) +{ + bool vmalloc_size_verified = false; + unsigned long vmemmap_off; + unsigned long vspace_left; + unsigned long rte_size; + unsigned long pages; + unsigned long vmax; + + pages = ident_map_size / PAGE_SIZE; + /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */ + vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page); + + /* choose kernel address space layout: 4 or 3 levels. */ + vmemmap_off = round_up(ident_map_size, _REGION3_SIZE); + if (IS_ENABLED(CONFIG_KASAN) || + vmalloc_size > _REGION2_SIZE || + vmemmap_off + vmemmap_size + vmalloc_size + MODULES_LEN > _REGION2_SIZE) + vmax = _REGION1_SIZE; + else + vmax = _REGION2_SIZE; + + /* keep vmemmap_off aligned to a top level region table entry */ + rte_size = vmax == _REGION1_SIZE ? _REGION2_SIZE : _REGION3_SIZE; + MODULES_END = vmax; + if (is_prot_virt_host()) { + /* + * forcing modules and vmalloc area under the ultravisor + * secure storage limit, so that any vmalloc allocation + * we do could be used to back secure guest storage. + */ + adjust_to_uv_max(&MODULES_END); + } + +#ifdef CONFIG_KASAN + if (MODULES_END < vmax) { + /* force vmalloc and modules below kasan shadow */ + MODULES_END = min(MODULES_END, KASAN_SHADOW_START); + } else { + /* + * leave vmalloc and modules above kasan shadow but make + * sure they don't overlap with it + */ + vmalloc_size = min(vmalloc_size, vmax - KASAN_SHADOW_END - MODULES_LEN); + vmalloc_size_verified = true; + vspace_left = KASAN_SHADOW_START; + } +#endif + MODULES_VADDR = MODULES_END - MODULES_LEN; + VMALLOC_END = MODULES_VADDR; + + if (vmalloc_size_verified) { + VMALLOC_START = VMALLOC_END - vmalloc_size; + } else { + vmemmap_off = round_up(ident_map_size, rte_size); + + if (vmemmap_off + vmemmap_size > VMALLOC_END || + vmalloc_size > VMALLOC_END - vmemmap_off - vmemmap_size) { + /* + * allow vmalloc area to occupy up to 1/2 of + * the rest virtual space left. + */ + vmalloc_size = min(vmalloc_size, VMALLOC_END / 2); + } + VMALLOC_START = VMALLOC_END - vmalloc_size; + vspace_left = VMALLOC_START; + } + + pages = vspace_left / (PAGE_SIZE + sizeof(struct page)); + pages = SECTION_ALIGN_UP(pages); + vmemmap_off = round_up(vspace_left - pages * sizeof(struct page), rte_size); + /* keep vmemmap left most starting from a fresh region table entry */ + vmemmap_off = min(vmemmap_off, round_up(ident_map_size, rte_size)); + /* take care that identity map is lower then vmemmap */ + ident_map_size = min(ident_map_size, vmemmap_off); + vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page); + VMALLOC_START = max(vmemmap_off + vmemmap_size, VMALLOC_START); + vmemmap = (struct page *)vmemmap_off; +} + /* * This function clears the BSS section of the decompressed Linux kernel and NOT the decompressor's. */ @@ -208,6 +299,7 @@ void startup_kernel(void) parse_boot_command_line(); setup_ident_map_size(detect_memory()); setup_vmalloc_size(); + setup_kernel_memory_layout(); random_lma = __kaslr_offset = 0; if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) { diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c index 87641dd65ccf..f6b0c4f43c99 100644 --- a/arch/s390/boot/uv.c +++ b/arch/s390/boot/uv.c @@ -36,6 +36,7 @@ void uv_query_info(void) uv_info.max_sec_stor_addr = ALIGN(uvcb.max_guest_stor_addr, PAGE_SIZE); uv_info.max_num_sec_conf = uvcb.max_num_sec_conf; uv_info.max_guest_cpu_id = uvcb.max_guest_cpu_id; + uv_info.uv_feature_indications = uvcb.uv_feature_indications; } #ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST @@ -44,3 +45,28 @@ void uv_query_info(void) prot_virt_guest = 1; #endif } + +#if IS_ENABLED(CONFIG_KVM) +static bool has_uv_sec_stor_limit(void) +{ + /* + * keep these conditions in line with setup_uv() + */ + if (!is_prot_virt_host()) + return false; + + if (is_prot_virt_guest()) + return false; + + if (!test_facility(158)) + return false; + + return !!uv_info.max_sec_stor_addr; +} + +void adjust_to_uv_max(unsigned long *vmax) +{ + if (has_uv_sec_stor_limit()) + *vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr); +} +#endif diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c index 7d9fb496d155..f5f7e78ddc0c 100644 --- a/arch/s390/hypfs/hypfs_sprp.c +++ b/arch/s390/hypfs/hypfs_sprp.c @@ -25,14 +25,13 @@ static inline unsigned long __hypfs_sprp_diag304(void *data, unsigned long cmd) { - register unsigned long _data asm("2") = (unsigned long) data; - register unsigned long _rc asm("3"); - register unsigned long _cmd asm("4") = cmd; + union register_pair r1 = { .even = (unsigned long)data, }; - asm volatile("diag %1,%2,0x304\n" - : "=d" (_rc) : "d" (_data), "d" (_cmd) : "memory"); - - return _rc; + asm volatile("diag %[r1],%[r3],0x304\n" + : [r1] "+&d" (r1.pair) + : [r3] "d" (cmd) + : "memory"); + return r1.odd; } static unsigned long hypfs_sprp_diag304(void *data, unsigned long cmd) diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index 837d1699b109..3afbee21dc1f 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -53,18 +53,20 @@ struct ap_queue_status { */ static inline bool ap_instructions_available(void) { - register unsigned long reg0 asm ("0") = AP_MKQID(0, 0); - register unsigned long reg1 asm ("1") = 0; - register unsigned long reg2 asm ("2") = 0; + unsigned long reg0 = AP_MKQID(0, 0); + unsigned long reg1 = 0; asm volatile( - " .long 0xb2af0000\n" /* PQAP(TAPQ) */ - "0: la %0,1\n" + " lgr 0,%[reg0]\n" /* qid into gr0 */ + " lghi 1,0\n" /* 0 into gr1 */ + " lghi 2,0\n" /* 0 into gr2 */ + " .long 0xb2af0000\n" /* PQAP(TAPQ) */ + "0: la %[reg1],1\n" /* 1 into reg1 */ "1:\n" EX_TABLE(0b, 1b) - : "+d" (reg1), "+d" (reg2) - : "d" (reg0) - : "cc"); + : [reg1] "+&d" (reg1) + : [reg0] "d" (reg0) + : "cc", "0", "1", "2"); return reg1 != 0; } @@ -77,14 +79,18 @@ static inline bool ap_instructions_available(void) */ static inline struct ap_queue_status ap_tapq(ap_qid_t qid, unsigned long *info) { - register unsigned long reg0 asm ("0") = qid; - register struct ap_queue_status reg1 asm ("1"); - register unsigned long reg2 asm ("2"); - - asm volatile(".long 0xb2af0000" /* PQAP(TAPQ) */ - : "=d" (reg1), "=d" (reg2) - : "d" (reg0) - : "cc"); + struct ap_queue_status reg1; + unsigned long reg2; + + asm volatile( + " lgr 0,%[qid]\n" /* qid into gr0 */ + " lghi 2,0\n" /* 0 into gr2 */ + " .long 0xb2af0000\n" /* PQAP(TAPQ) */ + " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ + " lgr %[reg2],2\n" /* gr2 into reg2 */ + : [reg1] "=&d" (reg1), [reg2] "=&d" (reg2) + : [qid] "d" (qid) + : "cc", "0", "1", "2"); if (info) *info = reg2; return reg1; @@ -115,14 +121,16 @@ static inline struct ap_queue_status ap_test_queue(ap_qid_t qid, */ static inline struct ap_queue_status ap_rapq(ap_qid_t qid) { - register unsigned long reg0 asm ("0") = qid | (1UL << 24); - register struct ap_queue_status reg1 asm ("1"); + unsigned long reg0 = qid | (1UL << 24); /* fc 1UL is RAPQ */ + struct ap_queue_status reg1; asm volatile( - ".long 0xb2af0000" /* PQAP(RAPQ) */ - : "=d" (reg1) - : "d" (reg0) - : "cc"); + " lgr 0,%[reg0]\n" /* qid arg into gr0 */ + " .long 0xb2af0000\n" /* PQAP(RAPQ) */ + " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ + : [reg1] "=&d" (reg1) + : [reg0] "d" (reg0) + : "cc", "0", "1"); return reg1; } @@ -134,14 +142,16 @@ static inline struct ap_queue_status ap_rapq(ap_qid_t qid) */ static inline struct ap_queue_status ap_zapq(ap_qid_t qid) { - register unsigned long reg0 asm ("0") = qid | (2UL << 24); - register struct ap_queue_status reg1 asm ("1"); + unsigned long reg0 = qid | (2UL << 24); /* fc 2UL is ZAPQ */ + struct ap_queue_status reg1; asm volatile( - ".long 0xb2af0000" /* PQAP(ZAPQ) */ - : "=d" (reg1) - : "d" (reg0) - : "cc"); + " lgr 0,%[reg0]\n" /* qid arg into gr0 */ + " .long 0xb2af0000\n" /* PQAP(ZAPQ) */ + " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ + : [reg1] "=&d" (reg1) + : [reg0] "d" (reg0) + : "cc", "0", "1"); return reg1; } @@ -172,18 +182,20 @@ struct ap_config_info { */ static inline int ap_qci(struct ap_config_info *config) { - register unsigned long reg0 asm ("0") = 4UL << 24; - register unsigned long reg1 asm ("1") = -EOPNOTSUPP; - register struct ap_config_info *reg2 asm ("2") = config; + unsigned long reg0 = 4UL << 24; /* fc 4UL is QCI */ + unsigned long reg1 = -EOPNOTSUPP; + struct ap_config_info *reg2 = config; asm volatile( - ".long 0xb2af0000\n" /* PQAP(QCI) */ - "0: la %0,0\n" + " lgr 0,%[reg0]\n" /* QCI fc into gr0 */ + " lgr 2,%[reg2]\n" /* ptr to config into gr2 */ + " .long 0xb2af0000\n" /* PQAP(QCI) */ + "0: la %[reg1],0\n" /* good case, QCI fc available */ "1:\n" EX_TABLE(0b, 1b) - : "+d" (reg1) - : "d" (reg0), "d" (reg2) - : "cc", "memory"); + : [reg1] "+&d" (reg1) + : [reg0] "d" (reg0), [reg2] "d" (reg2) + : "cc", "memory", "0", "2"); return reg1; } @@ -220,21 +232,25 @@ static inline struct ap_queue_status ap_aqic(ap_qid_t qid, struct ap_qirq_ctrl qirqctrl, void *ind) { - register unsigned long reg0 asm ("0") = qid | (3UL << 24); - register union { + unsigned long reg0 = qid | (3UL << 24); /* fc 3UL is AQIC */ + union { unsigned long value; struct ap_qirq_ctrl qirqctrl; struct ap_queue_status status; - } reg1 asm ("1"); - register void *reg2 asm ("2") = ind; + } reg1; + void *reg2 = ind; reg1.qirqctrl = qirqctrl; asm volatile( - ".long 0xb2af0000" /* PQAP(AQIC) */ - : "+d" (reg1) - : "d" (reg0), "d" (reg2) - : "cc"); + " lgr 0,%[reg0]\n" /* qid param into gr0 */ + " lgr 1,%[reg1]\n" /* irq ctrl into gr1 */ + " lgr 2,%[reg2]\n" /* ni addr into gr2 */ + " .long 0xb2af0000\n" /* PQAP(AQIC) */ + " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ + : [reg1] "+&d" (reg1) + : [reg0] "d" (reg0), [reg2] "d" (reg2) + : "cc", "0", "1", "2"); return reg1.status; } @@ -268,21 +284,24 @@ union ap_qact_ap_info { static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit, union ap_qact_ap_info *apinfo) { - register unsigned long reg0 asm ("0") = qid | (5UL << 24) - | ((ifbit & 0x01) << 22); - register union { + unsigned long reg0 = qid | (5UL << 24) | ((ifbit & 0x01) << 22); + union { unsigned long value; struct ap_queue_status status; - } reg1 asm ("1"); - register unsigned long reg2 asm ("2"); + } reg1; + unsigned long reg2; reg1.value = apinfo->val; asm volatile( - ".long 0xb2af0000" /* PQAP(QACT) */ - : "+d" (reg1), "=d" (reg2) - : "d" (reg0) - : "cc"); + " lgr 0,%[reg0]\n" /* qid param into gr0 */ + " lgr 1,%[reg1]\n" /* qact in info into gr1 */ + " .long 0xb2af0000\n" /* PQAP(QACT) */ + " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ + " lgr %[reg2],2\n" /* qact out info into reg2 */ + : [reg1] "+&d" (reg1), [reg2] "=&d" (reg2) + : [reg0] "d" (reg0) + : "cc", "0", "1", "2"); apinfo->val = reg2; return reg1.status; } @@ -303,19 +322,24 @@ static inline struct ap_queue_status ap_nqap(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length) { - register unsigned long reg0 asm ("0") = qid | 0x40000000UL; - register struct ap_queue_status reg1 asm ("1"); - register unsigned long reg2 asm ("2") = (unsigned long) msg; - register unsigned long reg3 asm ("3") = (unsigned long) length; - register unsigned long reg4 asm ("4") = (unsigned int) (psmid >> 32); - register unsigned long reg5 asm ("5") = psmid & 0xffffffff; + unsigned long reg0 = qid | 0x40000000UL; /* 0x4... is last msg part */ + union register_pair nqap_r1, nqap_r2; + struct ap_queue_status reg1; + + nqap_r1.even = (unsigned int)(psmid >> 32); + nqap_r1.odd = psmid & 0xffffffff; + nqap_r2.even = (unsigned long)msg; + nqap_r2.odd = (unsigned long)length; asm volatile ( - "0: .long 0xb2ad0042\n" /* NQAP */ - " brc 2,0b" - : "+d" (reg0), "=d" (reg1), "+d" (reg2), "+d" (reg3) - : "d" (reg4), "d" (reg5) - : "cc", "memory"); + " lgr 0,%[reg0]\n" /* qid param in gr0 */ + "0: .insn rre,0xb2ad0000,%[nqap_r1],%[nqap_r2]\n" + " brc 2,0b\n" /* handle partial completion */ + " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ + : [reg0] "+&d" (reg0), [reg1] "=&d" (reg1), + [nqap_r2] "+&d" (nqap_r2.pair) + : [nqap_r1] "d" (nqap_r1.pair) + : "cc", "memory", "0", "1"); return reg1; } @@ -325,6 +349,8 @@ static inline struct ap_queue_status ap_nqap(ap_qid_t qid, * @psmid: Pointer to program supplied message identifier * @msg: The message text * @length: The message length + * @reslength: Resitual length on return + * @resgr0: input: gr0 value (only used if != 0), output: resitual gr0 content * * Returns AP queue status structure. * Condition code 1 on DQAP means the receive has taken place @@ -336,27 +362,65 @@ static inline struct ap_queue_status ap_nqap(ap_qid_t qid, * Note that gpr2 is used by the DQAP instruction to keep track of * any 'residual' length, in case the instruction gets interrupted. * Hence it gets zeroed before the instruction. + * If the message does not fit into the buffer, this function will + * return with a truncated message and the reply in the firmware queue + * is not removed. This is indicated to the caller with an + * ap_queue_status response_code value of all bits on (0xFF) and (if + * the reslength ptr is given) the remaining length is stored in + * *reslength and (if the resgr0 ptr is given) the updated gr0 value + * for further processing of this msg entry is stored in *resgr0. The + * caller needs to detect this situation and should invoke ap_dqap + * with a valid resgr0 ptr and a value in there != 0 to indicate that + * *resgr0 is to be used instead of qid to further process this entry. */ static inline struct ap_queue_status ap_dqap(ap_qid_t qid, unsigned long long *psmid, - void *msg, size_t length) + void *msg, size_t length, + size_t *reslength, + unsigned long *resgr0) { - register unsigned long reg0 asm("0") = qid | 0x80000000UL; - register struct ap_queue_status reg1 asm ("1"); - register unsigned long reg2 asm("2") = 0UL; - register unsigned long reg4 asm("4") = (unsigned long) msg; - register unsigned long reg5 asm("5") = (unsigned long) length; - register unsigned long reg6 asm("6") = 0UL; - register unsigned long reg7 asm("7") = 0UL; + unsigned long reg0 = resgr0 && *resgr0 ? *resgr0 : qid | 0x80000000UL; + struct ap_queue_status reg1; + unsigned long reg2; + union register_pair rp1, rp2; + rp1.even = 0UL; + rp1.odd = 0UL; + rp2.even = (unsigned long)msg; + rp2.odd = (unsigned long)length; asm volatile( - "0: .long 0xb2ae0064\n" /* DQAP */ - " brc 6,0b\n" - : "+d" (reg0), "=d" (reg1), "+d" (reg2), - "+d" (reg4), "+d" (reg5), "+d" (reg6), "+d" (reg7) - : : "cc", "memory"); - *psmid = (((unsigned long long) reg6) << 32) + reg7; + " lgr 0,%[reg0]\n" /* qid param into gr0 */ + " lghi 2,0\n" /* 0 into gr2 (res length) */ + "0: ltgr %N[rp2],%N[rp2]\n" /* check buf len */ + " jz 2f\n" /* go out if buf len is 0 */ + "1: .insn rre,0xb2ae0000,%[rp1],%[rp2]\n" + " brc 6,0b\n" /* handle partial complete */ + "2: lgr %[reg0],0\n" /* gr0 (qid + info) into reg0 */ + " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ + " lgr %[reg2],2\n" /* gr2 (res length) into reg2 */ + : [reg0] "+&d" (reg0), [reg1] "=&d" (reg1), [reg2] "=&d" (reg2), + [rp1] "+&d" (rp1.pair), [rp2] "+&d" (rp2.pair) + : + : "cc", "memory", "0", "1", "2"); + + if (reslength) + *reslength = reg2; + if (reg2 != 0 && rp2.odd == 0) { + /* + * Partially complete, status in gr1 is not set. + * Signal the caller that this dqap is only partially received + * with a special status response code 0xFF and *resgr0 updated + */ + reg1.response_code = 0xFF; + if (resgr0) + *resgr0 = reg0; + } else { + *psmid = (((unsigned long long)rp1.even) << 32) + rp1.odd; + if (resgr0) + *resgr0 = 0; + } + return reg1; } diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 7c93c6573524..7138d189cc42 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -147,6 +147,4 @@ ATOMIC64_OPS(xor) #define arch_atomic64_fetch_sub(_i, _v) arch_atomic64_fetch_add(-(s64)(_i), _v) #define arch_atomic64_sub(_i, _v) arch_atomic64_add(-(s64)(_i), _v) -#define ARCH_ATOMIC - #endif /* __ARCH_S390_ATOMIC__ */ diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 68da67d2c4c9..fd149480b6e2 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -299,13 +299,13 @@ static inline unsigned char __flogr(unsigned long word) } return bit; } else { - register unsigned long bit asm("4") = word; - register unsigned long out asm("5"); + union register_pair rp; + rp.even = word; asm volatile( - " flogr %[bit],%[bit]\n" - : [bit] "+d" (bit), [out] "=d" (out) : : "cc"); - return bit; + " flogr %[rp],%[rp]\n" + : [rp] "+d" (rp.pair) : : "cc"); + return rp.even; } } diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h index ad3acb1e882b..20f169b6db4e 100644 --- a/arch/s390/include/asm/ccwgroup.h +++ b/arch/s390/include/asm/ccwgroup.h @@ -11,8 +11,7 @@ struct ccw_driver; * @count: number of attached slave devices * @dev: embedded device structure * @cdev: variable number of slave devices, allocated as needed - * @ungroup_work: work to be done when a ccwgroup notifier has action - * type %BUS_NOTIFY_UNBIND_DRIVER + * @ungroup_work: used to ungroup the ccwgroup device */ struct ccwgroup_device { enum { diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index a8c02cfbc712..cdd19d326345 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -29,13 +29,15 @@ */ static inline __wsum csum_partial(const void *buff, int len, __wsum sum) { - register unsigned long reg2 asm("2") = (unsigned long) buff; - register unsigned long reg3 asm("3") = (unsigned long) len; + union register_pair rp = { + .even = (unsigned long) buff, + .odd = (unsigned long) len, + }; asm volatile( - "0: cksm %0,%1\n" /* do checksum on longs */ + "0: cksm %[sum],%[rp]\n" " jo 0b\n" - : "+d" (sum), "+d" (reg2), "+d" (reg3) : : "cc", "memory"); + : [sum] "+&d" (sum), [rp] "+&d" (rp.pair) : : "cc", "memory"); return sum; } diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index ac02df906cae..f58c92f28701 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -9,6 +9,7 @@ #include <linux/bitops.h> #include <linux/genalloc.h> #include <asm/types.h> +#include <asm/tpi.h> #define LPM_ANYPATH 0xff #define __MAX_CSSID 0 diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h index 1960a7295ae5..84c3f0d576c5 100644 --- a/arch/s390/include/asm/cmpxchg.h +++ b/arch/s390/include/asm/cmpxchg.h @@ -169,32 +169,36 @@ static __always_inline unsigned long __cmpxchg(unsigned long address, #define system_has_cmpxchg_double() 1 -#define __cmpxchg_double(p1, p2, o1, o2, n1, n2) \ -({ \ - register __typeof__(*(p1)) __old1 asm("2") = (o1); \ - register __typeof__(*(p2)) __old2 asm("3") = (o2); \ - register __typeof__(*(p1)) __new1 asm("4") = (n1); \ - register __typeof__(*(p2)) __new2 asm("5") = (n2); \ - int cc; \ - asm volatile( \ - " cdsg %[old],%[new],%[ptr]\n" \ - " ipm %[cc]\n" \ - " srl %[cc],28" \ - : [cc] "=d" (cc), [old] "+d" (__old1), "+d" (__old2) \ - : [new] "d" (__new1), "d" (__new2), \ - [ptr] "Q" (*(p1)), "Q" (*(p2)) \ - : "memory", "cc"); \ - !cc; \ -}) +static __always_inline int __cmpxchg_double(unsigned long p1, unsigned long p2, + unsigned long o1, unsigned long o2, + unsigned long n1, unsigned long n2) +{ + union register_pair old = { .even = o1, .odd = o2, }; + union register_pair new = { .even = n1, .odd = n2, }; + int cc; + + asm volatile( + " cdsg %[old],%[new],%[ptr]\n" + " ipm %[cc]\n" + " srl %[cc],28\n" + : [cc] "=&d" (cc), [old] "+&d" (old.pair) + : [new] "d" (new.pair), + [ptr] "QS" (*(unsigned long *)p1), "Q" (*(unsigned long *)p2) + : "memory", "cc"); + return !cc; +} #define arch_cmpxchg_double(p1, p2, o1, o2, n1, n2) \ ({ \ - __typeof__(p1) __p1 = (p1); \ - __typeof__(p2) __p2 = (p2); \ + typeof(p1) __p1 = (p1); \ + typeof(p2) __p2 = (p2); \ + \ BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \ BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \ VM_BUG_ON((unsigned long)((__p1) + 1) != (unsigned long)(__p2));\ - __cmpxchg_double(__p1, __p2, o1, o2, n1, n2); \ + __cmpxchg_double((unsigned long)__p1, (unsigned long)__p2, \ + (unsigned long)(o1), (unsigned long)(o2), \ + (unsigned long)(n1), (unsigned long)(n2)); \ }) #endif /* __ASM_CMPXCHG_H */ diff --git a/arch/s390/include/asm/cpu_mcf.h b/arch/s390/include/asm/cpu_mcf.h index 3e4cbcb7c4cc..ca0e0e5ddbc4 100644 --- a/arch/s390/include/asm/cpu_mcf.h +++ b/arch/s390/include/asm/cpu_mcf.h @@ -32,39 +32,22 @@ static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = { [CPUMF_CTR_SET_MT_DIAG] = 0x20, }; -static inline void ctr_set_enable(u64 *state, int ctr_set) -{ - *state |= cpumf_ctr_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT; -} -static inline void ctr_set_disable(u64 *state, int ctr_set) -{ - *state &= ~(cpumf_ctr_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT); -} -static inline void ctr_set_start(u64 *state, int ctr_set) -{ - *state |= cpumf_ctr_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT; -} -static inline void ctr_set_stop(u64 *state, int ctr_set) -{ - *state &= ~(cpumf_ctr_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT); -} - -static inline void ctr_set_multiple_enable(u64 *state, u64 ctrsets) +static inline void ctr_set_enable(u64 *state, u64 ctrsets) { *state |= ctrsets << CPUMF_LCCTL_ENABLE_SHIFT; } -static inline void ctr_set_multiple_disable(u64 *state, u64 ctrsets) +static inline void ctr_set_disable(u64 *state, u64 ctrsets) { *state &= ~(ctrsets << CPUMF_LCCTL_ENABLE_SHIFT); } -static inline void ctr_set_multiple_start(u64 *state, u64 ctrsets) +static inline void ctr_set_start(u64 *state, u64 ctrsets) { *state |= ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT; } -static inline void ctr_set_multiple_stop(u64 *state, u64 ctrsets) +static inline void ctr_set_stop(u64 *state, u64 ctrsets) { *state &= ~(ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT); } @@ -92,9 +75,15 @@ struct cpu_cf_events { struct cpumf_ctr_info info; atomic_t ctr_set[CPUMF_CTR_SET_MAX]; atomic64_t alert; - u64 state, tx_state; + u64 state; /* For perf_event_open SVC */ + u64 dev_state; /* For /dev/hwctr */ unsigned int flags; - unsigned int txn_flags; + size_t used; /* Bytes used in data */ + size_t usedss; /* Bytes used in start/stop */ + unsigned char start[PAGE_SIZE]; /* Counter set at event add */ + unsigned char stop[PAGE_SIZE]; /* Counter set at event delete */ + unsigned char data[PAGE_SIZE]; /* Counter set at /dev/hwctr */ + unsigned int sets; /* # Counter set saved in memory */ }; DECLARE_PER_CPU(struct cpu_cf_events, cpu_cf_events); @@ -125,4 +114,6 @@ static inline int stccm_avail(void) size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset, struct cpumf_ctr_info *info); +int cfset_online_cpu(unsigned int cpu); +int cfset_offline_cpu(unsigned int cpu); #endif /* _ASM_S390_CPU_MCF_H */ diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h index ed5efbb531c4..adc0179fa34e 100644 --- a/arch/s390/include/asm/ctl_reg.h +++ b/arch/s390/include/asm/ctl_reg.h @@ -21,8 +21,6 @@ #define CR0_INTERRUPT_KEY_SUBMASK BIT(63 - 57) #define CR0_MEASUREMENT_ALERT_SUBMASK BIT(63 - 58) -#define CR2_GUARDED_STORAGE BIT(63 - 59) - #define CR14_UNUSED_32 BIT(63 - 32) #define CR14_UNUSED_33 BIT(63 - 33) #define CR14_CHANNEL_REPORT_SUBMASK BIT(63 - 35) diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 66d51ad090ab..bd00c94620d3 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -144,10 +144,6 @@ typedef s390_compat_regs compat_elf_gregset_t; #include <linux/sched/mm.h> /* for task_struct */ #include <asm/mmu_context.h> -#include <asm/vdso.h> - -extern unsigned int vdso_enabled; - /* * This is used to ensure we don't load something for the wrong architecture. */ @@ -176,7 +172,7 @@ struct arch_elf_state { !current->mm->context.alloc_pgste) { \ set_thread_flag(TIF_PGSTE); \ set_pt_regs_flag(task_pt_regs(current), \ - PIF_SYSCALL_RESTART); \ + PIF_EXECVE_PGSTE_RESTART); \ _state->rc = -EAGAIN; \ } \ _state->rc; \ @@ -268,11 +264,10 @@ do { \ #define STACK_RND_MASK MMAP_RND_MASK /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ -#define ARCH_DLINFO \ -do { \ - if (vdso_enabled) \ - NEW_AUX_ENT(AT_SYSINFO_EHDR, \ - (unsigned long)current->mm->context.vdso_base); \ +#define ARCH_DLINFO \ +do { \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, \ + (unsigned long)current->mm->context.vdso_base); \ } while (0) struct linux_binprm; diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h index baa8005090c3..17aead80aadb 100644 --- a/arch/s390/include/asm/entry-common.h +++ b/arch/s390/include/asm/entry-common.h @@ -14,7 +14,6 @@ #define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_GUARDED_STORAGE | _TIF_PER_TRAP) void do_per_trap(struct pt_regs *regs); -void do_syscall(struct pt_regs *regs); #ifdef CONFIG_DEBUG_ENTRY static __always_inline void arch_check_user_regs(struct pt_regs *regs) diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index 91b5d714d28f..e3aa354ab9f4 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -13,7 +13,10 @@ #include <linux/preempt.h> #include <asm/lowcore.h> -#define MAX_FACILITY_BIT (sizeof(((struct lowcore *)0)->stfle_fac_list) * 8) +#define MAX_FACILITY_BIT (sizeof(stfle_fac_list) * 8) + +extern u64 stfle_fac_list[16]; +extern u64 alt_stfle_fac_list[16]; static inline void __set_facility(unsigned long nr, void *facilities) { @@ -56,18 +59,20 @@ static inline int test_facility(unsigned long nr) if (__test_facility(nr, &facilities_als)) return 1; } - return __test_facility(nr, &S390_lowcore.stfle_fac_list); + return __test_facility(nr, &stfle_fac_list); } static inline unsigned long __stfle_asm(u64 *stfle_fac_list, int size) { - register unsigned long reg0 asm("0") = size - 1; + unsigned long reg0 = size - 1; asm volatile( - ".insn s,0xb2b00000,0(%1)" /* stfle */ - : "+d" (reg0) - : "a" (stfle_fac_list) - : "memory", "cc"); + " lgr 0,%[reg0]\n" + " .insn s,0xb2b00000,%[list]\n" /* stfle */ + " lgr %[reg0],0\n" + : [reg0] "+&d" (reg0), [list] "+Q" (*stfle_fac_list) + : + : "memory", "cc", "0"); return reg0; } @@ -79,13 +84,15 @@ static inline unsigned long __stfle_asm(u64 *stfle_fac_list, int size) static inline void __stfle(u64 *stfle_fac_list, int size) { unsigned long nr; + u32 stfl_fac_list; asm volatile( " stfl 0(0)\n" : "=m" (S390_lowcore.stfl_fac_list)); + stfl_fac_list = S390_lowcore.stfl_fac_list; + memcpy(stfle_fac_list, &stfl_fac_list, 4); nr = 4; /* bytes stored by stfl */ - memcpy(stfle_fac_list, &S390_lowcore.stfl_fac_list, 4); - if (S390_lowcore.stfl_fac_list & 0x01000000) { + if (stfl_fac_list & 0x01000000) { /* More facility bits available with stfle */ nr = __stfle_asm(stfle_fac_list, size); nr = min_t(unsigned long, (nr + 1) * 8, size * 8); diff --git a/arch/s390/include/asm/irqflags.h b/arch/s390/include/asm/irqflags.h index 586df4c9e2f2..02427b205c11 100644 --- a/arch/s390/include/asm/irqflags.h +++ b/arch/s390/include/asm/irqflags.h @@ -32,45 +32,45 @@ }) /* set system mask. */ -static inline notrace void __arch_local_irq_ssm(unsigned long flags) +static __always_inline void __arch_local_irq_ssm(unsigned long flags) { asm volatile("ssm %0" : : "Q" (flags) : "memory"); } -static inline notrace unsigned long arch_local_save_flags(void) +static __always_inline unsigned long arch_local_save_flags(void) { return __arch_local_irq_stnsm(0xff); } -static inline notrace unsigned long arch_local_irq_save(void) +static __always_inline unsigned long arch_local_irq_save(void) { return __arch_local_irq_stnsm(0xfc); } -static inline notrace void arch_local_irq_disable(void) +static __always_inline void arch_local_irq_disable(void) { arch_local_irq_save(); } -static inline notrace void arch_local_irq_enable(void) +static __always_inline void arch_local_irq_enable(void) { __arch_local_irq_stosm(0x03); } /* This only restores external and I/O interrupt state */ -static inline notrace void arch_local_irq_restore(unsigned long flags) +static __always_inline void arch_local_irq_restore(unsigned long flags) { /* only disabled->disabled and disabled->enabled is valid */ if (flags & ARCH_IRQ_ENABLED) arch_local_irq_enable(); } -static inline notrace bool arch_irqs_disabled_flags(unsigned long flags) +static __always_inline bool arch_irqs_disabled_flags(unsigned long flags) { return !(flags & ARCH_IRQ_ENABLED); } -static inline notrace bool arch_irqs_disabled(void) +static __always_inline bool arch_irqs_disabled(void) { return arch_irqs_disabled_flags(arch_local_save_flags()); } diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h index 76f351bd6645..2768d5db181f 100644 --- a/arch/s390/include/asm/kasan.h +++ b/arch/s390/include/asm/kasan.h @@ -16,7 +16,6 @@ extern void kasan_early_init(void); extern void kasan_copy_shadow_mapping(void); extern void kasan_free_early_identity(void); -extern unsigned long kasan_vmax; /* * Estimate kasan memory requirements, which it will reserve diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 611f18ecde91..bf1ab0630ec1 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -362,6 +362,7 @@ struct sie_page { }; struct kvm_vcpu_stat { + struct kvm_vcpu_stat_generic generic; u64 exit_userspace; u64 exit_null; u64 exit_external_request; @@ -371,13 +372,7 @@ struct kvm_vcpu_stat { u64 exit_validity; u64 exit_instruction; u64 exit_pei; - u64 halt_successful_poll; - u64 halt_attempted_poll; - u64 halt_poll_invalid; u64 halt_no_poll_steal; - u64 halt_wakeup; - u64 halt_poll_success_ns; - u64 halt_poll_fail_ns; u64 instruction_lctl; u64 instruction_lctlg; u64 instruction_stctl; @@ -451,15 +446,15 @@ struct kvm_vcpu_stat { u64 instruction_sigp_init_cpu_reset; u64 instruction_sigp_cpu_reset; u64 instruction_sigp_unknown; - u64 diagnose_10; - u64 diagnose_44; - u64 diagnose_9c; - u64 diagnose_9c_ignored; - u64 diagnose_9c_forward; - u64 diagnose_258; - u64 diagnose_308; - u64 diagnose_500; - u64 diagnose_other; + u64 instruction_diagnose_10; + u64 instruction_diagnose_44; + u64 instruction_diagnose_9c; + u64 diag_9c_ignored; + u64 diag_9c_forward; + u64 instruction_diagnose_258; + u64 instruction_diagnose_308; + u64 instruction_diagnose_500; + u64 instruction_diagnose_other; u64 pfault_sync; }; @@ -756,12 +751,12 @@ struct kvm_vcpu_arch { }; struct kvm_vm_stat { + struct kvm_vm_stat_generic generic; u64 inject_io; u64 inject_float_mchk; u64 inject_pfault_done; u64 inject_service_signal; u64 inject_virtio; - u64 remote_tlb_flush; }; struct kvm_arch_memory_slot { diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h index a0a7a2c72bd4..24e8fed150cf 100644 --- a/arch/s390/include/asm/linkage.h +++ b/arch/s390/include/asm/linkage.h @@ -5,7 +5,7 @@ #include <asm/asm-const.h> #include <linux/stringify.h> -#define __ALIGN .align 4, 0x07 +#define __ALIGN .align 16, 0x07 #define __ALIGN_STR __stringify(__ALIGN) /* diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 22bceeeba4bc..47bde5a20a41 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -17,15 +17,23 @@ #define LC_ORDER 1 #define LC_PAGES 2 +struct pgm_tdb { + u64 data[32]; +}; + struct lowcore { __u8 pad_0x0000[0x0014-0x0000]; /* 0x0000 */ __u32 ipl_parmblock_ptr; /* 0x0014 */ __u8 pad_0x0018[0x0080-0x0018]; /* 0x0018 */ __u32 ext_params; /* 0x0080 */ - __u16 ext_cpu_addr; /* 0x0084 */ - __u16 ext_int_code; /* 0x0086 */ - __u16 svc_ilc; /* 0x0088 */ - __u16 svc_code; /* 0x008a */ + union { + struct { + __u16 ext_cpu_addr; /* 0x0084 */ + __u16 ext_int_code; /* 0x0086 */ + }; + __u32 ext_int_code_addr; + }; + __u32 svc_int_code; /* 0x0088 */ __u16 pgm_ilc; /* 0x008c */ __u16 pgm_code; /* 0x008e */ __u32 data_exc_code; /* 0x0090 */ @@ -40,10 +48,15 @@ struct lowcore { __u8 pad_0x00a4[0x00a8-0x00a4]; /* 0x00a4 */ __u64 trans_exc_code; /* 0x00a8 */ __u64 monitor_code; /* 0x00b0 */ - __u16 subchannel_id; /* 0x00b8 */ - __u16 subchannel_nr; /* 0x00ba */ - __u32 io_int_parm; /* 0x00bc */ - __u32 io_int_word; /* 0x00c0 */ + union { + struct { + __u16 subchannel_id; /* 0x00b8 */ + __u16 subchannel_nr; /* 0x00ba */ + __u32 io_int_parm; /* 0x00bc */ + __u32 io_int_word; /* 0x00c0 */ + }; + struct tpi_info tpi_info; /* 0x00b8 */ + }; __u8 pad_0x00c4[0x00c8-0x00c4]; /* 0x00c4 */ __u32 stfl_fac_list; /* 0x00c8 */ __u8 pad_0x00cc[0x00e8-0x00cc]; /* 0x00cc */ @@ -154,12 +167,7 @@ struct lowcore { __u64 vmcore_info; /* 0x0e0c */ __u8 pad_0x0e14[0x0e18-0x0e14]; /* 0x0e14 */ __u64 os_info; /* 0x0e18 */ - __u8 pad_0x0e20[0x0f00-0x0e20]; /* 0x0e20 */ - - /* Extended facility list */ - __u64 stfle_fac_list[16]; /* 0x0f00 */ - __u64 alt_stfle_fac_list[16]; /* 0x0f80 */ - __u8 pad_0x1000[0x11b0-0x1000]; /* 0x1000 */ + __u8 pad_0x0e20[0x11b0-0x0e20]; /* 0x0e20 */ /* Pointer to the machine check extended save area */ __u64 mcesad; /* 0x11b0 */ @@ -185,7 +193,7 @@ struct lowcore { __u8 pad_0x1400[0x1800-0x1400]; /* 0x1400 */ /* Transaction abort diagnostic block */ - __u8 pgm_tdb[256]; /* 0x1800 */ + struct pgm_tdb pgm_tdb; /* 0x1800 */ __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */ } __packed __aligned(8192); diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index e7cffc7b5c2f..c7937f369e62 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -70,8 +70,8 @@ static inline int init_new_context(struct task_struct *tsk, return 0; } -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk) +static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) { int cpu = smp_processor_id(); @@ -85,6 +85,17 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, if (prev != next) cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); } +#define switch_mm_irqs_off switch_mm_irqs_off + +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + unsigned long flags; + + local_irq_save(flags); + switch_mm_irqs_off(prev, next, tsk); + local_irq_restore(flags); +} #define finish_arch_post_lock_switch finish_arch_post_lock_switch static inline void finish_arch_post_lock_switch(void) diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index 20e51c9ff240..2db45d7e68aa 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -23,12 +23,16 @@ #define MCCK_CODE_SYSTEM_DAMAGE BIT(63) #define MCCK_CODE_EXT_DAMAGE BIT(63 - 5) #define MCCK_CODE_CP BIT(63 - 9) -#define MCCK_CODE_CPU_TIMER_VALID BIT(63 - 46) +#define MCCK_CODE_STG_ERROR BIT(63 - 16) +#define MCCK_CODE_STG_KEY_ERROR BIT(63 - 18) +#define MCCK_CODE_STG_DEGRAD BIT(63 - 19) #define MCCK_CODE_PSW_MWP_VALID BIT(63 - 20) #define MCCK_CODE_PSW_IA_VALID BIT(63 - 23) +#define MCCK_CODE_STG_FAIL_ADDR BIT(63 - 24) #define MCCK_CODE_CR_VALID BIT(63 - 29) #define MCCK_CODE_GS_VALID BIT(63 - 36) #define MCCK_CODE_FC_VALID BIT(63 - 43) +#define MCCK_CODE_CPU_TIMER_VALID BIT(63 - 46) #ifndef __ASSEMBLY__ diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index cc98f9b78fd4..3ba945c6b9dc 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -55,22 +55,25 @@ static inline void storage_key_init_range(unsigned long start, unsigned long end */ static inline void copy_page(void *to, void *from) { - register void *reg2 asm ("2") = to; - register unsigned long reg3 asm ("3") = 0x1000; - register void *reg4 asm ("4") = from; - register unsigned long reg5 asm ("5") = 0xb0001000; + union register_pair dst, src; + + dst.even = (unsigned long) to; + dst.odd = 0x1000; + src.even = (unsigned long) from; + src.odd = 0xb0001000; + asm volatile( - " mvcl 2,4" - : "+d" (reg2), "+d" (reg3), "+d" (reg4), "+d" (reg5) + " mvcl %[dst],%[src]" + : [dst] "+&d" (dst.pair), [src] "+&d" (src.pair) : : "memory", "cc"); } #define clear_user_page(page, vaddr, pg) clear_page(page) #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) -#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ - alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) -#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE +#define alloc_zeroed_user_highpage_movable(vma, vaddr) \ + alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr) +#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE /* * These are used to make use of C type-checking.. diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 10b67f8aab99..5509b224c2ec 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -133,7 +133,8 @@ struct zpci_dev { u8 has_resources : 1; u8 is_physfn : 1; u8 util_str_avail : 1; - u8 reserved : 3; + u8 irqs_registered : 1; + u8 reserved : 2; unsigned int devfn; /* DEVFN part of the RID*/ struct mutex lock; @@ -271,9 +272,13 @@ struct zpci_dev *get_zdev_by_fid(u32); int zpci_dma_init(void); void zpci_dma_exit(void); +/* IRQ */ int __init zpci_irq_init(void); void __init zpci_irq_exit(void); +int zpci_set_irq(struct zpci_dev *zdev); +int zpci_clear_irq(struct zpci_dev *zdev); + /* FMB */ int zpci_fmb_enable_device(struct zpci_dev *); int zpci_fmb_disable_device(struct zpci_dev *); diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index 918f0ba4f4d2..cb5fc0690435 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -164,19 +164,20 @@ #define this_cpu_xchg_4(pcp, nval) arch_this_cpu_xchg(pcp, nval) #define this_cpu_xchg_8(pcp, nval) arch_this_cpu_xchg(pcp, nval) -#define arch_this_cpu_cmpxchg_double(pcp1, pcp2, o1, o2, n1, n2) \ -({ \ - typeof(pcp1) o1__ = (o1), n1__ = (n1); \ - typeof(pcp2) o2__ = (o2), n2__ = (n2); \ - typeof(pcp1) *p1__; \ - typeof(pcp2) *p2__; \ - int ret__; \ - preempt_disable_notrace(); \ - p1__ = raw_cpu_ptr(&(pcp1)); \ - p2__ = raw_cpu_ptr(&(pcp2)); \ - ret__ = __cmpxchg_double(p1__, p2__, o1__, o2__, n1__, n2__); \ - preempt_enable_notrace(); \ - ret__; \ +#define arch_this_cpu_cmpxchg_double(pcp1, pcp2, o1, o2, n1, n2) \ +({ \ + typeof(pcp1) *p1__; \ + typeof(pcp2) *p2__; \ + int ret__; \ + \ + preempt_disable_notrace(); \ + p1__ = raw_cpu_ptr(&(pcp1)); \ + p2__ = raw_cpu_ptr(&(pcp2)); \ + ret__ = __cmpxchg_double((unsigned long)p1__, (unsigned long)p2__, \ + (unsigned long)(o1), (unsigned long)(o2), \ + (unsigned long)(n1), (unsigned long)(n2)); \ + preempt_enable_notrace(); \ + ret__; \ }) #define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 6b187cd72251..f14a555eff74 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -134,9 +134,6 @@ static inline void pmd_populate(struct mm_struct *mm, #define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte) -#define pmd_pgtable(pmd) \ - ((pgtable_t)__va(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE)) - /* * page table entry allocation/free routines. */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 29c7ecd5ad1d..dcac7b2df72c 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -17,6 +17,7 @@ #include <linux/page-flags.h> #include <linux/radix-tree.h> #include <linux/atomic.h> +#include <asm/sections.h> #include <asm/bug.h> #include <asm/page.h> #include <asm/uv.h> @@ -65,8 +66,6 @@ extern unsigned long zero_page_mask; /* TODO: s390 cannot support io_remap_pfn_range... */ -#define FIRST_USER_ADDRESS 0UL - #define pte_ERROR(e) \ printk("%s:%d: bad pte %p.\n", __FILE__, __LINE__, (void *) pte_val(e)) #define pmd_ERROR(e) \ @@ -86,16 +85,16 @@ extern unsigned long zero_page_mask; * happen without trampolines and in addition the placement within a * 2GB frame is branch prediction unit friendly. */ -extern unsigned long VMALLOC_START; -extern unsigned long VMALLOC_END; +extern unsigned long __bootdata_preserved(VMALLOC_START); +extern unsigned long __bootdata_preserved(VMALLOC_END); #define VMALLOC_DEFAULT_SIZE ((512UL << 30) - MODULES_LEN) -extern struct page *vmemmap; -extern unsigned long vmemmap_size; +extern struct page *__bootdata_preserved(vmemmap); +extern unsigned long __bootdata_preserved(vmemmap_size); #define VMEM_MAX_PHYS ((unsigned long) vmemmap) -extern unsigned long MODULES_VADDR; -extern unsigned long MODULES_END; +extern unsigned long __bootdata_preserved(MODULES_VADDR); +extern unsigned long __bootdata_preserved(MODULES_END); #define MODULES_VADDR MODULES_VADDR #define MODULES_END MODULES_END #define MODULES_LEN (1UL << 31) @@ -344,8 +343,6 @@ static inline int is_module_addr(void *addr) #define PTRS_PER_P4D _CRST_ENTRIES #define PTRS_PER_PGD _CRST_ENTRIES -#define MAX_PTRS_PER_P4D PTRS_PER_P4D - /* * Segment table and region3 table entry encoding * (R = read-only, I = invalid, y = young bit): @@ -557,27 +554,25 @@ static inline int mm_uses_skeys(struct mm_struct *mm) static inline void csp(unsigned int *ptr, unsigned int old, unsigned int new) { - register unsigned long reg2 asm("2") = old; - register unsigned long reg3 asm("3") = new; + union register_pair r1 = { .even = old, .odd = new, }; unsigned long address = (unsigned long)ptr | 1; asm volatile( - " csp %0,%3" - : "+d" (reg2), "+m" (*ptr) - : "d" (reg3), "d" (address) + " csp %[r1],%[address]" + : [r1] "+&d" (r1.pair), "+m" (*ptr) + : [address] "d" (address) : "cc"); } static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new) { - register unsigned long reg2 asm("2") = old; - register unsigned long reg3 asm("3") = new; + union register_pair r1 = { .even = old, .odd = new, }; unsigned long address = (unsigned long)ptr | 1; asm volatile( - " .insn rre,0xb98a0000,%0,%3" - : "+d" (reg2), "+m" (*ptr) - : "d" (reg3), "d" (address) + " .insn rre,0xb98a0000,%[r1],%[address]" + : [r1] "+&d" (r1.pair), "+m" (*ptr) + : [address] "d" (address) : "cc"); } @@ -591,14 +586,12 @@ static inline void crdte(unsigned long old, unsigned long new, unsigned long table, unsigned long dtt, unsigned long address, unsigned long asce) { - register unsigned long reg2 asm("2") = old; - register unsigned long reg3 asm("3") = new; - register unsigned long reg4 asm("4") = table | dtt; - register unsigned long reg5 asm("5") = address; + union register_pair r1 = { .even = old, .odd = new, }; + union register_pair r2 = { .even = table | dtt, .odd = address, }; - asm volatile(".insn rrf,0xb98f0000,%0,%2,%4,0" - : "+d" (reg2) - : "d" (reg3), "d" (reg4), "d" (reg5), "a" (asce) + asm volatile(".insn rrf,0xb98f0000,%[r1],%[r2],%[asce],0" + : [r1] "+&d" (r1.pair) + : [r2] "d" (r2.pair), [asce] "a" (asce) : "memory", "cc"); } @@ -866,6 +859,25 @@ static inline int pte_unused(pte_t pte) } /* + * Extract the pgprot value from the given pte while at the same time making it + * usable for kernel address space mappings where fault driven dirty and + * young/old accounting is not supported, i.e _PAGE_PROTECT and _PAGE_INVALID + * must not be set. + */ +static inline pgprot_t pte_pgprot(pte_t pte) +{ + unsigned long pte_flags = pte_val(pte) & _PAGE_CHG_MASK; + + if (pte_write(pte)) + pte_flags |= pgprot_val(PAGE_KERNEL); + else + pte_flags |= pgprot_val(PAGE_KERNEL_RO); + pte_flags |= pte_val(pte) & mio_wb_bit_mask; + + return __pgprot(pte_flags); +} + +/* * pgd/pmd/pte modification functions */ @@ -1713,4 +1725,7 @@ extern void s390_reset_cmma(struct mm_struct *mm); #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN +#define pmd_pgtable(pmd) \ + ((pgtable_t)__va(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE)) + #endif /* _S390_PAGE_H */ diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index b49e0492842c..d9d5350cc3ec 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -29,12 +29,6 @@ static inline void preempt_count_set(int pc) old, new) != old); } -#define init_task_preempt_count(p) do { } while (0) - -#define init_idle_preempt_count(p, cpu) do { \ - S390_lowcore.preempt_count = PREEMPT_ENABLED; \ -} while (0) - static inline void set_preempt_need_resched(void) { __atomic_and(~PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count); @@ -88,12 +82,6 @@ static inline void preempt_count_set(int pc) S390_lowcore.preempt_count = pc; } -#define init_task_preempt_count(p) do { } while (0) - -#define init_idle_preempt_count(p, cpu) do { \ - S390_lowcore.preempt_count = PREEMPT_ENABLED; \ -} while (0) - static inline void set_preempt_need_resched(void) { } @@ -130,6 +118,10 @@ static inline bool should_resched(int preempt_offset) #endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ +#define init_task_preempt_count(p) do { } while (0) +/* Deferred to CPU bringup time */ +#define init_idle_preempt_count(p, cpu) do { } while (0) + #ifdef CONFIG_PREEMPTION extern void preempt_schedule(void); #define __preempt_schedule() preempt_schedule() diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 023a15dc25a3..ddc7858bbce4 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -129,7 +129,7 @@ struct thread_struct { struct runtime_instr_cb *ri_cb; struct gs_cb *gs_cb; /* Current guarded storage cb */ struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */ - unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ + struct pgm_tdb trap_tdb; /* Transaction abort diagnose block */ /* * Warning: 'fpu' is dynamically-sized. It *MUST* be at * the end. @@ -207,7 +207,7 @@ static __always_inline unsigned long current_stack_pointer(void) return sp; } -static __no_kasan_or_inline unsigned short stap(void) +static __always_inline unsigned short stap(void) { unsigned short cpu_address; @@ -246,7 +246,7 @@ static inline void __load_psw(psw_t psw) * Set PSW mask to specified value, while leaving the * PSW addr pointing to the next instruction. */ -static __no_kasan_or_inline void __load_psw_mask(unsigned long mask) +static __always_inline void __load_psw_mask(unsigned long mask) { unsigned long addr; psw_t psw; diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index f828be78937f..61b22aa990e7 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -9,16 +9,17 @@ #include <linux/bits.h> #include <uapi/asm/ptrace.h> +#include <asm/tpi.h> -#define PIF_SYSCALL 0 /* inside a system call */ -#define PIF_SYSCALL_RESTART 1 /* restart the current system call */ -#define PIF_SYSCALL_RET_SET 2 /* return value was set via ptrace */ -#define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */ +#define PIF_SYSCALL 0 /* inside a system call */ +#define PIF_EXECVE_PGSTE_RESTART 1 /* restart execve for PGSTE binaries */ +#define PIF_SYSCALL_RET_SET 2 /* return value was set via ptrace */ +#define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */ -#define _PIF_SYSCALL BIT(PIF_SYSCALL) -#define _PIF_SYSCALL_RESTART BIT(PIF_SYSCALL_RESTART) -#define _PIF_SYSCALL_RET_SET BIT(PIF_SYSCALL_RET_SET) -#define _PIF_GUEST_FAULT BIT(PIF_GUEST_FAULT) +#define _PIF_SYSCALL BIT(PIF_SYSCALL) +#define _PIF_EXECVE_PGSTE_RESTART BIT(PIF_EXECVE_PGSTE_RESTART) +#define _PIF_SYSCALL_RET_SET BIT(PIF_SYSCALL_RET_SET) +#define _PIF_GUEST_FAULT BIT(PIF_GUEST_FAULT) #ifndef __ASSEMBLY__ @@ -86,9 +87,14 @@ struct pt_regs }; }; unsigned long orig_gpr2; - unsigned int int_code; - unsigned int int_parm; - unsigned long int_parm_long; + union { + struct { + unsigned int int_code; + unsigned int int_parm; + unsigned long int_parm_long; + }; + struct tpi_info tpi_info; + }; unsigned long flags; unsigned long cr1; }; @@ -156,6 +162,14 @@ static inline int test_pt_regs_flag(struct pt_regs *regs, int flag) return !!(regs->flags & (1UL << flag)); } +static inline int test_and_clear_pt_regs_flag(struct pt_regs *regs, int flag) +{ + int ret = test_pt_regs_flag(regs, flag); + + clear_pt_regs_flag(regs, flag); + return ret; +} + /* * These are defined as per linux/ptrace.h, which see. */ diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 8fc52679543d..cb4f73c7228d 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -137,7 +137,6 @@ struct slibe { * @user0: user defineable value * @res4: reserved paramater * @user1: user defineable value - * @user2: user defineable value */ struct qaob { u64 res0[6]; @@ -152,8 +151,7 @@ struct qaob { u16 dcount[QDIO_MAX_ELEMENTS_PER_BUFFER]; u64 user0; u64 res4[2]; - u64 user1; - u64 user2; + u8 user1[16]; } __attribute__ ((packed, aligned(256))); /** diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 3e388fa208d4..3a77aa96d092 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -78,6 +78,8 @@ struct parmarea { char command_line[ARCH_COMMAND_LINE_SIZE]; /* 0x10480 */ }; +extern struct parmarea parmarea; + extern unsigned int zlib_dfltcc_support; #define ZLIB_DFLTCC_DISABLED 0 #define ZLIB_DFLTCC_FULL 1 @@ -87,7 +89,6 @@ extern unsigned int zlib_dfltcc_support; extern int noexec_disabled; extern unsigned long ident_map_size; -extern unsigned long vmalloc_size; /* The Write Back bit position in the physaddr is given by the SLPC PCI */ extern unsigned long mio_wb_bit_mask; @@ -158,6 +159,8 @@ static inline unsigned long kaslr_offset(void) return __kaslr_offset; } +extern int is_full_image; + static inline u32 gen_lpswe(unsigned long addr) { BUILD_BUG_ON(addr > 0xfff); diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index 53ee795cd3d3..edee63da08e7 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -41,15 +41,17 @@ static inline int ____pcpu_sigp(u16 addr, u8 order, unsigned long parm, u32 *status) { - register unsigned long reg1 asm ("1") = parm; + union register_pair r1 = { .odd = parm, }; int cc; asm volatile( - " sigp %1,%2,0(%3)\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc"); - *status = reg1; + " sigp %[r1],%[addr],0(%[order])\n" + " ipm %[cc]\n" + " srl %[cc],28\n" + : [cc] "=&d" (cc), [r1] "+&d" (r1.pair) + : [addr] "d" (addr), [order] "a" (order) + : "cc"); + *status = r1.even; return cc; } diff --git a/arch/s390/include/asm/softirq_stack.h b/arch/s390/include/asm/softirq_stack.h new file mode 100644 index 000000000000..fd17f25704bd --- /dev/null +++ b/arch/s390/include/asm/softirq_stack.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef __ASM_S390_SOFTIRQ_STACK_H +#define __ASM_S390_SOFTIRQ_STACK_H + +#include <asm/lowcore.h> +#include <asm/stacktrace.h> + +static inline void do_softirq_own_stack(void) +{ + call_on_stack(0, S390_lowcore.async_stack, void, __do_softirq); +} + +#endif /* __ASM_S390_SOFTIRQ_STACK_H */ diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index 2b543163d90a..3d8a4b94c620 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -74,29 +74,16 @@ struct stack_frame { ((unsigned long)__builtin_frame_address(0) - \ offsetof(struct stack_frame, back_chain)) -#define CALL_ARGS_0() \ - register unsigned long r2 asm("2") -#define CALL_ARGS_1(arg1) \ - register unsigned long r2 asm("2") = (unsigned long)(arg1) -#define CALL_ARGS_2(arg1, arg2) \ - CALL_ARGS_1(arg1); \ - register unsigned long r3 asm("3") = (unsigned long)(arg2) -#define CALL_ARGS_3(arg1, arg2, arg3) \ - CALL_ARGS_2(arg1, arg2); \ - register unsigned long r4 asm("4") = (unsigned long)(arg3) -#define CALL_ARGS_4(arg1, arg2, arg3, arg4) \ - CALL_ARGS_3(arg1, arg2, arg3); \ - register unsigned long r4 asm("5") = (unsigned long)(arg4) -#define CALL_ARGS_5(arg1, arg2, arg3, arg4, arg5) \ - CALL_ARGS_4(arg1, arg2, arg3, arg4); \ - register unsigned long r4 asm("6") = (unsigned long)(arg5) - -#define CALL_FMT_0 "=&d" (r2) : -#define CALL_FMT_1 "+&d" (r2) : -#define CALL_FMT_2 CALL_FMT_1 "d" (r3), -#define CALL_FMT_3 CALL_FMT_2 "d" (r4), -#define CALL_FMT_4 CALL_FMT_3 "d" (r5), -#define CALL_FMT_5 CALL_FMT_4 "d" (r6), +/* + * To keep this simple mark register 2-6 as being changed (volatile) + * by the called function, even though register 6 is saved/nonvolatile. + */ +#define CALL_FMT_0 "=&d" (r2) +#define CALL_FMT_1 "+&d" (r2) +#define CALL_FMT_2 CALL_FMT_1, "+&d" (r3) +#define CALL_FMT_3 CALL_FMT_2, "+&d" (r4) +#define CALL_FMT_4 CALL_FMT_3, "+&d" (r5) +#define CALL_FMT_5 CALL_FMT_4, "+&d" (r6) #define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory" #define CALL_CLOBBER_4 CALL_CLOBBER_5 @@ -105,34 +92,113 @@ struct stack_frame { #define CALL_CLOBBER_1 CALL_CLOBBER_2, "3" #define CALL_CLOBBER_0 CALL_CLOBBER_1 -#define CALL_ON_STACK(fn, stack, nr, args...) \ +#define CALL_LARGS_0(...) \ + long dummy = 0 +#define CALL_LARGS_1(t1, a1) \ + long arg1 = (long)(t1)(a1) +#define CALL_LARGS_2(t1, a1, t2, a2) \ + CALL_LARGS_1(t1, a1); \ + long arg2 = (long)(t2)(a2) +#define CALL_LARGS_3(t1, a1, t2, a2, t3, a3) \ + CALL_LARGS_2(t1, a1, t2, a2); \ + long arg3 = (long)(t3)(a3) +#define CALL_LARGS_4(t1, a1, t2, a2, t3, a3, t4, a4) \ + CALL_LARGS_3(t1, a1, t2, a2, t3, a3); \ + long arg4 = (long)(t4)(a4) +#define CALL_LARGS_5(t1, a1, t2, a2, t3, a3, t4, a4, t5, a5) \ + CALL_LARGS_4(t1, a1, t2, a2, t3, a3, t4, a4); \ + long arg5 = (long)(t5)(a5) + +#define CALL_REGS_0 \ + register long r2 asm("2") = dummy +#define CALL_REGS_1 \ + register long r2 asm("2") = arg1 +#define CALL_REGS_2 \ + CALL_REGS_1; \ + register long r3 asm("3") = arg2 +#define CALL_REGS_3 \ + CALL_REGS_2; \ + register long r4 asm("4") = arg3 +#define CALL_REGS_4 \ + CALL_REGS_3; \ + register long r5 asm("5") = arg4 +#define CALL_REGS_5 \ + CALL_REGS_4; \ + register long r6 asm("6") = arg5 + +#define CALL_TYPECHECK_0(...) +#define CALL_TYPECHECK_1(t, a, ...) \ + typecheck(t, a) +#define CALL_TYPECHECK_2(t, a, ...) \ + CALL_TYPECHECK_1(__VA_ARGS__); \ + typecheck(t, a) +#define CALL_TYPECHECK_3(t, a, ...) \ + CALL_TYPECHECK_2(__VA_ARGS__); \ + typecheck(t, a) +#define CALL_TYPECHECK_4(t, a, ...) \ + CALL_TYPECHECK_3(__VA_ARGS__); \ + typecheck(t, a) +#define CALL_TYPECHECK_5(t, a, ...) \ + CALL_TYPECHECK_4(__VA_ARGS__); \ + typecheck(t, a) + +#define CALL_PARM_0(...) void +#define CALL_PARM_1(t, a, ...) t +#define CALL_PARM_2(t, a, ...) t, CALL_PARM_1(__VA_ARGS__) +#define CALL_PARM_3(t, a, ...) t, CALL_PARM_2(__VA_ARGS__) +#define CALL_PARM_4(t, a, ...) t, CALL_PARM_3(__VA_ARGS__) +#define CALL_PARM_5(t, a, ...) t, CALL_PARM_4(__VA_ARGS__) +#define CALL_PARM_6(t, a, ...) t, CALL_PARM_5(__VA_ARGS__) + +/* + * Use call_on_stack() to call a function switching to a specified + * stack. Proper sign and zero extension of function arguments is + * done. Usage: + * + * rc = call_on_stack(nr, stack, rettype, fn, t1, a1, t2, a2, ...) + * + * - nr specifies the number of function arguments of fn. + * - stack specifies the stack to be used. + * - fn is the function to be called. + * - rettype is the return type of fn. + * - t1, a1, ... are pairs, where t1 must match the type of the first + * argument of fn, t2 the second, etc. a1 is the corresponding + * first function argument (not name), etc. + */ +#define call_on_stack(nr, stack, rettype, fn, ...) \ ({ \ + rettype (*__fn)(CALL_PARM_##nr(__VA_ARGS__)) = fn; \ unsigned long frame = current_frame_address(); \ - CALL_ARGS_##nr(args); \ + unsigned long __stack = stack; \ unsigned long prev; \ + CALL_LARGS_##nr(__VA_ARGS__); \ + CALL_REGS_##nr; \ \ + CALL_TYPECHECK_##nr(__VA_ARGS__); \ asm volatile( \ - " la %[_prev],0(15)\n" \ + " lgr %[_prev],15\n" \ " lg 15,%[_stack]\n" \ " stg %[_frame],%[_bc](15)\n" \ " brasl 14,%[_fn]\n" \ - " la 15,0(%[_prev])\n" \ - : [_prev] "=&a" (prev), CALL_FMT_##nr \ - [_stack] "R" (stack), \ + " lgr 15,%[_prev]\n" \ + : [_prev] "=&d" (prev), CALL_FMT_##nr \ + : [_stack] "R" (__stack), \ [_bc] "i" (offsetof(struct stack_frame, back_chain)), \ [_frame] "d" (frame), \ - [_fn] "X" (fn) : CALL_CLOBBER_##nr); \ - r2; \ + [_fn] "X" (__fn) : CALL_CLOBBER_##nr); \ + (rettype)r2; \ }) -#define CALL_ON_STACK_NORETURN(fn, stack) \ +#define call_on_stack_noreturn(fn, stack) \ ({ \ + void (*__fn)(void) = fn; \ + \ asm volatile( \ " la 15,0(%[_stack])\n" \ " xc %[_bc](8,15),%[_bc](15)\n" \ " brasl 14,%[_fn]\n" \ ::[_bc] "i" (offsetof(struct stack_frame, back_chain)), \ - [_stack] "a" (stack), [_fn] "X" (fn)); \ + [_stack] "a" (stack), [_fn] "X" (__fn)); \ BUG(); \ }) diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h index 4c0690fc5167..4fd66c5e8934 100644 --- a/arch/s390/include/asm/string.h +++ b/arch/s390/include/asm/string.h @@ -107,16 +107,18 @@ static inline void *memset64(uint64_t *s, uint64_t v, size_t count) #ifdef __HAVE_ARCH_MEMCHR static inline void *memchr(const void * s, int c, size_t n) { - register int r0 asm("0") = (char) c; const void *ret = s + n; asm volatile( - "0: srst %0,%1\n" + " lgr 0,%[c]\n" + "0: srst %[ret],%[s]\n" " jo 0b\n" " jl 1f\n" - " la %0,0\n" + " la %[ret],0\n" "1:" - : "+a" (ret), "+&a" (s) : "d" (r0) : "cc", "memory"); + : [ret] "+&a" (ret), [s] "+&a" (s) + : [c] "d" (c) + : "cc", "memory", "0"); return (void *) ret; } #endif @@ -124,13 +126,15 @@ static inline void *memchr(const void * s, int c, size_t n) #ifdef __HAVE_ARCH_MEMSCAN static inline void *memscan(void *s, int c, size_t n) { - register int r0 asm("0") = (char) c; const void *ret = s + n; asm volatile( - "0: srst %0,%1\n" + " lgr 0,%[c]\n" + "0: srst %[ret],%[s]\n" " jo 0b\n" - : "+a" (ret), "+&a" (s) : "d" (r0) : "cc", "memory"); + : [ret] "+&a" (ret), [s] "+&a" (s) + : [c] "d" (c) + : "cc", "memory", "0"); return (void *) ret; } #endif @@ -138,17 +142,18 @@ static inline void *memscan(void *s, int c, size_t n) #ifdef __HAVE_ARCH_STRCAT static inline char *strcat(char *dst, const char *src) { - register int r0 asm("0") = 0; - unsigned long dummy; + unsigned long dummy = 0; char *ret = dst; asm volatile( - "0: srst %0,%1\n" + " lghi 0,0\n" + "0: srst %[dummy],%[dst]\n" " jo 0b\n" - "1: mvst %0,%2\n" + "1: mvst %[dummy],%[src]\n" " jo 1b" - : "=&a" (dummy), "+a" (dst), "+a" (src) - : "d" (r0), "0" (0) : "cc", "memory" ); + : [dummy] "+&a" (dummy), [dst] "+&a" (dst), [src] "+&a" (src) + : + : "cc", "memory", "0"); return ret; } #endif @@ -156,14 +161,15 @@ static inline char *strcat(char *dst, const char *src) #ifdef __HAVE_ARCH_STRCPY static inline char *strcpy(char *dst, const char *src) { - register int r0 asm("0") = 0; char *ret = dst; asm volatile( - "0: mvst %0,%1\n" + " lghi 0,0\n" + "0: mvst %[dst],%[src]\n" " jo 0b" - : "+&a" (dst), "+&a" (src) : "d" (r0) - : "cc", "memory"); + : [dst] "+&a" (dst), [src] "+&a" (src) + : + : "cc", "memory", "0"); return ret; } #endif @@ -171,28 +177,33 @@ static inline char *strcpy(char *dst, const char *src) #if defined(__HAVE_ARCH_STRLEN) || (defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)) static inline size_t __no_sanitize_prefix_strfunc(strlen)(const char *s) { - register unsigned long r0 asm("0") = 0; + unsigned long end = 0; const char *tmp = s; asm volatile( - "0: srst %0,%1\n" + " lghi 0,0\n" + "0: srst %[end],%[tmp]\n" " jo 0b" - : "+d" (r0), "+a" (tmp) : : "cc", "memory"); - return r0 - (unsigned long) s; + : [end] "+&a" (end), [tmp] "+&a" (tmp) + : + : "cc", "memory", "0"); + return end - (unsigned long)s; } #endif #ifdef __HAVE_ARCH_STRNLEN static inline size_t strnlen(const char * s, size_t n) { - register int r0 asm("0") = 0; const char *tmp = s; const char *end = s + n; asm volatile( - "0: srst %0,%1\n" + " lghi 0,0\n" + "0: srst %[end],%[tmp]\n" " jo 0b" - : "+a" (end), "+a" (tmp) : "d" (r0) : "cc", "memory"); + : [end] "+&a" (end), [tmp] "+&a" (tmp) + : + : "cc", "memory", "0"); return end - s; } #endif diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index f6326c6d2abe..50d9b04ecbd1 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -75,9 +75,12 @@ static inline void set_clock_comparator(__u64 time) static inline void set_tod_programmable_field(u16 val) { - register unsigned long reg0 asm("0") = val; - - asm volatile("sckpf" : : "d" (reg0)); + asm volatile( + " lgr 0,%[val]\n" + " sckpf\n" + : + : [val] "d" ((unsigned long)val) + : "0"); } void clock_comparator_work(void); @@ -138,16 +141,19 @@ struct ptff_qui { #define ptff(ptff_block, len, func) \ ({ \ struct addrtype { char _[len]; }; \ - register unsigned int reg0 asm("0") = func; \ - register unsigned long reg1 asm("1") = (unsigned long) (ptff_block);\ + unsigned int reg0 = func; \ + unsigned long reg1 = (unsigned long)(ptff_block); \ int rc; \ \ asm volatile( \ - " .word 0x0104\n" \ - " ipm %0\n" \ - " srl %0,28\n" \ - : "=d" (rc), "+m" (*(struct addrtype *) reg1) \ - : "d" (reg0), "d" (reg1) : "cc"); \ + " lgr 0,%[reg0]\n" \ + " lgr 1,%[reg1]\n" \ + " .insn e,0x0104\n" \ + " ipm %[rc]\n" \ + " srl %[rc],28\n" \ + : [rc] "=&d" (rc), "+m" (*(struct addrtype *)reg1) \ + : [reg0] "d" (reg0), [reg1] "d" (reg1) \ + : "cc", "0", "1"); \ rc; \ }) diff --git a/arch/s390/include/asm/tpi.h b/arch/s390/include/asm/tpi.h new file mode 100644 index 000000000000..1ac538b8cbf5 --- /dev/null +++ b/arch/s390/include/asm/tpi.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_S390_TPI_H +#define _ASM_S390_TPI_H + +#include <linux/types.h> +#include <uapi/asm/schid.h> + +#ifndef __ASSEMBLY__ + +/* I/O-Interruption Code as stored by TEST PENDING INTERRUPTION (TPI). */ +struct tpi_info { + struct subchannel_id schid; + u32 intparm; + u32 adapter_IO:1; + u32 directed_irq:1; + u32 isc:3; + u32 :12; + u32 type:3; + u32 :12; +} __packed __aligned(4); + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_S390_TPI_H */ diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h new file mode 100644 index 000000000000..0b5d550a0478 --- /dev/null +++ b/arch/s390/include/asm/types.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _ASM_S390_TYPES_H +#define _ASM_S390_TYPES_H + +#include <uapi/asm/types.h> + +#ifndef __ASSEMBLY__ + +union register_pair { + unsigned __int128 pair; + struct { + unsigned long even; + unsigned long odd; + }; +}; + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_S390_TYPES_H */ diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 4756d2937e54..2316f2440881 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -49,52 +49,51 @@ int __get_user_bad(void) __attribute__((noreturn)); #ifdef CONFIG_HAVE_MARCH_Z10_FEATURES -#define __put_get_user_asm(to, from, size, spec) \ +#define __put_get_user_asm(to, from, size, insn) \ ({ \ - register unsigned long __reg0 asm("0") = spec; \ int __rc; \ \ asm volatile( \ - "0: mvcos %1,%3,%2\n" \ - "1: xr %0,%0\n" \ + insn " 0,%[spec]\n" \ + "0: mvcos %[_to],%[_from],%[_size]\n" \ + "1: xr %[rc],%[rc]\n" \ "2:\n" \ ".pushsection .fixup, \"ax\"\n" \ - "3: lhi %0,%5\n" \ + "3: lhi %[rc],%[retval]\n" \ " jg 2b\n" \ ".popsection\n" \ EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ - : "=d" (__rc), "+Q" (*(to)) \ - : "d" (size), "Q" (*(from)), \ - "d" (__reg0), "K" (-EFAULT) \ - : "cc"); \ + : [rc] "=&d" (__rc), [_to] "+Q" (*(to)) \ + : [_size] "d" (size), [_from] "Q" (*(from)), \ + [retval] "K" (-EFAULT), [spec] "K" (0x81UL) \ + : "cc", "0"); \ __rc; \ }) static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) { - unsigned long spec = 0x810000UL; int rc; switch (size) { case 1: rc = __put_get_user_asm((unsigned char __user *)ptr, (unsigned char *)x, - size, spec); + size, "llilh"); break; case 2: rc = __put_get_user_asm((unsigned short __user *)ptr, (unsigned short *)x, - size, spec); + size, "llilh"); break; case 4: rc = __put_get_user_asm((unsigned int __user *)ptr, (unsigned int *)x, - size, spec); + size, "llilh"); break; case 8: rc = __put_get_user_asm((unsigned long __user *)ptr, (unsigned long *)x, - size, spec); + size, "llilh"); break; default: __put_user_bad(); @@ -105,29 +104,28 @@ static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned lon static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) { - unsigned long spec = 0x81UL; int rc; switch (size) { case 1: rc = __put_get_user_asm((unsigned char *)x, (unsigned char __user *)ptr, - size, spec); + size, "lghi"); break; case 2: rc = __put_get_user_asm((unsigned short *)x, (unsigned short __user *)ptr, - size, spec); + size, "lghi"); break; case 4: rc = __put_get_user_asm((unsigned int *)x, (unsigned int __user *)ptr, - size, spec); + size, "lghi"); break; case 8: rc = __put_get_user_asm((unsigned long *)x, (unsigned long __user *)ptr, - size, spec); + size, "lghi"); break; default: __get_user_bad(); diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index 7b98d4caee77..12c5f006c136 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -73,6 +73,10 @@ enum uv_cmds_inst { BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22, }; +enum uv_feat_ind { + BIT_UV_FEAT_MISC = 0, +}; + struct uv_cb_header { u16 len; u16 cmd; /* Command Code */ @@ -97,7 +101,8 @@ struct uv_cb_qui { u64 max_guest_stor_addr; u8 reserved88[158 - 136]; u16 max_guest_cpu_id; - u8 reserveda0[200 - 160]; + u64 uv_feature_indications; + u8 reserveda0[200 - 168]; } __packed __aligned(8); /* Initialize Ultravisor */ @@ -274,6 +279,7 @@ struct uv_info { unsigned long max_sec_stor_addr; unsigned int max_num_sec_conf; unsigned short max_guest_cpu_id; + unsigned long uv_feature_indications; }; extern struct uv_info uv_info; diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index b45e3dddd2c2..53165aa7813a 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -4,18 +4,31 @@ #include <vdso/datapage.h> -/* Default link address for the vDSO */ -#define VDSO64_LBASE 0 +#ifndef __ASSEMBLY__ -#define __VVAR_PAGES 2 +#include <generated/vdso64-offsets.h> +#ifdef CONFIG_COMPAT +#include <generated/vdso32-offsets.h> +#endif -#define VDSO_VERSION_STRING LINUX_2.6.29 - -#ifndef __ASSEMBLY__ +#define VDSO64_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso64_offset_##name)) +#ifdef CONFIG_COMPAT +#define VDSO32_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso32_offset_##name)) +#else +#define VDSO32_SYMBOL(tsk, name) (-1UL) +#endif extern struct vdso_data *vdso_data; int vdso_getcpu_init(void); #endif /* __ASSEMBLY__ */ + +/* Default link address for the vDSO */ +#define VDSO_LBASE 0 + +#define __VVAR_PAGES 2 + +#define VDSO_VERSION_STRING LINUX_2.6.29 + #endif /* __S390_VDSO_H__ */ diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h index 383c53c3dddd..d6465b22ffe3 100644 --- a/arch/s390/include/asm/vdso/gettimeofday.h +++ b/arch/s390/include/asm/vdso/gettimeofday.h @@ -8,7 +8,6 @@ #include <asm/timex.h> #include <asm/unistd.h> -#include <asm/vdso.h> #include <linux/compiler.h> #define vdso_calc_delta __arch_vdso_calc_delta diff --git a/arch/s390/include/uapi/asm/schid.h b/arch/s390/include/uapi/asm/schid.h index 58fca6f48410..a3e1cf168553 100644 --- a/arch/s390/include/uapi/asm/schid.h +++ b/arch/s390/include/uapi/asm/schid.h @@ -4,6 +4,8 @@ #include <linux/types.h> +#ifndef __ASSEMBLY__ + struct subchannel_id { __u32 cssid : 8; __u32 : 4; @@ -13,5 +15,6 @@ struct subchannel_id { __u32 sch_no : 16; } __attribute__ ((packed, aligned(4))); +#endif /* __ASSEMBLY__ */ #endif /* _UAPIASM_SCHID_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 68ca1834316f..4a44ba5a2d73 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -71,10 +71,10 @@ obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf_common.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o -obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_diag.o obj-$(CONFIG_TRACEPOINTS) += trace.o obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o # vdso obj-y += vdso64/ +obj-$(CONFIG_COMPAT) += vdso32/ diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index 8e1f2aee85ef..c22ea1c3ef84 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -76,8 +76,7 @@ static void __init_or_module __apply_alternatives(struct alt_instr *start, instr = (u8 *)&a->instr_offset + a->instr_offset; replacement = (u8 *)&a->repl_offset + a->repl_offset; - if (!__test_facility(a->facility, - S390_lowcore.alt_stfle_fac_list)) + if (!__test_facility(a->facility, alt_stfle_fac_list)) continue; if (unlikely(a->instrlen % 2 || a->replacementlen % 2)) { diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 15e637728a4b..77ff2130cb04 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -14,7 +14,6 @@ #include <linux/pgtable.h> #include <asm/idle.h> #include <asm/gmap.h> -#include <asm/nmi.h> #include <asm/stacktrace.h> int main(void) @@ -58,8 +57,6 @@ int main(void) OFFSET(__LC_EXT_PARAMS, lowcore, ext_params); OFFSET(__LC_EXT_CPU_ADDR, lowcore, ext_cpu_addr); OFFSET(__LC_EXT_INT_CODE, lowcore, ext_int_code); - OFFSET(__LC_SVC_ILC, lowcore, svc_ilc); - OFFSET(__LC_SVC_INT_CODE, lowcore, svc_code); OFFSET(__LC_PGM_ILC, lowcore, pgm_ilc); OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_code); OFFSET(__LC_DATA_EXC_CODE, lowcore, data_exc_code); @@ -77,8 +74,6 @@ int main(void) OFFSET(__LC_SUBCHANNEL_NR, lowcore, subchannel_nr); OFFSET(__LC_IO_INT_PARM, lowcore, io_int_parm); OFFSET(__LC_IO_INT_WORD, lowcore, io_int_word); - OFFSET(__LC_STFL_FAC_LIST, lowcore, stfl_fac_list); - OFFSET(__LC_STFLE_FAC_LIST, lowcore, stfle_fac_list); OFFSET(__LC_MCCK_CODE, lowcore, mcck_interruption_code); OFFSET(__LC_EXT_DAMAGE_CODE, lowcore, external_damage_code); OFFSET(__LC_MCCK_FAIL_STOR_ADDR, lowcore, failing_storage_address); @@ -111,7 +106,6 @@ int main(void) OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock); OFFSET(__LC_INT_CLOCK, lowcore, int_clock); OFFSET(__LC_MCCK_CLOCK, lowcore, mcck_clock); - OFFSET(__LC_CLOCK_COMPARATOR, lowcore, clock_comparator); OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock); OFFSET(__LC_CURRENT, lowcore, current_task); OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack); @@ -148,9 +142,6 @@ int main(void) OFFSET(__LC_CREGS_SAVE_AREA, lowcore, cregs_save_area); OFFSET(__LC_PGM_TDB, lowcore, pgm_tdb); BLANK(); - /* extended machine check save area */ - OFFSET(__MCESA_GS_SAVE_AREA, mcesa, guarded_storage_save_area); - BLANK(); /* gmap/sie offsets */ OFFSET(__GMAP_ASCE, gmap, asce); OFFSET(__SIE_PROG0C, kvm_s390_sie_block, prog0c); @@ -159,5 +150,7 @@ int main(void) OFFSET(__KEXEC_SHA_REGION_START, kexec_sha_region, start); OFFSET(__KEXEC_SHA_REGION_LEN, kexec_sha_region, len); DEFINE(__KEXEC_SHA_REGION_SIZE, sizeof(struct kexec_sha_region)); + /* sizeof kernel parameter area */ + DEFINE(__PARMAREA_SIZE, sizeof(struct parmarea)); return 0; } diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 1d0e17ec93eb..cca142fbb516 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -28,6 +28,7 @@ #include <linux/uaccess.h> #include <asm/lowcore.h> #include <asm/switch_to.h> +#include <asm/vdso.h> #include "compat_linux.h" #include "compat_ptrace.h" #include "entry.h" @@ -118,7 +119,6 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) fpregs_load((_s390_fp_regs *) &user_sregs.fpregs, ¤t->thread.fpu); clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ - clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART); return 0; } @@ -304,11 +304,7 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set, restorer = (unsigned long __force) ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE; } else { - /* Signal frames without vectors registers are short ! */ - __u16 __user *svc = (void __user *) frame + frame_size - 2; - if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc)) - return -EFAULT; - restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE; + restorer = VDSO32_SYMBOL(current, sigreturn); } /* Set up registers for signal handler */ @@ -371,10 +367,7 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set, restorer = (unsigned long __force) ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE; } else { - __u16 __user *svc = &frame->svc_insn; - if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc)) - return -EFAULT; - restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE; + restorer = VDSO32_SYMBOL(current, rt_sigreturn); } /* Create siginfo on the signal stack */ diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index 2da027359798..54efc279f54e 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -26,33 +26,35 @@ static char cpcmd_buf[241]; static int diag8_noresponse(int cmdlen) { - register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf; - register unsigned long reg3 asm ("3") = cmdlen; - asm volatile( - " diag %1,%0,0x8\n" - : "+d" (reg3) : "d" (reg2) : "cc"); - return reg3; + " diag %[rx],%[ry],0x8\n" + : [ry] "+&d" (cmdlen) + : [rx] "d" ((addr_t) cpcmd_buf) + : "cc"); + return cmdlen; } static int diag8_response(int cmdlen, char *response, int *rlen) { - unsigned long _cmdlen = cmdlen | 0x40000000L; - unsigned long _rlen = *rlen; - register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf; - register unsigned long reg3 asm ("3") = (addr_t) response; - register unsigned long reg4 asm ("4") = _cmdlen; - register unsigned long reg5 asm ("5") = _rlen; + union register_pair rx, ry; + int cc; + rx.even = (addr_t) cpcmd_buf; + rx.odd = (addr_t) response; + ry.even = cmdlen | 0x40000000L; + ry.odd = *rlen; asm volatile( - " diag %2,%0,0x8\n" - " brc 8,1f\n" - " agr %1,%4\n" - "1:\n" - : "+d" (reg4), "+d" (reg5) - : "d" (reg2), "d" (reg3), "d" (*rlen) : "cc"); - *rlen = reg5; - return reg4; + " diag %[rx],%[ry],0x8\n" + " ipm %[cc]\n" + " srl %[cc],28\n" + : [cc] "=&d" (cc), [ry] "+&d" (ry.pair) + : [rx] "d" (rx.pair) + : "cc"); + if (cc) + *rlen += ry.odd; + else + *rlen = ry.odd; + return ry.even; } /* diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index bb958d32bd81..09b6c6402f9b 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -1418,7 +1418,7 @@ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, else except_str = "-"; caller = (unsigned long) entry->caller; - rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %04u %pK ", + rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %04u %px ", area, sec, usec, level, except_str, entry->cpu, (void *)caller); return rc; diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index b8b0cd7b008f..a3f47464c3f1 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -141,16 +141,15 @@ EXPORT_SYMBOL(diag14); static inline int __diag204(unsigned long *subcode, unsigned long size, void *addr) { - register unsigned long _subcode asm("0") = *subcode; - register unsigned long _size asm("1") = size; + union register_pair rp = { .even = *subcode, .odd = size }; asm volatile( - " diag %2,%0,0x204\n" + " diag %[addr],%[rp],0x204\n" "0: nopr %%r7\n" EX_TABLE(0b,0b) - : "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory"); - *subcode = _subcode; - return _size; + : [rp] "+&d" (rp.pair) : [addr] "d" (addr) : "memory"); + *subcode = rp.even; + return rp.odd; } int diag204(unsigned long subcode, unsigned long size, void *addr) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index a361d2e70025..fb84e3fc1686 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -33,6 +33,8 @@ #include <asm/switch_to.h> #include "entry.h" +int __bootdata(is_full_image); + static void __init reset_tod_clock(void) { union tod_clock clk; @@ -180,11 +182,9 @@ static noinline __init void setup_lowcore_early(void) static noinline __init void setup_facility_list(void) { - memcpy(S390_lowcore.alt_stfle_fac_list, - S390_lowcore.stfle_fac_list, - sizeof(S390_lowcore.alt_stfle_fac_list)); + memcpy(alt_stfle_fac_list, stfle_fac_list, sizeof(alt_stfle_fac_list)); if (!IS_ENABLED(CONFIG_KERNEL_NOBP)) - __clear_facility(82, S390_lowcore.alt_stfle_fac_list); + __clear_facility(82, alt_stfle_fac_list); } static __init void detect_diag9c(void) @@ -281,7 +281,7 @@ static void __init setup_boot_command_line(void) static void __init check_image_bootable(void) { - if (!memcmp(EP_STRING, (void *)EP_OFFSET, strlen(EP_STRING))) + if (is_full_image) return; sclp_early_printk("Linux kernel boot failure: An attempt to boot a vmlinux ELF image failed.\n"); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 12de7a9c85b3..5a2f70cbd3a9 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -14,7 +14,6 @@ #include <asm/alternative-asm.h> #include <asm/processor.h> #include <asm/cache.h> -#include <asm/ctl_reg.h> #include <asm/dwarf.h> #include <asm/errno.h> #include <asm/ptrace.h> @@ -129,6 +128,52 @@ _LPP_OFFSET = __LC_LPP "jnz .+8; .long 0xb2e8d000", 82 .endm + /* + * The CHKSTG macro jumps to the provided label in case the + * machine check interruption code reports one of unrecoverable + * storage errors: + * - Storage error uncorrected + * - Storage key error uncorrected + * - Storage degradation with Failing-storage-address validity + */ + .macro CHKSTG errlabel + TSTMSK __LC_MCCK_CODE,(MCCK_CODE_STG_ERROR|MCCK_CODE_STG_KEY_ERROR) + jnz \errlabel + TSTMSK __LC_MCCK_CODE,MCCK_CODE_STG_DEGRAD + jz oklabel\@ + TSTMSK __LC_MCCK_CODE,MCCK_CODE_STG_FAIL_ADDR + jnz \errlabel +oklabel\@: + .endm + +#if IS_ENABLED(CONFIG_KVM) + /* + * The OUTSIDE macro jumps to the provided label in case the value + * in the provided register is outside of the provided range. The + * macro is useful for checking whether a PSW stored in a register + * pair points inside or outside of a block of instructions. + * @reg: register to check + * @start: start of the range + * @end: end of the range + * @outside_label: jump here if @reg is outside of [@start..@end) + */ + .macro OUTSIDE reg,start,end,outside_label + lgr %r14,\reg + larl %r13,\start + slgr %r14,%r13 + lghi %r13,\end - \start + clgr %r14,%r13 + jhe \outside_label + .endm + + .macro SIEEXIT + lg %r9,__SF_SIE_CONTROL(%r15) # get control block pointer + ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE + lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce + larl %r9,sie_exit # skip forward to sie_exit + .endm +#endif + GEN_BR_THUNK %r14 GEN_BR_THUNK %r14,%r13 @@ -214,7 +259,6 @@ ENTRY(sie64a) # are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable. # Other instructions between sie64a and .Lsie_done should not cause program # interrupts. So lets use 3 nops as a landing pad for all possible rewinds. -# See also .Lcleanup_sie_mcck/.Lcleanup_sie_int .Lrewind_pad6: nopr 7 .Lrewind_pad4: @@ -276,6 +320,7 @@ ENTRY(system_call) xgr %r10,%r10 xgr %r11,%r11 la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs + mvc __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC lgr %r3,%r14 brasl %r14,__do_syscall lctlg %c1,%c1,__LC_USER_ASCE @@ -318,16 +363,8 @@ ENTRY(pgm_check_handler) .Lpgm_skip_asce: #if IS_ENABLED(CONFIG_KVM) # cleanup critical section for program checks in sie64a - lgr %r14,%r9 - larl %r13,.Lsie_gmap - slgr %r14,%r13 - lghi %r13,.Lsie_done - .Lsie_gmap - clgr %r14,%r13 - jhe 1f - lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer - ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce - larl %r9,sie_exit # skip forward to sie_exit + OUTSIDE %r9,.Lsie_gmap,.Lsie_done,1f + SIEEXIT lghi %r10,_PIF_GUEST_FAULT #endif 1: tmhh %r8,0x4000 # PER bit set in old PSW ? @@ -392,13 +429,9 @@ ENTRY(\name) tmhh %r8,0x0001 # interrupting from user ? jnz 1f #if IS_ENABLED(CONFIG_KVM) - lgr %r14,%r9 - larl %r13,.Lsie_gmap - slgr %r14,%r13 - lghi %r13,.Lsie_done - .Lsie_gmap - clgr %r14,%r13 - jhe 0f - brasl %r14,.Lcleanup_sie_int + OUTSIDE %r9,.Lsie_gmap,.Lsie_done,0f + BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) + SIEEXIT #endif 0: CHECK_STACK __LC_SAVE_AREA_ASYNC aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) @@ -418,6 +451,7 @@ ENTRY(\name) xgr %r6,%r6 xgr %r7,%r7 xgr %r10,%r10 + xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) tm %r8,0x0001 # coming from user space? @@ -471,8 +505,6 @@ ENTRY(mcck_int_handler) BPOFF la %r1,4095 # validate r1 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # validate cpu timer - sckc __LC_CLOCK_COMPARATOR # validate comparator - lam %a0,%a15,__LC_AREGS_SAVE_AREA-4095(%r1) # validate acrs lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# validate gprs lg %r12,__LC_CURRENT lmg %r8,%r9,__LC_MCK_OLD_PSW @@ -483,41 +515,7 @@ ENTRY(mcck_int_handler) la %r14,4095 lctlg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r14) # validate ctl regs ptlb - lg %r11,__LC_MCESAD-4095(%r14) # extended machine check save area - nill %r11,0xfc00 # MCESA_ORIGIN_MASK - TSTMSK __LC_CREGS_SAVE_AREA+16-4095(%r14),CR2_GUARDED_STORAGE - jno 0f - TSTMSK __LC_MCCK_CODE,MCCK_CODE_GS_VALID - jno 0f - .insn rxy,0xe3000000004d,0,__MCESA_GS_SAVE_AREA(%r11) # LGSC -0: l %r14,__LC_FP_CREG_SAVE_AREA-4095(%r14) - TSTMSK __LC_MCCK_CODE,MCCK_CODE_FC_VALID - jo 0f - sr %r14,%r14 -0: sfpc %r14 - TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX - jo 0f - lghi %r14,__LC_FPREGS_SAVE_AREA - ld %f0,0(%r14) - ld %f1,8(%r14) - ld %f2,16(%r14) - ld %f3,24(%r14) - ld %f4,32(%r14) - ld %f5,40(%r14) - ld %f6,48(%r14) - ld %f7,56(%r14) - ld %f8,64(%r14) - ld %f9,72(%r14) - ld %f10,80(%r14) - ld %f11,88(%r14) - ld %f12,96(%r14) - ld %f13,104(%r14) - ld %f14,112(%r14) - ld %f15,120(%r14) - j 1f -0: VLM %v0,%v15,0,%r11 - VLM %v16,%v31,256,%r11 -1: lghi %r14,__LC_CPU_TIMER_SAVE_AREA + lghi %r14,__LC_CPU_TIMER_SAVE_AREA mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) TSTMSK __LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID jo 3f @@ -533,27 +531,29 @@ ENTRY(mcck_int_handler) 3: TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_MWP_VALID jno .Lmcck_panic tmhh %r8,0x0001 # interrupting from user ? - jnz 4f + jnz 6f TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID jno .Lmcck_panic -4: ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off - tmhh %r8,0x0001 # interrupting from user ? - jnz .Lmcck_user #if IS_ENABLED(CONFIG_KVM) - lgr %r14,%r9 - larl %r13,.Lsie_gmap - slgr %r14,%r13 - lghi %r13,.Lsie_done - .Lsie_gmap - clgr %r14,%r13 - jhe .Lmcck_stack - brasl %r14,.Lcleanup_sie_mcck -#endif + OUTSIDE %r9,.Lsie_gmap,.Lsie_done,6f + OUTSIDE %r9,.Lsie_entry,.Lsie_skip,4f + oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST + j 5f +4: CHKSTG .Lmcck_panic +5: larl %r14,.Lstosm_tmp + stosm 0(%r14),0x04 # turn dat on, keep irqs off + BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) + SIEEXIT j .Lmcck_stack -.Lmcck_user: +#endif +6: CHKSTG .Lmcck_panic + larl %r14,.Lstosm_tmp + stosm 0(%r14),0x04 # turn dat on, keep irqs off + tmhh %r8,0x0001 # interrupting from user ? + jz .Lmcck_stack BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP .Lmcck_stack: lg %r15,__LC_MCCK_STACK -.Lmcck_skip: la %r11,STACK_FRAME_OVERHEAD(%r15) stctg %c1,%c1,__PT_CR1(%r11) lctlg %c1,%c1,__LC_KERNEL_ASCE @@ -595,8 +595,33 @@ ENTRY(mcck_int_handler) b __LC_RETURN_MCCK_LPSWE .Lmcck_panic: - lg %r15,__LC_NODAT_STACK - j .Lmcck_skip + /* + * Iterate over all possible CPU addresses in the range 0..0xffff + * and stop each CPU using signal processor. Use compare and swap + * to allow just one CPU-stopper and prevent concurrent CPUs from + * stopping each other while leaving the others running. + */ + lhi %r5,0 + lhi %r6,1 + larl %r7,.Lstop_lock + cs %r5,%r6,0(%r7) # single CPU-stopper only + jnz 4f + larl %r7,.Lthis_cpu + stap 0(%r7) # this CPU address + lh %r4,0(%r7) + nilh %r4,0 + lhi %r0,1 + sll %r0,16 # CPU counter + lhi %r3,0 # next CPU address +0: cr %r3,%r4 + je 2f +1: sigp %r1,%r3,SIGP_STOP # stop next CPU + brc SIGP_CC_BUSY,1b +2: ahi %r3,1 + brct %r0,0b +3: sigp %r1,%r4,SIGP_STOP # stop this CPU + brc SIGP_CC_BUSY,3b +4: j 4b ENDPROC(mcck_int_handler) # @@ -647,23 +672,11 @@ ENTRY(stack_overflow) ENDPROC(stack_overflow) #endif -#if IS_ENABLED(CONFIG_KVM) -.Lcleanup_sie_mcck: - larl %r13,.Lsie_entry - slgr %r9,%r13 - larl %r13,.Lsie_skip - clgr %r9,%r13 - jh .Lcleanup_sie_int - oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST -.Lcleanup_sie_int: - BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) - lg %r9,__SF_SIE_CONTROL(%r15) # get control block pointer - ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_KERNEL_ASCE - larl %r9,sie_exit # skip forward to sie_exit - BR_EX %r14,%r13 - -#endif + .section .data, "aw" + .align 4 +.Lstop_lock: .long 0 +.Lthis_cpu: .short 0 +.Lstosm_tmp: .byte 0 .section .rodata, "a" #define SYSCALL(esame,emu) .quad __s390x_ ## esame .globl sys_call_table diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index dba04fbc37a2..50e2c21e0ec9 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -13,6 +13,7 @@ #include <linux/init.h> #include <linux/device.h> #include <linux/delay.h> +#include <linux/panic_notifier.h> #include <linux/reboot.h> #include <linux/ctype.h> #include <linux/fs.h> @@ -162,16 +163,18 @@ static bool reipl_ccw_clear; static inline int __diag308(unsigned long subcode, void *addr) { - register unsigned long _addr asm("0") = (unsigned long) addr; - register unsigned long _rc asm("1") = 0; + union register_pair r1; + r1.even = (unsigned long) addr; + r1.odd = 0; asm volatile( - " diag %0,%2,0x308\n" + " diag %[r1],%[subcode],0x308\n" "0: nopr %%r7\n" EX_TABLE(0b,0b) - : "+d" (_addr), "+d" (_rc) - : "d" (subcode) : "cc", "memory"); - return _rc; + : [r1] "+&d" (r1.pair) + : [subcode] "d" (subcode) + : "cc", "memory"); + return r1.odd; } int diag308(unsigned long subcode, void *addr) diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 714269e10eec..234d085257eb 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -110,15 +110,17 @@ static int on_async_stack(void) { unsigned long frame = current_frame_address(); - return !!!((S390_lowcore.async_stack - frame) >> (PAGE_SHIFT + THREAD_SIZE_ORDER)); + return ((S390_lowcore.async_stack ^ frame) & ~(THREAD_SIZE - 1)) == 0; } static void do_irq_async(struct pt_regs *regs, int irq) { - if (on_async_stack()) + if (on_async_stack()) { do_IRQ(regs, irq); - else - CALL_ON_STACK(do_IRQ, S390_lowcore.async_stack, 2, regs, irq); + } else { + call_on_stack(2, S390_lowcore.async_stack, void, do_IRQ, + struct pt_regs *, regs, int, irq); + } } static int irq_pending(struct pt_regs *regs) @@ -146,8 +148,8 @@ void noinstr do_io_irq(struct pt_regs *regs) account_idle_time_irq(); do { - memcpy(®s->int_code, &S390_lowcore.subchannel_id, 12); - if (S390_lowcore.io_int_word & BIT(31)) + regs->tpi_info = S390_lowcore.tpi_info; + if (S390_lowcore.tpi_info.adapter_IO) do_irq_async(regs, THIN_INTERRUPT); else do_irq_async(regs, IO_INTERRUPT); @@ -172,7 +174,7 @@ void noinstr do_ext_irq(struct pt_regs *regs) if (user_mode(regs)) update_timer_sys(); - memcpy(®s->int_code, &S390_lowcore.ext_cpu_addr, 4); + regs->int_code = S390_lowcore.ext_int_code_addr; regs->int_parm = S390_lowcore.ext_params; regs->int_parm_long = S390_lowcore.ext_params2; @@ -266,24 +268,6 @@ unsigned int arch_dynirq_lower_bound(unsigned int from) } /* - * Switch to the asynchronous interrupt stack for softirq execution. - */ -void do_softirq_own_stack(void) -{ - unsigned long old, new; - - old = current_stack_pointer(); - /* Check against async. stack address range. */ - new = S390_lowcore.async_stack; - if (((new - old) >> (PAGE_SHIFT + THREAD_SIZE_ORDER)) != 0) { - CALL_ON_STACK(__do_softirq, new, 0); - } else { - /* We are already on the async stack. */ - __do_softirq(); - } -} - -/* * ext_int_hash[index] is the list head for all external interrupts that hash * to this index. */ diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index aae24dc75df6..52d056a5f89f 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -44,11 +44,6 @@ void *alloc_insn_page(void) return page; } -void free_insn_page(void *page) -{ - module_memfree(page); -} - static void *alloc_s390_insn_page(void) { if (xchg(&insn_page_in_use, 1) == 1) @@ -97,11 +92,6 @@ static void copy_instruction(struct kprobe *p) } NOKPROBE_SYMBOL(copy_instruction); -static inline int is_kernel_addr(void *addr) -{ - return addr < (void *)_end; -} - static int s390_get_insn_slot(struct kprobe *p) { /* @@ -110,7 +100,7 @@ static int s390_get_insn_slot(struct kprobe *p) * field can be patched and executed within the insn slot. */ p->ainsn.insn = NULL; - if (is_kernel_addr(p->addr)) + if (is_kernel((unsigned long)p->addr)) p->ainsn.insn = get_s390_insn_slot(); else if (is_module_addr(p->addr)) p->ainsn.insn = get_insn_slot(); @@ -122,7 +112,7 @@ static void s390_free_insn_slot(struct kprobe *p) { if (!p->ainsn.insn) return; - if (is_kernel_addr(p->addr)) + if (is_kernel((unsigned long)p->addr)) free_s390_insn_slot(p->ainsn.insn, 0); else free_insn_slot(p->ainsn.insn, 0); @@ -446,23 +436,6 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr) case KPROBE_HIT_ACTIVE: case KPROBE_HIT_SSDONE: /* - * We increment the nmissed count for accounting, - * we can also use npre/npostfault count for accounting - * these specific fault cases. - */ - kprobes_inc_nmissed_count(p); - - /* - * We come here because instructions in the pre/post - * handler caused the page_fault, this could happen - * if handler tries to access user space by - * copy_from_user(), get_user() etc. Let the - * user-specified handler try to fix it first. - */ - if (p->fault_handler && p->fault_handler(p, regs, trapnr)) - return 1; - - /* * In case the user-specified fault handler returned * zero, try to fix up. */ diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index d91989c7bd6a..1005a6935fbe 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -132,7 +132,8 @@ static bool kdump_csum_valid(struct kimage *image) int rc; preempt_disable(); - rc = CALL_ON_STACK(do_start_kdump, S390_lowcore.nodat_stack, 1, image); + rc = call_on_stack(1, S390_lowcore.nodat_stack, unsigned long, do_start_kdump, + unsigned long, (unsigned long)image); preempt_enable(); return rc == 0; #else diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 11f8c296f60d..20f8e1868853 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -189,12 +189,16 @@ void noinstr s390_handle_mcck(void) * returns 0 if all required registers are available * returns 1 otherwise */ -static int notrace s390_check_registers(union mci mci, int umode) +static int notrace s390_validate_registers(union mci mci, int umode) { + struct mcesa *mcesa; + void *fpt_save_area; union ctlreg2 cr2; int kill_task; + u64 zero; kill_task = 0; + zero = 0; if (!mci.gr) { /* @@ -205,14 +209,6 @@ static int notrace s390_check_registers(union mci mci, int umode) s390_handle_damage(); kill_task = 1; } - /* Check control registers */ - if (!mci.cr) { - /* - * Control registers have unknown contents. - * Can't recover and therefore stopping machine. - */ - s390_handle_damage(); - } if (!mci.fp) { /* * Floating point registers can't be restored. If the @@ -225,35 +221,89 @@ static int notrace s390_check_registers(union mci mci, int umode) if (!test_cpu_flag(CIF_FPU)) kill_task = 1; } + fpt_save_area = &S390_lowcore.floating_pt_save_area; if (!mci.fc) { /* * Floating point control register can't be restored. * If the kernel currently uses the floating pointer * registers and needs the FPC register the system is * stopped. If the process has its floating pointer - * registers loaded it is terminated. + * registers loaded it is terminated. Otherwise the + * FPC is just validated. */ if (S390_lowcore.fpu_flags & KERNEL_FPC) s390_handle_damage(); + asm volatile( + " lfpc %0\n" + : + : "Q" (zero)); if (!test_cpu_flag(CIF_FPU)) kill_task = 1; + } else { + asm volatile( + " lfpc %0\n" + : + : "Q" (S390_lowcore.fpt_creg_save_area)); } - if (MACHINE_HAS_VX) { + mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK); + if (!MACHINE_HAS_VX) { + /* Validate floating point registers */ + asm volatile( + " ld 0,0(%0)\n" + " ld 1,8(%0)\n" + " ld 2,16(%0)\n" + " ld 3,24(%0)\n" + " ld 4,32(%0)\n" + " ld 5,40(%0)\n" + " ld 6,48(%0)\n" + " ld 7,56(%0)\n" + " ld 8,64(%0)\n" + " ld 9,72(%0)\n" + " ld 10,80(%0)\n" + " ld 11,88(%0)\n" + " ld 12,96(%0)\n" + " ld 13,104(%0)\n" + " ld 14,112(%0)\n" + " ld 15,120(%0)\n" + : + : "a" (fpt_save_area) + : "memory"); + } else { + /* Validate vector registers */ + union ctlreg0 cr0; + if (!mci.vr) { /* * Vector registers can't be restored. If the kernel * currently uses vector registers the system is * stopped. If the process has its vector registers - * loaded it is terminated. + * loaded it is terminated. Otherwise just validate + * the registers. */ if (S390_lowcore.fpu_flags & KERNEL_VXR) s390_handle_damage(); if (!test_cpu_flag(CIF_FPU)) kill_task = 1; } + cr0.val = S390_lowcore.cregs_save_area[0]; + cr0.afp = cr0.vx = 1; + __ctl_load(cr0.val, 0, 0); + asm volatile( + " la 1,%0\n" + " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ + " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ + : + : "Q" (*(struct vx_array *)mcesa->vector_save_area) + : "1"); + __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0); } - /* Check if access registers are valid */ + /* Validate access registers */ + asm volatile( + " lam 0,15,0(%0)\n" + : + : "a" (&S390_lowcore.access_regs_save_area) + : "memory"); if (!mci.ar) { /* * Access registers have unknown contents. @@ -261,7 +311,7 @@ static int notrace s390_check_registers(union mci mci, int umode) */ kill_task = 1; } - /* Check guarded storage registers */ + /* Validate guarded storage registers */ cr2.val = S390_lowcore.cregs_save_area[2]; if (cr2.gse) { if (!mci.gs) { @@ -271,31 +321,26 @@ static int notrace s390_check_registers(union mci mci, int umode) * It has to be terminated. */ kill_task = 1; + } else { + load_gs_cb((struct gs_cb *)mcesa->guarded_storage_save_area); } } - /* Check if old PSW is valid */ - if (!mci.wp) { - /* - * Can't tell if we come from user or kernel mode - * -> stopping machine. - */ - s390_handle_damage(); - } - /* Check for invalid kernel instruction address */ - if (!mci.ia && !umode) { - /* - * The instruction address got lost while running - * in the kernel -> stopping machine. - */ - s390_handle_damage(); - } + /* + * The getcpu vdso syscall reads CPU number from the programmable + * field of the TOD clock. Disregard the TOD programmable register + * validity bit and load the CPU number into the TOD programmable + * field unconditionally. + */ + set_tod_programmable_field(raw_smp_processor_id()); + /* Validate clock comparator register */ + set_clock_comparator(S390_lowcore.clock_comparator); if (!mci.ms || !mci.pm || !mci.ia) kill_task = 1; return kill_task; } -NOKPROBE_SYMBOL(s390_check_registers); +NOKPROBE_SYMBOL(s390_validate_registers); /* * Backup the guest's machine check info to its description block @@ -353,11 +398,6 @@ int notrace s390_do_machine_check(struct pt_regs *regs) mci.val = S390_lowcore.mcck_interruption_code; mcck = this_cpu_ptr(&cpu_mcck); - if (mci.sd) { - /* System damage -> stopping machine */ - s390_handle_damage(); - } - /* * Reinject the instruction processing damages' machine checks * including Delayed Access Exception into the guest @@ -398,7 +438,7 @@ int notrace s390_do_machine_check(struct pt_regs *regs) s390_handle_damage(); } } - if (s390_check_registers(mci, user_mode(regs))) { + if (s390_validate_registers(mci, user_mode(regs))) { /* * Couldn't restore all register contents for the * user space process -> mark task for termination. @@ -428,21 +468,6 @@ int notrace s390_do_machine_check(struct pt_regs *regs) mcck_pending = 1; } - /* - * Reinject storage related machine checks into the guest if they - * happen when the guest is running. - */ - if (!test_cpu_flag(CIF_MCCK_GUEST)) { - if (mci.se) - /* Storage error uncorrected */ - s390_handle_damage(); - if (mci.ke) - /* Storage key-error uncorrected */ - s390_handle_damage(); - if (mci.ds && mci.fa) - /* Storage degradation */ - s390_handle_damage(); - } if (mci.cp) { /* Channel report word pending */ mcck->channel_report = 1; diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 29e511f5bf06..250e4dbf653c 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -17,11 +17,11 @@ static int __init nobp_setup_early(char *str) * The user explicitely requested nobp=1, enable it and * disable the expoline support. */ - __set_facility(82, S390_lowcore.alt_stfle_fac_list); + __set_facility(82, alt_stfle_fac_list); if (IS_ENABLED(CONFIG_EXPOLINE)) nospec_disable = 1; } else { - __clear_facility(82, S390_lowcore.alt_stfle_fac_list); + __clear_facility(82, alt_stfle_fac_list); } return 0; } @@ -29,7 +29,7 @@ early_param("nobp", nobp_setup_early); static int __init nospec_setup_early(char *str) { - __clear_facility(82, S390_lowcore.alt_stfle_fac_list); + __clear_facility(82, alt_stfle_fac_list); return 0; } early_param("nospec", nospec_setup_early); @@ -40,7 +40,7 @@ static int __init nospec_report(void) pr_info("Spectre V2 mitigation: etokens\n"); if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable) pr_info("Spectre V2 mitigation: execute trampolines\n"); - if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) + if (__test_facility(82, alt_stfle_fac_list)) pr_info("Spectre V2 mitigation: limited branch prediction\n"); return 0; } @@ -66,14 +66,14 @@ void __init nospec_auto_detect(void) */ if (__is_defined(CC_USING_EXPOLINE)) nospec_disable = 1; - __clear_facility(82, S390_lowcore.alt_stfle_fac_list); + __clear_facility(82, alt_stfle_fac_list); } else if (__is_defined(CC_USING_EXPOLINE)) { /* * The kernel has been compiled with expolines. * Keep expolines enabled and disable nobp. */ nospec_disable = 0; - __clear_facility(82, S390_lowcore.alt_stfle_fac_list); + __clear_facility(82, alt_stfle_fac_list); } /* * If the kernel has not been compiled with expolines the @@ -86,7 +86,7 @@ static int __init spectre_v2_setup_early(char *str) { if (str && !strncmp(str, "on", 2)) { nospec_disable = 0; - __clear_facility(82, S390_lowcore.alt_stfle_fac_list); + __clear_facility(82, alt_stfle_fac_list); } if (str && !strncmp(str, "off", 3)) nospec_disable = 1; @@ -99,6 +99,7 @@ early_param("spectre_v2", spectre_v2_setup_early); static void __init_or_module __nospec_revert(s32 *start, s32 *end) { enum { BRCL_EXPOLINE, BRASL_EXPOLINE } type; + static const u8 branch[] = { 0x47, 0x00, 0x07, 0x00 }; u8 *instr, *thunk, *br; u8 insnbuf[6]; s32 *epo; @@ -128,7 +129,7 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end) if ((br[0] & 0xbf) != 0x07 || (br[1] & 0xf0) != 0xf0) continue; - memcpy(insnbuf + 2, (char[]) { 0x47, 0x00, 0x07, 0x00 }, 4); + memcpy(insnbuf + 2, branch, sizeof(branch)); switch (type) { case BRCL_EXPOLINE: insnbuf[0] = br[0]; diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c index 48f472bf9290..b4b5c8c21166 100644 --- a/arch/s390/kernel/nospec-sysfs.c +++ b/arch/s390/kernel/nospec-sysfs.c @@ -17,7 +17,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, return sprintf(buf, "Mitigation: etokens\n"); if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable) return sprintf(buf, "Mitigation: execute trampolines\n"); - if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) + if (__test_facility(82, alt_stfle_fac_list)) return sprintf(buf, "Mitigation: limited branch prediction\n"); return sprintf(buf, "Vulnerable\n"); } diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 31a605bcbc6e..975a00c8c564 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -2,8 +2,9 @@ /* * Performance event support for s390x - CPU-measurement Counter Facility * - * Copyright IBM Corp. 2012, 2019 + * Copyright IBM Corp. 2012, 2021 * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com> + * Thomas Richter <tmricht@linux.ibm.com> */ #define KMSG_COMPONENT "cpum_cf" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt @@ -14,7 +15,223 @@ #include <linux/notifier.h> #include <linux/init.h> #include <linux/export.h> +#include <linux/miscdevice.h> + #include <asm/cpu_mcf.h> +#include <asm/hwctrset.h> +#include <asm/debug.h> + +static unsigned int cfdiag_cpu_speed; /* CPU speed for CF_DIAG trailer */ +static debug_info_t *cf_dbg; + +#define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */ + /* interval in seconds */ + +/* Counter sets are stored as data stream in a page sized memory buffer and + * exported to user space via raw data attached to the event sample data. + * Each counter set starts with an eight byte header consisting of: + * - a two byte eye catcher (0xfeef) + * - a one byte counter set number + * - a two byte counter set size (indicates the number of counters in this set) + * - a three byte reserved value (must be zero) to make the header the same + * size as a counter value. + * All counter values are eight byte in size. + * + * All counter sets are followed by a 64 byte trailer. + * The trailer consists of a: + * - flag field indicating valid fields when corresponding bit set + * - the counter facility first and second version number + * - the CPU speed if nonzero + * - the time stamp the counter sets have been collected + * - the time of day (TOD) base value + * - the machine type. + * + * The counter sets are saved when the process is prepared to be executed on a + * CPU and saved again when the process is going to be removed from a CPU. + * The difference of both counter sets are calculated and stored in the event + * sample data area. + */ +struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */ + unsigned int def:16; /* 0-15 Data Entry Format */ + unsigned int set:16; /* 16-31 Counter set identifier */ + unsigned int ctr:16; /* 32-47 Number of stored counters */ + unsigned int res1:16; /* 48-63 Reserved */ +}; + +struct cf_trailer_entry { /* CPU-M CF_DIAG trailer (64 byte) */ + /* 0 - 7 */ + union { + struct { + unsigned int clock_base:1; /* TOD clock base set */ + unsigned int speed:1; /* CPU speed set */ + /* Measurement alerts */ + unsigned int mtda:1; /* Loss of MT ctr. data alert */ + unsigned int caca:1; /* Counter auth. change alert */ + unsigned int lcda:1; /* Loss of counter data alert */ + }; + unsigned long flags; /* 0-63 All indicators */ + }; + /* 8 - 15 */ + unsigned int cfvn:16; /* 64-79 Ctr First Version */ + unsigned int csvn:16; /* 80-95 Ctr Second Version */ + unsigned int cpu_speed:32; /* 96-127 CPU speed */ + /* 16 - 23 */ + unsigned long timestamp; /* 128-191 Timestamp (TOD) */ + /* 24 - 55 */ + union { + struct { + unsigned long progusage1; + unsigned long progusage2; + unsigned long progusage3; + unsigned long tod_base; + }; + unsigned long progusage[4]; + }; + /* 56 - 63 */ + unsigned int mach_type:16; /* Machine type */ + unsigned int res1:16; /* Reserved */ + unsigned int res2:32; /* Reserved */ +}; + +/* Create the trailer data at the end of a page. */ +static void cfdiag_trailer(struct cf_trailer_entry *te) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpuid cpuid; + + te->cfvn = cpuhw->info.cfvn; /* Counter version numbers */ + te->csvn = cpuhw->info.csvn; + + get_cpu_id(&cpuid); /* Machine type */ + te->mach_type = cpuid.machine; + te->cpu_speed = cfdiag_cpu_speed; + if (te->cpu_speed) + te->speed = 1; + te->clock_base = 1; /* Save clock base */ + te->tod_base = tod_clock_base.tod; + te->timestamp = get_tod_clock_fast(); +} + +/* Read a counter set. The counter set number determines the counter set and + * the CPUM-CF first and second version number determine the number of + * available counters in each counter set. + * Each counter set starts with header containing the counter set number and + * the number of eight byte counters. + * + * The functions returns the number of bytes occupied by this counter set + * including the header. + * If there is no counter in the counter set, this counter set is useless and + * zero is returned on this case. + * + * Note that the counter sets may not be enabled or active and the stcctm + * instruction might return error 3. Depending on error_ok value this is ok, + * for example when called from cpumf_pmu_start() call back function. + */ +static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset, + size_t room, bool error_ok) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + size_t ctrset_size, need = 0; + int rc = 3; /* Assume write failure */ + + ctrdata->def = CF_DIAG_CTRSET_DEF; + ctrdata->set = ctrset; + ctrdata->res1 = 0; + ctrset_size = cpum_cf_ctrset_size(ctrset, &cpuhw->info); + + if (ctrset_size) { /* Save data */ + need = ctrset_size * sizeof(u64) + sizeof(*ctrdata); + if (need <= room) { + rc = ctr_stcctm(ctrset, ctrset_size, + (u64 *)(ctrdata + 1)); + } + if (rc != 3 || error_ok) + ctrdata->ctr = ctrset_size; + else + need = 0; + } + + debug_sprintf_event(cf_dbg, 3, + "%s ctrset %d ctrset_size %zu cfvn %d csvn %d" + " need %zd rc %d\n", __func__, ctrset, ctrset_size, + cpuhw->info.cfvn, cpuhw->info.csvn, need, rc); + return need; +} + +/* Read out all counter sets and save them in the provided data buffer. + * The last 64 byte host an artificial trailer entry. + */ +static size_t cfdiag_getctr(void *data, size_t sz, unsigned long auth, + bool error_ok) +{ + struct cf_trailer_entry *trailer; + size_t offset = 0, done; + int i; + + memset(data, 0, sz); + sz -= sizeof(*trailer); /* Always room for trailer */ + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { + struct cf_ctrset_entry *ctrdata = data + offset; + + if (!(auth & cpumf_ctr_ctl[i])) + continue; /* Counter set not authorized */ + + done = cfdiag_getctrset(ctrdata, i, sz - offset, error_ok); + offset += done; + } + trailer = data + offset; + cfdiag_trailer(trailer); + return offset + sizeof(*trailer); +} + +/* Calculate the difference for each counter in a counter set. */ +static void cfdiag_diffctrset(u64 *pstart, u64 *pstop, int counters) +{ + for (; --counters >= 0; ++pstart, ++pstop) + if (*pstop >= *pstart) + *pstop -= *pstart; + else + *pstop = *pstart - *pstop + 1; +} + +/* Scan the counter sets and calculate the difference of each counter + * in each set. The result is the increment of each counter during the + * period the counter set has been activated. + * + * Return true on success. + */ +static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth) +{ + struct cf_trailer_entry *trailer_start, *trailer_stop; + struct cf_ctrset_entry *ctrstart, *ctrstop; + size_t offset = 0; + + auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1; + do { + ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset); + ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset); + + if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) { + pr_err_once("cpum_cf_diag counter set compare error " + "in set %i\n", ctrstart->set); + return 0; + } + auth &= ~cpumf_ctr_ctl[ctrstart->set]; + if (ctrstart->def == CF_DIAG_CTRSET_DEF) { + cfdiag_diffctrset((u64 *)(ctrstart + 1), + (u64 *)(ctrstop + 1), ctrstart->ctr); + offset += ctrstart->ctr * sizeof(u64) + + sizeof(*ctrstart); + } + } while (ctrstart->def && auth); + + /* Save time_stamp from start of event in stop's trailer */ + trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset); + trailer_stop = (struct cf_trailer_entry *)(cpuhw->stop + offset); + trailer_stop->progusage[0] = trailer_start->timestamp; + + return 1; +} static enum cpumf_ctr_set get_counter_set(u64 event) { @@ -34,7 +251,8 @@ static enum cpumf_ctr_set get_counter_set(u64 event) return set; } -static int validate_ctr_version(const struct hw_perf_event *hwc) +static int validate_ctr_version(const struct hw_perf_event *hwc, + enum cpumf_ctr_set set) { struct cpu_cf_events *cpuhw; int err = 0; @@ -43,7 +261,7 @@ static int validate_ctr_version(const struct hw_perf_event *hwc) cpuhw = &get_cpu_var(cpu_cf_events); /* check required version for counter sets */ - switch (hwc->config_base) { + switch (set) { case CPUMF_CTR_SET_BASIC: case CPUMF_CTR_SET_USER: if (cpuhw->info.cfvn < 1) @@ -86,6 +304,8 @@ static int validate_ctr_version(const struct hw_perf_event *hwc) (cpuhw->info.act_ctl & mtdiag_ctl))) err = -EOPNOTSUPP; break; + case CPUMF_CTR_SET_MAX: + err = -EOPNOTSUPP; } put_cpu_var(cpu_cf_events); @@ -95,7 +315,6 @@ static int validate_ctr_version(const struct hw_perf_event *hwc) static int validate_ctr_auth(const struct hw_perf_event *hwc) { struct cpu_cf_events *cpuhw; - u64 ctrs_state; int err = 0; cpuhw = &get_cpu_var(cpu_cf_events); @@ -105,8 +324,7 @@ static int validate_ctr_auth(const struct hw_perf_event *hwc) * return with -ENOENT in order to fall back to other * PMUs that might suffice the event request. */ - ctrs_state = cpumf_ctr_ctl[hwc->config_base]; - if (!(ctrs_state & cpuhw->info.auth_ctl)) + if (!(hwc->config_base & cpuhw->info.auth_ctl)) err = -ENOENT; put_cpu_var(cpu_cf_events); @@ -126,7 +344,7 @@ static void cpumf_pmu_enable(struct pmu *pmu) if (cpuhw->flags & PMU_F_ENABLED) return; - err = lcctl(cpuhw->state); + err = lcctl(cpuhw->state | cpuhw->dev_state); if (err) { pr_err("Enabling the performance measuring unit " "failed with rc=%x\n", err); @@ -151,6 +369,7 @@ static void cpumf_pmu_disable(struct pmu *pmu) return; inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); + inactive |= cpuhw->dev_state; err = lcctl(inactive); if (err) { pr_err("Disabling the performance measuring unit " @@ -199,6 +418,14 @@ static const int cpumf_generic_events_user[] = { [PERF_COUNT_HW_BUS_CYCLES] = -1, }; +static void cpumf_hw_inuse(void) +{ + mutex_lock(&pmc_reserve_mutex); + if (atomic_inc_return(&num_events) == 1) + __kernel_cpumcf_begin(); + mutex_unlock(&pmc_reserve_mutex); +} + static int __hw_perf_event_init(struct perf_event *event, unsigned int type) { struct perf_event_attr *attr = &event->attr; @@ -258,11 +485,11 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type) /* * Use the hardware perf event structure to store the * counter number in the 'config' member and the counter - * set number in the 'config_base'. The counter set number - * is then later used to enable/disable the counter(s). + * set number in the 'config_base' as bit mask. + * It is later used to enable/disable the counter(s). */ hwc->config = ev; - hwc->config_base = set; + hwc->config_base = cpumf_ctr_ctl[set]; break; case CPUMF_CTR_SET_MAX: /* The counter could not be associated to a counter set */ @@ -270,22 +497,13 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type) } /* Initialize for using the CPU-measurement counter facility */ - if (!atomic_inc_not_zero(&num_events)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&num_events) == 0 && __kernel_cpumcf_begin()) - err = -EBUSY; - else - atomic_inc(&num_events); - mutex_unlock(&pmc_reserve_mutex); - } - if (err) - return err; + cpumf_hw_inuse(); event->destroy = hw_perf_event_destroy; /* Finally, validate version and authorization of the counter set */ err = validate_ctr_auth(hwc); if (!err) - err = validate_ctr_version(hwc); + err = validate_ctr_version(hwc, set); return err; } @@ -361,16 +579,11 @@ static void cpumf_pmu_start(struct perf_event *event, int flags) { struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); struct hw_perf_event *hwc = &event->hw; + int i; - if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) - return; - - if (WARN_ON_ONCE(hwc->config == -1)) + if (!(hwc->state & PERF_HES_STOPPED)) return; - if (flags & PERF_EF_RELOAD) - WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); - hwc->state = 0; /* (Re-)enable and activate the counter set */ @@ -382,29 +595,92 @@ static void cpumf_pmu_start(struct perf_event *event, int flags) * needs to be synchronized. At this point, the counter set can be in * the inactive or disabled state. */ - hw_perf_event_reset(event); + if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) { + cpuhw->usedss = cfdiag_getctr(cpuhw->start, + sizeof(cpuhw->start), + hwc->config_base, true); + } else { + hw_perf_event_reset(event); + } - /* increment refcount for this counter set */ - atomic_inc(&cpuhw->ctr_set[hwc->config_base]); + /* Increment refcount for counter sets */ + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) + if ((hwc->config_base & cpumf_ctr_ctl[i])) + atomic_inc(&cpuhw->ctr_set[i]); +} + +/* Create perf event sample with the counter sets as raw data. The sample + * is then pushed to the event subsystem and the function checks for + * possible event overflows. If an event overflow occurs, the PMU is + * stopped. + * + * Return non-zero if an event overflow occurred. + */ +static int cfdiag_push_sample(struct perf_event *event, + struct cpu_cf_events *cpuhw) +{ + struct perf_sample_data data; + struct perf_raw_record raw; + struct pt_regs regs; + int overflow; + + /* Setup perf sample */ + perf_sample_data_init(&data, 0, event->hw.last_period); + memset(®s, 0, sizeof(regs)); + memset(&raw, 0, sizeof(raw)); + + if (event->attr.sample_type & PERF_SAMPLE_CPU) + data.cpu_entry.cpu = event->cpu; + if (event->attr.sample_type & PERF_SAMPLE_RAW) { + raw.frag.size = cpuhw->usedss; + raw.frag.data = cpuhw->stop; + raw.size = raw.frag.size; + data.raw = &raw; + } + + overflow = perf_event_overflow(event, &data, ®s); + debug_sprintf_event(cf_dbg, 3, + "%s event %#llx sample_type %#llx raw %d ov %d\n", + __func__, event->hw.config, + event->attr.sample_type, raw.size, overflow); + if (overflow) + event->pmu->stop(event, 0); + + perf_event_update_userpage(event); + return overflow; } static void cpumf_pmu_stop(struct perf_event *event, int flags) { struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); struct hw_perf_event *hwc = &event->hw; + int i; if (!(hwc->state & PERF_HES_STOPPED)) { /* Decrement reference count for this counter set and if this * is the last used counter in the set, clear activation * control and set the counter set state to inactive. */ - if (!atomic_dec_return(&cpuhw->ctr_set[hwc->config_base])) - ctr_set_stop(&cpuhw->state, hwc->config_base); + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { + if (!(hwc->config_base & cpumf_ctr_ctl[i])) + continue; + if (!atomic_dec_return(&cpuhw->ctr_set[i])) + ctr_set_stop(&cpuhw->state, cpumf_ctr_ctl[i]); + } hwc->state |= PERF_HES_STOPPED; } if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { - hw_perf_event_update(event); + if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) { + local64_inc(&event->count); + cpuhw->usedss = cfdiag_getctr(cpuhw->stop, + sizeof(cpuhw->stop), + event->hw.config_base, + false); + if (cfdiag_diffctr(cpuhw, event->hw.config_base)) + cfdiag_push_sample(event, cpuhw); + } else + hw_perf_event_update(event); hwc->state |= PERF_HES_UPTODATE; } } @@ -413,15 +689,6 @@ static int cpumf_pmu_add(struct perf_event *event, int flags) { struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); - /* Check authorization for the counter set to which this - * counter belongs. - * For group events transaction, the authorization check is - * done in cpumf_pmu_commit_txn(). - */ - if (!(cpuhw->txn_flags & PERF_PMU_TXN_ADD)) - if (validate_ctr_auth(&event->hw)) - return -ENOENT; - ctr_set_enable(&cpuhw->state, event->hw.config_base); event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; @@ -434,6 +701,7 @@ static int cpumf_pmu_add(struct perf_event *event, int flags) static void cpumf_pmu_del(struct perf_event *event, int flags) { struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + int i; cpumf_pmu_stop(event, PERF_EF_UPDATE); @@ -445,110 +713,716 @@ static void cpumf_pmu_del(struct perf_event *event, int flags) * clear enable control and resets all counters in a set. Therefore, * cpumf_pmu_start() always has to reenable a counter set. */ - if (!atomic_read(&cpuhw->ctr_set[event->hw.config_base])) - ctr_set_disable(&cpuhw->state, event->hw.config_base); + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) + if (!atomic_read(&cpuhw->ctr_set[i])) + ctr_set_disable(&cpuhw->state, cpumf_ctr_ctl[i]); } -/* - * Start group events scheduling transaction. - * Set flags to perform a single test at commit time. +/* Performance monitoring unit for s390x */ +static struct pmu cpumf_pmu = { + .task_ctx_nr = perf_sw_context, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT, + .pmu_enable = cpumf_pmu_enable, + .pmu_disable = cpumf_pmu_disable, + .event_init = cpumf_pmu_event_init, + .add = cpumf_pmu_add, + .del = cpumf_pmu_del, + .start = cpumf_pmu_start, + .stop = cpumf_pmu_stop, + .read = cpumf_pmu_read, +}; + +static int cfset_init(void); +static int __init cpumf_pmu_init(void) +{ + int rc; + + if (!kernel_cpumcf_avail()) + return -ENODEV; + + /* Setup s390dbf facility */ + cf_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128); + if (!cf_dbg) { + pr_err("Registration of s390dbf(cpum_cf) failed\n"); + return -ENOMEM; + }; + debug_register_view(cf_dbg, &debug_sprintf_view); + + cpumf_pmu.attr_groups = cpumf_cf_event_group(); + rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1); + if (rc) { + debug_unregister_view(cf_dbg, &debug_sprintf_view); + debug_unregister(cf_dbg); + pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); + } else if (stccm_avail()) { /* Setup counter set device */ + cfset_init(); + } + return rc; +} + +/* Support for the CPU Measurement Facility counter set extraction using + * device /dev/hwctr. This allows user space programs to extract complete + * counter set via normal file operations. + */ + +static atomic_t cfset_opencnt = ATOMIC_INIT(0); /* Excl. access */ +static DEFINE_MUTEX(cfset_ctrset_mutex);/* Synchronize access to hardware */ +struct cfset_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */ + unsigned int sets; /* Counter set bit mask */ + atomic_t cpus_ack; /* # CPUs successfully executed func */ +}; + +static struct cfset_request { /* CPUs and counter set bit mask */ + unsigned long ctrset; /* Bit mask of counter set to read */ + cpumask_t mask; /* CPU mask to read from */ +} cfset_request; + +static void cfset_ctrset_clear(void) +{ + cpumask_clear(&cfset_request.mask); + cfset_request.ctrset = 0; +} + +/* The /dev/hwctr device access uses PMU_F_IN_USE to mark the device access + * path is currently used. + * The cpu_cf_events::dev_state is used to denote counter sets in use by this + * interface. It is always or'ed in. If this interface is not active, its + * value is zero and no additional counter sets will be included. + * + * The cpu_cf_events::state is used by the perf_event_open SVC and remains + * unchanged. * - * We only support PERF_PMU_TXN_ADD transactions. Save the - * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD - * transactions. + * perf_pmu_enable() and perf_pmu_enable() and its call backs + * cpumf_pmu_enable() and cpumf_pmu_disable() are called by the + * performance measurement subsystem to enable per process + * CPU Measurement counter facility. + * The XXX_enable() and XXX_disable functions are used to turn off + * x86 performance monitoring interrupt (PMI) during scheduling. + * s390 uses these calls to temporarily stop and resume the active CPU + * counters sets during scheduling. + * + * We do allow concurrent access of perf_event_open() SVC and /dev/hwctr + * device access. The perf_event_open() SVC interface makes a lot of effort + * to only run the counters while the calling process is actively scheduled + * to run. + * When /dev/hwctr interface is also used at the same time, the counter sets + * will keep running, even when the process is scheduled off a CPU. + * However this is not a problem and does not lead to wrong counter values + * for the perf_event_open() SVC. The current counter value will be recorded + * during schedule-in. At schedule-out time the current counter value is + * extracted again and the delta is calculated and added to the event. */ -static void cpumf_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) +/* Stop all counter sets via ioctl interface */ +static void cfset_ioctl_off(void *parm) { struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cfset_call_on_cpu_parm *p = parm; + int rc; - WARN_ON_ONCE(cpuhw->txn_flags); /* txn already in flight */ + cpuhw->dev_state = 0; + for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) + if ((p->sets & cpumf_ctr_ctl[rc])) + atomic_dec(&cpuhw->ctr_set[rc]); + rc = lcctl(cpuhw->state); /* Keep perf_event_open counter sets */ + if (rc) + pr_err("Counter set stop %#llx of /dev/%s failed rc=%i\n", + cpuhw->state, S390_HWCTR_DEVICE, rc); + cpuhw->flags &= ~PMU_F_IN_USE; + debug_sprintf_event(cf_dbg, 4, "%s rc %d state %#llx dev_state %#llx\n", + __func__, rc, cpuhw->state, cpuhw->dev_state); +} - cpuhw->txn_flags = txn_flags; - if (txn_flags & ~PERF_PMU_TXN_ADD) - return; +/* Start counter sets on particular CPU */ +static void cfset_ioctl_on(void *parm) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cfset_call_on_cpu_parm *p = parm; + int rc; - perf_pmu_disable(pmu); - cpuhw->tx_state = cpuhw->state; + cpuhw->flags |= PMU_F_IN_USE; + ctr_set_enable(&cpuhw->dev_state, p->sets); + ctr_set_start(&cpuhw->dev_state, p->sets); + for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) + if ((p->sets & cpumf_ctr_ctl[rc])) + atomic_inc(&cpuhw->ctr_set[rc]); + rc = lcctl(cpuhw->dev_state | cpuhw->state); /* Start counter sets */ + if (!rc) + atomic_inc(&p->cpus_ack); + else + pr_err("Counter set start %#llx of /dev/%s failed rc=%i\n", + cpuhw->dev_state | cpuhw->state, S390_HWCTR_DEVICE, rc); + debug_sprintf_event(cf_dbg, 4, "%s rc %d state %#llx dev_state %#llx\n", + __func__, rc, cpuhw->state, cpuhw->dev_state); } -/* - * Stop and cancel a group events scheduling tranctions. - * Assumes cpumf_pmu_del() is called for each successful added - * cpumf_pmu_add() during the transaction. - */ -static void cpumf_pmu_cancel_txn(struct pmu *pmu) +static void cfset_release_cpu(void *p) { - unsigned int txn_flags; struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + int rc; - WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ + debug_sprintf_event(cf_dbg, 4, "%s state %#llx dev_state %#llx\n", + __func__, cpuhw->state, cpuhw->dev_state); + rc = lcctl(cpuhw->state); /* Keep perf_event_open counter sets */ + if (rc) + pr_err("Counter set release %#llx of /dev/%s failed rc=%i\n", + cpuhw->state, S390_HWCTR_DEVICE, rc); + cpuhw->dev_state = 0; +} - txn_flags = cpuhw->txn_flags; - cpuhw->txn_flags = 0; - if (txn_flags & ~PERF_PMU_TXN_ADD) - return; +/* Release function is also called when application gets terminated without + * doing a proper ioctl(..., S390_HWCTR_STOP, ...) command. + */ +static int cfset_release(struct inode *inode, struct file *file) +{ + on_each_cpu(cfset_release_cpu, NULL, 1); + hw_perf_event_destroy(NULL); + cfset_ctrset_clear(); + atomic_set(&cfset_opencnt, 0); + return 0; +} - WARN_ON(cpuhw->tx_state != cpuhw->state); +static int cfset_open(struct inode *inode, struct file *file) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + /* Only one user space program can open /dev/hwctr */ + if (atomic_xchg(&cfset_opencnt, 1)) + return -EBUSY; + + cpumf_hw_inuse(); + file->private_data = NULL; + /* nonseekable_open() never fails */ + return nonseekable_open(inode, file); +} - perf_pmu_enable(pmu); +static int cfset_all_stop(void) +{ + struct cfset_call_on_cpu_parm p = { + .sets = cfset_request.ctrset, + }; + cpumask_var_t mask; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + cpumask_and(mask, &cfset_request.mask, cpu_online_mask); + on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1); + free_cpumask_var(mask); + return 0; } -/* - * Commit the group events scheduling transaction. On success, the - * transaction is closed. On error, the transaction is kept open - * until cpumf_pmu_cancel_txn() is called. +static int cfset_all_start(void) +{ + struct cfset_call_on_cpu_parm p = { + .sets = cfset_request.ctrset, + .cpus_ack = ATOMIC_INIT(0), + }; + cpumask_var_t mask; + int rc = 0; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + cpumask_and(mask, &cfset_request.mask, cpu_online_mask); + on_each_cpu_mask(mask, cfset_ioctl_on, &p, 1); + if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) { + on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1); + rc = -EIO; + debug_sprintf_event(cf_dbg, 4, "%s CPUs missing", __func__); + } + free_cpumask_var(mask); + return rc; +} + + +/* Return the maximum required space for all possible CPUs in case one + * CPU will be onlined during the START, READ, STOP cycles. + * To find out the size of the counter sets, any one CPU will do. They + * all have the same counter sets. */ -static int cpumf_pmu_commit_txn(struct pmu *pmu) +static size_t cfset_needspace(unsigned int sets) +{ + struct cpu_cf_events *cpuhw = get_cpu_ptr(&cpu_cf_events); + size_t bytes = 0; + int i; + + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { + if (!(sets & cpumf_ctr_ctl[i])) + continue; + bytes += cpum_cf_ctrset_size(i, &cpuhw->info) * sizeof(u64) + + sizeof(((struct s390_ctrset_setdata *)0)->set) + + sizeof(((struct s390_ctrset_setdata *)0)->no_cnts); + } + bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids * + (bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) + + sizeof(((struct s390_ctrset_cpudata *)0)->no_sets)); + put_cpu_ptr(&cpu_cf_events); + return bytes; +} + +static int cfset_all_copy(unsigned long arg, cpumask_t *mask) +{ + struct s390_ctrset_read __user *ctrset_read; + unsigned int cpu, cpus, rc; + void __user *uptr; + + ctrset_read = (struct s390_ctrset_read __user *)arg; + uptr = ctrset_read->data; + for_each_cpu(cpu, mask) { + struct cpu_cf_events *cpuhw = per_cpu_ptr(&cpu_cf_events, cpu); + struct s390_ctrset_cpudata __user *ctrset_cpudata; + + ctrset_cpudata = uptr; + rc = put_user(cpu, &ctrset_cpudata->cpu_nr); + rc |= put_user(cpuhw->sets, &ctrset_cpudata->no_sets); + rc |= copy_to_user(ctrset_cpudata->data, cpuhw->data, + cpuhw->used); + if (rc) + return -EFAULT; + uptr += sizeof(struct s390_ctrset_cpudata) + cpuhw->used; + cond_resched(); + } + cpus = cpumask_weight(mask); + if (put_user(cpus, &ctrset_read->no_cpus)) + return -EFAULT; + debug_sprintf_event(cf_dbg, 4, "%s copied %ld\n", __func__, + uptr - (void __user *)ctrset_read->data); + return 0; +} + +static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset, + int ctrset_size, size_t room) +{ + size_t need = 0; + int rc = -1; + + need = sizeof(*p) + sizeof(u64) * ctrset_size; + if (need <= room) { + p->set = cpumf_ctr_ctl[ctrset]; + p->no_cnts = ctrset_size; + rc = ctr_stcctm(ctrset, ctrset_size, (u64 *)p->cv); + if (rc == 3) /* Nothing stored */ + need = 0; + } + return need; +} + +/* Read all counter sets. */ +static void cfset_cpu_read(void *parm) { struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); - u64 state; + struct cfset_call_on_cpu_parm *p = parm; + int set, set_size; + size_t space; + + /* No data saved yet */ + cpuhw->used = 0; + cpuhw->sets = 0; + memset(cpuhw->data, 0, sizeof(cpuhw->data)); + + /* Scan the counter sets */ + for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) { + struct s390_ctrset_setdata *sp = (void *)cpuhw->data + + cpuhw->used; + + if (!(p->sets & cpumf_ctr_ctl[set])) + continue; /* Counter set not in list */ + set_size = cpum_cf_ctrset_size(set, &cpuhw->info); + space = sizeof(cpuhw->data) - cpuhw->used; + space = cfset_cpuset_read(sp, set, set_size, space); + if (space) { + cpuhw->used += space; + cpuhw->sets += 1; + } + } + debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__, + cpuhw->sets, cpuhw->used); +} + +static int cfset_all_read(unsigned long arg) +{ + struct cfset_call_on_cpu_parm p; + cpumask_var_t mask; + int rc; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; - WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ + p.sets = cfset_request.ctrset; + cpumask_and(mask, &cfset_request.mask, cpu_online_mask); + on_each_cpu_mask(mask, cfset_cpu_read, &p, 1); + rc = cfset_all_copy(arg, mask); + free_cpumask_var(mask); + return rc; +} - if (cpuhw->txn_flags & ~PERF_PMU_TXN_ADD) { - cpuhw->txn_flags = 0; - return 0; +static long cfset_ioctl_read(unsigned long arg) +{ + struct s390_ctrset_read read; + int ret = 0; + + if (copy_from_user(&read, (char __user *)arg, sizeof(read))) + return -EFAULT; + ret = cfset_all_read(arg); + return ret; +} + +static long cfset_ioctl_stop(void) +{ + int ret = ENXIO; + + if (cfset_request.ctrset) { + ret = cfset_all_stop(); + cfset_ctrset_clear(); } + return ret; +} - /* check if the updated state can be scheduled */ - state = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); - state >>= CPUMF_LCCTL_ENABLE_SHIFT; - if ((state & cpuhw->info.auth_ctl) != state) - return -ENOENT; +static long cfset_ioctl_start(unsigned long arg) +{ + struct s390_ctrset_start __user *ustart; + struct s390_ctrset_start start; + void __user *umask; + unsigned int len; + int ret = 0; + size_t need; + + if (cfset_request.ctrset) + return -EBUSY; + ustart = (struct s390_ctrset_start __user *)arg; + if (copy_from_user(&start, ustart, sizeof(start))) + return -EFAULT; + if (start.version != S390_HWCTR_START_VERSION) + return -EINVAL; + if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] | + cpumf_ctr_ctl[CPUMF_CTR_SET_USER] | + cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] | + cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] | + cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG])) + return -EINVAL; /* Invalid counter set */ + if (!start.counter_sets) + return -EINVAL; /* No counter set at all? */ + cpumask_clear(&cfset_request.mask); + len = min_t(u64, start.cpumask_len, cpumask_size()); + umask = (void __user *)start.cpumask; + if (copy_from_user(&cfset_request.mask, umask, len)) + return -EFAULT; + if (cpumask_empty(&cfset_request.mask)) + return -EINVAL; + need = cfset_needspace(start.counter_sets); + if (put_user(need, &ustart->data_bytes)) + ret = -EFAULT; + if (ret) + goto out; + cfset_request.ctrset = start.counter_sets; + ret = cfset_all_start(); +out: + if (ret) + cfset_ctrset_clear(); + debug_sprintf_event(cf_dbg, 4, "%s sets %#lx need %ld ret %d\n", + __func__, cfset_request.ctrset, need, ret); + return ret; +} - cpuhw->txn_flags = 0; - perf_pmu_enable(pmu); +/* Entry point to the /dev/hwctr device interface. + * The ioctl system call supports three subcommands: + * S390_HWCTR_START: Start the specified counter sets on a CPU list. The + * counter set keeps running until explicitly stopped. Returns the number + * of bytes needed to store the counter values. If another S390_HWCTR_START + * ioctl subcommand is called without a previous S390_HWCTR_STOP stop + * command, -EBUSY is returned. + * S390_HWCTR_READ: Read the counter set values from specified CPU list given + * with the S390_HWCTR_START command. + * S390_HWCTR_STOP: Stops the counter sets on the CPU list given with the + * previous S390_HWCTR_START subcommand. + */ +static long cfset_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + int ret; + + get_online_cpus(); + mutex_lock(&cfset_ctrset_mutex); + switch (cmd) { + case S390_HWCTR_START: + ret = cfset_ioctl_start(arg); + break; + case S390_HWCTR_STOP: + ret = cfset_ioctl_stop(); + break; + case S390_HWCTR_READ: + ret = cfset_ioctl_read(arg); + break; + default: + ret = -ENOTTY; + break; + } + mutex_unlock(&cfset_ctrset_mutex); + put_online_cpus(); + return ret; +} + +static const struct file_operations cfset_fops = { + .owner = THIS_MODULE, + .open = cfset_open, + .release = cfset_release, + .unlocked_ioctl = cfset_ioctl, + .compat_ioctl = cfset_ioctl, + .llseek = no_llseek +}; + +static struct miscdevice cfset_dev = { + .name = S390_HWCTR_DEVICE, + .minor = MISC_DYNAMIC_MINOR, + .fops = &cfset_fops, +}; + +int cfset_online_cpu(unsigned int cpu) +{ + struct cfset_call_on_cpu_parm p; + + mutex_lock(&cfset_ctrset_mutex); + if (cfset_request.ctrset) { + p.sets = cfset_request.ctrset; + cfset_ioctl_on(&p); + cpumask_set_cpu(cpu, &cfset_request.mask); + } + mutex_unlock(&cfset_ctrset_mutex); return 0; } -/* Performance monitoring unit for s390x */ -static struct pmu cpumf_pmu = { +int cfset_offline_cpu(unsigned int cpu) +{ + struct cfset_call_on_cpu_parm p; + + mutex_lock(&cfset_ctrset_mutex); + if (cfset_request.ctrset) { + p.sets = cfset_request.ctrset; + cfset_ioctl_off(&p); + cpumask_clear_cpu(cpu, &cfset_request.mask); + } + mutex_unlock(&cfset_ctrset_mutex); + return 0; +} + +static void cfdiag_read(struct perf_event *event) +{ + debug_sprintf_event(cf_dbg, 3, "%s event %#llx count %ld\n", __func__, + event->attr.config, local64_read(&event->count)); +} + +static int get_authctrsets(void) +{ + struct cpu_cf_events *cpuhw; + unsigned long auth = 0; + enum cpumf_ctr_set i; + + cpuhw = &get_cpu_var(cpu_cf_events); + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { + if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i]) + auth |= cpumf_ctr_ctl[i]; + } + put_cpu_var(cpu_cf_events); + return auth; +} + +/* Setup the event. Test for authorized counter sets and only include counter + * sets which are authorized at the time of the setup. Including unauthorized + * counter sets result in specification exception (and panic). + */ +static int cfdiag_event_init2(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + int err = 0; + + /* Set sample_period to indicate sampling */ + event->hw.config = attr->config; + event->hw.sample_period = attr->sample_period; + local64_set(&event->hw.period_left, event->hw.sample_period); + local64_set(&event->count, 0); + event->hw.last_period = event->hw.sample_period; + + /* Add all authorized counter sets to config_base. The + * the hardware init function is either called per-cpu or just once + * for all CPUS (event->cpu == -1). This depends on the whether + * counting is started for all CPUs or on a per workload base where + * the perf event moves from one CPU to another CPU. + * Checking the authorization on any CPU is fine as the hardware + * applies the same authorization settings to all CPUs. + */ + event->hw.config_base = get_authctrsets(); + + /* No authorized counter sets, nothing to count/sample */ + if (!event->hw.config_base) + err = -EINVAL; + + debug_sprintf_event(cf_dbg, 5, "%s err %d config_base %#lx\n", + __func__, err, event->hw.config_base); + return err; +} + +static int cfdiag_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + int err = -ENOENT; + + if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG || + event->attr.type != event->pmu->type) + goto out; + + /* Raw events are used to access counters directly, + * hence do not permit excludes. + * This event is useless without PERF_SAMPLE_RAW to return counter set + * values as raw data. + */ + if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv || + !(attr->sample_type & (PERF_SAMPLE_CPU | PERF_SAMPLE_RAW))) { + err = -EOPNOTSUPP; + goto out; + } + + /* Initialize for using the CPU-measurement counter facility */ + cpumf_hw_inuse(); + event->destroy = hw_perf_event_destroy; + + err = cfdiag_event_init2(event); + if (unlikely(err)) + event->destroy(event); +out: + return err; +} + +/* Create cf_diag/events/CF_DIAG event sysfs file. This counter is used + * to collect the complete counter sets for a scheduled process. Target + * are complete counter sets attached as raw data to the artificial event. + * This results in complete counter sets available when a process is + * scheduled. Contains the delta of every counter while the process was + * running. + */ +CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG); + +static struct attribute *cfdiag_events_attr[] = { + CPUMF_EVENT_PTR(CF_DIAG, CF_DIAG), + NULL, +}; + +PMU_FORMAT_ATTR(event, "config:0-63"); + +static struct attribute *cfdiag_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group cfdiag_events_group = { + .name = "events", + .attrs = cfdiag_events_attr, +}; +static struct attribute_group cfdiag_format_group = { + .name = "format", + .attrs = cfdiag_format_attr, +}; +static const struct attribute_group *cfdiag_attr_groups[] = { + &cfdiag_events_group, + &cfdiag_format_group, + NULL, +}; + +/* Performance monitoring unit for event CF_DIAG. Since this event + * is also started and stopped via the perf_event_open() system call, use + * the same event enable/disable call back functions. They do not + * have a pointer to the perf_event strcture as first parameter. + * + * The functions XXX_add, XXX_del, XXX_start and XXX_stop are also common. + * Reuse them and distinguish the event (always first parameter) via + * 'config' member. + */ +static struct pmu cf_diag = { .task_ctx_nr = perf_sw_context, - .capabilities = PERF_PMU_CAP_NO_INTERRUPT, + .event_init = cfdiag_event_init, .pmu_enable = cpumf_pmu_enable, .pmu_disable = cpumf_pmu_disable, - .event_init = cpumf_pmu_event_init, .add = cpumf_pmu_add, .del = cpumf_pmu_del, .start = cpumf_pmu_start, .stop = cpumf_pmu_stop, - .read = cpumf_pmu_read, - .start_txn = cpumf_pmu_start_txn, - .commit_txn = cpumf_pmu_commit_txn, - .cancel_txn = cpumf_pmu_cancel_txn, + .read = cfdiag_read, + + .attr_groups = cfdiag_attr_groups }; -static int __init cpumf_pmu_init(void) +/* Calculate memory needed to store all counter sets together with header and + * trailer data. This is independent of the counter set authorization which + * can vary depending on the configuration. + */ +static size_t cfdiag_maxsize(struct cpumf_ctr_info *info) +{ + size_t max_size = sizeof(struct cf_trailer_entry); + enum cpumf_ctr_set i; + + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { + size_t size = cpum_cf_ctrset_size(i, info); + + if (size) + max_size += size * sizeof(u64) + + sizeof(struct cf_ctrset_entry); + } + return max_size; +} + +/* Get the CPU speed, try sampling facility first and CPU attributes second. */ +static void cfdiag_get_cpu_speed(void) { + if (cpum_sf_avail()) { /* Sampling facility first */ + struct hws_qsi_info_block si; + + memset(&si, 0, sizeof(si)); + if (!qsi(&si)) { + cfdiag_cpu_speed = si.cpu_speed; + return; + } + } + + /* Fallback: CPU speed extract static part. Used in case + * CPU Measurement Sampling Facility is turned off. + */ + if (test_facility(34)) { + unsigned long mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); + + if (mhz != -1UL) + cfdiag_cpu_speed = mhz & 0xffffffff; + } +} + +static int cfset_init(void) +{ + struct cpumf_ctr_info info; + size_t need; int rc; - if (!kernel_cpumcf_avail()) + if (qctri(&info)) return -ENODEV; - cpumf_pmu.attr_groups = cpumf_cf_event_group(); - rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1); - if (rc) - pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); + cfdiag_get_cpu_speed(); + /* Make sure the counter set data fits into predefined buffer. */ + need = cfdiag_maxsize(&info); + if (need > sizeof(((struct cpu_cf_events *)0)->start)) { + pr_err("Insufficient memory for PMU(cpum_cf_diag) need=%zu\n", + need); + return -ENOMEM; + } + + rc = misc_register(&cfset_dev); + if (rc) { + pr_err("Registration of /dev/%s failed rc=%i\n", + cfset_dev.name, rc); + goto out; + } + + rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1); + if (rc) { + misc_deregister(&cfset_dev); + pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n", + rc); + } +out: return rc; } -subsys_initcall(cpumf_pmu_init); + +device_initcall(cpumf_pmu_init); diff --git a/arch/s390/kernel/perf_cpum_cf_common.c b/arch/s390/kernel/perf_cpum_cf_common.c index 6d53215c8484..30f0242de4a5 100644 --- a/arch/s390/kernel/perf_cpum_cf_common.c +++ b/arch/s390/kernel/perf_cpum_cf_common.c @@ -29,8 +29,11 @@ DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events) = { }, .alert = ATOMIC64_INIT(0), .state = 0, + .dev_state = 0, .flags = 0, - .txn_flags = 0, + .used = 0, + .usedss = 0, + .sets = 0 }; /* Indicator whether the CPU-Measurement Counter Facility Support is ready */ static bool cpum_cf_initalized; @@ -97,25 +100,10 @@ bool kernel_cpumcf_avail(void) } EXPORT_SYMBOL(kernel_cpumcf_avail); - -/* Reserve/release functions for sharing perf hardware */ -static DEFINE_SPINLOCK(cpumcf_owner_lock); -static void *cpumcf_owner; - /* Initialize the CPU-measurement counter facility */ int __kernel_cpumcf_begin(void) { int flags = PMC_INIT; - int err = 0; - - spin_lock(&cpumcf_owner_lock); - if (cpumcf_owner) - err = -EBUSY; - else - cpumcf_owner = __builtin_return_address(0); - spin_unlock(&cpumcf_owner_lock); - if (err) - return err; on_each_cpu(cpum_cf_setup_cpu, &flags, 1); irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); @@ -145,10 +133,6 @@ void __kernel_cpumcf_end(void) on_each_cpu(cpum_cf_setup_cpu, &flags, 1); irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); - - spin_lock(&cpumcf_owner_lock); - cpumcf_owner = NULL; - spin_unlock(&cpumcf_owner_lock); } EXPORT_SYMBOL(__kernel_cpumcf_end); @@ -162,11 +146,13 @@ static int cpum_cf_setup(unsigned int cpu, int flags) static int cpum_cf_online_cpu(unsigned int cpu) { - return cpum_cf_setup(cpu, PMC_INIT); + cpum_cf_setup(cpu, PMC_INIT); + return cfset_online_cpu(cpu); } static int cpum_cf_offline_cpu(unsigned int cpu) { + cfset_offline_cpu(cpu); return cpum_cf_setup(cpu, PMC_RELEASE); } diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c deleted file mode 100644 index 08c985c1097c..000000000000 --- a/arch/s390/kernel/perf_cpum_cf_diag.c +++ /dev/null @@ -1,1148 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Performance event support for s390x - CPU-measurement Counter Sets - * - * Copyright IBM Corp. 2019, 2021 - * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com> - * Thomas Richer <tmricht@linux.ibm.com> - */ -#define KMSG_COMPONENT "cpum_cf_diag" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include <linux/kernel.h> -#include <linux/kernel_stat.h> -#include <linux/percpu.h> -#include <linux/notifier.h> -#include <linux/init.h> -#include <linux/export.h> -#include <linux/slab.h> -#include <linux/processor.h> -#include <linux/miscdevice.h> -#include <linux/mutex.h> - -#include <asm/ctl_reg.h> -#include <asm/irq.h> -#include <asm/cpu_mcf.h> -#include <asm/timex.h> -#include <asm/debug.h> - -#include <asm/hwctrset.h> - -#define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */ - /* interval in seconds */ -static unsigned int cf_diag_cpu_speed; -static debug_info_t *cf_diag_dbg; - -struct cf_diag_csd { /* Counter set data per CPU */ - size_t used; /* Bytes used in data/start */ - unsigned char start[PAGE_SIZE]; /* Counter set at event start */ - unsigned char data[PAGE_SIZE]; /* Counter set at event delete */ - unsigned int sets; /* # Counter set saved in data */ -}; -static DEFINE_PER_CPU(struct cf_diag_csd, cf_diag_csd); - -/* Counter sets are stored as data stream in a page sized memory buffer and - * exported to user space via raw data attached to the event sample data. - * Each counter set starts with an eight byte header consisting of: - * - a two byte eye catcher (0xfeef) - * - a one byte counter set number - * - a two byte counter set size (indicates the number of counters in this set) - * - a three byte reserved value (must be zero) to make the header the same - * size as a counter value. - * All counter values are eight byte in size. - * - * All counter sets are followed by a 64 byte trailer. - * The trailer consists of a: - * - flag field indicating valid fields when corresponding bit set - * - the counter facility first and second version number - * - the CPU speed if nonzero - * - the time stamp the counter sets have been collected - * - the time of day (TOD) base value - * - the machine type. - * - * The counter sets are saved when the process is prepared to be executed on a - * CPU and saved again when the process is going to be removed from a CPU. - * The difference of both counter sets are calculated and stored in the event - * sample data area. - */ - -struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */ - unsigned int def:16; /* 0-15 Data Entry Format */ - unsigned int set:16; /* 16-31 Counter set identifier */ - unsigned int ctr:16; /* 32-47 Number of stored counters */ - unsigned int res1:16; /* 48-63 Reserved */ -}; - -struct cf_trailer_entry { /* CPU-M CF_DIAG trailer (64 byte) */ - /* 0 - 7 */ - union { - struct { - unsigned int clock_base:1; /* TOD clock base set */ - unsigned int speed:1; /* CPU speed set */ - /* Measurement alerts */ - unsigned int mtda:1; /* Loss of MT ctr. data alert */ - unsigned int caca:1; /* Counter auth. change alert */ - unsigned int lcda:1; /* Loss of counter data alert */ - }; - unsigned long flags; /* 0-63 All indicators */ - }; - /* 8 - 15 */ - unsigned int cfvn:16; /* 64-79 Ctr First Version */ - unsigned int csvn:16; /* 80-95 Ctr Second Version */ - unsigned int cpu_speed:32; /* 96-127 CPU speed */ - /* 16 - 23 */ - unsigned long timestamp; /* 128-191 Timestamp (TOD) */ - /* 24 - 55 */ - union { - struct { - unsigned long progusage1; - unsigned long progusage2; - unsigned long progusage3; - unsigned long tod_base; - }; - unsigned long progusage[4]; - }; - /* 56 - 63 */ - unsigned int mach_type:16; /* Machine type */ - unsigned int res1:16; /* Reserved */ - unsigned int res2:32; /* Reserved */ -}; - -/* Create the trailer data at the end of a page. */ -static void cf_diag_trailer(struct cf_trailer_entry *te) -{ - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); - struct cpuid cpuid; - - te->cfvn = cpuhw->info.cfvn; /* Counter version numbers */ - te->csvn = cpuhw->info.csvn; - - get_cpu_id(&cpuid); /* Machine type */ - te->mach_type = cpuid.machine; - te->cpu_speed = cf_diag_cpu_speed; - if (te->cpu_speed) - te->speed = 1; - te->clock_base = 1; /* Save clock base */ - te->tod_base = tod_clock_base.tod; - te->timestamp = get_tod_clock_fast(); -} - -/* - * Change the CPUMF state to active. - * Enable and activate the CPU-counter sets according - * to the per-cpu control state. - */ -static void cf_diag_enable(struct pmu *pmu) -{ - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); - int err; - - debug_sprintf_event(cf_diag_dbg, 5, - "%s pmu %p cpu %d flags %#x state %#llx\n", - __func__, pmu, smp_processor_id(), cpuhw->flags, - cpuhw->state); - if (cpuhw->flags & PMU_F_ENABLED) - return; - - err = lcctl(cpuhw->state); - if (err) { - pr_err("Enabling the performance measuring unit " - "failed with rc=%x\n", err); - return; - } - cpuhw->flags |= PMU_F_ENABLED; -} - -/* - * Change the CPUMF state to inactive. - * Disable and enable (inactive) the CPU-counter sets according - * to the per-cpu control state. - */ -static void cf_diag_disable(struct pmu *pmu) -{ - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); - u64 inactive; - int err; - - debug_sprintf_event(cf_diag_dbg, 5, - "%s pmu %p cpu %d flags %#x state %#llx\n", - __func__, pmu, smp_processor_id(), cpuhw->flags, - cpuhw->state); - if (!(cpuhw->flags & PMU_F_ENABLED)) - return; - - inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); - err = lcctl(inactive); - if (err) { - pr_err("Disabling the performance measuring unit " - "failed with rc=%x\n", err); - return; - } - cpuhw->flags &= ~PMU_F_ENABLED; -} - -/* Number of perf events counting hardware events */ -static atomic_t cf_diag_events = ATOMIC_INIT(0); -/* Used to avoid races in calling reserve/release_cpumf_hardware */ -static DEFINE_MUTEX(cf_diag_reserve_mutex); - -/* Release the PMU if event is the last perf event */ -static void cf_diag_perf_event_destroy(struct perf_event *event) -{ - debug_sprintf_event(cf_diag_dbg, 5, - "%s event %p cpu %d cf_diag_events %d\n", - __func__, event, smp_processor_id(), - atomic_read(&cf_diag_events)); - if (atomic_dec_return(&cf_diag_events) == 0) - __kernel_cpumcf_end(); -} - -static int get_authctrsets(void) -{ - struct cpu_cf_events *cpuhw; - unsigned long auth = 0; - enum cpumf_ctr_set i; - - cpuhw = &get_cpu_var(cpu_cf_events); - for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { - if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i]) - auth |= cpumf_ctr_ctl[i]; - } - put_cpu_var(cpu_cf_events); - return auth; -} - -/* Setup the event. Test for authorized counter sets and only include counter - * sets which are authorized at the time of the setup. Including unauthorized - * counter sets result in specification exception (and panic). - */ -static int __hw_perf_event_init(struct perf_event *event) -{ - struct perf_event_attr *attr = &event->attr; - int err = 0; - - debug_sprintf_event(cf_diag_dbg, 5, "%s event %p cpu %d\n", __func__, - event, event->cpu); - - event->hw.config = attr->config; - - /* Add all authorized counter sets to config_base. The - * the hardware init function is either called per-cpu or just once - * for all CPUS (event->cpu == -1). This depends on the whether - * counting is started for all CPUs or on a per workload base where - * the perf event moves from one CPU to another CPU. - * Checking the authorization on any CPU is fine as the hardware - * applies the same authorization settings to all CPUs. - */ - event->hw.config_base = get_authctrsets(); - - /* No authorized counter sets, nothing to count/sample */ - if (!event->hw.config_base) { - err = -EINVAL; - goto out; - } - - /* Set sample_period to indicate sampling */ - event->hw.sample_period = attr->sample_period; - local64_set(&event->hw.period_left, event->hw.sample_period); - event->hw.last_period = event->hw.sample_period; -out: - debug_sprintf_event(cf_diag_dbg, 5, "%s err %d config_base %#lx\n", - __func__, err, event->hw.config_base); - return err; -} - -/* Return 0 if the CPU-measurement counter facility is currently free - * and an error otherwise. - */ -static int cf_diag_perf_event_inuse(void) -{ - int err = 0; - - if (!atomic_inc_not_zero(&cf_diag_events)) { - mutex_lock(&cf_diag_reserve_mutex); - if (atomic_read(&cf_diag_events) == 0 && - __kernel_cpumcf_begin()) - err = -EBUSY; - else - err = atomic_inc_return(&cf_diag_events); - mutex_unlock(&cf_diag_reserve_mutex); - } - return err; -} - -static int cf_diag_event_init(struct perf_event *event) -{ - struct perf_event_attr *attr = &event->attr; - int err = -ENOENT; - - debug_sprintf_event(cf_diag_dbg, 5, - "%s event %p cpu %d config %#llx type:%u " - "sample_type %#llx cf_diag_events %d\n", __func__, - event, event->cpu, attr->config, event->pmu->type, - attr->sample_type, atomic_read(&cf_diag_events)); - - if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG || - event->attr.type != event->pmu->type) - goto out; - - /* Raw events are used to access counters directly, - * hence do not permit excludes. - * This event is usesless without PERF_SAMPLE_RAW to return counter set - * values as raw data. - */ - if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv || - !(attr->sample_type & (PERF_SAMPLE_CPU | PERF_SAMPLE_RAW))) { - err = -EOPNOTSUPP; - goto out; - } - - /* Initialize for using the CPU-measurement counter facility */ - err = cf_diag_perf_event_inuse(); - if (err < 0) - goto out; - event->destroy = cf_diag_perf_event_destroy; - - err = __hw_perf_event_init(event); - if (unlikely(err)) - event->destroy(event); -out: - debug_sprintf_event(cf_diag_dbg, 5, "%s err %d\n", __func__, err); - return err; -} - -static void cf_diag_read(struct perf_event *event) -{ - debug_sprintf_event(cf_diag_dbg, 5, "%s event %p\n", __func__, event); -} - -/* Calculate memory needed to store all counter sets together with header and - * trailer data. This is independend of the counter set authorization which - * can vary depending on the configuration. - */ -static size_t cf_diag_ctrset_maxsize(struct cpumf_ctr_info *info) -{ - size_t max_size = sizeof(struct cf_trailer_entry); - enum cpumf_ctr_set i; - - for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { - size_t size = cpum_cf_ctrset_size(i, info); - - if (size) - max_size += size * sizeof(u64) + - sizeof(struct cf_ctrset_entry); - } - debug_sprintf_event(cf_diag_dbg, 5, "%s max_size %zu\n", __func__, - max_size); - - return max_size; -} - -/* Read a counter set. The counter set number determines which counter set and - * the CPUM-CF first and second version number determine the number of - * available counters in this counter set. - * Each counter set starts with header containing the counter set number and - * the number of 8 byte counters. - * - * The functions returns the number of bytes occupied by this counter set - * including the header. - * If there is no counter in the counter set, this counter set is useless and - * zero is returned on this case. - */ -static size_t cf_diag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset, - size_t room) -{ - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); - size_t ctrset_size, need = 0; - int rc = 3; /* Assume write failure */ - - ctrdata->def = CF_DIAG_CTRSET_DEF; - ctrdata->set = ctrset; - ctrdata->res1 = 0; - ctrset_size = cpum_cf_ctrset_size(ctrset, &cpuhw->info); - - if (ctrset_size) { /* Save data */ - need = ctrset_size * sizeof(u64) + sizeof(*ctrdata); - if (need <= room) - rc = ctr_stcctm(ctrset, ctrset_size, - (u64 *)(ctrdata + 1)); - if (rc != 3) - ctrdata->ctr = ctrset_size; - else - need = 0; - } - - debug_sprintf_event(cf_diag_dbg, 6, - "%s ctrset %d ctrset_size %zu cfvn %d csvn %d" - " need %zd rc %d\n", - __func__, ctrset, ctrset_size, cpuhw->info.cfvn, - cpuhw->info.csvn, need, rc); - return need; -} - -/* Read out all counter sets and save them in the provided data buffer. - * The last 64 byte host an artificial trailer entry. - */ -static size_t cf_diag_getctr(void *data, size_t sz, unsigned long auth) -{ - struct cf_trailer_entry *trailer; - size_t offset = 0, done; - int i; - - memset(data, 0, sz); - sz -= sizeof(*trailer); /* Always room for trailer */ - for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { - struct cf_ctrset_entry *ctrdata = data + offset; - - if (!(auth & cpumf_ctr_ctl[i])) - continue; /* Counter set not authorized */ - - done = cf_diag_getctrset(ctrdata, i, sz - offset); - offset += done; - debug_sprintf_event(cf_diag_dbg, 6, - "%s ctrset %d offset %zu done %zu\n", - __func__, i, offset, done); - } - trailer = data + offset; - cf_diag_trailer(trailer); - return offset + sizeof(*trailer); -} - -/* Calculate the difference for each counter in a counter set. */ -static void cf_diag_diffctrset(u64 *pstart, u64 *pstop, int counters) -{ - for (; --counters >= 0; ++pstart, ++pstop) - if (*pstop >= *pstart) - *pstop -= *pstart; - else - *pstop = *pstart - *pstop; -} - -/* Scan the counter sets and calculate the difference of each counter - * in each set. The result is the increment of each counter during the - * period the counter set has been activated. - * - * Return true on success. - */ -static int cf_diag_diffctr(struct cf_diag_csd *csd, unsigned long auth) -{ - struct cf_trailer_entry *trailer_start, *trailer_stop; - struct cf_ctrset_entry *ctrstart, *ctrstop; - size_t offset = 0; - - auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1; - do { - ctrstart = (struct cf_ctrset_entry *)(csd->start + offset); - ctrstop = (struct cf_ctrset_entry *)(csd->data + offset); - - if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) { - pr_err("cpum_cf_diag counter set compare error " - "in set %i\n", ctrstart->set); - return 0; - } - auth &= ~cpumf_ctr_ctl[ctrstart->set]; - if (ctrstart->def == CF_DIAG_CTRSET_DEF) { - cf_diag_diffctrset((u64 *)(ctrstart + 1), - (u64 *)(ctrstop + 1), ctrstart->ctr); - offset += ctrstart->ctr * sizeof(u64) + - sizeof(*ctrstart); - } - debug_sprintf_event(cf_diag_dbg, 6, - "%s set %d ctr %d offset %zu auth %lx\n", - __func__, ctrstart->set, ctrstart->ctr, - offset, auth); - } while (ctrstart->def && auth); - - /* Save time_stamp from start of event in stop's trailer */ - trailer_start = (struct cf_trailer_entry *)(csd->start + offset); - trailer_stop = (struct cf_trailer_entry *)(csd->data + offset); - trailer_stop->progusage[0] = trailer_start->timestamp; - - return 1; -} - -/* Create perf event sample with the counter sets as raw data. The sample - * is then pushed to the event subsystem and the function checks for - * possible event overflows. If an event overflow occurs, the PMU is - * stopped. - * - * Return non-zero if an event overflow occurred. - */ -static int cf_diag_push_sample(struct perf_event *event, - struct cf_diag_csd *csd) -{ - struct perf_sample_data data; - struct perf_raw_record raw; - struct pt_regs regs; - int overflow; - - /* Setup perf sample */ - perf_sample_data_init(&data, 0, event->hw.last_period); - memset(®s, 0, sizeof(regs)); - memset(&raw, 0, sizeof(raw)); - - if (event->attr.sample_type & PERF_SAMPLE_CPU) - data.cpu_entry.cpu = event->cpu; - if (event->attr.sample_type & PERF_SAMPLE_RAW) { - raw.frag.size = csd->used; - raw.frag.data = csd->data; - raw.size = csd->used; - data.raw = &raw; - } - - overflow = perf_event_overflow(event, &data, ®s); - debug_sprintf_event(cf_diag_dbg, 6, - "%s event %p cpu %d sample_type %#llx raw %d " - "ov %d\n", __func__, event, event->cpu, - event->attr.sample_type, raw.size, overflow); - if (overflow) - event->pmu->stop(event, 0); - - perf_event_update_userpage(event); - return overflow; -} - -static void cf_diag_start(struct perf_event *event, int flags) -{ - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); - struct cf_diag_csd *csd = this_cpu_ptr(&cf_diag_csd); - struct hw_perf_event *hwc = &event->hw; - - debug_sprintf_event(cf_diag_dbg, 5, - "%s event %p cpu %d flags %#x hwc-state %#x\n", - __func__, event, event->cpu, flags, hwc->state); - if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) - return; - - /* (Re-)enable and activate all counter sets */ - lcctl(0); /* Reset counter sets */ - hwc->state = 0; - ctr_set_multiple_enable(&cpuhw->state, hwc->config_base); - lcctl(cpuhw->state); /* Enable counter sets */ - csd->used = cf_diag_getctr(csd->start, sizeof(csd->start), - event->hw.config_base); - ctr_set_multiple_start(&cpuhw->state, hwc->config_base); - /* Function cf_diag_enable() starts the counter sets. */ -} - -static void cf_diag_stop(struct perf_event *event, int flags) -{ - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); - struct cf_diag_csd *csd = this_cpu_ptr(&cf_diag_csd); - struct hw_perf_event *hwc = &event->hw; - - debug_sprintf_event(cf_diag_dbg, 5, - "%s event %p cpu %d flags %#x hwc-state %#x\n", - __func__, event, event->cpu, flags, hwc->state); - - /* Deactivate all counter sets */ - ctr_set_multiple_stop(&cpuhw->state, hwc->config_base); - local64_inc(&event->count); - csd->used = cf_diag_getctr(csd->data, sizeof(csd->data), - event->hw.config_base); - if (cf_diag_diffctr(csd, event->hw.config_base)) - cf_diag_push_sample(event, csd); - hwc->state |= PERF_HES_STOPPED; -} - -static int cf_diag_add(struct perf_event *event, int flags) -{ - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); - int err = 0; - - debug_sprintf_event(cf_diag_dbg, 5, - "%s event %p cpu %d flags %#x cpuhw %p\n", - __func__, event, event->cpu, flags, cpuhw); - - if (cpuhw->flags & PMU_F_IN_USE) { - err = -EAGAIN; - goto out; - } - - event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; - - cpuhw->flags |= PMU_F_IN_USE; - if (flags & PERF_EF_START) - cf_diag_start(event, PERF_EF_RELOAD); -out: - debug_sprintf_event(cf_diag_dbg, 5, "%s err %d\n", __func__, err); - return err; -} - -static void cf_diag_del(struct perf_event *event, int flags) -{ - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); - - debug_sprintf_event(cf_diag_dbg, 5, - "%s event %p cpu %d flags %#x\n", - __func__, event, event->cpu, flags); - - cf_diag_stop(event, PERF_EF_UPDATE); - ctr_set_multiple_stop(&cpuhw->state, event->hw.config_base); - ctr_set_multiple_disable(&cpuhw->state, event->hw.config_base); - cpuhw->flags &= ~PMU_F_IN_USE; -} - -/* Default counter set events and format attribute groups */ - -CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG); - -static struct attribute *cf_diag_events_attr[] = { - CPUMF_EVENT_PTR(CF_DIAG, CF_DIAG), - NULL, -}; - -PMU_FORMAT_ATTR(event, "config:0-63"); - -static struct attribute *cf_diag_format_attr[] = { - &format_attr_event.attr, - NULL, -}; - -static struct attribute_group cf_diag_events_group = { - .name = "events", - .attrs = cf_diag_events_attr, -}; -static struct attribute_group cf_diag_format_group = { - .name = "format", - .attrs = cf_diag_format_attr, -}; -static const struct attribute_group *cf_diag_attr_groups[] = { - &cf_diag_events_group, - &cf_diag_format_group, - NULL, -}; - -/* Performance monitoring unit for s390x */ -static struct pmu cf_diag = { - .task_ctx_nr = perf_sw_context, - .pmu_enable = cf_diag_enable, - .pmu_disable = cf_diag_disable, - .event_init = cf_diag_event_init, - .add = cf_diag_add, - .del = cf_diag_del, - .start = cf_diag_start, - .stop = cf_diag_stop, - .read = cf_diag_read, - - .attr_groups = cf_diag_attr_groups -}; - -/* Get the CPU speed, try sampling facility first and CPU attributes second. */ -static void cf_diag_get_cpu_speed(void) -{ - if (cpum_sf_avail()) { /* Sampling facility first */ - struct hws_qsi_info_block si; - - memset(&si, 0, sizeof(si)); - if (!qsi(&si)) { - cf_diag_cpu_speed = si.cpu_speed; - return; - } - } - - if (test_facility(34)) { /* CPU speed extract static part */ - unsigned long mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); - - if (mhz != -1UL) - cf_diag_cpu_speed = mhz & 0xffffffff; - } -} - -/* Code to create device and file I/O operations */ -static atomic_t ctrset_opencnt = ATOMIC_INIT(0); /* Excl. access */ - -static int cf_diag_open(struct inode *inode, struct file *file) -{ - int err = 0; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (atomic_xchg(&ctrset_opencnt, 1)) - return -EBUSY; - - /* Avoid concurrent access with perf_event_open() system call */ - mutex_lock(&cf_diag_reserve_mutex); - if (atomic_read(&cf_diag_events) || __kernel_cpumcf_begin()) - err = -EBUSY; - mutex_unlock(&cf_diag_reserve_mutex); - if (err) { - atomic_set(&ctrset_opencnt, 0); - return err; - } - file->private_data = NULL; - debug_sprintf_event(cf_diag_dbg, 2, "%s\n", __func__); - /* nonseekable_open() never fails */ - return nonseekable_open(inode, file); -} - -/* Variables for ioctl() interface support */ -static DEFINE_MUTEX(cf_diag_ctrset_mutex); -static struct cf_diag_ctrset { - unsigned long ctrset; /* Bit mask of counter set to read */ - cpumask_t mask; /* CPU mask to read from */ -} cf_diag_ctrset; - -static void cf_diag_ctrset_clear(void) -{ - cpumask_clear(&cf_diag_ctrset.mask); - cf_diag_ctrset.ctrset = 0; -} - -static void cf_diag_release_cpu(void *p) -{ - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); - - debug_sprintf_event(cf_diag_dbg, 3, "%s cpu %d\n", __func__, - smp_processor_id()); - lcctl(0); /* Reset counter sets */ - cpuhw->state = 0; /* Save state in CPU hardware state */ -} - -/* Release function is also called when application gets terminated without - * doing a proper ioctl(..., S390_HWCTR_STOP, ...) command. - * Since only one application is allowed to open the device, simple stop all - * CPU counter sets. - */ -static int cf_diag_release(struct inode *inode, struct file *file) -{ - on_each_cpu(cf_diag_release_cpu, NULL, 1); - cf_diag_ctrset_clear(); - atomic_set(&ctrset_opencnt, 0); - __kernel_cpumcf_end(); - debug_sprintf_event(cf_diag_dbg, 2, "%s\n", __func__); - return 0; -} - -struct cf_diag_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */ - unsigned int sets; /* Counter set bit mask */ - atomic_t cpus_ack; /* # CPUs successfully executed func */ -}; - -static int cf_diag_all_copy(unsigned long arg, cpumask_t *mask) -{ - struct s390_ctrset_read __user *ctrset_read; - unsigned int cpu, cpus, rc; - void __user *uptr; - - ctrset_read = (struct s390_ctrset_read __user *)arg; - uptr = ctrset_read->data; - for_each_cpu(cpu, mask) { - struct cf_diag_csd *csd = per_cpu_ptr(&cf_diag_csd, cpu); - struct s390_ctrset_cpudata __user *ctrset_cpudata; - - ctrset_cpudata = uptr; - debug_sprintf_event(cf_diag_dbg, 5, "%s cpu %d used %zd\n", - __func__, cpu, csd->used); - rc = put_user(cpu, &ctrset_cpudata->cpu_nr); - rc |= put_user(csd->sets, &ctrset_cpudata->no_sets); - rc |= copy_to_user(ctrset_cpudata->data, csd->data, csd->used); - if (rc) - return -EFAULT; - uptr += sizeof(struct s390_ctrset_cpudata) + csd->used; - cond_resched(); - } - cpus = cpumask_weight(mask); - if (put_user(cpus, &ctrset_read->no_cpus)) - return -EFAULT; - debug_sprintf_event(cf_diag_dbg, 5, "%s copied %ld\n", - __func__, uptr - (void __user *)ctrset_read->data); - return 0; -} - -static size_t cf_diag_cpuset_read(struct s390_ctrset_setdata *p, int ctrset, - int ctrset_size, size_t room) -{ - size_t need = 0; - int rc = -1; - - need = sizeof(*p) + sizeof(u64) * ctrset_size; - debug_sprintf_event(cf_diag_dbg, 5, - "%s room %zd need %zd set %#x set_size %d\n", - __func__, room, need, ctrset, ctrset_size); - if (need <= room) { - p->set = cpumf_ctr_ctl[ctrset]; - p->no_cnts = ctrset_size; - rc = ctr_stcctm(ctrset, ctrset_size, (u64 *)p->cv); - if (rc == 3) /* Nothing stored */ - need = 0; - } - debug_sprintf_event(cf_diag_dbg, 5, "%s need %zd rc %d\n", __func__, - need, rc); - return need; -} - -/* Read all counter sets. Since the perf_event_open() system call with - * event cpum_cf_diag/.../ is blocked when this interface is active, reuse - * the perf_event_open() data buffer to store the counter sets. - */ -static void cf_diag_cpu_read(void *parm) -{ - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); - struct cf_diag_csd *csd = this_cpu_ptr(&cf_diag_csd); - struct cf_diag_call_on_cpu_parm *p = parm; - int set, set_size; - size_t space; - - debug_sprintf_event(cf_diag_dbg, 5, - "%s new %#x flags %#x state %#llx\n", - __func__, p->sets, cpuhw->flags, - cpuhw->state); - /* No data saved yet */ - csd->used = 0; - csd->sets = 0; - memset(csd->data, 0, sizeof(csd->data)); - - /* Scan the counter sets */ - for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) { - struct s390_ctrset_setdata *sp = (void *)csd->data + csd->used; - - if (!(p->sets & cpumf_ctr_ctl[set])) - continue; /* Counter set not in list */ - set_size = cpum_cf_ctrset_size(set, &cpuhw->info); - space = sizeof(csd->data) - csd->used; - space = cf_diag_cpuset_read(sp, set, set_size, space); - if (space) { - csd->used += space; - csd->sets += 1; - } - debug_sprintf_event(cf_diag_dbg, 5, "%s sp %px space %zd\n", - __func__, sp, space); - } - debug_sprintf_event(cf_diag_dbg, 5, "%s sets %d used %zd\n", __func__, - csd->sets, csd->used); -} - -static int cf_diag_all_read(unsigned long arg) -{ - struct cf_diag_call_on_cpu_parm p; - cpumask_var_t mask; - int rc; - - debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__); - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) - return -ENOMEM; - - p.sets = cf_diag_ctrset.ctrset; - cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask); - on_each_cpu_mask(mask, cf_diag_cpu_read, &p, 1); - rc = cf_diag_all_copy(arg, mask); - free_cpumask_var(mask); - debug_sprintf_event(cf_diag_dbg, 5, "%s rc %d\n", __func__, rc); - return rc; -} - -/* Stop all counter sets via ioctl interface */ -static void cf_diag_ioctl_off(void *parm) -{ - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); - struct cf_diag_call_on_cpu_parm *p = parm; - int rc; - - debug_sprintf_event(cf_diag_dbg, 5, - "%s new %#x flags %#x state %#llx\n", - __func__, p->sets, cpuhw->flags, - cpuhw->state); - - ctr_set_multiple_disable(&cpuhw->state, p->sets); - ctr_set_multiple_stop(&cpuhw->state, p->sets); - rc = lcctl(cpuhw->state); /* Stop counter sets */ - if (!cpuhw->state) - cpuhw->flags &= ~PMU_F_IN_USE; - debug_sprintf_event(cf_diag_dbg, 5, - "%s rc %d flags %#x state %#llx\n", __func__, - rc, cpuhw->flags, cpuhw->state); -} - -/* Start counter sets on particular CPU */ -static void cf_diag_ioctl_on(void *parm) -{ - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); - struct cf_diag_call_on_cpu_parm *p = parm; - int rc; - - debug_sprintf_event(cf_diag_dbg, 5, - "%s new %#x flags %#x state %#llx\n", - __func__, p->sets, cpuhw->flags, - cpuhw->state); - - if (!(cpuhw->flags & PMU_F_IN_USE)) - cpuhw->state = 0; - cpuhw->flags |= PMU_F_IN_USE; - rc = lcctl(cpuhw->state); /* Reset unused counter sets */ - ctr_set_multiple_enable(&cpuhw->state, p->sets); - ctr_set_multiple_start(&cpuhw->state, p->sets); - rc |= lcctl(cpuhw->state); /* Start counter sets */ - if (!rc) - atomic_inc(&p->cpus_ack); - debug_sprintf_event(cf_diag_dbg, 5, "%s rc %d state %#llx\n", - __func__, rc, cpuhw->state); -} - -static int cf_diag_all_stop(void) -{ - struct cf_diag_call_on_cpu_parm p = { - .sets = cf_diag_ctrset.ctrset, - }; - cpumask_var_t mask; - - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) - return -ENOMEM; - cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask); - on_each_cpu_mask(mask, cf_diag_ioctl_off, &p, 1); - free_cpumask_var(mask); - return 0; -} - -static int cf_diag_all_start(void) -{ - struct cf_diag_call_on_cpu_parm p = { - .sets = cf_diag_ctrset.ctrset, - .cpus_ack = ATOMIC_INIT(0), - }; - cpumask_var_t mask; - int rc = 0; - - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) - return -ENOMEM; - cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask); - on_each_cpu_mask(mask, cf_diag_ioctl_on, &p, 1); - if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) { - on_each_cpu_mask(mask, cf_diag_ioctl_off, &p, 1); - rc = -EIO; - } - free_cpumask_var(mask); - return rc; -} - -/* Return the maximum required space for all possible CPUs in case one - * CPU will be onlined during the START, READ, STOP cycles. - * To find out the size of the counter sets, any one CPU will do. They - * all have the same counter sets. - */ -static size_t cf_diag_needspace(unsigned int sets) -{ - struct cpu_cf_events *cpuhw = get_cpu_ptr(&cpu_cf_events); - size_t bytes = 0; - int i; - - for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { - if (!(sets & cpumf_ctr_ctl[i])) - continue; - bytes += cpum_cf_ctrset_size(i, &cpuhw->info) * sizeof(u64) + - sizeof(((struct s390_ctrset_setdata *)0)->set) + - sizeof(((struct s390_ctrset_setdata *)0)->no_cnts); - } - bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids * - (bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) + - sizeof(((struct s390_ctrset_cpudata *)0)->no_sets)); - debug_sprintf_event(cf_diag_dbg, 5, "%s bytes %ld\n", __func__, - bytes); - put_cpu_ptr(&cpu_cf_events); - return bytes; -} - -static long cf_diag_ioctl_read(unsigned long arg) -{ - struct s390_ctrset_read read; - int ret = 0; - - debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__); - if (copy_from_user(&read, (char __user *)arg, sizeof(read))) - return -EFAULT; - ret = cf_diag_all_read(arg); - debug_sprintf_event(cf_diag_dbg, 5, "%s ret %d\n", __func__, ret); - return ret; -} - -static long cf_diag_ioctl_stop(void) -{ - int ret; - - debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__); - ret = cf_diag_all_stop(); - cf_diag_ctrset_clear(); - debug_sprintf_event(cf_diag_dbg, 5, "%s ret %d\n", __func__, ret); - return ret; -} - -static long cf_diag_ioctl_start(unsigned long arg) -{ - struct s390_ctrset_start __user *ustart; - struct s390_ctrset_start start; - void __user *umask; - unsigned int len; - int ret = 0; - size_t need; - - if (cf_diag_ctrset.ctrset) - return -EBUSY; - ustart = (struct s390_ctrset_start __user *)arg; - if (copy_from_user(&start, ustart, sizeof(start))) - return -EFAULT; - if (start.version != S390_HWCTR_START_VERSION) - return -EINVAL; - if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] | - cpumf_ctr_ctl[CPUMF_CTR_SET_USER] | - cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] | - cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] | - cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG])) - return -EINVAL; /* Invalid counter set */ - if (!start.counter_sets) - return -EINVAL; /* No counter set at all? */ - cpumask_clear(&cf_diag_ctrset.mask); - len = min_t(u64, start.cpumask_len, cpumask_size()); - umask = (void __user *)start.cpumask; - if (copy_from_user(&cf_diag_ctrset.mask, umask, len)) - return -EFAULT; - if (cpumask_empty(&cf_diag_ctrset.mask)) - return -EINVAL; - need = cf_diag_needspace(start.counter_sets); - if (put_user(need, &ustart->data_bytes)) - ret = -EFAULT; - if (ret) - goto out; - cf_diag_ctrset.ctrset = start.counter_sets; - ret = cf_diag_all_start(); -out: - if (ret) - cf_diag_ctrset_clear(); - debug_sprintf_event(cf_diag_dbg, 2, "%s sets %#lx need %ld ret %d\n", - __func__, cf_diag_ctrset.ctrset, need, ret); - return ret; -} - -static long cf_diag_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - int ret; - - debug_sprintf_event(cf_diag_dbg, 2, "%s cmd %#x arg %lx\n", __func__, - cmd, arg); - get_online_cpus(); - mutex_lock(&cf_diag_ctrset_mutex); - switch (cmd) { - case S390_HWCTR_START: - ret = cf_diag_ioctl_start(arg); - break; - case S390_HWCTR_STOP: - ret = cf_diag_ioctl_stop(); - break; - case S390_HWCTR_READ: - ret = cf_diag_ioctl_read(arg); - break; - default: - ret = -ENOTTY; - break; - } - mutex_unlock(&cf_diag_ctrset_mutex); - put_online_cpus(); - debug_sprintf_event(cf_diag_dbg, 2, "%s ret %d\n", __func__, ret); - return ret; -} - -static const struct file_operations cf_diag_fops = { - .owner = THIS_MODULE, - .open = cf_diag_open, - .release = cf_diag_release, - .unlocked_ioctl = cf_diag_ioctl, - .compat_ioctl = cf_diag_ioctl, - .llseek = no_llseek -}; - -static struct miscdevice cf_diag_dev = { - .name = S390_HWCTR_DEVICE, - .minor = MISC_DYNAMIC_MINOR, - .fops = &cf_diag_fops, -}; - -static int cf_diag_online_cpu(unsigned int cpu) -{ - struct cf_diag_call_on_cpu_parm p; - - mutex_lock(&cf_diag_ctrset_mutex); - if (!cf_diag_ctrset.ctrset) - goto out; - p.sets = cf_diag_ctrset.ctrset; - cf_diag_ioctl_on(&p); -out: - mutex_unlock(&cf_diag_ctrset_mutex); - return 0; -} - -static int cf_diag_offline_cpu(unsigned int cpu) -{ - struct cf_diag_call_on_cpu_parm p; - - mutex_lock(&cf_diag_ctrset_mutex); - if (!cf_diag_ctrset.ctrset) - goto out; - p.sets = cf_diag_ctrset.ctrset; - cf_diag_ioctl_off(&p); -out: - mutex_unlock(&cf_diag_ctrset_mutex); - return 0; -} - -/* Initialize the counter set PMU to generate complete counter set data as - * event raw data. This relies on the CPU Measurement Counter Facility device - * already being loaded and initialized. - */ -static int __init cf_diag_init(void) -{ - struct cpumf_ctr_info info; - size_t need; - int rc; - - if (!kernel_cpumcf_avail() || !stccm_avail() || qctri(&info)) - return -ENODEV; - cf_diag_get_cpu_speed(); - - /* Make sure the counter set data fits into predefined buffer. */ - need = cf_diag_ctrset_maxsize(&info); - if (need > sizeof(((struct cf_diag_csd *)0)->start)) { - pr_err("Insufficient memory for PMU(cpum_cf_diag) need=%zu\n", - need); - return -ENOMEM; - } - - rc = misc_register(&cf_diag_dev); - if (rc) { - pr_err("Registration of /dev/" S390_HWCTR_DEVICE - "failed rc=%d\n", rc); - goto out; - } - - /* Setup s390dbf facility */ - cf_diag_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128); - if (!cf_diag_dbg) { - pr_err("Registration of s390dbf(cpum_cf_diag) failed\n"); - rc = -ENOMEM; - goto out_dbf; - } - debug_register_view(cf_diag_dbg, &debug_sprintf_view); - - rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1); - if (rc) { - pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n", - rc); - goto out_perf; - } - rc = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_S390_CFD_ONLINE, - "perf/s390/cfd:online", - cf_diag_online_cpu, cf_diag_offline_cpu); - if (!rc) - goto out; - - pr_err("Registration of CPUHP_AP_PERF_S390_CFD_ONLINE failed rc=%i\n", - rc); - perf_pmu_unregister(&cf_diag); -out_perf: - debug_unregister_view(cf_diag_dbg, &debug_sprintf_view); - debug_unregister(cf_diag_dbg); -out_dbf: - misc_deregister(&cf_diag_dev); -out: - return rc; -} -device_initcall(cf_diag_init); diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index e20bed1ed34a..350e94d0cac2 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -166,6 +166,12 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, p->thread.acrs[1] = (unsigned int)tls; } } + /* + * s390 stores the svc return address in arch_data when calling + * sigreturn()/restart_syscall() via vdso. 1 means no valid address + * stored. + */ + p->restart_block.arch_data = 1; return 0; } @@ -180,7 +186,7 @@ unsigned long get_wchan(struct task_struct *p) struct unwind_state state; unsigned long ip = 0; - if (!p || p == current || p->state == TASK_RUNNING || !task_stack_page(p)) + if (!p || p == current || task_is_running(p) || !task_stack_page(p)) return 0; if (!try_get_task_stack(p)) diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index c92d04f876cb..82df39b17bb5 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -103,11 +103,9 @@ EXPORT_SYMBOL(cpu_have_feature); static void show_facilities(struct seq_file *m) { unsigned int bit; - long *facilities; - facilities = (long *)&S390_lowcore.stfle_fac_list; seq_puts(m, "facilities :"); - for_each_set_bit_inv(bit, facilities, MAX_FACILITY_BIT) + for_each_set_bit_inv(bit, (long *)&stfle_fac_list, MAX_FACILITY_BIT) seq_printf(m, " %d", bit); seq_putc(m, '\n'); } diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 18b3416fd663..0ea3d02b378d 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -975,10 +975,12 @@ static int s390_tdb_get(struct task_struct *target, struct membuf to) { struct pt_regs *regs = task_pt_regs(target); + size_t size; if (!(regs->int_code & 0x200)) return -ENODATA; - return membuf_write(&to, target->thread.trap_tdb, 256); + size = sizeof(target->thread.trap_tdb.data); + return membuf_write(&to, target->thread.trap_tdb.data, size); } static int s390_tdb_set(struct task_struct *target, diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 5aab59ad5688..ff0f9e838916 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -96,7 +96,6 @@ unsigned long int_hwcap = 0; int __bootdata(noexec_disabled); unsigned long __bootdata(ident_map_size); -unsigned long __bootdata(vmalloc_size); struct mem_detect_info __bootdata(mem_detect); struct exception_table_entry *__bootdata_preserved(__start_dma_ex_table); @@ -108,6 +107,9 @@ unsigned long __bootdata_preserved(__edma); unsigned long __bootdata_preserved(__kaslr_offset); unsigned int __bootdata_preserved(zlib_dfltcc_support); EXPORT_SYMBOL(zlib_dfltcc_support); +u64 __bootdata_preserved(stfle_fac_list[16]); +EXPORT_SYMBOL(stfle_fac_list); +u64 __bootdata_preserved(alt_stfle_fac_list[16]); unsigned long VMALLOC_START; EXPORT_SYMBOL(VMALLOC_START); @@ -165,7 +167,7 @@ static void __init set_preferred_console(void) else if (CONSOLE_IS_3270) add_preferred_console("tty3270", 0, NULL); else if (CONSOLE_IS_VT220) - add_preferred_console("ttyS", 1, NULL); + add_preferred_console("ttysclp", 0, NULL); else if (CONSOLE_IS_HVC) add_preferred_console("hvc", 0, NULL); } @@ -338,27 +340,6 @@ int __init arch_early_irq_init(void) return 0; } -static int __init stack_realloc(void) -{ - unsigned long old, new; - - old = S390_lowcore.async_stack - STACK_INIT_OFFSET; - new = stack_alloc(); - if (!new) - panic("Couldn't allocate async stack"); - WRITE_ONCE(S390_lowcore.async_stack, new + STACK_INIT_OFFSET); - free_pages(old, THREAD_SIZE_ORDER); - - old = S390_lowcore.mcck_stack - STACK_INIT_OFFSET; - new = stack_alloc(); - if (!new) - panic("Couldn't allocate machine check stack"); - WRITE_ONCE(S390_lowcore.mcck_stack, new + STACK_INIT_OFFSET); - memblock_free_late(old, THREAD_SIZE); - return 0; -} -early_initcall(stack_realloc); - void __init arch_call_rest_init(void) { unsigned long stack; @@ -373,7 +354,7 @@ void __init arch_call_rest_init(void) set_task_stack_end_magic(current); stack += STACK_INIT_OFFSET; S390_lowcore.kernel_stack = stack; - CALL_ON_STACK_NORETURN(rest_init, stack); + call_on_stack_noreturn(rest_init, stack); } static void __init setup_lowcore_dat_off(void) @@ -413,11 +394,6 @@ static void __init setup_lowcore_dat_off(void) lc->lpp = LPP_MAGIC; lc->machine_flags = S390_lowcore.machine_flags; lc->preempt_count = S390_lowcore.preempt_count; - lc->stfl_fac_list = S390_lowcore.stfl_fac_list; - memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, - sizeof(lc->stfle_fac_list)); - memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list, - sizeof(lc->alt_stfle_fac_list)); nmi_alloc_boot_cpu(lc); lc->sys_enter_timer = S390_lowcore.sys_enter_timer; lc->exit_timer = S390_lowcore.exit_timer; @@ -466,6 +442,7 @@ static void __init setup_lowcore_dat_off(void) lc->br_r1_trampoline = 0x07f1; /* br %r1 */ lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); + lc->preempt_count = PREEMPT_DISABLED; set_prefix((u32)(unsigned long) lc); lowcore_ptr[0] = lc; @@ -568,53 +545,10 @@ static void __init setup_resources(void) #endif } -static void __init setup_ident_map_size(void) +static void __init setup_memory_end(void) { - unsigned long vmax, tmp; - - /* Choose kernel address space layout: 3 or 4 levels. */ - tmp = ident_map_size / PAGE_SIZE; - tmp = tmp * (sizeof(struct page) + PAGE_SIZE); - if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE) - vmax = _REGION2_SIZE; /* 3-level kernel page table */ - else - vmax = _REGION1_SIZE; /* 4-level kernel page table */ - /* module area is at the end of the kernel address space. */ - MODULES_END = vmax; - if (is_prot_virt_host()) - adjust_to_uv_max(&MODULES_END); -#ifdef CONFIG_KASAN - vmax = _REGION1_SIZE; - MODULES_END = kasan_vmax; -#endif - MODULES_VADDR = MODULES_END - MODULES_LEN; - VMALLOC_END = MODULES_VADDR; - VMALLOC_START = VMALLOC_END - vmalloc_size; - - /* Split remaining virtual space between 1:1 mapping & vmemmap array */ - tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page)); - /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */ - tmp = SECTION_ALIGN_UP(tmp); - tmp = VMALLOC_START - tmp * sizeof(struct page); - tmp &= ~((vmax >> 11) - 1); /* align to page table level */ - tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS); - vmemmap = (struct page *) tmp; - - /* Take care that ident_map_size <= vmemmap */ - ident_map_size = min(ident_map_size, (unsigned long)vmemmap); -#ifdef CONFIG_KASAN - ident_map_size = min(ident_map_size, KASAN_SHADOW_START); -#endif - vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page); -#ifdef CONFIG_KASAN - /* move vmemmap above kasan shadow only if stands in a way */ - if (KASAN_SHADOW_END > (unsigned long)vmemmap && - (unsigned long)vmemmap + vmemmap_size > KASAN_SHADOW_START) - vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END); -#endif - max_pfn = max_low_pfn = PFN_DOWN(ident_map_size); memblock_remove(ident_map_size, ULONG_MAX); - + max_pfn = max_low_pfn = PFN_DOWN(ident_map_size); pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20); } @@ -653,30 +587,6 @@ static void __init reserve_above_ident_map(void) } /* - * Make sure that oldmem, where the dump is stored, is protected - */ -static void __init reserve_oldmem(void) -{ -#ifdef CONFIG_CRASH_DUMP - if (OLDMEM_BASE) - /* Forget all memory above the running kdump system */ - memblock_reserve(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX); -#endif -} - -/* - * Make sure that oldmem, where the dump is stored, is protected - */ -static void __init remove_oldmem(void) -{ -#ifdef CONFIG_CRASH_DUMP - if (OLDMEM_BASE) - /* Forget all memory above the running kdump system */ - memblock_remove(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX); -#endif -} - -/* * Reserve memory for kdump kernel to be loaded with kexec */ static void __init reserve_crashkernel(void) @@ -1119,10 +1029,7 @@ void __init setup_arch(char **cmdline_p) ROOT_DEV = Root_RAM0; - init_mm.start_code = (unsigned long) _text; - init_mm.end_code = (unsigned long) _etext; - init_mm.end_data = (unsigned long) _edata; - init_mm.brk = (unsigned long) _end; + setup_initial_init_mm(_text, _etext, _edata, _end); if (IS_ENABLED(CONFIG_EXPOLINE_AUTO)) nospec_auto_detect(); @@ -1141,7 +1048,6 @@ void __init setup_arch(char **cmdline_p) /* Do some memory reservations *before* memory is added to memblock */ reserve_above_ident_map(); - reserve_oldmem(); reserve_kernel(); reserve_initrd(); reserve_certificate_list(); @@ -1152,10 +1058,9 @@ void __init setup_arch(char **cmdline_p) memblock_add_mem_detect_info(); free_mem_detect_info(); - remove_oldmem(); setup_uv(); - setup_ident_map_size(); + setup_memory_end(); setup_memory(); dma_contiguous_reserve(ident_map_size); vmcp_cma_reserve(); diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 90163e6184f5..78ef53b29958 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -32,6 +32,7 @@ #include <linux/uaccess.h> #include <asm/lowcore.h> #include <asm/switch_to.h> +#include <asm/vdso.h> #include "entry.h" /* @@ -171,7 +172,6 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) fpregs_load(&user_sregs.fpregs, ¤t->thread.fpu); clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ - clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART); return 0; } @@ -334,15 +334,10 @@ static int setup_frame(int sig, struct k_sigaction *ka, /* Set up to return from userspace. If provided, use a stub already in userspace. */ - if (ka->sa.sa_flags & SA_RESTORER) { + if (ka->sa.sa_flags & SA_RESTORER) restorer = (unsigned long) ka->sa.sa_restorer; - } else { - /* Signal frame without vector registers are short ! */ - __u16 __user *svc = (void __user *) frame + frame_size - 2; - if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc)) - return -EFAULT; - restorer = (unsigned long) svc; - } + else + restorer = VDSO64_SYMBOL(current, sigreturn); /* Set up registers for signal handler */ regs->gprs[14] = restorer; @@ -397,14 +392,10 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, /* Set up to return from userspace. If provided, use a stub already in userspace. */ - if (ksig->ka.sa.sa_flags & SA_RESTORER) { + if (ksig->ka.sa.sa_flags & SA_RESTORER) restorer = (unsigned long) ksig->ka.sa.sa_restorer; - } else { - __u16 __user *svc = &frame->svc_insn; - if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc)) - return -EFAULT; - restorer = (unsigned long) svc; - } + else + restorer = VDSO64_SYMBOL(current, rt_sigreturn); /* Create siginfo on the signal stack */ if (copy_siginfo_to_user(&frame->info, &ksig->info)) @@ -501,7 +492,7 @@ void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) } /* No longer in a system call */ clear_pt_regs_flag(regs, PIF_SYSCALL); - clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART); + rseq_signal_deliver(&ksig, regs); if (is_compat_task()) handle_signal32(&ksig, oldset, regs); @@ -512,20 +503,25 @@ void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) /* No handlers present - check for system call restart */ clear_pt_regs_flag(regs, PIF_SYSCALL); - clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART); if (current->thread.system_call) { regs->int_code = current->thread.system_call; switch (regs->gprs[2]) { case -ERESTART_RESTARTBLOCK: /* Restart with sys_restart_syscall */ - regs->int_code = __NR_restart_syscall; - fallthrough; + regs->gprs[2] = regs->orig_gpr2; + current->restart_block.arch_data = regs->psw.addr; + if (is_compat_task()) + regs->psw.addr = VDSO32_SYMBOL(current, restart_syscall); + else + regs->psw.addr = VDSO64_SYMBOL(current, restart_syscall); + if (test_thread_flag(TIF_SINGLE_STEP)) + clear_thread_flag(TIF_PER_TRAP); + break; case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: - /* Restart system call with magic TIF bit. */ regs->gprs[2] = regs->orig_gpr2; - set_pt_regs_flag(regs, PIF_SYSCALL_RESTART); + regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16); if (test_thread_flag(TIF_SINGLE_STEP)) clear_thread_flag(TIF_PER_TRAP); break; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 2fec2b80d35d..8984711f72ed 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -74,7 +74,6 @@ enum { static DEFINE_PER_CPU(struct cpu *, cpu_device); struct pcpu { - struct lowcore *lowcore; /* lowcore page(s) for the cpu */ unsigned long ec_mask; /* bit mask for ec_xxx functions */ unsigned long ec_clk; /* sigp timestamp for ec_xxx */ signed char state; /* physical cpu state */ @@ -194,20 +193,12 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) unsigned long async_stack, nodat_stack, mcck_stack; struct lowcore *lc; - if (pcpu != &pcpu_devices[0]) { - pcpu->lowcore = (struct lowcore *) - __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); - nodat_stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); - if (!pcpu->lowcore || !nodat_stack) - goto out; - } else { - nodat_stack = pcpu->lowcore->nodat_stack - STACK_INIT_OFFSET; - } + lc = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); + nodat_stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); async_stack = stack_alloc(); mcck_stack = stack_alloc(); - if (!async_stack || !mcck_stack) - goto out_stack; - lc = pcpu->lowcore; + if (!lc || !nodat_stack || !async_stack || !mcck_stack) + goto out; memcpy(lc, &S390_lowcore, 512); memset((char *) lc + 512, 0, sizeof(*lc) - 512); lc->async_stack = async_stack + STACK_INIT_OFFSET; @@ -219,46 +210,44 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) lc->br_r1_trampoline = 0x07f1; /* br %r1 */ lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); + lc->preempt_count = PREEMPT_DISABLED; if (nmi_alloc_per_cpu(lc)) - goto out_stack; + goto out; lowcore_ptr[cpu] = lc; pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc); return 0; -out_stack: +out: stack_free(mcck_stack); stack_free(async_stack); -out: - if (pcpu != &pcpu_devices[0]) { - free_pages(nodat_stack, THREAD_SIZE_ORDER); - free_pages((unsigned long) pcpu->lowcore, LC_ORDER); - } + free_pages(nodat_stack, THREAD_SIZE_ORDER); + free_pages((unsigned long) lc, LC_ORDER); return -ENOMEM; } static void pcpu_free_lowcore(struct pcpu *pcpu) { - unsigned long async_stack, nodat_stack, mcck_stack, lowcore; - - nodat_stack = pcpu->lowcore->nodat_stack - STACK_INIT_OFFSET; - async_stack = pcpu->lowcore->async_stack - STACK_INIT_OFFSET; - mcck_stack = pcpu->lowcore->mcck_stack - STACK_INIT_OFFSET; - lowcore = (unsigned long) pcpu->lowcore; + unsigned long async_stack, nodat_stack, mcck_stack; + struct lowcore *lc; + int cpu; + cpu = pcpu - pcpu_devices; + lc = lowcore_ptr[cpu]; + nodat_stack = lc->nodat_stack - STACK_INIT_OFFSET; + async_stack = lc->async_stack - STACK_INIT_OFFSET; + mcck_stack = lc->mcck_stack - STACK_INIT_OFFSET; pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); - lowcore_ptr[pcpu - pcpu_devices] = NULL; - nmi_free_per_cpu(pcpu->lowcore); + lowcore_ptr[cpu] = NULL; + nmi_free_per_cpu(lc); stack_free(async_stack); stack_free(mcck_stack); - if (pcpu == &pcpu_devices[0]) - return; free_pages(nodat_stack, THREAD_SIZE_ORDER); - free_pages(lowcore, LC_ORDER); + free_pages((unsigned long) lc, LC_ORDER); } static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) { - struct lowcore *lc = pcpu->lowcore; + struct lowcore *lc = lowcore_ptr[cpu]; cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); @@ -275,17 +264,16 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) lc->cregs_save_area[1] = lc->kernel_asce; lc->cregs_save_area[7] = lc->user_asce; save_access_regs((unsigned int *) lc->access_regs_save_area); - memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, - sizeof(lc->stfle_fac_list)); - memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list, - sizeof(lc->alt_stfle_fac_list)); arch_spin_lock_setup(cpu); } static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) { - struct lowcore *lc = pcpu->lowcore; + struct lowcore *lc; + int cpu; + cpu = pcpu - pcpu_devices; + lc = lowcore_ptr[cpu]; lc->kernel_stack = (unsigned long) task_stack_page(tsk) + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->current_task = (unsigned long) tsk; @@ -301,8 +289,11 @@ static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data) { - struct lowcore *lc = pcpu->lowcore; + struct lowcore *lc; + int cpu; + cpu = pcpu - pcpu_devices; + lc = lowcore_ptr[cpu]; lc->restart_stack = lc->nodat_stack; lc->restart_fn = (unsigned long) func; lc->restart_data = (unsigned long) data; @@ -310,24 +301,28 @@ static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data) pcpu_sigp_retry(pcpu, SIGP_RESTART, 0); } +typedef void (pcpu_delegate_fn)(void *); + /* * Call function via PSW restart on pcpu and stop the current cpu. */ -static void __pcpu_delegate(void (*func)(void*), void *data) +static void __pcpu_delegate(pcpu_delegate_fn *func, void *data) { func(data); /* should not return */ } static void __no_sanitize_address pcpu_delegate(struct pcpu *pcpu, - void (*func)(void *), + pcpu_delegate_fn *func, void *data, unsigned long stack) { struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices]; unsigned long source_cpu = stap(); __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); - if (pcpu->address == source_cpu) - CALL_ON_STACK(__pcpu_delegate, stack, 2, func, data); + if (pcpu->address == source_cpu) { + call_on_stack(2, stack, void, __pcpu_delegate, + pcpu_delegate_fn *, func, void *, data); + } /* Stop target cpu (if func returns this stops the current cpu). */ pcpu_sigp_retry(pcpu, SIGP_STOP, 0); /* Restart func on the target cpu and stop the current cpu. */ @@ -387,7 +382,7 @@ void smp_call_online_cpu(void (*func)(void *), void *data) */ void smp_call_ipl_cpu(void (*func)(void *), void *data) { - struct lowcore *lc = pcpu_devices->lowcore; + struct lowcore *lc = lowcore_ptr[0]; if (pcpu_devices[0].address == stap()) lc = &S390_lowcore; @@ -600,18 +595,21 @@ EXPORT_SYMBOL(smp_ctl_clear_bit); int smp_store_status(int cpu) { - struct pcpu *pcpu = pcpu_devices + cpu; + struct lowcore *lc; + struct pcpu *pcpu; unsigned long pa; - pa = __pa(&pcpu->lowcore->floating_pt_save_area); + pcpu = pcpu_devices + cpu; + lc = lowcore_ptr[cpu]; + pa = __pa(&lc->floating_pt_save_area); if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; if (!MACHINE_HAS_VX && !MACHINE_HAS_GS) return 0; - pa = __pa(pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK); + pa = __pa(lc->mcesad & MCESA_ORIGIN_MASK); if (MACHINE_HAS_GS) - pa |= pcpu->lowcore->mcesad & MCESA_LC_MASK; + pa |= lc->mcesad & MCESA_LC_MASK; if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; @@ -878,7 +876,6 @@ static void smp_init_secondary(void) restore_access_regs(S390_lowcore.access_regs_save_area); cpu_init(); rcu_cpu_starting(cpu); - preempt_disable(); init_cpu_timer(); vtime_init(); vdso_getcpu_init(); @@ -906,7 +903,7 @@ static void __no_sanitize_address smp_start_secondary(void *cpuvoid) S390_lowcore.restart_source = -1UL; __ctl_load(S390_lowcore.cregs_save_area, 0, 15); __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); - CALL_ON_STACK_NORETURN(smp_init_secondary, S390_lowcore.kernel_stack); + call_on_stack_noreturn(smp_init_secondary, S390_lowcore.kernel_stack); } /* Upping and downing of CPUs */ @@ -1012,7 +1009,6 @@ void __init smp_prepare_boot_cpu(void) WARN_ON(!cpu_present(0) || !cpu_online(0)); pcpu->state = CPU_STATE_CONFIGURED; - pcpu->lowcore = (struct lowcore *)(unsigned long) store_prefix(); S390_lowcore.percpu_offset = __per_cpu_offset[0]; smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); } @@ -1238,3 +1234,54 @@ out: return rc; } subsys_initcall(s390_smp_init); + +static __always_inline void set_new_lowcore(struct lowcore *lc) +{ + union register_pair dst, src; + u32 pfx; + + src.even = (unsigned long) &S390_lowcore; + src.odd = sizeof(S390_lowcore); + dst.even = (unsigned long) lc; + dst.odd = sizeof(*lc); + pfx = (unsigned long) lc; + + asm volatile( + " mvcl %[dst],%[src]\n" + " spx %[pfx]\n" + : [dst] "+&d" (dst.pair), [src] "+&d" (src.pair) + : [pfx] "Q" (pfx) + : "memory", "cc"); +} + +static int __init smp_reinit_ipl_cpu(void) +{ + unsigned long async_stack, nodat_stack, mcck_stack; + struct lowcore *lc, *lc_ipl; + unsigned long flags; + + lc_ipl = lowcore_ptr[0]; + lc = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); + nodat_stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); + async_stack = stack_alloc(); + mcck_stack = stack_alloc(); + if (!lc || !nodat_stack || !async_stack || !mcck_stack) + panic("Couldn't allocate memory"); + + local_irq_save(flags); + local_mcck_disable(); + set_new_lowcore(lc); + S390_lowcore.nodat_stack = nodat_stack + STACK_INIT_OFFSET; + S390_lowcore.async_stack = async_stack + STACK_INIT_OFFSET; + S390_lowcore.mcck_stack = mcck_stack + STACK_INIT_OFFSET; + lowcore_ptr[0] = lc; + local_mcck_enable(); + local_irq_restore(flags); + + free_pages(lc_ipl->async_stack - STACK_INIT_OFFSET, THREAD_SIZE_ORDER); + memblock_free_late(lc_ipl->mcck_stack - STACK_INIT_OFFSET, THREAD_SIZE); + memblock_free_late((unsigned long) lc_ipl, sizeof(*lc_ipl)); + + return 0; +} +early_initcall(smp_reinit_ipl_cpu); diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c index 888cc2f166db..4d141e2c132e 100644 --- a/arch/s390/kernel/sthyi.c +++ b/arch/s390/kernel/sthyi.c @@ -395,19 +395,18 @@ out: static int sthyi(u64 vaddr, u64 *rc) { - register u64 code asm("0") = 0; - register u64 addr asm("2") = vaddr; - register u64 rcode asm("3"); + union register_pair r1 = { .even = 0, }; /* subcode */ + union register_pair r2 = { .even = vaddr, }; int cc; asm volatile( - ".insn rre,0xB2560000,%[code],%[addr]\n" + ".insn rre,0xB2560000,%[r1],%[r2]\n" "ipm %[cc]\n" "srl %[cc],28\n" - : [cc] "=d" (cc), "=d" (rcode) - : [code] "d" (code), [addr] "a" (addr) + : [cc] "=&d" (cc), [r2] "+&d" (r2.pair) + : [r1] "d" (r1.pair) : "memory", "cc"); - *rc = rcode; + *rc = r2.odd; return cc; } diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c index 4e5cc7d2364e..8fe2d23b64f4 100644 --- a/arch/s390/kernel/syscall.c +++ b/arch/s390/kernel/syscall.c @@ -108,7 +108,7 @@ SYSCALL_DEFINE0(ni_syscall) return -ENOSYS; } -void do_syscall(struct pt_regs *regs) +static void do_syscall(struct pt_regs *regs) { unsigned long nr; @@ -121,6 +121,10 @@ void do_syscall(struct pt_regs *regs) regs->gprs[2] = nr; + if (nr == __NR_restart_syscall && !(current->restart_block.arch_data & 1)) { + regs->psw.addr = current->restart_block.arch_data; + current->restart_block.arch_data = 1; + } nr = syscall_enter_from_user_mode_work(regs, nr); /* @@ -130,13 +134,16 @@ void do_syscall(struct pt_regs *regs) * work, the ptrace code sets PIF_SYSCALL_RET_SET, which is checked here * and if set, the syscall will be skipped. */ - if (!test_pt_regs_flag(regs, PIF_SYSCALL_RET_SET)) { - regs->gprs[2] = -ENOSYS; - if (likely(nr < NR_syscalls)) - regs->gprs[2] = current->thread.sys_call_table[nr](regs); - } else { - clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET); - } + + if (unlikely(test_and_clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET))) + goto out; + regs->gprs[2] = -ENOSYS; + if (likely(nr >= NR_syscalls)) + goto out; + do { + regs->gprs[2] = current->thread.sys_call_table[nr](regs); + } while (test_and_clear_pt_regs_flag(regs, PIF_EXECVE_PGSTE_RESTART)); +out: syscall_exit_to_user_mode_work(regs); } @@ -144,11 +151,8 @@ void noinstr __do_syscall(struct pt_regs *regs, int per_trap) { add_random_kstack_offset(); enter_from_user_mode(regs); - - memcpy(®s->gprs[8], S390_lowcore.save_area_sync, 8 * sizeof(unsigned long)); - memcpy(®s->int_code, &S390_lowcore.svc_ilc, sizeof(regs->int_code)); regs->psw = S390_lowcore.svc_old_psw; - + regs->int_code = S390_lowcore.svc_int_code; update_timer_sys(); local_irq_enable(); @@ -157,13 +161,8 @@ void noinstr __do_syscall(struct pt_regs *regs, int per_trap) if (per_trap) set_thread_flag(TIF_PER_TRAP); - for (;;) { - regs->flags = 0; - set_pt_regs_flag(regs, PIF_SYSCALL); - do_syscall(regs); - if (!test_pt_regs_flag(regs, PIF_SYSCALL_RESTART)) - break; - local_irq_enable(); - } + regs->flags = 0; + set_pt_regs_flag(regs, PIF_SYSCALL); + do_syscall(regs); exit_to_user_mode(); } diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 7e4a2aba366d..64d51ab5a8b4 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -445,7 +445,7 @@ 440 common process_madvise sys_process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path sys_quotactl_path +443 common quotactl_fd sys_quotactl_fd sys_quotactl_fd 444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 2ac3c9b56a13..ef3f2659876c 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -25,19 +25,22 @@ int topology_max_mnest; static inline int __stsi(void *sysinfo, int fc, int sel1, int sel2, int *lvl) { - register int r0 asm("0") = (fc << 28) | sel1; - register int r1 asm("1") = sel2; + int r0 = (fc << 28) | sel1; int rc = 0; asm volatile( - " stsi 0(%3)\n" + " lr 0,%[r0]\n" + " lr 1,%[r1]\n" + " stsi 0(%[sysinfo])\n" "0: jz 2f\n" - "1: lhi %1,%4\n" - "2:\n" + "1: lhi %[rc],%[retval]\n" + "2: lr %[r0],0\n" EX_TABLE(0b, 1b) - : "+d" (r0), "+d" (rc) - : "d" (r1), "a" (sysinfo), "K" (-EOPNOTSUPP) - : "cc", "memory"); + : [r0] "+d" (r0), [rc] "+d" (rc) + : [r1] "d" (sel2), + [sysinfo] "a" (sysinfo), + [retval] "K" (-EOPNOTSUPP) + : "cc", "0", "1", "memory"); *lvl = ((unsigned int) r0) >> 28; return rc; } diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index bfcc327acc6b..26aa2614ee35 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -66,7 +66,10 @@ static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int c { static cpumask_t mask; - cpumask_copy(&mask, cpumask_of(cpu)); + cpumask_clear(&mask); + if (!cpu_online(cpu)) + goto out; + cpumask_set_cpu(cpu, &mask); switch (topology_mode) { case TOPOLOGY_MODE_HW: while (info) { @@ -83,10 +86,10 @@ static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int c default: fallthrough; case TOPOLOGY_MODE_SINGLE: - cpumask_copy(&mask, cpumask_of(cpu)); break; } cpumask_and(&mask, &mask, cpu_online_mask); +out: cpumask_copy(dst, &mask); } @@ -95,7 +98,10 @@ static void cpu_thread_map(cpumask_t *dst, unsigned int cpu) static cpumask_t mask; int i; - cpumask_copy(&mask, cpumask_of(cpu)); + cpumask_clear(&mask); + if (!cpu_online(cpu)) + goto out; + cpumask_set_cpu(cpu, &mask); if (topology_mode != TOPOLOGY_MODE_HW) goto out; cpu -= cpu % (smp_cpu_mtid + 1); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 8dd23c703718..76947275fe8b 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -36,7 +36,7 @@ static inline void __user *get_trap_ip(struct pt_regs *regs) unsigned long address; if (regs->int_code & 0x200) - address = *(unsigned long *)(current->thread.trap_tdb + 24); + address = current->thread.trap_tdb.data[3]; else address = regs->psw.addr; return (void __user *) (address - (regs->int_code >> 16)); @@ -277,6 +277,8 @@ static void __init test_monitor_call(void) { int val = 1; + if (!IS_ENABLED(CONFIG_BUG)) + return; asm volatile( " mc 0,0\n" "0: xgr %0,%0\n" @@ -299,10 +301,9 @@ static void (*pgm_check_table[128])(struct pt_regs *regs); void noinstr __do_pgm_check(struct pt_regs *regs) { unsigned long last_break = S390_lowcore.breaking_event_addr; - unsigned int trapnr, syscall_redirect = 0; + unsigned int trapnr; irqentry_state_t state; - add_random_kstack_offset(); regs->int_code = *(u32 *)&S390_lowcore.pgm_ilc; regs->int_parm_long = S390_lowcore.trans_exc_code; @@ -318,7 +319,7 @@ void noinstr __do_pgm_check(struct pt_regs *regs) if (S390_lowcore.pgm_code & 0x0200) { /* transaction abort */ - memcpy(¤t->thread.trap_tdb, &S390_lowcore.pgm_tdb, 256); + current->thread.trap_tdb = S390_lowcore.pgm_tdb; } if (S390_lowcore.pgm_code & PGM_INT_CODE_PER) { @@ -344,18 +345,9 @@ void noinstr __do_pgm_check(struct pt_regs *regs) trapnr = regs->int_code & PGM_INT_CODE_MASK; if (trapnr) pgm_check_table[trapnr](regs); - syscall_redirect = user_mode(regs) && test_pt_regs_flag(regs, PIF_SYSCALL); out: local_irq_disable(); irqentry_exit(regs, state); - - if (syscall_redirect) { - enter_from_user_mode(regs); - local_irq_enable(); - regs->orig_gpr2 = regs->gprs[2]; - do_syscall(regs); - exit_to_user_mode(); - } } /* diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c index bbf8622bbf5d..bd3ef121c379 100644 --- a/arch/s390/kernel/uprobes.c +++ b/arch/s390/kernel/uprobes.c @@ -126,6 +126,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, case DIE_SSTEP: if (uprobe_post_sstep_notifier(regs)) return NOTIFY_STOP; + break; default: break; } diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 370f664580af..aeb0a15bcbb7 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -52,7 +52,7 @@ void __init setup_uv(void) unsigned long uv_stor_base; /* - * keep these conditions in line with kasan init code has_uv_sec_stor_limit() + * keep these conditions in line with has_uv_sec_stor_limit() */ if (!is_prot_virt_host()) return; @@ -91,12 +91,6 @@ fail: prot_virt_host = 0; } -void adjust_to_uv_max(unsigned long *vmax) -{ - if (uv_info.max_sec_stor_addr) - *vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr); -} - /* * Requests the Ultravisor to pin the page in the shared state. This will * cause an intercept when the guest attempts to unshare the pinned page. @@ -364,6 +358,15 @@ static ssize_t uv_query_facilities(struct kobject *kobj, static struct kobj_attribute uv_query_facilities_attr = __ATTR(facilities, 0444, uv_query_facilities, NULL); +static ssize_t uv_query_feature_indications(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.uv_feature_indications); +} + +static struct kobj_attribute uv_query_feature_indications_attr = + __ATTR(feature_indications, 0444, uv_query_feature_indications, NULL); + static ssize_t uv_query_max_guest_cpus(struct kobject *kobj, struct kobj_attribute *attr, char *page) { @@ -396,6 +399,7 @@ static struct kobj_attribute uv_query_max_guest_addr_attr = static struct attribute *uv_query_attrs[] = { &uv_query_facilities_attr.attr, + &uv_query_feature_indications_attr.attr, &uv_query_max_guest_cpus_attr.attr, &uv_query_max_guest_vms_attr.attr, &uv_query_max_guest_addr_attr.attr, diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 8c4e07d533c8..99694260cac9 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -20,7 +20,7 @@ #include <asm/vdso.h> extern char vdso64_start[], vdso64_end[]; -static unsigned int vdso_pages; +extern char vdso32_start[], vdso32_end[]; static struct vm_special_mapping vvar_mapping; @@ -37,18 +37,6 @@ enum vvar_pages { VVAR_NR_PAGES, }; -unsigned int __read_mostly vdso_enabled = 1; - -static int __init vdso_setup(char *str) -{ - bool enabled; - - if (!kstrtobool(str, &enabled)) - vdso_enabled = enabled; - return 1; -} -__setup("vdso=", vdso_setup); - #ifdef CONFIG_TIME_NS struct vdso_data *arch_get_vdso_data(void *vvar_page) { @@ -155,7 +143,12 @@ static struct vm_special_mapping vvar_mapping = { .fault = vvar_fault, }; -static struct vm_special_mapping vdso_mapping = { +static struct vm_special_mapping vdso64_mapping = { + .name = "[vdso]", + .mremap = vdso_mremap, +}; + +static struct vm_special_mapping vdso32_mapping = { .name = "[vdso]", .mremap = vdso_mremap, }; @@ -171,16 +164,22 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { unsigned long vdso_text_len, vdso_mapping_len; unsigned long vvar_start, vdso_text_start; + struct vm_special_mapping *vdso_mapping; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; int rc; BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); - if (!vdso_enabled || is_compat_task()) - return 0; if (mmap_write_lock_killable(mm)) return -EINTR; - vdso_text_len = vdso_pages << PAGE_SHIFT; + + if (is_compat_task()) { + vdso_text_len = vdso32_end - vdso32_start; + vdso_mapping = &vdso32_mapping; + } else { + vdso_text_len = vdso64_end - vdso64_start; + vdso_mapping = &vdso64_mapping; + } vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE; vvar_start = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); rc = vvar_start; @@ -198,7 +197,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - &vdso_mapping); + vdso_mapping); if (IS_ERR(vma)) { do_munmap(mm, vvar_start, PAGE_SIZE, NULL); rc = PTR_ERR(vma); @@ -211,21 +210,25 @@ out: return rc; } -static int __init vdso_init(void) +static struct page ** __init vdso_setup_pages(void *start, void *end) { - struct page **pages; + int pages = (end - start) >> PAGE_SHIFT; + struct page **pagelist; int i; - vdso_pages = (vdso64_end - vdso64_start) >> PAGE_SHIFT; - pages = kcalloc(vdso_pages + 1, sizeof(struct page *), GFP_KERNEL); - if (!pages) { - vdso_enabled = 0; - return -ENOMEM; - } - for (i = 0; i < vdso_pages; i++) - pages[i] = virt_to_page(vdso64_start + i * PAGE_SIZE); - pages[vdso_pages] = NULL; - vdso_mapping.pages = pages; + pagelist = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL); + if (!pagelist) + panic("%s: Cannot allocate page list for VDSO", __func__); + for (i = 0; i < pages; i++) + pagelist[i] = virt_to_page(start + i * PAGE_SIZE); + return pagelist; +} + +static int __init vdso_init(void) +{ + vdso64_mapping.pages = vdso_setup_pages(vdso64_start, vdso64_end); + if (IS_ENABLED(CONFIG_COMPAT)) + vdso32_mapping.pages = vdso_setup_pages(vdso32_start, vdso32_end); return 0; } arch_initcall(vdso_init); diff --git a/arch/s390/kernel/vdso32/.gitignore b/arch/s390/kernel/vdso32/.gitignore new file mode 100644 index 000000000000..5167384843b9 --- /dev/null +++ b/arch/s390/kernel/vdso32/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +vdso32.lds diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile new file mode 100644 index 000000000000..b2349a3f4fa3 --- /dev/null +++ b/arch/s390/kernel/vdso32/Makefile @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: GPL-2.0 +# List of files in the vdso + +KCOV_INSTRUMENT := n +ARCH_REL_TYPE_ABS := R_390_COPY|R_390_GLOB_DAT|R_390_JMP_SLOT|R_390_RELATIVE +ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT + +include $(srctree)/lib/vdso/Makefile +obj-vdso32 = vdso_user_wrapper-32.o note-32.o + +# Build rules + +targets := $(obj-vdso32) vdso32.so vdso32.so.dbg +obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) + +KBUILD_AFLAGS += -DBUILD_VDSO +KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING + +KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) +KBUILD_AFLAGS_32 += -m31 -s + +KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin + +LDFLAGS_vdso32.so.dbg += -fPIC -shared -nostdlib -soname=linux-vdso32.so.1 \ + --hash-style=both --build-id=sha1 -melf_s390 -T + +$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) +$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) + +obj-y += vdso32_wrapper.o +CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) + +# Disable gcov profiling, ubsan and kasan for VDSO code +GCOV_PROFILE := n +UBSAN_SANITIZE := n +KASAN_SANITIZE := n + +# Force dependency (incbin is bad) +$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so + +$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE + $(call if_changed,ld) + +# strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +$(obj-vdso32): %-32.o: %.S FORCE + $(call if_changed_dep,vdso32as) + +# actual build commands +quiet_cmd_vdso32as = VDSO32A $@ + cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $< +quiet_cmd_vdso32cc = VDSO32C $@ + cmd_vdso32cc = $(CC) $(c_flags) -c -o $@ $< + +# install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ + +vdso32.so: $(obj)/vdso32.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: vdso32.so + +# Generate VDSO offsets using helper script +gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ + +include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE + $(call if_changed,vdsosym) diff --git a/arch/s390/kernel/vdso32/gen_vdso_offsets.sh b/arch/s390/kernel/vdso32/gen_vdso_offsets.sh new file mode 100755 index 000000000000..9c4f951e227d --- /dev/null +++ b/arch/s390/kernel/vdso32/gen_vdso_offsets.sh @@ -0,0 +1,15 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# +# Match symbols in the DSO that look like VDSO_*; produce a header file +# of constant offsets into the shared object. +# +# Doing this inside the Makefile will break the $(filter-out) function, +# causing Kbuild to rebuild the vdso-offsets header file every time. +# +# Inspired by arm64 version. +# + +LC_ALL=C +sed -n 's/\([0-9a-f]*\) . __kernel_compat_\(.*\)/\#define vdso32_offset_\2\t0x\1/p' diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso32/note.S new file mode 100644 index 000000000000..db19d0680a0a --- /dev/null +++ b/arch/s390/kernel/vdso32/note.S @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include <linux/uts.h> +#include <linux/version.h> +#include <linux/elfnote.h> + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S new file mode 100644 index 000000000000..bff50b6acd6d --- /dev/null +++ b/arch/s390/kernel/vdso32/vdso32.lds.S @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This is the infamous ld script for the 64 bits vdso + * library + */ + +#include <asm/page.h> +#include <asm/vdso.h> + +OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") +OUTPUT_ARCH(s390:31-bit) +ENTRY(_start) + +SECTIONS +{ + PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); +#ifdef CONFIG_TIME_NS + PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); +#endif + . = VDSO_LBASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . = ALIGN(16); + .text : { + *(.text .stub .text.* .gnu.linkonce.t.*) + } :text + PROVIDE(__etext = .); + PROVIDE(_etext = .); + PROVIDE(etext = .); + + /* + * Other stuff is appended to the text segment: + */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + + .dynamic : { *(.dynamic) } :text :dynamic + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) } + + .rela.dyn ALIGN(8) : { *(.rela.dyn) } + .got ALIGN(8) : { *(.got .toc) } + + _end = .; + PROVIDE(end = .); + + /* + * Stabs debugging sections are here too. + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + + /* + * DWARF debug sections. + * Symbols in the DWARF debugging sections are relative to the + * beginning of the section so we begin them at 0. + */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3 */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } + + /DISCARD/ : { + *(.note.GNU-stack) + *(.branch_lt) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * Very old versions of ld do not recognize this name token; use the constant. + */ +#define PT_GNU_EH_FRAME 0x6474e550 + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + VDSO_VERSION_STRING { + global: + /* + * Has to be there for the kernel to find + */ + __kernel_compat_restart_syscall; + __kernel_compat_rt_sigreturn; + __kernel_compat_sigreturn; + local: *; + }; +} diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso32/vdso32_wrapper.S new file mode 100644 index 000000000000..de2fb930471a --- /dev/null +++ b/arch/s390/kernel/vdso32/vdso32_wrapper.S @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/page.h> + + __PAGE_ALIGNED_DATA + + .globl vdso32_start, vdso32_end + .balign PAGE_SIZE +vdso32_start: + .incbin "arch/s390/kernel/vdso32/vdso32.so" + .balign PAGE_SIZE +vdso32_end: + + .previous diff --git a/arch/s390/kernel/vdso32/vdso_user_wrapper.S b/arch/s390/kernel/vdso32/vdso_user_wrapper.S new file mode 100644 index 000000000000..3f42f27f978c --- /dev/null +++ b/arch/s390/kernel/vdso32/vdso_user_wrapper.S @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <asm/unistd.h> +#include <asm/dwarf.h> + +.macro vdso_syscall func,syscall + .globl __kernel_compat_\func + .type __kernel_compat_\func,@function + .align 8 +__kernel_compat_\func: + CFI_STARTPROC + svc \syscall + /* Make sure we notice when a syscall returns, which shouldn't happen */ + .word 0 + CFI_ENDPROC + .size __kernel_compat_\func,.-__kernel_compat_\func +.endm + +vdso_syscall restart_syscall,__NR_restart_syscall +vdso_syscall sigreturn,__NR_sigreturn +vdso_syscall rt_sigreturn,__NR_rt_sigreturn diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index a6e0fb6b91d6..2a2092ce19f1 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -74,3 +74,11 @@ vdso64.so: $(obj)/vdso64.so.dbg $(call cmd,vdso_install) vdso_install: vdso64.so + +# Generate VDSO offsets using helper script +gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ + +include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE + $(call if_changed,vdsosym) diff --git a/arch/s390/kernel/vdso64/gen_vdso_offsets.sh b/arch/s390/kernel/vdso64/gen_vdso_offsets.sh new file mode 100755 index 000000000000..37f05cb38dad --- /dev/null +++ b/arch/s390/kernel/vdso64/gen_vdso_offsets.sh @@ -0,0 +1,15 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# +# Match symbols in the DSO that look like VDSO_*; produce a header file +# of constant offsets into the shared object. +# +# Doing this inside the Makefile will break the $(filter-out) function, +# causing Kbuild to rebuild the vdso-offsets header file every time. +# +# Inspired by arm64 version. +# + +LC_ALL=C +sed -n 's/\([0-9a-f]*\) . __kernel_\(.*\)/\#define vdso64_offset_\2\t0x\1/p' diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index 518f1ea405f4..d4fb336d747b 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -17,7 +17,7 @@ SECTIONS #ifdef CONFIG_TIME_NS PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); #endif - . = VDSO64_LBASE + SIZEOF_HEADERS; + . = VDSO_LBASE + SIZEOF_HEADERS; .hash : { *(.hash) } :text .gnu.hash : { *(.gnu.hash) } @@ -137,6 +137,9 @@ VERSION __kernel_clock_gettime; __kernel_clock_getres; __kernel_getcpu; + __kernel_restart_syscall; + __kernel_rt_sigreturn; + __kernel_sigreturn; local: *; }; } diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S index f773505c7e63..97f0c0a669a5 100644 --- a/arch/s390/kernel/vdso64/vdso_user_wrapper.S +++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S @@ -37,3 +37,20 @@ vdso_func gettimeofday vdso_func clock_getres vdso_func clock_gettime vdso_func getcpu + +.macro vdso_syscall func,syscall + .globl __kernel_\func + .type __kernel_\func,@function + .align 8 +__kernel_\func: + CFI_STARTPROC + svc \syscall + /* Make sure we notice when a syscall returns, which shouldn't happen */ + .word 0 + CFI_ENDPROC + .size __kernel_\func,.-__kernel_\func +.endm + +vdso_syscall restart_syscall,__NR_restart_syscall +vdso_syscall sigreturn,__NR_sigreturn +vdso_syscall rt_sigreturn,__NR_rt_sigreturn diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 12decca22e7c..b3aaadc60ead 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -4,7 +4,8 @@ # Copyright IBM Corp. 2008 KVM := ../../../virt/kvm -common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o +common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o \ + $(KVM)/irqchip.o $(KVM)/vfio.o $(KVM)/binary_stats.o ccflags-y := -Ivirt/kvm -Iarch/s390/kvm diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 02c146f9e5cd..807fa9da1e72 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -24,7 +24,7 @@ static int diag_release_pages(struct kvm_vcpu *vcpu) start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + PAGE_SIZE; - vcpu->stat.diagnose_10++; + vcpu->stat.instruction_diagnose_10++; if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end || start < 2 * PAGE_SIZE) @@ -74,7 +74,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 3, "diag page reference parameter block at 0x%llx", vcpu->run->s.regs.gprs[rx]); - vcpu->stat.diagnose_258++; + vcpu->stat.instruction_diagnose_258++; if (vcpu->run->s.regs.gprs[rx] & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm)); @@ -145,7 +145,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu) static int __diag_time_slice_end(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); - vcpu->stat.diagnose_44++; + vcpu->stat.instruction_diagnose_44++; kvm_vcpu_on_spin(vcpu, true); return 0; } @@ -169,7 +169,7 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) int tid; tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; - vcpu->stat.diagnose_9c++; + vcpu->stat.instruction_diagnose_9c++; /* yield to self */ if (tid == vcpu->vcpu_id) @@ -192,7 +192,7 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: yield forwarded", tid); - vcpu->stat.diagnose_9c_forward++; + vcpu->stat.diag_9c_forward++; return 0; } @@ -203,7 +203,7 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) return 0; no_yield: VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: ignored", tid); - vcpu->stat.diagnose_9c_ignored++; + vcpu->stat.diag_9c_ignored++; return 0; } @@ -213,7 +213,7 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff; VCPU_EVENT(vcpu, 3, "diag ipl functions, subcode %lx", subcode); - vcpu->stat.diagnose_308++; + vcpu->stat.instruction_diagnose_308++; switch (subcode) { case 3: vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR; @@ -245,7 +245,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) { int ret; - vcpu->stat.diagnose_500++; + vcpu->stat.instruction_diagnose_500++; /* No virtio-ccw notification? Get out quickly. */ if (!vcpu->kvm->arch.css_support || (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) @@ -299,7 +299,7 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) case 0x500: return __diag_virtio_hypercall(vcpu); default: - vcpu->stat.diagnose_other++; + vcpu->stat.instruction_diagnose_other++; return -EOPNOTSUPP; } } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index e144c8046ceb..1053c14c78ea 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -58,112 +58,128 @@ #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \ (KVM_MAX_VCPUS + LOCAL_IRQS)) -struct kvm_stats_debugfs_item debugfs_entries[] = { - VCPU_STAT("userspace_handled", exit_userspace), - VCPU_STAT("exit_null", exit_null), - VCPU_STAT("pfault_sync", pfault_sync), - VCPU_STAT("exit_validity", exit_validity), - VCPU_STAT("exit_stop_request", exit_stop_request), - VCPU_STAT("exit_external_request", exit_external_request), - VCPU_STAT("exit_io_request", exit_io_request), - VCPU_STAT("exit_external_interrupt", exit_external_interrupt), - VCPU_STAT("exit_instruction", exit_instruction), - VCPU_STAT("exit_pei", exit_pei), - VCPU_STAT("exit_program_interruption", exit_program_interruption), - VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program), - VCPU_STAT("exit_operation_exception", exit_operation_exception), - VCPU_STAT("halt_successful_poll", halt_successful_poll), - VCPU_STAT("halt_attempted_poll", halt_attempted_poll), - VCPU_STAT("halt_poll_invalid", halt_poll_invalid), - VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal), - VCPU_STAT("halt_wakeup", halt_wakeup), - VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns), - VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns), - VCPU_STAT("instruction_lctlg", instruction_lctlg), - VCPU_STAT("instruction_lctl", instruction_lctl), - VCPU_STAT("instruction_stctl", instruction_stctl), - VCPU_STAT("instruction_stctg", instruction_stctg), - VCPU_STAT("deliver_ckc", deliver_ckc), - VCPU_STAT("deliver_cputm", deliver_cputm), - VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal), - VCPU_STAT("deliver_external_call", deliver_external_call), - VCPU_STAT("deliver_service_signal", deliver_service_signal), - VCPU_STAT("deliver_virtio", deliver_virtio), - VCPU_STAT("deliver_stop_signal", deliver_stop_signal), - VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal), - VCPU_STAT("deliver_restart_signal", deliver_restart_signal), - VCPU_STAT("deliver_program", deliver_program), - VCPU_STAT("deliver_io", deliver_io), - VCPU_STAT("deliver_machine_check", deliver_machine_check), - VCPU_STAT("exit_wait_state", exit_wait_state), - VCPU_STAT("inject_ckc", inject_ckc), - VCPU_STAT("inject_cputm", inject_cputm), - VCPU_STAT("inject_external_call", inject_external_call), - VM_STAT("inject_float_mchk", inject_float_mchk), - VCPU_STAT("inject_emergency_signal", inject_emergency_signal), - VM_STAT("inject_io", inject_io), - VCPU_STAT("inject_mchk", inject_mchk), - VM_STAT("inject_pfault_done", inject_pfault_done), - VCPU_STAT("inject_program", inject_program), - VCPU_STAT("inject_restart", inject_restart), - VM_STAT("inject_service_signal", inject_service_signal), - VCPU_STAT("inject_set_prefix", inject_set_prefix), - VCPU_STAT("inject_stop_signal", inject_stop_signal), - VCPU_STAT("inject_pfault_init", inject_pfault_init), - VM_STAT("inject_virtio", inject_virtio), - VCPU_STAT("instruction_epsw", instruction_epsw), - VCPU_STAT("instruction_gs", instruction_gs), - VCPU_STAT("instruction_io_other", instruction_io_other), - VCPU_STAT("instruction_lpsw", instruction_lpsw), - VCPU_STAT("instruction_lpswe", instruction_lpswe), - VCPU_STAT("instruction_pfmf", instruction_pfmf), - VCPU_STAT("instruction_ptff", instruction_ptff), - VCPU_STAT("instruction_stidp", instruction_stidp), - VCPU_STAT("instruction_sck", instruction_sck), - VCPU_STAT("instruction_sckpf", instruction_sckpf), - VCPU_STAT("instruction_spx", instruction_spx), - VCPU_STAT("instruction_stpx", instruction_stpx), - VCPU_STAT("instruction_stap", instruction_stap), - VCPU_STAT("instruction_iske", instruction_iske), - VCPU_STAT("instruction_ri", instruction_ri), - VCPU_STAT("instruction_rrbe", instruction_rrbe), - VCPU_STAT("instruction_sske", instruction_sske), - VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock), - VCPU_STAT("instruction_essa", instruction_essa), - VCPU_STAT("instruction_stsi", instruction_stsi), - VCPU_STAT("instruction_stfl", instruction_stfl), - VCPU_STAT("instruction_tb", instruction_tb), - VCPU_STAT("instruction_tpi", instruction_tpi), - VCPU_STAT("instruction_tprot", instruction_tprot), - VCPU_STAT("instruction_tsch", instruction_tsch), - VCPU_STAT("instruction_sthyi", instruction_sthyi), - VCPU_STAT("instruction_sie", instruction_sie), - VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense), - VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running), - VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call), - VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency), - VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency), - VCPU_STAT("instruction_sigp_start", instruction_sigp_start), - VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop), - VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status), - VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status), - VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status), - VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch), - VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix), - VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart), - VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset), - VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset), - VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown), - VCPU_STAT("instruction_diag_10", diagnose_10), - VCPU_STAT("instruction_diag_44", diagnose_44), - VCPU_STAT("instruction_diag_9c", diagnose_9c), - VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored), - VCPU_STAT("diag_9c_forward", diagnose_9c_forward), - VCPU_STAT("instruction_diag_258", diagnose_258), - VCPU_STAT("instruction_diag_308", diagnose_308), - VCPU_STAT("instruction_diag_500", diagnose_500), - VCPU_STAT("instruction_diag_other", diagnose_other), - { NULL } +const struct _kvm_stats_desc kvm_vm_stats_desc[] = { + KVM_GENERIC_VM_STATS(), + STATS_DESC_COUNTER(VM, inject_io), + STATS_DESC_COUNTER(VM, inject_float_mchk), + STATS_DESC_COUNTER(VM, inject_pfault_done), + STATS_DESC_COUNTER(VM, inject_service_signal), + STATS_DESC_COUNTER(VM, inject_virtio) +}; + +const struct kvm_stats_header kvm_vm_stats_header = { + .name_size = KVM_STATS_NAME_SIZE, + .num_desc = ARRAY_SIZE(kvm_vm_stats_desc), + .id_offset = sizeof(struct kvm_stats_header), + .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE, + .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE + + sizeof(kvm_vm_stats_desc), +}; + +const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { + KVM_GENERIC_VCPU_STATS(), + STATS_DESC_COUNTER(VCPU, exit_userspace), + STATS_DESC_COUNTER(VCPU, exit_null), + STATS_DESC_COUNTER(VCPU, exit_external_request), + STATS_DESC_COUNTER(VCPU, exit_io_request), + STATS_DESC_COUNTER(VCPU, exit_external_interrupt), + STATS_DESC_COUNTER(VCPU, exit_stop_request), + STATS_DESC_COUNTER(VCPU, exit_validity), + STATS_DESC_COUNTER(VCPU, exit_instruction), + STATS_DESC_COUNTER(VCPU, exit_pei), + STATS_DESC_COUNTER(VCPU, halt_no_poll_steal), + STATS_DESC_COUNTER(VCPU, instruction_lctl), + STATS_DESC_COUNTER(VCPU, instruction_lctlg), + STATS_DESC_COUNTER(VCPU, instruction_stctl), + STATS_DESC_COUNTER(VCPU, instruction_stctg), + STATS_DESC_COUNTER(VCPU, exit_program_interruption), + STATS_DESC_COUNTER(VCPU, exit_instr_and_program), + STATS_DESC_COUNTER(VCPU, exit_operation_exception), + STATS_DESC_COUNTER(VCPU, deliver_ckc), + STATS_DESC_COUNTER(VCPU, deliver_cputm), + STATS_DESC_COUNTER(VCPU, deliver_external_call), + STATS_DESC_COUNTER(VCPU, deliver_emergency_signal), + STATS_DESC_COUNTER(VCPU, deliver_service_signal), + STATS_DESC_COUNTER(VCPU, deliver_virtio), + STATS_DESC_COUNTER(VCPU, deliver_stop_signal), + STATS_DESC_COUNTER(VCPU, deliver_prefix_signal), + STATS_DESC_COUNTER(VCPU, deliver_restart_signal), + STATS_DESC_COUNTER(VCPU, deliver_program), + STATS_DESC_COUNTER(VCPU, deliver_io), + STATS_DESC_COUNTER(VCPU, deliver_machine_check), + STATS_DESC_COUNTER(VCPU, exit_wait_state), + STATS_DESC_COUNTER(VCPU, inject_ckc), + STATS_DESC_COUNTER(VCPU, inject_cputm), + STATS_DESC_COUNTER(VCPU, inject_external_call), + STATS_DESC_COUNTER(VCPU, inject_emergency_signal), + STATS_DESC_COUNTER(VCPU, inject_mchk), + STATS_DESC_COUNTER(VCPU, inject_pfault_init), + STATS_DESC_COUNTER(VCPU, inject_program), + STATS_DESC_COUNTER(VCPU, inject_restart), + STATS_DESC_COUNTER(VCPU, inject_set_prefix), + STATS_DESC_COUNTER(VCPU, inject_stop_signal), + STATS_DESC_COUNTER(VCPU, instruction_epsw), + STATS_DESC_COUNTER(VCPU, instruction_gs), + STATS_DESC_COUNTER(VCPU, instruction_io_other), + STATS_DESC_COUNTER(VCPU, instruction_lpsw), + STATS_DESC_COUNTER(VCPU, instruction_lpswe), + STATS_DESC_COUNTER(VCPU, instruction_pfmf), + STATS_DESC_COUNTER(VCPU, instruction_ptff), + STATS_DESC_COUNTER(VCPU, instruction_sck), + STATS_DESC_COUNTER(VCPU, instruction_sckpf), + STATS_DESC_COUNTER(VCPU, instruction_stidp), + STATS_DESC_COUNTER(VCPU, instruction_spx), + STATS_DESC_COUNTER(VCPU, instruction_stpx), + STATS_DESC_COUNTER(VCPU, instruction_stap), + STATS_DESC_COUNTER(VCPU, instruction_iske), + STATS_DESC_COUNTER(VCPU, instruction_ri), + STATS_DESC_COUNTER(VCPU, instruction_rrbe), + STATS_DESC_COUNTER(VCPU, instruction_sske), + STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock), + STATS_DESC_COUNTER(VCPU, instruction_stsi), + STATS_DESC_COUNTER(VCPU, instruction_stfl), + STATS_DESC_COUNTER(VCPU, instruction_tb), + STATS_DESC_COUNTER(VCPU, instruction_tpi), + STATS_DESC_COUNTER(VCPU, instruction_tprot), + STATS_DESC_COUNTER(VCPU, instruction_tsch), + STATS_DESC_COUNTER(VCPU, instruction_sie), + STATS_DESC_COUNTER(VCPU, instruction_essa), + STATS_DESC_COUNTER(VCPU, instruction_sthyi), + STATS_DESC_COUNTER(VCPU, instruction_sigp_sense), + STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running), + STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call), + STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency), + STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency), + STATS_DESC_COUNTER(VCPU, instruction_sigp_start), + STATS_DESC_COUNTER(VCPU, instruction_sigp_stop), + STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status), + STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status), + STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status), + STATS_DESC_COUNTER(VCPU, instruction_sigp_arch), + STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix), + STATS_DESC_COUNTER(VCPU, instruction_sigp_restart), + STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset), + STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset), + STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_10), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_44), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c), + STATS_DESC_COUNTER(VCPU, diag_9c_ignored), + STATS_DESC_COUNTER(VCPU, diag_9c_forward), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_258), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_308), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_500), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_other), + STATS_DESC_COUNTER(VCPU, pfault_sync) +}; + +const struct kvm_stats_header kvm_vcpu_stats_header = { + .name_size = KVM_STATS_NAME_SIZE, + .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc), + .id_offset = sizeof(struct kvm_stats_header), + .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE, + .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE + + sizeof(kvm_vcpu_stats_desc), }; /* allow nested virtualization in KVM (if enabled by user space) */ @@ -214,7 +230,7 @@ static unsigned long kvm_s390_fac_size(void) BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64); BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64); BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) > - sizeof(S390_lowcore.stfle_fac_list)); + sizeof(stfle_fac_list)); return SIZE_INTERNAL; } @@ -1462,8 +1478,8 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) mach->ibc = sclp.ibc; memcpy(&mach->fac_mask, kvm->arch.model.fac_mask, S390_ARCH_FAC_LIST_SIZE_BYTE); - memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list, - sizeof(S390_lowcore.stfle_fac_list)); + memcpy((unsigned long *)&mach->fac_list, stfle_fac_list, + sizeof(stfle_fac_list)); VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx", kvm->arch.model.ibc, kvm->arch.model.cpuid); @@ -1933,7 +1949,7 @@ out: static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn) { int start = 0, end = slots->used_slots; - int slot = atomic_read(&slots->lru_slot); + int slot = atomic_read(&slots->last_used_slot); struct kvm_memory_slot *memslots = slots->memslots; if (gfn >= memslots[slot].base_gfn && @@ -1954,7 +1970,7 @@ static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn) if (gfn >= memslots[start].base_gfn && gfn < memslots[start].base_gfn + memslots[start].npages) { - atomic_set(&slots->lru_slot, start); + atomic_set(&slots->last_used_slot, start); } return start; @@ -2687,10 +2703,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; for (i = 0; i < kvm_s390_fac_size(); i++) { - kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] & + kvm->arch.model.fac_mask[i] = stfle_fac_list[i] & (kvm_s390_fac_base[i] | kvm_s390_fac_ext[i]); - kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] & + kvm->arch.model.fac_list[i] = stfle_fac_list[i] & kvm_s390_fac_base[i]; } kvm->arch.model.subfuncs = kvm_s390_available_subfunc; @@ -5061,7 +5077,7 @@ static int __init kvm_s390_init(void) for (i = 0; i < 16; i++) kvm_s390_fac_base[i] |= - S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i); + stfle_fac_list[i] & nonhyp_mask(i); return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); } diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 813b6e93dc83..c8841f476e91 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -140,7 +140,12 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm) /* Allocate variable storage */ vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE); vlen += uv_info.guest_virt_base_stor_len; - kvm->arch.pv.stor_var = vzalloc(vlen); + /* + * The Create Secure Configuration Ultravisor Call does not support + * using large pages for the virtual memory area. + * This is a hardware limitation. + */ + kvm->arch.pv.stor_var = vmalloc_no_huge(vlen); if (!kvm->arch.pv.stor_var) goto out_err; return 0; diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index 93b3209b94a2..cfcdf76d6a95 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -18,23 +18,30 @@ */ static inline char *__strend(const char *s) { - register unsigned long r0 asm("0") = 0; - - asm volatile ("0: srst %0,%1\n" - " jo 0b" - : "+d" (r0), "+a" (s) : : "cc", "memory"); - return (char *) r0; + unsigned long e = 0; + + asm volatile( + " lghi 0,0\n" + "0: srst %[e],%[s]\n" + " jo 0b\n" + : [e] "+&a" (e), [s] "+&a" (s) + : + : "cc", "memory", "0"); + return (char *)e; } static inline char *__strnend(const char *s, size_t n) { - register unsigned long r0 asm("0") = 0; const char *p = s + n; - asm volatile ("0: srst %0,%1\n" - " jo 0b" - : "+d" (p), "+a" (s) : "d" (r0) : "cc", "memory"); - return (char *) p; + asm volatile( + " lghi 0,0\n" + "0: srst %[p],%[s]\n" + " jo 0b\n" + : [p] "+&d" (p), [s] "+&a" (s) + : + : "cc", "memory", "0"); + return (char *)p; } /** @@ -76,13 +83,15 @@ EXPORT_SYMBOL(strnlen); #ifdef __HAVE_ARCH_STRCPY char *strcpy(char *dest, const char *src) { - register int r0 asm("0") = 0; char *ret = dest; - asm volatile ("0: mvst %0,%1\n" - " jo 0b" - : "+&a" (dest), "+&a" (src) : "d" (r0) - : "cc", "memory" ); + asm volatile( + " lghi 0,0\n" + "0: mvst %[dest],%[src]\n" + " jo 0b\n" + : [dest] "+&a" (dest), [src] "+&a" (src) + : + : "cc", "memory", "0"); return ret; } EXPORT_SYMBOL(strcpy); @@ -144,16 +153,18 @@ EXPORT_SYMBOL(strncpy); #ifdef __HAVE_ARCH_STRCAT char *strcat(char *dest, const char *src) { - register int r0 asm("0") = 0; - unsigned long dummy; + unsigned long dummy = 0; char *ret = dest; - asm volatile ("0: srst %0,%1\n" - " jo 0b\n" - "1: mvst %0,%2\n" - " jo 1b" - : "=&a" (dummy), "+a" (dest), "+a" (src) - : "d" (r0), "0" (0UL) : "cc", "memory" ); + asm volatile( + " lghi 0,0\n" + "0: srst %[dummy],%[dest]\n" + " jo 0b\n" + "1: mvst %[dummy],%[src]\n" + " jo 1b\n" + : [dummy] "+&a" (dummy), [dest] "+&a" (dest), [src] "+&a" (src) + : + : "cc", "memory", "0"); return ret; } EXPORT_SYMBOL(strcat); @@ -221,18 +232,20 @@ EXPORT_SYMBOL(strncat); #ifdef __HAVE_ARCH_STRCMP int strcmp(const char *s1, const char *s2) { - register int r0 asm("0") = 0; int ret = 0; - asm volatile ("0: clst %2,%3\n" - " jo 0b\n" - " je 1f\n" - " ic %0,0(%2)\n" - " ic %1,0(%3)\n" - " sr %0,%1\n" - "1:" - : "+d" (ret), "+d" (r0), "+a" (s1), "+a" (s2) - : : "cc", "memory"); + asm volatile( + " lghi 0,0\n" + "0: clst %[s1],%[s2]\n" + " jo 0b\n" + " je 1f\n" + " ic %[ret],0(%[s1])\n" + " ic 0,0(%[s2])\n" + " sr %[ret],0\n" + "1:" + : [ret] "+&d" (ret), [s1] "+&a" (s1), [s2] "+&a" (s2) + : + : "cc", "memory", "0"); return ret; } EXPORT_SYMBOL(strcmp); @@ -261,18 +274,18 @@ EXPORT_SYMBOL(strrchr); static inline int clcle(const char *s1, unsigned long l1, const char *s2, unsigned long l2) { - register unsigned long r2 asm("2") = (unsigned long) s1; - register unsigned long r3 asm("3") = (unsigned long) l1; - register unsigned long r4 asm("4") = (unsigned long) s2; - register unsigned long r5 asm("5") = (unsigned long) l2; + union register_pair r1 = { .even = (unsigned long)s1, .odd = l1, }; + union register_pair r3 = { .even = (unsigned long)s2, .odd = l2, }; int cc; - asm volatile ("0: clcle %1,%3,0\n" - " jo 0b\n" - " ipm %0\n" - " srl %0,28" - : "=&d" (cc), "+a" (r2), "+a" (r3), - "+a" (r4), "+a" (r5) : : "cc", "memory"); + asm volatile( + "0: clcle %[r1],%[r3],0\n" + " jo 0b\n" + " ipm %[cc]\n" + " srl %[cc],28\n" + : [cc] "=&d" (cc), [r1] "+&d" (r1.pair), [r3] "+&d" (r3.pair) + : + : "cc", "memory"); return cc; } @@ -315,15 +328,18 @@ EXPORT_SYMBOL(strstr); #ifdef __HAVE_ARCH_MEMCHR void *memchr(const void *s, int c, size_t n) { - register int r0 asm("0") = (char) c; const void *ret = s + n; - asm volatile ("0: srst %0,%1\n" - " jo 0b\n" - " jl 1f\n" - " la %0,0\n" - "1:" - : "+a" (ret), "+&a" (s) : "d" (r0) : "cc", "memory"); + asm volatile( + " lgr 0,%[c]\n" + "0: srst %[ret],%[s]\n" + " jo 0b\n" + " jl 1f\n" + " la %[ret],0\n" + "1:" + : [ret] "+&a" (ret), [s] "+&a" (s) + : [c] "d" (c) + : "cc", "memory", "0"); return (void *) ret; } EXPORT_SYMBOL(memchr); @@ -360,13 +376,16 @@ EXPORT_SYMBOL(memcmp); #ifdef __HAVE_ARCH_MEMSCAN void *memscan(void *s, int c, size_t n) { - register int r0 asm("0") = (char) c; const void *ret = s + n; - asm volatile ("0: srst %0,%1\n" - " jo 0b\n" - : "+a" (ret), "+&a" (s) : "d" (r0) : "cc", "memory"); - return (void *) ret; + asm volatile( + " lgr 0,%[c]\n" + "0: srst %[ret],%[s]\n" + " jo 0b\n" + : [ret] "+&a" (ret), [s] "+&a" (s) + : [c] "d" (c) + : "cc", "memory", "0"); + return (void *)ret; } EXPORT_SYMBOL(memscan); #endif diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 2f32802f79ce..ecf327d743a0 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -120,7 +120,7 @@ static struct unwindme *unwindme; #define UWM_REGS 0x2 /* Pass regs to test_unwind(). */ #define UWM_SP 0x4 /* Pass sp to test_unwind(). */ #define UWM_CALLER 0x8 /* Unwind starting from caller. */ -#define UWM_SWITCH_STACK 0x10 /* Use CALL_ON_STACK. */ +#define UWM_SWITCH_STACK 0x10 /* Use call_on_stack. */ #define UWM_IRQ 0x20 /* Unwind from irq context. */ #define UWM_PGM 0x40 /* Unwind from program check handler. */ @@ -211,7 +211,8 @@ static noinline int unwindme_func2(struct unwindme *u) if (u->flags & UWM_SWITCH_STACK) { local_irq_save(flags); local_mcck_disable(); - rc = CALL_ON_STACK(unwindme_func3, S390_lowcore.nodat_stack, 1, u); + rc = call_on_stack(1, S390_lowcore.nodat_stack, + int, unwindme_func3, struct unwindme *, u); local_mcck_enable(); local_irq_restore(flags); return rc; diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index 2fece1fd210a..7ec8b1fa0f08 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -61,11 +61,11 @@ static inline int copy_with_mvcos(void) static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr, unsigned long size) { - register unsigned long reg0 asm("0") = 0x81UL; unsigned long tmp1, tmp2; tmp1 = -4096UL; asm volatile( + " lghi 0,%[spec]\n" "0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n" "6: jz 4f\n" "1: algr %0,%3\n" @@ -84,7 +84,8 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr "5:\n" EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : "d" (reg0) : "cc", "memory"); + : [spec] "K" (0x81UL) + : "cc", "memory", "0"); return size; } @@ -133,11 +134,11 @@ EXPORT_SYMBOL(raw_copy_from_user); static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, unsigned long size) { - register unsigned long reg0 asm("0") = 0x810000UL; unsigned long tmp1, tmp2; tmp1 = -4096UL; asm volatile( + " llilh 0,%[spec]\n" "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n" "6: jz 4f\n" "1: algr %0,%3\n" @@ -156,7 +157,8 @@ static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, "5:\n" EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : "d" (reg0) : "cc", "memory"); + : [spec] "K" (0x81UL) + : "cc", "memory", "0"); return size; } @@ -205,12 +207,12 @@ EXPORT_SYMBOL(raw_copy_to_user); static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from, unsigned long size) { - register unsigned long reg0 asm("0") = 0x810081UL; unsigned long tmp1, tmp2; tmp1 = -4096UL; /* FIXME: copy with reduced length. */ asm volatile( + " lgr 0,%[spec]\n" "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n" " jz 2f\n" "1: algr %0,%3\n" @@ -221,7 +223,8 @@ static inline unsigned long copy_in_user_mvcos(void __user *to, const void __use "3: \n" EX_TABLE(0b,3b) : "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2) - : "d" (reg0) : "cc", "memory"); + : [spec] "d" (0x810081UL) + : "cc", "memory", "0"); return size; } @@ -266,11 +269,11 @@ EXPORT_SYMBOL(raw_copy_in_user); static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size) { - register unsigned long reg0 asm("0") = 0x810000UL; unsigned long tmp1, tmp2; tmp1 = -4096UL; asm volatile( + " llilh 0,%[spec]\n" "0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n" " jz 4f\n" "1: algr %0,%2\n" @@ -288,7 +291,8 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size "5:\n" EX_TABLE(0b,2b) EX_TABLE(3b,5b) : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2) - : "a" (empty_zero_page), "d" (reg0) : "cc", "memory"); + : "a" (empty_zero_page), [spec] "K" (0x81UL) + : "cc", "memory", "0"); return size; } @@ -338,10 +342,10 @@ EXPORT_SYMBOL(__clear_user); static inline unsigned long strnlen_user_srst(const char __user *src, unsigned long size) { - register unsigned long reg0 asm("0") = 0; unsigned long tmp1, tmp2; asm volatile( + " lghi 0,0\n" " la %2,0(%1)\n" " la %3,0(%0,%1)\n" " slgr %0,%0\n" @@ -353,7 +357,8 @@ static inline unsigned long strnlen_user_srst(const char __user *src, "1: sacf 768\n" EX_TABLE(0b,1b) : "+a" (size), "+a" (src), "=a" (tmp1), "=a" (tmp2) - : "d" (reg0) : "cc", "memory"); + : + : "cc", "memory", "0"); return size; } diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c index 29d9470dbceb..a963c3d8ad0d 100644 --- a/arch/s390/lib/xor.c +++ b/arch/s390/lib/xor.c @@ -91,9 +91,6 @@ static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4, unsigned long *p5) { - /* Get around a gcc oddity */ - register unsigned long *reg7 asm ("7") = p5; - asm volatile( " larl 1,2f\n" " aghi %0,-1\n" @@ -122,7 +119,7 @@ static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, " xc 0(1,%1),0(%5)\n" "3:\n" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4), - "+a" (reg7) + "+a" (p5) : : "0", "1", "cc", "memory"); } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 826d01777361..e33c43b38afe 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -285,26 +285,6 @@ static noinline void do_sigbus(struct pt_regs *regs) (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK)); } -static noinline int signal_return(struct pt_regs *regs) -{ - u16 instruction; - int rc; - - rc = __get_user(instruction, (u16 __user *) regs->psw.addr); - if (rc) - return rc; - if (instruction == 0x0a77) { - set_pt_regs_flag(regs, PIF_SYSCALL); - regs->int_code = 0x00040077; - return 0; - } else if (instruction == 0x0aad) { - set_pt_regs_flag(regs, PIF_SYSCALL); - regs->int_code = 0x000400ad; - return 0; - } - return -EACCES; -} - static noinline void do_fault_error(struct pt_regs *regs, int access, vm_fault_t fault) { @@ -312,9 +292,6 @@ static noinline void do_fault_error(struct pt_regs *regs, int access, switch (fault) { case VM_FAULT_BADACCESS: - if (access == VM_EXEC && signal_return(regs) == 0) - break; - fallthrough; case VM_FAULT_BADMAP: /* Bad memory access. Check if it is kernel or user space. */ if (user_mode(regs)) { @@ -702,7 +679,7 @@ static void pfault_interrupt(struct ext_code ext_code, * interrupt since it must be a leftover of a PFAULT * CANCEL operation which didn't remove all pending * completion interrupts. */ - if (tsk->state == TASK_RUNNING) + if (task_is_running(tsk)) tsk->thread.pfault_wait = -1; } } else { @@ -792,6 +769,32 @@ void do_secure_storage_access(struct pt_regs *regs) struct page *page; int rc; + /* + * bit 61 tells us if the address is valid, if it's not we + * have a major problem and should stop the kernel or send a + * SIGSEGV to the process. Unfortunately bit 61 is not + * reliable without the misc UV feature so we need to check + * for that as well. + */ + if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications) && + !test_bit_inv(61, ®s->int_parm_long)) { + /* + * When this happens, userspace did something that it + * was not supposed to do, e.g. branching into secure + * memory. Trigger a segmentation fault. + */ + if (user_mode(regs)) { + send_sig(SIGSEGV, current, 0); + return; + } + + /* + * The kernel should never run into this case and we + * have no way out of this situation. + */ + panic("Unexpected PGM 0x3d with TEID bit 61=0"); + } + switch (get_fault_type(regs)) { case USER_FAULT: mm = current->mm; diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index db4d303aaaa9..a0fdc6dc5f9d 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -13,7 +13,6 @@ #include <asm/setup.h> #include <asm/uv.h> -unsigned long kasan_vmax; static unsigned long segment_pos __initdata; static unsigned long segment_low __initdata; static unsigned long pgalloc_pos __initdata; @@ -251,28 +250,9 @@ static void __init kasan_early_detect_facilities(void) } } -static bool __init has_uv_sec_stor_limit(void) -{ - /* - * keep these conditions in line with setup_uv() - */ - if (!is_prot_virt_host()) - return false; - - if (is_prot_virt_guest()) - return false; - - if (!test_facility(158)) - return false; - - return !!uv_info.max_sec_stor_addr; -} - void __init kasan_early_init(void) { - unsigned long untracked_mem_end; unsigned long shadow_alloc_size; - unsigned long vmax_unlimited; unsigned long initrd_end; unsigned long memsize; unsigned long pgt_prot = pgprot_val(PAGE_KERNEL_RO); @@ -306,9 +286,6 @@ void __init kasan_early_init(void) BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, P4D_SIZE)); BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE)); crst_table_init((unsigned long *)early_pg_dir, _REGION2_ENTRY_EMPTY); - untracked_mem_end = kasan_vmax = vmax_unlimited = _REGION1_SIZE; - if (has_uv_sec_stor_limit()) - kasan_vmax = min(vmax_unlimited, uv_info.max_sec_stor_addr); /* init kasan zero shadow */ crst_table_init((unsigned long *)kasan_early_shadow_p4d, @@ -375,18 +352,18 @@ void __init kasan_early_init(void) */ /* populate kasan shadow (for identity mapping and zero page mapping) */ kasan_early_pgtable_populate(__sha(0), __sha(memsize), POPULATE_MAP); - if (IS_ENABLED(CONFIG_MODULES)) - untracked_mem_end = kasan_vmax - MODULES_LEN; if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { - untracked_mem_end = kasan_vmax - vmalloc_size - MODULES_LEN; /* shallowly populate kasan shadow for vmalloc and modules */ - kasan_early_pgtable_populate(__sha(untracked_mem_end), __sha(kasan_vmax), + kasan_early_pgtable_populate(__sha(VMALLOC_START), __sha(MODULES_END), POPULATE_SHALLOW); } /* populate kasan shadow for untracked memory */ - kasan_early_pgtable_populate(__sha(ident_map_size), __sha(untracked_mem_end), + kasan_early_pgtable_populate(__sha(ident_map_size), + IS_ENABLED(CONFIG_KASAN_VMALLOC) ? + __sha(VMALLOC_START) : + __sha(MODULES_VADDR), POPULATE_ZERO_SHADOW); - kasan_early_pgtable_populate(__sha(kasan_vmax), __sha(vmax_unlimited), + kasan_early_pgtable_populate(__sha(MODULES_END), __sha(_REGION1_SIZE), POPULATE_ZERO_SHADOW); /* memory allocated for identity mapping structs will be freed later */ pgalloc_freeable = pgalloc_pos; diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 1d17413b319a..a0f54bd5e98a 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -79,22 +79,21 @@ notrace void *s390_kernel_write(void *dst, const void *src, size_t size) static int __no_sanitize_address __memcpy_real(void *dest, void *src, size_t count) { - register unsigned long _dest asm("2") = (unsigned long) dest; - register unsigned long _len1 asm("3") = (unsigned long) count; - register unsigned long _src asm("4") = (unsigned long) src; - register unsigned long _len2 asm("5") = (unsigned long) count; + union register_pair _dst, _src; int rc = -EFAULT; + _dst.even = (unsigned long) dest; + _dst.odd = (unsigned long) count; + _src.even = (unsigned long) src; + _src.odd = (unsigned long) count; asm volatile ( - "0: mvcle %1,%2,0x0\n" + "0: mvcle %[dst],%[src],0\n" "1: jo 0b\n" - " lhi %0,0x0\n" + " lhi %[rc],0\n" "2:\n" EX_TABLE(1b,2b) - : "+d" (rc), "+d" (_dest), "+d" (_src), "+d" (_len1), - "+d" (_len2), "=m" (*((long *) dest)) - : "m" (*((long *) src)) - : "cc", "memory"); + : [rc] "+&d" (rc), [dst] "+&d" (_dst.pair), [src] "+&d" (_src.pair) + : : "cc", "memory"); return rc; } @@ -126,12 +125,18 @@ static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest, */ int memcpy_real(void *dest, void *src, size_t count) { + unsigned long _dest = (unsigned long)dest; + unsigned long _src = (unsigned long)src; + unsigned long _count = (unsigned long)count; int rc; if (S390_lowcore.nodat_stack != 0) { preempt_disable(); - rc = CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack, 3, - dest, src, count); + rc = call_on_stack(3, S390_lowcore.nodat_stack, + unsigned long, _memcpy_real, + unsigned long, _dest, + unsigned long, _src, + unsigned long, _count); preempt_enable(); return rc; } @@ -140,8 +145,7 @@ int memcpy_real(void *dest, void *src, size_t count) * not set up yet. Just call _memcpy_real on the early boot * stack */ - return _memcpy_real((unsigned long) dest,(unsigned long) src, - (unsigned long) count); + return _memcpy_real(_dest, _src, _count); } /* diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index 7f0e154a470a..68b153083a92 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -31,17 +31,17 @@ __setup("cmma=", cmma); static inline int cmma_test_essa(void) { - register unsigned long tmp asm("0") = 0; - register int rc asm("1"); + unsigned long tmp = 0; + int rc = -EOPNOTSUPP; /* test ESSA_GET_STATE */ asm volatile( - " .insn rrf,0xb9ab0000,%1,%1,%2,0\n" - "0: la %0,0\n" + " .insn rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n" + "0: la %[rc],0\n" "1:\n" EX_TABLE(0b,1b) - : "=&d" (rc), "+&d" (tmp) - : "i" (ESSA_GET_STATE), "0" (-EOPNOTSUPP)); + : [rc] "+&d" (rc), [tmp] "+&d" (tmp) + : [cmd] "i" (ESSA_GET_STATE)); return rc; } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 18205f851c24..eec3a9d7176e 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -691,7 +691,7 @@ static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) if (!non_swap_entry(entry)) dec_mm_counter(mm, MM_SWAPENTS); else if (is_migration_entry(entry)) { - struct page *page = migration_entry_to_page(entry); + struct page *page = pfn_swap_entry_to_page(entry); dec_mm_counter(mm, mm_counter(page)); } diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index 02f9505c99a8..2e43996159f0 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -63,16 +63,15 @@ u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status) /* Refresh PCI Translations */ static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status) { - register u64 __addr asm("2") = addr; - register u64 __range asm("3") = range; + union register_pair addr_range = {.even = addr, .odd = range}; u8 cc; asm volatile ( - " .insn rre,0xb9d30000,%[fn],%[addr]\n" + " .insn rre,0xb9d30000,%[fn],%[addr_range]\n" " ipm %[cc]\n" " srl %[cc],28\n" : [cc] "=d" (cc), [fn] "+d" (fn) - : [addr] "d" (__addr), "d" (__range) + : [addr_range] "d" (addr_range.pair) : "cc"); *status = fn >> 24 & 0xff; return cc; @@ -113,21 +112,19 @@ int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib) /* PCI Load */ static inline int ____pcilg(u64 *data, u64 req, u64 offset, u8 *status) { - register u64 __req asm("2") = req; - register u64 __offset asm("3") = offset; + union register_pair req_off = {.even = req, .odd = offset}; int cc = -ENXIO; u64 __data; asm volatile ( - " .insn rre,0xb9d20000,%[data],%[req]\n" + " .insn rre,0xb9d20000,%[data],%[req_off]\n" "0: ipm %[cc]\n" " srl %[cc],28\n" "1:\n" EX_TABLE(0b, 1b) - : [cc] "+d" (cc), [data] "=d" (__data), [req] "+d" (__req) - : "d" (__offset) - : "cc"); - *status = __req >> 24 & 0xff; + : [cc] "+d" (cc), [data] "=d" (__data), + [req_off] "+&d" (req_off.pair) :: "cc"); + *status = req_off.even >> 24 & 0xff; *data = __data; return cc; } @@ -173,21 +170,19 @@ static inline int zpci_load_fh(u64 *data, const volatile void __iomem *addr, static inline int __pcilg_mio(u64 *data, u64 ioaddr, u64 len, u8 *status) { - register u64 addr asm("2") = ioaddr; - register u64 r3 asm("3") = len; + union register_pair ioaddr_len = {.even = ioaddr, .odd = len}; int cc = -ENXIO; u64 __data; asm volatile ( - " .insn rre,0xb9d60000,%[data],%[ioaddr]\n" + " .insn rre,0xb9d60000,%[data],%[ioaddr_len]\n" "0: ipm %[cc]\n" " srl %[cc],28\n" "1:\n" EX_TABLE(0b, 1b) - : [cc] "+d" (cc), [data] "=d" (__data), "+d" (r3) - : [ioaddr] "d" (addr) - : "cc"); - *status = r3 >> 24 & 0xff; + : [cc] "+d" (cc), [data] "=d" (__data), + [ioaddr_len] "+&d" (ioaddr_len.pair) :: "cc"); + *status = ioaddr_len.odd >> 24 & 0xff; *data = __data; return cc; } @@ -211,20 +206,19 @@ EXPORT_SYMBOL_GPL(zpci_load); /* PCI Store */ static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status) { - register u64 __req asm("2") = req; - register u64 __offset asm("3") = offset; + union register_pair req_off = {.even = req, .odd = offset}; int cc = -ENXIO; asm volatile ( - " .insn rre,0xb9d00000,%[data],%[req]\n" + " .insn rre,0xb9d00000,%[data],%[req_off]\n" "0: ipm %[cc]\n" " srl %[cc],28\n" "1:\n" EX_TABLE(0b, 1b) - : [cc] "+d" (cc), [req] "+d" (__req) - : "d" (__offset), [data] "d" (data) + : [cc] "+d" (cc), [req_off] "+&d" (req_off.pair) + : [data] "d" (data) : "cc"); - *status = __req >> 24 & 0xff; + *status = req_off.even >> 24 & 0xff; return cc; } @@ -257,20 +251,19 @@ static inline int zpci_store_fh(const volatile void __iomem *addr, u64 data, static inline int __pcistg_mio(u64 data, u64 ioaddr, u64 len, u8 *status) { - register u64 addr asm("2") = ioaddr; - register u64 r3 asm("3") = len; + union register_pair ioaddr_len = {.even = ioaddr, .odd = len}; int cc = -ENXIO; asm volatile ( - " .insn rre,0xb9d40000,%[data],%[ioaddr]\n" + " .insn rre,0xb9d40000,%[data],%[ioaddr_len]\n" "0: ipm %[cc]\n" " srl %[cc],28\n" "1:\n" EX_TABLE(0b, 1b) - : [cc] "+d" (cc), "+d" (r3) - : [data] "d" (data), [ioaddr] "d" (addr) - : "cc"); - *status = r3 >> 24 & 0xff; + : [cc] "+d" (cc), [ioaddr_len] "+&d" (ioaddr_len.pair) + : [data] "d" (data) + : "cc", "memory"); + *status = ioaddr_len.odd >> 24 & 0xff; return cc; } diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 9dd5ad1b553d..9c7de9089939 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -35,7 +35,7 @@ static struct airq_iv *zpci_sbv; */ static struct airq_iv **zpci_ibv; -/* Modify PCI: Register adapter interruptions */ +/* Modify PCI: Register floating adapter interruptions */ static int zpci_set_airq(struct zpci_dev *zdev) { u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT); @@ -53,7 +53,7 @@ static int zpci_set_airq(struct zpci_dev *zdev) return zpci_mod_fc(req, &fib, &status) ? -EIO : 0; } -/* Modify PCI: Unregister adapter interruptions */ +/* Modify PCI: Unregister floating adapter interruptions */ static int zpci_clear_airq(struct zpci_dev *zdev) { u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT); @@ -98,6 +98,38 @@ static int zpci_clear_directed_irq(struct zpci_dev *zdev) return cc ? -EIO : 0; } +/* Register adapter interruptions */ +int zpci_set_irq(struct zpci_dev *zdev) +{ + int rc; + + if (irq_delivery == DIRECTED) + rc = zpci_set_directed_irq(zdev); + else + rc = zpci_set_airq(zdev); + + if (!rc) + zdev->irqs_registered = 1; + + return rc; +} + +/* Clear adapter interruptions */ +int zpci_clear_irq(struct zpci_dev *zdev) +{ + int rc; + + if (irq_delivery == DIRECTED) + rc = zpci_clear_directed_irq(zdev); + else + rc = zpci_clear_airq(zdev); + + if (!rc) + zdev->irqs_registered = 0; + + return rc; +} + static int zpci_set_irq_affinity(struct irq_data *data, const struct cpumask *dest, bool force) { @@ -311,10 +343,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) zdev->msi_first_bit = bit; zdev->msi_nr_irqs = msi_vecs; - if (irq_delivery == DIRECTED) - rc = zpci_set_directed_irq(zdev); - else - rc = zpci_set_airq(zdev); + rc = zpci_set_irq(zdev); if (rc) return rc; @@ -328,10 +357,7 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev) int rc; /* Disable interrupts */ - if (irq_delivery == DIRECTED) - rc = zpci_clear_directed_irq(zdev); - else - rc = zpci_clear_airq(zdev); + rc = zpci_clear_irq(zdev); if (rc) return; diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 474617b88648..ae683aa623ac 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -49,8 +49,7 @@ static inline int __pcistg_mio_inuser( void __iomem *ioaddr, const void __user *src, u64 ulen, u8 *status) { - register u64 addr asm("2") = (u64 __force) ioaddr; - register u64 len asm("3") = ulen; + union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen}; int cc = -ENXIO; u64 val = 0; u64 cnt = ulen; @@ -68,7 +67,7 @@ static inline int __pcistg_mio_inuser( " aghi %[src],1\n" " ogr %[val],%[tmp]\n" " brctg %[cnt],0b\n" - "1: .insn rre,0xb9d40000,%[val],%[ioaddr]\n" + "1: .insn rre,0xb9d40000,%[val],%[ioaddr_len]\n" "2: ipm %[cc]\n" " srl %[cc],28\n" "3: sacf 768\n" @@ -76,10 +75,9 @@ static inline int __pcistg_mio_inuser( : [src] "+a" (src), [cnt] "+d" (cnt), [val] "+d" (val), [tmp] "=d" (tmp), - [len] "+d" (len), [cc] "+d" (cc), - [ioaddr] "+a" (addr) + [cc] "+d" (cc), [ioaddr_len] "+&d" (ioaddr_len.pair) :: "cc", "memory"); - *status = len >> 24 & 0xff; + *status = ioaddr_len.odd >> 24 & 0xff; /* did we read everything from user memory? */ if (!cc && cnt != 0) @@ -195,8 +193,7 @@ static inline int __pcilg_mio_inuser( void __user *dst, const void __iomem *ioaddr, u64 ulen, u8 *status) { - register u64 addr asm("2") = (u64 __force) ioaddr; - register u64 len asm("3") = ulen; + union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen}; u64 cnt = ulen; int shift = ulen * 8; int cc = -ENXIO; @@ -209,7 +206,7 @@ static inline int __pcilg_mio_inuser( */ asm volatile ( " sacf 256\n" - "0: .insn rre,0xb9d60000,%[val],%[ioaddr]\n" + "0: .insn rre,0xb9d60000,%[val],%[ioaddr_len]\n" "1: ipm %[cc]\n" " srl %[cc],28\n" " ltr %[cc],%[cc]\n" @@ -222,18 +219,17 @@ static inline int __pcilg_mio_inuser( "4: sacf 768\n" EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b) : - [cc] "+d" (cc), [val] "=d" (val), [len] "+d" (len), + [ioaddr_len] "+&d" (ioaddr_len.pair), + [cc] "+d" (cc), [val] "=d" (val), [dst] "+a" (dst), [cnt] "+d" (cnt), [tmp] "=d" (tmp), [shift] "+d" (shift) - : - [ioaddr] "a" (addr) - : "cc", "memory"); + :: "cc", "memory"); /* did we write everything to the user space buffer? */ if (!cc && cnt != 0) cc = -EFAULT; - *status = len >> 24 & 0xff; + *status = ioaddr_len.odd >> 24 & 0xff; return cc; } diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index c57f8c40e992..21c4ebe29b9a 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -24,6 +24,7 @@ KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float -fno-common +KBUILD_CFLAGS += -fno-stack-protector KBUILD_CFLAGS += $(CLANG_FLAGS) KBUILD_CFLAGS += $(call cc-option,-fno-PIE) KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS)) |