diff options
Diffstat (limited to 'arch/s390')
194 files changed, 4970 insertions, 3809 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 9c9ec08d78c7..0c16dc443e2f 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -41,9 +41,6 @@ config AUDIT_ARCH config NO_IOPORT_MAP def_bool y -config PCI_QUIRKS - def_bool n - config ARCH_SUPPORTS_UPROBES def_bool y @@ -73,7 +70,6 @@ config S390 imply IMA_SECURE_AND_OR_TRUSTED_BOOT select ALTERNATE_USER_ADDRESS_SPACE select ARCH_32BIT_USTAT_F_TINODE - select ARCH_BINFMT_ELF_STATE select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM select ARCH_ENABLE_MEMORY_HOTREMOVE @@ -96,6 +92,7 @@ config S390 select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_PREEMPT_LAZY + select ARCH_HAS_PTDUMP select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SCALED_CPUTIME select ARCH_HAS_SET_DIRECT_MAP @@ -140,6 +137,7 @@ config S390 select ARCH_SUPPORTS_DEBUG_PAGEALLOC select ARCH_SUPPORTS_HUGETLBFS select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && CC_IS_CLANG + select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_SUPPORTS_PER_VMA_LOCK select ARCH_USE_BUILTIN_BSWAP @@ -148,6 +146,7 @@ config S390 select ARCH_WANTS_NO_INSTR select ARCH_WANT_DEFAULT_BPF_JIT select ARCH_WANT_IPC_PARSE_VERSION + select ARCH_WANT_IRQS_OFF_ACTIVATE_MM select ARCH_WANT_KERNEL_PMD_MKWRITE select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP @@ -163,9 +162,9 @@ config S390 select GENERIC_CPU_VULNERABILITIES select GENERIC_ENTRY select GENERIC_GETTIMEOFDAY - select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL + select GENERIC_VDSO_DATA_STORE select GENERIC_VDSO_TIME_NS select GENERIC_IOREMAP if PCI select HAVE_ALIGNED_STRUCT_PAGE @@ -185,6 +184,7 @@ config S390 select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_VMAP_STACK select HAVE_ASM_MODVERSIONS + select HAVE_BUILDTIME_MCOUNT_SORT select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL select HAVE_DEBUG_KMEMLEAK @@ -241,6 +241,7 @@ config S390 
select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING select HAVE_VIRT_CPU_ACCOUNTING_IDLE + select HOTPLUG_SMT select IOMMU_HELPER if PCI select IOMMU_SUPPORT if PCI select KASAN_VMALLOC if KASAN @@ -258,6 +259,7 @@ config S390 select PCI_DOMAINS if PCI select PCI_MSI if PCI select PCI_MSI_ARCH_FALLBACKS if PCI_MSI + select PCI_QUIRKS if PCI select SPARSE_IRQ select SWIOTLB select SYSCTL_EXCEPTION_TRACE @@ -331,6 +333,10 @@ config HAVE_MARCH_Z16_FEATURES def_bool n select HAVE_MARCH_Z15_FEATURES +config HAVE_MARCH_Z17_FEATURES + def_bool n + select HAVE_MARCH_Z16_FEATURES + choice prompt "Processor type" default MARCH_Z196 @@ -396,6 +402,14 @@ config MARCH_Z16 Select this to enable optimizations for IBM z16 (3931 and 3932 series). +config MARCH_Z17 + bool "IBM z17" + select HAVE_MARCH_Z17_FEATURES + depends on $(cc-option,-march=z17) + help + Select this to enable optimizations for IBM z17 (9175 and + 9176 series). + endchoice config MARCH_Z10_TUNE @@ -419,6 +433,9 @@ config MARCH_Z15_TUNE config MARCH_Z16_TUNE def_bool TUNE_Z16 || MARCH_Z16 && TUNE_DEFAULT +config MARCH_Z17_TUNE + def_bool TUNE_Z17 || MARCH_Z17 && TUNE_DEFAULT + choice prompt "Tune code generation" default TUNE_DEFAULT @@ -463,6 +480,10 @@ config TUNE_Z16 bool "IBM z16" depends on $(cc-option,-mtune=z16) +config TUNE_Z17 + bool "IBM z17" + depends on $(cc-option,-mtune=z17) + endchoice config 64BIT @@ -630,6 +651,7 @@ endchoice config RELOCATABLE def_bool y + select ARCH_VMLINUX_NEEDS_RELOCS help This builds a kernel image that retains relocation information so it can be loaded at an arbitrary address. diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index c4300ea4abf8..7955d7eee7d8 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -13,6 +13,16 @@ config DEBUG_ENTRY If unsure, say N. 
+config STRICT_MM_TYPECHECKS + bool "Strict Memory Management Type Checks" + depends on DEBUG_KERNEL + help + Enable strict type checking for memory management types like pte_t + and pmd_t. This generates slightly worse code and should be used + for debug builds. + + If unsure, say N. + config CIO_INJECT bool "CIO Inject interfaces" depends on DEBUG_KERNEL && DEBUG_FS diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 5fae311203c2..7679bc16b692 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -15,7 +15,7 @@ KBUILD_CFLAGS_MODULE += -fPIC KBUILD_AFLAGS += -m64 KBUILD_CFLAGS += -m64 KBUILD_CFLAGS += -fPIC -LDFLAGS_vmlinux := -no-pie --emit-relocs --discard-none +LDFLAGS_vmlinux := $(call ld-option,-no-pie) extra_tools := relocs aflags_dwarf := -Wa,-gdwarf-2 KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__ @@ -48,6 +48,7 @@ mflags-$(CONFIG_MARCH_Z13) := -march=z13 mflags-$(CONFIG_MARCH_Z14) := -march=z14 mflags-$(CONFIG_MARCH_Z15) := -march=z15 mflags-$(CONFIG_MARCH_Z16) := -march=z16 +mflags-$(CONFIG_MARCH_Z17) := -march=z17 export CC_FLAGS_MARCH := $(mflags-y) @@ -61,6 +62,7 @@ cflags-$(CONFIG_MARCH_Z13_TUNE) += -mtune=z13 cflags-$(CONFIG_MARCH_Z14_TUNE) += -mtune=z14 cflags-$(CONFIG_MARCH_Z15_TUNE) += -mtune=z15 cflags-$(CONFIG_MARCH_Z16_TUNE) += -mtune=z16 +cflags-$(CONFIG_MARCH_Z17_TUNE) += -mtune=z17 cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include diff --git a/arch/s390/Makefile.postlink b/arch/s390/Makefile.postlink index 1ae5478cd6ac..c2b737500a91 100644 --- a/arch/s390/Makefile.postlink +++ b/arch/s390/Makefile.postlink @@ -11,7 +11,6 @@ __archpost: -include include/config/auto.conf include $(srctree)/scripts/Kbuild.include -include $(srctree)/scripts/Makefile.lib CMD_RELOCS=arch/s390/tools/relocs OUT_RELOCS = arch/s390/boot @@ -20,9 +19,8 @@ quiet_cmd_relocs = RELOCS $(OUT_RELOCS)/relocs.S mkdir -p $(OUT_RELOCS); \ $(CMD_RELOCS) $@ > $(OUT_RELOCS)/relocs.S -vmlinux: FORCE +vmlinux.unstripped: FORCE $(call cmd,relocs) - 
$(call cmd,strip_relocs) clean: @rm -f $(OUT_RELOCS)/relocs.S diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore index f5ef099e2fd3..af2a6a7bc028 100644 --- a/arch/s390/boot/.gitignore +++ b/arch/s390/boot/.gitignore @@ -5,4 +5,5 @@ relocs.S section_cmp.* vmlinux vmlinux.lds +vmlinux.map vmlinux.syms diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 8bc1308ac892..bee49626be4b 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -26,7 +26,7 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char obj-y := head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o -obj-y += version.o pgm_check_info.o ctype.o ipl_data.o relocs.o alternative.o +obj-y += version.o pgm_check.o ctype.o ipl_data.o relocs.o alternative.o obj-y += uv.o printk.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o diff --git a/arch/s390/boot/alternative.c b/arch/s390/boot/alternative.c index abc08d2c873d..19ea7934b918 100644 --- a/arch/s390/boot/alternative.c +++ b/arch/s390/boot/alternative.c @@ -1,3 +1,138 @@ // SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "alt: " fmt +#include "boot.h" + +#define a_debug boot_debug #include "../kernel/alternative.c" + +static void alt_debug_all(int type) +{ + int i; + + switch (type) { + case ALT_TYPE_FACILITY: + for (i = 0; i < ARRAY_SIZE(alt_debug.facilities); i++) + alt_debug.facilities[i] = -1UL; + break; + case ALT_TYPE_FEATURE: + for (i = 0; i < ARRAY_SIZE(alt_debug.mfeatures); i++) + alt_debug.mfeatures[i] = -1UL; + break; + case ALT_TYPE_SPEC: + alt_debug.spec = 1; + break; + } +} + +static void alt_debug_modify(int type, unsigned int nr, bool clear) +{ + switch (type) { + case ALT_TYPE_FACILITY: + if (clear) + __clear_facility(nr, alt_debug.facilities); + else + __set_facility(nr, alt_debug.facilities); + break; + case ALT_TYPE_FEATURE: + if 
(clear) + __clear_machine_feature(nr, alt_debug.mfeatures); + else + __set_machine_feature(nr, alt_debug.mfeatures); + break; + } +} + +static char *alt_debug_parse(int type, char *str) +{ + unsigned long val, endval; + char *endp; + bool clear; + int i; + + if (*str == ':') { + str++; + } else { + alt_debug_all(type); + return str; + } + clear = false; + if (*str == '!') { + alt_debug_all(type); + clear = true; + str++; + } + while (*str) { + val = simple_strtoull(str, &endp, 0); + if (str == endp) + break; + str = endp; + if (*str == '-') { + str++; + endval = simple_strtoull(str, &endp, 0); + if (str == endp) + break; + str = endp; + while (val <= endval) { + alt_debug_modify(type, val, clear); + val++; + } + } else { + alt_debug_modify(type, val, clear); + } + if (*str != ',') + break; + str++; + } + return str; +} + +/* + * Use debug-alternative command line parameter for debugging: + * "debug-alternative" + * -> print debug message for every single alternative + * + * "debug-alternative=0;2" + * -> print debug message for all alternatives with type 0 and 2 + * + * "debug-alternative=0:0-7" + * -> print debug message for all alternatives with type 0 and with + * facility numbers within the range of 0-7 + * (if type 0 is ALT_TYPE_FACILITY) + * + * "debug-alternative=0:!8;1" + * -> print debug message for all alternatives with type 0, for all + * facility number, except facility 8, and in addition print all + * alternatives with type 1 + */ +void alt_debug_setup(char *str) +{ + unsigned long type; + char *endp; + int i; + + if (!str) { + alt_debug_all(ALT_TYPE_FACILITY); + alt_debug_all(ALT_TYPE_FEATURE); + alt_debug_all(ALT_TYPE_SPEC); + return; + } + while (*str) { + type = simple_strtoull(str, &endp, 0); + if (str == endp) + break; + str = endp; + switch (type) { + case ALT_TYPE_FACILITY: + case ALT_TYPE_FEATURE: + str = alt_debug_parse(type, str); + break; + case ALT_TYPE_SPEC: + alt_debug_all(ALT_TYPE_SPEC); + break; + } + if (*str != ';') + break; + str++; 
+ } +} diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 69f261566a64..e045cae6e80a 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -11,11 +11,6 @@ #include <linux/printk.h> #include <asm/physmem_info.h> -struct machine_info { - unsigned char has_edat1 : 1; - unsigned char has_edat2 : 1; -}; - struct vmlinux_info { unsigned long entry; unsigned long image_size; /* does not include .bss */ @@ -69,7 +64,8 @@ void parse_boot_command_line(void); void verify_facilities(void); void print_missing_facilities(void); void sclp_early_setup_buffer(void); -void print_pgm_check_info(void); +void alt_debug_setup(char *str); +void do_pgm_check(struct pt_regs *regs); unsigned long randomize_within_range(unsigned long size, unsigned long align, unsigned long min, unsigned long max); void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit); diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index 0a47b16f6412..0b511d5c030b 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -254,8 +254,9 @@ SYM_CODE_START_LOCAL(startup_normal) xc 0xf00(256),0xf00 larl %r13,.Lctl lctlg %c0,%c15,0(%r13) # load control registers - stcke __LC_BOOT_CLOCK - mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1 + larl %r13,tod_clock_base + stcke 0(%r13) + mvc __LC_LAST_UPDATE_CLOCK(8),1(%r13) larl %r13,6f spt 0(%r13) mvc __LC_LAST_UPDATE_TIMER(8),0(%r13) @@ -292,12 +293,6 @@ SYM_CODE_END(startup_normal) #include "head_kdump.S" -# -# This program check is active immediately after kernel start -# and until early_pgm_check_handler is set in kernel/early.c -# It simply saves general/control registers and psw in -# the save area and does disabled wait with a faulty address. 
-# SYM_CODE_START_LOCAL(startup_pgm_check_handler) stmg %r8,%r15,__LC_SAVE_AREA la %r8,4095 @@ -311,8 +306,18 @@ SYM_CODE_START_LOCAL(startup_pgm_check_handler) oi __LC_RETURN_PSW+1,0x2 # set wait state bit larl %r9,.Lold_psw_disabled_wait stg %r9,__LC_PGM_NEW_PSW+8 - larl %r15,_dump_info_stack_end-STACK_FRAME_OVERHEAD - brasl %r14,print_pgm_check_info + larl %r15,_dump_info_stack_end-(STACK_FRAME_OVERHEAD+__PT_SIZE) + la %r2,STACK_FRAME_OVERHEAD(%r15) + mvc __PT_PSW(16,%r2),__LC_PSW_SAVE_AREA-4095(%r8) + mvc __PT_R0(128,%r2),__LC_GPREGS_SAVE_AREA-4095(%r8) + mvc __PT_LAST_BREAK(8,%r2),__LC_PGM_LAST_BREAK + mvc __PT_INT_CODE(4,%r2),__LC_PGM_INT_CODE + brasl %r14,do_pgm_check + larl %r9,startup_pgm_check_handler + stg %r9,__LC_PGM_NEW_PSW+8 + mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + lpswe __LC_RETURN_PSW .Lold_psw_disabled_wait: la %r8,4095 lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r8) diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index d3731f2983b7..f584d7da29cb 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -5,6 +5,7 @@ #include <linux/pgtable.h> #include <asm/abs_lowcore.h> #include <asm/page-states.h> +#include <asm/machine.h> #include <asm/ebcdic.h> #include <asm/sclp.h> #include <asm/sections.h> @@ -34,29 +35,14 @@ int vmalloc_size_set; static inline int __diag308(unsigned long subcode, void *addr) { - unsigned long reg1, reg2; - union register_pair r1; - psw_t old; - - r1.even = (unsigned long) addr; - r1.odd = 0; - asm volatile( - " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" - " epsw %[reg1],%[reg2]\n" - " st %[reg1],0(%[psw_pgm])\n" - " st %[reg2],4(%[psw_pgm])\n" - " larl %[reg1],1f\n" - " stg %[reg1],8(%[psw_pgm])\n" + union register_pair r1 = { .even = (unsigned long)addr, .odd = 0 }; + + asm_inline volatile( " diag %[r1],%[subcode],0x308\n" - "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" - : [r1] "+&d" (r1.pair), - [reg1] "=&d" (reg1), - [reg2]
"=&a" (reg2), - "+Q" (get_lowcore()->program_new_psw), - "=Q" (old) - : [subcode] "d" (subcode), - [psw_old] "a" (&old), - [psw_pgm] "a" (&get_lowcore()->program_new_psw) + "0:\n" + EX_TABLE(0b, 0b) + : [r1] "+d" (r1.pair) + : [subcode] "d" (subcode) : "cc", "memory"); return r1.odd; } @@ -193,7 +179,7 @@ void setup_boot_command_line(void) if (has_ebcdic_char(parmarea.command_line)) EBCASC(parmarea.command_line, COMMAND_LINE_SIZE); /* copy arch command line */ - strcpy(early_command_line, strim(parmarea.command_line)); + strscpy(early_command_line, strim(parmarea.command_line)); /* append IPL PARM data to the boot command line */ if (!is_prot_virt_guest() && ipl_block_valid) @@ -267,7 +253,8 @@ void parse_boot_command_line(void) int rc; __kaslr_enabled = IS_ENABLED(CONFIG_RANDOMIZE_BASE); - args = strcpy(command_line_buf, early_command_line); + strscpy(command_line_buf, early_command_line); + args = command_line_buf; while (*args) { args = next_arg(args, &param, &val); @@ -295,6 +282,9 @@ void parse_boot_command_line(void) if (!strcmp(param, "facilities") && val) modify_fac_list(val); + if (!strcmp(param, "debug-alternative")) + alt_debug_setup(val); + if (!strcmp(param, "nokaslr")) __kaslr_enabled = 0; @@ -312,7 +302,7 @@ void parse_boot_command_line(void) } #endif if (!strcmp(param, "relocate_lowcore") && test_facility(193)) - relocate_lowcore = 1; + set_machine_feature(MFEATURE_LOWCORE); if (!strcmp(param, "earlyprintk")) boot_earlyprintk = true; if (!strcmp(param, "debug")) @@ -320,7 +310,7 @@ void parse_boot_command_line(void) if (!strcmp(param, "bootdebug")) { bootdebug = true; if (val) - strncpy(bootdebug_filter, val, sizeof(bootdebug_filter) - 1); + strscpy(bootdebug_filter, val); } if (!strcmp(param, "quiet")) boot_console_loglevel = CONSOLE_LOGLEVEL_QUIET; diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check.c index 633f11600aab..fa621fa5bc02 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check.c @@ -32,26 +32,49 @@
void print_stacktrace(unsigned long sp) } } -void print_pgm_check_info(void) +extern struct exception_table_entry __start___ex_table[]; +extern struct exception_table_entry __stop___ex_table[]; + +static inline unsigned long extable_insn(const struct exception_table_entry *x) +{ + return (unsigned long)&x->insn + x->insn; +} + +static bool ex_handler(struct pt_regs *regs) +{ + const struct exception_table_entry *ex; + + for (ex = __start___ex_table; ex < __stop___ex_table; ex++) { + if (extable_insn(ex) != regs->psw.addr) + continue; + if (ex->type != EX_TYPE_FIXUP) + return false; + regs->psw.addr = extable_fixup(ex); + return true; + } + return false; +} + +void do_pgm_check(struct pt_regs *regs) { - unsigned long *gpregs = (unsigned long *)get_lowcore()->gpregs_save_area; - struct psw_bits *psw = &psw_bits(get_lowcore()->psw_save_area); + struct psw_bits *psw = &psw_bits(regs->psw); + unsigned long *gpregs = regs->gprs; + if (ex_handler(regs)) + return; if (bootdebug) boot_rb_dump(); boot_emerg("Linux version %s\n", kernel_version); if (!is_prot_virt_guest() && early_command_line[0]) boot_emerg("Kernel command line: %s\n", early_command_line); boot_emerg("Kernel fault: interruption code %04x ilc:%d\n", - get_lowcore()->pgm_code, get_lowcore()->pgm_ilc >> 1); + regs->int_code & 0xffff, regs->int_code >> 17); if (kaslr_enabled()) { boot_emerg("Kernel random base: %lx\n", __kaslr_offset); boot_emerg("Kernel random base phys: %lx\n", __kaslr_offset_phys); } boot_emerg("PSW : %016lx %016lx (%pS)\n", - get_lowcore()->psw_save_area.mask, - get_lowcore()->psw_save_area.addr, - (void *)get_lowcore()->psw_save_area.addr); + regs->psw.mask, regs->psw.addr, (void *)regs->psw.addr); boot_emerg(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n", psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck, psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri, psw->eaba); @@ -59,8 +82,11 @@ void print_pgm_check_info(void) boot_emerg(" %016lx 
%016lx %016lx %016lx\n", gpregs[4], gpregs[5], gpregs[6], gpregs[7]); boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[8], gpregs[9], gpregs[10], gpregs[11]); boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]); - print_stacktrace(get_lowcore()->gpregs_save_area[15]); + print_stacktrace(gpregs[15]); boot_emerg("Last Breaking-Event-Address:\n"); - boot_emerg(" [<%016lx>] %pS\n", (unsigned long)get_lowcore()->pgm_last_break, - (void *)get_lowcore()->pgm_last_break); + boot_emerg(" [<%016lx>] %pS\n", regs->last_break, (void *)regs->last_break); + /* Convert to disabled wait PSW */ + psw->io = 0; + psw->ext = 0; + psw->wait = 1; } diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c index aa096ef68e8c..45e3d057cfaa 100644 --- a/arch/s390/boot/physmem_info.c +++ b/arch/s390/boot/physmem_info.c @@ -59,36 +59,22 @@ void add_physmem_online_range(u64 start, u64 end) static int __diag260(unsigned long rx1, unsigned long rx2) { - unsigned long reg1, reg2, ry; union register_pair rx; int cc, exception; - psw_t old; + unsigned long ry; rx.even = rx1; rx.odd = rx2; ry = 0x10; /* storage configuration */ exception = 1; - asm volatile( - " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" - " epsw %[reg1],%[reg2]\n" - " st %[reg1],0(%[psw_pgm])\n" - " st %[reg2],4(%[psw_pgm])\n" - " larl %[reg1],1f\n" - " stg %[reg1],8(%[psw_pgm])\n" + asm_inline volatile( " diag %[rx],%[ry],0x260\n" - " lhi %[exc],0\n" - "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" + "0: lhi %[exc],0\n" + "1:\n" CC_IPM(cc) - : CC_OUT(cc, cc), - [exc] "+d" (exception), - [reg1] "=&d" (reg1), - [reg2] "=&a" (reg2), - [ry] "+&d" (ry), - "+Q" (get_lowcore()->program_new_psw), - "=Q" (old) - : [rx] "d" (rx.pair), - [psw_old] "a" (&old), - [psw_pgm] "a" (&get_lowcore()->program_new_psw) + EX_TABLE(0b, 1b) + : CC_OUT(cc, cc), [exc] "+d" (exception), [ry] "+d" (ry) + : [rx] "d" (rx.pair) : CC_CLOBBER_LIST("memory")); cc = exception ? 
-1 : CC_TRANSFORM(cc); return cc == 0 ? ry : -1; @@ -118,29 +104,15 @@ static int diag260(void) static int diag500_storage_limit(unsigned long *max_physmem_end) { unsigned long storage_limit; - unsigned long reg1, reg2; - psw_t old; - - asm volatile( - " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" - " epsw %[reg1],%[reg2]\n" - " st %[reg1],0(%[psw_pgm])\n" - " st %[reg2],4(%[psw_pgm])\n" - " larl %[reg1],1f\n" - " stg %[reg1],8(%[psw_pgm])\n" - " lghi 1,%[subcode]\n" - " lghi 2,0\n" - " diag 2,4,0x500\n" - "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" - " lgr %[slimit],2\n" - : [reg1] "=&d" (reg1), - [reg2] "=&a" (reg2), - [slimit] "=d" (storage_limit), - "=Q" (get_lowcore()->program_new_psw), - "=Q" (old) - : [psw_old] "a" (&old), - [psw_pgm] "a" (&get_lowcore()->program_new_psw), - [subcode] "i" (DIAG500_SC_STOR_LIMIT) + + asm_inline volatile( + " lghi %%r1,%[subcode]\n" + " lghi %%r2,0\n" + " diag %%r2,%%r4,0x500\n" + "0: lgr %[slimit],%%r2\n" + EX_TABLE(0b, 0b) + : [slimit] "=d" (storage_limit) + : [subcode] "i" (DIAG500_SC_STOR_LIMIT) : "memory", "1", "2"); if (!storage_limit) return -EINVAL; @@ -151,31 +123,17 @@ static int diag500_storage_limit(unsigned long *max_physmem_end) static int tprot(unsigned long addr) { - unsigned long reg1, reg2; int cc, exception; - psw_t old; exception = 1; - asm volatile( - " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" - " epsw %[reg1],%[reg2]\n" - " st %[reg1],0(%[psw_pgm])\n" - " st %[reg2],4(%[psw_pgm])\n" - " larl %[reg1],1f\n" - " stg %[reg1],8(%[psw_pgm])\n" + asm_inline volatile( " tprot 0(%[addr]),0\n" - " lhi %[exc],0\n" - "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" + "0: lhi %[exc],0\n" + "1:\n" CC_IPM(cc) - : CC_OUT(cc, cc), - [exc] "+d" (exception), - [reg1] "=&d" (reg1), - [reg2] "=&a" (reg2), - "=Q" (get_lowcore()->program_new_psw.addr), - "=Q" (old) - : [psw_old] "a" (&old), - [psw_pgm] "a" (&get_lowcore()->program_new_psw), - [addr] "a" (addr) + EX_TABLE(0b, 1b) + : CC_OUT(cc, cc), [exc] "+d" (exception) + : [addr] "a" (addr) : 
CC_CLOBBER_LIST("memory")); cc = exception ? -EFAULT : CC_TRANSFORM(cc); return cc; diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c index b4c66fa667d5..4bb6bc95704e 100644 --- a/arch/s390/boot/printk.c +++ b/arch/s390/boot/printk.c @@ -8,6 +8,7 @@ #include <asm/sections.h> #include <asm/lowcore.h> #include <asm/setup.h> +#include <asm/timex.h> #include <asm/sclp.h> #include <asm/uv.h> #include "boot.h" @@ -28,7 +29,8 @@ static void boot_rb_add(const char *str, size_t len) /* store strings separated by '\0' */ if (len + 1 > avail) boot_rb_off = 0; - strcpy(boot_rb + boot_rb_off, str); + avail = sizeof(boot_rb) - boot_rb_off - 1; + strscpy(boot_rb + boot_rb_off, str, avail); boot_rb_off += len + 1; } @@ -157,10 +159,10 @@ static noinline char *strsym(char *buf, void *ip) p = findsym((unsigned long)ip, &off, &len); if (p) { - strncpy(buf, p, MAX_SYMLEN); + strscpy(buf, p, MAX_SYMLEN); /* reserve 15 bytes for offset/len in symbol+0x1234/0x1234 */ p = buf + strnlen(buf, MAX_SYMLEN - 15); - strcpy(p, "+0x"); + strscpy(p, "+0x", MAX_SYMLEN - (p - buf)); as_hex(p + 3, off, 0); strcat(p, "/0x"); as_hex(p + strlen(p), len, 0); @@ -199,8 +201,7 @@ static void boot_console_earlyprintk(const char *buf) static char *add_timestamp(char *buf) { #ifdef CONFIG_PRINTK_TIME - union tod_clock *boot_clock = (union tod_clock *)&get_lowcore()->boot_clock; - unsigned long ns = tod_to_ns(get_tod_clock() - boot_clock->tod); + unsigned long ns = tod_to_ns(__get_tod_clock_monotonic()); char ts[MAX_NUMLEN]; *buf++ = '['; diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 9276e0576d0a..da8337e63a3e 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -6,9 +6,13 @@ #include <asm/boot_data.h> #include <asm/extmem.h> #include <asm/sections.h> +#include <asm/diag288.h> #include <asm/maccess.h> +#include <asm/machine.h> +#include <asm/sysinfo.h> #include <asm/cpu_mf.h> #include <asm/setup.h> +#include <asm/timex.h> #include <asm/kasan.h> #include 
<asm/kexec.h> #include <asm/sclp.h> @@ -34,13 +38,12 @@ unsigned long __bootdata_preserved(max_mappable); unsigned long __bootdata_preserved(page_noexec_mask); unsigned long __bootdata_preserved(segment_noexec_mask); unsigned long __bootdata_preserved(region_noexec_mask); -int __bootdata_preserved(relocate_lowcore); +union tod_clock __bootdata_preserved(tod_clock_base); +u64 __bootdata_preserved(clock_comparator_max) = -1UL; u64 __bootdata_preserved(stfle_fac_list[16]); struct oldmem_data __bootdata_preserved(oldmem_data); -struct machine_info machine; - void error(char *x) { boot_emerg("%s\n", x); @@ -48,50 +51,115 @@ void error(char *x) disabled_wait(); } +static char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE); + +static void detect_machine_type(void) +{ + struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page; + + /* Check current-configuration-level */ + if (stsi(NULL, 0, 0, 0) <= 2) { + set_machine_feature(MFEATURE_LPAR); + return; + } + /* Get virtual-machine cpu information. */ + if (stsi(vmms, 3, 2, 2) || !vmms->count) + return; + /* Detect known hypervisors */ + if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3)) + set_machine_feature(MFEATURE_KVM); + else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4)) + set_machine_feature(MFEATURE_VM); +} + +static void detect_diag288(void) +{ + /* "BEGIN" in EBCDIC character set */ + static const char cmd[] = "\xc2\xc5\xc7\xc9\xd5"; + unsigned long action, len; + + action = machine_is_vm() ? (unsigned long)cmd : LPARWDT_RESTART; + len = machine_is_vm() ? 
sizeof(cmd) : 0; + if (__diag288(WDT_FUNC_INIT, MIN_INTERVAL, action, len)) + return; + __diag288(WDT_FUNC_CANCEL, 0, 0, 0); + set_machine_feature(MFEATURE_DIAG288); +} + +static void detect_diag9c(void) +{ + unsigned int cpu; + int rc = 1; + + cpu = stap(); + asm_inline volatile( + " diag %[cpu],%%r0,0x9c\n" + "0: lhi %[rc],0\n" + "1:\n" + EX_TABLE(0b, 1b) + : [rc] "+d" (rc) + : [cpu] "d" (cpu) + : "cc", "memory"); + if (!rc) + set_machine_feature(MFEATURE_DIAG9C); +} + +static void reset_tod_clock(void) +{ + union tod_clock clk; + + if (store_tod_clock_ext_cc(&clk) == 0) + return; + /* TOD clock not running. Set the clock to Unix Epoch. */ + if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock_ext_cc(&clk)) + disabled_wait(); + memset(&tod_clock_base, 0, sizeof(tod_clock_base)); + tod_clock_base.tod = TOD_UNIX_EPOCH; + get_lowcore()->last_update_clock = TOD_UNIX_EPOCH; +} + static void detect_facilities(void) { - if (test_facility(8)) { - machine.has_edat1 = 1; + if (cpu_has_edat1()) local_ctl_set_bit(0, CR0_EDAT_BIT); - } - if (test_facility(78)) - machine.has_edat2 = 1; page_noexec_mask = -1UL; segment_noexec_mask = -1UL; region_noexec_mask = -1UL; - if (!test_facility(130)) { + if (!cpu_has_nx()) { page_noexec_mask &= ~_PAGE_NOEXEC; segment_noexec_mask &= ~_SEGMENT_ENTRY_NOEXEC; region_noexec_mask &= ~_REGION_ENTRY_NOEXEC; } + if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) + set_machine_feature(MFEATURE_PCI_MIO); + reset_tod_clock(); + if (test_facility(139) && (tod_clock_base.tod >> 63)) { + /* Enable signed clock comparator comparisons */ + set_machine_feature(MFEATURE_SCC); + clock_comparator_max = -1UL >> 1; + local_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SIGN_BIT); + } + if (test_facility(50) && test_facility(73)) { + set_machine_feature(MFEATURE_TX); + local_ctl_set_bit(0, CR0_TRANSACTIONAL_EXECUTION_BIT); + } + if (cpu_has_vx()) + local_ctl_set_bit(0, CR0_VECTOR_BIT); } static int cmma_test_essa(void) { - unsigned long reg1, reg2, tmp = 0; + unsigned 
long tmp = 0; int rc = 1; - psw_t old; /* Test ESSA_GET_STATE */ - asm volatile( - " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" - " epsw %[reg1],%[reg2]\n" - " st %[reg1],0(%[psw_pgm])\n" - " st %[reg2],4(%[psw_pgm])\n" - " larl %[reg1],1f\n" - " stg %[reg1],8(%[psw_pgm])\n" + asm_inline volatile( " .insn rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n" - " la %[rc],0\n" - "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" - : [reg1] "=&d" (reg1), - [reg2] "=&a" (reg2), - [rc] "+&d" (rc), - [tmp] "+&d" (tmp), - "+Q" (get_lowcore()->program_new_psw), - "=Q" (old) - : [psw_old] "a" (&old), - [psw_pgm] "a" (&get_lowcore()->program_new_psw), - [cmd] "i" (ESSA_GET_STATE) + "0: lhi %[rc],0\n" + "1:\n" + EX_TABLE(0b, 1b) + : [rc] "+d" (rc), [tmp] "+d" (tmp) + : [cmd] "i" (ESSA_GET_STATE) : "cc", "memory"); return rc; } @@ -462,7 +530,12 @@ void startup_kernel(void) read_ipl_report(); sclp_early_read_info(); + sclp_early_detect_machine_features(); detect_facilities(); + detect_diag9c(); + detect_machine_type(); + /* detect_diag288() needs machine type */ + detect_diag288(); cmma_init(); sanitize_prot_virt_host(); max_physmem_end = detect_max_physmem_end(); diff --git a/arch/s390/boot/string.c b/arch/s390/boot/string.c index f6b9b1df48a8..bd68161434a6 100644 --- a/arch/s390/boot/string.c +++ b/arch/s390/boot/string.c @@ -29,6 +29,18 @@ int strncmp(const char *cs, const char *ct, size_t count) return 0; } +ssize_t sized_strscpy(char *dst, const char *src, size_t count) +{ + size_t len; + + if (count == 0) + return -E2BIG; + len = strnlen(src, count - 1); + memcpy(dst, src, len); + dst[len] = '\0'; + return src[len] ? 
-E2BIG : len; +} + void *memset64(uint64_t *s, uint64_t v, size_t count) { uint64_t *xs = s; diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index cfca94a8eac4..1d073acd05a7 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #define boot_fmt(fmt) "vmem: " fmt +#include <linux/cpufeature.h> #include <linux/sched/task.h> #include <linux/pgtable.h> #include <linux/kasan.h> @@ -10,6 +11,7 @@ #include <asm/ctlreg.h> #include <asm/physmem_info.h> #include <asm/maccess.h> +#include <asm/machine.h> #include <asm/abs_lowcore.h> #include "decompressor.h" #include "boot.h" @@ -314,7 +316,7 @@ static unsigned long try_get_large_pud_pa(pud_t *pu_dir, unsigned long addr, uns { unsigned long pa, size = end - addr; - if (!machine.has_edat2 || !large_page_mapping_allowed(mode) || + if (!cpu_has_edat2() || !large_page_mapping_allowed(mode) || !IS_ALIGNED(addr, PUD_SIZE) || (size < PUD_SIZE)) return INVALID_PHYS_ADDR; @@ -330,7 +332,7 @@ static unsigned long try_get_large_pmd_pa(pmd_t *pm_dir, unsigned long addr, uns { unsigned long pa, size = end - addr; - if (!machine.has_edat1 || !large_page_mapping_allowed(mode) || + if (!cpu_has_edat1() || !large_page_mapping_allowed(mode) || !IS_ALIGNED(addr, PMD_SIZE) || (size < PMD_SIZE)) return INVALID_PHYS_ADDR; @@ -516,7 +518,7 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l __arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER); __arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER); - if (relocate_lowcore) + if (machine_has_relocated_lowcore()) lowcore_address = LOWCORE_ALT_ADDRESS; /* diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S index 66670212a361..50988022f9ea 100644 --- a/arch/s390/boot/vmlinux.lds.S +++ b/arch/s390/boot/vmlinux.lds.S @@ -40,6 +40,7 @@ SECTIONS *(.rodata.*) _erodata = . 
; } + EXCEPTION_TABLE(16) .got : { *(.got) } @@ -165,7 +166,6 @@ SECTIONS /DISCARD/ : { COMMON_DISCARDS *(.eh_frame) - *(__ex_table) *(*__ksymtab*) *(___kcrctab*) } diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 80bdfbae6e5b..8ecad727497e 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -38,7 +38,6 @@ CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y -# CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y @@ -92,7 +91,6 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y -CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y CONFIG_ZSMALLOC_STAT=y CONFIG_SLAB_BUCKETS=y CONFIG_SLUB_STATS=y @@ -395,6 +393,9 @@ CONFIG_CLS_U32_MARK=y CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_CGROUP=y CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_CLS_MATCHALL=m +CONFIG_NET_EMATCH=y CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m @@ -405,6 +406,9 @@ CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_VLAN=m +CONFIG_NET_ACT_TUNNEL_KEY=m +CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GATE=m CONFIG_NET_TC_SKB_EXT=y CONFIG_DNS_RESOLVER=y @@ -628,8 +632,16 @@ CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_MEM=m CONFIG_VIRTIO_INPUT=y +CONFIG_VDPA=m +CONFIG_VDPA_SIM=m +CONFIG_VDPA_SIM_NET=m +CONFIG_VDPA_SIM_BLOCK=m +CONFIG_VDPA_USER=m +CONFIG_MLX5_VDPA_NET=m +CONFIG_VP_VDPA=m CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +CONFIG_VHOST_VDPA=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -654,7 +666,6 @@ CONFIG_NILFS2_FS=m CONFIG_BCACHEFS_FS=y CONFIG_BCACHEFS_QUOTA=y CONFIG_BCACHEFS_POSIX_ACL=y -CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_VERITY=y @@ -724,11 +735,10 @@ CONFIG_NLS_UTF8=m CONFIG_DLM=m CONFIG_UNICODE=y CONFIG_PERSISTENT_KEYRINGS=y +CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m 
CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y -CONFIG_HARDENED_USERCOPY=y -CONFIG_FORTIFY_SOURCE=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_LOCKDOWN_LSM=y @@ -741,12 +751,14 @@ CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y +CONFIG_FORTIFY_SOURCE=y +CONFIG_HARDENED_USERCOPY=y CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_USER=m -# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_SELFTESTS=y CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_BENCHMARK=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECDSA=m @@ -756,7 +768,6 @@ CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m @@ -795,13 +806,11 @@ CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA3_256_S390=m CONFIG_CRYPTO_SHA3_512_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_HMAC_S390=m CONFIG_ZCRYPT=m CONFIG_PKEY=m @@ -812,12 +821,9 @@ CONFIG_PKEY_UV=m CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_SYSTEM_BLACKLIST_KEYRING=y +CONFIG_CRYPTO_KRB5=m +CONFIG_CRYPTO_KRB5_SELFTESTS=y CONFIG_CORDIC=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m -CONFIG_CRC4=m -CONFIG_CRC7=m -CONFIG_CRC8=m CONFIG_RANDOM32_SELFTEST=y CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y @@ -888,12 +894,14 @@ CONFIG_USER_EVENTS=y CONFIG_HIST_TRIGGERS=y CONFIG_FTRACE_STARTUP_TEST=y # CONFIG_EVENT_TRACE_STARTUP_TEST is not set +CONFIG_FTRACE_SORT_STARTUP_TEST=y CONFIG_SAMPLES=y CONFIG_SAMPLE_TRACE_PRINTK=m CONFIG_SAMPLE_FTRACE_DIRECT=m CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m CONFIG_SAMPLE_FTRACE_OPS=m CONFIG_DEBUG_ENTRY=y +CONFIG_STRICT_MM_TYPECHECKS=y 
CONFIG_CIO_INJECT=y CONFIG_KUNIT=m CONFIG_KUNIT_DEBUGFS=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 449a0e996b96..c13a77765162 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -36,7 +36,6 @@ CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y -# CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y @@ -86,7 +85,6 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y -CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y CONFIG_ZSMALLOC_STAT=y CONFIG_SLAB_BUCKETS=y # CONFIG_COMPAT_BRK is not set @@ -385,6 +383,9 @@ CONFIG_CLS_U32_MARK=y CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_CGROUP=y CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_CLS_MATCHALL=m +CONFIG_NET_EMATCH=y CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m @@ -395,6 +396,9 @@ CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_VLAN=m +CONFIG_NET_ACT_TUNNEL_KEY=m +CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GATE=m CONFIG_NET_TC_SKB_EXT=y CONFIG_DNS_RESOLVER=y @@ -618,8 +622,16 @@ CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_MEM=m CONFIG_VIRTIO_INPUT=y +CONFIG_VDPA=m +CONFIG_VDPA_SIM=m +CONFIG_VDPA_SIM_NET=m +CONFIG_VDPA_SIM_BLOCK=m +CONFIG_VDPA_USER=m +CONFIG_MLX5_VDPA_NET=m +CONFIG_VP_VDPA=m CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +CONFIG_VHOST_VDPA=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -641,7 +653,6 @@ CONFIG_NILFS2_FS=m CONFIG_BCACHEFS_FS=m CONFIG_BCACHEFS_QUOTA=y CONFIG_BCACHEFS_POSIX_ACL=y -CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_VERITY=y @@ -711,6 +722,7 @@ CONFIG_NLS_UTF8=m CONFIG_DLM=m CONFIG_UNICODE=y CONFIG_PERSISTENT_KEYRINGS=y +CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y @@ -729,10 +741,10 @@ CONFIG_IMA_APPRAISE=y CONFIG_BUG_ON_DATA_CORRUPTION=y 
CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_USER=m -# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_SELFTESTS=y CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_BENCHMARK=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECDSA=m @@ -742,7 +754,6 @@ CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m @@ -782,13 +793,11 @@ CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA3_256_S390=m CONFIG_CRYPTO_SHA3_512_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_HMAC_S390=m CONFIG_ZCRYPT=m CONFIG_PKEY=m @@ -799,13 +808,10 @@ CONFIG_PKEY_UV=m CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_SYSTEM_BLACKLIST_KEYRING=y +CONFIG_CRYPTO_KRB5=m +CONFIG_CRYPTO_KRB5_SELFTESTS=y CONFIG_CORDIC=m CONFIG_PRIME_NUMBERS=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m -CONFIG_CRC4=m -CONFIG_CRC7=m -CONFIG_CRC8=m CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 diff --git a/arch/s390/configs/mmtypes.config b/arch/s390/configs/mmtypes.config new file mode 100644 index 000000000000..fe32b442d789 --- /dev/null +++ b/arch/s390/configs/mmtypes.config @@ -0,0 +1,2 @@ +# Help: Enable strict memory management typechecks +CONFIG_STRICT_MM_TYPECHECKS=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 853b2326a171..8163c1702720 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -70,7 +70,6 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF4=y CONFIG_DEBUG_FS=y CONFIG_PANIC_ON_OOPS=y -# CONFIG_SCHED_DEBUG is not set CONFIG_RCU_CPU_STALL_TIMEOUT=60 # CONFIG_RCU_TRACE is not set # CONFIG_FTRACE is not set 
diff --git a/arch/s390/crypto/Kconfig b/arch/s390/crypto/Kconfig index b760232537f1..e2c27588b21a 100644 --- a/arch/s390/crypto/Kconfig +++ b/arch/s390/crypto/Kconfig @@ -4,7 +4,6 @@ menu "Accelerated Cryptographic Algorithms for CPU (s390)" config CRYPTO_SHA512_S390 tristate "Hash functions: SHA-384 and SHA-512" - depends on S390 select CRYPTO_HASH help SHA-384 and SHA-512 secure hash algorithms (FIPS 180) @@ -15,7 +14,6 @@ config CRYPTO_SHA512_S390 config CRYPTO_SHA1_S390 tristate "Hash functions: SHA-1" - depends on S390 select CRYPTO_HASH help SHA-1 secure hash algorithm (FIPS 180) @@ -24,20 +22,8 @@ config CRYPTO_SHA1_S390 It is available as of z990. -config CRYPTO_SHA256_S390 - tristate "Hash functions: SHA-224 and SHA-256" - depends on S390 - select CRYPTO_HASH - help - SHA-224 and SHA-256 secure hash algorithms (FIPS 180) - - Architecture: s390 - - It is available as of z9. - config CRYPTO_SHA3_256_S390 tristate "Hash functions: SHA3-224 and SHA3-256" - depends on S390 select CRYPTO_HASH help SHA3-224 and SHA3-256 secure hash algorithms (FIPS 202) @@ -48,7 +34,6 @@ config CRYPTO_SHA3_256_S390 config CRYPTO_SHA3_512_S390 tristate "Hash functions: SHA3-384 and SHA3-512" - depends on S390 select CRYPTO_HASH help SHA3-384 and SHA3-512 secure hash algorithms (FIPS 202) @@ -59,7 +44,6 @@ config CRYPTO_SHA3_512_S390 config CRYPTO_GHASH_S390 tristate "Hash functions: GHASH" - depends on S390 select CRYPTO_HASH help GCM GHASH hash function (NIST SP800-38D) @@ -70,7 +54,6 @@ config CRYPTO_GHASH_S390 config CRYPTO_AES_S390 tristate "Ciphers: AES, modes: ECB, CBC, CTR, XTS, GCM" - depends on S390 select CRYPTO_ALGAPI select CRYPTO_SKCIPHER help @@ -92,7 +75,6 @@ config CRYPTO_AES_S390 config CRYPTO_DES_S390 tristate "Ciphers: DES and Triple DES EDE, modes: ECB, CBC, CTR" - depends on S390 select CRYPTO_ALGAPI select CRYPTO_SKCIPHER select CRYPTO_LIB_DES @@ -107,22 +89,8 @@ config CRYPTO_DES_S390 As of z990 the ECB and CBC mode are hardware accelerated. 
As of z196 the CTR mode is hardware accelerated. -config CRYPTO_CHACHA_S390 - tristate "Ciphers: ChaCha20" - depends on S390 - select CRYPTO_SKCIPHER - select CRYPTO_LIB_CHACHA_GENERIC - select CRYPTO_ARCH_HAVE_LIB_CHACHA - help - Length-preserving cipher: ChaCha20 stream cipher (RFC 7539) - - Architecture: s390 - - It is available as of z13. - config CRYPTO_HMAC_S390 tristate "Keyed-hash message authentication code: HMAC" - depends on S390 select CRYPTO_HASH help s390 specific HMAC hardware support for SHA224, SHA256, SHA384 and diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile index 14dafadbcbed..21757d86cd49 100644 --- a/arch/s390/crypto/Makefile +++ b/arch/s390/crypto/Makefile @@ -4,17 +4,13 @@ # obj-$(CONFIG_CRYPTO_SHA1_S390) += sha1_s390.o sha_common.o -obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256_s390.o sha_common.o obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o obj-$(CONFIG_CRYPTO_SHA3_256_S390) += sha3_256_s390.o sha_common.o obj-$(CONFIG_CRYPTO_SHA3_512_S390) += sha3_512_s390.o sha_common.o obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o obj-$(CONFIG_CRYPTO_PAES_S390) += paes_s390.o -obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o obj-$(CONFIG_S390_PRNG) += prng.o obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o obj-$(CONFIG_CRYPTO_HMAC_S390) += hmac_s390.o obj-y += arch_random.o - -chacha_s390-y := chacha-glue.o chacha-s390.o diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 9c46b1b630b1..5d36f4020dfa 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -66,7 +66,6 @@ struct s390_xts_ctx { struct gcm_sg_walk { struct scatter_walk walk; unsigned int walk_bytes; - u8 *walk_ptr; unsigned int walk_bytes_remain; u8 buf[AES_BLOCK_SIZE]; unsigned int buf_bytes; @@ -787,29 +786,20 @@ static void gcm_walk_start(struct gcm_sg_walk *gw, struct scatterlist *sg, static inline unsigned int _gcm_sg_clamp_and_map(struct gcm_sg_walk *gw) { - struct 
scatterlist *nextsg; - - gw->walk_bytes = scatterwalk_clamp(&gw->walk, gw->walk_bytes_remain); - while (!gw->walk_bytes) { - nextsg = sg_next(gw->walk.sg); - if (!nextsg) - return 0; - scatterwalk_start(&gw->walk, nextsg); - gw->walk_bytes = scatterwalk_clamp(&gw->walk, - gw->walk_bytes_remain); - } - gw->walk_ptr = scatterwalk_map(&gw->walk); + if (gw->walk_bytes_remain == 0) + return 0; + gw->walk_bytes = scatterwalk_next(&gw->walk, gw->walk_bytes_remain); return gw->walk_bytes; } static inline void _gcm_sg_unmap_and_advance(struct gcm_sg_walk *gw, - unsigned int nbytes) + unsigned int nbytes, bool out) { gw->walk_bytes_remain -= nbytes; - scatterwalk_unmap(gw->walk_ptr); - scatterwalk_advance(&gw->walk, nbytes); - scatterwalk_done(&gw->walk, 0, gw->walk_bytes_remain); - gw->walk_ptr = NULL; + if (out) + scatterwalk_done_dst(&gw->walk, nbytes); + else + scatterwalk_done_src(&gw->walk, nbytes); } static int gcm_in_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) @@ -835,16 +825,16 @@ static int gcm_in_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) } if (!gw->buf_bytes && gw->walk_bytes >= minbytesneeded) { - gw->ptr = gw->walk_ptr; + gw->ptr = gw->walk.addr; gw->nbytes = gw->walk_bytes; goto out; } while (1) { n = min(gw->walk_bytes, AES_BLOCK_SIZE - gw->buf_bytes); - memcpy(gw->buf + gw->buf_bytes, gw->walk_ptr, n); + memcpy(gw->buf + gw->buf_bytes, gw->walk.addr, n); gw->buf_bytes += n; - _gcm_sg_unmap_and_advance(gw, n); + _gcm_sg_unmap_and_advance(gw, n, false); if (gw->buf_bytes >= minbytesneeded) { gw->ptr = gw->buf; gw->nbytes = gw->buf_bytes; @@ -876,13 +866,12 @@ static int gcm_out_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) } if (gw->walk_bytes >= minbytesneeded) { - gw->ptr = gw->walk_ptr; + gw->ptr = gw->walk.addr; gw->nbytes = gw->walk_bytes; goto out; } - scatterwalk_unmap(gw->walk_ptr); - gw->walk_ptr = NULL; + scatterwalk_unmap(&gw->walk); gw->ptr = gw->buf; gw->nbytes = sizeof(gw->buf); @@ -904,7 +893,7 
@@ static int gcm_in_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone) } else gw->buf_bytes = 0; } else - _gcm_sg_unmap_and_advance(gw, bytesdone); + _gcm_sg_unmap_and_advance(gw, bytesdone, false); return bytesdone; } @@ -921,11 +910,11 @@ static int gcm_out_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone) if (!_gcm_sg_clamp_and_map(gw)) return i; n = min(gw->walk_bytes, bytesdone - i); - memcpy(gw->walk_ptr, gw->buf + i, n); - _gcm_sg_unmap_and_advance(gw, n); + memcpy(gw->walk.addr, gw->buf + i, n); + _gcm_sg_unmap_and_advance(gw, n, true); } } else - _gcm_sg_unmap_and_advance(gw, bytesdone); + _gcm_sg_unmap_and_advance(gw, bytesdone, true); return bytesdone; } diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c deleted file mode 100644 index f8b0c52e77a4..000000000000 --- a/arch/s390/crypto/chacha-glue.c +++ /dev/null @@ -1,130 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * s390 ChaCha stream cipher. - * - * Copyright IBM Corp. 2021 - */ - -#define KMSG_COMPONENT "chacha_s390" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include <crypto/internal/chacha.h> -#include <crypto/internal/skcipher.h> -#include <crypto/algapi.h> -#include <linux/cpufeature.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/sizes.h> -#include <asm/fpu.h> -#include "chacha-s390.h" - -static void chacha20_crypt_s390(u32 *state, u8 *dst, const u8 *src, - unsigned int nbytes, const u32 *key, - u32 *counter) -{ - DECLARE_KERNEL_FPU_ONSTACK32(vxstate); - - kernel_fpu_begin(&vxstate, KERNEL_VXR); - chacha20_vx(dst, src, nbytes, key, counter); - kernel_fpu_end(&vxstate, KERNEL_VXR); - - *counter += round_up(nbytes, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE; -} - -static int chacha20_s390(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - u32 state[CHACHA_STATE_WORDS] __aligned(16); - struct skcipher_walk walk; - unsigned int 
nbytes; - int rc; - - rc = skcipher_walk_virt(&walk, req, false); - chacha_init_generic(state, ctx->key, req->iv); - - while (walk.nbytes > 0) { - nbytes = walk.nbytes; - if (nbytes < walk.total) - nbytes = round_down(nbytes, walk.stride); - - if (nbytes <= CHACHA_BLOCK_SIZE) { - chacha_crypt_generic(state, walk.dst.virt.addr, - walk.src.virt.addr, nbytes, - ctx->nrounds); - } else { - chacha20_crypt_s390(state, walk.dst.virt.addr, - walk.src.virt.addr, nbytes, - &state[4], &state[12]); - } - rc = skcipher_walk_done(&walk, walk.nbytes - nbytes); - } - return rc; -} - -void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) -{ - /* TODO: implement hchacha_block_arch() in assembly */ - hchacha_block_generic(state, stream, nrounds); -} -EXPORT_SYMBOL(hchacha_block_arch); - -void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) -{ - chacha_init_generic(state, key, iv); -} -EXPORT_SYMBOL(chacha_init_arch); - -void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - /* s390 chacha20 implementation has 20 rounds hard-coded, - * it cannot handle a block of data or less, but otherwise - * it can handle data of arbitrary size - */ - if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20 || !cpu_has_vx()) - chacha_crypt_generic(state, dst, src, bytes, nrounds); - else - chacha20_crypt_s390(state, dst, src, bytes, - &state[4], &state[12]); -} -EXPORT_SYMBOL(chacha_crypt_arch); - -static struct skcipher_alg chacha_algs[] = { - { - .base.cra_name = "chacha20", - .base.cra_driver_name = "chacha20-s390", - .base.cra_priority = 900, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = CHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .setkey = chacha20_setkey, - .encrypt = chacha20_s390, - .decrypt = chacha20_s390, - } -}; - -static int __init chacha_mod_init(void) -{ - return 
IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ? - crypto_register_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs)) : 0; -} - -static void __exit chacha_mod_fini(void) -{ - if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) - crypto_unregister_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs)); -} - -module_cpu_feature_match(S390_CPU_FEATURE_VXRS, chacha_mod_init); -module_exit(chacha_mod_fini); - -MODULE_DESCRIPTION("ChaCha20 stream cipher"); -MODULE_LICENSE("GPL v2"); - -MODULE_ALIAS_CRYPTO("chacha20"); diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c index 0800a2a5799f..dcbcee37cb63 100644 --- a/arch/s390/crypto/ghash_s390.c +++ b/arch/s390/crypto/ghash_s390.c @@ -8,29 +8,28 @@ * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> */ +#include <asm/cpacf.h> +#include <crypto/ghash.h> #include <crypto/internal/hash.h> -#include <linux/module.h> #include <linux/cpufeature.h> -#include <asm/cpacf.h> - -#define GHASH_BLOCK_SIZE 16 -#define GHASH_DIGEST_SIZE 16 +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> -struct ghash_ctx { +struct s390_ghash_ctx { u8 key[GHASH_BLOCK_SIZE]; }; -struct ghash_desc_ctx { +struct s390_ghash_desc_ctx { u8 icv[GHASH_BLOCK_SIZE]; u8 key[GHASH_BLOCK_SIZE]; - u8 buffer[GHASH_BLOCK_SIZE]; - u32 bytes; }; static int ghash_init(struct shash_desc *desc) { - struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); - struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + struct s390_ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc); memset(dctx, 0, sizeof(*dctx)); memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE); @@ -41,7 +40,7 @@ static int ghash_init(struct shash_desc *desc) static int ghash_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) { - struct ghash_ctx *ctx = crypto_shash_ctx(tfm); + struct s390_ghash_ctx *ctx = crypto_shash_ctx(tfm); if (keylen != GHASH_BLOCK_SIZE) return -EINVAL; @@ -54,80 +53,71 @@ 
static int ghash_setkey(struct crypto_shash *tfm, static int ghash_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) { - struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc); unsigned int n; - u8 *buf = dctx->buffer; - - if (dctx->bytes) { - u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes); - n = min(srclen, dctx->bytes); - dctx->bytes -= n; - srclen -= n; - - memcpy(pos, src, n); - src += n; + n = srclen & ~(GHASH_BLOCK_SIZE - 1); + cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n); + return srclen - n; +} - if (!dctx->bytes) { - cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, - GHASH_BLOCK_SIZE); - } - } +static void ghash_flush(struct s390_ghash_desc_ctx *dctx, const u8 *src, + unsigned int len) +{ + if (len) { + u8 buf[GHASH_BLOCK_SIZE] = {}; - n = srclen & ~(GHASH_BLOCK_SIZE - 1); - if (n) { - cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n); - src += n; - srclen -= n; + memcpy(buf, src, len); + cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE); + memzero_explicit(buf, sizeof(buf)); } +} - if (srclen) { - dctx->bytes = GHASH_BLOCK_SIZE - srclen; - memcpy(buf, src, srclen); - } +static int ghash_finup(struct shash_desc *desc, const u8 *src, + unsigned int len, u8 *dst) +{ + struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc); + ghash_flush(dctx, src, len); + memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE); return 0; } -static int ghash_flush(struct ghash_desc_ctx *dctx) +static int ghash_export(struct shash_desc *desc, void *out) { - u8 *buf = dctx->buffer; - - if (dctx->bytes) { - u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes); - - memset(pos, 0, dctx->bytes); - cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE); - dctx->bytes = 0; - } + struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc); + memcpy(out, dctx->icv, GHASH_DIGEST_SIZE); return 0; } -static int ghash_final(struct shash_desc *desc, u8 *dst) +static int ghash_import(struct shash_desc *desc, const void *in) { - struct 
ghash_desc_ctx *dctx = shash_desc_ctx(desc); - int ret; + struct s390_ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc); - ret = ghash_flush(dctx); - if (!ret) - memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE); - return ret; + memcpy(dctx->icv, in, GHASH_DIGEST_SIZE); + memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE); + return 0; } static struct shash_alg ghash_alg = { .digestsize = GHASH_DIGEST_SIZE, .init = ghash_init, .update = ghash_update, - .final = ghash_final, + .finup = ghash_finup, .setkey = ghash_setkey, - .descsize = sizeof(struct ghash_desc_ctx), + .export = ghash_export, + .import = ghash_import, + .statesize = sizeof(struct ghash_desc_ctx), + .descsize = sizeof(struct s390_ghash_desc_ctx), .base = { .cra_name = "ghash", .cra_driver_name = "ghash-s390", .cra_priority = 300, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = GHASH_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct ghash_ctx), + .cra_ctxsize = sizeof(struct s390_ghash_ctx), .cra_module = THIS_MODULE, }, }; diff --git a/arch/s390/crypto/hmac_s390.c b/arch/s390/crypto/hmac_s390.c index bba9a818dfdc..93a1098d9f8d 100644 --- a/arch/s390/crypto/hmac_s390.c +++ b/arch/s390/crypto/hmac_s390.c @@ -9,10 +9,14 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <asm/cpacf.h> -#include <crypto/sha2.h> #include <crypto/internal/hash.h> +#include <crypto/hmac.h> +#include <crypto/sha2.h> #include <linux/cpufeature.h> +#include <linux/errno.h> +#include <linux/kernel.h> #include <linux/module.h> +#include <linux/string.h> /* * KMAC param block layout for sha2 function codes: @@ -71,32 +75,31 @@ union s390_kmac_gr0 { struct s390_kmac_sha2_ctx { u8 param[MAX_DIGEST_SIZE + MAX_IMBL_SIZE + MAX_BLOCK_SIZE]; union s390_kmac_gr0 gr0; - u8 buf[MAX_BLOCK_SIZE]; - unsigned int buflen; + u64 buflen[2]; }; /* * kmac_sha2_set_imbl - sets the input message bit-length based on the blocksize */ -static inline void kmac_sha2_set_imbl(u8 *param, unsigned int buflen, - 
unsigned int blocksize) +static inline void kmac_sha2_set_imbl(u8 *param, u64 buflen_lo, + u64 buflen_hi, unsigned int blocksize) { u8 *imbl = param + SHA2_IMBL_OFFSET(blocksize); switch (blocksize) { case SHA256_BLOCK_SIZE: - *(u64 *)imbl = (u64)buflen * BITS_PER_BYTE; + *(u64 *)imbl = buflen_lo * BITS_PER_BYTE; break; case SHA512_BLOCK_SIZE: - *(u128 *)imbl = (u128)buflen * BITS_PER_BYTE; + *(u128 *)imbl = (((u128)buflen_hi << 64) + buflen_lo) << 3; break; default: break; } } -static int hash_key(const u8 *in, unsigned int inlen, - u8 *digest, unsigned int digestsize) +static int hash_data(const u8 *in, unsigned int inlen, + u8 *digest, unsigned int digestsize, bool final) { unsigned long func; union { @@ -123,19 +126,23 @@ static int hash_key(const u8 *in, unsigned int inlen, switch (digestsize) { case SHA224_DIGEST_SIZE: - func = CPACF_KLMD_SHA_256; + func = final ? CPACF_KLMD_SHA_256 : CPACF_KIMD_SHA_256; PARAM_INIT(256, 224, inlen * 8); + if (!final) + digestsize = SHA256_DIGEST_SIZE; break; case SHA256_DIGEST_SIZE: - func = CPACF_KLMD_SHA_256; + func = final ? CPACF_KLMD_SHA_256 : CPACF_KIMD_SHA_256; PARAM_INIT(256, 256, inlen * 8); break; case SHA384_DIGEST_SIZE: - func = CPACF_KLMD_SHA_512; + func = final ? CPACF_KLMD_SHA_512 : CPACF_KIMD_SHA_512; PARAM_INIT(512, 384, inlen * 8); + if (!final) + digestsize = SHA512_DIGEST_SIZE; break; case SHA512_DIGEST_SIZE: - func = CPACF_KLMD_SHA_512; + func = final ? 
CPACF_KLMD_SHA_512 : CPACF_KIMD_SHA_512; PARAM_INIT(512, 512, inlen * 8); break; default: @@ -151,6 +158,12 @@ static int hash_key(const u8 *in, unsigned int inlen, return 0; } +static int hash_key(const u8 *in, unsigned int inlen, + u8 *digest, unsigned int digestsize) +{ + return hash_data(in, inlen, digest, digestsize, true); +} + static int s390_hmac_sha2_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) { @@ -176,7 +189,8 @@ static int s390_hmac_sha2_init(struct shash_desc *desc) memcpy(ctx->param + SHA2_KEY_OFFSET(bs), tfm_ctx->key, bs); - ctx->buflen = 0; + ctx->buflen[0] = 0; + ctx->buflen[1] = 0; ctx->gr0.reg = 0; switch (crypto_shash_digestsize(desc->tfm)) { case SHA224_DIGEST_SIZE: @@ -203,48 +217,31 @@ static int s390_hmac_sha2_update(struct shash_desc *desc, { struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc); unsigned int bs = crypto_shash_blocksize(desc->tfm); - unsigned int offset, n; - - /* check current buffer */ - offset = ctx->buflen % bs; - ctx->buflen += len; - if (offset + len < bs) - goto store; - - /* process one stored block */ - if (offset) { - n = bs - offset; - memcpy(ctx->buf + offset, data, n); - ctx->gr0.iimp = 1; - _cpacf_kmac(&ctx->gr0.reg, ctx->param, ctx->buf, bs); - data += n; - len -= n; - offset = 0; - } - /* process as many blocks as possible */ - if (len >= bs) { - n = (len / bs) * bs; - ctx->gr0.iimp = 1; - _cpacf_kmac(&ctx->gr0.reg, ctx->param, data, n); - data += n; - len -= n; - } -store: - /* store incomplete block in buffer */ - if (len) - memcpy(ctx->buf + offset, data, len); + unsigned int n = round_down(len, bs); - return 0; + ctx->buflen[0] += n; + if (ctx->buflen[0] < n) + ctx->buflen[1]++; + + /* process as many blocks as possible */ + ctx->gr0.iimp = 1; + _cpacf_kmac(&ctx->gr0.reg, ctx->param, data, n); + return len - n; } -static int s390_hmac_sha2_final(struct shash_desc *desc, u8 *out) +static int s390_hmac_sha2_finup(struct shash_desc *desc, const u8 *src, + unsigned int len, u8 *out) 
{ struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc); unsigned int bs = crypto_shash_blocksize(desc->tfm); + ctx->buflen[0] += len; + if (ctx->buflen[0] < len) + ctx->buflen[1]++; + ctx->gr0.iimp = 0; - kmac_sha2_set_imbl(ctx->param, ctx->buflen, bs); - _cpacf_kmac(&ctx->gr0.reg, ctx->param, ctx->buf, ctx->buflen % bs); + kmac_sha2_set_imbl(ctx->param, ctx->buflen[0], ctx->buflen[1], bs); + _cpacf_kmac(&ctx->gr0.reg, ctx->param, src, len); memcpy(out, ctx->param, crypto_shash_digestsize(desc->tfm)); return 0; @@ -262,7 +259,7 @@ static int s390_hmac_sha2_digest(struct shash_desc *desc, return rc; ctx->gr0.iimp = 0; - kmac_sha2_set_imbl(ctx->param, len, + kmac_sha2_set_imbl(ctx->param, len, 0, crypto_shash_blocksize(desc->tfm)); _cpacf_kmac(&ctx->gr0.reg, ctx->param, data, len); memcpy(out, ctx->param, ds); @@ -270,22 +267,89 @@ static int s390_hmac_sha2_digest(struct shash_desc *desc, return 0; } -#define S390_HMAC_SHA2_ALG(x) { \ +static int s390_hmac_export_zero(struct shash_desc *desc, void *out) +{ + struct crypto_shash *tfm = desc->tfm; + u8 ipad[SHA512_BLOCK_SIZE]; + struct s390_hmac_ctx *ctx; + unsigned int bs; + int err, i; + + ctx = crypto_shash_ctx(tfm); + bs = crypto_shash_blocksize(tfm); + for (i = 0; i < bs; i++) + ipad[i] = ctx->key[i] ^ HMAC_IPAD_VALUE; + + err = hash_data(ipad, bs, out, crypto_shash_digestsize(tfm), false); + memzero_explicit(ipad, sizeof(ipad)); + return err; +} + +static int s390_hmac_export(struct shash_desc *desc, void *out) +{ + struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc); + unsigned int bs = crypto_shash_blocksize(desc->tfm); + unsigned int ds = bs / 2; + union { + u8 *u8; + u64 *u64; + } p = { .u8 = out }; + int err = 0; + + if (!ctx->gr0.ikp) + err = s390_hmac_export_zero(desc, out); + else + memcpy(p.u8, ctx->param, ds); + p.u8 += ds; + put_unaligned(ctx->buflen[0], p.u64++); + if (ds == SHA512_DIGEST_SIZE) + put_unaligned(ctx->buflen[1], p.u64); + return err; +} + +static int s390_hmac_import(struct shash_desc 
*desc, const void *in) +{ + struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc); + unsigned int bs = crypto_shash_blocksize(desc->tfm); + unsigned int ds = bs / 2; + union { + const u8 *u8; + const u64 *u64; + } p = { .u8 = in }; + int err; + + err = s390_hmac_sha2_init(desc); + memcpy(ctx->param, p.u8, ds); + p.u8 += ds; + ctx->buflen[0] = get_unaligned(p.u64++); + if (ds == SHA512_DIGEST_SIZE) + ctx->buflen[1] = get_unaligned(p.u64); + if (ctx->buflen[0] | ctx->buflen[1]) + ctx->gr0.ikp = 1; + return err; +} + +#define S390_HMAC_SHA2_ALG(x, ss) { \ .fc = CPACF_KMAC_HMAC_SHA_##x, \ .alg = { \ .init = s390_hmac_sha2_init, \ .update = s390_hmac_sha2_update, \ - .final = s390_hmac_sha2_final, \ + .finup = s390_hmac_sha2_finup, \ .digest = s390_hmac_sha2_digest, \ .setkey = s390_hmac_sha2_setkey, \ + .export = s390_hmac_export, \ + .import = s390_hmac_import, \ .descsize = sizeof(struct s390_kmac_sha2_ctx), \ .halg = { \ + .statesize = ss, \ .digestsize = SHA##x##_DIGEST_SIZE, \ .base = { \ .cra_name = "hmac(sha" #x ")", \ .cra_driver_name = "hmac_s390_sha" #x, \ .cra_blocksize = SHA##x##_BLOCK_SIZE, \ .cra_priority = 400, \ + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | \ + CRYPTO_AHASH_ALG_FINUP_MAX, \ .cra_ctxsize = sizeof(struct s390_hmac_ctx), \ .cra_module = THIS_MODULE, \ }, \ @@ -298,10 +362,10 @@ static struct s390_hmac_alg { unsigned int fc; struct shash_alg alg; } s390_hmac_algs[] = { - S390_HMAC_SHA2_ALG(224), - S390_HMAC_SHA2_ALG(256), - S390_HMAC_SHA2_ALG(384), - S390_HMAC_SHA2_ALG(512), + S390_HMAC_SHA2_ALG(224, sizeof(struct crypto_sha256_state)), + S390_HMAC_SHA2_ALG(256, sizeof(struct crypto_sha256_state)), + S390_HMAC_SHA2_ALG(384, SHA512_STATE_SIZE), + S390_HMAC_SHA2_ALG(512, SHA512_STATE_SIZE), }; static __always_inline void _s390_hmac_algs_unregister(void) diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index 511093713a6f..8a340c16acb4 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -5,7 
+5,7 @@ * s390 implementation of the AES Cipher Algorithm with protected keys. * * s390 Version: - * Copyright IBM Corp. 2017, 2023 + * Copyright IBM Corp. 2017, 2025 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> * Harald Freudenberger <freude@de.ibm.com> */ @@ -13,16 +13,18 @@ #define KMSG_COMPONENT "paes_s390" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include <crypto/aes.h> -#include <crypto/algapi.h> -#include <linux/bug.h> -#include <linux/err.h> -#include <linux/module.h> +#include <linux/atomic.h> #include <linux/cpufeature.h> +#include <linux/delay.h> +#include <linux/err.h> #include <linux/init.h> +#include <linux/miscdevice.h> +#include <linux/module.h> #include <linux/mutex.h> #include <linux/spinlock.h> -#include <linux/delay.h> +#include <crypto/aes.h> +#include <crypto/algapi.h> +#include <crypto/engine.h> #include <crypto/internal/skcipher.h> #include <crypto/xts.h> #include <asm/cpacf.h> @@ -44,23 +46,61 @@ static DEFINE_MUTEX(ctrblk_lock); static cpacf_mask_t km_functions, kmc_functions, kmctr_functions; +static struct crypto_engine *paes_crypto_engine; +#define MAX_QLEN 10 + +/* + * protected key specific stuff + */ + struct paes_protkey { u32 type; u32 len; u8 protkey[PXTS_256_PROTKEY_SIZE]; }; -struct key_blob { - /* - * Small keys will be stored in the keybuf. Larger keys are - * stored in extra allocated memory. In both cases does - * key point to the memory where the key is stored. - * The code distinguishes by checking keylen against - * sizeof(keybuf). See the two following helper functions. 
- */ - u8 *key; - u8 keybuf[128]; +#define PK_STATE_NO_KEY 0 +#define PK_STATE_CONVERT_IN_PROGRESS 1 +#define PK_STATE_VALID 2 + +struct s390_paes_ctx { + /* source key material used to derive a protected key from */ + u8 keybuf[PAES_MAX_KEYSIZE]; + unsigned int keylen; + + /* cpacf function code to use with this protected key type */ + long fc; + + /* nr of requests enqueued via crypto engine which use this tfm ctx */ + atomic_t via_engine_ctr; + + /* spinlock to atomic read/update all the following fields */ + spinlock_t pk_lock; + + /* see PK_STATE* defines above, < 0 holds convert failure rc */ + int pk_state; + /* if state is valid, pk holds the protected key */ + struct paes_protkey pk; +}; + +struct s390_pxts_ctx { + /* source key material used to derive a protected key from */ + u8 keybuf[2 * PAES_MAX_KEYSIZE]; unsigned int keylen; + + /* cpacf function code to use with this protected key type */ + long fc; + + /* nr of requests enqueued via crypto engine which use this tfm ctx */ + atomic_t via_engine_ctr; + + /* spinlock to atomic read/update all the following fields */ + spinlock_t pk_lock; + + /* see PK_STATE* defines above, < 0 holds convert failure rc */ + int pk_state; + /* if state is valid, pk[] hold(s) the protected key(s) */ + struct paes_protkey pk[2]; }; /* @@ -89,214 +129,370 @@ static inline u32 make_clrkey_token(const u8 *ck, size_t cklen, u8 *dest) return sizeof(*token) + cklen; } -static inline int _key_to_kb(struct key_blob *kb, - const u8 *key, - unsigned int keylen) +/* + * paes_ctx_setkey() - Set key value into context, maybe construct + * a clear key token digestible by pkey from a clear key value. 
+ */ +static inline int paes_ctx_setkey(struct s390_paes_ctx *ctx, + const u8 *key, unsigned int keylen) { + if (keylen > sizeof(ctx->keybuf)) + return -EINVAL; + switch (keylen) { case 16: case 24: case 32: /* clear key value, prepare pkey clear key token in keybuf */ - memset(kb->keybuf, 0, sizeof(kb->keybuf)); - kb->keylen = make_clrkey_token(key, keylen, kb->keybuf); - kb->key = kb->keybuf; + memset(ctx->keybuf, 0, sizeof(ctx->keybuf)); + ctx->keylen = make_clrkey_token(key, keylen, ctx->keybuf); break; default: /* other key material, let pkey handle this */ - if (keylen <= sizeof(kb->keybuf)) - kb->key = kb->keybuf; - else { - kb->key = kmalloc(keylen, GFP_KERNEL); - if (!kb->key) - return -ENOMEM; - } - memcpy(kb->key, key, keylen); - kb->keylen = keylen; + memcpy(ctx->keybuf, key, keylen); + ctx->keylen = keylen; break; } return 0; } -static inline int _xts_key_to_kb(struct key_blob *kb, - const u8 *key, - unsigned int keylen) +/* + * pxts_ctx_setkey() - Set key value into context, maybe construct + * a clear key token digestible by pkey from a clear key value. 
+ */ +static inline int pxts_ctx_setkey(struct s390_pxts_ctx *ctx, + const u8 *key, unsigned int keylen) { size_t cklen = keylen / 2; - memset(kb->keybuf, 0, sizeof(kb->keybuf)); + if (keylen > sizeof(ctx->keybuf)) + return -EINVAL; switch (keylen) { case 32: case 64: /* clear key value, prepare pkey clear key tokens in keybuf */ - kb->key = kb->keybuf; - kb->keylen = make_clrkey_token(key, cklen, kb->key); - kb->keylen += make_clrkey_token(key + cklen, cklen, - kb->key + kb->keylen); + memset(ctx->keybuf, 0, sizeof(ctx->keybuf)); + ctx->keylen = make_clrkey_token(key, cklen, ctx->keybuf); + ctx->keylen += make_clrkey_token(key + cklen, cklen, + ctx->keybuf + ctx->keylen); break; default: /* other key material, let pkey handle this */ - if (keylen <= sizeof(kb->keybuf)) { - kb->key = kb->keybuf; - } else { - kb->key = kmalloc(keylen, GFP_KERNEL); - if (!kb->key) - return -ENOMEM; - } - memcpy(kb->key, key, keylen); - kb->keylen = keylen; + memcpy(ctx->keybuf, key, keylen); + ctx->keylen = keylen; break; } return 0; } -static inline void _free_kb_keybuf(struct key_blob *kb) +/* + * Convert the raw key material into a protected key via PKEY api. + * This function may sleep - don't call in non-sleeping context. + */ +static inline int convert_key(const u8 *key, unsigned int keylen, + struct paes_protkey *pk) { - if (kb->key && kb->key != kb->keybuf - && kb->keylen > sizeof(kb->keybuf)) { - kfree_sensitive(kb->key); - kb->key = NULL; + int rc, i; + + pk->len = sizeof(pk->protkey); + + /* + * In case of a busy card retry with increasing delay + * of 200, 400, 800 and 1600 ms - in total 3 s. 
+ */ + for (rc = -EIO, i = 0; rc && i < 5; i++) { + if (rc == -EBUSY && msleep_interruptible((1 << i) * 100)) { + rc = -EINTR; + goto out; + } + rc = pkey_key2protkey(key, keylen, + pk->protkey, &pk->len, &pk->type, + PKEY_XFLAG_NOMEMALLOC); } - memzero_explicit(kb->keybuf, sizeof(kb->keybuf)); + +out: + pr_debug("rc=%d\n", rc); + return rc; } -struct s390_paes_ctx { - struct key_blob kb; +/* + * (Re-)Convert the raw key material from the ctx into a protected key + * via convert_key() function. Update the pk_state, pk_type, pk_len + * and the protected key in the tfm context. + * Please note this function may be invoked concurrently with the very + * same tfm context. The pk_lock spinlock in the context ensures an + * atomic update of the pk and the pk state but does not guarantee any + * order of update. So a fresh converted valid protected key may get + * updated with an 'old' expired key value. As the cpacf instructions + * detect this, refuse to operate with an invalid key and the calling + * code triggers a (re-)conversion this does no harm. This may lead to + * unnecessary additional conversion but never to invalid data on en- + * or decrypt operations. 
+ */ +static int paes_convert_key(struct s390_paes_ctx *ctx) +{ struct paes_protkey pk; - spinlock_t pk_lock; - unsigned long fc; -}; + int rc; -struct s390_pxts_ctx { - struct key_blob kb; - struct paes_protkey pk[2]; - spinlock_t pk_lock; - unsigned long fc; -}; + spin_lock_bh(&ctx->pk_lock); + ctx->pk_state = PK_STATE_CONVERT_IN_PROGRESS; + spin_unlock_bh(&ctx->pk_lock); -static inline int __paes_keyblob2pkey(const u8 *key, unsigned int keylen, - struct paes_protkey *pk) -{ - int i, rc = -EIO; + rc = convert_key(ctx->keybuf, ctx->keylen, &pk); - /* try three times in case of busy card */ - for (i = 0; rc && i < 3; i++) { - if (rc == -EBUSY && in_task()) { - if (msleep_interruptible(1000)) - return -EINTR; - } - rc = pkey_key2protkey(key, keylen, pk->protkey, &pk->len, - &pk->type); + /* update context */ + spin_lock_bh(&ctx->pk_lock); + if (rc) { + ctx->pk_state = rc; + } else { + ctx->pk_state = PK_STATE_VALID; + ctx->pk = pk; } + spin_unlock_bh(&ctx->pk_lock); + memzero_explicit(&pk, sizeof(pk)); + pr_debug("rc=%d\n", rc); return rc; } -static inline int __paes_convert_key(struct s390_paes_ctx *ctx) +/* + * (Re-)Convert the raw xts key material from the ctx into a + * protected key via convert_key() function. Update the pk_state, + * pk_type, pk_len and the protected key in the tfm context. + * See also comments on function paes_convert_key. 
+ */ +static int pxts_convert_key(struct s390_pxts_ctx *ctx) { - struct paes_protkey pk; + struct paes_protkey pk0, pk1; + size_t split_keylen; int rc; - pk.len = sizeof(pk.protkey); - rc = __paes_keyblob2pkey(ctx->kb.key, ctx->kb.keylen, &pk); + spin_lock_bh(&ctx->pk_lock); + ctx->pk_state = PK_STATE_CONVERT_IN_PROGRESS; + spin_unlock_bh(&ctx->pk_lock); + + rc = convert_key(ctx->keybuf, ctx->keylen, &pk0); if (rc) - return rc; + goto out; + + switch (pk0.type) { + case PKEY_KEYTYPE_AES_128: + case PKEY_KEYTYPE_AES_256: + /* second keytoken required */ + if (ctx->keylen % 2) { + rc = -EINVAL; + goto out; + } + split_keylen = ctx->keylen / 2; + rc = convert_key(ctx->keybuf + split_keylen, + split_keylen, &pk1); + if (rc) + goto out; + if (pk0.type != pk1.type) { + rc = -EINVAL; + goto out; + } + break; + case PKEY_KEYTYPE_AES_XTS_128: + case PKEY_KEYTYPE_AES_XTS_256: + /* single key */ + pk1.type = 0; + break; + default: + /* unsupported protected keytype */ + rc = -EINVAL; + goto out; + } +out: + /* update context */ spin_lock_bh(&ctx->pk_lock); - memcpy(&ctx->pk, &pk, sizeof(pk)); + if (rc) { + ctx->pk_state = rc; + } else { + ctx->pk_state = PK_STATE_VALID; + ctx->pk[0] = pk0; + ctx->pk[1] = pk1; + } spin_unlock_bh(&ctx->pk_lock); - return 0; + memzero_explicit(&pk0, sizeof(pk0)); + memzero_explicit(&pk1, sizeof(pk1)); + pr_debug("rc=%d\n", rc); + return rc; } -static int ecb_paes_init(struct crypto_skcipher *tfm) -{ - struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); +/* + * PAES ECB implementation + */ - ctx->kb.key = NULL; - spin_lock_init(&ctx->pk_lock); +struct ecb_param { + u8 key[PAES_256_PROTKEY_SIZE]; +} __packed; - return 0; -} +struct s390_pecb_req_ctx { + unsigned long modifier; + struct skcipher_walk walk; + bool param_init_done; + struct ecb_param param; +}; -static void ecb_paes_exit(struct crypto_skcipher *tfm) +static int ecb_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) { struct s390_paes_ctx *ctx = 
crypto_skcipher_ctx(tfm); - - _free_kb_keybuf(&ctx->kb); -} - -static inline int __ecb_paes_set_key(struct s390_paes_ctx *ctx) -{ - unsigned long fc; + long fc; int rc; - rc = __paes_convert_key(ctx); + /* set raw key into context */ + rc = paes_ctx_setkey(ctx, in_key, key_len); if (rc) - return rc; + goto out; - /* Pick the correct function code based on the protected key type */ - fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KM_PAES_128 : - (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KM_PAES_192 : - (ctx->pk.type == PKEY_KEYTYPE_AES_256) ? CPACF_KM_PAES_256 : 0; + /* convert key into protected key */ + rc = paes_convert_key(ctx); + if (rc) + goto out; - /* Check if the function code is available */ + /* Pick the correct function code based on the protected key type */ + switch (ctx->pk.type) { + case PKEY_KEYTYPE_AES_128: + fc = CPACF_KM_PAES_128; + break; + case PKEY_KEYTYPE_AES_192: + fc = CPACF_KM_PAES_192; + break; + case PKEY_KEYTYPE_AES_256: + fc = CPACF_KM_PAES_256; + break; + default: + fc = 0; + break; + } ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0; - return ctx->fc ? 0 : -EINVAL; + rc = fc ? 
0 : -EINVAL; + +out: + pr_debug("rc=%d\n", rc); + return rc; } -static int ecb_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int key_len) +static int ecb_paes_do_crypt(struct s390_paes_ctx *ctx, + struct s390_pecb_req_ctx *req_ctx, + bool maysleep) { - struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - int rc; - - _free_kb_keybuf(&ctx->kb); - rc = _key_to_kb(&ctx->kb, in_key, key_len); + struct ecb_param *param = &req_ctx->param; + struct skcipher_walk *walk = &req_ctx->walk; + unsigned int nbytes, n, k; + int pk_state, rc = 0; + + if (!req_ctx->param_init_done) { + /* fetch and check protected key state */ + spin_lock_bh(&ctx->pk_lock); + pk_state = ctx->pk_state; + switch (pk_state) { + case PK_STATE_NO_KEY: + rc = -ENOKEY; + break; + case PK_STATE_CONVERT_IN_PROGRESS: + rc = -EKEYEXPIRED; + break; + case PK_STATE_VALID: + memcpy(param->key, ctx->pk.protkey, sizeof(param->key)); + req_ctx->param_init_done = true; + break; + default: + rc = pk_state < 0 ? pk_state : -EIO; + break; + } + spin_unlock_bh(&ctx->pk_lock); + } if (rc) - return rc; + goto out; - return __ecb_paes_set_key(ctx); + /* + * Note that in case of partial processing or failure the walk + * is NOT unmapped here. So a follow up task may reuse the walk + * or in case of unrecoverable failure needs to unmap it. 
+ */ + while ((nbytes = walk->nbytes) != 0) { + /* only use complete blocks */ + n = nbytes & ~(AES_BLOCK_SIZE - 1); + k = cpacf_km(ctx->fc | req_ctx->modifier, param, + walk->dst.virt.addr, walk->src.virt.addr, n); + if (k) + rc = skcipher_walk_done(walk, nbytes - k); + if (k < n) { + if (!maysleep) { + rc = -EKEYEXPIRED; + goto out; + } + rc = paes_convert_key(ctx); + if (rc) + goto out; + spin_lock_bh(&ctx->pk_lock); + memcpy(param->key, ctx->pk.protkey, sizeof(param->key)); + spin_unlock_bh(&ctx->pk_lock); + } + } + +out: + pr_debug("rc=%d\n", rc); + return rc; } static int ecb_paes_crypt(struct skcipher_request *req, unsigned long modifier) { + struct s390_pecb_req_ctx *req_ctx = skcipher_request_ctx(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - struct { - u8 key[PAES_256_PROTKEY_SIZE]; - } param; - struct skcipher_walk walk; - unsigned int nbytes, n, k; + struct skcipher_walk *walk = &req_ctx->walk; int rc; - rc = skcipher_walk_virt(&walk, req, false); + /* + * Attempt synchronous encryption first. If it fails, schedule the request + * asynchronously via the crypto engine. To preserve execution order, + * once a request is queued to the engine, further requests using the same + * tfm will also be routed through the engine. 
+ */ + + rc = skcipher_walk_virt(walk, req, false); if (rc) - return rc; + goto out; - spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); - spin_unlock_bh(&ctx->pk_lock); + req_ctx->modifier = modifier; + req_ctx->param_init_done = false; - while ((nbytes = walk.nbytes) != 0) { - /* only use complete blocks */ - n = nbytes & ~(AES_BLOCK_SIZE - 1); - k = cpacf_km(ctx->fc | modifier, &param, - walk.dst.virt.addr, walk.src.virt.addr, n); - if (k) - rc = skcipher_walk_done(&walk, nbytes - k); - if (k < n) { - if (__paes_convert_key(ctx)) - return skcipher_walk_done(&walk, -EIO); - spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); - spin_unlock_bh(&ctx->pk_lock); - } + /* Try synchronous operation if no active engine usage */ + if (!atomic_read(&ctx->via_engine_ctr)) { + rc = ecb_paes_do_crypt(ctx, req_ctx, false); + if (rc == 0) + goto out; + } + + /* + * If sync operation failed or key expired or there are already + * requests enqueued via engine, fallback to async. Mark tfm as + * using engine to serialize requests. 
+ */ + if (rc == 0 || rc == -EKEYEXPIRED) { + atomic_inc(&ctx->via_engine_ctr); + rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req); + if (rc != -EINPROGRESS) + atomic_dec(&ctx->via_engine_ctr); } + + if (rc != -EINPROGRESS) + skcipher_walk_done(walk, rc); + +out: + if (rc != -EINPROGRESS) + memzero_explicit(&req_ctx->param, sizeof(req_ctx->param)); + pr_debug("rc=%d\n", rc); return rc; } @@ -310,112 +506,256 @@ static int ecb_paes_decrypt(struct skcipher_request *req) return ecb_paes_crypt(req, CPACF_DECRYPT); } -static struct skcipher_alg ecb_paes_alg = { - .base.cra_name = "ecb(paes)", - .base.cra_driver_name = "ecb-paes-s390", - .base.cra_priority = 401, /* combo: aes + ecb + 1 */ - .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct s390_paes_ctx), - .base.cra_module = THIS_MODULE, - .base.cra_list = LIST_HEAD_INIT(ecb_paes_alg.base.cra_list), - .init = ecb_paes_init, - .exit = ecb_paes_exit, - .min_keysize = PAES_MIN_KEYSIZE, - .max_keysize = PAES_MAX_KEYSIZE, - .setkey = ecb_paes_set_key, - .encrypt = ecb_paes_encrypt, - .decrypt = ecb_paes_decrypt, -}; - -static int cbc_paes_init(struct crypto_skcipher *tfm) +static int ecb_paes_init(struct crypto_skcipher *tfm) { struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - ctx->kb.key = NULL; + memset(ctx, 0, sizeof(*ctx)); spin_lock_init(&ctx->pk_lock); + crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pecb_req_ctx)); + return 0; } -static void cbc_paes_exit(struct crypto_skcipher *tfm) +static void ecb_paes_exit(struct crypto_skcipher *tfm) { struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - _free_kb_keybuf(&ctx->kb); + memzero_explicit(ctx, sizeof(*ctx)); } -static inline int __cbc_paes_set_key(struct s390_paes_ctx *ctx) +static int ecb_paes_do_one_request(struct crypto_engine *engine, void *areq) { - unsigned long fc; + struct skcipher_request *req = skcipher_request_cast(areq); + struct s390_pecb_req_ctx *req_ctx = skcipher_request_ctx(req); + struct 
crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk *walk = &req_ctx->walk; int rc; - rc = __paes_convert_key(ctx); - if (rc) - return rc; + /* walk has already been prepared */ + + rc = ecb_paes_do_crypt(ctx, req_ctx, true); + if (rc == -EKEYEXPIRED) { + /* + * Protected key expired, conversion is in progress. + * Trigger a re-schedule of this request by returning + * -ENOSPC ("hardware queue is full") to the crypto engine. + * To avoid immediate re-invocation of this callback, + * tell the scheduler to voluntarily give up the CPU here. + */ + cond_resched(); + pr_debug("rescheduling request\n"); + return -ENOSPC; + } else if (rc) { + skcipher_walk_done(walk, rc); + } - /* Pick the correct function code based on the protected key type */ - fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMC_PAES_128 : - (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KMC_PAES_192 : - (ctx->pk.type == PKEY_KEYTYPE_AES_256) ? CPACF_KMC_PAES_256 : 0; + memzero_explicit(&req_ctx->param, sizeof(req_ctx->param)); + pr_debug("request complete with rc=%d\n", rc); + local_bh_disable(); + atomic_dec(&ctx->via_engine_ctr); + crypto_finalize_skcipher_request(engine, req, rc); + local_bh_enable(); + return rc; +} - /* Check if the function code is available */ - ctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? 
fc : 0; +static struct skcipher_engine_alg ecb_paes_alg = { + .base = { + .base.cra_name = "ecb(paes)", + .base.cra_driver_name = "ecb-paes-s390", + .base.cra_priority = 401, /* combo: aes + ecb + 1 */ + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_paes_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_list = LIST_HEAD_INIT(ecb_paes_alg.base.base.cra_list), + .init = ecb_paes_init, + .exit = ecb_paes_exit, + .min_keysize = PAES_MIN_KEYSIZE, + .max_keysize = PAES_MAX_KEYSIZE, + .setkey = ecb_paes_setkey, + .encrypt = ecb_paes_encrypt, + .decrypt = ecb_paes_decrypt, + }, + .op = { + .do_one_request = ecb_paes_do_one_request, + }, +}; - return ctx->fc ? 0 : -EINVAL; -} +/* + * PAES CBC implementation + */ + +struct cbc_param { + u8 iv[AES_BLOCK_SIZE]; + u8 key[PAES_256_PROTKEY_SIZE]; +} __packed; + +struct s390_pcbc_req_ctx { + unsigned long modifier; + struct skcipher_walk walk; + bool param_init_done; + struct cbc_param param; +}; -static int cbc_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int key_len) +static int cbc_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) { struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + long fc; int rc; - _free_kb_keybuf(&ctx->kb); - rc = _key_to_kb(&ctx->kb, in_key, key_len); + /* set raw key into context */ + rc = paes_ctx_setkey(ctx, in_key, key_len); if (rc) - return rc; + goto out; - return __cbc_paes_set_key(ctx); + /* convert raw key into protected key */ + rc = paes_convert_key(ctx); + if (rc) + goto out; + + /* Pick the correct function code based on the protected key type */ + switch (ctx->pk.type) { + case PKEY_KEYTYPE_AES_128: + fc = CPACF_KMC_PAES_128; + break; + case PKEY_KEYTYPE_AES_192: + fc = CPACF_KMC_PAES_192; + break; + case PKEY_KEYTYPE_AES_256: + fc = CPACF_KMC_PAES_256; + break; + default: + fc = 0; + break; + } + ctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0; + + rc = fc ? 
0 : -EINVAL; + +out: + pr_debug("rc=%d\n", rc); + return rc; } -static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier) +static int cbc_paes_do_crypt(struct s390_paes_ctx *ctx, + struct s390_pcbc_req_ctx *req_ctx, + bool maysleep) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - struct { - u8 iv[AES_BLOCK_SIZE]; - u8 key[PAES_256_PROTKEY_SIZE]; - } param; - struct skcipher_walk walk; + struct cbc_param *param = &req_ctx->param; + struct skcipher_walk *walk = &req_ctx->walk; unsigned int nbytes, n, k; - int rc; - - rc = skcipher_walk_virt(&walk, req, false); + int pk_state, rc = 0; + + if (!req_ctx->param_init_done) { + /* fetch and check protected key state */ + spin_lock_bh(&ctx->pk_lock); + pk_state = ctx->pk_state; + switch (pk_state) { + case PK_STATE_NO_KEY: + rc = -ENOKEY; + break; + case PK_STATE_CONVERT_IN_PROGRESS: + rc = -EKEYEXPIRED; + break; + case PK_STATE_VALID: + memcpy(param->key, ctx->pk.protkey, sizeof(param->key)); + req_ctx->param_init_done = true; + break; + default: + rc = pk_state < 0 ? pk_state : -EIO; + break; + } + spin_unlock_bh(&ctx->pk_lock); + } if (rc) - return rc; + goto out; - memcpy(param.iv, walk.iv, AES_BLOCK_SIZE); - spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); - spin_unlock_bh(&ctx->pk_lock); + memcpy(param->iv, walk->iv, AES_BLOCK_SIZE); - while ((nbytes = walk.nbytes) != 0) { + /* + * Note that in case of partial processing or failure the walk + * is NOT unmapped here. So a follow up task may reuse the walk + * or in case of unrecoverable failure needs to unmap it. 
+ */ + while ((nbytes = walk->nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); - k = cpacf_kmc(ctx->fc | modifier, &param, - walk.dst.virt.addr, walk.src.virt.addr, n); + k = cpacf_kmc(ctx->fc | req_ctx->modifier, param, + walk->dst.virt.addr, walk->src.virt.addr, n); if (k) { - memcpy(walk.iv, param.iv, AES_BLOCK_SIZE); - rc = skcipher_walk_done(&walk, nbytes - k); + memcpy(walk->iv, param->iv, AES_BLOCK_SIZE); + rc = skcipher_walk_done(walk, nbytes - k); } if (k < n) { - if (__paes_convert_key(ctx)) - return skcipher_walk_done(&walk, -EIO); + if (!maysleep) { + rc = -EKEYEXPIRED; + goto out; + } + rc = paes_convert_key(ctx); + if (rc) + goto out; spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); + memcpy(param->key, ctx->pk.protkey, sizeof(param->key)); spin_unlock_bh(&ctx->pk_lock); } } + +out: + pr_debug("rc=%d\n", rc); + return rc; +} + +static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier) +{ + struct s390_pcbc_req_ctx *req_ctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk *walk = &req_ctx->walk; + int rc; + + /* + * Attempt synchronous encryption first. If it fails, schedule the request + * asynchronously via the crypto engine. To preserve execution order, + * once a request is queued to the engine, further requests using the same + * tfm will also be routed through the engine. + */ + + rc = skcipher_walk_virt(walk, req, false); + if (rc) + goto out; + + req_ctx->modifier = modifier; + req_ctx->param_init_done = false; + + /* Try synchronous operation if no active engine usage */ + if (!atomic_read(&ctx->via_engine_ctr)) { + rc = cbc_paes_do_crypt(ctx, req_ctx, false); + if (rc == 0) + goto out; + } + + /* + * If sync operation failed or key expired or there are already + * requests enqueued via engine, fallback to async. 
Mark tfm as + * using engine to serialize requests. + */ + if (rc == 0 || rc == -EKEYEXPIRED) { + atomic_inc(&ctx->via_engine_ctr); + rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req); + if (rc != -EINPROGRESS) + atomic_dec(&ctx->via_engine_ctr); + } + + if (rc != -EINPROGRESS) + skcipher_walk_done(walk, rc); + +out: + if (rc != -EINPROGRESS) + memzero_explicit(&req_ctx->param, sizeof(req_ctx->param)); + pr_debug("rc=%d\n", rc); return rc; } @@ -429,496 +769,882 @@ static int cbc_paes_decrypt(struct skcipher_request *req) return cbc_paes_crypt(req, CPACF_DECRYPT); } -static struct skcipher_alg cbc_paes_alg = { - .base.cra_name = "cbc(paes)", - .base.cra_driver_name = "cbc-paes-s390", - .base.cra_priority = 402, /* ecb-paes-s390 + 1 */ - .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct s390_paes_ctx), - .base.cra_module = THIS_MODULE, - .base.cra_list = LIST_HEAD_INIT(cbc_paes_alg.base.cra_list), - .init = cbc_paes_init, - .exit = cbc_paes_exit, - .min_keysize = PAES_MIN_KEYSIZE, - .max_keysize = PAES_MAX_KEYSIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = cbc_paes_set_key, - .encrypt = cbc_paes_encrypt, - .decrypt = cbc_paes_decrypt, -}; - -static int xts_paes_init(struct crypto_skcipher *tfm) +static int cbc_paes_init(struct crypto_skcipher *tfm) { - struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - ctx->kb.key = NULL; + memset(ctx, 0, sizeof(*ctx)); spin_lock_init(&ctx->pk_lock); + crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pcbc_req_ctx)); + return 0; } -static void xts_paes_exit(struct crypto_skcipher *tfm) +static void cbc_paes_exit(struct crypto_skcipher *tfm) { - struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - _free_kb_keybuf(&ctx->kb); + memzero_explicit(ctx, sizeof(*ctx)); } -static inline int __xts_paes_convert_key(struct s390_pxts_ctx *ctx) +static int 
cbc_paes_do_one_request(struct crypto_engine *engine, void *areq) { - struct paes_protkey pk0, pk1; - size_t split_keylen; + struct skcipher_request *req = skcipher_request_cast(areq); + struct s390_pcbc_req_ctx *req_ctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk *walk = &req_ctx->walk; int rc; - pk0.len = sizeof(pk0.protkey); - pk1.len = sizeof(pk1.protkey); - - rc = __paes_keyblob2pkey(ctx->kb.key, ctx->kb.keylen, &pk0); - if (rc) - return rc; + /* walk has already been prepared */ + + rc = cbc_paes_do_crypt(ctx, req_ctx, true); + if (rc == -EKEYEXPIRED) { + /* + * Protected key expired, conversion is in progress. + * Trigger a re-schedule of this request by returning + * -ENOSPC ("hardware queue is full") to the crypto engine. + * To avoid immediate re-invocation of this callback, + * tell the scheduler to voluntarily give up the CPU here. + */ + cond_resched(); + pr_debug("rescheduling request\n"); + return -ENOSPC; + } else if (rc) { + skcipher_walk_done(walk, rc); + } - switch (pk0.type) { - case PKEY_KEYTYPE_AES_128: - case PKEY_KEYTYPE_AES_256: - /* second keytoken required */ - if (ctx->kb.keylen % 2) - return -EINVAL; - split_keylen = ctx->kb.keylen / 2; + memzero_explicit(&req_ctx->param, sizeof(req_ctx->param)); + pr_debug("request complete with rc=%d\n", rc); + local_bh_disable(); + atomic_dec(&ctx->via_engine_ctr); + crypto_finalize_skcipher_request(engine, req, rc); + local_bh_enable(); + return rc; +} - rc = __paes_keyblob2pkey(ctx->kb.key + split_keylen, - split_keylen, &pk1); - if (rc) - return rc; +static struct skcipher_engine_alg cbc_paes_alg = { + .base = { + .base.cra_name = "cbc(paes)", + .base.cra_driver_name = "cbc-paes-s390", + .base.cra_priority = 402, /* ecb-paes-s390 + 1 */ + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_paes_ctx), + .base.cra_module = THIS_MODULE, + 
.base.cra_list = LIST_HEAD_INIT(cbc_paes_alg.base.base.cra_list), + .init = cbc_paes_init, + .exit = cbc_paes_exit, + .min_keysize = PAES_MIN_KEYSIZE, + .max_keysize = PAES_MAX_KEYSIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = cbc_paes_setkey, + .encrypt = cbc_paes_encrypt, + .decrypt = cbc_paes_decrypt, + }, + .op = { + .do_one_request = cbc_paes_do_one_request, + }, +}; - if (pk0.type != pk1.type) - return -EINVAL; - break; - case PKEY_KEYTYPE_AES_XTS_128: - case PKEY_KEYTYPE_AES_XTS_256: - /* single key */ - pk1.type = 0; - break; - default: - /* unsupported protected keytype */ - return -EINVAL; - } +/* + * PAES CTR implementation + */ - spin_lock_bh(&ctx->pk_lock); - ctx->pk[0] = pk0; - ctx->pk[1] = pk1; - spin_unlock_bh(&ctx->pk_lock); +struct ctr_param { + u8 key[PAES_256_PROTKEY_SIZE]; +} __packed; - return 0; -} +struct s390_pctr_req_ctx { + unsigned long modifier; + struct skcipher_walk walk; + bool param_init_done; + struct ctr_param param; +}; -static inline int __xts_paes_set_key(struct s390_pxts_ctx *ctx) +static int ctr_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) { - unsigned long fc; + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + long fc; int rc; - rc = __xts_paes_convert_key(ctx); + /* set raw key into context */ + rc = paes_ctx_setkey(ctx, in_key, key_len); if (rc) - return rc; + goto out; + + /* convert raw key into protected key */ + rc = paes_convert_key(ctx); + if (rc) + goto out; /* Pick the correct function code based on the protected key type */ - switch (ctx->pk[0].type) { + switch (ctx->pk.type) { case PKEY_KEYTYPE_AES_128: - fc = CPACF_KM_PXTS_128; - break; - case PKEY_KEYTYPE_AES_256: - fc = CPACF_KM_PXTS_256; + fc = CPACF_KMCTR_PAES_128; break; - case PKEY_KEYTYPE_AES_XTS_128: - fc = CPACF_KM_PXTS_128_FULL; + case PKEY_KEYTYPE_AES_192: + fc = CPACF_KMCTR_PAES_192; break; - case PKEY_KEYTYPE_AES_XTS_256: - fc = CPACF_KM_PXTS_256_FULL; + case PKEY_KEYTYPE_AES_256: + fc = 
CPACF_KMCTR_PAES_256; break; default: fc = 0; break; } + ctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0; - /* Check if the function code is available */ - ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0; + rc = fc ? 0 : -EINVAL; + +out: + pr_debug("rc=%d\n", rc); + return rc; +} + +static inline unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes) +{ + unsigned int i, n; + + /* only use complete blocks, max. PAGE_SIZE */ + memcpy(ctrptr, iv, AES_BLOCK_SIZE); + n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1); + for (i = (n / AES_BLOCK_SIZE) - 1; i > 0; i--) { + memcpy(ctrptr + AES_BLOCK_SIZE, ctrptr, AES_BLOCK_SIZE); + crypto_inc(ctrptr + AES_BLOCK_SIZE, AES_BLOCK_SIZE); + ctrptr += AES_BLOCK_SIZE; + } + return n; +} + +static int ctr_paes_do_crypt(struct s390_paes_ctx *ctx, + struct s390_pctr_req_ctx *req_ctx, + bool maysleep) +{ + struct ctr_param *param = &req_ctx->param; + struct skcipher_walk *walk = &req_ctx->walk; + u8 buf[AES_BLOCK_SIZE], *ctrptr; + unsigned int nbytes, n, k; + int pk_state, locked, rc = 0; + + if (!req_ctx->param_init_done) { + /* fetch and check protected key state */ + spin_lock_bh(&ctx->pk_lock); + pk_state = ctx->pk_state; + switch (pk_state) { + case PK_STATE_NO_KEY: + rc = -ENOKEY; + break; + case PK_STATE_CONVERT_IN_PROGRESS: + rc = -EKEYEXPIRED; + break; + case PK_STATE_VALID: + memcpy(param->key, ctx->pk.protkey, sizeof(param->key)); + req_ctx->param_init_done = true; + break; + default: + rc = pk_state < 0 ? pk_state : -EIO; + break; + } + spin_unlock_bh(&ctx->pk_lock); + } + if (rc) + goto out; + + locked = mutex_trylock(&ctrblk_lock); + + /* + * Note that in case of partial processing or failure the walk + * is NOT unmapped here. So a follow up task may reuse the walk + * or in case of unrecoverable failure needs to unmap it. 
+ */ + while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { + n = AES_BLOCK_SIZE; + if (nbytes >= 2 * AES_BLOCK_SIZE && locked) + n = __ctrblk_init(ctrblk, walk->iv, nbytes); + ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv; + k = cpacf_kmctr(ctx->fc, param, walk->dst.virt.addr, + walk->src.virt.addr, n, ctrptr); + if (k) { + if (ctrptr == ctrblk) + memcpy(walk->iv, ctrptr + k - AES_BLOCK_SIZE, + AES_BLOCK_SIZE); + crypto_inc(walk->iv, AES_BLOCK_SIZE); + rc = skcipher_walk_done(walk, nbytes - k); + } + if (k < n) { + if (!maysleep) { + if (locked) + mutex_unlock(&ctrblk_lock); + rc = -EKEYEXPIRED; + goto out; + } + rc = paes_convert_key(ctx); + if (rc) { + if (locked) + mutex_unlock(&ctrblk_lock); + goto out; + } + spin_lock_bh(&ctx->pk_lock); + memcpy(param->key, ctx->pk.protkey, sizeof(param->key)); + spin_unlock_bh(&ctx->pk_lock); + } + } + if (locked) + mutex_unlock(&ctrblk_lock); + + /* final block may be < AES_BLOCK_SIZE, copy only nbytes */ + if (nbytes) { + memset(buf, 0, AES_BLOCK_SIZE); + memcpy(buf, walk->src.virt.addr, nbytes); + while (1) { + if (cpacf_kmctr(ctx->fc, param, buf, + buf, AES_BLOCK_SIZE, + walk->iv) == AES_BLOCK_SIZE) + break; + if (!maysleep) { + rc = -EKEYEXPIRED; + goto out; + } + rc = paes_convert_key(ctx); + if (rc) + goto out; + spin_lock_bh(&ctx->pk_lock); + memcpy(param->key, ctx->pk.protkey, sizeof(param->key)); + spin_unlock_bh(&ctx->pk_lock); + } + memcpy(walk->dst.virt.addr, buf, nbytes); + crypto_inc(walk->iv, AES_BLOCK_SIZE); + rc = skcipher_walk_done(walk, 0); + } + +out: + pr_debug("rc=%d\n", rc); + return rc; +} + +static int ctr_paes_crypt(struct skcipher_request *req) +{ + struct s390_pctr_req_ctx *req_ctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk *walk = &req_ctx->walk; + int rc; + + /* + * Attempt synchronous encryption first. 
If it fails, schedule the request + * asynchronously via the crypto engine. To preserve execution order, + * once a request is queued to the engine, further requests using the same + * tfm will also be routed through the engine. + */ + + rc = skcipher_walk_virt(walk, req, false); + if (rc) + goto out; + + req_ctx->param_init_done = false; + + /* Try synchronous operation if no active engine usage */ + if (!atomic_read(&ctx->via_engine_ctr)) { + rc = ctr_paes_do_crypt(ctx, req_ctx, false); + if (rc == 0) + goto out; + } + + /* + * If sync operation failed or key expired or there are already + * requests enqueued via engine, fallback to async. Mark tfm as + * using engine to serialize requests. + */ + if (rc == 0 || rc == -EKEYEXPIRED) { + atomic_inc(&ctx->via_engine_ctr); + rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req); + if (rc != -EINPROGRESS) + atomic_dec(&ctx->via_engine_ctr); + } + + if (rc != -EINPROGRESS) + skcipher_walk_done(walk, rc); + +out: + if (rc != -EINPROGRESS) + memzero_explicit(&req_ctx->param, sizeof(req_ctx->param)); + pr_debug("rc=%d\n", rc); + return rc; +} + +static int ctr_paes_init(struct crypto_skcipher *tfm) +{ + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + + memset(ctx, 0, sizeof(*ctx)); + spin_lock_init(&ctx->pk_lock); + + crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pctr_req_ctx)); + + return 0; +} + +static void ctr_paes_exit(struct crypto_skcipher *tfm) +{ + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + + memzero_explicit(ctx, sizeof(*ctx)); +} + +static int ctr_paes_do_one_request(struct crypto_engine *engine, void *areq) +{ + struct skcipher_request *req = skcipher_request_cast(areq); + struct s390_pctr_req_ctx *req_ctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk *walk = &req_ctx->walk; + int rc; - return ctx->fc ? 
0 : -EINVAL; + /* walk has already been prepared */ + + rc = ctr_paes_do_crypt(ctx, req_ctx, true); + if (rc == -EKEYEXPIRED) { + /* + * Protected key expired, conversion is in process. + * Trigger a re-schedule of this request by returning + * -ENOSPC ("hardware queue is full") to the crypto engine. + * To avoid immediately re-invocation of this callback, + * tell the scheduler to voluntarily give up the CPU here. + */ + cond_resched(); + pr_debug("rescheduling request\n"); + return -ENOSPC; + } else if (rc) { + skcipher_walk_done(walk, rc); + } + + memzero_explicit(&req_ctx->param, sizeof(req_ctx->param)); + pr_debug("request complete with rc=%d\n", rc); + local_bh_disable(); + atomic_dec(&ctx->via_engine_ctr); + crypto_finalize_skcipher_request(engine, req, rc); + local_bh_enable(); + return rc; } -static int xts_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int in_keylen) +static struct skcipher_engine_alg ctr_paes_alg = { + .base = { + .base.cra_name = "ctr(paes)", + .base.cra_driver_name = "ctr-paes-s390", + .base.cra_priority = 402, /* ecb-paes-s390 + 1 */ + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct s390_paes_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_list = LIST_HEAD_INIT(ctr_paes_alg.base.base.cra_list), + .init = ctr_paes_init, + .exit = ctr_paes_exit, + .min_keysize = PAES_MIN_KEYSIZE, + .max_keysize = PAES_MAX_KEYSIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ctr_paes_setkey, + .encrypt = ctr_paes_crypt, + .decrypt = ctr_paes_crypt, + .chunksize = AES_BLOCK_SIZE, + }, + .op = { + .do_one_request = ctr_paes_do_one_request, + }, +}; + +/* + * PAES XTS implementation + */ + +struct xts_full_km_param { + u8 key[64]; + u8 tweak[16]; + u8 nap[16]; + u8 wkvp[32]; +} __packed; + +struct xts_km_param { + u8 key[PAES_256_PROTKEY_SIZE]; + u8 init[16]; +} __packed; + +struct xts_pcc_param { + u8 key[PAES_256_PROTKEY_SIZE]; + u8 tweak[16]; + u8 block[16]; + u8 bit[16]; + u8 xts[16]; +} __packed; + +struct 
s390_pxts_req_ctx { + unsigned long modifier; + struct skcipher_walk walk; + bool param_init_done; + union { + struct xts_full_km_param full_km_param; + struct xts_km_param km_param; + } param; +}; + +static int xts_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int in_keylen) { struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); u8 ckey[2 * AES_MAX_KEY_SIZE]; unsigned int ckey_len; + long fc; int rc; if ((in_keylen == 32 || in_keylen == 64) && xts_verify_key(tfm, in_key, in_keylen)) return -EINVAL; - _free_kb_keybuf(&ctx->kb); - rc = _xts_key_to_kb(&ctx->kb, in_key, in_keylen); + /* set raw key into context */ + rc = pxts_ctx_setkey(ctx, in_key, in_keylen); if (rc) - return rc; + goto out; - rc = __xts_paes_set_key(ctx); + /* convert raw key(s) into protected key(s) */ + rc = pxts_convert_key(ctx); if (rc) - return rc; + goto out; /* - * It is not possible on a single protected key (e.g. full AES-XTS) to - * check, if k1 and k2 are the same. - */ - if (ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_128 || - ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_256) - return 0; - /* * xts_verify_key verifies the key length is not odd and makes * sure that the two keys are not the same. This can be done - * on the two protected keys as well + * on the two protected keys as well - but not for full xts keys. */ - ckey_len = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? - AES_KEYSIZE_128 : AES_KEYSIZE_256; - memcpy(ckey, ctx->pk[0].protkey, ckey_len); - memcpy(ckey + ckey_len, ctx->pk[1].protkey, ckey_len); - return xts_verify_key(tfm, ckey, 2*ckey_len); + if (ctx->pk[0].type == PKEY_KEYTYPE_AES_128 || + ctx->pk[0].type == PKEY_KEYTYPE_AES_256) { + ckey_len = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 
+ AES_KEYSIZE_128 : AES_KEYSIZE_256; + memcpy(ckey, ctx->pk[0].protkey, ckey_len); + memcpy(ckey + ckey_len, ctx->pk[1].protkey, ckey_len); + rc = xts_verify_key(tfm, ckey, 2 * ckey_len); + memzero_explicit(ckey, sizeof(ckey)); + if (rc) + goto out; + } + + /* Pick the correct function code based on the protected key type */ + switch (ctx->pk[0].type) { + case PKEY_KEYTYPE_AES_128: + fc = CPACF_KM_PXTS_128; + break; + case PKEY_KEYTYPE_AES_256: + fc = CPACF_KM_PXTS_256; + break; + case PKEY_KEYTYPE_AES_XTS_128: + fc = CPACF_KM_PXTS_128_FULL; + break; + case PKEY_KEYTYPE_AES_XTS_256: + fc = CPACF_KM_PXTS_256_FULL; + break; + default: + fc = 0; + break; + } + ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0; + + rc = fc ? 0 : -EINVAL; + +out: + pr_debug("rc=%d\n", rc); + return rc; } -static int paes_xts_crypt_full(struct skcipher_request *req, - unsigned long modifier) +static int xts_paes_do_crypt_fullkey(struct s390_pxts_ctx *ctx, + struct s390_pxts_req_ctx *req_ctx, + bool maysleep) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct xts_full_km_param *param = &req_ctx->param.full_km_param; + struct skcipher_walk *walk = &req_ctx->walk; unsigned int keylen, offset, nbytes, n, k; - struct { - u8 key[64]; - u8 tweak[16]; - u8 nap[16]; - u8 wkvp[32]; - } fxts_param = { - .nap = {0}, - }; - struct skcipher_walk walk; - int rc; + int rc = 0; - rc = skcipher_walk_virt(&walk, req, false); - if (rc) - return rc; + /* + * The calling function xts_paes_do_crypt() ensures the + * protected key state is always PK_STATE_VALID when this + * function is invoked. + */ keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_128) ? 32 : 64; offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_128) ? 
32 : 0; - spin_lock_bh(&ctx->pk_lock); - memcpy(fxts_param.key + offset, ctx->pk[0].protkey, keylen); - memcpy(fxts_param.wkvp, ctx->pk[0].protkey + keylen, - sizeof(fxts_param.wkvp)); - spin_unlock_bh(&ctx->pk_lock); - memcpy(fxts_param.tweak, walk.iv, sizeof(fxts_param.tweak)); - fxts_param.nap[0] = 0x01; /* initial alpha power (1, little-endian) */ + if (!req_ctx->param_init_done) { + memset(param, 0, sizeof(*param)); + spin_lock_bh(&ctx->pk_lock); + memcpy(param->key + offset, ctx->pk[0].protkey, keylen); + memcpy(param->wkvp, ctx->pk[0].protkey + keylen, sizeof(param->wkvp)); + spin_unlock_bh(&ctx->pk_lock); + memcpy(param->tweak, walk->iv, sizeof(param->tweak)); + param->nap[0] = 0x01; /* initial alpha power (1, little-endian) */ + req_ctx->param_init_done = true; + } - while ((nbytes = walk.nbytes) != 0) { + /* + * Note that in case of partial processing or failure the walk + * is NOT unmapped here. So a follow up task may reuse the walk + * or in case of unrecoverable failure needs to unmap it. 
+ */ + while ((nbytes = walk->nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); - k = cpacf_km(ctx->fc | modifier, fxts_param.key + offset, - walk.dst.virt.addr, walk.src.virt.addr, n); + k = cpacf_km(ctx->fc | req_ctx->modifier, param->key + offset, + walk->dst.virt.addr, walk->src.virt.addr, n); if (k) - rc = skcipher_walk_done(&walk, nbytes - k); + rc = skcipher_walk_done(walk, nbytes - k); if (k < n) { - if (__xts_paes_convert_key(ctx)) - return skcipher_walk_done(&walk, -EIO); + if (!maysleep) { + rc = -EKEYEXPIRED; + goto out; + } + rc = pxts_convert_key(ctx); + if (rc) + goto out; spin_lock_bh(&ctx->pk_lock); - memcpy(fxts_param.key + offset, ctx->pk[0].protkey, - keylen); - memcpy(fxts_param.wkvp, ctx->pk[0].protkey + keylen, - sizeof(fxts_param.wkvp)); + memcpy(param->key + offset, ctx->pk[0].protkey, keylen); + memcpy(param->wkvp, ctx->pk[0].protkey + keylen, sizeof(param->wkvp)); spin_unlock_bh(&ctx->pk_lock); } } +out: + pr_debug("rc=%d\n", rc); return rc; } -static int paes_xts_crypt(struct skcipher_request *req, unsigned long modifier) +static inline int __xts_2keys_prep_param(struct s390_pxts_ctx *ctx, + struct xts_km_param *param, + struct skcipher_walk *walk, + unsigned int keylen, + unsigned int offset, bool maysleep) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct xts_pcc_param pcc_param; + unsigned long cc = 1; + int rc = 0; + + while (cc) { + memset(&pcc_param, 0, sizeof(pcc_param)); + memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak)); + spin_lock_bh(&ctx->pk_lock); + memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen); + memcpy(param->key + offset, ctx->pk[0].protkey, keylen); + spin_unlock_bh(&ctx->pk_lock); + cc = cpacf_pcc(ctx->fc, pcc_param.key + offset); + if (cc) { + if (!maysleep) { + rc = -EKEYEXPIRED; + break; + } + rc = pxts_convert_key(ctx); + if (rc) + break; + continue; + } + memcpy(param->init, 
pcc_param.xts, 16); + } + + memzero_explicit(pcc_param.key, sizeof(pcc_param.key)); + return rc; +} + +static int xts_paes_do_crypt_2keys(struct s390_pxts_ctx *ctx, + struct s390_pxts_req_ctx *req_ctx, + bool maysleep) +{ + struct xts_km_param *param = &req_ctx->param.km_param; + struct skcipher_walk *walk = &req_ctx->walk; unsigned int keylen, offset, nbytes, n, k; - struct { - u8 key[PAES_256_PROTKEY_SIZE]; - u8 tweak[16]; - u8 block[16]; - u8 bit[16]; - u8 xts[16]; - } pcc_param; - struct { - u8 key[PAES_256_PROTKEY_SIZE]; - u8 init[16]; - } xts_param; - struct skcipher_walk walk; - int rc; + int rc = 0; - rc = skcipher_walk_virt(&walk, req, false); - if (rc) - return rc; + /* + * The calling function xts_paes_do_crypt() ensures the + * protected key state is always PK_STATE_VALID when this + * function is invoked. + */ keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 48 : 64; offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 16 : 0; - memset(&pcc_param, 0, sizeof(pcc_param)); - memcpy(pcc_param.tweak, walk.iv, sizeof(pcc_param.tweak)); - spin_lock_bh(&ctx->pk_lock); - memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen); - memcpy(xts_param.key + offset, ctx->pk[0].protkey, keylen); - spin_unlock_bh(&ctx->pk_lock); - cpacf_pcc(ctx->fc, pcc_param.key + offset); - memcpy(xts_param.init, pcc_param.xts, 16); + if (!req_ctx->param_init_done) { + rc = __xts_2keys_prep_param(ctx, param, walk, + keylen, offset, maysleep); + if (rc) + goto out; + req_ctx->param_init_done = true; + } - while ((nbytes = walk.nbytes) != 0) { + /* + * Note that in case of partial processing or failure the walk + * is NOT unmapped here. So a follow up task may reuse the walk + * or in case of unrecoverable failure needs to unmap it. 
+ */ + while ((nbytes = walk->nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); - k = cpacf_km(ctx->fc | modifier, xts_param.key + offset, - walk.dst.virt.addr, walk.src.virt.addr, n); + k = cpacf_km(ctx->fc | req_ctx->modifier, param->key + offset, + walk->dst.virt.addr, walk->src.virt.addr, n); if (k) - rc = skcipher_walk_done(&walk, nbytes - k); + rc = skcipher_walk_done(walk, nbytes - k); if (k < n) { - if (__xts_paes_convert_key(ctx)) - return skcipher_walk_done(&walk, -EIO); + if (!maysleep) { + rc = -EKEYEXPIRED; + goto out; + } + rc = pxts_convert_key(ctx); + if (rc) + goto out; spin_lock_bh(&ctx->pk_lock); - memcpy(xts_param.key + offset, - ctx->pk[0].protkey, keylen); + memcpy(param->key + offset, ctx->pk[0].protkey, keylen); spin_unlock_bh(&ctx->pk_lock); } } +out: + pr_debug("rc=%d\n", rc); return rc; } -static inline int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier) +static int xts_paes_do_crypt(struct s390_pxts_ctx *ctx, + struct s390_pxts_req_ctx *req_ctx, + bool maysleep) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + int pk_state, rc = 0; + + /* fetch and check protected key state */ + spin_lock_bh(&ctx->pk_lock); + pk_state = ctx->pk_state; + switch (pk_state) { + case PK_STATE_NO_KEY: + rc = -ENOKEY; + break; + case PK_STATE_CONVERT_IN_PROGRESS: + rc = -EKEYEXPIRED; + break; + case PK_STATE_VALID: + break; + default: + rc = pk_state < 0 ? pk_state : -EIO; + break; + } + spin_unlock_bh(&ctx->pk_lock); + if (rc) + goto out; + /* Call the 'real' crypt function based on the xts prot key type. 
*/ switch (ctx->fc) { case CPACF_KM_PXTS_128: case CPACF_KM_PXTS_256: - return paes_xts_crypt(req, modifier); + rc = xts_paes_do_crypt_2keys(ctx, req_ctx, maysleep); + break; case CPACF_KM_PXTS_128_FULL: case CPACF_KM_PXTS_256_FULL: - return paes_xts_crypt_full(req, modifier); + rc = xts_paes_do_crypt_fullkey(ctx, req_ctx, maysleep); + break; default: - return -EINVAL; + rc = -EINVAL; } -} -static int xts_paes_encrypt(struct skcipher_request *req) -{ - return xts_paes_crypt(req, 0); +out: + pr_debug("rc=%d\n", rc); + return rc; } -static int xts_paes_decrypt(struct skcipher_request *req) +static inline int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier) { - return xts_paes_crypt(req, CPACF_DECRYPT); -} + struct s390_pxts_req_ctx *req_ctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk *walk = &req_ctx->walk; + int rc; -static struct skcipher_alg xts_paes_alg = { - .base.cra_name = "xts(paes)", - .base.cra_driver_name = "xts-paes-s390", - .base.cra_priority = 402, /* ecb-paes-s390 + 1 */ - .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct s390_pxts_ctx), - .base.cra_module = THIS_MODULE, - .base.cra_list = LIST_HEAD_INIT(xts_paes_alg.base.cra_list), - .init = xts_paes_init, - .exit = xts_paes_exit, - .min_keysize = 2 * PAES_MIN_KEYSIZE, - .max_keysize = 2 * PAES_MAX_KEYSIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = xts_paes_set_key, - .encrypt = xts_paes_encrypt, - .decrypt = xts_paes_decrypt, -}; + /* + * Attempt synchronous encryption first. If it fails, schedule the request + * asynchronously via the crypto engine. To preserve execution order, + * once a request is queued to the engine, further requests using the same + * tfm will also be routed through the engine. 
+ */ -static int ctr_paes_init(struct crypto_skcipher *tfm) -{ - struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + rc = skcipher_walk_virt(walk, req, false); + if (rc) + goto out; - ctx->kb.key = NULL; - spin_lock_init(&ctx->pk_lock); + req_ctx->modifier = modifier; + req_ctx->param_init_done = false; - return 0; -} + /* Try synchronous operation if no active engine usage */ + if (!atomic_read(&ctx->via_engine_ctr)) { + rc = xts_paes_do_crypt(ctx, req_ctx, false); + if (rc == 0) + goto out; + } -static void ctr_paes_exit(struct crypto_skcipher *tfm) -{ - struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + /* + * If sync operation failed or key expired or there are already + * requests enqueued via engine, fallback to async. Mark tfm as + * using engine to serialize requests. + */ + if (rc == 0 || rc == -EKEYEXPIRED) { + atomic_inc(&ctx->via_engine_ctr); + rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req); + if (rc != -EINPROGRESS) + atomic_dec(&ctx->via_engine_ctr); + } + + if (rc != -EINPROGRESS) + skcipher_walk_done(walk, rc); - _free_kb_keybuf(&ctx->kb); +out: + if (rc != -EINPROGRESS) + memzero_explicit(&req_ctx->param, sizeof(req_ctx->param)); + pr_debug("rc=%d\n", rc); + return rc; } -static inline int __ctr_paes_set_key(struct s390_paes_ctx *ctx) +static int xts_paes_encrypt(struct skcipher_request *req) { - unsigned long fc; - int rc; - - rc = __paes_convert_key(ctx); - if (rc) - return rc; - - /* Pick the correct function code based on the protected key type */ - fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMCTR_PAES_128 : - (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KMCTR_PAES_192 : - (ctx->pk.type == PKEY_KEYTYPE_AES_256) ? - CPACF_KMCTR_PAES_256 : 0; - - /* Check if the function code is available */ - ctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0; + return xts_paes_crypt(req, 0); +} - return ctx->fc ? 
0 : -EINVAL; +static int xts_paes_decrypt(struct skcipher_request *req) +{ + return xts_paes_crypt(req, CPACF_DECRYPT); } -static int ctr_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int key_len) +static int xts_paes_init(struct crypto_skcipher *tfm) { - struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - int rc; + struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); - _free_kb_keybuf(&ctx->kb); - rc = _key_to_kb(&ctx->kb, in_key, key_len); - if (rc) - return rc; + memset(ctx, 0, sizeof(*ctx)); + spin_lock_init(&ctx->pk_lock); - return __ctr_paes_set_key(ctx); + crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pxts_req_ctx)); + + return 0; } -static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes) +static void xts_paes_exit(struct crypto_skcipher *tfm) { - unsigned int i, n; + struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); - /* only use complete blocks, max. PAGE_SIZE */ - memcpy(ctrptr, iv, AES_BLOCK_SIZE); - n = (nbytes > PAGE_SIZE) ? 
PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1); - for (i = (n / AES_BLOCK_SIZE) - 1; i > 0; i--) { - memcpy(ctrptr + AES_BLOCK_SIZE, ctrptr, AES_BLOCK_SIZE); - crypto_inc(ctrptr + AES_BLOCK_SIZE, AES_BLOCK_SIZE); - ctrptr += AES_BLOCK_SIZE; - } - return n; + memzero_explicit(ctx, sizeof(*ctx)); } -static int ctr_paes_crypt(struct skcipher_request *req) +static int xts_paes_do_one_request(struct crypto_engine *engine, void *areq) { + struct skcipher_request *req = skcipher_request_cast(areq); + struct s390_pxts_req_ctx *req_ctx = skcipher_request_ctx(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - u8 buf[AES_BLOCK_SIZE], *ctrptr; - struct { - u8 key[PAES_256_PROTKEY_SIZE]; - } param; - struct skcipher_walk walk; - unsigned int nbytes, n, k; - int rc, locked; - - rc = skcipher_walk_virt(&walk, req, false); - if (rc) - return rc; - - spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); - spin_unlock_bh(&ctx->pk_lock); - - locked = mutex_trylock(&ctrblk_lock); + struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk *walk = &req_ctx->walk; + int rc; - while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { - n = AES_BLOCK_SIZE; - if (nbytes >= 2*AES_BLOCK_SIZE && locked) - n = __ctrblk_init(ctrblk, walk.iv, nbytes); - ctrptr = (n > AES_BLOCK_SIZE) ? 
ctrblk : walk.iv; - k = cpacf_kmctr(ctx->fc, ¶m, walk.dst.virt.addr, - walk.src.virt.addr, n, ctrptr); - if (k) { - if (ctrptr == ctrblk) - memcpy(walk.iv, ctrptr + k - AES_BLOCK_SIZE, - AES_BLOCK_SIZE); - crypto_inc(walk.iv, AES_BLOCK_SIZE); - rc = skcipher_walk_done(&walk, nbytes - k); - } - if (k < n) { - if (__paes_convert_key(ctx)) { - if (locked) - mutex_unlock(&ctrblk_lock); - return skcipher_walk_done(&walk, -EIO); - } - spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); - spin_unlock_bh(&ctx->pk_lock); - } - } - if (locked) - mutex_unlock(&ctrblk_lock); - /* - * final block may be < AES_BLOCK_SIZE, copy only nbytes - */ - if (nbytes) { - memset(buf, 0, AES_BLOCK_SIZE); - memcpy(buf, walk.src.virt.addr, nbytes); - while (1) { - if (cpacf_kmctr(ctx->fc, ¶m, buf, - buf, AES_BLOCK_SIZE, - walk.iv) == AES_BLOCK_SIZE) - break; - if (__paes_convert_key(ctx)) - return skcipher_walk_done(&walk, -EIO); - spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); - spin_unlock_bh(&ctx->pk_lock); - } - memcpy(walk.dst.virt.addr, buf, nbytes); - crypto_inc(walk.iv, AES_BLOCK_SIZE); - rc = skcipher_walk_done(&walk, nbytes); + /* walk has already been prepared */ + + rc = xts_paes_do_crypt(ctx, req_ctx, true); + if (rc == -EKEYEXPIRED) { + /* + * Protected key expired, conversion is in process. + * Trigger a re-schedule of this request by returning + * -ENOSPC ("hardware queue is full") to the crypto engine. + * To avoid immediately re-invocation of this callback, + * tell the scheduler to voluntarily give up the CPU here. 
+ */ + cond_resched(); + pr_debug("rescheduling request\n"); + return -ENOSPC; + } else if (rc) { + skcipher_walk_done(walk, rc); } + memzero_explicit(&req_ctx->param, sizeof(req_ctx->param)); + pr_debug("request complete with rc=%d\n", rc); + local_bh_disable(); + atomic_dec(&ctx->via_engine_ctr); + crypto_finalize_skcipher_request(engine, req, rc); + local_bh_enable(); return rc; } -static struct skcipher_alg ctr_paes_alg = { - .base.cra_name = "ctr(paes)", - .base.cra_driver_name = "ctr-paes-s390", - .base.cra_priority = 402, /* ecb-paes-s390 + 1 */ - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct s390_paes_ctx), - .base.cra_module = THIS_MODULE, - .base.cra_list = LIST_HEAD_INIT(ctr_paes_alg.base.cra_list), - .init = ctr_paes_init, - .exit = ctr_paes_exit, - .min_keysize = PAES_MIN_KEYSIZE, - .max_keysize = PAES_MAX_KEYSIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ctr_paes_set_key, - .encrypt = ctr_paes_crypt, - .decrypt = ctr_paes_crypt, - .chunksize = AES_BLOCK_SIZE, +static struct skcipher_engine_alg xts_paes_alg = { + .base = { + .base.cra_name = "xts(paes)", + .base.cra_driver_name = "xts-paes-s390", + .base.cra_priority = 402, /* ecb-paes-s390 + 1 */ + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_pxts_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_list = LIST_HEAD_INIT(xts_paes_alg.base.base.cra_list), + .init = xts_paes_init, + .exit = xts_paes_exit, + .min_keysize = 2 * PAES_MIN_KEYSIZE, + .max_keysize = 2 * PAES_MAX_KEYSIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_paes_setkey, + .encrypt = xts_paes_encrypt, + .decrypt = xts_paes_decrypt, + }, + .op = { + .do_one_request = xts_paes_do_one_request, + }, }; -static inline void __crypto_unregister_skcipher(struct skcipher_alg *alg) +/* + * alg register, unregister, module init, exit + */ + +static struct miscdevice paes_dev = { + .name = "paes", + .minor = MISC_DYNAMIC_MINOR, +}; + +static inline void __crypto_unregister_skcipher(struct 
skcipher_engine_alg *alg) { - if (!list_empty(&alg->base.cra_list)) - crypto_unregister_skcipher(alg); + if (!list_empty(&alg->base.base.cra_list)) + crypto_engine_unregister_skcipher(alg); } static void paes_s390_fini(void) { + if (paes_crypto_engine) { + crypto_engine_stop(paes_crypto_engine); + crypto_engine_exit(paes_crypto_engine); + } __crypto_unregister_skcipher(&ctr_paes_alg); __crypto_unregister_skcipher(&xts_paes_alg); __crypto_unregister_skcipher(&cbc_paes_alg); __crypto_unregister_skcipher(&ecb_paes_alg); if (ctrblk) - free_page((unsigned long) ctrblk); + free_page((unsigned long)ctrblk); + misc_deregister(&paes_dev); } static int __init paes_s390_init(void) { int rc; + /* register a simple paes pseudo misc device */ + rc = misc_register(&paes_dev); + if (rc) + return rc; + + /* with this pseudo devie alloc and start a crypto engine */ + paes_crypto_engine = + crypto_engine_alloc_init_and_set(paes_dev.this_device, + true, NULL, false, MAX_QLEN); + if (!paes_crypto_engine) { + rc = -ENOMEM; + goto out_err; + } + rc = crypto_engine_start(paes_crypto_engine); + if (rc) { + crypto_engine_exit(paes_crypto_engine); + paes_crypto_engine = NULL; + goto out_err; + } + /* Query available functions for KM, KMC and KMCTR */ cpacf_query(CPACF_KM, &km_functions); cpacf_query(CPACF_KMC, &kmc_functions); @@ -927,40 +1653,45 @@ static int __init paes_s390_init(void) if (cpacf_test_func(&km_functions, CPACF_KM_PAES_128) || cpacf_test_func(&km_functions, CPACF_KM_PAES_192) || cpacf_test_func(&km_functions, CPACF_KM_PAES_256)) { - rc = crypto_register_skcipher(&ecb_paes_alg); + rc = crypto_engine_register_skcipher(&ecb_paes_alg); if (rc) goto out_err; + pr_debug("%s registered\n", ecb_paes_alg.base.base.cra_driver_name); } if (cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) || cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) || cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256)) { - rc = crypto_register_skcipher(&cbc_paes_alg); + rc = 
crypto_engine_register_skcipher(&cbc_paes_alg); if (rc) goto out_err; + pr_debug("%s registered\n", cbc_paes_alg.base.base.cra_driver_name); } if (cpacf_test_func(&km_functions, CPACF_KM_PXTS_128) || cpacf_test_func(&km_functions, CPACF_KM_PXTS_256)) { - rc = crypto_register_skcipher(&xts_paes_alg); + rc = crypto_engine_register_skcipher(&xts_paes_alg); if (rc) goto out_err; + pr_debug("%s registered\n", xts_paes_alg.base.base.cra_driver_name); } if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_128) || cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_192) || cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_256)) { - ctrblk = (u8 *) __get_free_page(GFP_KERNEL); + ctrblk = (u8 *)__get_free_page(GFP_KERNEL); if (!ctrblk) { rc = -ENOMEM; goto out_err; } - rc = crypto_register_skcipher(&ctr_paes_alg); + rc = crypto_engine_register_skcipher(&ctr_paes_alg); if (rc) goto out_err; + pr_debug("%s registered\n", ctr_paes_alg.base.base.cra_driver_name); } return 0; + out_err: paes_s390_fini(); return rc; diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h index 2bb22db54c31..d757ccbce2b4 100644 --- a/arch/s390/crypto/sha.h +++ b/arch/s390/crypto/sha.h @@ -10,27 +10,33 @@ #ifndef _CRYPTO_ARCH_S390_SHA_H #define _CRYPTO_ARCH_S390_SHA_H -#include <linux/crypto.h> -#include <crypto/sha1.h> #include <crypto/sha2.h> #include <crypto/sha3.h> +#include <linux/types.h> /* must be big enough for the largest SHA variant */ -#define SHA3_STATE_SIZE 200 #define CPACF_MAX_PARMBLOCK_SIZE SHA3_STATE_SIZE #define SHA_MAX_BLOCK_SIZE SHA3_224_BLOCK_SIZE +#define S390_SHA_CTX_SIZE sizeof(struct s390_sha_ctx) struct s390_sha_ctx { u64 count; /* message length in bytes */ - u32 state[CPACF_MAX_PARMBLOCK_SIZE / sizeof(u32)]; - u8 buf[SHA_MAX_BLOCK_SIZE]; + union { + u32 state[CPACF_MAX_PARMBLOCK_SIZE / sizeof(u32)]; + struct { + u64 state[SHA512_DIGEST_SIZE / sizeof(u64)]; + u64 count_hi; + } sha512; + }; int func; /* KIMD function to use */ - int first_message_part; + bool 
first_message_part; }; struct shash_desc; -int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len); -int s390_sha_final(struct shash_desc *desc, u8 *out); +int s390_sha_update_blocks(struct shash_desc *desc, const u8 *data, + unsigned int len); +int s390_sha_finup(struct shash_desc *desc, const u8 *src, unsigned int len, + u8 *out); #endif diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index bc3a22704e09..9b0d55be1239 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -18,12 +18,12 @@ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> */ +#include <asm/cpacf.h> #include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/cpufeature.h> #include <crypto/sha1.h> -#include <asm/cpacf.h> +#include <linux/cpufeature.h> +#include <linux/kernel.h> +#include <linux/module.h> #include "sha.h" @@ -38,6 +38,7 @@ static int s390_sha1_init(struct shash_desc *desc) sctx->state[4] = SHA1_H4; sctx->count = 0; sctx->func = CPACF_KIMD_SHA_1; + sctx->first_message_part = 0; return 0; } @@ -49,7 +50,6 @@ static int s390_sha1_export(struct shash_desc *desc, void *out) octx->count = sctx->count; memcpy(octx->state, sctx->state, sizeof(octx->state)); - memcpy(octx->buffer, sctx->buf, sizeof(octx->buffer)); return 0; } @@ -60,24 +60,26 @@ static int s390_sha1_import(struct shash_desc *desc, const void *in) sctx->count = ictx->count; memcpy(sctx->state, ictx->state, sizeof(ictx->state)); - memcpy(sctx->buf, ictx->buffer, sizeof(ictx->buffer)); sctx->func = CPACF_KIMD_SHA_1; + sctx->first_message_part = 0; return 0; } static struct shash_alg alg = { .digestsize = SHA1_DIGEST_SIZE, .init = s390_sha1_init, - .update = s390_sha_update, - .final = s390_sha_final, + .update = s390_sha_update_blocks, + .finup = s390_sha_finup, .export = s390_sha1_export, .import = s390_sha1_import, - .descsize = sizeof(struct 
s390_sha_ctx), - .statesize = sizeof(struct sha1_state), + .descsize = S390_SHA_CTX_SIZE, + .statesize = SHA1_STATE_SIZE, .base = { .cra_name = "sha1", .cra_driver_name= "sha1-s390", .cra_priority = 300, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | + CRYPTO_AHASH_ALG_FINUP_MAX, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_module = THIS_MODULE, } diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c deleted file mode 100644 index 6f1ccdf93d3e..000000000000 --- a/arch/s390/crypto/sha256_s390.c +++ /dev/null @@ -1,143 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Cryptographic API. - * - * s390 implementation of the SHA256 and SHA224 Secure Hash Algorithm. - * - * s390 Version: - * Copyright IBM Corp. 2005, 2011 - * Author(s): Jan Glauber (jang@de.ibm.com) - */ -#include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/cpufeature.h> -#include <crypto/sha2.h> -#include <asm/cpacf.h> - -#include "sha.h" - -static int s390_sha256_init(struct shash_desc *desc) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - - sctx->state[0] = SHA256_H0; - sctx->state[1] = SHA256_H1; - sctx->state[2] = SHA256_H2; - sctx->state[3] = SHA256_H3; - sctx->state[4] = SHA256_H4; - sctx->state[5] = SHA256_H5; - sctx->state[6] = SHA256_H6; - sctx->state[7] = SHA256_H7; - sctx->count = 0; - sctx->func = CPACF_KIMD_SHA_256; - - return 0; -} - -static int sha256_export(struct shash_desc *desc, void *out) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - struct sha256_state *octx = out; - - octx->count = sctx->count; - memcpy(octx->state, sctx->state, sizeof(octx->state)); - memcpy(octx->buf, sctx->buf, sizeof(octx->buf)); - return 0; -} - -static int sha256_import(struct shash_desc *desc, const void *in) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - const struct sha256_state *ictx = in; - - sctx->count = ictx->count; - memcpy(sctx->state, ictx->state, sizeof(ictx->state)); - memcpy(sctx->buf, ictx->buf, 
sizeof(ictx->buf)); - sctx->func = CPACF_KIMD_SHA_256; - return 0; -} - -static struct shash_alg sha256_alg = { - .digestsize = SHA256_DIGEST_SIZE, - .init = s390_sha256_init, - .update = s390_sha_update, - .final = s390_sha_final, - .export = sha256_export, - .import = sha256_import, - .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha256_state), - .base = { - .cra_name = "sha256", - .cra_driver_name= "sha256-s390", - .cra_priority = 300, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static int s390_sha224_init(struct shash_desc *desc) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - - sctx->state[0] = SHA224_H0; - sctx->state[1] = SHA224_H1; - sctx->state[2] = SHA224_H2; - sctx->state[3] = SHA224_H3; - sctx->state[4] = SHA224_H4; - sctx->state[5] = SHA224_H5; - sctx->state[6] = SHA224_H6; - sctx->state[7] = SHA224_H7; - sctx->count = 0; - sctx->func = CPACF_KIMD_SHA_256; - - return 0; -} - -static struct shash_alg sha224_alg = { - .digestsize = SHA224_DIGEST_SIZE, - .init = s390_sha224_init, - .update = s390_sha_update, - .final = s390_sha_final, - .export = sha256_export, - .import = sha256_import, - .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha256_state), - .base = { - .cra_name = "sha224", - .cra_driver_name= "sha224-s390", - .cra_priority = 300, - .cra_blocksize = SHA224_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static int __init sha256_s390_init(void) -{ - int ret; - - if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256)) - return -ENODEV; - ret = crypto_register_shash(&sha256_alg); - if (ret < 0) - goto out; - ret = crypto_register_shash(&sha224_alg); - if (ret < 0) - crypto_unregister_shash(&sha256_alg); -out: - return ret; -} - -static void __exit sha256_s390_fini(void) -{ - crypto_unregister_shash(&sha224_alg); - crypto_unregister_shash(&sha256_alg); -} - -module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha256_s390_init); -module_exit(sha256_s390_fini); - 
-MODULE_ALIAS_CRYPTO("sha256"); -MODULE_ALIAS_CRYPTO("sha224"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA256 and SHA224 Secure Hash Algorithm"); diff --git a/arch/s390/crypto/sha3_256_s390.c b/arch/s390/crypto/sha3_256_s390.c index a84ef692f572..4a7731ac6bcd 100644 --- a/arch/s390/crypto/sha3_256_s390.c +++ b/arch/s390/crypto/sha3_256_s390.c @@ -8,12 +8,14 @@ * Copyright IBM Corp. 2019 * Author(s): Joerg Schmidbauer (jschmidb@de.ibm.com) */ +#include <asm/cpacf.h> #include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/cpufeature.h> #include <crypto/sha3.h> -#include <asm/cpacf.h> +#include <linux/cpufeature.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> #include "sha.h" @@ -21,11 +23,11 @@ static int sha3_256_init(struct shash_desc *desc) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - if (!test_facility(86)) /* msa 12 */ + sctx->first_message_part = test_facility(86); + if (!sctx->first_message_part) memset(sctx->state, 0, sizeof(sctx->state)); sctx->count = 0; sctx->func = CPACF_KIMD_SHA3_256; - sctx->first_message_part = 1; return 0; } @@ -35,11 +37,11 @@ static int sha3_256_export(struct shash_desc *desc, void *out) struct s390_sha_ctx *sctx = shash_desc_ctx(desc); struct sha3_state *octx = out; - octx->rsiz = sctx->count; + if (sctx->first_message_part) { + memset(sctx->state, 0, sizeof(sctx->state)); + sctx->first_message_part = 0; + } memcpy(octx->st, sctx->state, sizeof(octx->st)); - memcpy(octx->buf, sctx->buf, sizeof(octx->buf)); - octx->partial = sctx->first_message_part; - return 0; } @@ -48,10 +50,9 @@ static int sha3_256_import(struct shash_desc *desc, const void *in) struct s390_sha_ctx *sctx = shash_desc_ctx(desc); const struct sha3_state *ictx = in; - sctx->count = ictx->rsiz; + sctx->count = 0; memcpy(sctx->state, ictx->st, sizeof(ictx->st)); - memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); - sctx->first_message_part = 
ictx->partial; + sctx->first_message_part = 0; sctx->func = CPACF_KIMD_SHA3_256; return 0; @@ -60,30 +61,26 @@ static int sha3_256_import(struct shash_desc *desc, const void *in) static int sha3_224_import(struct shash_desc *desc, const void *in) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - const struct sha3_state *ictx = in; - sctx->count = ictx->rsiz; - memcpy(sctx->state, ictx->st, sizeof(ictx->st)); - memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); - sctx->first_message_part = ictx->partial; + sha3_256_import(desc, in); sctx->func = CPACF_KIMD_SHA3_224; - return 0; } static struct shash_alg sha3_256_alg = { .digestsize = SHA3_256_DIGEST_SIZE, /* = 32 */ .init = sha3_256_init, - .update = s390_sha_update, - .final = s390_sha_final, + .update = s390_sha_update_blocks, + .finup = s390_sha_finup, .export = sha3_256_export, .import = sha3_256_import, - .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha3_state), + .descsize = S390_SHA_CTX_SIZE, + .statesize = SHA3_STATE_SIZE, .base = { .cra_name = "sha3-256", .cra_driver_name = "sha3-256-s390", .cra_priority = 300, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = SHA3_256_BLOCK_SIZE, .cra_module = THIS_MODULE, } @@ -93,28 +90,25 @@ static int sha3_224_init(struct shash_desc *desc) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - if (!test_facility(86)) /* msa 12 */ - memset(sctx->state, 0, sizeof(sctx->state)); - sctx->count = 0; + sha3_256_init(desc); sctx->func = CPACF_KIMD_SHA3_224; - sctx->first_message_part = 1; - return 0; } static struct shash_alg sha3_224_alg = { .digestsize = SHA3_224_DIGEST_SIZE, .init = sha3_224_init, - .update = s390_sha_update, - .final = s390_sha_final, + .update = s390_sha_update_blocks, + .finup = s390_sha_finup, .export = sha3_256_export, /* same as for 256 */ .import = sha3_224_import, /* function code different! 
*/ - .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha3_state), + .descsize = S390_SHA_CTX_SIZE, + .statesize = SHA3_STATE_SIZE, .base = { .cra_name = "sha3-224", .cra_driver_name = "sha3-224-s390", .cra_priority = 300, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = SHA3_224_BLOCK_SIZE, .cra_module = THIS_MODULE, } diff --git a/arch/s390/crypto/sha3_512_s390.c b/arch/s390/crypto/sha3_512_s390.c index 07528fc98ff7..018f02fff444 100644 --- a/arch/s390/crypto/sha3_512_s390.c +++ b/arch/s390/crypto/sha3_512_s390.c @@ -7,12 +7,14 @@ * Copyright IBM Corp. 2019 * Author(s): Joerg Schmidbauer (jschmidb@de.ibm.com) */ +#include <asm/cpacf.h> #include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/cpufeature.h> #include <crypto/sha3.h> -#include <asm/cpacf.h> +#include <linux/cpufeature.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> #include "sha.h" @@ -20,11 +22,11 @@ static int sha3_512_init(struct shash_desc *desc) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - if (!test_facility(86)) /* msa 12 */ + sctx->first_message_part = test_facility(86); + if (!sctx->first_message_part) memset(sctx->state, 0, sizeof(sctx->state)); sctx->count = 0; sctx->func = CPACF_KIMD_SHA3_512; - sctx->first_message_part = 1; return 0; } @@ -34,13 +36,12 @@ static int sha3_512_export(struct shash_desc *desc, void *out) struct s390_sha_ctx *sctx = shash_desc_ctx(desc); struct sha3_state *octx = out; - octx->rsiz = sctx->count; - octx->rsizw = sctx->count >> 32; + if (sctx->first_message_part) { + memset(sctx->state, 0, sizeof(sctx->state)); + sctx->first_message_part = 0; + } memcpy(octx->st, sctx->state, sizeof(octx->st)); - memcpy(octx->buf, sctx->buf, sizeof(octx->buf)); - octx->partial = sctx->first_message_part; - return 0; } @@ -49,13 +50,9 @@ static int sha3_512_import(struct shash_desc *desc, const void *in) struct s390_sha_ctx *sctx = 
shash_desc_ctx(desc); const struct sha3_state *ictx = in; - if (unlikely(ictx->rsizw)) - return -ERANGE; - sctx->count = ictx->rsiz; - + sctx->count = 0; memcpy(sctx->state, ictx->st, sizeof(ictx->st)); - memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); - sctx->first_message_part = ictx->partial; + sctx->first_message_part = 0; sctx->func = CPACF_KIMD_SHA3_512; return 0; @@ -64,33 +61,26 @@ static int sha3_512_import(struct shash_desc *desc, const void *in) static int sha3_384_import(struct shash_desc *desc, const void *in) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - const struct sha3_state *ictx = in; - if (unlikely(ictx->rsizw)) - return -ERANGE; - sctx->count = ictx->rsiz; - - memcpy(sctx->state, ictx->st, sizeof(ictx->st)); - memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); - sctx->first_message_part = ictx->partial; + sha3_512_import(desc, in); sctx->func = CPACF_KIMD_SHA3_384; - return 0; } static struct shash_alg sha3_512_alg = { .digestsize = SHA3_512_DIGEST_SIZE, .init = sha3_512_init, - .update = s390_sha_update, - .final = s390_sha_final, + .update = s390_sha_update_blocks, + .finup = s390_sha_finup, .export = sha3_512_export, .import = sha3_512_import, - .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha3_state), + .descsize = S390_SHA_CTX_SIZE, + .statesize = SHA3_STATE_SIZE, .base = { .cra_name = "sha3-512", .cra_driver_name = "sha3-512-s390", .cra_priority = 300, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = SHA3_512_BLOCK_SIZE, .cra_module = THIS_MODULE, } @@ -102,28 +92,25 @@ static int sha3_384_init(struct shash_desc *desc) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - if (!test_facility(86)) /* msa 12 */ - memset(sctx->state, 0, sizeof(sctx->state)); - sctx->count = 0; + sha3_512_init(desc); sctx->func = CPACF_KIMD_SHA3_384; - sctx->first_message_part = 1; - return 0; } static struct shash_alg sha3_384_alg = { .digestsize = SHA3_384_DIGEST_SIZE, .init = sha3_384_init, - .update = 
s390_sha_update, - .final = s390_sha_final, + .update = s390_sha_update_blocks, + .finup = s390_sha_finup, .export = sha3_512_export, /* same as for 512 */ .import = sha3_384_import, /* function code different! */ - .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha3_state), + .descsize = S390_SHA_CTX_SIZE, + .statesize = SHA3_STATE_SIZE, .base = { .cra_name = "sha3-384", .cra_driver_name = "sha3-384-s390", .cra_priority = 300, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = SHA3_384_BLOCK_SIZE, .cra_ctxsize = sizeof(struct s390_sha_ctx), .cra_module = THIS_MODULE, diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index 04f11c407763..6cbbf5e8555f 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -7,14 +7,13 @@ * Copyright IBM Corp. 2007 * Author(s): Jan Glauber (jang@de.ibm.com) */ +#include <asm/cpacf.h> #include <crypto/internal/hash.h> #include <crypto/sha2.h> +#include <linux/cpufeature.h> #include <linux/errno.h> -#include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> -#include <linux/cpufeature.h> -#include <asm/cpacf.h> #include "sha.h" @@ -22,16 +21,18 @@ static int sha512_init(struct shash_desc *desc) { struct s390_sha_ctx *ctx = shash_desc_ctx(desc); - *(__u64 *)&ctx->state[0] = SHA512_H0; - *(__u64 *)&ctx->state[2] = SHA512_H1; - *(__u64 *)&ctx->state[4] = SHA512_H2; - *(__u64 *)&ctx->state[6] = SHA512_H3; - *(__u64 *)&ctx->state[8] = SHA512_H4; - *(__u64 *)&ctx->state[10] = SHA512_H5; - *(__u64 *)&ctx->state[12] = SHA512_H6; - *(__u64 *)&ctx->state[14] = SHA512_H7; + ctx->sha512.state[0] = SHA512_H0; + ctx->sha512.state[1] = SHA512_H1; + ctx->sha512.state[2] = SHA512_H2; + ctx->sha512.state[3] = SHA512_H3; + ctx->sha512.state[4] = SHA512_H4; + ctx->sha512.state[5] = SHA512_H5; + ctx->sha512.state[6] = SHA512_H6; + ctx->sha512.state[7] = SHA512_H7; ctx->count = 0; + ctx->sha512.count_hi = 0; ctx->func = CPACF_KIMD_SHA_512; + 
ctx->first_message_part = 0; return 0; } @@ -42,9 +43,8 @@ static int sha512_export(struct shash_desc *desc, void *out) struct sha512_state *octx = out; octx->count[0] = sctx->count; - octx->count[1] = 0; + octx->count[1] = sctx->sha512.count_hi; memcpy(octx->state, sctx->state, sizeof(octx->state)); - memcpy(octx->buf, sctx->buf, sizeof(octx->buf)); return 0; } @@ -53,29 +53,30 @@ static int sha512_import(struct shash_desc *desc, const void *in) struct s390_sha_ctx *sctx = shash_desc_ctx(desc); const struct sha512_state *ictx = in; - if (unlikely(ictx->count[1])) - return -ERANGE; sctx->count = ictx->count[0]; + sctx->sha512.count_hi = ictx->count[1]; memcpy(sctx->state, ictx->state, sizeof(ictx->state)); - memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); sctx->func = CPACF_KIMD_SHA_512; + sctx->first_message_part = 0; return 0; } static struct shash_alg sha512_alg = { .digestsize = SHA512_DIGEST_SIZE, .init = sha512_init, - .update = s390_sha_update, - .final = s390_sha_final, + .update = s390_sha_update_blocks, + .finup = s390_sha_finup, .export = sha512_export, .import = sha512_import, .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha512_state), + .statesize = SHA512_STATE_SIZE, .base = { .cra_name = "sha512", .cra_driver_name= "sha512-s390", .cra_priority = 300, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | + CRYPTO_AHASH_ALG_FINUP_MAX, .cra_blocksize = SHA512_BLOCK_SIZE, .cra_module = THIS_MODULE, } @@ -87,16 +88,18 @@ static int sha384_init(struct shash_desc *desc) { struct s390_sha_ctx *ctx = shash_desc_ctx(desc); - *(__u64 *)&ctx->state[0] = SHA384_H0; - *(__u64 *)&ctx->state[2] = SHA384_H1; - *(__u64 *)&ctx->state[4] = SHA384_H2; - *(__u64 *)&ctx->state[6] = SHA384_H3; - *(__u64 *)&ctx->state[8] = SHA384_H4; - *(__u64 *)&ctx->state[10] = SHA384_H5; - *(__u64 *)&ctx->state[12] = SHA384_H6; - *(__u64 *)&ctx->state[14] = SHA384_H7; + ctx->sha512.state[0] = SHA384_H0; + ctx->sha512.state[1] = SHA384_H1; + ctx->sha512.state[2] = 
SHA384_H2; + ctx->sha512.state[3] = SHA384_H3; + ctx->sha512.state[4] = SHA384_H4; + ctx->sha512.state[5] = SHA384_H5; + ctx->sha512.state[6] = SHA384_H6; + ctx->sha512.state[7] = SHA384_H7; ctx->count = 0; + ctx->sha512.count_hi = 0; ctx->func = CPACF_KIMD_SHA_512; + ctx->first_message_part = 0; return 0; } @@ -104,17 +107,19 @@ static int sha384_init(struct shash_desc *desc) static struct shash_alg sha384_alg = { .digestsize = SHA384_DIGEST_SIZE, .init = sha384_init, - .update = s390_sha_update, - .final = s390_sha_final, + .update = s390_sha_update_blocks, + .finup = s390_sha_finup, .export = sha512_export, .import = sha512_import, .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha512_state), + .statesize = SHA512_STATE_SIZE, .base = { .cra_name = "sha384", .cra_driver_name= "sha384-s390", .cra_priority = 300, .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | + CRYPTO_AHASH_ALG_FINUP_MAX, .cra_ctxsize = sizeof(struct s390_sha_ctx), .cra_module = THIS_MODULE, } diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c index 961d7d522af1..b5e2c365ea05 100644 --- a/arch/s390/crypto/sha_common.c +++ b/arch/s390/crypto/sha_common.c @@ -13,50 +13,33 @@ #include <asm/cpacf.h> #include "sha.h" -int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len) +int s390_sha_update_blocks(struct shash_desc *desc, const u8 *data, + unsigned int len) { - struct s390_sha_ctx *ctx = shash_desc_ctx(desc); unsigned int bsize = crypto_shash_blocksize(desc->tfm); - unsigned int index, n; + struct s390_sha_ctx *ctx = shash_desc_ctx(desc); + unsigned int n; int fc; - /* how much is already in the buffer? */ - index = ctx->count % bsize; - ctx->count += len; - - if ((index + len) < bsize) - goto store; - fc = ctx->func; if (ctx->first_message_part) - fc |= test_facility(86) ? 
CPACF_KIMD_NIP : 0; - - /* process one stored block */ - if (index) { - memcpy(ctx->buf + index, data, bsize - index); - cpacf_kimd(fc, ctx->state, ctx->buf, bsize); - ctx->first_message_part = 0; - fc &= ~CPACF_KIMD_NIP; - data += bsize - index; - len -= bsize - index; - index = 0; - } + fc |= CPACF_KIMD_NIP; /* process as many blocks as possible */ - if (len >= bsize) { - n = (len / bsize) * bsize; - cpacf_kimd(fc, ctx->state, data, n); - ctx->first_message_part = 0; - data += n; - len -= n; + n = (len / bsize) * bsize; + ctx->count += n; + switch (ctx->func) { + case CPACF_KLMD_SHA_512: + case CPACF_KLMD_SHA3_384: + if (ctx->count < n) + ctx->sha512.count_hi++; + break; } -store: - if (len) - memcpy(ctx->buf + index , data, len); - - return 0; + cpacf_kimd(fc, ctx->state, data, n); + ctx->first_message_part = 0; + return len - n; } -EXPORT_SYMBOL_GPL(s390_sha_update); +EXPORT_SYMBOL_GPL(s390_sha_update_blocks); static int s390_crypto_shash_parmsize(int func) { @@ -77,15 +60,15 @@ static int s390_crypto_shash_parmsize(int func) } } -int s390_sha_final(struct shash_desc *desc, u8 *out) +int s390_sha_finup(struct shash_desc *desc, const u8 *src, unsigned int len, + u8 *out) { struct s390_sha_ctx *ctx = shash_desc_ctx(desc); - unsigned int bsize = crypto_shash_blocksize(desc->tfm); - u64 bits; - unsigned int n; int mbl_offset, fc; + u64 bits; + + ctx->count += len; - n = ctx->count % bsize; bits = ctx->count * 8; mbl_offset = s390_crypto_shash_parmsize(ctx->func); if (mbl_offset < 0) @@ -95,17 +78,16 @@ int s390_sha_final(struct shash_desc *desc, u8 *out) /* set total msg bit length (mbl) in CPACF parmblock */ switch (ctx->func) { - case CPACF_KLMD_SHA_1: - case CPACF_KLMD_SHA_256: - memcpy(ctx->state + mbl_offset, &bits, sizeof(bits)); - break; case CPACF_KLMD_SHA_512: - /* - * the SHA512 parmblock has a 128-bit mbl field, clear - * high-order u64 field, copy bits to low-order u64 field - */ - memset(ctx->state + mbl_offset, 0x00, sizeof(bits)); + /* The SHA512 
parmblock has a 128-bit mbl field. */ + if (ctx->count < len) + ctx->sha512.count_hi++; + ctx->sha512.count_hi <<= 3; + ctx->sha512.count_hi |= ctx->count >> 61; mbl_offset += sizeof(u64) / sizeof(u32); + fallthrough; + case CPACF_KLMD_SHA_1: + case CPACF_KLMD_SHA_256: memcpy(ctx->state + mbl_offset, &bits, sizeof(bits)); break; case CPACF_KLMD_SHA3_224: @@ -121,16 +103,14 @@ int s390_sha_final(struct shash_desc *desc, u8 *out) fc |= test_facility(86) ? CPACF_KLMD_DUFOP : 0; if (ctx->first_message_part) fc |= CPACF_KLMD_NIP; - cpacf_klmd(fc, ctx->state, ctx->buf, n); + cpacf_klmd(fc, ctx->state, src, len); /* copy digest to out */ memcpy(out, ctx->state, crypto_shash_digestsize(desc->tfm)); - /* wipe context */ - memset(ctx, 0, sizeof *ctx); return 0; } -EXPORT_SYMBOL_GPL(s390_sha_final); +EXPORT_SYMBOL_GPL(s390_sha_finup); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("s390 SHA cipher common functions"); diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c index 4131f0daa5ea..61220e717af0 100644 --- a/arch/s390/hypfs/hypfs_diag0c.c +++ b/arch/s390/hypfs/hypfs_diag0c.c @@ -9,6 +9,7 @@ #include <linux/slab.h> #include <linux/cpu.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/hypfs.h> #include "hypfs.h" @@ -107,7 +108,7 @@ static struct hypfs_dbfs_file dbfs_file_0c = { */ int __init hypfs_diag0c_init(void) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 0; hypfs_dbfs_create_file(&dbfs_file_0c); return 0; @@ -118,7 +119,7 @@ int __init hypfs_diag0c_init(void) */ void hypfs_diag0c_exit(void) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return; hypfs_dbfs_remove_file(&dbfs_file_0c); } diff --git a/arch/s390/hypfs/hypfs_diag_fs.c b/arch/s390/hypfs/hypfs_diag_fs.c index 00a6d370a280..ede951dc0085 100644 --- a/arch/s390/hypfs/hypfs_diag_fs.c +++ b/arch/s390/hypfs/hypfs_diag_fs.c @@ -16,6 +16,7 @@ #include <linux/string.h> #include <linux/vmalloc.h> #include <linux/mm.h> +#include <asm/machine.h> #include <asm/diag.h> #include 
<asm/ebcdic.h> #include "hypfs_diag.h" @@ -208,6 +209,8 @@ static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info) snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_get_info_type(), cpu_info)); cpu_dir = hypfs_mkdir(cpus_dir, buffer); + if (IS_ERR(cpu_dir)) + return PTR_ERR(cpu_dir); rc = hypfs_create_u64(cpu_dir, "mgmtime", cpu_info__acc_time(diag204_get_info_type(), cpu_info) - cpu_info__lp_time(diag204_get_info_type(), cpu_info)); @@ -382,7 +385,7 @@ static void diag224_delete_name_table(void) int __init __hypfs_diag_fs_init(void) { - if (MACHINE_IS_LPAR) + if (machine_is_lpar()) return diag224_get_name_table(); return 0; } diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index 3db40ad853e0..4db2895e4da3 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -11,6 +11,7 @@ #include <linux/string.h> #include <linux/vmalloc.h> #include <asm/extable.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/ebcdic.h> #include <asm/timex.h> @@ -121,7 +122,7 @@ static struct hypfs_dbfs_file dbfs_file_2fc = { int hypfs_vm_init(void) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 0; if (diag2fc(0, all_guests, NULL) > 0) diag2fc_guest_query = all_guests; @@ -135,7 +136,7 @@ int hypfs_vm_init(void) void hypfs_vm_exit(void) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return; hypfs_dbfs_remove_file(&dbfs_file_2fc); } diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index d428635abf08..96409573c75d 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -24,6 +24,7 @@ #include <linux/kobject.h> #include <linux/seq_file.h> #include <linux/uio.h> +#include <asm/machine.h> #include <asm/ebcdic.h> #include "hypfs.h" @@ -184,7 +185,7 @@ static ssize_t hypfs_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out; } hypfs_delete_tree(sb->s_root); - if (MACHINE_IS_VM) + if (machine_is_vm()) rc = hypfs_vm_create_files(sb->s_root); else rc = 
hypfs_diag_create_files(sb->s_root); @@ -273,7 +274,7 @@ static int hypfs_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_root = root_dentry = d_make_root(root_inode); if (!root_dentry) return -ENOMEM; - if (MACHINE_IS_VM) + if (machine_is_vm()) rc = hypfs_vm_create_files(root_dentry); else rc = hypfs_diag_create_files(root_dentry); @@ -341,7 +342,7 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name, struct inode *inode; inode_lock(d_inode(parent)); - dentry = lookup_one_len(name, parent, strlen(name)); + dentry = lookup_noperm(&QSTR(name), parent); if (IS_ERR(dentry)) { dentry = ERR_PTR(-ENOMEM); goto fail; diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h index 004d17ea05cf..317c07c09ae4 100644 --- a/arch/s390/include/asm/abs_lowcore.h +++ b/arch/s390/include/asm/abs_lowcore.h @@ -25,11 +25,4 @@ static inline void put_abs_lowcore(struct lowcore *lc) put_cpu(); } -extern int relocate_lowcore; - -static inline int have_relocated_lowcore(void) -{ - return relocate_lowcore; -} - #endif /* _ASM_S390_ABS_LOWCORE_H */ diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h index 73e781b56bfe..c7bf60a541e9 100644 --- a/arch/s390/include/asm/alternative.h +++ b/arch/s390/include/asm/alternative.h @@ -32,8 +32,8 @@ #define ALT_CTX_ALL (ALT_CTX_EARLY | ALT_CTX_LATE) #define ALT_TYPE_FACILITY 0 -#define ALT_TYPE_SPEC 1 -#define ALT_TYPE_LOWCORE 2 +#define ALT_TYPE_FEATURE 1 +#define ALT_TYPE_SPEC 2 #define ALT_DATA_SHIFT 0 #define ALT_TYPE_SHIFT 20 @@ -43,13 +43,14 @@ ALT_TYPE_FACILITY << ALT_TYPE_SHIFT | \ (facility) << ALT_DATA_SHIFT) +#define ALT_FEATURE(feature) (ALT_CTX_EARLY << ALT_CTX_SHIFT | \ + ALT_TYPE_FEATURE << ALT_TYPE_SHIFT | \ + (feature) << ALT_DATA_SHIFT) + #define ALT_SPEC(facility) (ALT_CTX_LATE << ALT_CTX_SHIFT | \ ALT_TYPE_SPEC << ALT_TYPE_SHIFT | \ (facility) << ALT_DATA_SHIFT) -#define ALT_LOWCORE (ALT_CTX_EARLY << ALT_CTX_SHIFT | \ - 
ALT_TYPE_LOWCORE << ALT_TYPE_SHIFT) - #ifndef __ASSEMBLY__ #include <linux/types.h> diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h index a92ebbc7aa7a..99b2902c10fd 100644 --- a/arch/s390/include/asm/appldata.h +++ b/arch/s390/include/asm/appldata.h @@ -9,6 +9,7 @@ #define _ASM_S390_APPLDATA_H #include <linux/io.h> +#include <asm/machine.h> #include <asm/diag.h> #define APPLDATA_START_INTERVAL_REC 0x80 @@ -48,7 +49,7 @@ static inline int appldata_asm(struct appldata_parameter_list *parm_list, { int ry; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return -EOPNOTSUPP; parm_list->diag = 0xdc; parm_list->function = fn; diff --git a/arch/s390/include/asm/asce.h b/arch/s390/include/asm/asce.h new file mode 100644 index 000000000000..f6dfaaba735a --- /dev/null +++ b/arch/s390/include/asm/asce.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_S390_ASCE_H +#define _ASM_S390_ASCE_H + +#include <linux/thread_info.h> +#include <linux/irqflags.h> +#include <asm/lowcore.h> +#include <asm/ctlreg.h> + +static inline bool enable_sacf_uaccess(void) +{ + unsigned long flags; + + if (test_thread_flag(TIF_ASCE_PRIMARY)) + return true; + local_irq_save(flags); + local_ctl_load(1, &get_lowcore()->kernel_asce); + set_thread_flag(TIF_ASCE_PRIMARY); + local_irq_restore(flags); + return false; +} + +static inline void disable_sacf_uaccess(bool previous) +{ + unsigned long flags; + + if (previous) + return; + local_irq_save(flags); + local_ctl_load(1, &get_lowcore()->user_asce); + clear_thread_flag(TIF_ASCE_PRIMARY); + local_irq_restore(flags); +} + +#endif /* _ASM_S390_ASCE_H */ diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h index 2e829c16fd8a..d23ea0c94e4e 100644 --- a/arch/s390/include/asm/asm-extable.h +++ b/arch/s390/include/asm/asm-extable.h @@ -14,6 +14,8 @@ #define EX_TYPE_UA_LOAD_REGPAIR 6 #define EX_TYPE_ZEROPAD 7 #define EX_TYPE_FPC 8 +#define EX_TYPE_UA_MVCOS_TO 9 +#define 
EX_TYPE_UA_MVCOS_FROM 10 #define EX_DATA_REG_ERR_SHIFT 0 #define EX_DATA_REG_ERR GENMASK(3, 0) @@ -84,4 +86,10 @@ #define EX_TABLE_FPC(_fault, _target) \ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_FPC, __stringify(%%r0), __stringify(%%r0), 0) +#define EX_TABLE_UA_MVCOS_TO(_fault, _target) \ + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_MVCOS_TO, __stringify(%%r0), __stringify(%%r0), 0) + +#define EX_TABLE_UA_MVCOS_FROM(_fault, _target) \ + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_MVCOS_FROM, __stringify(%%r0), __stringify(%%r0), 0) + #endif /* __ASM_EXTABLE_H */ diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index 585678bbcd7a..21c26d842832 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -163,10 +163,10 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr") #undef __ATOMIC64_OPS -#define __atomic_add_const(val, ptr) __atomic_add(val, ptr) -#define __atomic_add_const_barrier(val, ptr) __atomic_add(val, ptr) -#define __atomic64_add_const(val, ptr) __atomic64_add(val, ptr) -#define __atomic64_add_const_barrier(val, ptr) __atomic64_add(val, ptr) +#define __atomic_add_const(val, ptr) ((void)__atomic_add(val, ptr)) +#define __atomic_add_const_barrier(val, ptr) ((void)__atomic_add(val, ptr)) +#define __atomic64_add_const(val, ptr) ((void)__atomic64_add(val, ptr)) +#define __atomic64_add_const_barrier(val, ptr) ((void)__atomic64_add(val, ptr)) #endif /* MARCH_HAS_Z196_FEATURES */ diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h index 59ab1192e2d5..54cb97603ec0 100644 --- a/arch/s390/include/asm/cpacf.h +++ b/arch/s390/include/asm/cpacf.h @@ -649,18 +649,30 @@ static inline void cpacf_trng(u8 *ucbuf, unsigned long ucbuf_len, * instruction * @func: the function code passed to PCC; see CPACF_KM_xxx defines * @param: address of parameter block; see POP for details on each func + * + * Returns the condition code, this is + * 0 - cc code 0 (normal completion) + * 1 
- cc code 1 (protected key wkvp mismatch or src operand out of range) + * 2 - cc code 2 (something invalid, scalar multiply infinity, ...) + * Condition code 3 (partial completion) is handled within the asm code + * and never returned. */ -static inline void cpacf_pcc(unsigned long func, void *param) +static inline int cpacf_pcc(unsigned long func, void *param) { + int cc; + asm volatile( " lgr 0,%[fc]\n" " lgr 1,%[pba]\n" "0: .insn rre,%[opc] << 16,0,0\n" /* PCC opcode */ " brc 1,0b\n" /* handle partial completion */ - : + CC_IPM(cc) + : CC_OUT(cc, cc) : [fc] "d" (func), [pba] "d" ((unsigned long)param), [opc] "i" (CPACF_PCC) - : "cc", "memory", "0", "1"); + : CC_CLOBBER_LIST("memory", "0", "1")); + + return CC_TRANSFORM(cc); } /** diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index e1a279e0d6a6..1798fbd59068 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -171,7 +171,7 @@ static inline int qctri(struct cpumf_ctr_info *info) { int rc = -EINVAL; - asm volatile ( + asm_inline volatile ( "0: qctri %1\n" "1: lhi %0,0\n" "2:\n" @@ -185,7 +185,7 @@ static inline int lcctl(u64 ctl) { int cc; - asm volatile ( + asm_inline volatile ( " lcctl %[ctl]\n" CC_IPM(cc) : CC_OUT(cc, cc) @@ -200,7 +200,7 @@ static inline int __ecctr(u64 ctr, u64 *content) u64 _content; int cc; - asm volatile ( + asm_inline volatile ( " ecctr %[_content],%[ctr]\n" CC_IPM(cc) : CC_OUT(cc, cc), [_content] "=d" (_content) diff --git a/arch/s390/include/asm/cpufeature.h b/arch/s390/include/asm/cpufeature.h index 931204613753..6c6a99660e78 100644 --- a/arch/s390/include/asm/cpufeature.h +++ b/arch/s390/include/asm/cpufeature.h @@ -9,10 +9,13 @@ #ifndef __ASM_S390_CPUFEATURE_H #define __ASM_S390_CPUFEATURE_H +#include <asm/facility.h> + enum { S390_CPU_FEATURE_MSA, S390_CPU_FEATURE_VXRS, S390_CPU_FEATURE_UV, + S390_CPU_FEATURE_D288, MAX_CPU_FEATURES }; @@ -20,4 +23,16 @@ enum { int cpu_have_feature(unsigned int nr); +#define cpu_has_bear() 
test_facility(193) +#define cpu_has_edat1() test_facility(8) +#define cpu_has_edat2() test_facility(78) +#define cpu_has_gs() test_facility(133) +#define cpu_has_idte() test_facility(3) +#define cpu_has_nx() test_facility(130) +#define cpu_has_rdp() test_facility(194) +#define cpu_has_seq_insn() test_facility(85) +#define cpu_has_tlb_lc() test_facility(51) +#define cpu_has_topology() test_facility(11) +#define cpu_has_vx() test_facility(129) + #endif /* __ASM_S390_CPUFEATURE_H */ diff --git a/arch/s390/include/asm/current.h b/arch/s390/include/asm/current.h index d03a922c641e..f9529f7cf62c 100644 --- a/arch/s390/include/asm/current.h +++ b/arch/s390/include/asm/current.h @@ -11,9 +11,25 @@ #define _S390_CURRENT_H #include <asm/lowcore.h> +#include <asm/machine.h> struct task_struct; -#define current ((struct task_struct *const)get_lowcore()->current_task) +static __always_inline struct task_struct *get_current(void) +{ + unsigned long ptr, lc_current; + + lc_current = offsetof(struct lowcore, current_task); + asm_inline( + ALTERNATIVE(" lg %[ptr],%[offzero](%%r0)\n", + " lg %[ptr],%[offalt](%%r0)\n", + ALT_FEATURE(MFEATURE_LOWCORE)) + : [ptr] "=d" (ptr) + : [offzero] "i" (lc_current), + [offalt] "i" (lc_current + LOWCORE_ALT_ADDRESS)); + return (struct task_struct *)ptr; +} + +#define current get_current() #endif /* !(_S390_CURRENT_H) */ diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index 5790630e31f0..8db8db3b1018 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -66,7 +66,7 @@ static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn) end_addr = pfn_to_phys(start_pfn + num_pfn - 1); diag_stat_inc(DIAG_STAT_X010); - asm volatile( + asm_inline volatile( "0: diag %0,%1,0x10\n" "1: nopr %%r7\n" EX_TABLE(0b, 1b) diff --git a/arch/s390/include/asm/diag288.h b/arch/s390/include/asm/diag288.h new file mode 100644 index 000000000000..5e1b43cea9d6 --- /dev/null +++ 
b/arch/s390/include/asm/diag288.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_S390_DIAG288_H +#define _ASM_S390_DIAG288_H + +#include <asm/asm-extable.h> +#include <asm/types.h> + +#define MIN_INTERVAL 15 /* Minimal time supported by diag288 */ +#define MAX_INTERVAL 3600 /* One hour should be enough - pure estimation */ + +#define WDT_DEFAULT_TIMEOUT 30 + +/* Function codes - init, change, cancel */ +#define WDT_FUNC_INIT 0 +#define WDT_FUNC_CHANGE 1 +#define WDT_FUNC_CANCEL 2 +#define WDT_FUNC_CONCEAL 0x80000000 + +/* Action codes for LPAR watchdog */ +#define LPARWDT_RESTART 0 + +static inline int __diag288(unsigned int func, unsigned int timeout, + unsigned long action, unsigned int len) +{ + union register_pair r1 = { .even = func, .odd = timeout, }; + union register_pair r3 = { .even = action, .odd = len, }; + int rc = -EINVAL; + + asm volatile( + " diag %[r1],%[r3],0x288\n" + "0: lhi %[rc],0\n" + "1:" + EX_TABLE(0b, 1b) + : [rc] "+d" (rc) + : [r1] "d" (r1.pair), [r3] "d" (r3.pair) + : "cc", "memory"); + return rc; +} + +#endif /* _ASM_S390_DIAG288_H */ diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 8f2c23cc52b6..a03df312081e 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -158,9 +158,6 @@ enum { #define ELF_DATA ELFDATA2MSB #define ELF_ARCH EM_S390 -/* s390 specific phdr types */ -#define PT_S390_PGSTE 0x70000000 - /* * ELF register definitions.. 
*/ @@ -191,35 +188,6 @@ typedef s390_compat_regs compat_elf_gregset_t; && (x)->e_ident[EI_CLASS] == ELF_CLASS) #define compat_start_thread start_thread31 -struct arch_elf_state { - int rc; -}; - -#define INIT_ARCH_ELF_STATE { .rc = 0 } - -#define arch_check_elf(ehdr, interp, interp_ehdr, state) (0) -#ifdef CONFIG_PGSTE -#define arch_elf_pt_proc(ehdr, phdr, elf, interp, state) \ -({ \ - struct arch_elf_state *_state = state; \ - if ((phdr)->p_type == PT_S390_PGSTE && \ - !page_table_allocate_pgste && \ - !test_thread_flag(TIF_PGSTE) && \ - !current->mm->context.alloc_pgste) { \ - set_thread_flag(TIF_PGSTE); \ - set_pt_regs_flag(task_pt_regs(current), \ - PIF_EXECVE_PGSTE_RESTART); \ - _state->rc = -EAGAIN; \ - } \ - _state->rc; \ -}) -#else -#define arch_elf_pt_proc(ehdr, phdr, elf, interp, state) \ -({ \ - (state)->rc; \ -}) -#endif - /* For SVR4/S390 the function pointer to be registered with `atexit` is passed in R14. */ #define ELF_PLAT_INIT(_r, load_addr) \ diff --git a/arch/s390/include/asm/fpu.h b/arch/s390/include/asm/fpu.h index c84cb33913e2..960c6c67ad6c 100644 --- a/arch/s390/include/asm/fpu.h +++ b/arch/s390/include/asm/fpu.h @@ -44,6 +44,7 @@ #ifndef _ASM_S390_FPU_H #define _ASM_S390_FPU_H +#include <linux/cpufeature.h> #include <linux/processor.h> #include <linux/preempt.h> #include <linux/string.h> @@ -51,12 +52,6 @@ #include <asm/sigcontext.h> #include <asm/fpu-types.h> #include <asm/fpu-insn.h> -#include <asm/facility.h> - -static inline bool cpu_has_vx(void) -{ - return likely(test_facility(129)); -} enum { KERNEL_FPC_BIT = 0, diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index f5781794356b..942f21c39697 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -13,9 +13,11 @@ static uaccess_kmsan_or_inline int \ __futex_atomic_##name(int oparg, int *old, u32 __user *uaddr) \ { \ + bool sacf_flag; \ int rc, new; \ \ instrument_copy_from_user_before(old, uaddr, sizeof(*old)); \ + sacf_flag = 
enable_sacf_uaccess(); \ asm_inline volatile( \ " sacf 256\n" \ "0: l %[old],%[uaddr]\n" \ @@ -32,6 +34,7 @@ __futex_atomic_##name(int oparg, int *old, u32 __user *uaddr) \ [new] "=&d" (new), [uaddr] "+Q" (*uaddr) \ : [oparg] "d" (oparg) \ : "cc"); \ + disable_sacf_uaccess(sacf_flag); \ if (!rc) \ instrument_copy_from_user_after(old, uaddr, sizeof(*old), 0); \ return rc; \ @@ -75,9 +78,11 @@ int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) static uaccess_kmsan_or_inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { + bool sacf_flag; int rc; instrument_copy_from_user_before(uval, uaddr, sizeof(*uval)); + sacf_flag = enable_sacf_uaccess(); asm_inline volatile( " sacf 256\n" "0: cs %[old],%[new],%[uaddr]\n" @@ -88,6 +93,7 @@ int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 : [rc] "=d" (rc), [old] "+d" (oldval), [uaddr] "+Q" (*uaddr) : [new] "d" (newval) : "cc", "memory"); + disable_sacf_uaccess(sacf_flag); *uval = oldval; instrument_copy_from_user_after(uval, uaddr, sizeof(*uval), 0); return rc; diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index 4e73ef46d4b2..66c5808fd011 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -110,7 +110,6 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); unsigned long __gmap_translate(struct gmap *, unsigned long gaddr); int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr); -void gmap_discard(struct gmap *, unsigned long from, unsigned long to); void __gmap_zap(struct gmap *, unsigned long gaddr); void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr); @@ -134,12 +133,10 @@ int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4], 
unsigned long gaddr, unsigned long vmaddr); -int s390_disable_cow_sharing(void); int s390_replace_asce(struct gmap *gmap); void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns); int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start, unsigned long end, bool interruptible); -int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split); unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level); /** diff --git a/arch/s390/include/asm/gmap_helpers.h b/arch/s390/include/asm/gmap_helpers.h new file mode 100644 index 000000000000..5356446a61c4 --- /dev/null +++ b/arch/s390/include/asm/gmap_helpers.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Helper functions for KVM guest address space mapping code + * + * Copyright IBM Corp. 2025 + */ + +#ifndef _ASM_S390_GMAP_HELPERS_H +#define _ASM_S390_GMAP_HELPERS_H + +void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr); +void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end); +int gmap_helper_disable_cow_sharing(void); + +#endif /* _ASM_S390_GMAP_HELPERS_H */ diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 663e87220e89..931fcc413598 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -9,12 +9,13 @@ #ifndef _ASM_S390_HUGETLB_H #define _ASM_S390_HUGETLB_H +#include <linux/cpufeature.h> #include <linux/pgtable.h> #include <linux/swap.h> #include <linux/swapops.h> #include <asm/page.h> -#define hugepages_supported() (MACHINE_HAS_EDAT1) +#define hugepages_supported() cpu_has_edat1() #define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index fc9933a743d6..faddb9aef3b8 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -33,9 +33,7 @@ void unxlate_dev_mem_ptr(phys_addr_t 
phys, void *addr); #define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL) #define ioremap_wc(addr, size) \ - ioremap_prot((addr), (size), pgprot_val(pgprot_writecombine(PAGE_KERNEL))) -#define ioremap_wt(addr, size) \ - ioremap_prot((addr), (size), pgprot_val(pgprot_writethrough(PAGE_KERNEL))) + ioremap_prot((addr), (size), pgprot_writecombine(PAGE_KERNEL)) static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) { diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index d9e705f4a697..bde6a496df5f 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -54,7 +54,6 @@ enum interruption_class { IRQIO_C70, IRQIO_TAP, IRQIO_VMR, - IRQIO_LCS, IRQIO_CTC, IRQIO_ADM, IRQIO_CSC, diff --git a/arch/s390/include/asm/kfence.h b/arch/s390/include/asm/kfence.h index e47fd8cbe701..e95e35eb8a3f 100644 --- a/arch/s390/include/asm/kfence.h +++ b/arch/s390/include/asm/kfence.h @@ -12,27 +12,16 @@ void __kernel_map_pages(struct page *page, int numpages, int enable); static __always_inline bool arch_kfence_init_pool(void) { - return true; -} - -#define arch_kfence_test_address(addr) ((addr) & PAGE_MASK) - -/* - * Do not split kfence pool to 4k mapping with arch_kfence_init_pool(), - * but earlier where page table allocations still happen with memblock. - * Reason is that arch_kfence_init_pool() gets called when the system - * is still in a limbo state - disabling and enabling bottom halves is - * not yet allowed, but that is what our page_table_alloc() would do. 
- */ -static __always_inline void kfence_split_mapping(void) -{ #ifdef CONFIG_KFENCE unsigned long pool_pages = KFENCE_POOL_SIZE >> PAGE_SHIFT; set_memory_4k((unsigned long)__kfence_pool, pool_pages); #endif + return true; } +#define arch_kfence_test_address(addr) ((addr) & PAGE_MASK) + static inline bool kfence_protect_page(unsigned long addr, bool protect) { __kernel_map_pages(virt_to_page((void *)addr), 1, !protect); diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 9a367866cab0..cb89e54ada25 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -20,14 +20,13 @@ #include <linux/module.h> #include <linux/pci.h> #include <linux/mmu_notifier.h> +#include <asm/kvm_host_types.h> #include <asm/debug.h> #include <asm/cpu.h> #include <asm/fpu.h> #include <asm/isc.h> #include <asm/guarded_storage.h> -#define KVM_S390_BSCA_CPU_SLOTS 64 -#define KVM_S390_ESCA_CPU_SLOTS 248 #define KVM_MAX_VCPUS 255 #define KVM_INTERNAL_MEM_SLOTS 1 @@ -51,342 +50,6 @@ #define KVM_REQ_REFRESH_GUEST_PREFIX \ KVM_ARCH_REQ_FLAGS(6, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) -#define SIGP_CTRL_C 0x80 -#define SIGP_CTRL_SCN_MASK 0x3f - -union bsca_sigp_ctrl { - __u8 value; - struct { - __u8 c : 1; - __u8 r : 1; - __u8 scn : 6; - }; -}; - -union esca_sigp_ctrl { - __u16 value; - struct { - __u8 c : 1; - __u8 reserved: 7; - __u8 scn; - }; -}; - -struct esca_entry { - union esca_sigp_ctrl sigp_ctrl; - __u16 reserved1[3]; - __u64 sda; - __u64 reserved2[6]; -}; - -struct bsca_entry { - __u8 reserved0; - union bsca_sigp_ctrl sigp_ctrl; - __u16 reserved[3]; - __u64 sda; - __u64 reserved2[2]; -}; - -union ipte_control { - unsigned long val; - struct { - unsigned long k : 1; - unsigned long kh : 31; - unsigned long kg : 32; - }; -}; - -/* - * Utility is defined as two bytes but having it four bytes wide - * generates more efficient code. Since the following bytes are - * reserved this makes no functional difference. 
- */ -union sca_utility { - __u32 val; - struct { - __u32 mtcr : 1; - __u32 : 31; - }; -}; - -struct bsca_block { - union ipte_control ipte_control; - __u64 reserved[5]; - __u64 mcn; - union sca_utility utility; - __u8 reserved2[4]; - struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS]; -}; - -struct esca_block { - union ipte_control ipte_control; - __u64 reserved1[6]; - union sca_utility utility; - __u8 reserved2[4]; - __u64 mcn[4]; - __u64 reserved3[20]; - struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS]; -}; - -/* - * This struct is used to store some machine check info from lowcore - * for machine checks that happen while the guest is running. - * This info in host's lowcore might be overwritten by a second machine - * check from host when host is in the machine check's high-level handling. - * The size is 24 bytes. - */ -struct mcck_volatile_info { - __u64 mcic; - __u64 failing_storage_address; - __u32 ext_damage_code; - __u32 reserved; -}; - -#define CR0_INITIAL_MASK (CR0_UNUSED_56 | CR0_INTERRUPT_KEY_SUBMASK | \ - CR0_MEASUREMENT_ALERT_SUBMASK) -#define CR14_INITIAL_MASK (CR14_UNUSED_32 | CR14_UNUSED_33 | \ - CR14_EXTERNAL_DAMAGE_SUBMASK) - -#define SIDAD_SIZE_MASK 0xff -#define sida_addr(sie_block) phys_to_virt((sie_block)->sidad & PAGE_MASK) -#define sida_size(sie_block) \ - ((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE) - -#define CPUSTAT_STOPPED 0x80000000 -#define CPUSTAT_WAIT 0x10000000 -#define CPUSTAT_ECALL_PEND 0x08000000 -#define CPUSTAT_STOP_INT 0x04000000 -#define CPUSTAT_IO_INT 0x02000000 -#define CPUSTAT_EXT_INT 0x01000000 -#define CPUSTAT_RUNNING 0x00800000 -#define CPUSTAT_RETAINED 0x00400000 -#define CPUSTAT_TIMING_SUB 0x00020000 -#define CPUSTAT_SIE_SUB 0x00010000 -#define CPUSTAT_RRF 0x00008000 -#define CPUSTAT_SLSV 0x00004000 -#define CPUSTAT_SLSR 0x00002000 -#define CPUSTAT_ZARCH 0x00000800 -#define CPUSTAT_MCDS 0x00000100 -#define CPUSTAT_KSS 0x00000200 -#define CPUSTAT_SM 0x00000080 -#define CPUSTAT_IBS 0x00000040 -#define 
CPUSTAT_GED2 0x00000010 -#define CPUSTAT_G 0x00000008 -#define CPUSTAT_GED 0x00000004 -#define CPUSTAT_J 0x00000002 -#define CPUSTAT_P 0x00000001 - -struct kvm_s390_sie_block { - atomic_t cpuflags; /* 0x0000 */ - __u32 : 1; /* 0x0004 */ - __u32 prefix : 18; - __u32 : 1; - __u32 ibc : 12; - __u8 reserved08[4]; /* 0x0008 */ -#define PROG_IN_SIE (1<<0) - __u32 prog0c; /* 0x000c */ - union { - __u8 reserved10[16]; /* 0x0010 */ - struct { - __u64 pv_handle_cpu; - __u64 pv_handle_config; - }; - }; -#define PROG_BLOCK_SIE (1<<0) -#define PROG_REQUEST (1<<1) - atomic_t prog20; /* 0x0020 */ - __u8 reserved24[4]; /* 0x0024 */ - __u64 cputm; /* 0x0028 */ - __u64 ckc; /* 0x0030 */ - __u64 epoch; /* 0x0038 */ - __u32 svcc; /* 0x0040 */ -#define LCTL_CR0 0x8000 -#define LCTL_CR6 0x0200 -#define LCTL_CR9 0x0040 -#define LCTL_CR10 0x0020 -#define LCTL_CR11 0x0010 -#define LCTL_CR14 0x0002 - __u16 lctl; /* 0x0044 */ - __s16 icpua; /* 0x0046 */ -#define ICTL_OPEREXC 0x80000000 -#define ICTL_PINT 0x20000000 -#define ICTL_LPSW 0x00400000 -#define ICTL_STCTL 0x00040000 -#define ICTL_ISKE 0x00004000 -#define ICTL_SSKE 0x00002000 -#define ICTL_RRBE 0x00001000 -#define ICTL_TPROT 0x00000200 - __u32 ictl; /* 0x0048 */ -#define ECA_CEI 0x80000000 -#define ECA_IB 0x40000000 -#define ECA_SIGPI 0x10000000 -#define ECA_MVPGI 0x01000000 -#define ECA_AIV 0x00200000 -#define ECA_VX 0x00020000 -#define ECA_PROTEXCI 0x00002000 -#define ECA_APIE 0x00000008 -#define ECA_SII 0x00000001 - __u32 eca; /* 0x004c */ -#define ICPT_INST 0x04 -#define ICPT_PROGI 0x08 -#define ICPT_INSTPROGI 0x0C -#define ICPT_EXTREQ 0x10 -#define ICPT_EXTINT 0x14 -#define ICPT_IOREQ 0x18 -#define ICPT_WAIT 0x1c -#define ICPT_VALIDITY 0x20 -#define ICPT_STOP 0x28 -#define ICPT_OPEREXC 0x2C -#define ICPT_PARTEXEC 0x38 -#define ICPT_IOINST 0x40 -#define ICPT_KSS 0x5c -#define ICPT_MCHKREQ 0x60 -#define ICPT_INT_ENABLE 0x64 -#define ICPT_PV_INSTR 0x68 -#define ICPT_PV_NOTIFY 0x6c -#define ICPT_PV_PREF 0x70 - __u8 icptcode; /* 
0x0050 */ - __u8 icptstatus; /* 0x0051 */ - __u16 ihcpu; /* 0x0052 */ - __u8 reserved54; /* 0x0054 */ -#define IICTL_CODE_NONE 0x00 -#define IICTL_CODE_MCHK 0x01 -#define IICTL_CODE_EXT 0x02 -#define IICTL_CODE_IO 0x03 -#define IICTL_CODE_RESTART 0x04 -#define IICTL_CODE_SPECIFICATION 0x10 -#define IICTL_CODE_OPERAND 0x11 - __u8 iictl; /* 0x0055 */ - __u16 ipa; /* 0x0056 */ - __u32 ipb; /* 0x0058 */ - __u32 scaoh; /* 0x005c */ -#define FPF_BPBC 0x20 - __u8 fpf; /* 0x0060 */ -#define ECB_GS 0x40 -#define ECB_TE 0x10 -#define ECB_SPECI 0x08 -#define ECB_SRSI 0x04 -#define ECB_HOSTPROTINT 0x02 -#define ECB_PTF 0x01 - __u8 ecb; /* 0x0061 */ -#define ECB2_CMMA 0x80 -#define ECB2_IEP 0x20 -#define ECB2_PFMFI 0x08 -#define ECB2_ESCA 0x04 -#define ECB2_ZPCI_LSI 0x02 - __u8 ecb2; /* 0x0062 */ -#define ECB3_AISI 0x20 -#define ECB3_AISII 0x10 -#define ECB3_DEA 0x08 -#define ECB3_AES 0x04 -#define ECB3_RI 0x01 - __u8 ecb3; /* 0x0063 */ -#define ESCA_SCAOL_MASK ~0x3fU - __u32 scaol; /* 0x0064 */ - __u8 sdf; /* 0x0068 */ - __u8 epdx; /* 0x0069 */ - __u8 cpnc; /* 0x006a */ - __u8 reserved6b; /* 0x006b */ - __u32 todpr; /* 0x006c */ -#define GISA_FORMAT1 0x00000001 - __u32 gd; /* 0x0070 */ - __u8 reserved74[12]; /* 0x0074 */ - __u64 mso; /* 0x0080 */ - __u64 msl; /* 0x0088 */ - psw_t gpsw; /* 0x0090 */ - __u64 gg14; /* 0x00a0 */ - __u64 gg15; /* 0x00a8 */ - __u8 reservedb0[8]; /* 0x00b0 */ -#define HPID_KVM 0x4 -#define HPID_VSIE 0x5 - __u8 hpid; /* 0x00b8 */ - __u8 reservedb9[7]; /* 0x00b9 */ - union { - struct { - __u32 eiparams; /* 0x00c0 */ - __u16 extcpuaddr; /* 0x00c4 */ - __u16 eic; /* 0x00c6 */ - }; - __u64 mcic; /* 0x00c0 */ - } __packed; - __u32 reservedc8; /* 0x00c8 */ - union { - struct { - __u16 pgmilc; /* 0x00cc */ - __u16 iprcc; /* 0x00ce */ - }; - __u32 edc; /* 0x00cc */ - } __packed; - union { - struct { - __u32 dxc; /* 0x00d0 */ - __u16 mcn; /* 0x00d4 */ - __u8 perc; /* 0x00d6 */ - __u8 peratmid; /* 0x00d7 */ - }; - __u64 faddr; /* 0x00d0 */ - } __packed; - __u64 
peraddr; /* 0x00d8 */ - __u8 eai; /* 0x00e0 */ - __u8 peraid; /* 0x00e1 */ - __u8 oai; /* 0x00e2 */ - __u8 armid; /* 0x00e3 */ - __u8 reservede4[4]; /* 0x00e4 */ - union { - __u64 tecmc; /* 0x00e8 */ - struct { - __u16 subchannel_id; /* 0x00e8 */ - __u16 subchannel_nr; /* 0x00ea */ - __u32 io_int_parm; /* 0x00ec */ - __u32 io_int_word; /* 0x00f0 */ - }; - } __packed; - __u8 reservedf4[8]; /* 0x00f4 */ -#define CRYCB_FORMAT_MASK 0x00000003 -#define CRYCB_FORMAT0 0x00000000 -#define CRYCB_FORMAT1 0x00000001 -#define CRYCB_FORMAT2 0x00000003 - __u32 crycbd; /* 0x00fc */ - __u64 gcr[16]; /* 0x0100 */ - union { - __u64 gbea; /* 0x0180 */ - __u64 sidad; - }; - __u8 reserved188[8]; /* 0x0188 */ - __u64 sdnxo; /* 0x0190 */ - __u8 reserved198[8]; /* 0x0198 */ - __u32 fac; /* 0x01a0 */ - __u8 reserved1a4[20]; /* 0x01a4 */ - __u64 cbrlo; /* 0x01b8 */ - __u8 reserved1c0[8]; /* 0x01c0 */ -#define ECD_HOSTREGMGMT 0x20000000 -#define ECD_MEF 0x08000000 -#define ECD_ETOKENF 0x02000000 -#define ECD_ECC 0x00200000 -#define ECD_HMAC 0x00004000 - __u32 ecd; /* 0x01c8 */ - __u8 reserved1cc[18]; /* 0x01cc */ - __u64 pp; /* 0x01de */ - __u8 reserved1e6[2]; /* 0x01e6 */ - __u64 itdba; /* 0x01e8 */ - __u64 riccbd; /* 0x01f0 */ - __u64 gvrd; /* 0x01f8 */ -} __packed __aligned(512); - -struct kvm_s390_itdb { - __u8 data[256]; -}; - -struct sie_page { - struct kvm_s390_sie_block sie_block; - struct mcck_volatile_info mcck_info; /* 0x0200 */ - __u8 reserved218[360]; /* 0x0218 */ - __u64 pv_grregs[16]; /* 0x0380 */ - __u8 reserved400[512]; /* 0x0400 */ - struct kvm_s390_itdb itdb; /* 0x0600 */ - __u8 reserved700[2304]; /* 0x0700 */ -}; - struct kvm_vcpu_stat { struct kvm_vcpu_stat_generic generic; u64 exit_userspace; @@ -1056,7 +719,6 @@ bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu); extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc); extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc); -static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline 
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} diff --git a/arch/s390/include/asm/kvm_host_types.h b/arch/s390/include/asm/kvm_host_types.h new file mode 100644 index 000000000000..1394d3fb648f --- /dev/null +++ b/arch/s390/include/asm/kvm_host_types.h @@ -0,0 +1,348 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_KVM_HOST_TYPES_H +#define _ASM_KVM_HOST_TYPES_H + +#include <linux/atomic.h> +#include <linux/types.h> + +#define KVM_S390_BSCA_CPU_SLOTS 64 +#define KVM_S390_ESCA_CPU_SLOTS 248 + +#define SIGP_CTRL_C 0x80 +#define SIGP_CTRL_SCN_MASK 0x3f + +union bsca_sigp_ctrl { + __u8 value; + struct { + __u8 c : 1; + __u8 r : 1; + __u8 scn : 6; + }; +}; + +union esca_sigp_ctrl { + __u16 value; + struct { + __u8 c : 1; + __u8 reserved: 7; + __u8 scn; + }; +}; + +struct esca_entry { + union esca_sigp_ctrl sigp_ctrl; + __u16 reserved1[3]; + __u64 sda; + __u64 reserved2[6]; +}; + +struct bsca_entry { + __u8 reserved0; + union bsca_sigp_ctrl sigp_ctrl; + __u16 reserved[3]; + __u64 sda; + __u64 reserved2[2]; +}; + +union ipte_control { + unsigned long val; + struct { + unsigned long k : 1; + unsigned long kh : 31; + unsigned long kg : 32; + }; +}; + +/* + * Utility is defined as two bytes but having it four bytes wide + * generates more efficient code. Since the following bytes are + * reserved this makes no functional difference. 
+ */ +union sca_utility { + __u32 val; + struct { + __u32 mtcr : 1; + __u32 : 31; + }; +}; + +struct bsca_block { + union ipte_control ipte_control; + __u64 reserved[5]; + __u64 mcn; + union sca_utility utility; + __u8 reserved2[4]; + struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS]; +}; + +struct esca_block { + union ipte_control ipte_control; + __u64 reserved1[6]; + union sca_utility utility; + __u8 reserved2[4]; + __u64 mcn[4]; + __u64 reserved3[20]; + struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS]; +}; + +/* + * This struct is used to store some machine check info from lowcore + * for machine checks that happen while the guest is running. + * This info in host's lowcore might be overwritten by a second machine + * check from host when host is in the machine check's high-level handling. + * The size is 24 bytes. + */ +struct mcck_volatile_info { + __u64 mcic; + __u64 failing_storage_address; + __u32 ext_damage_code; + __u32 reserved; +}; + +#define CR0_INITIAL_MASK (CR0_UNUSED_56 | CR0_INTERRUPT_KEY_SUBMASK | \ + CR0_MEASUREMENT_ALERT_SUBMASK) +#define CR14_INITIAL_MASK (CR14_UNUSED_32 | CR14_UNUSED_33 | \ + CR14_EXTERNAL_DAMAGE_SUBMASK) + +#define SIDAD_SIZE_MASK 0xff +#define sida_addr(sie_block) phys_to_virt((sie_block)->sidad & PAGE_MASK) +#define sida_size(sie_block) \ + ((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE) + +#define CPUSTAT_STOPPED 0x80000000 +#define CPUSTAT_WAIT 0x10000000 +#define CPUSTAT_ECALL_PEND 0x08000000 +#define CPUSTAT_STOP_INT 0x04000000 +#define CPUSTAT_IO_INT 0x02000000 +#define CPUSTAT_EXT_INT 0x01000000 +#define CPUSTAT_RUNNING 0x00800000 +#define CPUSTAT_RETAINED 0x00400000 +#define CPUSTAT_TIMING_SUB 0x00020000 +#define CPUSTAT_SIE_SUB 0x00010000 +#define CPUSTAT_RRF 0x00008000 +#define CPUSTAT_SLSV 0x00004000 +#define CPUSTAT_SLSR 0x00002000 +#define CPUSTAT_ZARCH 0x00000800 +#define CPUSTAT_MCDS 0x00000100 +#define CPUSTAT_KSS 0x00000200 +#define CPUSTAT_SM 0x00000080 +#define CPUSTAT_IBS 0x00000040 +#define 
CPUSTAT_GED2 0x00000010 +#define CPUSTAT_G 0x00000008 +#define CPUSTAT_GED 0x00000004 +#define CPUSTAT_J 0x00000002 +#define CPUSTAT_P 0x00000001 + +struct kvm_s390_sie_block { + atomic_t cpuflags; /* 0x0000 */ + __u32 : 1; /* 0x0004 */ + __u32 prefix : 18; + __u32 : 1; + __u32 ibc : 12; + __u8 reserved08[4]; /* 0x0008 */ +#define PROG_IN_SIE (1<<0) + __u32 prog0c; /* 0x000c */ + union { + __u8 reserved10[16]; /* 0x0010 */ + struct { + __u64 pv_handle_cpu; + __u64 pv_handle_config; + }; + }; +#define PROG_BLOCK_SIE (1<<0) +#define PROG_REQUEST (1<<1) + atomic_t prog20; /* 0x0020 */ + __u8 reserved24[4]; /* 0x0024 */ + __u64 cputm; /* 0x0028 */ + __u64 ckc; /* 0x0030 */ + __u64 epoch; /* 0x0038 */ + __u32 svcc; /* 0x0040 */ +#define LCTL_CR0 0x8000 +#define LCTL_CR6 0x0200 +#define LCTL_CR9 0x0040 +#define LCTL_CR10 0x0020 +#define LCTL_CR11 0x0010 +#define LCTL_CR14 0x0002 + __u16 lctl; /* 0x0044 */ + __s16 icpua; /* 0x0046 */ +#define ICTL_OPEREXC 0x80000000 +#define ICTL_PINT 0x20000000 +#define ICTL_LPSW 0x00400000 +#define ICTL_STCTL 0x00040000 +#define ICTL_ISKE 0x00004000 +#define ICTL_SSKE 0x00002000 +#define ICTL_RRBE 0x00001000 +#define ICTL_TPROT 0x00000200 + __u32 ictl; /* 0x0048 */ +#define ECA_CEI 0x80000000 +#define ECA_IB 0x40000000 +#define ECA_SIGPI 0x10000000 +#define ECA_MVPGI 0x01000000 +#define ECA_AIV 0x00200000 +#define ECA_VX 0x00020000 +#define ECA_PROTEXCI 0x00002000 +#define ECA_APIE 0x00000008 +#define ECA_SII 0x00000001 + __u32 eca; /* 0x004c */ +#define ICPT_INST 0x04 +#define ICPT_PROGI 0x08 +#define ICPT_INSTPROGI 0x0C +#define ICPT_EXTREQ 0x10 +#define ICPT_EXTINT 0x14 +#define ICPT_IOREQ 0x18 +#define ICPT_WAIT 0x1c +#define ICPT_VALIDITY 0x20 +#define ICPT_STOP 0x28 +#define ICPT_OPEREXC 0x2C +#define ICPT_PARTEXEC 0x38 +#define ICPT_IOINST 0x40 +#define ICPT_KSS 0x5c +#define ICPT_MCHKREQ 0x60 +#define ICPT_INT_ENABLE 0x64 +#define ICPT_PV_INSTR 0x68 +#define ICPT_PV_NOTIFY 0x6c +#define ICPT_PV_PREF 0x70 + __u8 icptcode; /* 
0x0050 */ + __u8 icptstatus; /* 0x0051 */ + __u16 ihcpu; /* 0x0052 */ + __u8 reserved54; /* 0x0054 */ +#define IICTL_CODE_NONE 0x00 +#define IICTL_CODE_MCHK 0x01 +#define IICTL_CODE_EXT 0x02 +#define IICTL_CODE_IO 0x03 +#define IICTL_CODE_RESTART 0x04 +#define IICTL_CODE_SPECIFICATION 0x10 +#define IICTL_CODE_OPERAND 0x11 + __u8 iictl; /* 0x0055 */ + __u16 ipa; /* 0x0056 */ + __u32 ipb; /* 0x0058 */ + __u32 scaoh; /* 0x005c */ +#define FPF_BPBC 0x20 + __u8 fpf; /* 0x0060 */ +#define ECB_GS 0x40 +#define ECB_TE 0x10 +#define ECB_SPECI 0x08 +#define ECB_SRSI 0x04 +#define ECB_HOSTPROTINT 0x02 +#define ECB_PTF 0x01 + __u8 ecb; /* 0x0061 */ +#define ECB2_CMMA 0x80 +#define ECB2_IEP 0x20 +#define ECB2_PFMFI 0x08 +#define ECB2_ESCA 0x04 +#define ECB2_ZPCI_LSI 0x02 + __u8 ecb2; /* 0x0062 */ +#define ECB3_AISI 0x20 +#define ECB3_AISII 0x10 +#define ECB3_DEA 0x08 +#define ECB3_AES 0x04 +#define ECB3_RI 0x01 + __u8 ecb3; /* 0x0063 */ +#define ESCA_SCAOL_MASK ~0x3fU + __u32 scaol; /* 0x0064 */ + __u8 sdf; /* 0x0068 */ + __u8 epdx; /* 0x0069 */ + __u8 cpnc; /* 0x006a */ + __u8 reserved6b; /* 0x006b */ + __u32 todpr; /* 0x006c */ +#define GISA_FORMAT1 0x00000001 + __u32 gd; /* 0x0070 */ + __u8 reserved74[12]; /* 0x0074 */ + __u64 mso; /* 0x0080 */ + __u64 msl; /* 0x0088 */ + psw_t gpsw; /* 0x0090 */ + __u64 gg14; /* 0x00a0 */ + __u64 gg15; /* 0x00a8 */ + __u8 reservedb0[8]; /* 0x00b0 */ +#define HPID_KVM 0x4 +#define HPID_VSIE 0x5 + __u8 hpid; /* 0x00b8 */ + __u8 reservedb9[7]; /* 0x00b9 */ + union { + struct { + __u32 eiparams; /* 0x00c0 */ + __u16 extcpuaddr; /* 0x00c4 */ + __u16 eic; /* 0x00c6 */ + }; + __u64 mcic; /* 0x00c0 */ + } __packed; + __u32 reservedc8; /* 0x00c8 */ + union { + struct { + __u16 pgmilc; /* 0x00cc */ + __u16 iprcc; /* 0x00ce */ + }; + __u32 edc; /* 0x00cc */ + } __packed; + union { + struct { + __u32 dxc; /* 0x00d0 */ + __u16 mcn; /* 0x00d4 */ + __u8 perc; /* 0x00d6 */ + __u8 peratmid; /* 0x00d7 */ + }; + __u64 faddr; /* 0x00d0 */ + } __packed; + __u64 
peraddr; /* 0x00d8 */ + __u8 eai; /* 0x00e0 */ + __u8 peraid; /* 0x00e1 */ + __u8 oai; /* 0x00e2 */ + __u8 armid; /* 0x00e3 */ + __u8 reservede4[4]; /* 0x00e4 */ + union { + __u64 tecmc; /* 0x00e8 */ + struct { + __u16 subchannel_id; /* 0x00e8 */ + __u16 subchannel_nr; /* 0x00ea */ + __u32 io_int_parm; /* 0x00ec */ + __u32 io_int_word; /* 0x00f0 */ + }; + } __packed; + __u8 reservedf4[8]; /* 0x00f4 */ +#define CRYCB_FORMAT_MASK 0x00000003 +#define CRYCB_FORMAT0 0x00000000 +#define CRYCB_FORMAT1 0x00000001 +#define CRYCB_FORMAT2 0x00000003 + __u32 crycbd; /* 0x00fc */ + __u64 gcr[16]; /* 0x0100 */ + union { + __u64 gbea; /* 0x0180 */ + __u64 sidad; + }; + __u8 reserved188[8]; /* 0x0188 */ + __u64 sdnxo; /* 0x0190 */ + __u8 reserved198[8]; /* 0x0198 */ + __u32 fac; /* 0x01a0 */ + __u8 reserved1a4[20]; /* 0x01a4 */ + __u64 cbrlo; /* 0x01b8 */ + __u8 reserved1c0[8]; /* 0x01c0 */ +#define ECD_HOSTREGMGMT 0x20000000 +#define ECD_MEF 0x08000000 +#define ECD_ETOKENF 0x02000000 +#define ECD_ECC 0x00200000 +#define ECD_HMAC 0x00004000 + __u32 ecd; /* 0x01c8 */ + __u8 reserved1cc[18]; /* 0x01cc */ + __u64 pp; /* 0x01de */ + __u8 reserved1e6[2]; /* 0x01e6 */ + __u64 itdba; /* 0x01e8 */ + __u64 riccbd; /* 0x01f0 */ + __u64 gvrd; /* 0x01f8 */ +} __packed __aligned(512); + +struct kvm_s390_itdb { + __u8 data[256]; +}; + +struct sie_page { + struct kvm_s390_sie_block sie_block; + struct mcck_volatile_info mcck_info; /* 0x0200 */ + __u8 reserved218[360]; /* 0x0218 */ + __u64 pv_grregs[16]; /* 0x0380 */ + __u8 reserved400[512]; /* 0x0400 */ + struct kvm_s390_itdb itdb; /* 0x0600 */ + __u8 reserved700[2304]; /* 0x0700 */ +}; + +#endif /* _ASM_KVM_HOST_TYPES_H */ diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 42a092fa1029..e99e9c87b1ce 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -10,6 +10,7 @@ #define _ASM_S390_LOWCORE_H #include <linux/types.h> +#include <asm/machine.h> #include <asm/ptrace.h> #include 
<asm/ctlreg.h> #include <asm/cpu.h> @@ -126,7 +127,7 @@ struct lowcore { __u64 int_clock; /* 0x0318 */ __u8 pad_0x0320[0x0328-0x0320]; /* 0x0320 */ __u64 clock_comparator; /* 0x0328 */ - __u64 boot_clock[2]; /* 0x0330 */ + __u8 pad_0x0330[0x0340-0x0330]; /* 0x0330 */ /* Current process. */ __u64 current_task; /* 0x0340 */ @@ -163,9 +164,7 @@ struct lowcore { __u32 spinlock_index; /* 0x03b0 */ __u8 pad_0x03b4[0x03b8-0x03b4]; /* 0x03b4 */ __u64 percpu_offset; /* 0x03b8 */ - __u8 pad_0x03c0[0x03c8-0x03c0]; /* 0x03c0 */ - __u64 machine_flags; /* 0x03c8 */ - __u8 pad_0x03d0[0x0400-0x03d0]; /* 0x03d0 */ + __u8 pad_0x03c0[0x0400-0x03c0]; /* 0x03c0 */ __u32 return_lpswe; /* 0x0400 */ __u32 return_mcck_lpswe; /* 0x0404 */ @@ -222,9 +221,12 @@ static __always_inline struct lowcore *get_lowcore(void) if (__is_defined(__DECOMPRESSOR)) return NULL; - asm(ALTERNATIVE("llilh %[lc],0", "llilh %[lc],%[alt]", ALT_LOWCORE) - : [lc] "=d" (lc) - : [alt] "i" (LOWCORE_ALT_ADDRESS >> 16)); + asm_inline( + ALTERNATIVE(" lghi %[lc],0", + " llilh %[lc],%[alt]", + ALT_FEATURE(MFEATURE_LOWCORE)) + : [lc] "=d" (lc) + : [alt] "i" (LOWCORE_ALT_ADDRESS >> 16)); return lc; } @@ -238,15 +240,15 @@ static inline void set_prefix(__u32 address) #else /* __ASSEMBLY__ */ .macro GET_LC reg - ALTERNATIVE "llilh \reg,0", \ + ALTERNATIVE "lghi \reg,0", \ __stringify(llilh \reg, LOWCORE_ALT_ADDRESS >> 16), \ - ALT_LOWCORE + ALT_FEATURE(MFEATURE_LOWCORE) .endm .macro STMG_LC start, end, savearea ALTERNATIVE "stmg \start, \end, \savearea", \ __stringify(stmg \start, \end, LOWCORE_ALT_ADDRESS + \savearea), \ - ALT_LOWCORE + ALT_FEATURE(MFEATURE_LOWCORE) .endm #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/include/asm/machine.h b/arch/s390/include/asm/machine.h new file mode 100644 index 000000000000..8abe5afdbfc4 --- /dev/null +++ b/arch/s390/include/asm/machine.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 
2024 + */ + +#ifndef __ASM_S390_MACHINE_H +#define __ASM_S390_MACHINE_H + +#include <linux/const.h> + +#define MFEATURE_LOWCORE 0 +#define MFEATURE_PCI_MIO 1 +#define MFEATURE_SCC 2 +#define MFEATURE_TLB_GUEST 3 +#define MFEATURE_TX 4 +#define MFEATURE_ESOP 5 +#define MFEATURE_DIAG9C 6 +#define MFEATURE_VM 7 +#define MFEATURE_KVM 8 +#define MFEATURE_LPAR 9 +#define MFEATURE_DIAG288 10 + +#ifndef __ASSEMBLY__ + +#include <linux/bitops.h> +#include <asm/alternative.h> + +extern unsigned long machine_features[1]; + +#define MAX_MFEATURE_BIT (sizeof(machine_features) * BITS_PER_BYTE) + +static inline void __set_machine_feature(unsigned int nr, unsigned long *mfeatures) +{ + if (nr >= MAX_MFEATURE_BIT) + return; + __set_bit(nr, mfeatures); +} + +static inline void set_machine_feature(unsigned int nr) +{ + __set_machine_feature(nr, machine_features); +} + +static inline void __clear_machine_feature(unsigned int nr, unsigned long *mfeatures) +{ + if (nr >= MAX_MFEATURE_BIT) + return; + __clear_bit(nr, mfeatures); +} + +static inline void clear_machine_feature(unsigned int nr) +{ + __clear_machine_feature(nr, machine_features); +} + +static bool __test_machine_feature(unsigned int nr, unsigned long *mfeatures) +{ + if (nr >= MAX_MFEATURE_BIT) + return false; + return test_bit(nr, mfeatures); +} + +static bool test_machine_feature(unsigned int nr) +{ + return __test_machine_feature(nr, machine_features); +} + +static __always_inline bool __test_machine_feature_constant(unsigned int nr) +{ + asm goto( + ALTERNATIVE("brcl 15,%l[l_no]", "brcl 0,0", ALT_FEATURE(%[nr])) + : + : [nr] "i" (nr) + : + : l_no); + return true; +l_no: + return false; +} + +#define DEFINE_MACHINE_HAS_FEATURE(name, feature) \ +static __always_inline bool machine_has_##name(void) \ +{ \ + if (!__is_defined(__DECOMPRESSOR) && __builtin_constant_p(feature)) \ + return __test_machine_feature_constant(feature); \ + return test_machine_feature(feature); \ +} + +DEFINE_MACHINE_HAS_FEATURE(relocated_lowcore, 
MFEATURE_LOWCORE) +DEFINE_MACHINE_HAS_FEATURE(scc, MFEATURE_SCC) +DEFINE_MACHINE_HAS_FEATURE(tlb_guest, MFEATURE_TLB_GUEST) +DEFINE_MACHINE_HAS_FEATURE(tx, MFEATURE_TX) +DEFINE_MACHINE_HAS_FEATURE(esop, MFEATURE_ESOP) +DEFINE_MACHINE_HAS_FEATURE(diag9c, MFEATURE_DIAG9C) +DEFINE_MACHINE_HAS_FEATURE(vm, MFEATURE_VM) +DEFINE_MACHINE_HAS_FEATURE(kvm, MFEATURE_KVM) +DEFINE_MACHINE_HAS_FEATURE(lpar, MFEATURE_LPAR) + +#define machine_is_vm machine_has_vm +#define machine_is_kvm machine_has_kvm +#define machine_is_lpar machine_has_lpar + +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_S390_MACHINE_H */ diff --git a/arch/s390/include/asm/march.h b/arch/s390/include/asm/march.h index fd9eef3be44c..11a71bd14954 100644 --- a/arch/s390/include/asm/march.h +++ b/arch/s390/include/asm/march.h @@ -33,6 +33,10 @@ #define MARCH_HAS_Z16_FEATURES 1 #endif +#ifdef CONFIG_HAVE_MARCH_Z17_FEATURES +#define MARCH_HAS_Z17_FEATURES 1 +#endif + #endif /* __DECOMPRESSOR */ #endif /* __ASM_S390_MARCH_H */ diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 4c2dc7abc285..f07e49b419ab 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -22,10 +22,7 @@ typedef struct { * The following bitfields need a down_write on the mm * semaphore when they are written to. As they are only * written once, they can be read without a lock. - * - * The mmu context allocates 4K page tables. */ - unsigned int alloc_pgste:1; /* The mmu context uses extended page tables. */ unsigned int has_pgste:1; /* The mmu context uses storage keys. 
*/ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index d56eb0a1f37b..d9b8501bc93d 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -13,6 +13,7 @@ #include <linux/mm_types.h> #include <asm/tlbflush.h> #include <asm/ctlreg.h> +#include <asm/asce.h> #include <asm-generic/mm_hooks.h> #define init_new_context init_new_context @@ -29,9 +30,6 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.gmap_asce = 0; mm->context.flush_mm = 0; #ifdef CONFIG_PGSTE - mm->context.alloc_pgste = page_table_allocate_pgste || - test_thread_flag(TIF_PGSTE) || - (current->mm && current->mm->context.alloc_pgste); mm->context.has_pgste = 0; mm->context.uses_skeys = 0; mm->context.uses_cmm = 0; @@ -80,7 +78,8 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct * else get_lowcore()->user_asce.val = next->context.asce; cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); - /* Clear previous user-ASCE from CR7 */ + /* Clear previous user-ASCE from CR1 and CR7 */ + local_ctl_load(1, &s390_invalid_asce); local_ctl_load(7, &s390_invalid_asce); if (prev != next) cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); @@ -102,6 +101,7 @@ static inline void finish_arch_post_lock_switch(void) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; + unsigned long flags; if (mm) { preempt_disable(); @@ -111,15 +111,25 @@ static inline void finish_arch_post_lock_switch(void) __tlb_flush_mm_lazy(mm); preempt_enable(); } + local_irq_save(flags); + if (test_thread_flag(TIF_ASCE_PRIMARY)) + local_ctl_load(1, &get_lowcore()->kernel_asce); + else + local_ctl_load(1, &get_lowcore()->user_asce); local_ctl_load(7, &get_lowcore()->user_asce); + local_irq_restore(flags); } #define activate_mm activate_mm static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { - switch_mm(prev, next, current); + switch_mm_irqs_off(prev, next, current); 
cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); + if (test_thread_flag(TIF_ASCE_PRIMARY)) + local_ctl_load(1, &get_lowcore()->kernel_asce); + else + local_ctl_load(1, &get_lowcore()->user_asce); local_ctl_load(7, &get_lowcore()->user_asce); } diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h index 192835a3e24d..c7c96282f011 100644 --- a/arch/s390/include/asm/nospec-branch.h +++ b/arch/s390/include/asm/nospec-branch.h @@ -26,8 +26,6 @@ static inline bool nospec_uses_trampoline(void) return __is_defined(CC_USING_EXPOLINE) && !nospec_disable; } -#ifdef CONFIG_EXPOLINE_EXTERN - void __s390_indirect_jump_r1(void); void __s390_indirect_jump_r2(void); void __s390_indirect_jump_r3(void); @@ -44,8 +42,6 @@ void __s390_indirect_jump_r13(void); void __s390_indirect_jump_r14(void); void __s390_indirect_jump_r15(void); -#endif - #endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_EXPOLINE_H */ diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 1ff145f7b52b..4e5dbabdf202 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -71,9 +71,11 @@ static inline void copy_page(void *to, void *from) #define vma_alloc_zeroed_movable_folio(vma, vaddr) \ vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr) -/* - * These are used to make use of C type-checking.. 
- */ +#ifdef CONFIG_STRICT_MM_TYPECHECKS +#define STRICT_MM_TYPECHECKS +#endif + +#ifdef STRICT_MM_TYPECHECKS typedef struct { unsigned long pgprot; } pgprot_t; typedef struct { unsigned long pgste; } pgste_t; @@ -82,43 +84,48 @@ typedef struct { unsigned long pmd; } pmd_t; typedef struct { unsigned long pud; } pud_t; typedef struct { unsigned long p4d; } p4d_t; typedef struct { unsigned long pgd; } pgd_t; -typedef pte_t *pgtable_t; -#define pgprot_val(x) ((x).pgprot) -#define pgste_val(x) ((x).pgste) - -static inline unsigned long pte_val(pte_t pte) -{ - return pte.pte; +#define DEFINE_PGVAL_FUNC(name) \ +static __always_inline unsigned long name ## _val(name ## _t name) \ +{ \ + return name.name; \ } -static inline unsigned long pmd_val(pmd_t pmd) -{ - return pmd.pmd; -} +#else /* STRICT_MM_TYPECHECKS */ -static inline unsigned long pud_val(pud_t pud) -{ - return pud.pud; -} +typedef unsigned long pgprot_t; +typedef unsigned long pgste_t; +typedef unsigned long pte_t; +typedef unsigned long pmd_t; +typedef unsigned long pud_t; +typedef unsigned long p4d_t; +typedef unsigned long pgd_t; -static inline unsigned long p4d_val(p4d_t p4d) -{ - return p4d.p4d; +#define DEFINE_PGVAL_FUNC(name) \ +static __always_inline unsigned long name ## _val(name ## _t name) \ +{ \ + return name; \ } -static inline unsigned long pgd_val(pgd_t pgd) -{ - return pgd.pgd; -} +#endif /* STRICT_MM_TYPECHECKS */ + +DEFINE_PGVAL_FUNC(pgprot) +DEFINE_PGVAL_FUNC(pgste) +DEFINE_PGVAL_FUNC(pte) +DEFINE_PGVAL_FUNC(pmd) +DEFINE_PGVAL_FUNC(pud) +DEFINE_PGVAL_FUNC(p4d) +DEFINE_PGVAL_FUNC(pgd) + +typedef pte_t *pgtable_t; +#define __pgprot(x) ((pgprot_t) { (x) } ) #define __pgste(x) ((pgste_t) { (x) } ) #define __pte(x) ((pte_t) { (x) } ) #define __pmd(x) ((pmd_t) { (x) } ) #define __pud(x) ((pud_t) { (x) } ) #define __p4d(x) ((p4d_t) { (x) } ) #define __pgd(x) ((pgd_t) { (x) } ) -#define __pgprot(x) ((pgprot_t) { (x) } ) static inline void page_set_storage_key(unsigned long addr, unsigned char skey, 
int mapped) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 474e1f8d1d3c..41f900f693d9 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -11,6 +11,9 @@ #include <asm/pci_insn.h> #include <asm/sclp.h> +#define ARCH_GENERIC_PCI_MMAP_RESOURCE 1 +#define arch_can_pci_mmap_wc() 1 + #define PCIBIOS_MIN_IO 0x1000 #define PCIBIOS_MIN_MEM 0x10000000 @@ -144,7 +147,7 @@ struct zpci_dev { u8 util_str_avail : 1; u8 irqs_registered : 1; u8 tid_avail : 1; - u8 reserved : 1; + u8 rtr_avail : 1; /* Relaxed translation allowed */ unsigned int devfn; /* DEVFN part of the RID*/ u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */ @@ -217,6 +220,7 @@ extern struct airq_iv *zpci_aif_sbv; struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state); int zpci_add_device(struct zpci_dev *zdev); int zpci_enable_device(struct zpci_dev *); +int zpci_reenable_device(struct zpci_dev *zdev); int zpci_disable_device(struct zpci_dev *); int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh); int zpci_deconfigure_device(struct zpci_dev *zdev); @@ -245,6 +249,7 @@ void update_uid_checking(bool new); /* IOMMU Interface */ int zpci_init_iommu(struct zpci_dev *zdev); void zpci_destroy_iommu(struct zpci_dev *zdev); +int zpci_iommu_register_ioat(struct zpci_dev *zdev, u8 *status); #ifdef CONFIG_PCI static inline bool zpci_use_mio(struct zpci_dev *zdev) diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h index 3fff2f7095c8..7ebff39c84b3 100644 --- a/arch/s390/include/asm/pci_clp.h +++ b/arch/s390/include/asm/pci_clp.h @@ -156,7 +156,9 @@ struct clp_rsp_query_pci_grp { u16 : 4; u16 noi : 12; /* number of interrupts */ u8 version; - u8 : 6; + u8 : 2; + u8 rtr : 1; /* Relaxed translation requirement */ + u8 : 3; u8 frame : 1; u8 refresh : 1; /* TLB refresh mode */ u16 : 3; diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h index 42d7cc4262ca..d12e17201661 100644 
--- a/arch/s390/include/asm/pci_dma.h +++ b/arch/s390/include/asm/pci_dma.h @@ -25,6 +25,7 @@ enum zpci_ioat_dtype { #define ZPCI_KEY (PAGE_DEFAULT_KEY << 5) #define ZPCI_TABLE_SIZE_RT (1UL << 42) +#define ZPCI_TABLE_SIZE_RS (1UL << 53) #define ZPCI_IOTA_STO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST) #define ZPCI_IOTA_RTTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT) @@ -55,6 +56,8 @@ enum zpci_ioat_dtype { #define ZPCI_PT_BITS 8 #define ZPCI_ST_SHIFT (ZPCI_PT_BITS + PAGE_SHIFT) #define ZPCI_RT_SHIFT (ZPCI_ST_SHIFT + ZPCI_TABLE_BITS) +#define ZPCI_RS_SHIFT (ZPCI_RT_SHIFT + ZPCI_TABLE_BITS) +#define ZPCI_RF_SHIFT (ZPCI_RS_SHIFT + ZPCI_TABLE_BITS) #define ZPCI_RTE_FLAG_MASK 0x3fffUL #define ZPCI_RTE_ADDR_MASK (~ZPCI_RTE_FLAG_MASK) diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index b19b6ed2ab53..5345398df653 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -26,7 +26,6 @@ unsigned long *page_table_alloc(struct mm_struct *); struct ptdesc *page_table_alloc_pgste(struct mm_struct *mm); void page_table_free(struct mm_struct *, unsigned long *); void page_table_free_pgste(struct ptdesc *ptdesc); -extern int page_table_allocate_pgste; static inline void crst_table_init(unsigned long *crst, unsigned long entry) { @@ -98,7 +97,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) if (!table) return NULL; crst_table_init(table, _SEGMENT_ENTRY_EMPTY); - if (!pagetable_pmd_ctor(virt_to_ptdesc(table))) { + if (!pagetable_pmd_ctor(mm, virt_to_ptdesc(table))) { crst_table_free(mm, table); return NULL; } diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 3ca5af4cfe43..6d8bc27a366e 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -14,6 +14,7 @@ #include <linux/sched.h> #include <linux/mm_types.h> +#include <linux/cpufeature.h> #include <linux/page-flags.h> #include <linux/radix-tree.h> 
#include <linux/atomic.h> @@ -583,13 +584,14 @@ static inline int mm_is_protected(struct mm_struct *mm) return 0; } -static inline int mm_alloc_pgste(struct mm_struct *mm) +static inline pgste_t clear_pgste_bit(pgste_t pgste, unsigned long mask) { -#ifdef CONFIG_PGSTE - if (unlikely(mm->context.alloc_pgste)) - return 1; -#endif - return 0; + return __pgste(pgste_val(pgste) & ~mask); +} + +static inline pgste_t set_pgste_bit(pgste_t pgste, unsigned long mask) +{ + return __pgste(pgste_val(pgste) | mask); } static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot) @@ -913,7 +915,7 @@ static inline int pmd_protnone(pmd_t pmd) } #endif -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } @@ -1339,7 +1341,7 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma, * PTE does not have _PAGE_PROTECT set, to avoid unnecessary overhead. * A local RDP can be used to do the flush. */ - if (MACHINE_HAS_RDP && !(pte_val(*ptep) & _PAGE_PROTECT)) + if (cpu_has_rdp() && !(pte_val(*ptep) & _PAGE_PROTECT)) __ptep_rdp(address, ptep, 0, 0, 1); } #define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault @@ -1354,7 +1356,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma, { if (pte_same(*ptep, entry)) return 0; - if (MACHINE_HAS_RDP && !mm_has_pgste(vma->vm_mm) && pte_allow_rdp(*ptep, entry)) + if (cpu_has_rdp() && !mm_has_pgste(vma->vm_mm) && pte_allow_rdp(*ptep, entry)) ptep_reset_dat_prot(vma->vm_mm, addr, ptep, entry); else ptep_xchg_direct(vma->vm_mm, addr, ptep, entry); @@ -1402,9 +1404,6 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr); #define pgprot_writecombine pgprot_writecombine pgprot_t pgprot_writecombine(pgprot_t prot); -#define pgprot_writethrough pgprot_writethrough -pgprot_t pgprot_writethrough(pgprot_t prot); - #define PFN_PTE_SHIFT PAGE_SHIFT /* @@ -1449,16 +1448,6 @@ static inline pte_t 
mk_pte_phys(unsigned long physpage, pgprot_t pgprot) return pte_mkyoung(__pte); } -static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) -{ - unsigned long physpage = page_to_phys(page); - pte_t __pte = mk_pte_phys(physpage, pgprot); - - if (pte_write(__pte) && PageDirty(page)) - __pte = pte_mkdirty(__pte); - return __pte; -} - #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) #define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D-1)) #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) @@ -1880,7 +1869,6 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, #define pmdp_collapse_flush pmdp_collapse_flush #define pfn_pmd(pfn, pgprot) mk_pmd_phys(((pfn) << PAGE_SHIFT), (pgprot)) -#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) static inline int pmd_trans_huge(pmd_t pmd) { @@ -1890,7 +1878,7 @@ static inline int pmd_trans_huge(pmd_t pmd) #define has_transparent_hugepage has_transparent_hugepage static inline int has_transparent_hugepage(void) { - return MACHINE_HAS_EDAT1 ? 1 : 0; + return cpu_has_edat1() ? 1 : 0; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/s390/include/asm/pkey.h b/arch/s390/include/asm/pkey.h index 5dca1a46a9f6..b7b59faf16f4 100644 --- a/arch/s390/include/asm/pkey.h +++ b/arch/s390/include/asm/pkey.h @@ -20,9 +20,22 @@ * @param key pointer to a buffer containing the key blob * @param keylen size of the key blob in bytes * @param protkey pointer to buffer receiving the protected key + * @param xflags additional execution flags (see PKEY_XFLAG_* definitions below) + * As of now the only supported flag is PKEY_XFLAG_NOMEMALLOC. 
* @return 0 on success, negative errno value on failure */ int pkey_key2protkey(const u8 *key, u32 keylen, - u8 *protkey, u32 *protkeylen, u32 *protkeytype); + u8 *protkey, u32 *protkeylen, u32 *protkeytype, + u32 xflags); + +/* + * If this flag is given in the xflags parameter, the pkey implementation + * is not allowed to allocate memory but instead should fall back to use + * preallocated memory or simply fail with -ENOMEM. + * This flag is for protected key derive within a cipher or similar + * which must not allocate memory which would cause io operations - see + * also the CRYPTO_ALG_ALLOCATES_MEMORY flag in crypto.h. + */ +#define PKEY_XFLAG_NOMEMALLOC 0x0001 #endif /* _KAPI_PKEY_H */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 4f8d5592c298..6c8063cb8fe7 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -31,6 +31,7 @@ #include <linux/cpumask.h> #include <linux/linkage.h> #include <linux/irqflags.h> +#include <linux/bitops.h> #include <asm/fpu-types.h> #include <asm/cpu.h> #include <asm/page.h> @@ -62,33 +63,27 @@ static __always_inline struct pcpu *this_pcpu(void) static __always_inline void set_cpu_flag(int flag) { - this_pcpu()->flags |= (1UL << flag); + set_bit(flag, &this_pcpu()->flags); } static __always_inline void clear_cpu_flag(int flag) { - this_pcpu()->flags &= ~(1UL << flag); + clear_bit(flag, &this_pcpu()->flags); } static __always_inline bool test_cpu_flag(int flag) { - return this_pcpu()->flags & (1UL << flag); + return test_bit(flag, &this_pcpu()->flags); } static __always_inline bool test_and_set_cpu_flag(int flag) { - if (test_cpu_flag(flag)) - return true; - set_cpu_flag(flag); - return false; + return test_and_set_bit(flag, &this_pcpu()->flags); } static __always_inline bool test_and_clear_cpu_flag(int flag) { - if (!test_cpu_flag(flag)) - return false; - clear_cpu_flag(flag); - return true; + return test_and_clear_bit(flag, &this_pcpu()->flags); } /* @@ 
-97,7 +92,7 @@ static __always_inline bool test_and_clear_cpu_flag(int flag) */ static __always_inline bool test_cpu_flag_of(int flag, int cpu) { - return per_cpu(pcpu_devices, cpu).flags & (1UL << flag); + return test_bit(flag, &per_cpu(pcpu_devices, cpu).flags); } #define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY) @@ -416,7 +411,11 @@ static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) static __always_inline void bpon(void) { - asm volatile(ALTERNATIVE("nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82))); + asm_inline volatile( + ALTERNATIVE(" nop\n", + " .insn rrf,0xb2e80000,0,0,13,0\n", + ALT_SPEC(82)) + ); } #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 788bc4467445..0905fa99a31e 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -9,16 +9,15 @@ #include <linux/bits.h> #include <uapi/asm/ptrace.h> +#include <asm/thread_info.h> #include <asm/tpi.h> #define PIF_SYSCALL 0 /* inside a system call */ -#define PIF_EXECVE_PGSTE_RESTART 1 /* restart execve for PGSTE binaries */ #define PIF_SYSCALL_RET_SET 2 /* return value was set via ptrace */ #define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */ #define PIF_FTRACE_FULL_REGS 4 /* all register contents valid (ftrace) */ #define _PIF_SYSCALL BIT(PIF_SYSCALL) -#define _PIF_EXECVE_PGSTE_RESTART BIT(PIF_EXECVE_PGSTE_RESTART) #define _PIF_SYSCALL_RET_SET BIT(PIF_SYSCALL_RET_SET) #define _PIF_GUEST_FAULT BIT(PIF_GUEST_FAULT) #define _PIF_FTRACE_FULL_REGS BIT(PIF_FTRACE_FULL_REGS) @@ -128,7 +127,6 @@ struct pt_regs { struct tpi_info tpi_info; }; unsigned long flags; - unsigned long cr1; unsigned long last_break; }; @@ -231,8 +229,44 @@ static inline void instruction_pointer_set(struct pt_regs *regs, int regs_query_register_offset(const char *name); const char *regs_query_register_name(unsigned int offset); -unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset); -unsigned long 
regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n); + +static __always_inline unsigned long kernel_stack_pointer(struct pt_regs *regs) +{ + return regs->gprs[15]; +} + +static __always_inline unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset) +{ + if (offset >= NUM_GPRS) + return 0; + return regs->gprs[offset]; +} + +static __always_inline int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) +{ + unsigned long ksp = kernel_stack_pointer(regs); + + return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1)); +} + +/** + * regs_get_kernel_stack_nth() - get Nth entry of the stack + * @regs:pt_regs which contains kernel stack pointer. + * @n:stack entry number. + * + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which + * is specified by @regs. If the @n th entry is NOT in the kernel stack, + * this returns 0. + */ +static __always_inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) +{ + unsigned long addr; + + addr = kernel_stack_pointer(regs) + n * sizeof(long); + if (!regs_within_kernel_stack(regs, addr)) + return 0; + return READ_ONCE_NOCHECK(*(unsigned long *)addr); +} /** * regs_get_kernel_argument() - get Nth function argument in kernel @@ -253,11 +287,6 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, return regs_get_kernel_stack_nth(regs, argoffset + n); } -static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) -{ - return regs->gprs[15]; -} - static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) { regs->gprs[2] = rc; diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 18f37dff03c9..1e62919bacf4 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -168,6 +168,7 @@ int sclp_early_read_storage_info(void); int sclp_early_get_core_info(struct sclp_core_info *info); void sclp_early_get_ipl_info(struct sclp_ipl_info *info); 
void sclp_early_detect(void); +void sclp_early_detect_machine_features(void); void sclp_early_printk(const char *s); void __sclp_early_printk(const char *s, unsigned int len); void sclp_emergency_printk(const char *s); diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 70b920b32827..031e881b4d88 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -13,28 +13,6 @@ #define PARMAREA 0x10400 #define COMMAND_LINE_SIZE CONFIG_COMMAND_LINE_SIZE -/* - * Machine features detected in early.c - */ - -#define MACHINE_FLAG_VM BIT(0) -#define MACHINE_FLAG_KVM BIT(1) -#define MACHINE_FLAG_LPAR BIT(2) -#define MACHINE_FLAG_DIAG9C BIT(3) -#define MACHINE_FLAG_ESOP BIT(4) -#define MACHINE_FLAG_IDTE BIT(5) -#define MACHINE_FLAG_EDAT1 BIT(7) -#define MACHINE_FLAG_EDAT2 BIT(8) -#define MACHINE_FLAG_TOPOLOGY BIT(10) -#define MACHINE_FLAG_TE BIT(11) -#define MACHINE_FLAG_TLB_LC BIT(12) -#define MACHINE_FLAG_TLB_GUEST BIT(14) -#define MACHINE_FLAG_NX BIT(15) -#define MACHINE_FLAG_GS BIT(16) -#define MACHINE_FLAG_SCC BIT(17) -#define MACHINE_FLAG_PCI_MIO BIT(18) -#define MACHINE_FLAG_RDP BIT(19) -#define MACHINE_FLAG_SEQ_INSN BIT(20) #define LPP_MAGIC BIT(31) #define LPP_PID_MASK _AC(0xffffffff, UL) @@ -78,26 +56,6 @@ extern unsigned long max_mappable; /* The Write Back bit position in the physaddr is given by the SLPC PCI */ extern unsigned long mio_wb_bit_mask; -#define MACHINE_IS_VM (get_lowcore()->machine_flags & MACHINE_FLAG_VM) -#define MACHINE_IS_KVM (get_lowcore()->machine_flags & MACHINE_FLAG_KVM) -#define MACHINE_IS_LPAR (get_lowcore()->machine_flags & MACHINE_FLAG_LPAR) - -#define MACHINE_HAS_DIAG9C (get_lowcore()->machine_flags & MACHINE_FLAG_DIAG9C) -#define MACHINE_HAS_ESOP (get_lowcore()->machine_flags & MACHINE_FLAG_ESOP) -#define MACHINE_HAS_IDTE (get_lowcore()->machine_flags & MACHINE_FLAG_IDTE) -#define MACHINE_HAS_EDAT1 (get_lowcore()->machine_flags & MACHINE_FLAG_EDAT1) -#define MACHINE_HAS_EDAT2 
(get_lowcore()->machine_flags & MACHINE_FLAG_EDAT2) -#define MACHINE_HAS_TOPOLOGY (get_lowcore()->machine_flags & MACHINE_FLAG_TOPOLOGY) -#define MACHINE_HAS_TE (get_lowcore()->machine_flags & MACHINE_FLAG_TE) -#define MACHINE_HAS_TLB_LC (get_lowcore()->machine_flags & MACHINE_FLAG_TLB_LC) -#define MACHINE_HAS_TLB_GUEST (get_lowcore()->machine_flags & MACHINE_FLAG_TLB_GUEST) -#define MACHINE_HAS_NX (get_lowcore()->machine_flags & MACHINE_FLAG_NX) -#define MACHINE_HAS_GS (get_lowcore()->machine_flags & MACHINE_FLAG_GS) -#define MACHINE_HAS_SCC (get_lowcore()->machine_flags & MACHINE_FLAG_SCC) -#define MACHINE_HAS_PCI_MIO (get_lowcore()->machine_flags & MACHINE_FLAG_PCI_MIO) -#define MACHINE_HAS_RDP (get_lowcore()->machine_flags & MACHINE_FLAG_RDP) -#define MACHINE_HAS_SEQ_INSN (get_lowcore()->machine_flags & MACHINE_FLAG_SEQ_INSN) - /* * Console mode. Override with conmode= */ diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 7feca96c48c6..03f4d01664f8 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -7,11 +7,29 @@ #ifndef __ASM_SMP_H #define __ASM_SMP_H -#include <asm/sigp.h> -#include <asm/lowcore.h> #include <asm/processor.h> +#include <asm/lowcore.h> +#include <asm/machine.h> +#include <asm/sigp.h> + +static __always_inline unsigned int raw_smp_processor_id(void) +{ + unsigned long lc_cpu_nr; + unsigned int cpu; + + BUILD_BUG_ON(sizeof_field(struct lowcore, cpu_nr) != sizeof(cpu)); + lc_cpu_nr = offsetof(struct lowcore, cpu_nr); + asm_inline( + ALTERNATIVE(" ly %[cpu],%[offzero](%%r0)\n", + " ly %[cpu],%[offalt](%%r0)\n", + ALT_FEATURE(MFEATURE_LOWCORE)) + : [cpu] "=d" (cpu) + : [offzero] "i" (lc_cpu_nr), + [offalt] "i" (lc_cpu_nr + LOWCORE_ALT_ADDRESS), + "m" (((struct lowcore *)0)->cpu_nr)); + return cpu; +} -#define raw_smp_processor_id() (get_lowcore()->cpu_nr) #define arch_scale_cpu_capacity smp_cpu_get_capacity extern struct mutex smp_cpu_state_mutex; diff --git a/arch/s390/include/asm/spinlock.h 
b/arch/s390/include/asm/spinlock.h index f87dd0a84855..f9935db9fd76 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -16,7 +16,23 @@ #include <asm/processor.h> #include <asm/alternative.h> -#define SPINLOCK_LOCKVAL (get_lowcore()->spinlock_lockval) +static __always_inline unsigned int spinlock_lockval(void) +{ + unsigned long lc_lockval; + unsigned int lockval; + + BUILD_BUG_ON(sizeof_field(struct lowcore, spinlock_lockval) != sizeof(lockval)); + lc_lockval = offsetof(struct lowcore, spinlock_lockval); + asm_inline( + ALTERNATIVE(" ly %[lockval],%[offzero](%%r0)\n", + " ly %[lockval],%[offalt](%%r0)\n", + ALT_FEATURE(MFEATURE_LOWCORE)) + : [lockval] "=d" (lockval) + : [offzero] "i" (lc_lockval), + [offalt] "i" (lc_lockval + LOWCORE_ALT_ADDRESS), + "m" (((struct lowcore *)0)->spinlock_lockval)); + return lockval; +} extern int spin_retry; @@ -60,7 +76,7 @@ static inline int arch_spin_trylock_once(arch_spinlock_t *lp) int old = 0; barrier(); - return likely(arch_try_cmpxchg(&lp->lock, &old, SPINLOCK_LOCKVAL)); + return likely(arch_try_cmpxchg(&lp->lock, &old, spinlock_lockval())); } static inline void arch_spin_lock(arch_spinlock_t *lp) diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h index 2ab868cbae6c..f8f68f4ef255 100644 --- a/arch/s390/include/asm/string.h +++ b/arch/s390/include/asm/string.h @@ -26,11 +26,9 @@ void *memmove(void *dest, const void *src, size_t n); #define __HAVE_ARCH_MEMSCAN /* inline & arch function */ #define __HAVE_ARCH_STRCAT /* inline & arch function */ #define __HAVE_ARCH_STRCMP /* arch function */ -#define __HAVE_ARCH_STRCPY /* inline & arch function */ #define __HAVE_ARCH_STRLCAT /* arch function */ #define __HAVE_ARCH_STRLEN /* inline & arch function */ #define __HAVE_ARCH_STRNCAT /* arch function */ -#define __HAVE_ARCH_STRNCPY /* arch function */ #define __HAVE_ARCH_STRNLEN /* inline & arch function */ #define __HAVE_ARCH_STRSTR /* arch function */ #define 
__HAVE_ARCH_MEMSET16 /* arch function */ @@ -42,7 +40,6 @@ int memcmp(const void *s1, const void *s2, size_t n); int strcmp(const char *s1, const char *s2); size_t strlcat(char *dest, const char *src, size_t n); char *strncat(char *dest, const char *src, size_t n); -char *strncpy(char *dest, const char *src, size_t n); char *strstr(const char *s1, const char *s2); #endif /* !defined(CONFIG_KASAN) && !defined(CONFIG_KMSAN) */ @@ -155,22 +152,6 @@ static inline char *strcat(char *dst, const char *src) } #endif -#ifdef __HAVE_ARCH_STRCPY -static inline char *strcpy(char *dst, const char *src) -{ - char *ret = dst; - - asm volatile( - " lghi 0,0\n" - "0: mvst %[dst],%[src]\n" - " jo 0b" - : [dst] "+&a" (dst), [src] "+&a" (src) - : - : "cc", "memory", "0"); - return ret; -} -#endif - #if defined(__HAVE_ARCH_STRLEN) || (defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)) static inline size_t __no_sanitize_prefix_strfunc(strlen)(const char *s) { @@ -208,7 +189,6 @@ static inline size_t strnlen(const char * s, size_t n) void *memchr(const void * s, int c, size_t n); void *memscan(void *s, int c, size_t n); char *strcat(char *dst, const char *src); -char *strcpy(char *dst, const char *src); size_t strlen(const char *s); size_t strnlen(const char * s, size_t n); #endif /* !IN_ARCH_STRING_C */ diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 27e3d804b311..bd4cb00ccd5e 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -24,6 +24,18 @@ static inline long syscall_get_nr(struct task_struct *task, (regs->int_code & 0xffff) : -1; } +static inline void syscall_set_nr(struct task_struct *task, + struct pt_regs *regs, + int nr) +{ + /* + * Unlike syscall_get_nr(), syscall_set_nr() can be called only when + * the target task is stopped for tracing on entering syscall, so + * there is no need to have the same check syscall_get_nr() has. 
+ */ + regs->int_code = (regs->int_code & ~0xffff) | (nr & 0xffff); +} + static inline void syscall_rollback(struct task_struct *task, struct pt_regs *regs) { @@ -65,19 +77,26 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned long *args) { unsigned long mask = -1UL; - unsigned int n = 6; #ifdef CONFIG_COMPAT if (test_tsk_thread_flag(task, TIF_31BIT)) mask = 0xffffffff; #endif - while (n-- > 0) - if (n > 0) - args[n] = regs->gprs[2 + n] & mask; + for (int i = 1; i < 6; i++) + args[i] = regs->gprs[2 + i] & mask; args[0] = regs->orig_gpr2 & mask; } +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + const unsigned long *args) +{ + regs->orig_gpr2 = args[0]; + for (int n = 1; n < 6; n++) + regs->gprs[2 + n] = args[n]; +} + static inline int syscall_get_arch(struct task_struct *task) { #ifdef CONFIG_COMPAT diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index edca5a751df4..9088c5267f35 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -11,8 +11,34 @@ #ifndef __ASM_S390_SYSINFO_H #define __ASM_S390_SYSINFO_H -#include <asm/bitsperlong.h> #include <linux/uuid.h> +#include <asm/bitsperlong.h> +#include <asm/asm.h> + +/* + * stsi - store system information + * + * Returns the current configuration level if function code 0 was specified. + * Otherwise returns 0 on success or a negative value on error. + */ +static inline int stsi(void *sysinfo, int fc, int sel1, int sel2) +{ + int r0 = (fc << 28) | sel1; + int cc; + + asm volatile( + " lr %%r0,%[r0]\n" + " lr %%r1,%[r1]\n" + " stsi %[sysinfo]\n" + " lr %[r0],%%r0\n" + CC_IPM(cc) + : CC_OUT(cc, cc), [r0] "+d" (r0), [sysinfo] "=Q" (*(char *)sysinfo) + : [r1] "d" (sel2) + : CC_CLOBBER_LIST("0", "1", "memory")); + if (cc == 3) + return -EOPNOTSUPP; + return fc ? 
0 : (unsigned int)r0 >> 28; +} struct sysinfo_1_1_1 { unsigned char p:1; diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index c33f7144d1b9..391eb04d26d8 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -9,9 +9,7 @@ #define _ASM_THREAD_INFO_H #include <linux/bits.h> -#ifndef ASM_OFFSETS_C -#include <asm/asm-offsets.h> -#endif +#include <vdso/page.h> /* * General size of kernel stacks @@ -27,8 +25,6 @@ #define STACK_INIT_OFFSET (THREAD_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE) #ifndef __ASSEMBLY__ -#include <asm/lowcore.h> -#include <asm/page.h> /* * low level task data that entry.S needs immediate access to @@ -67,7 +63,7 @@ void arch_setup_new_exec(void); #define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling needed */ #define TIF_UPROBE 4 /* breakpointed or single-stepping */ #define TIF_PATCH_PENDING 5 /* pending live patching update */ -#define TIF_PGSTE 6 /* New mm's will use 4K page tables */ +#define TIF_ASCE_PRIMARY 6 /* primary asce is kernel asce */ #define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ #define TIF_GUARDED_STORAGE 8 /* load guarded storage control block */ #define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */ @@ -89,7 +85,7 @@ void arch_setup_new_exec(void); #define _TIF_NEED_RESCHED_LAZY BIT(TIF_NEED_RESCHED_LAZY) #define _TIF_UPROBE BIT(TIF_UPROBE) #define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING) -#define _TIF_PGSTE BIT(TIF_PGSTE) +#define _TIF_ASCE_PRIMARY BIT(TIF_ASCE_PRIMARY) #define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL) #define _TIF_GUARDED_STORAGE BIT(TIF_GUARDED_STORAGE) #define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index a9460bd6555b..bed8d0b5a282 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -13,6 +13,7 @@ #include <linux/preempt.h> #include <linux/time64.h> #include <asm/lowcore.h> +#include 
<asm/machine.h> #include <asm/asm.h> /* The value of the TOD clock for 1.1.1970. */ @@ -267,7 +268,7 @@ static __always_inline u128 eitod_to_ns(u128 todval) */ static inline int tod_after(unsigned long a, unsigned long b) { - if (MACHINE_HAS_SCC) + if (machine_has_scc()) return (long) a > (long) b; return a > b; } @@ -281,7 +282,7 @@ static inline int tod_after(unsigned long a, unsigned long b) */ static inline int tod_after_eq(unsigned long a, unsigned long b) { - if (MACHINE_HAS_SCC) + if (machine_has_scc()) return (long) a >= (long) b; return a >= b; } diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 72655fd2d867..1e50f6f1ad9d 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -36,11 +36,12 @@ static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb, #include <asm/tlbflush.h> #include <asm-generic/tlb.h> +#include <asm/gmap.h> /* * Release the page cache reference for a pte removed by * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page - * has already been freed, so just do free_page_and_swap_cache. + * has already been freed, so just do free_folio_and_swap_cache. * * s390 doesn't delay rmap removal. 
*/ @@ -49,7 +50,7 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, { VM_WARN_ON_ONCE(delay_rmap); - free_page_and_swap_cache(page); + free_folio_and_swap_cache(page_folio(page)); return false; } @@ -84,7 +85,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; tlb->cleared_pmds = 1; - if (mm_alloc_pgste(tlb->mm)) + if (mm_has_pgste(tlb->mm)) gmap_unlink(tlb->mm, (unsigned long *)pte, address); tlb_remove_ptdesc(tlb, virt_to_ptdesc(pte)); } diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 9dfd46dd03c6..75491baa2197 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -2,9 +2,11 @@ #ifndef _S390_TLBFLUSH_H #define _S390_TLBFLUSH_H +#include <linux/cpufeature.h> #include <linux/mm.h> #include <linux/sched.h> #include <asm/processor.h> +#include <asm/machine.h> /* * Flush all TLB entries on the local CPU. @@ -22,7 +24,7 @@ static inline void __tlb_flush_idte(unsigned long asce) unsigned long opt; opt = IDTE_PTOA; - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) opt |= IDTE_GUEST_ASCE; /* Global TLB flush for the mm */ asm volatile("idte 0,%1,%0" : : "a" (opt), "a" (asce) : "cc"); @@ -52,7 +54,7 @@ static inline void __tlb_flush_mm(struct mm_struct *mm) cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); barrier(); gmap_asce = READ_ONCE(mm->context.gmap_asce); - if (MACHINE_HAS_IDTE && gmap_asce != -1UL) { + if (cpu_has_idte() && gmap_asce != -1UL) { if (gmap_asce) __tlb_flush_idte(gmap_asce); __tlb_flush_idte(mm->context.asce); @@ -66,7 +68,7 @@ static inline void __tlb_flush_mm(struct mm_struct *mm) static inline void __tlb_flush_kernel(void) { - if (MACHINE_HAS_IDTE) + if (cpu_has_idte()) __tlb_flush_idte(init_mm.context.asce); else __tlb_flush_global(); diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index cef06bffad80..44110847342a 100644 --- 
a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -61,6 +61,12 @@ static inline void topology_expect_change(void) { } #endif /* CONFIG_SCHED_TOPOLOGY */ +static inline bool topology_is_primary_thread(unsigned int cpu) +{ + return smp_get_base_cpu(cpu) == cpu; +} +#define topology_is_primary_thread topology_is_primary_thread + #define POLARIZATION_UNKNOWN (-1) #define POLARIZATION_HRZ (0) #define POLARIZATION_VL (1) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index f5920163ee97..a43fc88c0050 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -13,125 +13,80 @@ /* * User space memory access functions */ +#include <linux/pgtable.h> #include <asm/asm-extable.h> #include <asm/processor.h> #include <asm/extable.h> #include <asm/facility.h> #include <asm-generic/access_ok.h> +#include <asm/asce.h> #include <linux/instrumented.h> void debug_user_asce(int exit); -union oac { - unsigned int val; - struct { - struct { - unsigned short key : 4; - unsigned short : 4; - unsigned short as : 2; - unsigned short : 4; - unsigned short k : 1; - unsigned short a : 1; - } oac1; - struct { - unsigned short key : 4; - unsigned short : 4; - unsigned short as : 2; - unsigned short : 4; - unsigned short k : 1; - unsigned short a : 1; - } oac2; - }; -}; - -static __always_inline __must_check unsigned long -raw_copy_from_user_key(void *to, const void __user *from, unsigned long size, unsigned long key) -{ - unsigned long rem; - union oac spec = { - .oac2.key = key, - .oac2.as = PSW_BITS_AS_SECONDARY, - .oac2.k = 1, - .oac2.a = 1, - }; - - asm_inline volatile( - " lr %%r0,%[spec]\n" - "0: mvcos 0(%[to]),0(%[from]),%[size]\n" - "1: jz 5f\n" - " algr %[size],%[val]\n" - " slgr %[from],%[val]\n" - " slgr %[to],%[val]\n" - " j 0b\n" - "2: la %[rem],4095(%[from])\n" /* rem = from + 4095 */ - " nr %[rem],%[val]\n" /* rem = (from + 4095) & -4096 */ - " slgr %[rem],%[from]\n" - " clgr %[size],%[rem]\n" /* 
copy crosses next page boundary? */ - " jnh 6f\n" - "3: mvcos 0(%[to]),0(%[from]),%[rem]\n" - "4: slgr %[size],%[rem]\n" - " j 6f\n" - "5: lghi %[size],0\n" - "6:\n" - EX_TABLE(0b, 2b) - EX_TABLE(1b, 2b) - EX_TABLE(3b, 6b) - EX_TABLE(4b, 6b) - : [size] "+&a" (size), [from] "+&a" (from), [to] "+&a" (to), [rem] "=&a" (rem) - : [val] "a" (-4096UL), [spec] "d" (spec.val) - : "cc", "memory", "0"); - return size; -} +#ifdef CONFIG_KMSAN +#define uaccess_kmsan_or_inline noinline __maybe_unused __no_sanitize_memory +#else +#define uaccess_kmsan_or_inline __always_inline +#endif -static __always_inline __must_check unsigned long -raw_copy_from_user(void *to, const void __user *from, unsigned long n) -{ - return raw_copy_from_user_key(to, from, n, 0); -} +#define INLINE_COPY_FROM_USER +#define INLINE_COPY_TO_USER -static __always_inline __must_check unsigned long -raw_copy_to_user_key(void __user *to, const void *from, unsigned long size, unsigned long key) +static uaccess_kmsan_or_inline __must_check unsigned long +raw_copy_from_user(void *to, const void __user *from, unsigned long size) { - unsigned long rem; - union oac spec = { - .oac1.key = key, - .oac1.as = PSW_BITS_AS_SECONDARY, - .oac1.k = 1, - .oac1.a = 1, - }; - - asm_inline volatile( - " lr %%r0,%[spec]\n" - "0: mvcos 0(%[to]),0(%[from]),%[size]\n" - "1: jz 5f\n" - " algr %[size],%[val]\n" - " slgr %[to],%[val]\n" - " slgr %[from],%[val]\n" - " j 0b\n" - "2: la %[rem],4095(%[to])\n" /* rem = to + 4095 */ - " nr %[rem],%[val]\n" /* rem = (to + 4095) & -4096 */ - " slgr %[rem],%[to]\n" - " clgr %[size],%[rem]\n" /* copy crosses next page boundary? 
*/ - " jnh 6f\n" - "3: mvcos 0(%[to]),0(%[from]),%[rem]\n" - "4: slgr %[size],%[rem]\n" - " j 6f\n" - "5: lghi %[size],0\n" - "6:\n" - EX_TABLE(0b, 2b) - EX_TABLE(1b, 2b) - EX_TABLE(3b, 6b) - EX_TABLE(4b, 6b) - : [size] "+&a" (size), [to] "+&a" (to), [from] "+&a" (from), [rem] "=&a" (rem) - : [val] "a" (-4096UL), [spec] "d" (spec.val) - : "cc", "memory", "0"); - return size; + unsigned long osize; + int cc; + + while (1) { + osize = size; + asm_inline volatile( + " lhi %%r0,%[spec]\n" + "0: mvcos %[to],%[from],%[size]\n" + "1: nopr %%r7\n" + CC_IPM(cc) + EX_TABLE_UA_MVCOS_FROM(0b, 0b) + EX_TABLE_UA_MVCOS_FROM(1b, 0b) + : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char *)to) + : [spec] "I" (0x81), [from] "Q" (*(const char __user *)from) + : CC_CLOBBER_LIST("memory", "0")); + if (__builtin_constant_p(osize) && osize <= 4096) + return osize - size; + if (likely(CC_TRANSFORM(cc) == 0)) + return osize - size; + size -= 4096; + to += 4096; + from += 4096; + } } -static __always_inline __must_check unsigned long -raw_copy_to_user(void __user *to, const void *from, unsigned long n) +static uaccess_kmsan_or_inline __must_check unsigned long +raw_copy_to_user(void __user *to, const void *from, unsigned long size) { - return raw_copy_to_user_key(to, from, n, 0); + unsigned long osize; + int cc; + + while (1) { + osize = size; + asm_inline volatile( + " llilh %%r0,%[spec]\n" + "0: mvcos %[to],%[from],%[size]\n" + "1: nopr %%r7\n" + CC_IPM(cc) + EX_TABLE_UA_MVCOS_TO(0b, 0b) + EX_TABLE_UA_MVCOS_TO(1b, 0b) + : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char __user *)to) + : [spec] "I" (0x81), [from] "Q" (*(const char *)from) + : CC_CLOBBER_LIST("memory", "0")); + if (__builtin_constant_p(osize) && osize <= 4096) + return osize - size; + if (likely(CC_TRANSFORM(cc) == 0)) + return osize - size; + size -= 4096; + to += 4096; + from += 4096; + } } unsigned long __must_check @@ -158,12 +113,6 @@ copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned 
lo int __noreturn __put_user_bad(void); -#ifdef CONFIG_KMSAN -#define uaccess_kmsan_or_inline noinline __maybe_unused __no_sanitize_memory -#else -#define uaccess_kmsan_or_inline __always_inline -#endif - #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT #define DEFINE_PUT_USER_NOINSTR(type) \ @@ -199,7 +148,7 @@ __put_user_##type##_noinstr(unsigned type __user *to, \ { \ int rc; \ \ - asm volatile( \ + asm_inline volatile( \ " llilh %%r0,%[spec]\n" \ "0: mvcos %[to],%[from],%[size]\n" \ "1: lhi %[rc],0\n" \ @@ -315,7 +264,7 @@ __get_user_##type##_noinstr(unsigned type *to, \ { \ int rc; \ \ - asm volatile( \ + asm_inline volatile( \ " lhi %%r0,%[spec]\n" \ "0: mvcos %[to],%[from],%[size]\n" \ "1: lhi %[rc],0\n" \ @@ -415,12 +364,34 @@ long __must_check strncpy_from_user(char *dst, const char __user *src, long coun long __must_check strnlen_user(const char __user *src, long count); -/* - * Zero Userspace - */ -unsigned long __must_check __clear_user(void __user *to, unsigned long size); +static uaccess_kmsan_or_inline __must_check unsigned long +__clear_user(void __user *to, unsigned long size) +{ + unsigned long osize; + int cc; + + while (1) { + osize = size; + asm_inline volatile( + " llilh %%r0,%[spec]\n" + "0: mvcos %[to],%[from],%[size]\n" + "1: nopr %%r7\n" + CC_IPM(cc) + EX_TABLE_UA_MVCOS_TO(0b, 0b) + EX_TABLE_UA_MVCOS_TO(1b, 0b) + : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char __user *)to) + : [spec] "I" (0x81), [from] "Q" (*(const char *)empty_zero_page) + : CC_CLOBBER_LIST("memory", "0")); + if (__builtin_constant_p(osize) && osize <= 4096) + return osize - size; + if (CC_TRANSFORM(cc) == 0) + return osize - size; + size -= 4096; + to += 4096; + } +} -static inline unsigned long __must_check clear_user(void __user *to, unsigned long n) +static __always_inline unsigned long __must_check clear_user(void __user *to, unsigned long n) { might_fault(); return __clear_user(to, n); @@ -508,6 +479,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long 
address, void *uval, __uint128_t old, __uint128_t new, unsigned long key, int size) { + bool sacf_flag; int rc = 0; switch (size) { @@ -520,7 +492,8 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, _old = ((unsigned int)old & 0xff) << shift; _new = ((unsigned int)new & 0xff) << shift; mask = ~(0xff << shift); - asm volatile( + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile( " spka 0(%[key])\n" " sacf 256\n" " llill %[count],%[max_loops]\n" @@ -554,6 +527,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, [default_key] "J" (PAGE_DEFAULT_KEY), [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS) : "memory", "cc"); + disable_sacf_uaccess(sacf_flag); *(unsigned char *)uval = prev >> shift; if (!count) rc = -EAGAIN; @@ -568,7 +542,8 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, _old = ((unsigned int)old & 0xffff) << shift; _new = ((unsigned int)new & 0xffff) << shift; mask = ~(0xffff << shift); - asm volatile( + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile( " spka 0(%[key])\n" " sacf 256\n" " llill %[count],%[max_loops]\n" @@ -602,6 +577,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, [default_key] "J" (PAGE_DEFAULT_KEY), [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS) : "memory", "cc"); + disable_sacf_uaccess(sacf_flag); *(unsigned short *)uval = prev >> shift; if (!count) rc = -EAGAIN; @@ -610,7 +586,8 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, case 4: { unsigned int prev = old; - asm volatile( + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile( " spka 0(%[key])\n" " sacf 256\n" "0: cs %[prev],%[new],%[address]\n" @@ -625,13 +602,15 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, [key] "a" (key << 4), [default_key] "J" (PAGE_DEFAULT_KEY) : "memory", "cc"); + disable_sacf_uaccess(sacf_flag); *(unsigned int *)uval = prev; return rc; 
} case 8: { unsigned long prev = old; - asm volatile( + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile( " spka 0(%[key])\n" " sacf 256\n" "0: csg %[prev],%[new],%[address]\n" @@ -646,13 +625,15 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, [key] "a" (key << 4), [default_key] "J" (PAGE_DEFAULT_KEY) : "memory", "cc"); + disable_sacf_uaccess(sacf_flag); *(unsigned long *)uval = prev; return rc; } case 16: { __uint128_t prev = old; - asm volatile( + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile( " spka 0(%[key])\n" " sacf 256\n" "0: cdsg %[prev],%[new],%[address]\n" @@ -667,6 +648,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, [key] "a" (key << 4), [default_key] "J" (PAGE_DEFAULT_KEY) : "memory", "cc"); + disable_sacf_uaccess(sacf_flag); *(__uint128_t *)uval = prev; return rc; } diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index b11f5b6d0bd1..8018549a1ad2 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -16,7 +16,6 @@ #include <linux/bug.h> #include <linux/sched.h> #include <asm/page.h> -#include <asm/gmap.h> #include <asm/asm.h> #define UVC_CC_OK 0 @@ -616,8 +615,9 @@ static inline int uv_remove_shared(unsigned long addr) return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS); } -int uv_get_secret_metadata(const u8 secret_id[UV_SECRET_ID_LEN], - struct uv_secret_list_item_hdr *secret); +int uv_find_secret(const u8 secret_id[UV_SECRET_ID_LEN], + struct uv_secret_list *list, + struct uv_secret_list_item_hdr *secret); int uv_retrieve_secret(u16 secret_idx, u8 *buf, size_t buf_size); extern int prot_virt_host; @@ -631,7 +631,7 @@ int uv_pin_shared(unsigned long paddr); int uv_destroy_folio(struct folio *folio); int uv_destroy_pte(pte_t pte); int uv_convert_from_secure_pte(pte_t pte); -int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb); +int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct 
uv_cb_header *uvcb); int uv_convert_from_secure(unsigned long paddr); int uv_convert_from_secure_folio(struct folio *folio); diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 92c73e4d97a9..420a073fdde5 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -6,13 +6,11 @@ #ifndef __ASSEMBLY__ -extern struct vdso_data *vdso_data; - int vdso_getcpu_init(void); #endif /* __ASSEMBLY__ */ -#define __VVAR_PAGES 2 +#define __VDSO_PAGES 4 #define VDSO_VERSION_STRING LINUX_2.6.29 diff --git a/arch/s390/include/asm/vdso/getrandom.h b/arch/s390/include/asm/vdso/getrandom.h index 36355af7160b..f8713ce39bb2 100644 --- a/arch/s390/include/asm/vdso/getrandom.h +++ b/arch/s390/include/asm/vdso/getrandom.h @@ -23,18 +23,6 @@ static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsig return syscall3(__NR_getrandom, (long)buffer, (long)len, (long)flags); } -static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void) -{ - /* - * The RNG data is in the real VVAR data page, but if a task belongs to a time namespace - * then VVAR_DATA_PAGE_OFFSET points to the namespace-specific VVAR page and VVAR_TIMENS_ - * PAGE_OFFSET points to the real VVAR page. 
- */ - if (IS_ENABLED(CONFIG_TIME_NS) && _vdso_data->clock_mode == VDSO_CLOCKMODE_TIMENS) - return (void *)&_vdso_rng_data + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE; - return &_vdso_rng_data; -} - #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h index 7937765ccfa5..fb4564308e9d 100644 --- a/arch/s390/include/asm/vdso/gettimeofday.h +++ b/arch/s390/include/asm/vdso/gettimeofday.h @@ -14,12 +14,7 @@ #include <linux/compiler.h> -static __always_inline const struct vdso_data *__arch_get_vdso_data(void) -{ - return _vdso_data; -} - -static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *vd) +static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_time_data *vd) { u64 adj, now; @@ -49,12 +44,4 @@ long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts) return syscall2(__NR_clock_getres, (long)clkid, (long)ts); } -#ifdef CONFIG_TIME_NS -static __always_inline -const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) -{ - return _timens_data; -} -#endif - #endif diff --git a/arch/s390/include/asm/vdso/vsyscall.h b/arch/s390/include/asm/vdso/vsyscall.h index 3eb576ecd3bd..d346ebe51301 100644 --- a/arch/s390/include/asm/vdso/vsyscall.h +++ b/arch/s390/include/asm/vdso/vsyscall.h @@ -2,32 +2,12 @@ #ifndef __ASM_VDSO_VSYSCALL_H #define __ASM_VDSO_VSYSCALL_H -#define __VDSO_RND_DATA_OFFSET 768 - #ifndef __ASSEMBLY__ #include <linux/hrtimer.h> #include <vdso/datapage.h> #include <asm/vdso.h> -enum vvar_pages { - VVAR_DATA_PAGE_OFFSET, - VVAR_TIMENS_PAGE_OFFSET, - VVAR_NR_PAGES -}; - -static __always_inline struct vdso_data *__s390_get_k_vdso_data(void) -{ - return vdso_data; -} -#define __arch_get_k_vdso_data __s390_get_k_vdso_data - -static __always_inline struct vdso_rng_data *__s390_get_k_vdso_rnd_data(void) -{ - return (void *)vdso_data + __VDSO_RND_DATA_OFFSET; -} -#define 
__arch_get_k_vdso_rng_data __s390_get_k_vdso_rnd_data - /* The asm-generic header needs to be included after the definitions above */ #include <asm-generic/vdso/vsyscall.h> diff --git a/arch/s390/include/asm/word-at-a-time.h b/arch/s390/include/asm/word-at-a-time.h index 203acd6e431b..eaa19dee7699 100644 --- a/arch/s390/include/asm/word-at-a-time.h +++ b/arch/s390/include/asm/word-at-a-time.h @@ -52,7 +52,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) { unsigned long data; - asm volatile( + asm_inline volatile( "0: lg %[data],0(%[addr])\n" "1: nopr %%r7\n" EX_TABLE_ZEROPAD(0b, 1b, %[data], %[addr]) diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index db5f3a3faefb..ea5ed6654050 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -46,7 +46,7 @@ obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o wti.o obj-y += diag/ -extra-y += vmlinux.lds +always-$(KBUILD_BUILTIN) += vmlinux.lds obj-$(CONFIG_SYSFS) += nospec-sysfs.o CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c index 88f0b91d7a73..6252b7d115dd 100644 --- a/arch/s390/kernel/abs_lowcore.c +++ b/arch/s390/kernel/abs_lowcore.c @@ -5,7 +5,6 @@ #include <asm/sections.h> unsigned long __bootdata_preserved(__abs_lowcore); -int __bootdata_preserved(relocate_lowcore); int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc) { diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index 8d5d0de35de0..90c0e6408992 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -1,41 +1,90 @@ // SPDX-License-Identifier: GPL-2.0 +#ifndef pr_fmt +#define pr_fmt(fmt) "alt: " fmt +#endif + #include <linux/uaccess.h> +#include <linux/printk.h> #include <asm/nospec-branch.h> #include <asm/abs_lowcore.h> #include <asm/alternative.h> #include 
<asm/facility.h> +#include <asm/sections.h> +#include <asm/machine.h> + +#ifndef a_debug +#define a_debug pr_debug +#endif + +#ifndef __kernel_va +#define __kernel_va(x) (void *)(x) +#endif + +unsigned long __bootdata_preserved(machine_features[1]); + +struct alt_debug { + unsigned long facilities[MAX_FACILITY_BIT / BITS_PER_LONG]; + unsigned long mfeatures[MAX_MFEATURE_BIT / BITS_PER_LONG]; + int spec; +}; + +static struct alt_debug __bootdata_preserved(alt_debug); + +static void alternative_dump(u8 *old, u8 *new, unsigned int len, unsigned int type, unsigned int data) +{ + char oinsn[33], ninsn[33]; + unsigned long kptr; + unsigned int pos; + + for (pos = 0; pos < len && 2 * pos < sizeof(oinsn) - 3; pos++) + hex_byte_pack(&oinsn[2 * pos], old[pos]); + oinsn[2 * pos] = 0; + for (pos = 0; pos < len && 2 * pos < sizeof(ninsn) - 3; pos++) + hex_byte_pack(&ninsn[2 * pos], new[pos]); + ninsn[2 * pos] = 0; + kptr = (unsigned long)__kernel_va(old); + a_debug("[%d/%3d] %016lx: %s -> %s\n", type, data, kptr, oinsn, ninsn); +} void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx) { - u8 *instr, *replacement; + struct alt_debug *d; struct alt_instr *a; - bool replace; + bool debug, replace; + u8 *old, *new; /* * The scan order should be from start to end. A later scanned * alternative code can overwrite previously scanned alternative code. 
*/ + d = &alt_debug; for (a = start; a < end; a++) { if (!(a->ctx & ctx)) continue; switch (a->type) { case ALT_TYPE_FACILITY: replace = test_facility(a->data); + debug = __test_facility(a->data, d->facilities); + break; + case ALT_TYPE_FEATURE: + replace = test_machine_feature(a->data); + debug = __test_machine_feature(a->data, d->mfeatures); break; case ALT_TYPE_SPEC: replace = nobp_enabled(); - break; - case ALT_TYPE_LOWCORE: - replace = have_relocated_lowcore(); + debug = d->spec; break; default: replace = false; + debug = false; } if (!replace) continue; - instr = (u8 *)&a->instr_offset + a->instr_offset; - replacement = (u8 *)&a->repl_offset + a->repl_offset; - s390_kernel_write(instr, replacement, a->instrlen); + old = (u8 *)&a->instr_offset + a->instr_offset; + new = (u8 *)&a->repl_offset + a->repl_offset; + if (debug) + alternative_dump(old, new, a->instrlen, a->type, a->data); + s390_kernel_write(old, new, a->instrlen); } } diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 36709112ae7a..95ecad9c7d7d 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -5,15 +5,14 @@ * and format the required data. 
*/ -#define ASM_OFFSETS_C - #include <linux/kbuild.h> -#include <linux/kvm_host.h> #include <linux/sched.h> #include <linux/purgatory.h> #include <linux/pgtable.h> -#include <linux/ftrace.h> +#include <linux/ftrace_regs.h> +#include <asm/kvm_host_types.h> #include <asm/stacktrace.h> +#include <asm/ptrace.h> int main(void) { @@ -49,8 +48,8 @@ int main(void) OFFSET(__PT_R14, pt_regs, gprs[14]); OFFSET(__PT_R15, pt_regs, gprs[15]); OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2); + OFFSET(__PT_INT_CODE, pt_regs, int_code); OFFSET(__PT_FLAGS, pt_regs, flags); - OFFSET(__PT_CR1, pt_regs, cr1); OFFSET(__PT_LAST_BREAK, pt_regs, last_break); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); @@ -76,7 +75,8 @@ int main(void) OFFSET(__LC_EXT_CPU_ADDR, lowcore, ext_cpu_addr); OFFSET(__LC_EXT_INT_CODE, lowcore, ext_int_code); OFFSET(__LC_PGM_ILC, lowcore, pgm_ilc); - OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_code); + OFFSET(__LC_PGM_CODE, lowcore, pgm_code); + OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_int_code); OFFSET(__LC_DATA_EXC_CODE, lowcore, data_exc_code); OFFSET(__LC_MON_CLASS_NR, lowcore, mon_class_num); OFFSET(__LC_PER_CODE, lowcore, per_code); @@ -122,7 +122,6 @@ int main(void) OFFSET(__LC_LAST_UPDATE_TIMER, lowcore, last_update_timer); OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock); OFFSET(__LC_INT_CLOCK, lowcore, int_clock); - OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock); OFFSET(__LC_CURRENT, lowcore, current_task); OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack); OFFSET(__LC_ASYNC_STACK, lowcore, async_stack); diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c index bf983513dd33..c217a5e64094 100644 --- a/arch/s390/kernel/cert_store.c +++ b/arch/s390/kernel/cert_store.c @@ -138,7 +138,7 @@ static void cert_store_key_describe(const struct key *key, struct seq_file *m) * First 64 bytes of the key description is key name in EBCDIC CP 500. * Convert it to ASCII for displaying in /proc/keys. 
*/ - strscpy(ascii, key->description, sizeof(ascii)); + strscpy(ascii, key->description); EBCASC_500(ascii, VC_NAME_LEN_BYTES); seq_puts(m, ascii); @@ -235,7 +235,7 @@ static int __diag320(unsigned long subcode, void *addr) { union register_pair rp = { .even = (unsigned long)addr, }; - asm volatile( + asm_inline volatile( " diag %[rp],%[subcode],0x320\n" "0: nopr %%r7\n" EX_TABLE(0b, 0b) diff --git a/arch/s390/kernel/cpufeature.c b/arch/s390/kernel/cpufeature.c index 1b2ae42a0c15..76210f001028 100644 --- a/arch/s390/kernel/cpufeature.c +++ b/arch/s390/kernel/cpufeature.c @@ -5,11 +5,13 @@ #include <linux/cpufeature.h> #include <linux/bug.h> +#include <asm/machine.h> #include <asm/elf.h> enum { TYPE_HWCAP, TYPE_FACILITY, + TYPE_MACHINE, }; struct s390_cpu_feature { @@ -21,6 +23,7 @@ static struct s390_cpu_feature s390_cpu_features[MAX_CPU_FEATURES] = { [S390_CPU_FEATURE_MSA] = {.type = TYPE_HWCAP, .num = HWCAP_NR_MSA}, [S390_CPU_FEATURE_VXRS] = {.type = TYPE_HWCAP, .num = HWCAP_NR_VXRS}, [S390_CPU_FEATURE_UV] = {.type = TYPE_FACILITY, .num = 158}, + [S390_CPU_FEATURE_D288] = {.type = TYPE_MACHINE, .num = MFEATURE_DIAG288}, }; /* @@ -38,6 +41,8 @@ int cpu_have_feature(unsigned int num) return !!(elf_hwcap & BIT(feature->num)); case TYPE_FACILITY: return test_facility(feature->num); + case TYPE_MACHINE: + return test_machine_feature(feature->num); default: WARN_ON_ONCE(1); return 0; diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 276cb4c1e11b..adb164223f8c 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -246,15 +246,6 @@ bool is_kdump_kernel(void) } EXPORT_SYMBOL_GPL(is_kdump_kernel); -static const char *nt_name(Elf64_Word type) -{ - const char *name = "LINUX"; - - if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG) - name = KEXEC_CORE_NOTE_NAME; - return name; -} - /* * Initialize ELF note */ @@ -279,10 +270,8 @@ static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int 
d_len, return PTR_ADD(buf, len); } -static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len) -{ - return nt_init_name(buf, type, desc, d_len, nt_name(type)); -} +#define nt_init(buf, type, desc) \ + nt_init_name(buf, NT_ ## type, &(desc), sizeof(desc), NN_ ## type) /* * Calculate the size of ELF note @@ -298,10 +287,7 @@ static size_t nt_size_name(int d_len, const char *name) return size; } -static inline size_t nt_size(Elf64_Word type, int d_len) -{ - return nt_size_name(d_len, nt_name(type)); -} +#define nt_size(type, desc) nt_size_name(sizeof(desc), NN_ ## type) /* * Fill ELF notes for one CPU with save area registers @@ -322,18 +308,16 @@ static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa) memcpy(&nt_fpregset.fpc, &sa->fpc, sizeof(sa->fpc)); memcpy(&nt_fpregset.fprs, &sa->fprs, sizeof(sa->fprs)); /* Create ELF notes for the CPU */ - ptr = nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus)); - ptr = nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset)); - ptr = nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer)); - ptr = nt_init(ptr, NT_S390_TODCMP, &sa->todcmp, sizeof(sa->todcmp)); - ptr = nt_init(ptr, NT_S390_TODPREG, &sa->todpreg, sizeof(sa->todpreg)); - ptr = nt_init(ptr, NT_S390_CTRS, &sa->ctrs, sizeof(sa->ctrs)); - ptr = nt_init(ptr, NT_S390_PREFIX, &sa->prefix, sizeof(sa->prefix)); + ptr = nt_init(ptr, PRSTATUS, nt_prstatus); + ptr = nt_init(ptr, PRFPREG, nt_fpregset); + ptr = nt_init(ptr, S390_TIMER, sa->timer); + ptr = nt_init(ptr, S390_TODCMP, sa->todcmp); + ptr = nt_init(ptr, S390_TODPREG, sa->todpreg); + ptr = nt_init(ptr, S390_CTRS, sa->ctrs); + ptr = nt_init(ptr, S390_PREFIX, sa->prefix); if (cpu_has_vx()) { - ptr = nt_init(ptr, NT_S390_VXRS_HIGH, - &sa->vxrs_high, sizeof(sa->vxrs_high)); - ptr = nt_init(ptr, NT_S390_VXRS_LOW, - &sa->vxrs_low, sizeof(sa->vxrs_low)); + ptr = nt_init(ptr, S390_VXRS_HIGH, sa->vxrs_high); + ptr = nt_init(ptr, S390_VXRS_LOW, sa->vxrs_low); } return ptr; } 
@@ -346,16 +330,16 @@ static size_t get_cpu_elf_notes_size(void) struct save_area *sa = NULL; size_t size; - size = nt_size(NT_PRSTATUS, sizeof(struct elf_prstatus)); - size += nt_size(NT_PRFPREG, sizeof(elf_fpregset_t)); - size += nt_size(NT_S390_TIMER, sizeof(sa->timer)); - size += nt_size(NT_S390_TODCMP, sizeof(sa->todcmp)); - size += nt_size(NT_S390_TODPREG, sizeof(sa->todpreg)); - size += nt_size(NT_S390_CTRS, sizeof(sa->ctrs)); - size += nt_size(NT_S390_PREFIX, sizeof(sa->prefix)); + size = nt_size(PRSTATUS, struct elf_prstatus); + size += nt_size(PRFPREG, elf_fpregset_t); + size += nt_size(S390_TIMER, sa->timer); + size += nt_size(S390_TODCMP, sa->todcmp); + size += nt_size(S390_TODPREG, sa->todpreg); + size += nt_size(S390_CTRS, sa->ctrs); + size += nt_size(S390_PREFIX, sa->prefix); if (cpu_has_vx()) { - size += nt_size(NT_S390_VXRS_HIGH, sizeof(sa->vxrs_high)); - size += nt_size(NT_S390_VXRS_LOW, sizeof(sa->vxrs_low)); + size += nt_size(S390_VXRS_HIGH, sa->vxrs_high); + size += nt_size(S390_VXRS_LOW, sa->vxrs_low); } return size; @@ -370,8 +354,8 @@ static void *nt_prpsinfo(void *ptr) memset(&prpsinfo, 0, sizeof(prpsinfo)); prpsinfo.pr_sname = 'R'; - strcpy(prpsinfo.pr_fname, "vmlinux"); - return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo)); + strscpy(prpsinfo.pr_fname, "vmlinux"); + return nt_init(ptr, PRPSINFO, prpsinfo); } /* @@ -610,7 +594,7 @@ static size_t get_elfcorehdr_size(int phdr_count) /* PT_NOTES */ size += sizeof(Elf64_Phdr); /* nt_prpsinfo */ - size += nt_size(NT_PRPSINFO, sizeof(struct elf_prpsinfo)); + size += nt_size(PRPSINFO, struct elf_prpsinfo); /* regsets */ size += get_cpu_cnt() * get_cpu_elf_notes_size(); /* nt_vmcoreinfo */ diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index ce038e9205f7..2a41be2f7925 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -251,7 +251,7 @@ static debug_info_t *debug_info_alloc(const char *name, int pages_per_area, rc->level = level; rc->buf_size = 
buf_size; rc->entry_size = sizeof(debug_entry_t) + buf_size; - strscpy(rc->name, name, sizeof(rc->name)); + strscpy(rc->name, name); memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *)); memset(rc->debugfs_entries, 0, DEBUG_MAX_VIEWS * sizeof(struct dentry *)); refcount_set(&(rc->ref_count), 0); diff --git a/arch/s390/kernel/diag/diag.c b/arch/s390/kernel/diag/diag.c index e15b8dee3228..56b862ba9be8 100644 --- a/arch/s390/kernel/diag/diag.c +++ b/arch/s390/kernel/diag/diag.c @@ -195,7 +195,7 @@ static inline int __diag204(unsigned long *subcode, unsigned long size, void *ad { union register_pair rp = { .even = *subcode, .odd = size }; - asm volatile( + asm_inline volatile( " diag %[addr],%[rp],0x204\n" "0: nopr %%r7\n" EX_TABLE(0b,0b) @@ -286,7 +286,7 @@ int diag224(void *ptr) int rc = -EOPNOTSUPP; diag_stat_inc(DIAG_STAT_X224); - asm volatile("\n" + asm_inline volatile("\n" " diag %[type],%[addr],0x224\n" "0: lhi %[rc],0\n" "1:\n" diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 1ecd0580561f..dd410962ecbe 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -17,6 +17,7 @@ #include <linux/sched.h> #include <linux/sched/debug.h> #include <linux/sched/task_stack.h> +#include <asm/asm-offsets.h> #include <asm/processor.h> #include <asm/debug.h> #include <asm/dis.h> @@ -198,13 +199,8 @@ void __noreturn die(struct pt_regs *regs, const char *str) console_verbose(); spin_lock_irq(&die_lock); bust_spinlocks(1); - printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff, + printk("%s: %04x ilc:%d [#%d]", str, regs->int_code & 0xffff, regs->int_code >> 17, ++die_counter); -#ifdef CONFIG_PREEMPT - pr_cont("PREEMPT "); -#elif defined(CONFIG_PREEMPT_RT) - pr_cont("PREEMPT_RT "); -#endif pr_cont("SMP "); if (debug_pagealloc_enabled()) pr_cont("DEBUG_PAGEALLOC"); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 2fa25164df7d..54cf0923050f 100644 --- a/arch/s390/kernel/early.c +++ 
b/arch/s390/kernel/early.c @@ -8,6 +8,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/sched/debug.h> +#include <linux/cpufeature.h> #include <linux/compiler.h> #include <linux/init.h> #include <linux/errno.h> @@ -21,6 +22,8 @@ #include <asm/asm-extable.h> #include <linux/memblock.h> #include <asm/access-regs.h> +#include <asm/asm-offsets.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/ebcdic.h> #include <asm/fpu.h> @@ -36,12 +39,14 @@ #include <asm/boot_data.h> #include "entry.h" -#define decompressor_handled_param(param) \ -static int __init ignore_decompressor_param_##param(char *s) \ +#define __decompressor_handled_param(func, param) \ +static int __init ignore_decompressor_param_##func(char *s) \ { \ return 0; \ } \ -early_param(#param, ignore_decompressor_param_##param) +early_param(#param, ignore_decompressor_param_##func) + +#define decompressor_handled_param(param) __decompressor_handled_param(param, param) decompressor_handled_param(mem); decompressor_handled_param(vmalloc); @@ -51,6 +56,7 @@ decompressor_handled_param(nokaslr); decompressor_handled_param(cmma); decompressor_handled_param(relocate_lowcore); decompressor_handled_param(bootdebug); +__decompressor_handled_param(debug_alternative, debug-alternative); #if IS_ENABLED(CONFIG_KVM) decompressor_handled_param(prot_virt); #endif @@ -63,21 +69,6 @@ static void __init kasan_early_init(void) #endif } -static void __init reset_tod_clock(void) -{ - union tod_clock clk; - - if (store_tod_clock_ext_cc(&clk) == 0) - return; - /* TOD clock not running. Set the clock to Unix Epoch. 
*/ - if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock_ext_cc(&clk)) - disabled_wait(); - - memset(&tod_clock_base, 0, sizeof(tod_clock_base)); - tod_clock_base.tod = TOD_UNIX_EPOCH; - get_lowcore()->last_update_clock = TOD_UNIX_EPOCH; -} - /* * Initialize storage key for kernel pages */ @@ -96,26 +87,6 @@ static noinline __init void init_kernel_storage_key(void) static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE); -static noinline __init void detect_machine_type(void) -{ - struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page; - - /* Check current-configuration-level */ - if (stsi(NULL, 0, 0, 0) <= 2) { - get_lowcore()->machine_flags |= MACHINE_FLAG_LPAR; - return; - } - /* Get virtual-machine cpu information. */ - if (stsi(vmms, 3, 2, 2) || !vmms->count) - return; - - /* Detect known hypervisors */ - if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3)) - get_lowcore()->machine_flags |= MACHINE_FLAG_KVM; - else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4)) - get_lowcore()->machine_flags |= MACHINE_FLAG_VM; -} - /* Remove leading, trailing and double whitespace. */ static inline void strim_all(char *str) { @@ -156,9 +127,9 @@ static noinline __init void setup_arch_string(void) strim_all(hvstr); } else { sprintf(hvstr, "%s", - MACHINE_IS_LPAR ? "LPAR" : - MACHINE_IS_VM ? "z/VM" : - MACHINE_IS_KVM ? "KVM" : "unknown"); + machine_is_lpar() ? "LPAR" : + machine_is_vm() ? "z/VM" : + machine_is_kvm() ? 
"KVM" : "unknown"); } dump_stack_set_arch_desc("%s (%s)", mstr, hvstr); } @@ -167,9 +138,8 @@ static __init void setup_topology(void) { int max_mnest; - if (!test_facility(11)) + if (!cpu_has_topology()) return; - get_lowcore()->machine_flags |= MACHINE_FLAG_TOPOLOGY; for (max_mnest = 6; max_mnest > 1; max_mnest--) { if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0) break; @@ -218,65 +188,10 @@ static noinline __init void setup_lowcore_early(void) lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); } -static __init void detect_diag9c(void) -{ - unsigned int cpu_address; - int rc; - - cpu_address = stap(); - diag_stat_inc(DIAG_STAT_X09C); - asm volatile( - " diag %2,0,0x9c\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc"); - if (!rc) - get_lowcore()->machine_flags |= MACHINE_FLAG_DIAG9C; -} - -static __init void detect_machine_facilities(void) -{ - if (test_facility(8)) { - get_lowcore()->machine_flags |= MACHINE_FLAG_EDAT1; - system_ctl_set_bit(0, CR0_EDAT_BIT); - } - if (test_facility(78)) - get_lowcore()->machine_flags |= MACHINE_FLAG_EDAT2; - if (test_facility(3)) - get_lowcore()->machine_flags |= MACHINE_FLAG_IDTE; - if (test_facility(50) && test_facility(73)) { - get_lowcore()->machine_flags |= MACHINE_FLAG_TE; - system_ctl_set_bit(0, CR0_TRANSACTIONAL_EXECUTION_BIT); - } - if (test_facility(51)) - get_lowcore()->machine_flags |= MACHINE_FLAG_TLB_LC; - if (test_facility(129)) - system_ctl_set_bit(0, CR0_VECTOR_BIT); - if (test_facility(130)) - get_lowcore()->machine_flags |= MACHINE_FLAG_NX; - if (test_facility(133)) - get_lowcore()->machine_flags |= MACHINE_FLAG_GS; - if (test_facility(139) && (tod_clock_base.tod >> 63)) { - /* Enabled signed clock comparator comparisons */ - get_lowcore()->machine_flags |= MACHINE_FLAG_SCC; - clock_comparator_max = -1ULL >> 1; - system_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SIGN_BIT); - } - if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) { - 
get_lowcore()->machine_flags |= MACHINE_FLAG_PCI_MIO; - /* the control bit is set during PCI initialization */ - } - if (test_facility(194)) - get_lowcore()->machine_flags |= MACHINE_FLAG_RDP; - if (test_facility(85)) - get_lowcore()->machine_flags |= MACHINE_FLAG_SEQ_INSN; -} - static inline void save_vector_registers(void) { #ifdef CONFIG_CRASH_DUMP - if (test_facility(129)) + if (cpu_has_vx()) save_vx_regs(boot_cpu_vector_save_area); #endif } @@ -308,17 +223,13 @@ static void __init sort_amode31_extable(void) void __init startup_init(void) { kasan_early_init(); - reset_tod_clock(); time_early_init(); init_kernel_storage_key(); lockdep_off(); sort_amode31_extable(); setup_lowcore_early(); - detect_machine_type(); setup_arch_string(); setup_boot_command_line(); - detect_diag9c(); - detect_machine_facilities(); save_vector_registers(); setup_topology(); sclp_early_detect(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 4cc3408c4dac..0f00f4b06d51 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -29,6 +29,7 @@ #include <asm/nmi.h> #include <asm/nospec-insn.h> #include <asm/lowcore.h> +#include <asm/machine.h> _LPP_OFFSET = __LC_LPP @@ -44,7 +45,7 @@ _LPP_OFFSET = __LC_LPP ALTERNATIVE_2 "b \lpswe;nopr", \ ".insn siy,0xeb0000000071,\address,0", ALT_FACILITY(193), \ __stringify(.insn siy,0xeb0000000071,LOWCORE_ALT_ADDRESS+\address,0), \ - ALT_LOWCORE + ALT_FEATURE(MFEATURE_LOWCORE) .endm .macro MBEAR reg, lowcore @@ -67,7 +68,7 @@ _LPP_OFFSET = __LC_LPP clg %r14,__LC_RESTART_STACK(\lowcore) je \oklabel la %r14,\savearea(\lowcore) - j stack_overflow + j stack_invalid .endm /* @@ -115,7 +116,7 @@ _LPP_OFFSET = __LC_LPP .macro SIEEXIT sie_control,lowcore lg %r9,\sie_control # get control block pointer ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_KERNEL_ASCE(\lowcore) # load primary asce + lctlg %c1,%c1,__LC_USER_ASCE(\lowcore) # load primary asce lg %r9,__LC_CURRENT(\lowcore) mvi __TI_sie(%r9),0 larl 
%r9,sie_exit # skip forward to sie_exit @@ -207,7 +208,7 @@ SYM_FUNC_START(__sie64a) lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE GET_LC %r14 - lctlg %c1,%c1,__LC_KERNEL_ASCE(%r14) # load primary asce + lctlg %c1,%c1,__LC_USER_ASCE(%r14) # load primary asce lg %r14,__LC_CURRENT(%r14) mvi __TI_sie(%r14),0 SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL) @@ -239,7 +240,6 @@ SYM_CODE_START(system_call) lghi %r14,0 .Lsysc_per: STBEAR __LC_LAST_BREAK(%r13) - lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) lg %r15,__LC_KERNEL_STACK(%r13) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15) @@ -260,7 +260,6 @@ SYM_CODE_START(system_call) lgr %r3,%r14 brasl %r14,__do_syscall STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE(%r13) mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) BPON LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) @@ -277,7 +276,6 @@ SYM_CODE_START(ret_from_fork) brasl %r14,__ret_from_fork STACKLEAK_ERASE GET_LC %r13 - lctlg %c1,%c1,__LC_USER_ASCE(%r13) mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) BPON LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) @@ -298,10 +296,7 @@ SYM_CODE_START(pgm_check_handler) lmg %r8,%r9,__LC_PGM_OLD_PSW(%r13) xgr %r10,%r10 tmhh %r8,0x0001 # coming from user space? 
- jno .Lpgm_skip_asce - lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) - j 3f # -> fault in user space -.Lpgm_skip_asce: + jo 3f # -> fault in user space #if IS_ENABLED(CONFIG_KVM) lg %r11,__LC_CURRENT(%r13) tm __TI_sie(%r11),0xff @@ -315,7 +310,7 @@ SYM_CODE_START(pgm_check_handler) tm __LC_PGM_ILC+3(%r13),0x80 # check for per exception jnz .Lpgm_svcper # -> single stepped svc 2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - # CHECK_VMAP_STACK branches to stack_overflow or 4f + # CHECK_VMAP_STACK branches to stack_invalid or 4f CHECK_VMAP_STACK __LC_SAVE_AREA,%r13,4f 3: lg %r15,__LC_KERNEL_STACK(%r13) 4: la %r11,STACK_FRAME_OVERHEAD(%r15) @@ -339,7 +334,6 @@ SYM_CODE_START(pgm_check_handler) tmhh %r8,0x0001 # returning to user space? jno .Lpgm_exit_kernel STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE(%r13) BPON stpt __LC_EXIT_TIMER(%r13) .Lpgm_exit_kernel: @@ -383,8 +377,7 @@ SYM_CODE_START(\name) #endif 0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j 2f -1: lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) - lg %r15,__LC_KERNEL_STACK(%r13) +1: lg %r15,__LC_KERNEL_STACK(%r13) 2: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) @@ -407,7 +400,6 @@ SYM_CODE_START(\name) tmhh %r8,0x0001 # returning to user ? 
jno 2f STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE(%r13) BPON stpt __LC_EXIT_TIMER(%r13) 2: LBEAR __PT_LAST_BREAK(%r11) @@ -467,7 +459,7 @@ SYM_CODE_START(mcck_int_handler) clgrjl %r9,%r14, 4f larl %r14,.Lsie_leave clgrjhe %r9,%r14, 4f - lg %r10,__LC_PCPU + lg %r10,__LC_PCPU(%r13) oi __PCPU_FLAGS+7(%r10), _CIF_MCCK_GUEST 4: BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST SIEEXIT __SF_SIE_CONTROL(%r15),%r13 @@ -475,8 +467,6 @@ SYM_CODE_START(mcck_int_handler) .Lmcck_user: lg %r15,__LC_MCCK_STACK(%r13) la %r11,STACK_FRAME_OVERHEAD(%r15) - stctg %c1,%c1,__PT_CR1(%r11) - lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lay %r14,__LC_GPREGS_SAVE_AREA(%r13) mvc __PT_R0(128,%r11),0(%r14) @@ -494,7 +484,6 @@ SYM_CODE_START(mcck_int_handler) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,s390_do_machine_check - lctlg %c1,%c1,__PT_CR1(%r11) lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_MCCK_PSW(16,%r13),__PT_PSW(%r11) # move return PSW tm __LC_RETURN_MCCK_PSW+1(%r13),0x01 # returning to user ? @@ -590,22 +579,23 @@ SYM_CODE_END(early_pgm_check_handler) .section .kprobes.text, "ax" /* - * The synchronous or the asynchronous stack overflowed. We are dead. + * The synchronous or the asynchronous stack pointer is invalid. We are dead. * No need to properly save the registers, we are going to panic anyway. * Setup a pt_regs so that show_trace can provide a good call trace. 
*/ -SYM_CODE_START(stack_overflow) +SYM_CODE_START(stack_invalid) GET_LC %r15 lg %r15,__LC_NODAT_STACK(%r15) # change to panic stack la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) stmg %r8,%r9,__PT_PSW(%r11) mvc __PT_R8(64,%r11),0(%r14) - stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2 + GET_LC %r2 + mvc __PT_ORIG_GPR2(8,%r11),__LC_PGM_LAST_BREAK(%r2) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs - jg kernel_stack_overflow -SYM_CODE_END(stack_overflow) + jg kernel_stack_invalid +SYM_CODE_END(stack_invalid) .section .data, "aw" .balign 4 @@ -621,7 +611,7 @@ SYM_DATA_END(daton_psw) .balign 8 #define SYSCALL(esame,emu) .quad __s390x_ ## esame SYM_DATA_START(sys_call_table) -#include "asm/syscall_table.h" +#include <asm/syscall_table.h> SYM_DATA_END(sys_call_table) #undef SYSCALL @@ -629,7 +619,7 @@ SYM_DATA_END(sys_call_table) #define SYSCALL(esame,emu) .quad __s390_ ## emu SYM_DATA_START(sys_call_table_emu) -#include "asm/syscall_table.h" +#include <asm/syscall_table.h> SYM_DATA_END(sys_call_table_emu) #undef SYSCALL #endif diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index a1f28879c87e..dd55cc6bbc28 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -31,7 +31,7 @@ void do_secure_storage_access(struct pt_regs *regs); void do_non_secure_storage_access(struct pt_regs *regs); void do_secure_storage_violation(struct pt_regs *regs); void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str); -void kernel_stack_overflow(struct pt_regs * regs); +void kernel_stack_invalid(struct pt_regs *regs); void handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs); diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index e540b022ceb2..e94bb98f5231 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/kmsan-checks.h> 
+#include <linux/cpufeature.h> #include <linux/kprobes.h> #include <linux/execmem.h> #include <trace/syscall.h> @@ -69,7 +70,7 @@ static const char *ftrace_shared_hotpatch_trampoline(const char **end) bool ftrace_need_init_nop(void) { - return !MACHINE_HAS_SEQ_INSN; + return !cpu_has_seq_insn(); } int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) @@ -189,7 +190,7 @@ static int ftrace_modify_trampoline_call(struct dyn_ftrace *rec, int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { - if (MACHINE_HAS_SEQ_INSN) + if (cpu_has_seq_insn()) return ftrace_patch_branch_insn(rec->ip, old_addr, addr); else return ftrace_modify_trampoline_call(rec, old_addr, addr); @@ -213,8 +214,8 @@ static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable) int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - /* Expect brcl 0xf,... for the !MACHINE_HAS_SEQ_INSN case */ - if (MACHINE_HAS_SEQ_INSN) + /* Expect brcl 0xf,... 
for the !cpu_has_seq_insn() case */ + if (cpu_has_seq_insn()) return ftrace_patch_branch_insn(rec->ip, addr, 0); else return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false); @@ -234,7 +235,7 @@ static int ftrace_make_trampoline_call(struct dyn_ftrace *rec, unsigned long add int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - if (MACHINE_HAS_SEQ_INSN) + if (cpu_has_seq_insn()) return ftrace_patch_branch_insn(rec->ip, 0, addr); else return ftrace_make_trampoline_call(rec, addr); diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c index 0b68168d9566..cf26d7a37425 100644 --- a/arch/s390/kernel/guarded_storage.c +++ b/arch/s390/kernel/guarded_storage.c @@ -4,6 +4,7 @@ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ +#include <linux/cpufeature.h> #include <linux/kernel.h> #include <linux/syscalls.h> #include <linux/signal.h> @@ -109,7 +110,7 @@ static int gs_broadcast(void) SYSCALL_DEFINE2(s390_guarded_storage, int, command, struct gs_cb __user *, gs_cb) { - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -EOPNOTSUPP; switch (command) { case GS_ENABLE: diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 396034b2fe67..7edb9ded199c 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -18,12 +18,10 @@ __HEAD SYM_CODE_START(startup_continue) - larl %r1,tod_clock_base - GET_LC %r2 - mvc 0(16,%r1),__LC_BOOT_CLOCK(%r2) # # Setup stack # + GET_LC %r2 larl %r14,init_task stg %r14,__LC_CURRENT(%r2) larl %r15,init_thread_union+STACK_INIT_OFFSET diff --git a/arch/s390/kernel/hiperdispatch.c b/arch/s390/kernel/hiperdispatch.c index 7857a7e8e56c..e7b66d046e8d 100644 --- a/arch/s390/kernel/hiperdispatch.c +++ b/arch/s390/kernel/hiperdispatch.c @@ -45,6 +45,7 @@ * therefore delaying the throughput loss caused by using SMP threads. 
*/ +#include <linux/cpufeature.h> #include <linux/cpumask.h> #include <linux/debugfs.h> #include <linux/device.h> @@ -87,7 +88,7 @@ static DECLARE_DELAYED_WORK(hd_capacity_work, hd_capacity_work_fn); static int hd_set_hiperdispatch_mode(int enable) { - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) enable = 0; if (hd_enabled == enable) return 0; diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 69be2309cde0..ff15f91affde 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -22,6 +22,7 @@ #include <linux/debug_locks.h> #include <linux/vmalloc.h> #include <asm/asm-extable.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/ipl.h> #include <asm/smp.h> @@ -185,7 +186,7 @@ static inline int __diag308(unsigned long subcode, unsigned long addr) r1.even = addr; r1.odd = 0; - asm volatile( + asm_inline volatile( " diag %[r1],%[subcode],0x308\n" "0: nopr %%r7\n" EX_TABLE(0b,0b) @@ -269,7 +270,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ { \ if (len >= sizeof(_value)) \ return -E2BIG; \ - len = strscpy(_value, buf, sizeof(_value)); \ + len = strscpy(_value, buf); \ if ((ssize_t)len < 0) \ return len; \ strim(_value); \ @@ -685,7 +686,7 @@ static int __init ipl_init(void) goto out; switch (ipl_info.type) { case IPL_TYPE_CCW: - if (MACHINE_IS_VM) + if (machine_is_vm()) rc = sysfs_create_group(&ipl_kset->kobj, &ipl_ccw_attr_group_vm); else @@ -1272,7 +1273,7 @@ static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb) ipb->ccw.flags = IPL_PB0_FLAG_LOADPARM; /* VM PARM */ - if (MACHINE_IS_VM && ipl_block_valid && + if (machine_is_vm() && ipl_block_valid && (ipl_block.ccw.vm_flags & IPL_PB0_CCW_VM_FLAG_VP)) { ipb->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_VP; @@ -1286,7 +1287,7 @@ static int __init reipl_nss_init(void) { int rc; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 0; reipl_block_nss = (void *) get_zeroed_page(GFP_KERNEL); @@ -1311,8 +1312,8 @@ static int __init 
reipl_ccw_init(void) return -ENOMEM; rc = sysfs_create_group(&reipl_kset->kobj, - MACHINE_IS_VM ? &reipl_ccw_attr_group_vm - : &reipl_ccw_attr_group_lpar); + machine_is_vm() ? &reipl_ccw_attr_group_vm + : &reipl_ccw_attr_group_lpar); if (rc) return rc; @@ -1987,7 +1988,7 @@ static void vmcmd_run(struct shutdown_trigger *trigger) static int vmcmd_init(void) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return -EOPNOTSUPP; vmcmd_kset = kset_create_and_add("vmcmd", NULL, firmware_kobj); if (!vmcmd_kset) @@ -2248,26 +2249,28 @@ static int __init s390_ipl_init(void) __initcall(s390_ipl_init); -static void __init strncpy_skip_quote(char *dst, char *src, int n) +static void __init strscpy_skip_quote(char *dst, char *src, int n) { int sx, dx; - dx = 0; - for (sx = 0; src[sx] != 0; sx++) { + if (!n) + return; + for (sx = 0, dx = 0; src[sx]; sx++) { if (src[sx] == '"') continue; - dst[dx++] = src[sx]; - if (dx >= n) + dst[dx] = src[sx]; + if (dx + 1 == n) break; + dx++; } + dst[dx] = '\0'; } static int __init vmcmd_on_reboot_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; - strncpy_skip_quote(vmcmd_on_reboot, str, VMCMD_MAX_SIZE); - vmcmd_on_reboot[VMCMD_MAX_SIZE] = 0; + strscpy_skip_quote(vmcmd_on_reboot, str, sizeof(vmcmd_on_reboot)); on_reboot_trigger.action = &vmcmd_action; return 1; } @@ -2275,10 +2278,9 @@ __setup("vmreboot=", vmcmd_on_reboot_setup); static int __init vmcmd_on_panic_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; - strncpy_skip_quote(vmcmd_on_panic, str, VMCMD_MAX_SIZE); - vmcmd_on_panic[VMCMD_MAX_SIZE] = 0; + strscpy_skip_quote(vmcmd_on_panic, str, sizeof(vmcmd_on_panic)); on_panic_trigger.action = &vmcmd_action; return 1; } @@ -2286,10 +2288,9 @@ __setup("vmpanic=", vmcmd_on_panic_setup); static int __init vmcmd_on_halt_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; - strncpy_skip_quote(vmcmd_on_halt, str, VMCMD_MAX_SIZE); - vmcmd_on_halt[VMCMD_MAX_SIZE] = 0; + 
strscpy_skip_quote(vmcmd_on_halt, str, sizeof(vmcmd_on_halt)); on_halt_trigger.action = &vmcmd_action; return 1; } @@ -2297,10 +2298,9 @@ __setup("vmhalt=", vmcmd_on_halt_setup); static int __init vmcmd_on_poff_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; - strncpy_skip_quote(vmcmd_on_poff, str, VMCMD_MAX_SIZE); - vmcmd_on_poff[VMCMD_MAX_SIZE] = 0; + strscpy_skip_quote(vmcmd_on_poff, str, sizeof(vmcmd_on_poff)); on_poff_trigger.action = &vmcmd_action; return 1; } diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index ef7be599e1f7..bdf9c7cb5685 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -9,6 +9,7 @@ */ #include <linux/kernel_stat.h> +#include <linux/cpufeature.h> #include <linux/interrupt.h> #include <linux/seq_file.h> #include <linux/proc_fs.h> @@ -25,6 +26,7 @@ #include <asm/irq_regs.h> #include <asm/cputime.h> #include <asm/lowcore.h> +#include <asm/machine.h> #include <asm/irq.h> #include <asm/hw_irq.h> #include <asm/stacktrace.h> @@ -84,7 +86,6 @@ static const struct irq_class irqclass_sub_desc[] = { {.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"}, {.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"}, {.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"}, - {.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"}, {.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"}, {.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"}, {.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"}, @@ -149,7 +150,7 @@ void noinstr do_io_irq(struct pt_regs *regs) if (user_mode(regs)) { update_timer_sys(); - if (static_branch_likely(&cpu_has_bear)) + if (cpu_has_bear()) current->thread.last_break = regs->last_break; } @@ -164,7 +165,7 @@ void noinstr do_io_irq(struct pt_regs *regs) do_irq_async(regs, THIN_INTERRUPT); else do_irq_async(regs, IO_INTERRUPT); - } while (MACHINE_IS_LPAR && irq_pending(regs)); + } while (machine_is_lpar() && irq_pending(regs)); 
irq_exit_rcu(); @@ -185,7 +186,7 @@ void noinstr do_ext_irq(struct pt_regs *regs) if (user_mode(regs)) { update_timer_sys(); - if (static_branch_likely(&cpu_has_bear)) + if (cpu_has_bear()) current->thread.last_break = regs->last_break; } diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 8b80ea57125f..c450120b4474 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -13,6 +13,7 @@ #include <linux/ptrace.h> #include <linux/preempt.h> #include <linux/stop_machine.h> +#include <linux/cpufeature.h> #include <linux/kdebug.h> #include <linux/uaccess.h> #include <linux/extable.h> @@ -153,7 +154,7 @@ void arch_arm_kprobe(struct kprobe *p) { struct swap_insn_args args = {.p = p, .arm_kprobe = 1}; - if (MACHINE_HAS_SEQ_INSN) { + if (cpu_has_seq_insn()) { swap_instruction(&args); text_poke_sync(); } else { @@ -166,7 +167,7 @@ void arch_disarm_kprobe(struct kprobe *p) { struct swap_insn_args args = {.p = p, .arm_kprobe = 0}; - if (MACHINE_HAS_SEQ_INSN) { + if (cpu_has_seq_insn()) { swap_instruction(&args); text_poke_sync(); } else { diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 8f681ccfb83a..baeb3dcfc1c8 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -13,7 +13,9 @@ #include <linux/reboot.h> #include <linux/ftrace.h> #include <linux/debug_locks.h> +#include <linux/cpufeature.h> #include <asm/guarded_storage.h> +#include <asm/machine.h> #include <asm/pfault.h> #include <asm/cio.h> #include <asm/fpu.h> @@ -94,7 +96,7 @@ static noinline void __machine_kdump(void *image) mcesa = __va(get_lowcore()->mcesad & MCESA_ORIGIN_MASK); if (cpu_has_vx()) save_vx_regs((__vector128 *) mcesa->vector_save_area); - if (MACHINE_HAS_GS) { + if (cpu_has_gs()) { local_ctl_store(2, &cr2_old.reg); cr2_new = cr2_old; cr2_new.gse = 1; @@ -178,7 +180,7 @@ void arch_kexec_unprotect_crashkres(void) static int machine_kexec_prepare_kdump(void) { #ifdef CONFIG_CRASH_DUMP - if 
(MACHINE_IS_VM) + if (machine_is_vm()) diag10_range(PFN_DOWN(crashk_res.start), PFN_DOWN(crashk_res.end - crashk_res.start + 1)); return 0; diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index fbd218b6fc8e..3da371c144eb 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -9,6 +9,7 @@ */ #include <linux/kernel_stat.h> +#include <linux/cpufeature.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/entry-common.h> @@ -45,7 +46,7 @@ static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck); static inline int nmi_needs_mcesa(void) { - return cpu_has_vx() || MACHINE_HAS_GS; + return cpu_has_vx() || cpu_has_gs(); } /* @@ -61,7 +62,7 @@ void __init nmi_alloc_mcesa_early(u64 *mcesad) if (!nmi_needs_mcesa()) return; *mcesad = __pa(&boot_mcesa); - if (MACHINE_HAS_GS) + if (cpu_has_gs()) *mcesad |= ilog2(MCESA_MAX_SIZE); } @@ -73,14 +74,14 @@ int nmi_alloc_mcesa(u64 *mcesad) *mcesad = 0; if (!nmi_needs_mcesa()) return 0; - size = MACHINE_HAS_GS ? MCESA_MAX_SIZE : MCESA_MIN_SIZE; + size = cpu_has_gs() ? 
MCESA_MAX_SIZE : MCESA_MIN_SIZE; origin = kmalloc(size, GFP_KERNEL); if (!origin) return -ENOMEM; /* The pointer is stored with mcesa_bits ORed in */ kmemleak_not_leak(origin); *mcesad = __pa(origin); - if (MACHINE_HAS_GS) + if (cpu_has_gs()) *mcesad |= ilog2(MCESA_MAX_SIZE); return 0; } diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 33205dd410e4..6a262e198e35 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -442,7 +442,7 @@ static void cpum_cf_make_setsize(enum cpumf_ctr_set ctrset) ctrset_size = 48; else if (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5) ctrset_size = 128; - else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7) + else if (cpumf_ctr_info.csvn >= 6 && cpumf_ctr_info.csvn <= 8) ctrset_size = 160; break; case CPUMF_CTR_SET_MT_DIAG: @@ -858,18 +858,13 @@ static int cpumf_pmu_event_type(struct perf_event *event) static int cpumf_pmu_event_init(struct perf_event *event) { unsigned int type = event->attr.type; - int err; + int err = -ENOENT; if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) err = __hw_perf_event_init(event, type); else if (event->pmu->type == type) /* Registered as unknown PMU */ err = __hw_perf_event_init(event, cpumf_pmu_event_type(event)); - else - return -ENOENT; - - if (unlikely(err) && event->destroy) - event->destroy(event); return err; } @@ -985,8 +980,6 @@ static int cfdiag_push_sample(struct perf_event *event, } overflow = perf_event_overflow(event, &data, &regs); - if (overflow) - event->pmu->stop(event, 0); perf_event_update_userpage(event); return overflow; @@ -1819,8 +1812,6 @@ static int cfdiag_event_init(struct perf_event *event) event->destroy = hw_perf_event_destroy; err = cfdiag_event_init2(event); - if (unlikely(err)) - event->destroy(event); out: return err; } diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index e4a6bfc91080..7ace1f9e4ccf 100644 ---
a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -237,7 +237,6 @@ CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_NO_SPECIAL, 0x00f4); CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_SPECIAL, 0x00f5); CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); - CPUMF_EVENT_ATTR(cf_z15, L1D_RO_EXCL_WRITES, 0x0080); CPUMF_EVENT_ATTR(cf_z15, DTLB2_WRITES, 0x0081); CPUMF_EVENT_ATTR(cf_z15, DTLB2_MISSES, 0x0082); @@ -291,8 +290,8 @@ CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_NO_SPECIAL, 0x00f4); CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_SPECIAL, 0x00f5); CPUMF_EVENT_ATTR(cf_z15, DFLT_ACCESS, 0x00f7); CPUMF_EVENT_ATTR(cf_z15, DFLT_CYCLES, 0x00fc); -CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108); -CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x00109); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x0108); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x0109); CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); CPUMF_EVENT_ATTR(cf_z16, L1D_RO_EXCL_WRITES, 0x0080); @@ -365,6 +364,83 @@ CPUMF_EVENT_ATTR(cf_z16, NNPA_WAIT_LOCK, 0x010d); CPUMF_EVENT_ATTR(cf_z16, NNPA_HOLD_LOCK, 0x010e); CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); +CPUMF_EVENT_ATTR(cf_z17, L1D_RO_EXCL_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z17, DTLB2_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z17, DTLB2_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z17, CRSTE_1MB_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z17, DTLB2_GPAGE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z17, ITLB2_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z17, ITLB2_MISSES, 0x0087); +CPUMF_EVENT_ATTR(cf_z17, TLB2_PTE_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z17, TLB2_CRSTE_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z17, TLB2_ENGINES_BUSY, 0x008b); +CPUMF_EVENT_ATTR(cf_z17, TX_C_TEND, 0x008c); +CPUMF_EVENT_ATTR(cf_z17, TX_NC_TEND, 0x008d); +CPUMF_EVENT_ATTR(cf_z17, 
L1C_TLB2_MISSES, 0x008f); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ, 0x0091); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_IV, 0x0092); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_CHIP_HIT, 0x0093); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_DRAWER_HIT, 0x0094); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP, 0x0095); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_IV, 0x0096); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_CHIP_HIT, 0x0097); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT, 0x0098); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE, 0x0099); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER, 0x009a); +CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER, 0x009b); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_MEMORY, 0x009c); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE_MEMORY, 0x009d); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER_MEMORY, 0x009e); +CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER_MEMORY, 0x009f); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_IV, 0x00a0); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT, 0x00a1); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT, 0x00a2); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_IV, 0x00a3); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT, 0x00a4); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT, 0x00a5); +CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_IV, 0x00a6); +CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT, 0x00a7); +CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT, 0x00a8); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ, 0x00a9); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_IV, 0x00aa); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_CHIP_HIT, 0x00ab); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_DRAWER_HIT, 0x00ac); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP, 0x00ad); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_IV, 0x00ae); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_CHIP_HIT, 0x00af); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT, 0x00b0); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_MODULE, 0x00b1); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_DRAWER, 0x00b2); +CPUMF_EVENT_ATTR(cf_z17, ICW_OFF_DRAWER, 0x00b3); +CPUMF_EVENT_ATTR(cf_z17, CYCLES_SAMETHRD, 0x00ca); +CPUMF_EVENT_ATTR(cf_z17, 
CYCLES_DIFFTHRD, 0x00cb); +CPUMF_EVENT_ATTR(cf_z17, INST_SAMETHRD, 0x00cc); +CPUMF_EVENT_ATTR(cf_z17, INST_DIFFTHRD, 0x00cd); +CPUMF_EVENT_ATTR(cf_z17, WRONG_BRANCH_PREDICTION, 0x00ce); +CPUMF_EVENT_ATTR(cf_z17, VX_BCD_EXECUTION_SLOTS, 0x00e1); +CPUMF_EVENT_ATTR(cf_z17, DECIMAL_INSTRUCTIONS, 0x00e2); +CPUMF_EVENT_ATTR(cf_z17, LAST_HOST_TRANSLATIONS, 0x00e8); +CPUMF_EVENT_ATTR(cf_z17, TX_NC_TABORT, 0x00f4); +CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_NO_SPECIAL, 0x00f5); +CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_SPECIAL, 0x00f6); +CPUMF_EVENT_ATTR(cf_z17, DFLT_ACCESS, 0x00f8); +CPUMF_EVENT_ATTR(cf_z17, DFLT_CYCLES, 0x00fd); +CPUMF_EVENT_ATTR(cf_z17, SORTL, 0x0100); +CPUMF_EVENT_ATTR(cf_z17, DFLT_CC, 0x0109); +CPUMF_EVENT_ATTR(cf_z17, DFLT_CCFINISH, 0x010a); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INVOCATIONS, 0x010b); +CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPLETIONS, 0x010c); +CPUMF_EVENT_ATTR(cf_z17, NNPA_WAIT_LOCK, 0x010d); +CPUMF_EVENT_ATTR(cf_z17, NNPA_HOLD_LOCK, 0x010e); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_ONCHIP, 0x0110); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_OFFCHIP, 0x0111); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_DIFF, 0x0112); +CPUMF_EVENT_ATTR(cf_z17, NNPA_4K_PREFETCH, 0x0114); +CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPL_LOCK, 0x0115); +CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK, 0x0116); +CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO, 0x0117); +CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); +CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = { CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES), @@ -414,7 +490,7 @@ static struct attribute *cpumcf_svn_12345_pmu_event_attr[] __initdata = { NULL, }; -static struct attribute *cpumcf_svn_67_pmu_event_attr[] __initdata = { +static struct attribute *cpumcf_svn_678_pmu_event_attr[] __initdata = { CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS), CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES), CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS), @@ -779,6 +855,87 @@ 
static struct attribute *cpumcf_z16_pmu_event_attr[] __initdata = { NULL, }; +static struct attribute *cpumcf_z17_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z17, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z17, DTLB2_WRITES), + CPUMF_EVENT_PTR(cf_z17, DTLB2_MISSES), + CPUMF_EVENT_PTR(cf_z17, CRSTE_1MB_WRITES), + CPUMF_EVENT_PTR(cf_z17, DTLB2_GPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z17, ITLB2_WRITES), + CPUMF_EVENT_PTR(cf_z17, ITLB2_MISSES), + CPUMF_EVENT_PTR(cf_z17, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z17, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z17, TLB2_ENGINES_BUSY), + CPUMF_EVENT_PTR(cf_z17, TX_C_TEND), + CPUMF_EVENT_PTR(cf_z17, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_z17, L1C_TLB2_MISSES), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ_IV), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_IV), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER), + CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_MEMORY), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE_MEMORY), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_IV), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_IV), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_IV), + CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ_IV), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, 
ICW_REQ_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_IV), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_MODULE), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_DRAWER), + CPUMF_EVENT_PTR(cf_z17, ICW_OFF_DRAWER), + CPUMF_EVENT_PTR(cf_z17, CYCLES_SAMETHRD), + CPUMF_EVENT_PTR(cf_z17, CYCLES_DIFFTHRD), + CPUMF_EVENT_PTR(cf_z17, INST_SAMETHRD), + CPUMF_EVENT_PTR(cf_z17, INST_DIFFTHRD), + CPUMF_EVENT_PTR(cf_z17, WRONG_BRANCH_PREDICTION), + CPUMF_EVENT_PTR(cf_z17, VX_BCD_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z17, DECIMAL_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_z17, LAST_HOST_TRANSLATIONS), + CPUMF_EVENT_PTR(cf_z17, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_SPECIAL), + CPUMF_EVENT_PTR(cf_z17, DFLT_ACCESS), + CPUMF_EVENT_PTR(cf_z17, DFLT_CYCLES), + CPUMF_EVENT_PTR(cf_z17, SORTL), + CPUMF_EVENT_PTR(cf_z17, DFLT_CC), + CPUMF_EVENT_PTR(cf_z17, DFLT_CCFINISH), + CPUMF_EVENT_PTR(cf_z17, NNPA_INVOCATIONS), + CPUMF_EVENT_PTR(cf_z17, NNPA_COMPLETIONS), + CPUMF_EVENT_PTR(cf_z17, NNPA_WAIT_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_HOLD_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_INST_ONCHIP), + CPUMF_EVENT_PTR(cf_z17, NNPA_INST_OFFCHIP), + CPUMF_EVENT_PTR(cf_z17, NNPA_INST_DIFF), + CPUMF_EVENT_PTR(cf_z17, NNPA_4K_PREFETCH), + CPUMF_EVENT_PTR(cf_z17, NNPA_COMPL_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO), + CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE), + CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE), + NULL, +}; + /* END: CPUM_CF COUNTER DEFINITIONS ===================================== */ static struct attribute_group cpumcf_pmu_events_group = { @@ -859,7 +1016,7 @@ __init const struct attribute_group **cpumf_cf_event_group(void) if (ci.csvn >= 1 && ci.csvn <= 5) csvn = cpumcf_svn_12345_pmu_event_attr; else if (ci.csvn >= 6) - csvn = 
cpumcf_svn_67_pmu_event_attr; + csvn = cpumcf_svn_678_pmu_event_attr; /* Determine model-specific counter set(s) */ get_cpu_id(&cpu_id); @@ -892,6 +1049,10 @@ __init const struct attribute_group **cpumf_cf_event_group(void) case 0x3932: model = cpumcf_z16_pmu_event_attr; break; + case 0x9175: + case 0x9176: + model = cpumcf_z17_pmu_event_attr; + break; default: model = none; break; diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 5f60248cb468..91469401f2c9 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -885,9 +885,6 @@ static int cpumsf_pmu_event_init(struct perf_event *event) event->attr.exclude_idle = 0; err = __hw_perf_event_init(event); - if (unlikely(err)) - if (event->destroy) - event->destroy(event); return err; } @@ -1075,10 +1072,7 @@ static int perf_push_sample(struct perf_event *event, overflow = 0; if (perf_event_exclude(event, &regs, sde_regs)) goto out; - if (perf_event_overflow(event, &data, &regs)) { - overflow = 1; - event->pmu->stop(event, 0); - } + overflow = perf_event_overflow(event, &data, &regs); perf_event_update_userpage(event); out: return overflow; diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 10725f5a6f0f..63875270941b 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -518,7 +518,8 @@ static void paicrypt_have_samples(void) /* Called on schedule-in and schedule-out. No access to event structure, * but for sampling only event CRYPTO_ALL is allowed. */ -static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { /* We started with a clean page on event installation. So read out * results on schedule_out and if page was dirty, save old values.
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index a8f0bad99cf0..fd14d5ebccbc 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -542,7 +542,8 @@ static void paiext_have_samples(void) /* Called on schedule-in and schedule-out. No access to event structure, * but for sampling only event NNPA_ALL is allowed. */ -static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { /* We started with a clean page on event installation. So read out * results on schedule_out and if page was dirty, save old values. diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 5ce9a795a0fe..11f70c1e2797 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -8,6 +8,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/stop_machine.h> +#include <linux/cpufeature.h> #include <linux/bitops.h> #include <linux/kernel.h> #include <linux/random.h> @@ -19,6 +20,7 @@ #include <linux/cpu.h> #include <linux/smp.h> #include <asm/text-patching.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/facility.h> #include <asm/elf.h> @@ -72,7 +74,7 @@ void notrace stop_machine_yield(const struct cpumask *cpumask) this_cpu = smp_processor_id(); if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) { __this_cpu_write(cpu_relax_retry, 0); - cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false); + cpu = cpumask_next_wrap(this_cpu, cpumask); if (cpu >= nr_cpu_ids) return; if (arch_vcpu_is_preempted(cpu)) @@ -209,14 +211,14 @@ static int __init setup_hwcaps(void) elf_hwcap |= HWCAP_DFP; /* huge page support */ - if (MACHINE_HAS_EDAT1) + if (cpu_has_edat1()) elf_hwcap |= HWCAP_HPAGE; /* 64-bit register support for 31-bit processes */ elf_hwcap |= HWCAP_HIGH_GPRS; /* transactional execution */ - if (MACHINE_HAS_TE) + if 
(machine_has_tx()) elf_hwcap |= HWCAP_TE; /* vector */ @@ -244,10 +246,10 @@ static int __init setup_hwcaps(void) elf_hwcap |= HWCAP_NNPA; /* guarded storage */ - if (MACHINE_HAS_GS) + if (cpu_has_gs()) elf_hwcap |= HWCAP_GS; - if (MACHINE_HAS_PCI_MIO) + if (test_machine_feature(MFEATURE_PCI_MIO)) elf_hwcap |= HWCAP_PCI_MIO; /* virtualization support */ @@ -266,31 +268,35 @@ static int __init setup_elf_platform(void) add_device_randomness(&cpu_id, sizeof(cpu_id)); switch (cpu_id.machine) { default: /* Use "z10" as default. */ - strcpy(elf_platform, "z10"); + strscpy(elf_platform, "z10"); break; case 0x2817: case 0x2818: - strcpy(elf_platform, "z196"); + strscpy(elf_platform, "z196"); break; case 0x2827: case 0x2828: - strcpy(elf_platform, "zEC12"); + strscpy(elf_platform, "zEC12"); break; case 0x2964: case 0x2965: - strcpy(elf_platform, "z13"); + strscpy(elf_platform, "z13"); break; case 0x3906: case 0x3907: - strcpy(elf_platform, "z14"); + strscpy(elf_platform, "z14"); break; case 0x8561: case 0x8562: - strcpy(elf_platform, "z15"); + strscpy(elf_platform, "z15"); break; case 0x3931: case 0x3932: - strcpy(elf_platform, "z16"); + strscpy(elf_platform, "z16"); + break; + case 0x9175: + case 0x9176: + strscpy(elf_platform, "z17"); break; } return 0; diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 1cfed8b710b8..e1240f6b29fa 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -7,10 +7,10 @@ * Martin Schwidefsky (schwidefsky@de.ibm.com) */ -#include "asm/ptrace.h" #include <linux/kernel.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> +#include <linux/cpufeature.h> #include <linux/mm.h> #include <linux/smp.h> #include <linux/errno.h> @@ -31,6 +31,9 @@ #include <asm/unistd.h> #include <asm/runtime_instr.h> #include <asm/facility.h> +#include <asm/machine.h> +#include <asm/ptrace.h> +#include <asm/rwonce.h> #include <asm/fpu.h> #include "entry.h" @@ -60,7 +63,7 @@ void update_cr_regs(struct task_struct *task) 
cr0_new = cr0_old; cr2_new = cr2_old; /* Take care of the enable/disable of transactional execution. */ - if (MACHINE_HAS_TE) { + if (machine_has_tx()) { /* Set or clear transaction execution TXC bit 8. */ cr0_new.tcx = 1; if (task->thread.per_flags & PER_FLAG_NO_TE) @@ -75,7 +78,7 @@ void update_cr_regs(struct task_struct *task) } } /* Take care of enable/disable of guarded storage. */ - if (MACHINE_HAS_GS) { + if (cpu_has_gs()) { cr2_new.gse = 0; if (task->thread.gs_cb) cr2_new.gse = 1; @@ -470,18 +473,18 @@ long arch_ptrace(struct task_struct *child, long request, case PTRACE_GET_LAST_BREAK: return put_user(child->thread.last_break, (unsigned long __user *)data); case PTRACE_ENABLE_TE: - if (!MACHINE_HAS_TE) + if (!machine_has_tx()) return -EIO; child->thread.per_flags &= ~PER_FLAG_NO_TE; return 0; case PTRACE_DISABLE_TE: - if (!MACHINE_HAS_TE) + if (!machine_has_tx()) return -EIO; child->thread.per_flags |= PER_FLAG_NO_TE; child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND; return 0; case PTRACE_TE_ABORT_RAND: - if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE)) + if (!machine_has_tx() || (child->thread.per_flags & PER_FLAG_NO_TE)) return -EIO; switch (data) { case 0UL: @@ -1033,7 +1036,7 @@ static int s390_gs_cb_get(struct task_struct *target, { struct gs_cb *data = target->thread.gs_cb; - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -ENODEV; if (!data) return -ENODATA; @@ -1050,7 +1053,7 @@ static int s390_gs_cb_set(struct task_struct *target, struct gs_cb gs_cb = { }, *data = NULL; int rc; - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -ENODEV; if (!target->thread.gs_cb) { data = kzalloc(sizeof(*data), GFP_KERNEL); @@ -1087,7 +1090,7 @@ static int s390_gs_bc_get(struct task_struct *target, { struct gs_cb *data = target->thread.gs_bc_cb; - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -ENODEV; if (!data) return -ENODATA; @@ -1101,7 +1104,7 @@ static int s390_gs_bc_set(struct task_struct *target, { struct gs_cb *data = 
target->thread.gs_bc_cb; - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -ENODEV; if (!data) { data = kzalloc(sizeof(*data), GFP_KERNEL); @@ -1521,13 +1524,6 @@ static const char *gpr_names[NUM_GPRS] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", }; -unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset) -{ - if (offset >= NUM_GPRS) - return 0; - return regs->gprs[offset]; -} - int regs_query_register_offset(const char *name) { unsigned long offset; @@ -1547,29 +1543,3 @@ const char *regs_query_register_name(unsigned int offset) return NULL; return gpr_names[offset]; } - -static int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) -{ - unsigned long ksp = kernel_stack_pointer(regs); - - return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1)); -} - -/** - * regs_get_kernel_stack_nth() - get Nth entry of the stack - * @regs:pt_regs which contains kernel stack pointer. - * @n:stack entry number. - * - * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which - * is specifined by @regs. If the @n th entry is NOT in the kernel stack, - * this returns 0. - */ -unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) -{ - unsigned long addr; - - addr = kernel_stack_pointer(regs) + n * sizeof(long); - if (!regs_within_kernel_stack(regs, addr)) - return 0; - return *(unsigned long *)addr; -} diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index d78bcfe707b5..f244c5560e7f 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -54,6 +54,7 @@ #include <asm/archrandom.h> #include <asm/boot_data.h> +#include <asm/machine.h> #include <asm/ipl.h> #include <asm/facility.h> #include <asm/smp.h> @@ -180,8 +181,6 @@ unsigned long __bootdata_preserved(MODULES_END); struct lowcore *lowcore_ptr[NR_CPUS]; EXPORT_SYMBOL(lowcore_ptr); -DEFINE_STATIC_KEY_FALSE(cpu_has_bear); - /* * The Write Back bit position in the physaddr is given by the SLPC PCI. 
* Leaving the mask zero always uses write through which is safe @@ -251,7 +250,7 @@ static void __init conmode_default(void) char query_buffer[1024]; char *ptr; - if (MACHINE_IS_VM) { + if (machine_is_vm()) { cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL); console_devno = simple_strtoul(query_buffer + 5, NULL, 16); ptr = strstr(query_buffer, "SUBCHANNEL ="); @@ -289,7 +288,7 @@ static void __init conmode_default(void) SET_CONSOLE_SCLP; #endif } - } else if (MACHINE_IS_KVM) { + } else if (machine_is_kvm()) { if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE)) SET_CONSOLE_VT220; else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE)) @@ -415,7 +414,6 @@ static void __init setup_lowcore(void) lc->clock_comparator = clock_comparator_max; lc->current_task = (unsigned long)&init_task; lc->lpp = LPP_MAGIC; - lc->machine_flags = get_lowcore()->machine_flags; lc->preempt_count = get_lowcore()->preempt_count; nmi_alloc_mcesa_early(&lc->mcesad); lc->sys_enter_timer = get_lowcore()->sys_enter_timer; @@ -652,7 +650,7 @@ static void __init reserve_crashkernel(void) return; } - if (!oldmem_data.start && MACHINE_IS_VM) + if (!oldmem_data.start && machine_is_vm()) diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; @@ -898,12 +896,12 @@ void __init setup_arch(char **cmdline_p) /* * print what head.S has found out about the machine */ - if (MACHINE_IS_VM) + if (machine_is_vm()) pr_info("Linux is running as a z/VM " "guest operating system in 64-bit mode\n"); - else if (MACHINE_IS_KVM) + else if (machine_is_kvm()) pr_info("Linux is running under KVM in 64-bit mode\n"); - else if (MACHINE_IS_LPAR) + else if (machine_is_lpar()) pr_info("Linux is running natively in 64-bit mode\n"); else pr_info("Linux is running as a guest in 64-bit mode\n"); @@ -911,7 +909,7 @@ void __init setup_arch(char **cmdline_p) if (!boot_earlyprintk) boot_rb_foreach(print_rb_entry); - if 
(have_relocated_lowcore()) + if (machine_has_relocated_lowcore()) pr_info("Lowcore relocated to 0x%px\n", get_lowcore()); log_component_list(); @@ -961,7 +959,7 @@ void __init setup_arch(char **cmdline_p) setup_uv(); dma_contiguous_reserve(ident_map_size); vmcp_cma_reserve(); - if (MACHINE_HAS_EDAT2) + if (cpu_has_edat2()) hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); reserve_crashkernel(); @@ -981,10 +979,6 @@ void __init setup_arch(char **cmdline_p) numa_setup(); smp_detect_cpus(); topology_init_early(); - - if (test_facility(193)) - static_branch_enable(&cpu_has_bear); - setup_protection_map(); /* * Create kernel page tables. diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 7b08399b0846..81f12bb77f62 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -18,6 +18,7 @@ #define KMSG_COMPONENT "cpu" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include <linux/cpufeature.h> #include <linux/workqueue.h> #include <linux/memblock.h> #include <linux/export.h> @@ -38,6 +39,7 @@ #include <linux/kprobes.h> #include <asm/access-regs.h> #include <asm/asm-offsets.h> +#include <asm/machine.h> #include <asm/ctlreg.h> #include <asm/pfault.h> #include <asm/diag.h> @@ -97,13 +99,6 @@ __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS]; static unsigned int smp_max_threads __initdata = -1U; cpumask_t cpu_setup_mask; -static int __init early_nosmt(char *s) -{ - smp_max_threads = 1; - return 0; -} -early_param("nosmt", early_nosmt); - static int __init early_smt(char *s) { get_option(&s, &smp_max_threads); @@ -263,13 +258,12 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) lc->percpu_offset = __per_cpu_offset[cpu]; lc->kernel_asce = get_lowcore()->kernel_asce; lc->user_asce = s390_invalid_asce; - lc->machine_flags = get_lowcore()->machine_flags; lc->user_timer = lc->system_timer = lc->steal_timer = lc->avg_steal_timer = 0; abs_lc = get_abs_lowcore(); memcpy(lc->cregs_save_area, abs_lc->cregs_save_area, 
sizeof(lc->cregs_save_area)); put_abs_lowcore(abs_lc); - lc->cregs_save_area[1] = lc->kernel_asce; + lc->cregs_save_area[1] = lc->user_asce; lc->cregs_save_area[7] = lc->user_asce; save_access_regs((unsigned int *) lc->access_regs_save_area); arch_spin_lock_setup(cpu); @@ -416,7 +410,7 @@ EXPORT_SYMBOL(arch_vcpu_is_preempted); void notrace smp_yield_cpu(int cpu) { - if (!MACHINE_HAS_DIAG9C) + if (!machine_has_diag9c()) return; diag_stat_inc_norecursion(DIAG_STAT_X09C); asm volatile("diag %0,0,0x9c" @@ -561,10 +555,10 @@ int smp_store_status(int cpu) if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; - if (!cpu_has_vx() && !MACHINE_HAS_GS) + if (!cpu_has_vx() && !cpu_has_gs()) return 0; pa = lc->mcesad & MCESA_ORIGIN_MASK; - if (MACHINE_HAS_GS) + if (cpu_has_gs()) pa |= lc->mcesad & MCESA_LC_MASK; if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) @@ -807,6 +801,7 @@ void __init smp_detect_cpus(void) mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp; mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1; pcpu_set_smt(mtid); + cpu_smt_set_num_threads(smp_cpu_mtid + 1, smp_cpu_mtid + 1); /* Print number of CPUs */ c_cpus = s_cpus = 0; diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 40edfde25f5b..b153a395f46d 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -9,6 +9,7 @@ #include <linux/stacktrace.h> #include <linux/uaccess.h> #include <linux/compat.h> +#include <asm/asm-offsets.h> #include <asm/stacktrace.h> #include <asm/unwind.h> #include <asm/kprobes.h> diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c index 5ec28028315b..4fee74553ca2 100644 --- a/arch/s390/kernel/syscall.c +++ b/arch/s390/kernel/syscall.c @@ -12,6 +12,7 @@ * platform. 
*/ +#include <linux/cpufeature.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/mm.h> @@ -81,25 +82,35 @@ SYSCALL_DEFINE0(ni_syscall) return -ENOSYS; } -static void do_syscall(struct pt_regs *regs) +void noinstr __do_syscall(struct pt_regs *regs, int per_trap) { unsigned long nr; + add_random_kstack_offset(); + enter_from_user_mode(regs); + regs->psw = get_lowcore()->svc_old_psw; + regs->int_code = get_lowcore()->svc_int_code; + update_timer_sys(); + if (cpu_has_bear()) + current->thread.last_break = regs->last_break; + local_irq_enable(); + regs->orig_gpr2 = regs->gprs[2]; + if (unlikely(per_trap)) + set_thread_flag(TIF_PER_TRAP); + regs->flags = 0; + set_pt_regs_flag(regs, PIF_SYSCALL); nr = regs->int_code & 0xffff; - if (!nr) { + if (likely(!nr)) { nr = regs->gprs[1] & 0xffff; regs->int_code &= ~0xffffUL; regs->int_code |= nr; } - regs->gprs[2] = nr; - if (nr == __NR_restart_syscall && !(current->restart_block.arch_data & 1)) { regs->psw.addr = current->restart_block.arch_data; current->restart_block.arch_data = 1; } nr = syscall_enter_from_user_mode_work(regs, nr); - /* * In the s390 ptrace ABI, both the syscall number and the return value * use gpr2. However, userspace puts the syscall number either in the @@ -107,37 +118,11 @@ static void do_syscall(struct pt_regs *regs) * work, the ptrace code sets PIF_SYSCALL_RET_SET, which is checked here * and if set, the syscall will be skipped. 
*/ - if (unlikely(test_and_clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET))) goto out; regs->gprs[2] = -ENOSYS; - if (likely(nr >= NR_syscalls)) - goto out; - do { + if (likely(nr < NR_syscalls)) regs->gprs[2] = current->thread.sys_call_table[nr](regs); - } while (test_and_clear_pt_regs_flag(regs, PIF_EXECVE_PGSTE_RESTART)); out: - syscall_exit_to_user_mode_work(regs); -} - -void noinstr __do_syscall(struct pt_regs *regs, int per_trap) -{ - add_random_kstack_offset(); - enter_from_user_mode(regs); - regs->psw = get_lowcore()->svc_old_psw; - regs->int_code = get_lowcore()->svc_int_code; - update_timer_sys(); - if (static_branch_likely(&cpu_has_bear)) - current->thread.last_break = regs->last_break; - - local_irq_enable(); - regs->orig_gpr2 = regs->gprs[2]; - - if (per_trap) - set_thread_flag(TIF_PER_TRAP); - - regs->flags = 0; - set_pt_regs_flag(regs, PIF_SYSCALL); - do_syscall(regs); - exit_to_user_mode(); + syscall_exit_to_user_mode(regs); } diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index e9115b4d8b63..a4569b96ef06 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -469,3 +469,4 @@ 464 common getxattrat sys_getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 88055f58fbda..1ea84e942bd4 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -5,6 +5,7 @@ * Martin Schwidefsky <schwidefsky@de.ibm.com>, */ +#include <linux/cpufeature.h> #include <linux/debugfs.h> #include <linux/kernel.h> #include <linux/mm.h> @@ -15,54 +16,17 @@ #include <linux/export.h> #include <linux/slab.h> #include <asm/asm-extable.h> +#include <asm/machine.h> #include <asm/ebcdic.h> #include <asm/debug.h> #include <asm/sysinfo.h> #include <asm/cpcmd.h> 
#include <asm/topology.h> #include <asm/fpu.h> +#include <asm/asm.h> int topology_max_mnest; -static inline int __stsi(void *sysinfo, int fc, int sel1, int sel2, int *lvl) -{ - int r0 = (fc << 28) | sel1; - int rc = 0; - - asm volatile( - " lr 0,%[r0]\n" - " lr 1,%[r1]\n" - " stsi 0(%[sysinfo])\n" - "0: jz 2f\n" - "1: lhi %[rc],%[retval]\n" - "2: lr %[r0],0\n" - EX_TABLE(0b, 1b) - : [r0] "+d" (r0), [rc] "+d" (rc) - : [r1] "d" (sel2), - [sysinfo] "a" (sysinfo), - [retval] "K" (-EOPNOTSUPP) - : "cc", "0", "1", "memory"); - *lvl = ((unsigned int) r0) >> 28; - return rc; -} - -/* - * stsi - store system information - * - * Returns the current configuration level if function code 0 was specified. - * Otherwise returns 0 on success or a negative value on error. - */ -int stsi(void *sysinfo, int fc, int sel1, int sel2) -{ - int lvl, rc; - - rc = __stsi(sysinfo, fc, sel1, sel2, &lvl); - if (rc) - return rc; - return fc ? 0 : lvl; -} -EXPORT_SYMBOL(stsi); - #ifdef CONFIG_PROC_FS static bool convert_ext_name(unsigned char encoding, char *name, size_t len) @@ -154,7 +118,7 @@ static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info) int i; seq_putc(m, '\n'); - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) return; if (stsi(info, 15, 1, topology_max_mnest)) return; @@ -415,7 +379,7 @@ static struct service_level service_level_vm = { static __init int create_proc_service_level(void) { proc_create_seq("service_levels", 0, NULL, &service_level_seq_ops); - if (MACHINE_IS_VM) + if (machine_is_vm()) register_service_level(&service_level_vm); return 0; } @@ -559,7 +523,7 @@ static __init int stsi_init_debugfs(void) sf = &stsi_file[i]; debugfs_create_file(sf->name, 0400, stsi_root, NULL, sf->fops); } - if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && MACHINE_HAS_TOPOLOGY) { + if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && cpu_has_topology()) { char link_to[10]; sprintf(link_to, "15_1_%d", topology_mnest_limit()); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c 
index e9f47c3a6197..fed17d407a44 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -54,10 +54,10 @@ #include <asm/cio.h> #include "entry.h" -union tod_clock tod_clock_base __section(".data"); +union tod_clock __bootdata_preserved(tod_clock_base); EXPORT_SYMBOL_GPL(tod_clock_base); -u64 clock_comparator_max = -1ULL; +u64 __bootdata_preserved(clock_comparator_max); EXPORT_SYMBOL_GPL(clock_comparator_max); static DEFINE_PER_CPU(struct clock_event_device, comparators); @@ -79,12 +79,10 @@ void __init time_early_init(void) { struct ptff_qto qto; struct ptff_qui qui; - int cs; /* Initialize TOD steering parameters */ tod_steering_end = tod_clock_base.tod; - for (cs = 0; cs < CS_BASES; cs++) - vdso_data[cs].arch_data.tod_steering_end = tod_steering_end; + vdso_k_time_data->arch_data.tod_steering_end = tod_steering_end; if (!test_facility(28)) return; @@ -373,7 +371,6 @@ static void clock_sync_global(long delta) { unsigned long now, adj; struct ptff_qto qto; - int cs; /* Fixup the monotonic sched clock. */ tod_clock_base.eitod += delta; @@ -389,10 +386,8 @@ static void clock_sync_global(long delta) panic("TOD clock sync offset %li is too large to drift\n", tod_steering_delta); tod_steering_end = now + (abs(tod_steering_delta) << 15); - for (cs = 0; cs < CS_BASES; cs++) { - vdso_data[cs].arch_data.tod_steering_end = tod_steering_end; - vdso_data[cs].arch_data.tod_steering_delta = tod_steering_delta; - } + vdso_k_time_data->arch_data.tod_steering_end = tod_steering_end; + vdso_k_time_data->arch_data.tod_steering_delta = tod_steering_delta; /* Update LPAR offset. 
*/ if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0) @@ -685,7 +680,7 @@ static void stp_work_fn(struct work_struct *work) if (!stp_online) { chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL); - del_timer_sync(&stp_timer); + timer_delete_sync(&stp_timer); goto out_unlock; } diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 211cc8382e4a..3df048e190b1 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -6,6 +6,7 @@ #define KMSG_COMPONENT "cpu" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include <linux/cpufeature.h> #include <linux/workqueue.h> #include <linux/memblock.h> #include <linux/uaccess.h> @@ -240,7 +241,7 @@ int topology_set_cpu_management(int fc) { int cpu, rc; - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) return -EOPNOTSUPP; if (fc) rc = ptf(PTF_VERTICAL); @@ -315,13 +316,13 @@ static int __arch_update_cpu_topology(void) hd_status = 0; rc = 0; mutex_lock(&smp_cpu_state_mutex); - if (MACHINE_HAS_TOPOLOGY) { + if (cpu_has_topology()) { rc = 1; store_topology(info); tl_to_masks(info); } update_cpu_masks(); - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) topology_update_polarization_simple(); if (cpu_management == 1) hd_status = hd_enable_hiperdispatch(); @@ -376,7 +377,7 @@ static void set_topology_timer(void) void topology_expect_change(void) { - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) return; /* This is racy, but it doesn't matter since it is just a heuristic. * Worst case is that we poll in a higher frequency for a bit longer. 
@@ -500,7 +501,7 @@ int topology_cpu_init(struct cpu *cpu) int rc; rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); - if (rc || !MACHINE_HAS_TOPOLOGY) + if (rc || !cpu_has_topology()) return rc; rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group); if (rc) @@ -569,12 +570,12 @@ void __init topology_init_early(void) set_sched_topology(s390_topology); if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) { - if (MACHINE_HAS_TOPOLOGY) + if (cpu_has_topology()) topology_mode = TOPOLOGY_MODE_HW; else topology_mode = TOPOLOGY_MODE_SINGLE; } - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) goto out; tl_info = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE); info = tl_info; @@ -596,7 +597,7 @@ static inline int topology_get_mode(int enabled) { if (!enabled) return TOPOLOGY_MODE_SINGLE; - return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE; + return cpu_has_topology() ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE; } static inline int topology_is_enabled(void) @@ -686,7 +687,7 @@ static int __init topology_init(void) int rc = 0; timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE); - if (MACHINE_HAS_TOPOLOGY) + if (cpu_has_topology()) set_topology_timer(); else topology_update_polarization_simple(); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index b746213d3110..19687dab32f7 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -3,18 +3,13 @@ * S390 version * Copyright IBM Corp. 1999, 2000 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), * * Derived from "arch/i386/kernel/traps.c" * Copyright (C) 1991, 1992 Linus Torvalds */ -/* - * 'Traps.c' handles hardware traps and faults after we have saved some - * state in 'asm.s'. 
- */ -#include "asm/irqflags.h" -#include "asm/ptrace.h" +#include <linux/cpufeature.h> #include <linux/kprobes.h> #include <linux/kdebug.h> #include <linux/randomize_kstack.h> @@ -29,6 +24,8 @@ #include <linux/entry-common.h> #include <linux/kmsan.h> #include <asm/asm-extable.h> +#include <asm/irqflags.h> +#include <asm/ptrace.h> #include <asm/vtime.h> #include <asm/fpu.h> #include <asm/fault.h> @@ -42,7 +39,7 @@ static inline void __user *get_trap_ip(struct pt_regs *regs) address = current->thread.trap_tdb.data[3]; else address = regs->psw.addr; - return (void __user *) (address - (regs->int_code >> 16)); + return (void __user *)(address - (regs->int_code >> 16)); } #ifdef CONFIG_GENERIC_BUG @@ -57,16 +54,15 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) if (user_mode(regs)) { force_sig_fault(si_signo, si_code, get_trap_ip(regs)); report_user_fault(regs, si_signo, 0); - } else { + } else { if (!fixup_exception(regs)) die(regs, str); - } + } } static void do_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) { - if (notify_die(DIE_TRAP, str, regs, 0, - regs->int_code, si_signo) == NOTIFY_STOP) + if (notify_die(DIE_TRAP, str, regs, 0, regs->int_code, si_signo) == NOTIFY_STOP) return; do_report_trap(regs, si_signo, si_code, str); } @@ -78,8 +74,7 @@ void do_per_trap(struct pt_regs *regs) return; if (!current->ptrace) return; - force_sig_fault(SIGTRAP, TRAP_HWBKPT, - (void __force __user *) current->thread.per_event.address); + force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __force __user *)current->thread.per_event.address); } NOKPROBE_SYMBOL(do_per_trap); @@ -98,36 +93,25 @@ static void name(struct pt_regs *regs) \ do_trap(regs, signr, sicode, str); \ } -DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, - "addressing exception") -DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, - "execute exception") -DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, - "fixpoint divide exception") 
-DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, - "fixpoint overflow exception") -DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, - "HFP overflow exception") -DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, - "HFP underflow exception") -DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, - "HFP significance exception") -DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, - "HFP divide exception") -DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, - "HFP square root exception") -DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, - "operand exception") -DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, - "privileged operation") -DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, - "special operation exception") -DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, - "transaction constraint exception") +DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, "addressing exception") +DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, "fixpoint divide exception") +DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, "execute exception") +DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, "HFP divide exception") +DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, "HFP overflow exception") +DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, "HFP significance exception") +DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, "HFP square root exception") +DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, "HFP underflow exception") +DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, "operand exception") +DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, "fixpoint overflow exception") +DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, "privileged operation") +DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, "special operation exception") +DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, "specification exception"); +DO_ERROR_INFO(transaction_exception, SIGILL, 
ILL_ILLOPN, "transaction constraint exception") static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc) { int si_code = 0; + /* FPC[2] is Data Exception Code */ if ((fpc & 0x00000300) == 0) { /* bits 6 and 7 of DXC are 0 iff IEEE exception */ @@ -153,36 +137,35 @@ static void translation_specification_exception(struct pt_regs *regs) static void illegal_op(struct pt_regs *regs) { - __u8 opcode[6]; - __u16 __user *location; int is_uprobe_insn = 0; + u16 __user *location; int signal = 0; + u16 opcode; location = get_trap_ip(regs); - if (user_mode(regs)) { - if (get_user(*((__u16 *) opcode), (__u16 __user *) location)) + if (get_user(opcode, location)) return; - if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) { + if (opcode == S390_BREAKPOINT_U16) { if (current->ptrace) force_sig_fault(SIGTRAP, TRAP_BRKPT, location); else signal = SIGILL; #ifdef CONFIG_UPROBES - } else if (*((__u16 *) opcode) == UPROBE_SWBP_INSN) { + } else if (opcode == UPROBE_SWBP_INSN) { is_uprobe_insn = 1; #endif - } else + } else { signal = SIGILL; + } } /* - * We got either an illegal op in kernel mode, or user space trapped + * This is either an illegal op in kernel mode, or user space trapped * on a uprobes illegal instruction. See if kprobes or uprobes picks * it up. If not, SIGILL. 
*/ if (is_uprobe_insn || !user_mode(regs)) { - if (notify_die(DIE_BPT, "bpt", regs, 0, - 3, SIGTRAP) != NOTIFY_STOP) + if (notify_die(DIE_BPT, "bpt", regs, 0, 3, SIGTRAP) != NOTIFY_STOP) signal = SIGILL; } if (signal) @@ -190,18 +173,10 @@ static void illegal_op(struct pt_regs *regs) } NOKPROBE_SYMBOL(illegal_op); -DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, - "specification exception"); - static void vector_exception(struct pt_regs *regs) { int si_code, vic; - if (!cpu_has_vx()) { - do_trap(regs, SIGILL, ILL_ILLOPN, "illegal operation"); - return; - } - /* get vector interrupt code from fpc */ save_user_fpu_regs(); vic = (current->thread.ufpu.fpc & 0xf00) >> 8; @@ -249,7 +224,6 @@ static void monitor_event_exception(struct pt_regs *regs) { if (user_mode(regs)) return; - switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) { case BUG_TRAP_TYPE_NONE: fixup_exception(regs); @@ -262,7 +236,7 @@ static void monitor_event_exception(struct pt_regs *regs) } } -void kernel_stack_overflow(struct pt_regs *regs) +void kernel_stack_invalid(struct pt_regs *regs) { /* * Normally regs are unpoisoned by the generic entry code, but @@ -270,12 +244,12 @@ void kernel_stack_overflow(struct pt_regs *regs) */ kmsan_unpoison_entry_regs(regs); bust_spinlocks(1); - printk("Kernel stack overflow.\n"); + pr_emerg("Kernel stack pointer invalid\n"); show_regs(regs); bust_spinlocks(0); - panic("Corrupt kernel stack, can't continue."); + panic("Invalid kernel stack pointer, cannot continue"); } -NOKPROBE_SYMBOL(kernel_stack_overflow); +NOKPROBE_SYMBOL(kernel_stack_invalid); static void __init test_monitor_call(void) { @@ -283,7 +257,7 @@ static void __init test_monitor_call(void) if (!IS_ENABLED(CONFIG_BUG)) return; - asm volatile( + asm_inline volatile( " mc 0,0\n" "0: lhi %[val],0\n" "1:\n" @@ -323,7 +297,6 @@ void noinstr __do_pgm_check(struct pt_regs *regs) teid.val = lc->trans_exc_code; regs->int_code = lc->pgm_int_code; regs->int_parm_long = teid.val; - /* * In 
case of a guest fault, short-circuit the fault handler and return. * This way the sie64a() function will return 0; fault address and @@ -336,23 +309,19 @@ void noinstr __do_pgm_check(struct pt_regs *regs) current->thread.gmap_int_code = regs->int_code & 0xffff; return; } - state = irqentry_enter(regs); - if (user_mode(regs)) { update_timer_sys(); - if (!static_branch_likely(&cpu_has_bear)) { + if (!cpu_has_bear()) { if (regs->last_break < 4096) regs->last_break = 1; } current->thread.last_break = regs->last_break; } - if (lc->pgm_code & 0x0200) { /* transaction abort */ current->thread.trap_tdb = lc->pgm_tdb; } - if (lc->pgm_code & PGM_INT_CODE_PER) { if (user_mode(regs)) { struct per_event *ev = &current->thread.per_event; @@ -368,11 +337,9 @@ void noinstr __do_pgm_check(struct pt_regs *regs) goto out; } } - if (!irqs_disabled_flags(regs->psw.mask)) trace_hardirqs_on(); __arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER); - trapnr = regs->int_code & PGM_INT_CODE_MASK; if (trapnr) pgm_check_table[trapnr](regs); diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 9f05df2da2f7..b99478e84da4 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -15,6 +15,7 @@ #include <linux/pagemap.h> #include <linux/swap.h> #include <linux/pagewalk.h> +#include <linux/backing-dev.h> #include <asm/facility.h> #include <asm/sections.h> #include <asm/uv.h> @@ -135,7 +136,7 @@ int uv_destroy_folio(struct folio *folio) { int rc; - /* See gmap_make_secure(): large folios cannot be secure */ + /* Large folios cannot be secure */ if (unlikely(folio_test_large(folio))) return 0; @@ -184,7 +185,7 @@ int uv_convert_from_secure_folio(struct folio *folio) { int rc; - /* See gmap_make_secure(): large folios cannot be secure */ + /* Large folios cannot be secure */ if (unlikely(folio_test_large(folio))) return 0; @@ -206,6 +207,39 @@ int uv_convert_from_secure_pte(pte_t pte) return uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte))); } +/** + * should_export_before_import - 
Determine whether an export is needed + * before an import-like operation + * @uvcb: the Ultravisor control block of the UVC to be performed + * @mm: the mm of the process + * + * Returns whether an export is needed before every import-like operation. + * This is needed for shared pages, which don't trigger a secure storage + * exception when accessed from a different guest. + * + * Although considered as one, the Unpin Page UVC is not an actual import, + * so it is not affected. + * + * No export is needed also when there is only one protected VM, because the + * page cannot belong to the wrong VM in that case (there is no "other VM" + * it can belong to). + * + * Return: true if an export is needed before every import, otherwise false. + */ +static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) +{ + /* + * The misc feature indicates, among other things, that importing a + * shared page from a different protected VM will automatically also + * transfer its ownership. + */ + if (uv_has_feature(BIT_UV_FEAT_MISC)) + return false; + if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) + return false; + return atomic_read(&mm->context.protected_count) > 1; +} + /* * Calculate the expected ref_count for a folio that would otherwise have no * further pins. This was cribbed from similar functions in other places in @@ -228,7 +262,7 @@ static int expected_folio_refs(struct folio *folio) } /** - * make_folio_secure() - make a folio secure + * __make_folio_secure() - make a folio secure * @folio: the folio to make secure * @uvcb: the uvcb that describes the UVC to be used * @@ -237,20 +271,18 @@ static int expected_folio_refs(struct folio *folio) * * Return: 0 on success; * -EBUSY if the folio is in writeback or has too many references; - * -E2BIG if the folio is large; * -EAGAIN if the UVC needs to be attempted again; * -ENXIO if the address is not mapped; * -EINVAL if the UVC failed for other reasons. 
* * Context: The caller must hold exactly one extra reference on the folio - * (it's the same logic as split_folio()) + * (it's the same logic as split_folio()), and the folio must be + * locked. */ -int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) +static int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) { int expected, cc = 0; - if (folio_test_large(folio)) - return -E2BIG; if (folio_test_writeback(folio)) return -EBUSY; expected = expected_folio_refs(folio) + 1; @@ -277,19 +309,168 @@ int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) return -EAGAIN; return uvcb->rc == 0x10a ? -ENXIO : -EINVAL; } -EXPORT_SYMBOL_GPL(make_folio_secure); + +static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct uv_cb_header *uvcb) +{ + int rc; + + if (!folio_trylock(folio)) + return -EAGAIN; + if (should_export_before_import(uvcb, mm)) + uv_convert_from_secure(folio_to_phys(folio)); + rc = __make_folio_secure(folio, uvcb); + folio_unlock(folio); + + return rc; +} + +/** + * s390_wiggle_split_folio() - try to drain extra references to a folio and + * split the folio if it is large. + * @mm: the mm containing the folio to work on + * @folio: the folio + * + * Context: Must be called while holding an extra reference to the folio; + * the mm lock should not be held. + * Return: 0 if the operation was successful; + * -EAGAIN if splitting the large folio was not successful, + * but another attempt can be made; + * -EINVAL in case of other folio splitting errors. See split_folio(). 
+ */ +static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio) +{ + int rc, tried_splits; + + lockdep_assert_not_held(&mm->mmap_lock); + folio_wait_writeback(folio); + lru_add_drain_all(); + + if (!folio_test_large(folio)) + return 0; + + for (tried_splits = 0; tried_splits < 2; tried_splits++) { + struct address_space *mapping; + loff_t lstart, lend; + struct inode *inode; + + folio_lock(folio); + rc = split_folio(folio); + if (rc != -EBUSY) { + folio_unlock(folio); + return rc; + } + + /* + * Splitting with -EBUSY can fail for various reasons, but we + * have to handle one case explicitly for now: some mappings + * don't allow for splitting dirty folios; writeback will + * mark them clean again, including marking all page table + * entries mapping the folio read-only, to catch future write + * attempts. + * + * While the system should be writing back dirty folios in the + * background, we obtained this folio by looking up a writable + * page table entry. On these problematic mappings, writable + * page table entries imply dirty folios, preventing the + * split in the first place. + * + * To prevent a livelock when trigger writeback manually and + * letting the caller look up the folio again in the page + * table (turning it dirty), immediately try to split again. + * + * This is only a problem for some mappings (e.g., XFS); + * mappings that do not support writeback (e.g., shmem) do not + * apply. + */ + if (!folio_test_dirty(folio) || folio_test_anon(folio) || + !folio->mapping || !mapping_can_writeback(folio->mapping)) { + folio_unlock(folio); + break; + } + + /* + * Ideally, we'd only trigger writeback on this exact folio. But + * there is no easy way to do that, so we'll stabilize the + * mapping while we still hold the folio lock, so we can drop + * the folio lock to trigger writeback on the range currently + * covered by the folio instead. 
+ */ + mapping = folio->mapping; + lstart = folio_pos(folio); + lend = lstart + folio_size(folio) - 1; + inode = igrab(mapping->host); + folio_unlock(folio); + + if (unlikely(!inode)) + break; + + filemap_write_and_wait_range(mapping, lstart, lend); + iput(mapping->host); + } + return -EAGAIN; +} + +int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb) +{ + struct vm_area_struct *vma; + struct folio_walk fw; + struct folio *folio; + int rc; + + mmap_read_lock(mm); + vma = vma_lookup(mm, hva); + if (!vma) { + mmap_read_unlock(mm); + return -EFAULT; + } + folio = folio_walk_start(&fw, vma, hva, 0); + if (!folio) { + mmap_read_unlock(mm); + return -ENXIO; + } + + folio_get(folio); + /* + * Secure pages cannot be huge and userspace should not combine both. + * In case userspace does it anyway this will result in an -EFAULT for + * the unpack. The guest is thus never reaching secure mode. + * If userspace plays dirty tricks and decides to map huge pages at a + * later point in time, it will receive a segmentation fault or + * KVM_RUN will return -EFAULT. + */ + if (folio_test_hugetlb(folio)) + rc = -EFAULT; + else if (folio_test_large(folio)) + rc = -E2BIG; + else if (!pte_write(fw.pte) || (pte_val(fw.pte) & _PAGE_INVALID)) + rc = -ENXIO; + else + rc = make_folio_secure(mm, folio, uvcb); + folio_walk_end(&fw, vma); + mmap_read_unlock(mm); + + if (rc == -E2BIG || rc == -EBUSY) { + rc = s390_wiggle_split_folio(mm, folio); + if (!rc) + rc = -EAGAIN; + } + folio_put(folio); + + return rc; +} +EXPORT_SYMBOL_GPL(make_hva_secure); /* * To be called with the folio locked or with an extra reference! This will - * prevent gmap_make_secure from touching the folio concurrently. Having 2 - * parallel arch_make_folio_accessible is fine, as the UV calls will become a - * no-op if the folio is already exported. + * prevent kvm_s390_pv_make_secure() from touching the folio concurrently. 
+ * Having 2 parallel arch_make_folio_accessible is fine, as the UV calls will + * become a no-op if the folio is already exported. */ int arch_make_folio_accessible(struct folio *folio) { int rc = 0; - /* See gmap_make_secure(): large folios cannot be secure */ + /* Large folios cannot be secure */ if (unlikely(folio_test_large(folio))) return 0; @@ -660,7 +841,12 @@ out_kobj: device_initcall(uv_sysfs_init); /* - * Find the secret with the secret_id in the provided list. + * Locate a secret in the list by its id. + * @secret_id: search pattern. + * @list: ephemeral buffer space + * @secret: output data, containing the secret's metadata. + * + * Search for a secret with the given secret_id in the Ultravisor secret store. * * Context: might sleep. */ @@ -681,12 +867,15 @@ static int find_secret_in_page(const u8 secret_id[UV_SECRET_ID_LEN], /* * Do the actual search for `uv_get_secret_metadata`. + * @secret_id: search pattern. + * @list: ephemeral buffer space + * @secret: output data, containing the secret's metadata. * * Context: might sleep. */ -static int find_secret(const u8 secret_id[UV_SECRET_ID_LEN], - struct uv_secret_list *list, - struct uv_secret_list_item_hdr *secret) +int uv_find_secret(const u8 secret_id[UV_SECRET_ID_LEN], + struct uv_secret_list *list, + struct uv_secret_list_item_hdr *secret) { u16 start_idx = 0; u16 list_rc; @@ -708,36 +897,7 @@ static int find_secret(const u8 secret_id[UV_SECRET_ID_LEN], return -ENOENT; } - -/** - * uv_get_secret_metadata() - get secret metadata for a given secret id. - * @secret_id: search pattern. - * @secret: output data, containing the secret's metadata. - * - * Search for a secret with the given secret_id in the Ultravisor secret store. - * - * Context: might sleep. - * - * Return: - * * %0: - Found entry; secret->idx and secret->type are valid. - * * %ENOENT - No entry found. - * * %ENODEV: - Not supported: UV not available or command not available. - * * %EIO: - Other unexpected UV error. 
- */ -int uv_get_secret_metadata(const u8 secret_id[UV_SECRET_ID_LEN], - struct uv_secret_list_item_hdr *secret) -{ - struct uv_secret_list *buf; - int rc; - - buf = kzalloc(sizeof(*buf), GFP_KERNEL); - if (!buf) - return -ENOMEM; - rc = find_secret(secret_id, buf, secret); - kfree(buf); - return rc; -} -EXPORT_SYMBOL_GPL(uv_get_secret_metadata); +EXPORT_SYMBOL_GPL(uv_find_secret); /** * uv_retrieve_secret() - get the secret value for the secret index. diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 598b512cde01..430feb1a5013 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -16,8 +16,8 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/smp.h> -#include <linux/time_namespace.h> #include <linux/random.h> +#include <linux/vdso_datastore.h> #include <vdso/datapage.h> #include <asm/vdso/vsyscall.h> #include <asm/alternative.h> @@ -26,85 +26,6 @@ extern char vdso64_start[], vdso64_end[]; extern char vdso32_start[], vdso32_end[]; -static struct vm_special_mapping vvar_mapping; - -static union vdso_data_store vdso_data_store __page_aligned_data; - -struct vdso_data *vdso_data = vdso_data_store.data; - -#ifdef CONFIG_TIME_NS -struct vdso_data *arch_get_vdso_data(void *vvar_page) -{ - return (struct vdso_data *)(vvar_page); -} - -/* - * The VVAR page layout depends on whether a task belongs to the root or - * non-root time namespace. Whenever a task changes its namespace, the VVAR - * page tables are cleared and then they will be re-faulted with a - * corresponding layout. - * See also the comment near timens_setup_vdso_data() for details. 
- */ -int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) -{ - struct mm_struct *mm = task->mm; - VMA_ITERATOR(vmi, mm, 0); - struct vm_area_struct *vma; - - mmap_read_lock(mm); - for_each_vma(vmi, vma) { - if (!vma_is_special_mapping(vma, &vvar_mapping)) - continue; - zap_vma_pages(vma); - break; - } - mmap_read_unlock(mm); - return 0; -} -#endif - -static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *timens_page = find_timens_vvar_page(vma); - unsigned long addr, pfn; - vm_fault_t err; - - switch (vmf->pgoff) { - case VVAR_DATA_PAGE_OFFSET: - pfn = virt_to_pfn(vdso_data); - if (timens_page) { - /* - * Fault in VVAR page too, since it will be accessed - * to get clock data anyway. - */ - addr = vmf->address + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE; - err = vmf_insert_pfn(vma, addr, pfn); - if (unlikely(err & VM_FAULT_ERROR)) - return err; - pfn = page_to_pfn(timens_page); - } - break; -#ifdef CONFIG_TIME_NS - case VVAR_TIMENS_PAGE_OFFSET: - /* - * If a task belongs to a time namespace then a namespace - * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and - * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET - * offset. - * See also the comment near timens_setup_vdso_data(). 
- */ - if (!timens_page) - return VM_FAULT_SIGBUS; - pfn = virt_to_pfn(vdso_data); - break; -#endif /* CONFIG_TIME_NS */ - default: - return VM_FAULT_SIGBUS; - } - return vmf_insert_pfn(vma, vmf->address, pfn); -} - static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *vma) { @@ -112,11 +33,6 @@ static int vdso_mremap(const struct vm_special_mapping *sm, return 0; } -static struct vm_special_mapping vvar_mapping = { - .name = "[vvar]", - .fault = vvar_fault, -}; - static struct vm_special_mapping vdso64_mapping = { .name = "[vdso]", .mremap = vdso_mremap, @@ -142,7 +58,7 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len) struct vm_area_struct *vma; int rc; - BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); + BUILD_BUG_ON(VDSO_NR_PAGES != __VDSO_PAGES); if (mmap_write_lock_killable(mm)) return -EINTR; @@ -157,17 +73,14 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len) rc = vvar_start; if (IS_ERR_VALUE(vvar_start)) goto out; - vma = _install_special_mapping(mm, vvar_start, VVAR_NR_PAGES*PAGE_SIZE, - VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| - VM_PFNMAP, - &vvar_mapping); + vma = vdso_install_vvar_mapping(mm, vvar_start); rc = PTR_ERR(vma); if (IS_ERR(vma)) goto out; - vdso_text_start = vvar_start + VVAR_NR_PAGES * PAGE_SIZE; + vdso_text_start = vvar_start + VDSO_NR_PAGES * PAGE_SIZE; /* VM_MAYWRITE for COW so gdb can set breakpoints */ vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len, - VM_READ|VM_EXEC| + VM_READ|VM_EXEC|VM_SEALED_SYSMAP| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_mapping); if (IS_ERR(vma)) { @@ -220,7 +133,7 @@ unsigned long vdso_text_size(void) unsigned long vdso_size(void) { - return vdso_text_size() + VVAR_NR_PAGES * PAGE_SIZE; + return vdso_text_size() + VDSO_NR_PAGES * PAGE_SIZE; } int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index 
2c5afb88d298..1e4ddd1a683f 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -2,7 +2,7 @@ # List of files in the vdso # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include obj-vdso32 = vdso_user_wrapper-32.o note-32.o # Build rules diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S index c916c4f73f76..9630d58c2080 100644 --- a/arch/s390/kernel/vdso32/vdso32.lds.S +++ b/arch/s390/kernel/vdso32/vdso32.lds.S @@ -6,16 +6,15 @@ #include <asm/page.h> #include <asm/vdso.h> +#include <vdso/datapage.h> OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") OUTPUT_ARCH(s390:31-bit) SECTIONS { - PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); -#ifdef CONFIG_TIME_NS - PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); -#endif + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index ad206f2068d8..d8f0df742809 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -2,7 +2,7 @@ # List of files in the vdso # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include obj-vdso64 = vdso_user_wrapper.o note.o vgetrandom-chacha.o obj-cvdso64 = vdso64_generic.o getcpu.o vgetrandom.o VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index ec42b7d9cb53..e4f6551ae898 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -7,17 +7,15 @@ #include <asm/vdso/vsyscall.h> #include <asm/page.h> #include <asm/vdso.h> +#include <vdso/datapage.h> OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") OUTPUT_ARCH(s390:64-bit) SECTIONS { - PROVIDE(_vdso_data = . 
- __VVAR_PAGES * PAGE_SIZE); - PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET); -#ifdef CONFIG_TIME_NS - PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); -#endif + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index f0ffe874adc2..9a723c48b05a 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -8,7 +8,7 @@ include $(srctree)/virt/kvm/Makefile.kvm ccflags-y := -Ivirt/kvm -Iarch/s390/kvm kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o -kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap.o gmap-vsie.o +kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap-vsie.o kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 74f73141f9b9..53233dec8cad 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -11,12 +11,30 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> #include <asm/gmap.h> +#include <asm/gmap_helpers.h> #include <asm/virtio-ccw.h> #include "kvm-s390.h" #include "trace.h" #include "trace-s390.h" #include "gaccess.h" +static void do_discard_gfn_range(struct kvm_vcpu *vcpu, gfn_t gfn_start, gfn_t gfn_end) +{ + struct kvm_memslot_iter iter; + struct kvm_memory_slot *slot; + struct kvm_memslots *slots; + unsigned long start, end; + + slots = kvm_vcpu_memslots(vcpu); + + kvm_for_each_memslot_in_gfn_range(&iter, slots, gfn_start, gfn_end) { + slot = iter.slot; + start = __gfn_to_hva_memslot(slot, max(gfn_start, slot->base_gfn)); + end = __gfn_to_hva_memslot(slot, min(gfn_end, slot->base_gfn + slot->npages)); + gmap_helper_discard(vcpu->kvm->mm, start, end); + } +} + static int diag_release_pages(struct kvm_vcpu *vcpu) { unsigned long start, end; @@ -32,12 +50,13 @@ static int diag_release_pages(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end); + mmap_read_lock(vcpu->kvm->mm); /* * We checked for start >= end above, so lets 
check for the * fast path (no prefix swap page involved) */ if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) { - gmap_discard(vcpu->arch.gmap, start, end); + do_discard_gfn_range(vcpu, gpa_to_gfn(start), gpa_to_gfn(end)); } else { /* * This is slow path. gmap_discard will check for start @@ -45,13 +64,14 @@ static int diag_release_pages(struct kvm_vcpu *vcpu) * prefix and let gmap_discard make some of these calls * NOPs. */ - gmap_discard(vcpu->arch.gmap, start, prefix); + do_discard_gfn_range(vcpu, gpa_to_gfn(start), gpa_to_gfn(prefix)); if (start <= prefix) - gmap_discard(vcpu->arch.gmap, 0, PAGE_SIZE); + do_discard_gfn_range(vcpu, 0, 1); if (end > prefix + PAGE_SIZE) - gmap_discard(vcpu->arch.gmap, PAGE_SIZE, 2 * PAGE_SIZE); - gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end); + do_discard_gfn_range(vcpu, 1, 2); + do_discard_gfn_range(vcpu, gpa_to_gfn(prefix) + 2, gpa_to_gfn(end)); } + mmap_read_unlock(vcpu->kvm->mm); return 0; } diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index f6fded15633a..21c2e61fece4 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -16,9 +16,10 @@ #include <asm/gmap.h> #include <asm/dat-bits.h> #include "kvm-s390.h" -#include "gmap.h" #include "gaccess.h" +#define GMAP_SHADOW_FAKE_TABLE 1ULL + /* * vaddress union in order to easily decode a virtual address into its * region first index, region second index etc. parts. 
@@ -318,7 +319,7 @@ enum prot_type { PROT_TYPE_DAT = 3, PROT_TYPE_IEP = 4, /* Dummy value for passing an initialized value when code != PGM_PROTECTION */ - PROT_NONE, + PROT_TYPE_DUMMY, }; static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar, @@ -334,7 +335,7 @@ static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, switch (code) { case PGM_PROTECTION: switch (prot) { - case PROT_NONE: + case PROT_TYPE_DUMMY: /* We should never get here, acts like termination */ WARN_ON_ONCE(1); break; @@ -804,7 +805,7 @@ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, gpa = kvm_s390_real_to_abs(vcpu, ga); if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) { rc = PGM_ADDRESSING; - prot = PROT_NONE; + prot = PROT_TYPE_DUMMY; } } if (rc) @@ -962,7 +963,7 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, if (rc == PGM_PROTECTION) prot = PROT_TYPE_KEYC; else - prot = PROT_NONE; + prot = PROT_TYPE_DUMMY; rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate); } out_unlock: diff --git a/arch/s390/kvm/gmap-vsie.c b/arch/s390/kvm/gmap-vsie.c index a6d1dbb04c97..56ef153eb8fe 100644 --- a/arch/s390/kvm/gmap-vsie.c +++ b/arch/s390/kvm/gmap-vsie.c @@ -22,7 +22,6 @@ #include <asm/uv.h> #include "kvm-s390.h" -#include "gmap.h" /** * gmap_find_shadow - find a specific asce in the list of shadow tables diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c deleted file mode 100644 index 02adf151d4de..000000000000 --- a/arch/s390/kvm/gmap.c +++ /dev/null @@ -1,212 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Guest memory management for KVM/s390 - * - * Copyright IBM Corp. 
2008, 2020, 2024 - * - * Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com> - * Martin Schwidefsky <schwidefsky@de.ibm.com> - * David Hildenbrand <david@redhat.com> - * Janosch Frank <frankja@linux.vnet.ibm.com> - */ - -#include <linux/compiler.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <linux/pgtable.h> -#include <linux/pagemap.h> - -#include <asm/lowcore.h> -#include <asm/gmap.h> -#include <asm/uv.h> - -#include "gmap.h" - -/** - * should_export_before_import - Determine whether an export is needed - * before an import-like operation - * @uvcb: the Ultravisor control block of the UVC to be performed - * @mm: the mm of the process - * - * Returns whether an export is needed before every import-like operation. - * This is needed for shared pages, which don't trigger a secure storage - * exception when accessed from a different guest. - * - * Although considered as one, the Unpin Page UVC is not an actual import, - * so it is not affected. - * - * No export is needed also when there is only one protected VM, because the - * page cannot belong to the wrong VM in that case (there is no "other VM" - * it can belong to). - * - * Return: true if an export is needed before every import, otherwise false. - */ -static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) -{ - /* - * The misc feature indicates, among other things, that importing a - * shared page from a different protected VM will automatically also - * transfer its ownership. - */ - if (uv_has_feature(BIT_UV_FEAT_MISC)) - return false; - if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) - return false; - return atomic_read(&mm->context.protected_count) > 1; -} - -static int __gmap_make_secure(struct gmap *gmap, struct page *page, void *uvcb) -{ - struct folio *folio = page_folio(page); - int rc; - - /* - * Secure pages cannot be huge and userspace should not combine both. - * In case userspace does it anyway this will result in an -EFAULT for - * the unpack. 
The guest is thus never reaching secure mode. - * If userspace plays dirty tricks and decides to map huge pages at a - * later point in time, it will receive a segmentation fault or - * KVM_RUN will return -EFAULT. - */ - if (folio_test_hugetlb(folio)) - return -EFAULT; - if (folio_test_large(folio)) { - mmap_read_unlock(gmap->mm); - rc = kvm_s390_wiggle_split_folio(gmap->mm, folio, true); - mmap_read_lock(gmap->mm); - if (rc) - return rc; - folio = page_folio(page); - } - - if (!folio_trylock(folio)) - return -EAGAIN; - if (should_export_before_import(uvcb, gmap->mm)) - uv_convert_from_secure(folio_to_phys(folio)); - rc = make_folio_secure(folio, uvcb); - folio_unlock(folio); - - /* - * In theory a race is possible and the folio might have become - * large again before the folio_trylock() above. In that case, no - * action is performed and -EAGAIN is returned; the callers will - * have to try again later. - * In most cases this implies running the VM again, getting the same - * exception again, and make another attempt in this function. - * This is expected to happen extremely rarely. - */ - if (rc == -E2BIG) - return -EAGAIN; - /* The folio has too many references, try to shake some off */ - if (rc == -EBUSY) { - mmap_read_unlock(gmap->mm); - kvm_s390_wiggle_split_folio(gmap->mm, folio, false); - mmap_read_lock(gmap->mm); - return -EAGAIN; - } - - return rc; -} - -/** - * gmap_make_secure() - make one guest page secure - * @gmap: the guest gmap - * @gaddr: the guest address that needs to be made secure - * @uvcb: the UVCB specifying which operation needs to be performed - * - * Context: needs to be called with kvm->srcu held. - * Return: 0 on success, < 0 in case of error (see __gmap_make_secure()). 
- */ -int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) -{ - struct kvm *kvm = gmap->private; - struct page *page; - int rc = 0; - - lockdep_assert_held(&kvm->srcu); - - page = gfn_to_page(kvm, gpa_to_gfn(gaddr)); - mmap_read_lock(gmap->mm); - if (page) - rc = __gmap_make_secure(gmap, page, uvcb); - kvm_release_page_clean(page); - mmap_read_unlock(gmap->mm); - - return rc; -} - -int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr) -{ - struct uv_cb_cts uvcb = { - .header.cmd = UVC_CMD_CONV_TO_SEC_STOR, - .header.len = sizeof(uvcb), - .guest_handle = gmap->guest_handle, - .gaddr = gaddr, - }; - - return gmap_make_secure(gmap, gaddr, &uvcb); -} - -/** - * __gmap_destroy_page() - Destroy a guest page. - * @gmap: the gmap of the guest - * @page: the page to destroy - * - * An attempt will be made to destroy the given guest page. If the attempt - * fails, an attempt is made to export the page. If both attempts fail, an - * appropriate error is returned. - * - * Context: must be called holding the mm lock for gmap->mm - */ -static int __gmap_destroy_page(struct gmap *gmap, struct page *page) -{ - struct folio *folio = page_folio(page); - int rc; - - /* - * See gmap_make_secure(): large folios cannot be secure. Small - * folio implies FW_LEVEL_PTE. - */ - if (folio_test_large(folio)) - return -EFAULT; - - rc = uv_destroy_folio(folio); - /* - * Fault handlers can race; it is possible that two CPUs will fault - * on the same secure page. One CPU can destroy the page, reboot, - * re-enter secure mode and import it, while the second CPU was - * stuck at the beginning of the handler. At some point the second - * CPU will be able to progress, and it will not be able to destroy - * the page. In that case we do not want to terminate the process, - * we instead try to export the page. - */ - if (rc) - rc = uv_convert_from_secure_folio(folio); - - return rc; -} - -/** - * gmap_destroy_page() - Destroy a guest page. 
- * @gmap: the gmap of the guest - * @gaddr: the guest address to destroy - * - * An attempt will be made to destroy the given guest page. If the attempt - * fails, an attempt is made to export the page. If both attempts fail, an - * appropriate error is returned. - * - * Context: may sleep. - */ -int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr) -{ - struct page *page; - int rc = 0; - - mmap_read_lock(gmap->mm); - page = gfn_to_page(gmap->private, gpa_to_gfn(gaddr)); - if (page) - rc = __gmap_destroy_page(gmap, page); - kvm_release_page_clean(page); - mmap_read_unlock(gmap->mm); - return rc; -} diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h deleted file mode 100644 index c8f031c9ea5f..000000000000 --- a/arch/s390/kvm/gmap.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * KVM guest address space mapping code - * - * Copyright IBM Corp. 2007, 2016, 2025 - * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> - * Claudio Imbrenda <imbrenda@linux.ibm.com> - */ - -#ifndef ARCH_KVM_S390_GMAP_H -#define ARCH_KVM_S390_GMAP_H - -#define GMAP_SHADOW_FAKE_TABLE 1ULL - -int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb); -int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); -int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr); -struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level); - -/** - * gmap_shadow_valid - check if a shadow guest address space matches the - * given properties and is still valid - * @sg: pointer to the shadow guest address space structure - * @asce: ASCE for which the shadow table is requested - * @edat_level: edat level to be used for the shadow translation - * - * Returns 1 if the gmap shadow is still valid and matches the given - * properties, the caller can continue using it. Returns 0 otherwise, the - * caller has to request a new shadow gmap in this case. 
- * - */ -static inline int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level) -{ - if (sg->removed) - return 0; - return sg->orig_asce == asce && sg->edat_level == edat_level; -} - -#endif diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 610dd44a948b..c7908950c1f4 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -21,7 +21,6 @@ #include "gaccess.h" #include "trace.h" #include "trace-s390.h" -#include "gmap.h" u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu) { @@ -95,7 +94,7 @@ static int handle_validity(struct kvm_vcpu *vcpu) vcpu->stat.exit_validity++; trace_kvm_s390_intercept_validity(vcpu, viwhy); - KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%pK)", viwhy, + KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%p)", viwhy, current->pid, vcpu->kvm); /* do not warn on invalid runtime instrumentation mode */ @@ -545,7 +544,7 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu) guest_uvcb->header.cmd); return 0; } - rc = gmap_make_secure(vcpu->arch.gmap, uvcb.gaddr, &uvcb); + rc = kvm_s390_pv_make_secure(vcpu->kvm, uvcb.gaddr, &uvcb); /* * If the unpin did not succeed, the guest will exit again for the UVC * and we will retry the unpin. 
@@ -653,10 +652,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) break; case ICPT_PV_PREF: rc = 0; - gmap_convert_to_secure(vcpu->arch.gmap, - kvm_s390_get_prefix(vcpu)); - gmap_convert_to_secure(vcpu->arch.gmap, - kvm_s390_get_prefix(vcpu) + PAGE_SIZE); + kvm_s390_pv_convert_to_secure(vcpu->kvm, kvm_s390_get_prefix(vcpu)); + kvm_s390_pv_convert_to_secure(vcpu->kvm, kvm_s390_get_prefix(vcpu) + PAGE_SIZE); break; default: return -EOPNOTSUPP; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 07ff0e10cb7f..60c360c18690 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -10,6 +10,7 @@ #define KMSG_COMPONENT "kvm-s390" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include <linux/cpufeature.h> #include <linux/interrupt.h> #include <linux/kvm_host.h> #include <linux/hrtimer.h> @@ -577,7 +578,7 @@ static int __write_machine_check(struct kvm_vcpu *vcpu, /* take care of lazy register loading */ kvm_s390_fpu_store(vcpu->run); save_access_regs(vcpu->run->s.regs.acrs); - if (MACHINE_HAS_GS && vcpu->arch.gs_enabled) + if (cpu_has_gs() && vcpu->arch.gs_enabled) save_gs_cb(current->thread.gs_cb); /* Extended save area */ @@ -948,8 +949,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) rc |= put_guest_lc(vcpu, ilen, (u16 *) __LC_PGM_ILC); rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea, (u64 *) __LC_PGM_LAST_BREAK); - rc |= put_guest_lc(vcpu, pgm_info.code, - (u16 *)__LC_PGM_INT_CODE); + rc |= put_guest_lc(vcpu, pgm_info.code, (u16 *)__LC_PGM_CODE); rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW, @@ -3161,7 +3161,7 @@ void kvm_s390_gisa_clear(struct kvm *kvm) if (!gi->origin) return; gisa_clear_ipm(gi->origin); - VM_EVENT(kvm, 3, "gisa 0x%pK cleared", gi->origin); + VM_EVENT(kvm, 3, "gisa 0x%p cleared", gi->origin); } void kvm_s390_gisa_init(struct kvm *kvm) @@ -3174,11 +3174,10 @@ void kvm_s390_gisa_init(struct kvm *kvm) 
gi->alert.mask = 0; spin_lock_init(&gi->alert.ref_lock); gi->expires = 50 * 1000; /* 50 usec */ - hrtimer_init(&gi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - gi->timer.function = gisa_vcpu_kicker; + hrtimer_setup(&gi->timer, gisa_vcpu_kicker, CLOCK_MONOTONIC, HRTIMER_MODE_REL); memset(gi->origin, 0, sizeof(struct kvm_s390_gisa)); gi->origin->next_alert = (u32)virt_to_phys(gi->origin); - VM_EVENT(kvm, 3, "gisa 0x%pK initialized", gi->origin); + VM_EVENT(kvm, 3, "gisa 0x%p initialized", gi->origin); } void kvm_s390_gisa_enable(struct kvm *kvm) @@ -3219,7 +3218,7 @@ void kvm_s390_gisa_destroy(struct kvm *kvm) process_gib_alert_list(); hrtimer_cancel(&gi->timer); gi->origin = NULL; - VM_EVENT(kvm, 3, "gisa 0x%pK destroyed", gisa); + VM_EVENT(kvm, 3, "gisa 0x%p destroyed", gisa); } void kvm_s390_gisa_disable(struct kvm *kvm) @@ -3468,7 +3467,7 @@ int __init kvm_s390_gib_init(u8 nisc) } } - KVM_EVENT(3, "gib 0x%pK (nisc=%d) initialized", gib, gib->nisc); + KVM_EVENT(3, "gib 0x%p (nisc=%d) initialized", gib, gib->nisc); goto out; out_unreg_gal: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ebecb96bacce..d5ad10791c25 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -23,6 +23,7 @@ #include <linux/mman.h> #include <linux/module.h> #include <linux/moduleparam.h> +#include <linux/cpufeature.h> #include <linux/random.h> #include <linux/slab.h> #include <linux/timer.h> @@ -36,8 +37,10 @@ #include <asm/access-regs.h> #include <asm/asm-offsets.h> #include <asm/lowcore.h> +#include <asm/machine.h> #include <asm/stp.h> #include <asm/gmap.h> +#include <asm/gmap_helpers.h> #include <asm/nmi.h> #include <asm/isc.h> #include <asm/sclp.h> @@ -50,7 +53,6 @@ #include "kvm-s390.h" #include "gaccess.h" #include "pci.h" -#include "gmap.h" #define CREATE_TRACE_POINTS #include "trace.h" @@ -443,13 +445,13 @@ static void __init kvm_s390_cpu_feat_init(void) if (test_facility(201)) /* PFCR */ pfcr_query(&kvm_s390_available_subfunc.pfcr); - if 
(MACHINE_HAS_ESOP) + if (machine_has_esop()) allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); /* * We need SIE support, ESOP (PROT_READ protection for gmap_shadow), * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing). */ - if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao || + if (!sclp.has_sief2 || !machine_has_esop() || !sclp.has_64bscao || !test_facility(3) || !nested) return; allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2); @@ -638,7 +640,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = min_t(unsigned int, num_online_cpus(), r); break; case KVM_CAP_S390_COW: - r = MACHINE_HAS_ESOP; + r = machine_has_esop(); break; case KVM_CAP_S390_VECTOR_REGISTERS: r = test_facility(129); @@ -1020,7 +1022,7 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att } mutex_unlock(&kvm->lock); VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); - VM_EVENT(kvm, 3, "New guest asce: 0x%pK", + VM_EVENT(kvm, 3, "New guest asce: 0x%p", (void *) kvm->arch.gmap->asce); break; } @@ -2672,7 +2674,9 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) if (r) break; - r = s390_disable_cow_sharing(); + mmap_write_lock(kvm->mm); + r = gmap_helper_disable_cow_sharing(); + mmap_write_unlock(kvm->mm); if (r) break; @@ -3396,7 +3400,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) /* we emulate STHYI in kvm */ set_kvm_facility(kvm->arch.model.fac_mask, 74); set_kvm_facility(kvm->arch.model.fac_list, 74); - if (MACHINE_HAS_TLB_GUEST) { + if (machine_has_tlb_guest()) { set_kvm_facility(kvm->arch.model.fac_mask, 147); set_kvm_facility(kvm->arch.model.fac_list, 147); } @@ -3464,7 +3468,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_s390_gisa_init(kvm); INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup); kvm->arch.pv.set_aside = NULL; - KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); + KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid); return 0; out_err: @@ 
-3527,7 +3531,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_s390_destroy_adapters(kvm); kvm_s390_clear_float_irqs(kvm); kvm_s390_vsie_destroy(kvm); - KVM_EVENT(3, "vm 0x%pK destroyed", kvm); + KVM_EVENT(3, "vm 0x%p destroyed", kvm); } /* Section: vcpu related */ @@ -3648,7 +3652,7 @@ static int sca_switch_to_extended(struct kvm *kvm) free_page((unsigned long)old_sca); - VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", + VM_EVENT(kvm, 2, "Switched to ESCA (0x%p -> 0x%p)", old_sca, kvm->arch.sca); return 0; } @@ -3892,8 +3896,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) kvm_s390_vcpu_setup_model(vcpu); - /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ - if (MACHINE_HAS_ESOP) + /* pgste_set_pte has special handling for !machine_has_esop() */ + if (machine_has_esop()) vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; if (test_kvm_facility(vcpu->kvm, 9)) vcpu->arch.sie_block->ecb |= ECB_SRSI; @@ -3943,8 +3947,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) if (rc) return rc; } - hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; + hrtimer_setup(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); vcpu->arch.sie_block->hpid = HPID_KVM; @@ -4025,7 +4029,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) goto out_free_sie_block; } - VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", + VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%p, sie block at 0x%p", vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); @@ -4952,6 +4956,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) { unsigned int flags = 0; unsigned long gaddr; + int rc; gaddr = current->thread.gmap_teid.addr * PAGE_SIZE; if (kvm_s390_cur_gmap_fault_is_write()) @@ -4961,16 +4966,6 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) case 0: 
vcpu->stat.exit_null++; break; - case PGM_NON_SECURE_STORAGE_ACCESS: - kvm_s390_assert_primary_as(vcpu); - /* - * This is normal operation; a page belonging to a protected - * guest has not been imported yet. Try to import the page into - * the protected guest. - */ - if (gmap_convert_to_secure(vcpu->arch.gmap, gaddr) == -EINVAL) - send_sig(SIGSEGV, current, 0); - break; case PGM_SECURE_STORAGE_ACCESS: case PGM_SECURE_STORAGE_VIOLATION: kvm_s390_assert_primary_as(vcpu); @@ -4980,7 +4975,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) * previous protected guest. The old pages need to be destroyed * so the new guest can use them. */ - if (gmap_destroy_page(vcpu->arch.gmap, gaddr)) { + if (kvm_s390_pv_destroy_page(vcpu->kvm, gaddr)) { /* * Either KVM messed up the secure guest mapping or the * same page is mapped into multiple secure guests. @@ -4995,6 +4990,20 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) send_sig(SIGSEGV, current, 0); } break; + case PGM_NON_SECURE_STORAGE_ACCESS: + kvm_s390_assert_primary_as(vcpu); + /* + * This is normal operation; a page belonging to a protected + * guest has not been imported yet. Try to import the page into + * the protected guest. + */ + rc = kvm_s390_pv_convert_to_secure(vcpu->kvm, gaddr); + if (rc == -EINVAL) + send_sig(SIGSEGV, current, 0); + if (rc != -ENXIO) + break; + flags = FAULT_FLAG_WRITE; + fallthrough; case PGM_PROTECTION: case PGM_SEGMENT_TRANSLATION: case PGM_PAGE_TRANSLATION: @@ -5171,7 +5180,7 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->fpf &= ~FPF_BPBC; vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? 
FPF_BPBC : 0; } - if (MACHINE_HAS_GS) { + if (cpu_has_gs()) { preempt_disable(); local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT); if (current->thread.gs_cb) { @@ -5237,7 +5246,7 @@ static void store_regs_fmt2(struct kvm_vcpu *vcpu) kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val; - if (MACHINE_HAS_GS) { + if (cpu_has_gs()) { preempt_disable(); local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT); if (vcpu->arch.gs_enabled) diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 8d3bbb2dd8d2..c44fe0c3a097 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -308,6 +308,9 @@ int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user, u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc); int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user, u16 *rc, u16 *rrc); +int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr); +int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr); +int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb); static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm) { @@ -319,6 +322,41 @@ static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu) return vcpu->arch.pv.handle; } +/** + * __kvm_s390_pv_destroy_page() - Destroy a guest page. + * @page: the page to destroy + * + * An attempt will be made to destroy the given guest page. If the attempt + * fails, an attempt is made to export the page. If both attempts fail, an + * appropriate error is returned. + * + * Context: must be called holding the mm lock for gmap->mm + */ +static inline int __kvm_s390_pv_destroy_page(struct page *page) +{ + struct folio *folio = page_folio(page); + int rc; + + /* Large folios cannot be secure. Small folio implies FW_LEVEL_PTE. 
*/ + if (folio_test_large(folio)) + return -EFAULT; + + rc = uv_destroy_folio(folio); + /* + * Fault handlers can race; it is possible that two CPUs will fault + * on the same secure page. One CPU can destroy the page, reboot, + * re-enter secure mode and import it, while the second CPU was + * stuck at the beginning of the handler. At some point the second + * CPU will be able to progress, and it will not be able to destroy + * the page. In that case we do not want to terminate the process, + * we instead try to export the page. + */ + if (rc) + rc = uv_convert_from_secure_folio(folio); + + return rc; +} + /* implemented in interrupt.c */ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu); @@ -398,6 +436,10 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start, unsigned long end); void kvm_s390_vsie_init(struct kvm *kvm); void kvm_s390_vsie_destroy(struct kvm *kvm); +int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level); + +/* implemented in gmap-vsie.c */ +struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level); /* implemented in sigp.c */ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c index 9b9e7fdd5380..8c40154ff50f 100644 --- a/arch/s390/kvm/pci.c +++ b/arch/s390/kvm/pci.c @@ -433,7 +433,6 @@ static void kvm_s390_pci_dev_release(struct zpci_dev *zdev) static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm) { struct zpci_dev *zdev = opaque; - u8 status; int rc; if (!zdev) @@ -480,13 +479,7 @@ static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm) */ zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa); - rc = zpci_enable_device(zdev); - if (rc) - goto clear_gisa; - - /* Re-register the IOMMU that was already created */ - rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table), &status); + rc = zpci_reenable_device(zdev); 
if (rc) goto clear_gisa; @@ -516,7 +509,6 @@ static void kvm_s390_pci_unregister_kvm(void *opaque) { struct zpci_dev *zdev = opaque; struct kvm *kvm; - u8 status; if (!zdev) return; @@ -550,12 +542,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque) goto out; } - if (zpci_enable_device(zdev)) - goto out; - - /* Re-register the IOMMU that was already created */ - zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table), &status); + zpci_reenable_device(zdev); out: spin_lock(&kvm->arch.kzdev_list_lock); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 1a49b89706f8..9253c70897a8 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -1248,6 +1248,8 @@ static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc) static int handle_essa(struct kvm_vcpu *vcpu) { + lockdep_assert_held(&vcpu->kvm->srcu); + /* entries expected to be 1FF */ int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3; unsigned long *cbrlo; @@ -1297,12 +1299,8 @@ static int handle_essa(struct kvm_vcpu *vcpu) /* Retry the ESSA instruction */ kvm_s390_retry_instr(vcpu); } else { - int srcu_idx; - mmap_read_lock(vcpu->kvm->mm); - srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); i = __do_essa(vcpu, orc); - srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); mmap_read_unlock(vcpu->kvm->mm); if (i < 0) return i; diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 22c012aa5206..14c330ec8ceb 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -17,7 +17,6 @@ #include <linux/sched/mm.h> #include <linux/mmu_notifier.h> #include "kvm-s390.h" -#include "gmap.h" bool kvm_s390_pv_is_protected(struct kvm *kvm) { @@ -34,6 +33,64 @@ bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu) EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected); /** + * kvm_s390_pv_make_secure() - make one guest page secure + * @kvm: the guest + * @gaddr: the guest address that needs to be made secure + * @uvcb: the UVCB specifying which operation needs to be 
performed + * + * Context: needs to be called with kvm->srcu held. + * Return: 0 on success, < 0 in case of error. + */ +int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb) +{ + unsigned long vmaddr; + + lockdep_assert_held(&kvm->srcu); + + vmaddr = gfn_to_hva(kvm, gpa_to_gfn(gaddr)); + if (kvm_is_error_hva(vmaddr)) + return -EFAULT; + return make_hva_secure(kvm->mm, vmaddr, uvcb); +} + +int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr) +{ + struct uv_cb_cts uvcb = { + .header.cmd = UVC_CMD_CONV_TO_SEC_STOR, + .header.len = sizeof(uvcb), + .guest_handle = kvm_s390_pv_get_handle(kvm), + .gaddr = gaddr, + }; + + return kvm_s390_pv_make_secure(kvm, gaddr, &uvcb); +} + +/** + * kvm_s390_pv_destroy_page() - Destroy a guest page. + * @kvm: the guest + * @gaddr: the guest address to destroy + * + * An attempt will be made to destroy the given guest page. If the attempt + * fails, an attempt is made to export the page. If both attempts fail, an + * appropriate error is returned. + * + * Context: may sleep. 
+ */ +int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr) +{ + struct page *page; + int rc = 0; + + mmap_read_lock(kvm->mm); + page = gfn_to_page(kvm, gpa_to_gfn(gaddr)); + if (page) + rc = __kvm_s390_pv_destroy_page(page); + kvm_release_page_clean(page); + mmap_read_unlock(kvm->mm); + return rc; +} + +/** * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to * be destroyed * @@ -638,7 +695,7 @@ static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak, .tweak[0] = tweak, .tweak[1] = offset, }; - int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb); + int ret = kvm_s390_pv_make_secure(kvm, addr, &uvcb); unsigned long vmaddr; bool unlocked; diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index 9ac92dbf680d..9e28f165c114 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h @@ -56,7 +56,7 @@ TRACE_EVENT(kvm_s390_create_vcpu, __entry->sie_block = sie_block; ), - TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK", + TP_printk("create cpu %d at 0x%p, sie block at 0x%p", __entry->id, __entry->vcpu, __entry->sie_block) ); @@ -255,7 +255,7 @@ TRACE_EVENT(kvm_s390_enable_css, __entry->kvm = kvm; ), - TP_printk("enabling channel I/O support (kvm @ %pK)\n", + TP_printk("enabling channel I/O support (kvm @ %p)\n", __entry->kvm) ); diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index a78df3a4f353..13a9661d2b28 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -23,7 +23,6 @@ #include <asm/facility.h> #include "kvm-s390.h" #include "gaccess.h" -#include "gmap.h" enum vsie_page_flags { VSIE_PAGE_IN_USE = 0, @@ -68,6 +67,24 @@ struct vsie_page { __u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */ }; +/** + * gmap_shadow_valid() - check if a shadow guest address space matches the + * given properties and is still valid + * @sg: pointer to the shadow guest address space structure + * @asce: ASCE for which the shadow table is requested + * @edat_level: edat level to be 
used for the shadow translation + * + * Returns 1 if the gmap shadow is still valid and matches the given + * properties, the caller can continue using it. Returns 0 otherwise; the + * caller has to request a new shadow gmap in this case. + */ +int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level) +{ + if (sg->removed) + return 0; + return sg->orig_asce == asce && sg->edat_level == edat_level; +} + /* trigger a validity icpt for the given scb */ static int set_validity_icpt(struct kvm_s390_sie_block *scb, __u16 reason_code) diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 14bbfe50033c..cd35cdbfa871 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -3,6 +3,7 @@ # Makefile for s390-specific library files.. # +obj-y += crypto/ lib-y += delay.o string.o uaccess.o find.o spinlock.o tishift.o lib-y += csum-partial.o obj-y += mem.o xor.o @@ -26,4 +27,4 @@ lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o obj-$(CONFIG_EXPOLINE_EXTERN) += expoline.o obj-$(CONFIG_CRC32_ARCH) += crc32-s390.o -crc32-s390-y := crc32-glue.o crc32le-vx.o crc32be-vx.o +crc32-s390-y := crc32.o crc32le-vx.o crc32be-vx.o diff --git a/arch/s390/lib/crc32-glue.c b/arch/s390/lib/crc32.c index 137080e61f90..3c4b344417c1 100644 --- a/arch/s390/lib/crc32-glue.c +++ b/arch/s390/lib/crc32.c @@ -18,8 +18,6 @@ #define VX_ALIGNMENT 16L #define VX_ALIGN_MASK (VX_ALIGNMENT - 1) -static DEFINE_STATIC_KEY_FALSE(have_vxrs); - /* * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension * @@ -34,8 +32,7 @@ static DEFINE_STATIC_KEY_FALSE(have_vxrs); unsigned long prealign, aligned, remaining; \ DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \ \ - if (datalen < VX_MIN_LEN + VX_ALIGN_MASK || \ - !static_branch_likely(&have_vxrs)) \ + if (datalen < VX_MIN_LEN + VX_ALIGN_MASK || !cpu_has_vx()) \ return ___crc32_sw(crc, data, datalen); \ \ if ((unsigned long)data & VX_ALIGN_MASK) { \ @@ -62,27 +59,15 @@ static DEFINE_STATIC_KEY_FALSE(have_vxrs); 
DEFINE_CRC32_VX(crc32_le_arch, crc32_le_vgfm_16, crc32_le_base) DEFINE_CRC32_VX(crc32_be_arch, crc32_be_vgfm_16, crc32_be_base) -DEFINE_CRC32_VX(crc32c_le_arch, crc32c_le_vgfm_16, crc32c_le_base) - -static int __init crc32_s390_init(void) -{ - if (cpu_have_feature(S390_CPU_FEATURE_VXRS)) - static_branch_enable(&have_vxrs); - return 0; -} -arch_initcall(crc32_s390_init); - -static void __exit crc32_s390_exit(void) -{ -} -module_exit(crc32_s390_exit); +DEFINE_CRC32_VX(crc32c_arch, crc32c_le_vgfm_16, crc32c_base) u32 crc32_optimizations(void) { - if (static_key_enabled(&have_vxrs)) + if (cpu_has_vx()) { return CRC32_LE_OPTIMIZATION | CRC32_BE_OPTIMIZATION | CRC32C_OPTIMIZATION; + } return 0; } EXPORT_SYMBOL(crc32_optimizations); diff --git a/arch/s390/lib/crypto/Kconfig b/arch/s390/lib/crypto/Kconfig new file mode 100644 index 000000000000..e3f855ef4393 --- /dev/null +++ b/arch/s390/lib/crypto/Kconfig @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config CRYPTO_CHACHA_S390 + tristate + default CRYPTO_LIB_CHACHA + select CRYPTO_LIB_CHACHA_GENERIC + select CRYPTO_ARCH_HAVE_LIB_CHACHA + +config CRYPTO_SHA256_S390 + tristate + default CRYPTO_LIB_SHA256 + select CRYPTO_ARCH_HAVE_LIB_SHA256 + select CRYPTO_LIB_SHA256_GENERIC diff --git a/arch/s390/lib/crypto/Makefile b/arch/s390/lib/crypto/Makefile new file mode 100644 index 000000000000..5df30f1e7930 --- /dev/null +++ b/arch/s390/lib/crypto/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o +chacha_s390-y := chacha-glue.o chacha-s390.o + +obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256-s390.o +sha256-s390-y := sha256.o diff --git a/arch/s390/lib/crypto/chacha-glue.c b/arch/s390/lib/crypto/chacha-glue.c new file mode 100644 index 000000000000..f95ba3483bbc --- /dev/null +++ b/arch/s390/lib/crypto/chacha-glue.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ChaCha stream cipher (s390 optimized) + * + * Copyright IBM Corp. 
2021 + */ + +#define KMSG_COMPONENT "chacha_s390" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <crypto/chacha.h> +#include <linux/cpufeature.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sizes.h> +#include <asm/fpu.h> +#include "chacha-s390.h" + +void hchacha_block_arch(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds) +{ + /* TODO: implement hchacha_block_arch() in assembly */ + hchacha_block_generic(state, out, nrounds); +} +EXPORT_SYMBOL(hchacha_block_arch); + +void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + /* s390 chacha20 implementation has 20 rounds hard-coded, + * it cannot handle a block of data or less, but otherwise + * it can handle data of arbitrary size + */ + if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20 || !cpu_has_vx()) { + chacha_crypt_generic(state, dst, src, bytes, nrounds); + } else { + DECLARE_KERNEL_FPU_ONSTACK32(vxstate); + + kernel_fpu_begin(&vxstate, KERNEL_VXR); + chacha20_vx(dst, src, bytes, &state->x[4], &state->x[12]); + kernel_fpu_end(&vxstate, KERNEL_VXR); + + state->x[12] += round_up(bytes, CHACHA_BLOCK_SIZE) / + CHACHA_BLOCK_SIZE; + } +} +EXPORT_SYMBOL(chacha_crypt_arch); + +bool chacha_is_arch_optimized(void) +{ + return cpu_has_vx(); +} +EXPORT_SYMBOL(chacha_is_arch_optimized); + +MODULE_DESCRIPTION("ChaCha stream cipher (s390 optimized)"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/s390/crypto/chacha-s390.S b/arch/s390/lib/crypto/chacha-s390.S index 63f3102678c0..63f3102678c0 100644 --- a/arch/s390/crypto/chacha-s390.S +++ b/arch/s390/lib/crypto/chacha-s390.S diff --git a/arch/s390/crypto/chacha-s390.h b/arch/s390/lib/crypto/chacha-s390.h index 733744ce30f5..733744ce30f5 100644 --- a/arch/s390/crypto/chacha-s390.h +++ b/arch/s390/lib/crypto/chacha-s390.h diff --git a/arch/s390/lib/crypto/sha256.c b/arch/s390/lib/crypto/sha256.c new file mode 100644 index 000000000000..7dfe120fafab --- 
/dev/null +++ b/arch/s390/lib/crypto/sha256.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SHA-256 optimized using the CP Assist for Cryptographic Functions (CPACF) + * + * Copyright 2025 Google LLC + */ +#include <asm/cpacf.h> +#include <crypto/internal/sha2.h> +#include <linux/cpufeature.h> +#include <linux/kernel.h> +#include <linux/module.h> + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_cpacf_sha256); + +void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks) +{ + if (static_branch_likely(&have_cpacf_sha256)) + cpacf_kimd(CPACF_KIMD_SHA_256, state, data, + nblocks * SHA256_BLOCK_SIZE); + else + sha256_blocks_generic(state, data, nblocks); +} +EXPORT_SYMBOL_GPL(sha256_blocks_arch); + +bool sha256_is_arch_optimized(void) +{ + return static_key_enabled(&have_cpacf_sha256); +} +EXPORT_SYMBOL_GPL(sha256_is_arch_optimized); + +static int __init sha256_s390_mod_init(void) +{ + if (cpu_have_feature(S390_CPU_FEATURE_MSA) && + cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256)) + static_branch_enable(&have_cpacf_sha256); + return 0; +} +subsys_initcall(sha256_s390_mod_init); + +static void __exit sha256_s390_mod_exit(void) +{ +} +module_exit(sha256_s390_mod_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA-256 using the CP Assist for Cryptographic Functions (CPACF)"); diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index a81a01c44927..ad9da4038511 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -10,11 +10,13 @@ #include <linux/export.h> #include <linux/spinlock.h> #include <linux/jiffies.h> +#include <linux/sysctl.h> #include <linux/init.h> #include <linux/smp.h> #include <linux/percpu.h> #include <linux/io.h> #include <asm/alternative.h> +#include <asm/machine.h> #include <asm/asm.h> int spin_retry = -1; @@ -37,6 +39,23 @@ static int __init spin_retry_setup(char *str) } __setup("spin_retry=", spin_retry_setup); +static const struct ctl_table 
s390_spin_sysctl_table[] = { + { + .procname = "spin_retry", + .data = &spin_retry, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +}; + +static int __init init_s390_spin_sysctls(void) +{ + register_sysctl_init("kernel", s390_spin_sysctl_table); + return 0; +} +arch_initcall(init_s390_spin_sysctls); + struct spin_wait { struct spin_wait *next, *prev; int node_id; @@ -141,7 +160,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp) ix = get_lowcore()->spinlock_index++; barrier(); - lockval = SPINLOCK_LOCKVAL; /* cpu + 1 */ + lockval = spinlock_lockval(); /* cpu + 1 */ node = this_cpu_ptr(&spin_wait[ix]); node->prev = node->next = NULL; node_id = node->node_id; @@ -212,7 +231,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp) if (count-- >= 0) continue; count = spin_retry; - if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1)) + if (!machine_is_lpar() || arch_vcpu_is_preempted(owner - 1)) smp_yield_cpu(owner - 1); } @@ -232,7 +251,7 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp) { int lockval, old, new, owner, count; - lockval = SPINLOCK_LOCKVAL; /* cpu + 1 */ + lockval = spinlock_lockval(); /* cpu + 1 */ /* Pass the virtual CPU to the lock holder if it is not running */ owner = arch_spin_yield_target(READ_ONCE(lp->lock), NULL); @@ -255,7 +274,7 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp) if (count-- >= 0) continue; count = spin_retry; - if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1)) + if (!machine_is_lpar() || arch_vcpu_is_preempted(owner - 1)) smp_yield_cpu(owner - 1); } } @@ -271,7 +290,7 @@ EXPORT_SYMBOL(arch_spin_lock_wait); int arch_spin_trylock_retry(arch_spinlock_t *lp) { - int cpu = SPINLOCK_LOCKVAL; + int cpu = spinlock_lockval(); int owner, count; for (count = spin_retry; count > 0; count--) { @@ -337,7 +356,7 @@ void arch_spin_relax(arch_spinlock_t *lp) cpu = READ_ONCE(lp->lock) & _Q_LOCK_CPU_MASK; if (!cpu) return; - if (MACHINE_IS_LPAR 
&& !arch_vcpu_is_preempted(cpu - 1)) + if (machine_is_lpar() && !arch_vcpu_is_preempted(cpu - 1)) return; smp_yield_cpu(cpu - 1); } diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index 373fa1f01937..099de76e8b1a 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -78,50 +78,6 @@ EXPORT_SYMBOL(strnlen); #endif /** - * strcpy - Copy a %NUL terminated string - * @dest: Where to copy the string to - * @src: Where to copy the string from - * - * returns a pointer to @dest - */ -#ifdef __HAVE_ARCH_STRCPY -char *strcpy(char *dest, const char *src) -{ - char *ret = dest; - - asm volatile( - " lghi 0,0\n" - "0: mvst %[dest],%[src]\n" - " jo 0b\n" - : [dest] "+&a" (dest), [src] "+&a" (src) - : - : "cc", "memory", "0"); - return ret; -} -EXPORT_SYMBOL(strcpy); -#endif - -/** - * strncpy - Copy a length-limited, %NUL-terminated string - * @dest: Where to copy the string to - * @src: Where to copy the string from - * @n: The maximum number of bytes to copy - * - * The result is not %NUL-terminated if the source exceeds - * @n bytes. - */ -#ifdef __HAVE_ARCH_STRNCPY -char *strncpy(char *dest, const char *src, size_t n) -{ - size_t len = __strnend(src, n) - src; - memset(dest + len, 0, n - len); - memcpy(dest, src, len); - return dest; -} -EXPORT_SYMBOL(strncpy); -#endif - -/** * strcat - Append one %NUL-terminated string to another * @dest: The string to be appended to * @src: The string to append to it @@ -181,9 +137,6 @@ EXPORT_SYMBOL(strlcat); * @n: The maximum numbers of bytes to copy * * returns a pointer to @dest - * - * Note that in contrast to strncpy, strncat ensures the result is - * terminated. 
*/ #ifdef __HAVE_ARCH_STRNCAT char *strncat(char *dest, const char *src, size_t n) diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index f977b7c37efc..fa7d98fa1320 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -17,20 +17,75 @@ #ifdef CONFIG_DEBUG_ENTRY void debug_user_asce(int exit) { + struct lowcore *lc = get_lowcore(); struct ctlreg cr1, cr7; local_ctl_store(1, &cr1); local_ctl_store(7, &cr7); - if (cr1.val == get_lowcore()->kernel_asce.val && cr7.val == get_lowcore()->user_asce.val) + if (cr1.val == lc->user_asce.val && cr7.val == lc->user_asce.val) return; panic("incorrect ASCE on kernel %s\n" "cr1: %016lx cr7: %016lx\n" "kernel: %016lx user: %016lx\n", exit ? "exit" : "entry", cr1.val, cr7.val, - get_lowcore()->kernel_asce.val, get_lowcore()->user_asce.val); + lc->kernel_asce.val, lc->user_asce.val); } #endif /*CONFIG_DEBUG_ENTRY */ +union oac { + unsigned int val; + struct { + struct { + unsigned short key : 4; + unsigned short : 4; + unsigned short as : 2; + unsigned short : 4; + unsigned short k : 1; + unsigned short a : 1; + } oac1; + struct { + unsigned short key : 4; + unsigned short : 4; + unsigned short as : 2; + unsigned short : 4; + unsigned short k : 1; + unsigned short a : 1; + } oac2; + }; +}; + +static uaccess_kmsan_or_inline __must_check unsigned long +raw_copy_from_user_key(void *to, const void __user *from, unsigned long size, unsigned long key) +{ + unsigned long osize; + union oac spec = { + .oac2.key = key, + .oac2.as = PSW_BITS_AS_SECONDARY, + .oac2.k = 1, + .oac2.a = 1, + }; + int cc; + + while (1) { + osize = size; + asm_inline volatile( + " lr %%r0,%[spec]\n" + "0: mvcos %[to],%[from],%[size]\n" + "1: nopr %%r7\n" + CC_IPM(cc) + EX_TABLE_UA_MVCOS_FROM(0b, 0b) + EX_TABLE_UA_MVCOS_FROM(1b, 0b) + : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char *)to) + : [spec] "d" (spec.val), [from] "Q" (*(const char __user *)from) + : CC_CLOBBER_LIST("memory", "0")); + if (CC_TRANSFORM(cc) == 0) + return 
osize - size; + size -= 4096; + to += 4096; + from += 4096; + } +} + unsigned long _copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key) { @@ -48,6 +103,38 @@ unsigned long _copy_from_user_key(void *to, const void __user *from, } EXPORT_SYMBOL(_copy_from_user_key); +static uaccess_kmsan_or_inline __must_check unsigned long +raw_copy_to_user_key(void __user *to, const void *from, unsigned long size, unsigned long key) +{ + unsigned long osize; + union oac spec = { + .oac1.key = key, + .oac1.as = PSW_BITS_AS_SECONDARY, + .oac1.k = 1, + .oac1.a = 1, + }; + int cc; + + while (1) { + osize = size; + asm_inline volatile( + " lr %%r0,%[spec]\n" + "0: mvcos %[to],%[from],%[size]\n" + "1: nopr %%r7\n" + CC_IPM(cc) + EX_TABLE_UA_MVCOS_TO(0b, 0b) + EX_TABLE_UA_MVCOS_TO(1b, 0b) + : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char __user *)to) + : [spec] "d" (spec.val), [from] "Q" (*(const char *)from) + : CC_CLOBBER_LIST("memory", "0")); + if (CC_TRANSFORM(cc) == 0) + return osize - size; + size -= 4096; + to += 4096; + from += 4096; + } +} + unsigned long _copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned long key) { @@ -58,39 +145,3 @@ unsigned long _copy_to_user_key(void __user *to, const void *from, return raw_copy_to_user_key(to, from, n, key); } EXPORT_SYMBOL(_copy_to_user_key); - -unsigned long __clear_user(void __user *to, unsigned long size) -{ - unsigned long rem; - union oac spec = { - .oac1.as = PSW_BITS_AS_SECONDARY, - .oac1.a = 1, - }; - - asm volatile( - " lr 0,%[spec]\n" - "0: mvcos 0(%[to]),0(%[zeropg]),%[size]\n" - "1: jz 5f\n" - " algr %[size],%[val]\n" - " slgr %[to],%[val]\n" - " j 0b\n" - "2: la %[rem],4095(%[to])\n" /* rem = to + 4095 */ - " nr %[rem],%[val]\n" /* rem = (to + 4095) & -4096 */ - " slgr %[rem],%[to]\n" - " clgr %[size],%[rem]\n" /* copy crosses next page boundary? 
*/ - " jnh 6f\n" - "3: mvcos 0(%[to]),0(%[zeropg]),%[rem]\n" - "4: slgr %[size],%[rem]\n" - " j 6f\n" - "5: slgr %[size],%[size]\n" - "6:\n" - EX_TABLE(0b, 2b) - EX_TABLE(1b, 2b) - EX_TABLE(3b, 6b) - EX_TABLE(4b, 6b) - : [size] "+&a" (size), [to] "+&a" (to), [rem] "=&a" (rem) - : [val] "a" (-4096UL), [zeropg] "a" (empty_zero_page), [spec] "d" (spec.val) - : "cc", "memory", "0"); - return size; -} -EXPORT_SYMBOL(__clear_user); diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index f6c2db7a8669..bd0401cc7ca5 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -9,6 +9,8 @@ obj-y += page-states.o pageattr.o pgtable.o pgalloc.o extable.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o +obj-$(CONFIG_PTDUMP) += dump_pagetables.o obj-$(CONFIG_PGSTE) += gmap.o obj-$(CONFIG_PFAULT) += pfault.o + +obj-$(subst m,y,$(CONFIG_KVM)) += gmap_helpers.o diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 39f44b6256e0..e2a6eb92420f 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -201,7 +201,7 @@ static void cmm_set_timer(void) { if (cmm_timed_pages_target <= 0 || cmm_timeout_seconds <= 0) { if (timer_pending(&cmm_timer)) - del_timer(&cmm_timer); + timer_delete(&cmm_timer); return; } mod_timer(&cmm_timer, jiffies + secs_to_jiffies(cmm_timeout_seconds)); @@ -424,7 +424,7 @@ out_smsg: #endif unregister_sysctl_table(cmm_sysctl_header); out_sysctl: - del_timer_sync(&cmm_timer); + timer_delete_sync(&cmm_timer); return rc; } module_init(cmm_init); @@ -437,7 +437,7 @@ static void __exit cmm_exit(void) #endif unregister_oom_notifier(&cmm_oom_nb); kthread_stop(cmm_thread_ptr); - del_timer_sync(&cmm_timer); + timer_delete_sync(&cmm_timer); cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list); cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list); } diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c 
index fa54f3bc0c8d..ac604b176660 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -1,4 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 + +#include <linux/cpufeature.h> #include <linux/set_memory.h> #include <linux/ptdump.h> #include <linux/seq_file.h> @@ -82,7 +84,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) * in which case we have two lpswe instructions in lowcore that need * to be executable. */ - if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear))) + if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !cpu_has_bear())) return; WARN_ONCE(IS_ENABLED(CONFIG_DEBUG_WX), "s390/mm: Found insecure W+X mapping at address %pS\n", @@ -145,11 +147,48 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, } } +static void note_page_pte(struct ptdump_state *pt_st, unsigned long addr, pte_t pte) +{ + note_page(pt_st, addr, 4, pte_val(pte)); +} + +static void note_page_pmd(struct ptdump_state *pt_st, unsigned long addr, pmd_t pmd) +{ + note_page(pt_st, addr, 3, pmd_val(pmd)); +} + +static void note_page_pud(struct ptdump_state *pt_st, unsigned long addr, pud_t pud) +{ + note_page(pt_st, addr, 2, pud_val(pud)); +} + +static void note_page_p4d(struct ptdump_state *pt_st, unsigned long addr, p4d_t p4d) +{ + note_page(pt_st, addr, 1, p4d_val(p4d)); +} + +static void note_page_pgd(struct ptdump_state *pt_st, unsigned long addr, pgd_t pgd) +{ + note_page(pt_st, addr, 0, pgd_val(pgd)); +} + +static void note_page_flush(struct ptdump_state *pt_st) +{ + pte_t pte_zero = {0}; + + note_page(pt_st, 0, -1, pte_val(pte_zero)); +} + bool ptdump_check_wx(void) { struct pg_state st = { .ptdump = { - .note_page = note_page, + .note_page_pte = note_page_pte, + .note_page_pmd = note_page_pmd, + .note_page_pud = note_page_pud, + .note_page_p4d = note_page_p4d, + .note_page_pgd = note_page_pgd, + .note_page_flush = note_page_flush, .range = (struct ptdump_range[]) { {.start = 
0, .end = max_addr}, {.start = 0, .end = 0}, @@ -167,7 +206,7 @@ bool ptdump_check_wx(void) }, }; - if (!MACHINE_HAS_NX) + if (!cpu_has_nx()) return true; ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); if (st.wx_pages) { @@ -176,7 +215,7 @@ bool ptdump_check_wx(void) return false; } else { pr_info("Checked W+X mappings: passed, no %sW+X pages found\n", - (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) ? + (nospec_uses_trampoline() || !cpu_has_bear()) ? "unexpected " : ""); return true; @@ -188,7 +227,12 @@ static int ptdump_show(struct seq_file *m, void *v) { struct pg_state st = { .ptdump = { - .note_page = note_page, + .note_page_pte = note_page_pte, + .note_page_pmd = note_page_pmd, + .note_page_pud = note_page_pud, + .note_page_p4d = note_page_p4d, + .note_page_pgd = note_page_pgd, + .note_page_flush = note_page_flush, .range = (struct ptdump_range[]) { {.start = 0, .end = max_addr}, {.start = 0, .end = 0}, diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index a046be1715cf..7498e858c401 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -73,6 +73,49 @@ static bool ex_handler_fpc(const struct exception_table_entry *ex, struct pt_reg return true; } +struct insn_ssf { + u64 opc1 : 8; + u64 r3 : 4; + u64 opc2 : 4; + u64 b1 : 4; + u64 d1 : 12; + u64 b2 : 4; + u64 d2 : 12; +} __packed; + +static bool ex_handler_ua_mvcos(const struct exception_table_entry *ex, + bool from, struct pt_regs *regs) +{ + unsigned long uaddr, remainder; + struct insn_ssf *insn; + + /* + * If the faulting user space access crossed a page boundary retry by + * limiting the access to the first page (adjust length accordingly). + * Then the mvcos instruction will either complete with condition code + * zero, or generate another fault where the user space access did not + * cross a page boundary. + * If the faulting user space access did not cross a page boundary set + * length to zero and retry. 
In this case no user space access will + * happen, and the mvcos instruction will complete with condition code + * zero. + * In both cases the instruction will complete with condition code + * zero (copying finished), and the register which contains the + * length, indicates the number of bytes copied. + */ + regs->psw.addr = extable_fixup(ex); + insn = (struct insn_ssf *)regs->psw.addr; + if (from) + uaddr = regs->gprs[insn->b2] + insn->d2; + else + uaddr = regs->gprs[insn->b1] + insn->d1; + remainder = PAGE_SIZE - (uaddr & (PAGE_SIZE - 1)); + if (regs->gprs[insn->r3] <= remainder) + remainder = 0; + regs->gprs[insn->r3] = remainder; + return true; +} + bool fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *ex; @@ -95,6 +138,10 @@ bool fixup_exception(struct pt_regs *regs) return ex_handler_zeropad(ex, regs); case EX_TYPE_FPC: return ex_handler_fpc(ex, regs); + case EX_TYPE_UA_MVCOS_TO: + return ex_handler_ua_mvcos(ex, false, regs); + case EX_TYPE_UA_MVCOS_FROM: + return ex_handler_ua_mvcos(ex, true, regs); } panic("invalid exception table entry"); } diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 4692136c0af1..f7da53e212f5 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -21,6 +21,7 @@ #include <linux/ioport.h> #include <linux/refcount.h> #include <linux/pgtable.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/page.h> #include <asm/ebcdic.h> @@ -255,7 +256,7 @@ segment_type (char* name) int rc; struct dcss_segment seg; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return -ENOSYS; dcss_mkname(name, seg.dcss_name); @@ -418,7 +419,7 @@ segment_load (char *name, int do_nonshared, unsigned long *addr, struct dcss_segment *seg; int rc; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return -ENOSYS; mutex_lock(&dcss_lock); @@ -529,6 +530,14 @@ segment_modify_shared (char *name, int do_nonshared) return rc; } +static void __dcss_diag_purge_on_cpu_0(void *data) +{ + struct dcss_segment *seg = 
(struct dcss_segment *)data; + unsigned long dummy; + + dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); +} + /* * Decrease the use count of a DCSS segment and remove * it from the address space if nobody is using it @@ -537,10 +546,9 @@ segment_modify_shared (char *name, int do_nonshared) void segment_unload(char *name) { - unsigned long dummy; struct dcss_segment *seg; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return; mutex_lock(&dcss_lock); @@ -555,7 +563,14 @@ segment_unload(char *name) kfree(seg->res); vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); list_del(&seg->list); - dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); + /* + * Workaround for z/VM issue, where calling the DCSS unload diag on + * a non-IPL CPU would cause bogus sclp maximum memory detection on + * next IPL. + * IPL CPU 0 cannot be set offline, so the dcss_diag() call can + * directly be scheduled to that CPU. + */ + smp_call_function_single(0, __dcss_diag_purge_on_cpu_0, seg, 1); kfree(seg); out_unlock: mutex_unlock(&dcss_lock); @@ -572,7 +587,7 @@ segment_save(char *name) char cmd2[80]; int i, response; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return; mutex_lock(&dcss_lock); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 9b681f74dccc..e1ad05bfd28a 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -11,11 +11,11 @@ #include <linux/kernel_stat.h> #include <linux/mmu_context.h> +#include <linux/cpufeature.h> #include <linux/perf_event.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/sched/debug.h> -#include <linux/jump_label.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/string.h> @@ -40,22 +40,11 @@ #include <asm/ptrace.h> #include <asm/fault.h> #include <asm/diag.h> -#include <asm/gmap.h> #include <asm/irq.h> #include <asm/facility.h> #include <asm/uv.h> #include "../kernel/entry.h" -static DEFINE_STATIC_KEY_FALSE(have_store_indication); - -static int __init 
fault_init(void) -{ - if (test_facility(75)) - static_branch_enable(&have_store_indication); - return 0; -} -early_initcall(fault_init); - /* * Find out which address space caused the exception. */ @@ -81,7 +70,7 @@ static __always_inline bool fault_is_write(struct pt_regs *regs) { union teid teid = { .val = regs->int_parm_long }; - if (static_branch_likely(&have_store_indication)) + if (test_facility(75)) return teid.fsi == TEID_FSI_STORE; return false; } @@ -175,6 +164,23 @@ static void dump_fault_info(struct pt_regs *regs) int show_unhandled_signals = 1; +static const struct ctl_table s390_fault_sysctl_table[] = { + { + .procname = "userprocess_debug", + .data = &show_unhandled_signals, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +}; + +static int __init init_s390_fault_sysctls(void) +{ + register_sysctl_init("kernel", s390_fault_sysctl_table); + return 0; +} +arch_initcall(init_s390_fault_sysctls); + void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault) { static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); @@ -369,6 +375,7 @@ void do_protection_exception(struct pt_regs *regs) if (unlikely(!teid.b61)) { if (user_mode(regs)) { /* Low-address protection in user mode: cannot happen */ + dump_fault_info(regs); die(regs, "Low-address protection"); } /* @@ -377,7 +384,7 @@ void do_protection_exception(struct pt_regs *regs) */ return handle_fault_error_nolock(regs, 0); } - if (unlikely(MACHINE_HAS_NX && teid.b56)) { + if (unlikely(cpu_has_nx() && teid.b56)) { regs->int_parm_long = (teid.addr * PAGE_SIZE) | (regs->psw.addr & PAGE_MASK); return handle_fault_error_nolock(regs, SEGV_ACCERR); } @@ -434,6 +441,8 @@ void do_secure_storage_access(struct pt_regs *regs) if (rc) BUG(); } else { + if (faulthandler_disabled()) + return handle_fault_error_nolock(regs, 0); mm = current->mm; mmap_read_lock(mm); vma = find_vma(mm, addr); diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c 
index 94d927785800..012a4366a2ad 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -8,6 +8,7 @@ * Janosch Frank <frankja@linux.vnet.ibm.com> */ +#include <linux/cpufeature.h> #include <linux/kernel.h> #include <linux/pagewalk.h> #include <linux/swap.h> @@ -20,9 +21,10 @@ #include <linux/pgtable.h> #include <asm/page-states.h> #include <asm/pgalloc.h> +#include <asm/machine.h> +#include <asm/gmap_helpers.h> #include <asm/gmap.h> #include <asm/page.h> -#include <asm/tlb.h> /* * The address is saved in a radix tree directly; NULL would be ambiguous, @@ -135,7 +137,7 @@ EXPORT_SYMBOL_GPL(gmap_create); static void gmap_flush_tlb(struct gmap *gmap) { - if (MACHINE_HAS_IDTE) + if (cpu_has_idte()) __tlb_flush_idte(gmap->asce); else __tlb_flush_global(); @@ -618,63 +620,20 @@ EXPORT_SYMBOL(__gmap_link); */ void __gmap_zap(struct gmap *gmap, unsigned long gaddr) { - struct vm_area_struct *vma; unsigned long vmaddr; - spinlock_t *ptl; - pte_t *ptep; + + mmap_assert_locked(gmap->mm); /* Find the vm address for the guest address */ vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT); if (vmaddr) { vmaddr |= gaddr & ~PMD_MASK; - - vma = vma_lookup(gmap->mm, vmaddr); - if (!vma || is_vm_hugetlb_page(vma)) - return; - - /* Get pointer to the page table entry */ - ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); - if (likely(ptep)) { - ptep_zap_unused(gmap->mm, vmaddr, ptep, 0); - pte_unmap_unlock(ptep, ptl); - } + gmap_helper_zap_one_page(gmap->mm, vmaddr); } } EXPORT_SYMBOL_GPL(__gmap_zap); -void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) -{ - unsigned long gaddr, vmaddr, size; - struct vm_area_struct *vma; - - mmap_read_lock(gmap->mm); - for (gaddr = from; gaddr < to; - gaddr = (gaddr + PMD_SIZE) & PMD_MASK) { - /* Find the vm address for the guest address */ - vmaddr = (unsigned long) - radix_tree_lookup(&gmap->guest_to_host, - gaddr >> PMD_SHIFT); - if (!vmaddr) - continue; - vmaddr |= gaddr & ~PMD_MASK; - 
/* Find vma in the parent mm */ - vma = find_vma(gmap->mm, vmaddr); - if (!vma) - continue; - /* - * We do not discard pages that are backed by - * hugetlbfs, so we don't have to refault them. - */ - if (is_vm_hugetlb_page(vma)) - continue; - size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); - zap_page_range_single(vma, vmaddr, size, NULL); - } - mmap_read_unlock(gmap->mm); -} -EXPORT_SYMBOL_GPL(gmap_discard); - static LIST_HEAD(gmap_notifier_list); static DEFINE_SPINLOCK(gmap_notifier_lock); @@ -2025,10 +1984,10 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new, gaddr &= HPAGE_MASK; pmdp_notify_gmap(gmap, pmdp, gaddr); new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_GMAP_IN)); - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_GLOBAL); - else if (MACHINE_HAS_IDTE) + else if (cpu_has_idte()) __pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL); else __pmdp_csp(pmdp); @@ -2103,10 +2062,10 @@ void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr) WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | _SEGMENT_ENTRY_GMAP_UC | _SEGMENT_ENTRY)); - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_LOCAL); - else if (MACHINE_HAS_IDTE) + else if (cpu_has_idte()) __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL); *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); } @@ -2136,10 +2095,10 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr) WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | _SEGMENT_ENTRY_GMAP_UC | _SEGMENT_ENTRY)); - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_GLOBAL); - else if (MACHINE_HAS_IDTE) + else if (cpu_has_idte()) __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL); else __pmdp_csp(pmdp); @@ -2258,9 +2217,6 @@ int s390_enable_sie(void) /* Do we have pgstes? 
if yes, we are done */ if (mm_has_pgste(mm)) return 0; - /* Fail if the page tables are 2K */ - if (!mm_alloc_pgste(mm)) - return -EINVAL; mmap_write_lock(mm); mm->context.has_pgste = 1; /* split thp mappings and disable thp for future mappings */ @@ -2270,138 +2226,6 @@ int s390_enable_sie(void) } EXPORT_SYMBOL_GPL(s390_enable_sie); -static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr, - unsigned long end, struct mm_walk *walk) -{ - unsigned long *found_addr = walk->private; - - /* Return 1 of the page is a zeropage. */ - if (is_zero_pfn(pte_pfn(*pte))) { - /* - * Shared zeropage in e.g., a FS DAX mapping? We cannot do the - * right thing and likely don't care: FAULT_FLAG_UNSHARE - * currently only works in COW mappings, which is also where - * mm_forbids_zeropage() is checked. - */ - if (!is_cow_mapping(walk->vma->vm_flags)) - return -EFAULT; - - *found_addr = addr; - return 1; - } - return 0; -} - -static const struct mm_walk_ops find_zeropage_ops = { - .pte_entry = find_zeropage_pte_entry, - .walk_lock = PGWALK_WRLOCK, -}; - -/* - * Unshare all shared zeropages, replacing them by anonymous pages. Note that - * we cannot simply zap all shared zeropages, because this could later - * trigger unexpected userfaultfd missing events. - * - * This must be called after mm->context.allow_cow_sharing was - * set to 0, to avoid future mappings of shared zeropages. - * - * mm contracts with s390, that even if mm were to remove a page table, - * and racing with walk_page_range_vma() calling pte_offset_map_lock() - * would fail, it will never insert a page table containing empty zero - * pages once mm_forbids_zeropage(mm) i.e. - * mm->context.allow_cow_sharing is set to 0. 
- */ -static int __s390_unshare_zeropages(struct mm_struct *mm) -{ - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, mm, 0); - unsigned long addr; - vm_fault_t fault; - int rc; - - for_each_vma(vmi, vma) { - /* - * We could only look at COW mappings, but it's more future - * proof to catch unexpected zeropages in other mappings and - * fail. - */ - if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma)) - continue; - addr = vma->vm_start; - -retry: - rc = walk_page_range_vma(vma, addr, vma->vm_end, - &find_zeropage_ops, &addr); - if (rc < 0) - return rc; - else if (!rc) - continue; - - /* addr was updated by find_zeropage_pte_entry() */ - fault = handle_mm_fault(vma, addr, - FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE, - NULL); - if (fault & VM_FAULT_OOM) - return -ENOMEM; - /* - * See break_ksm(): even after handle_mm_fault() returned 0, we - * must start the lookup from the current address, because - * handle_mm_fault() may back out if there's any difficulty. - * - * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but - * maybe they could trigger in the future on concurrent - * truncation. In that case, the shared zeropage would be gone - * and we can simply retry and make progress. - */ - cond_resched(); - goto retry; - } - - return 0; -} - -static int __s390_disable_cow_sharing(struct mm_struct *mm) -{ - int rc; - - if (!mm->context.allow_cow_sharing) - return 0; - - mm->context.allow_cow_sharing = 0; - - /* Replace all shared zeropages by anonymous pages. */ - rc = __s390_unshare_zeropages(mm); - /* - * Make sure to disable KSM (if enabled for the whole process or - * individual VMAs). Note that nothing currently hinders user space - * from re-enabling it. - */ - if (!rc) - rc = ksm_disable(mm); - if (rc) - mm->context.allow_cow_sharing = 1; - return rc; -} - -/* - * Disable most COW-sharing of memory pages for the whole process: - * (1) Disable KSM and unmerge/unshare any KSM pages. 
- * (2) Disallow shared zeropages and unshare any zerpages that are mapped. - * - * Not that we currently don't bother with COW-shared pages that are shared - * with parent/child processes due to fork(). - */ -int s390_disable_cow_sharing(void) -{ - int rc; - - mmap_write_lock(current->mm); - rc = __s390_disable_cow_sharing(current->mm); - mmap_write_unlock(current->mm); - return rc; -} -EXPORT_SYMBOL_GPL(s390_disable_cow_sharing); - /* * Enable storage key handling from now on and initialize the storage * keys with the default key. @@ -2469,7 +2293,7 @@ int s390_enable_skey(void) goto out_up; mm->context.uses_skeys = 1; - rc = __s390_disable_cow_sharing(mm); + rc = gmap_helper_disable_cow_sharing(); if (rc) { mm->context.uses_skeys = 0; goto out_up; @@ -2626,31 +2450,3 @@ int s390_replace_asce(struct gmap *gmap) return 0; } EXPORT_SYMBOL_GPL(s390_replace_asce); - -/** - * kvm_s390_wiggle_split_folio() - try to drain extra references to a folio and optionally split - * @mm: the mm containing the folio to work on - * @folio: the folio - * @split: whether to split a large folio - * - * Context: Must be called while holding an extra reference to the folio; - * the mm lock should not be held. - */ -int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split) -{ - int rc; - - lockdep_assert_not_held(&mm->mmap_lock); - folio_wait_writeback(folio); - lru_add_drain_all(); - if (split) { - folio_lock(folio); - rc = split_folio(folio); - folio_unlock(folio); - - if (rc != -EBUSY) - return rc; - } - return -EAGAIN; -} -EXPORT_SYMBOL_GPL(kvm_s390_wiggle_split_folio); diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c new file mode 100644 index 000000000000..a45d417ad951 --- /dev/null +++ b/arch/s390/mm/gmap_helpers.c @@ -0,0 +1,221 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Helper functions for KVM guest address space mapping code + * + * Copyright IBM Corp. 
2007, 2025 + */ +#include <linux/mm_types.h> +#include <linux/mmap_lock.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/swap.h> +#include <linux/swapops.h> +#include <linux/pagewalk.h> +#include <linux/ksm.h> +#include <asm/gmap_helpers.h> + +/** + * ptep_zap_swap_entry() - discard a swap entry. + * @mm: the mm + * @entry: the swap entry that needs to be zapped + * + * Discards the given swap entry. If the swap entry was an actual swap + * entry (and not a migration entry, for example), the actual swapped + * page is also discarded from swap. + */ +static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) +{ + if (!non_swap_entry(entry)) + dec_mm_counter(mm, MM_SWAPENTS); + else if (is_migration_entry(entry)) + dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry))); + free_swap_and_cache(entry); +} + +/** + * gmap_helper_zap_one_page() - discard a page if it was swapped. + * @mm: the mm + * @vmaddr: the userspace virtual address that needs to be discarded + * + * If the given address maps to a swap entry, discard it. + * + * Context: needs to be called while holding the mmap lock. + */ +void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr) +{ + struct vm_area_struct *vma; + spinlock_t *ptl; + pte_t *ptep; + + mmap_assert_locked(mm); + + /* Find the vm address for the guest address */ + vma = vma_lookup(mm, vmaddr); + if (!vma || is_vm_hugetlb_page(vma)) + return; + + /* Get pointer to the page table entry */ + ptep = get_locked_pte(mm, vmaddr, &ptl); + if (unlikely(!ptep)) + return; + if (pte_swap(*ptep)) + ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep)); + pte_unmap_unlock(ptep, ptl); +} +EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page); + +/** + * gmap_helper_discard() - discard user pages in the given range + * @mm: the mm + * @vmaddr: starting userspace address + * @end: end address (first address outside the range) + * + * All userpace pages in the range [@vamddr, @end) are discarded and unmapped. 
+ * + * Context: needs to be called while holding the mmap lock. + */ +void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end) +{ + struct vm_area_struct *vma; + + mmap_assert_locked(mm); + + while (vmaddr < end) { + vma = find_vma_intersection(mm, vmaddr, end); + if (!vma) + return; + if (!is_vm_hugetlb_page(vma)) + zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL); + vmaddr = vma->vm_end; + } +} +EXPORT_SYMBOL_GPL(gmap_helper_discard); + +static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr, + unsigned long end, struct mm_walk *walk) +{ + unsigned long *found_addr = walk->private; + + /* Return 1 of the page is a zeropage. */ + if (is_zero_pfn(pte_pfn(*pte))) { + /* + * Shared zeropage in e.g., a FS DAX mapping? We cannot do the + * right thing and likely don't care: FAULT_FLAG_UNSHARE + * currently only works in COW mappings, which is also where + * mm_forbids_zeropage() is checked. + */ + if (!is_cow_mapping(walk->vma->vm_flags)) + return -EFAULT; + + *found_addr = addr; + return 1; + } + return 0; +} + +static const struct mm_walk_ops find_zeropage_ops = { + .pte_entry = find_zeropage_pte_entry, + .walk_lock = PGWALK_WRLOCK, +}; + +/** __gmap_helper_unshare_zeropages() - unshare all shared zeropages + * @mm: the mm whose zero pages are to be unshared + * + * Unshare all shared zeropages, replacing them by anonymous pages. Note that + * we cannot simply zap all shared zeropages, because this could later + * trigger unexpected userfaultfd missing events. + * + * This must be called after mm->context.allow_cow_sharing was + * set to 0, to avoid future mappings of shared zeropages. + * + * mm contracts with s390, that even if mm were to remove a page table, + * and racing with walk_page_range_vma() calling pte_offset_map_lock() + * would fail, it will never insert a page table containing empty zero + * pages once mm_forbids_zeropage(mm) i.e. + * mm->context.allow_cow_sharing is set to 0. 
+ */ +static int __gmap_helper_unshare_zeropages(struct mm_struct *mm) +{ + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, 0); + unsigned long addr; + vm_fault_t fault; + int rc; + + for_each_vma(vmi, vma) { + /* + * We could only look at COW mappings, but it's more future + * proof to catch unexpected zeropages in other mappings and + * fail. + */ + if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma)) + continue; + addr = vma->vm_start; + +retry: + rc = walk_page_range_vma(vma, addr, vma->vm_end, + &find_zeropage_ops, &addr); + if (rc < 0) + return rc; + else if (!rc) + continue; + + /* addr was updated by find_zeropage_pte_entry() */ + fault = handle_mm_fault(vma, addr, + FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE, + NULL); + if (fault & VM_FAULT_OOM) + return -ENOMEM; + /* + * See break_ksm(): even after handle_mm_fault() returned 0, we + * must start the lookup from the current address, because + * handle_mm_fault() may back out if there's any difficulty. + * + * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but + * maybe they could trigger in the future on concurrent + * truncation. In that case, the shared zeropage would be gone + * and we can simply retry and make progress. + */ + cond_resched(); + goto retry; + } + + return 0; +} + +/** + * gmap_helper_disable_cow_sharing() - disable all COW sharing + * + * Disable most COW-sharing of memory pages for the whole process: + * (1) Disable KSM and unmerge/unshare any KSM pages. + * (2) Disallow shared zeropages and unshare any zerpages that are mapped. + * + * Not that we currently don't bother with COW-shared pages that are shared + * with parent/child processes due to fork(). + */ +int gmap_helper_disable_cow_sharing(void) +{ + struct mm_struct *mm = current->mm; + int rc; + + mmap_assert_write_locked(mm); + + if (!mm->context.allow_cow_sharing) + return 0; + + mm->context.allow_cow_sharing = 0; + + /* Replace all shared zeropages by anonymous pages. 
*/ + rc = __gmap_helper_unshare_zeropages(mm); + /* + * Make sure to disable KSM (if enabled for the whole process or + * individual VMAs). Note that nothing currently hinders user space + * from re-enabling it. + */ + if (!rc) + rc = ksm_disable(mm); + if (rc) + mm->context.allow_cow_sharing = 1; + return rc; +} +EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 2e568f175cd4..e88c02c9e642 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -9,12 +9,13 @@ #define KMSG_COMPONENT "hugetlb" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include <asm/pgalloc.h> +#include <linux/cpufeature.h> #include <linux/mm.h> #include <linux/hugetlb.h> #include <linux/mman.h> #include <linux/sched/mm.h> #include <linux/security.h> +#include <asm/pgalloc.h> /* * If the bit selected by single-bit bitmask "a" is set within "x", move @@ -248,9 +249,9 @@ pte_t *huge_pte_offset(struct mm_struct *mm, bool __init arch_hugetlb_valid_size(unsigned long size) { - if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) + if (cpu_has_edat1() && size == PMD_SIZE) return true; - else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) + else if (cpu_has_edat2() && size == PUD_SIZE) return true; else return false; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index f2298f7a3f21..074bf4fb4ce2 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -8,6 +8,7 @@ * Copyright (C) 1995 Linus Torvalds */ +#include <linux/cpufeature.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -39,7 +40,6 @@ #include <asm/kfence.h> #include <asm/dma.h> #include <asm/abs_lowcore.h> -#include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/sections.h> #include <asm/sclp.h> @@ -73,8 +73,6 @@ static void __init setup_zero_pages(void) { unsigned long total_pages = memblock_estimated_nr_free_pages(); unsigned int order; - struct page *page; - int i; /* Latest machines require a mapping 
granularity of 512KB */ order = 7; @@ -83,16 +81,7 @@ static void __init setup_zero_pages(void) while (order > 2 && (total_pages >> 10) < (1UL << order)) order--; - empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - if (!empty_zero_page) - panic("Out of memory in setup_zero_pages"); - - page = virt_to_page((void *) empty_zero_page); - split_page(page, order); - for (i = 1 << order; i > 0; i--) { - mark_page_reserved(page); - page++; - } + empty_zero_page = (unsigned long)memblock_alloc_or_panic(PAGE_SIZE << order, PAGE_SIZE); zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK; } @@ -117,7 +106,7 @@ void mark_rodata_ro(void) { unsigned long size = __end_ro_after_init - __start_ro_after_init; - if (MACHINE_HAS_NX) + if (cpu_has_nx()) system_ctl_set_bit(0, CR0_INSTRUCTION_EXEC_PROTECTION_BIT); __set_memory_ro(__start_ro_after_init, __end_ro_after_init); pr_info("Write protected read-only-after-init data: %luk\n", size >> 10); @@ -165,19 +154,13 @@ static void pv_init(void) swiotlb_update_mem_attributes(); } -void __init mem_init(void) +void __init arch_mm_preinit(void) { cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(0, mm_cpumask(&init_mm)); - set_max_mapnr(max_low_pfn); - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - pv_init(); - kfence_split_mapping(); - /* this will put all low memory onto the freelists */ - memblock_free_all(); setup_zero_pages(); /* Setup zeroed pages. 
*/ } @@ -239,16 +222,13 @@ struct s390_cma_mem_data { static int s390_cma_check_range(struct cma *cma, void *data) { struct s390_cma_mem_data *mem_data; - unsigned long start, end; mem_data = data; - start = cma_get_base(cma); - end = start + cma_get_size(cma); - if (end < mem_data->start) - return 0; - if (start >= mem_data->end) - return 0; - return -EBUSY; + + if (cma_intersects(cma, mem_data->start, mem_data->end)) + return -EBUSY; + + return 0; } static int s390_cma_mem_notifier(struct notifier_block *nb, @@ -285,7 +265,7 @@ int arch_add_memory(int nid, u64 start, u64 size, unsigned long size_pages = PFN_DOWN(size); int rc; - if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot)) + if (WARN_ON_ONCE(pgprot_val(params->pgprot) != pgprot_val(PAGE_KERNEL))) return -EINVAL; VM_BUG_ON(!mhp_range_allowed(start, size, true)); diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 76f376876e0d..40a526d28184 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -51,7 +51,6 @@ static inline unsigned long mmap_base(unsigned long rnd, { unsigned long gap = rlim_stack->rlim_cur; unsigned long pad = stack_maxrandom_size() + stack_guard_gap; - unsigned long gap_min, gap_max; /* Values close to RLIM_INFINITY can overflow. */ if (gap + pad > gap) @@ -61,13 +60,7 @@ static inline unsigned long mmap_base(unsigned long rnd, * Top of mmap area (just below the process stack). * Leave at least a ~128 MB hole. */ - gap_min = SZ_128M; - gap_max = (STACK_TOP / 6) * 5; - - if (gap < gap_min) - gap = gap_min; - else if (gap > gap_max) - gap = gap_max; + gap = clamp(gap, SZ_128M, (STACK_TOP / 6) * 5); return PAGE_ALIGN(STACK_TOP - gap - rnd); } diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index eae97fb61712..348e759840e7 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -3,6 +3,7 @@ * Copyright IBM Corp. 
2011 * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> */ +#include <linux/cpufeature.h> #include <linux/hugetlb.h> #include <linux/proc_fs.h> #include <linux/vmalloc.h> @@ -27,7 +28,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end) unsigned long boundary, size; while (start < end) { - if (MACHINE_HAS_EDAT1) { + if (cpu_has_edat1()) { /* set storage keys for a 1MB frame */ size = 1UL << 20; boundary = (start + size) & ~(size - 1); @@ -63,7 +64,7 @@ static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr, unsigned long *table, mask; mask = 0; - if (MACHINE_HAS_EDAT2) { + if (cpu_has_edat2()) { switch (dtt) { case CRDTE_DTT_REGION3: mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1); @@ -77,7 +78,7 @@ static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr, } table = (unsigned long *)((unsigned long)old & mask); crdte(*old, new, table, dtt, addr, get_lowcore()->kernel_asce.val); - } else if (MACHINE_HAS_IDTE) { + } else if (cpu_has_idte()) { cspg(old, *old, new); } else { csp((unsigned int *)old + 1, *old, new); @@ -373,7 +374,7 @@ int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags unsigned long end; int rc; - if (!MACHINE_HAS_NX) + if (!cpu_has_nx()) flags &= ~(SET_MEMORY_NX | SET_MEMORY_X); if (!flags) return 0; diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c index 1aac13bb8f53..e6175d75e4b0 100644 --- a/arch/s390/mm/pfault.c +++ b/arch/s390/mm/pfault.c @@ -9,6 +9,7 @@ #include <linux/init.h> #include <linux/irq.h> #include <asm/asm-extable.h> +#include <asm/asm-offsets.h> #include <asm/pfault.h> #include <asm/diag.h> @@ -56,7 +57,7 @@ int __pfault_init(void) if (pfault_disable) return rc; diag_stat_inc(DIAG_STAT_X258); - asm volatile( + asm_inline volatile( " diag %[refbk],%[rc],0x258\n" "0: nopr %%r7\n" EX_TABLE(0b, 0b) @@ -78,7 +79,7 @@ void __pfault_fini(void) if (pfault_disable) return; diag_stat_inc(DIAG_STAT_X258); - asm volatile( + asm_inline volatile( " 
diag %[refbk],0,0x258\n" "0: nopr %%r7\n" EX_TABLE(0b, 0b) diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 30387a6e98ff..b449fd2605b0 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -12,35 +12,8 @@ #include <asm/mmu_context.h> #include <asm/page-states.h> #include <asm/pgalloc.h> -#include <asm/gmap.h> -#include <asm/tlb.h> #include <asm/tlbflush.h> -#ifdef CONFIG_PGSTE - -int page_table_allocate_pgste = 0; -EXPORT_SYMBOL(page_table_allocate_pgste); - -static const struct ctl_table page_table_sysctl[] = { - { - .procname = "allocate_pgste", - .data = &page_table_allocate_pgste, - .maxlen = sizeof(int), - .mode = S_IRUGO | S_IWUSR, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -}; - -static int __init page_table_register_sysctl(void) -{ - return register_sysctl("vm", page_table_sysctl) ? 0 : -ENOMEM; -} -__initcall(page_table_register_sysctl); - -#endif /* CONFIG_PGSTE */ - unsigned long *crst_table_alloc(struct mm_struct *mm) { struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER); @@ -63,11 +36,15 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table) static void __crst_table_upgrade(void *arg) { struct mm_struct *mm = arg; + struct ctlreg asce; /* change all active ASCEs to avoid the creation of new TLBs */ if (current->active_mm == mm) { - get_lowcore()->user_asce.val = mm->context.asce; - local_ctl_load(7, &get_lowcore()->user_asce); + asce.val = mm->context.asce; + get_lowcore()->user_asce = asce; + local_ctl_load(7, &asce); + if (!test_thread_flag(TIF_ASCE_PRIMARY)) + local_ctl_load(1, &asce); } __tlb_flush_local(); } @@ -77,6 +54,8 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) unsigned long *pgd = NULL, *p4d = NULL, *__pgd; unsigned long asce_limit = mm->context.asce_limit; + mmap_assert_write_locked(mm); + /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */ VM_BUG_ON(asce_limit < _REGION2_SIZE); @@ 
-100,13 +79,6 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) spin_lock_bh(&mm->page_table_lock); - /* - * This routine gets called with mmap_lock lock held and there is - * no reason to optimize for the case of otherwise. However, if - * that would ever change, the below check will let us know. - */ - VM_BUG_ON(asce_limit != mm->context.asce_limit); - if (p4d) { __pgd = (unsigned long *) mm->pgd; p4d_populate(mm, (p4d_t *) p4d, (pud_t *) __pgd); @@ -170,7 +142,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) ptdesc = pagetable_alloc(GFP_KERNEL, 0); if (!ptdesc) return NULL; - if (!pagetable_pte_ctor(ptdesc)) { + if (!pagetable_pte_ctor(mm, ptdesc)) { pagetable_free(ptdesc); return NULL; } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index f05e62e037c2..7df70cd8f739 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -4,6 +4,7 @@ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ +#include <linux/cpufeature.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/errno.h> @@ -19,10 +20,10 @@ #include <linux/ksm.h> #include <linux/mman.h> -#include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> #include <asm/page-states.h> +#include <asm/machine.h> pgprot_t pgprot_writecombine(pgprot_t prot) { @@ -34,22 +35,12 @@ pgprot_t pgprot_writecombine(pgprot_t prot) } EXPORT_SYMBOL_GPL(pgprot_writecombine); -pgprot_t pgprot_writethrough(pgprot_t prot) -{ - /* - * mio_wb_bit_mask may be set on a different CPU, but it is only set - * once at init and only read afterwards. 
- */ - return __pgprot(pgprot_val(prot) & ~mio_wb_bit_mask); -} -EXPORT_SYMBOL_GPL(pgprot_writethrough); - static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int nodat) { unsigned long opt, asce; - if (MACHINE_HAS_TLB_GUEST) { + if (machine_has_tlb_guest()) { opt = 0; asce = READ_ONCE(mm->context.gmap_asce); if (asce == 0UL || nodat) @@ -69,7 +60,7 @@ static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr, { unsigned long opt, asce; - if (MACHINE_HAS_TLB_GUEST) { + if (machine_has_tlb_guest()) { opt = 0; asce = READ_ONCE(mm->context.gmap_asce); if (asce == 0UL || nodat) @@ -94,7 +85,7 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm, if (unlikely(pte_val(old) & _PAGE_INVALID)) return old; atomic_inc(&mm->context.flush_count); - if (MACHINE_HAS_TLB_LC && + if (cpu_has_tlb_lc() && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) ptep_ipte_local(mm, addr, ptep, nodat); else @@ -173,10 +164,10 @@ static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste, skey = (unsigned long) page_get_storage_key(address); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); /* Transfer page changed & referenced bit to guest bits in pgste */ - pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ + pgste = set_pgste_bit(pgste, bits << 48); /* GR bit & GC bit */ /* Copy page access key and fetch protection bit to pgste */ - pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); - pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste = set_pgste_bit(pgste, (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56); #endif return pgste; @@ -210,7 +201,7 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) if ((pte_val(entry) & _PAGE_PRESENT) && (pte_val(entry) & _PAGE_WRITE) && !(pte_val(entry) & _PAGE_INVALID)) { - if (!MACHINE_HAS_ESOP) { + if (!machine_has_esop()) { /* * Without enhanced 
suppression-on-protection force * the dirty bit on for all writable ptes. @@ -220,7 +211,7 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) } if (!(pte_val(entry) & _PAGE_PROTECT)) /* This pte allows write access, set user-dirty */ - pgste_val(pgste) |= PGSTE_UC_BIT; + pgste = set_pgste_bit(pgste, PGSTE_UC_BIT); } #endif set_pte(ptep, entry); @@ -236,7 +227,7 @@ static inline pgste_t pgste_pte_notify(struct mm_struct *mm, bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT); if (bits) { - pgste_val(pgste) ^= bits; + pgste = __pgste(pgste_val(pgste) ^ bits); ptep_notify(mm, addr, ptep, bits); } #endif @@ -374,7 +365,7 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, static inline void pmdp_idte_local(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_LOCAL); else @@ -386,12 +377,12 @@ static inline void pmdp_idte_local(struct mm_struct *mm, static inline void pmdp_idte_global(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - if (MACHINE_HAS_TLB_GUEST) { + if (machine_has_tlb_guest()) { __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_GLOBAL); if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) gmap_pmdp_idte_global(mm, addr); - } else if (MACHINE_HAS_IDTE) { + } else if (cpu_has_idte()) { __pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL); if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) gmap_pmdp_idte_global(mm, addr); @@ -411,7 +402,7 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) return old; atomic_inc(&mm->context.flush_count); - if (MACHINE_HAS_TLB_LC && + if (cpu_has_tlb_lc() && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) pmdp_idte_local(mm, addr, pmdp); else @@ -505,7 +496,7 @@ EXPORT_SYMBOL(pmdp_xchg_lazy); static inline void 
pudp_idte_local(struct mm_struct *mm, unsigned long addr, pud_t *pudp) { - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_LOCAL); else @@ -515,10 +506,10 @@ static inline void pudp_idte_local(struct mm_struct *mm, static inline void pudp_idte_global(struct mm_struct *mm, unsigned long addr, pud_t *pudp) { - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_GLOBAL); - else if (MACHINE_HAS_IDTE) + else if (cpu_has_idte()) __pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL); else /* @@ -537,7 +528,7 @@ static inline pud_t pudp_flush_direct(struct mm_struct *mm, if (pud_val(old) & _REGION_ENTRY_INVALID) return old; atomic_inc(&mm->context.flush_count); - if (MACHINE_HAS_TLB_LC && + if (cpu_has_tlb_lc() && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) pudp_idte_local(mm, addr, pudp); else @@ -609,7 +600,7 @@ void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr, /* the mm_has_pgste() check is done in set_pte_at() */ preempt_disable(); pgste = pgste_get_lock(ptep); - pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; + pgste = clear_pgste_bit(pgste, _PGSTE_GPS_ZERO); pgste_set_key(ptep, pgste, entry, mm); pgste = pgste_set_pte(ptep, pgste, entry); pgste_set_unlock(ptep, pgste); @@ -622,7 +613,7 @@ void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep) preempt_disable(); pgste = pgste_get_lock(ptep); - pgste_val(pgste) |= PGSTE_IN_BIT; + pgste = set_pgste_bit(pgste, PGSTE_IN_BIT); pgste_set_unlock(ptep, pgste); preempt_enable(); } @@ -667,7 +658,7 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr, entry = clear_pte_bit(entry, __pgprot(_PAGE_INVALID)); entry = set_pte_bit(entry, __pgprot(_PAGE_PROTECT)); } - pgste_val(pgste) |= bit; + pgste = set_pgste_bit(pgste, bit); pgste = pgste_set_pte(ptep, pgste, entry); pgste_set_unlock(ptep, pgste); return 0; @@ -687,7 
+678,7 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr, if (!(pte_val(spte) & _PAGE_INVALID) && !((pte_val(spte) & _PAGE_PROTECT) && !(pte_val(pte) & _PAGE_PROTECT))) { - pgste_val(spgste) |= PGSTE_VSIE_BIT; + spgste = set_pgste_bit(spgste, PGSTE_VSIE_BIT); tpgste = pgste_get_lock(tptep); tpte = __pte((pte_val(spte) & PAGE_MASK) | (pte_val(pte) & _PAGE_PROTECT)); @@ -745,7 +736,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, pte_clear(mm, addr, ptep); } if (reset) - pgste_val(pgste) &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT); + pgste = clear_pgste_bit(pgste, _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT); pgste_set_unlock(ptep, pgste); preempt_enable(); } @@ -758,8 +749,8 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) /* Clear storage key ACC and F, but set R/C */ preempt_disable(); pgste = pgste_get_lock(ptep); - pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); - pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT; + pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste = set_pgste_bit(pgste, PGSTE_GR_BIT | PGSTE_GC_BIT); ptev = pte_val(*ptep); if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0); @@ -780,13 +771,13 @@ bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr, pgste = pgste_get_lock(ptep); dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); - pgste_val(pgste) &= ~PGSTE_UC_BIT; + pgste = clear_pgste_bit(pgste, PGSTE_UC_BIT); pte = *ptep; if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { pgste = pgste_pte_notify(mm, addr, ptep, pgste); nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); ptep_ipte_global(mm, addr, ptep, nodat); - if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) + if (machine_has_esop() || !(pte_val(pte) & _PAGE_WRITE)) pte = set_pte_bit(pte, __pgprot(_PAGE_PROTECT)); else pte = set_pte_bit(pte, __pgprot(_PAGE_INVALID)); @@ -842,11 +833,11 @@ again: if (!ptep) goto again; new = old = 
pgste_get_lock(ptep); - pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | - PGSTE_ACC_BITS | PGSTE_FP_BIT); + new = clear_pgste_bit(new, PGSTE_GR_BIT | PGSTE_GC_BIT | + PGSTE_ACC_BITS | PGSTE_FP_BIT); keyul = (unsigned long) key; - pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; - pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + new = set_pgste_bit(new, (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48); + new = set_pgste_bit(new, (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56); if (!(pte_val(*ptep) & _PAGE_INVALID)) { unsigned long bits, skey; @@ -857,12 +848,12 @@ again: /* Set storage key ACC and FP */ page_set_storage_key(paddr, skey, !nq); /* Merge host changed & referenced into pgste */ - pgste_val(new) |= bits << 52; + new = set_pgste_bit(new, bits << 52); } /* changing the guest storage key is considered a change of the page */ if ((pgste_val(new) ^ pgste_val(old)) & (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) - pgste_val(new) |= PGSTE_UC_BIT; + new = set_pgste_bit(new, PGSTE_UC_BIT); pgste_set_unlock(ptep, new); pte_unmap_unlock(ptep, ptl); @@ -950,19 +941,19 @@ again: goto again; new = old = pgste_get_lock(ptep); /* Reset guest reference bit only */ - pgste_val(new) &= ~PGSTE_GR_BIT; + new = clear_pgste_bit(new, PGSTE_GR_BIT); if (!(pte_val(*ptep) & _PAGE_INVALID)) { paddr = pte_val(*ptep) & PAGE_MASK; cc = page_reset_referenced(paddr); /* Merge real referenced bit into host-set */ - pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT; + new = set_pgste_bit(new, ((unsigned long)cc << 53) & PGSTE_HR_BIT); } /* Reflect guest's logical view, not physical */ cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49; /* Changing the guest storage key is considered a change of the page */ if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT) - pgste_val(new) |= PGSTE_UC_BIT; + new = set_pgste_bit(new, PGSTE_UC_BIT); pgste_set_unlock(ptep, new); pte_unmap_unlock(ptep, ptl); @@ -1126,7 
+1117,7 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, if (res) pgstev |= _PGSTE_GPS_ZERO; - pgste_val(pgste) = pgstev; + pgste = __pgste(pgstev); pgste_set_unlock(ptep, pgste); pte_unmap_unlock(ptep, ptl); return res; @@ -1159,8 +1150,8 @@ int set_pgste_bits(struct mm_struct *mm, unsigned long hva, return -EFAULT; new = pgste_get_lock(ptep); - pgste_val(new) &= ~bits; - pgste_val(new) |= value & bits; + new = clear_pgste_bit(new, bits); + new = set_pgste_bit(new, value & bits); pgste_set_unlock(ptep, new); pte_unmap_unlock(ptep, ptl); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 8ead999e340b..448dd6ed1069 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -4,6 +4,7 @@ */ #include <linux/memory_hotplug.h> +#include <linux/cpufeature.h> #include <linux/memblock.h> #include <linux/pfn.h> #include <linux/mm.h> @@ -249,12 +250,12 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, } else if (pmd_none(*pmd)) { if (IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(next, PMD_SIZE) && - MACHINE_HAS_EDAT1 && direct && + cpu_has_edat1() && direct && !debug_pagealloc_enabled()) { set_pmd(pmd, __pmd(__pa(addr) | prot)); pages++; continue; - } else if (!direct && MACHINE_HAS_EDAT1) { + } else if (!direct && cpu_has_edat1()) { void *new_page; /* @@ -335,7 +336,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, } else if (pud_none(*pud)) { if (IS_ALIGNED(addr, PUD_SIZE) && IS_ALIGNED(next, PUD_SIZE) && - MACHINE_HAS_EDAT2 && direct && + cpu_has_edat2() && direct && !debug_pagealloc_enabled()) { set_pud(pud, __pud(__pa(addr) | prot)); pages++; @@ -659,7 +660,7 @@ void __init vmem_map_init(void) * prefix page is used to return to the previous context with * an LPSWE instruction and therefore must be executable. 
*/ - if (!static_key_enabled(&cpu_has_bear)) + if (!cpu_has_bear()) set_memory_x(0, 1); if (debug_pagealloc_enabled()) __set_memory_4k(__va(0), absolute_pointer(__va(0)) + ident_map_size); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 9d440a0b729e..0c9a35782c83 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -48,8 +48,6 @@ struct bpf_jit { int lit64; /* Current position in 64-bit literal pool */ int base_ip; /* Base address for literal pool */ int exit_ip; /* Address of exit */ - int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */ - int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */ int tail_call_start; /* Tail call start offset */ int excnt; /* Number of exception table entries */ int prologue_plt_ret; /* Return address for prologue hotpatch PLT */ @@ -127,6 +125,18 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) jit->seen_regs |= (1 << r1); } +static s32 off_to_pcrel(struct bpf_jit *jit, u32 off) +{ + return off - jit->prg; +} + +static s64 ptr_to_pcrel(struct bpf_jit *jit, const void *ptr) +{ + if (jit->prg_buf) + return (const u8 *)ptr - ((const u8 *)jit->prg_buf + jit->prg); + return 0; +} + #define REG_SET_SEEN(b1) \ ({ \ reg_set_seen(jit, b1); \ @@ -201,7 +211,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define EMIT4_PCREL_RIC(op, mask, target) \ ({ \ - int __rel = ((target) - jit->prg) / 2; \ + int __rel = off_to_pcrel(jit, target) / 2; \ _EMIT4((op) | (mask) << 20 | (__rel & 0xffff)); \ }) @@ -239,7 +249,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define EMIT6_PCREL_RIEB(op1, op2, b1, b2, mask, target) \ ({ \ - unsigned int rel = (int)((target) - jit->prg) / 2; \ + unsigned int rel = off_to_pcrel(jit, target) / 2; \ _EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), \ (op2) | (mask) << 12); \ REG_SET_SEEN(b1); \ @@ -248,7 +258,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define 
EMIT6_PCREL_RIEC(op1, op2, b1, imm, mask, target) \ ({ \ - unsigned int rel = (int)((target) - jit->prg) / 2; \ + unsigned int rel = off_to_pcrel(jit, target) / 2; \ _EMIT6((op1) | (reg_high(b1) | (mask)) << 16 | \ (rel & 0xffff), (op2) | ((imm) & 0xff) << 8); \ REG_SET_SEEN(b1); \ @@ -257,29 +267,41 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask) \ ({ \ - int rel = (addrs[(i) + (off) + 1] - jit->prg) / 2; \ + int rel = off_to_pcrel(jit, addrs[(i) + (off) + 1]) / 2;\ _EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\ REG_SET_SEEN(b1); \ REG_SET_SEEN(b2); \ }) +static void emit6_pcrel_ril(struct bpf_jit *jit, u32 op, s64 pcrel) +{ + u32 pc32dbl = (s32)(pcrel / 2); + + _EMIT6(op | pc32dbl >> 16, pc32dbl & 0xffff); +} + +static void emit6_pcrel_rilb(struct bpf_jit *jit, u32 op, u8 b, s64 pcrel) +{ + emit6_pcrel_ril(jit, op | reg_high(b) << 16, pcrel); + REG_SET_SEEN(b); +} + #define EMIT6_PCREL_RILB(op, b, target) \ -({ \ - unsigned int rel = (int)((target) - jit->prg) / 2; \ - _EMIT6((op) | reg_high(b) << 16 | rel >> 16, rel & 0xffff);\ - REG_SET_SEEN(b); \ -}) + emit6_pcrel_rilb(jit, op, b, off_to_pcrel(jit, target)) -#define EMIT6_PCREL_RIL(op, target) \ -({ \ - unsigned int rel = (int)((target) - jit->prg) / 2; \ - _EMIT6((op) | rel >> 16, rel & 0xffff); \ -}) +#define EMIT6_PCREL_RILB_PTR(op, b, target_ptr) \ + emit6_pcrel_rilb(jit, op, b, ptr_to_pcrel(jit, target_ptr)) + +static void emit6_pcrel_rilc(struct bpf_jit *jit, u32 op, u8 mask, s64 pcrel) +{ + emit6_pcrel_ril(jit, op | mask << 20, pcrel); +} #define EMIT6_PCREL_RILC(op, mask, target) \ -({ \ - EMIT6_PCREL_RIL((op) | (mask) << 20, (target)); \ -}) + emit6_pcrel_rilc(jit, op, mask, off_to_pcrel(jit, target)) + +#define EMIT6_PCREL_RILC_PTR(op, mask, target_ptr) \ + emit6_pcrel_rilc(jit, op, mask, ptr_to_pcrel(jit, target_ptr)) #define _EMIT6_IMM(op, imm) \ ({ \ @@ -503,7 +525,7 @@ static void bpf_skip(struct bpf_jit 
*jit, int size) { if (size >= 6 && !is_valid_rel(size)) { /* brcl 0xf,size */ - EMIT6_PCREL_RIL(0xc0f4000000, size); + EMIT6_PCREL_RILC(0xc0040000, 0xf, size); size -= 6; } else if (size >= 4 && is_valid_rel(size)) { /* brc 0xf,size */ @@ -544,7 +566,15 @@ static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target) { memcpy(plt, &bpf_plt, sizeof(*plt)); plt->ret = ret; - plt->target = target; + /* + * (target == NULL) implies that the branch to this PLT entry was + * patched and became a no-op. However, some CPU could have jumped + * to this PLT entry before patching and may be still executing it. + * + * Since the intention in this case is to make the PLT entry a no-op, + * make the target point to the return label instead of NULL. + */ + plt->target = target ?: ret; } /* @@ -605,43 +635,30 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp, } /* Setup stack and backchain */ if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) { - if (is_first_pass(jit) || (jit->seen & SEEN_FUNC)) - /* lgr %w1,%r15 (backchain) */ - EMIT4(0xb9040000, REG_W1, REG_15); + /* lgr %w1,%r15 (backchain) */ + EMIT4(0xb9040000, REG_W1, REG_15); /* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */ EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED); /* aghi %r15,-STK_OFF */ EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth)); - if (is_first_pass(jit) || (jit->seen & SEEN_FUNC)) - /* stg %w1,152(%r15) (backchain) */ - EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, - REG_15, 152); + /* stg %w1,152(%r15) (backchain) */ + EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, + REG_15, 152); } } /* - * Emit an expoline for a jump that follows - */ -static void emit_expoline(struct bpf_jit *jit) -{ - /* exrl %r0,.+10 */ - EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); - /* j . 
*/ - EMIT4_PCREL(0xa7f40000, 0); -} - -/* - * Emit __s390_indirect_jump_r1 thunk if necessary + * Jump using a register either directly or via an expoline thunk */ -static void emit_r1_thunk(struct bpf_jit *jit) -{ - if (nospec_uses_trampoline()) { - jit->r1_thunk_ip = jit->prg; - emit_expoline(jit); - /* br %r1 */ - _EMIT2(0x07f1); - } -} +#define EMIT_JUMP_REG(reg) do { \ + if (nospec_uses_trampoline()) \ + /* brcl 0xf,__s390_indirect_jump_rN */ \ + EMIT6_PCREL_RILC_PTR(0xc0040000, 0x0f, \ + __s390_indirect_jump_r ## reg); \ + else \ + /* br %rN */ \ + _EMIT2(0x07f0 | reg); \ +} while (0) /* * Call r1 either directly or via __s390_indirect_jump_r1 thunk @@ -650,7 +667,8 @@ static void call_r1(struct bpf_jit *jit) { if (nospec_uses_trampoline()) /* brasl %r14,__s390_indirect_jump_r1 */ - EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip); + EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, + __s390_indirect_jump_r1); else /* basr %r14,%r1 */ EMIT2(0x0d00, REG_14, REG_1); @@ -666,16 +684,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) EMIT4(0xb9040000, REG_2, BPF_REG_0); /* Restore registers */ save_restore_regs(jit, REGS_RESTORE, stack_depth, 0); - if (nospec_uses_trampoline()) { - jit->r14_thunk_ip = jit->prg; - /* Generate __s390_indirect_jump_r14 thunk */ - emit_expoline(jit); - } - /* br %r14 */ - _EMIT2(0x07fe); - - if (is_first_pass(jit) || (jit->seen & SEEN_FUNC)) - emit_r1_thunk(jit); + EMIT_JUMP_REG(14); jit->prg = ALIGN(jit->prg, 8); jit->prologue_plt = jit->prg; @@ -1877,7 +1886,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, /* aghi %r1,tail_call_start */ EMIT4_IMM(0xa70b0000, REG_1, jit->tail_call_start); /* brcl 0xf,__s390_indirect_jump_r1 */ - EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->r1_thunk_ip); + EMIT6_PCREL_RILC_PTR(0xc0040000, 0xf, + __s390_indirect_jump_r1); } else { /* bc 0xf,tail_call_start(%r1) */ _EMIT4(0x47f01000 + jit->tail_call_start); @@ -2585,9 +2595,8 @@ static int 
__arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, if (nr_stack_args > MAX_NR_STACK_ARGS) return -ENOTSUPP; - /* Return to %r14, since func_addr and %r0 are not available. */ - if ((!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK)) || - (flags & BPF_TRAMP_F_INDIRECT)) + /* Return to %r14 in the struct_ops case. */ + if (flags & BPF_TRAMP_F_INDIRECT) flags |= BPF_TRAMP_F_SKIP_FRAME; /* @@ -2847,17 +2856,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, 0xf000 | tjit->tccnt_off); /* aghi %r15,stack_size */ EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size); - /* Emit an expoline for the following indirect jump. */ - if (nospec_uses_trampoline()) - emit_expoline(jit); if (flags & BPF_TRAMP_F_SKIP_FRAME) - /* br %r14 */ - _EMIT2(0x07fe); + EMIT_JUMP_REG(14); else - /* br %r1 */ - _EMIT2(0x07f1); - - emit_r1_thunk(jit); + EMIT_JUMP_REG(1); return 0; } @@ -2919,10 +2921,16 @@ bool bpf_jit_supports_arena(void) bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena) { - /* - * Currently the verifier uses this function only to check which - * atomic stores to arena are supported, and they all are. 
- */ + if (!in_arena) + return true; + switch (insn->code) { + case BPF_STX | BPF_ATOMIC | BPF_B: + case BPF_STX | BPF_ATOMIC | BPF_H: + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: + if (bpf_atomic_is_load_store(insn)) + return false; + } return true; } diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index df73c5182990..1810e0944a4e 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -5,6 +5,6 @@ obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o \ pci_event.o pci_debug.o pci_insn.o pci_mmio.o \ - pci_bus.o pci_kvm_hook.o pci_report.o + pci_bus.o pci_kvm_hook.o pci_report.o pci_fixup.o obj-$(CONFIG_PCI_IOV) += pci_iov.o obj-$(CONFIG_SYSFS) += pci_sysfs.o diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 88f72745fa59..cd6676c2d602 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -31,6 +31,7 @@ #include <linux/lockdep.h> #include <linux/list_sort.h> +#include <asm/machine.h> #include <asm/isc.h> #include <asm/airq.h> #include <asm/facility.h> @@ -44,6 +45,7 @@ /* list of all detected zpci devices */ static LIST_HEAD(zpci_list); static DEFINE_SPINLOCK(zpci_list_lock); +static DEFINE_MUTEX(zpci_add_remove_lock); static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE); static DEFINE_SPINLOCK(zpci_domain_lock); @@ -69,6 +71,15 @@ EXPORT_SYMBOL_GPL(zpci_aipb); struct airq_iv *zpci_aif_sbv; EXPORT_SYMBOL_GPL(zpci_aif_sbv); +void zpci_zdev_put(struct zpci_dev *zdev) +{ + if (!zdev) + return; + mutex_lock(&zpci_add_remove_lock); + kref_put_lock(&zdev->kref, zpci_release_device, &zpci_list_lock); + mutex_unlock(&zpci_add_remove_lock); +} + struct zpci_dev *get_zdev_by_fid(u32 fid) { struct zpci_dev *tmp, *zdev = NULL; @@ -124,14 +135,13 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas, struct zpci_fib fib = {0}; u8 cc; - WARN_ON_ONCE(iota & 0x3fff); fib.pba = base; /* Work around off by one in ISM virt device */ if (zdev->pft == PCI_FUNC_TYPE_ISM && limit > base) fib.pal = limit + (1 << 
12); else fib.pal = limit; - fib.iota = iota | ZPCI_IOTA_RTTO_FLAG; + fib.iota = iota; fib.gd = zdev->gisa; cc = zpci_mod_fc(req, &fib, status); if (cc) @@ -255,7 +265,7 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res, } void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, - unsigned long prot) + pgprot_t prot) { /* * When PCI MIO instructions are unavailable the "physical" address @@ -265,7 +275,7 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, if (!static_branch_unlikely(&have_mio)) return (void __iomem *)phys_addr; - return generic_ioremap_prot(phys_addr, size, __pgprot(prot)); + return generic_ioremap_prot(phys_addr, size, prot); } EXPORT_SYMBOL(ioremap_prot); @@ -690,6 +700,23 @@ int zpci_enable_device(struct zpci_dev *zdev) } EXPORT_SYMBOL_GPL(zpci_enable_device); +int zpci_reenable_device(struct zpci_dev *zdev) +{ + u8 status; + int rc; + + rc = zpci_enable_device(zdev); + if (rc) + return rc; + + rc = zpci_iommu_register_ioat(zdev, &status); + if (rc) + zpci_disable_device(zdev); + + return rc; +} +EXPORT_SYMBOL_GPL(zpci_reenable_device); + int zpci_disable_device(struct zpci_dev *zdev) { u32 fh = zdev->fh; @@ -739,7 +766,6 @@ EXPORT_SYMBOL_GPL(zpci_disable_device); */ int zpci_hot_reset_device(struct zpci_dev *zdev) { - u8 status; int rc; lockdep_assert_held(&zdev->state_lock); @@ -758,19 +784,9 @@ int zpci_hot_reset_device(struct zpci_dev *zdev) return rc; } - rc = zpci_enable_device(zdev); - if (rc) - return rc; + rc = zpci_reenable_device(zdev); - if (zdev->dma_table) - rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table), &status); - if (rc) { - zpci_disable_device(zdev); - return rc; - } - - return 0; + return rc; } /** @@ -831,6 +847,7 @@ int zpci_add_device(struct zpci_dev *zdev) { int rc; + mutex_lock(&zpci_add_remove_lock); zpci_dbg(1, "add fid:%x, fh:%x, c:%d\n", zdev->fid, zdev->fh, zdev->state); rc = zpci_init_iommu(zdev); if (rc) @@ -844,12 
+861,14 @@ int zpci_add_device(struct zpci_dev *zdev) spin_lock(&zpci_list_lock); list_add_tail(&zdev->entry, &zpci_list); spin_unlock(&zpci_list_lock); + mutex_unlock(&zpci_add_remove_lock); return 0; error_destroy_iommu: zpci_destroy_iommu(zdev); error: zpci_dbg(0, "add fid:%x, rc:%d\n", zdev->fid, rc); + mutex_unlock(&zpci_add_remove_lock); return rc; } @@ -919,21 +938,20 @@ int zpci_deconfigure_device(struct zpci_dev *zdev) * @zdev: the zpci_dev that was reserved * * Handle the case that a given zPCI function was reserved by another system. - * After a call to this function the zpci_dev can not be found via - * get_zdev_by_fid() anymore but may still be accessible via existing - * references though it will not be functional anymore. */ void zpci_device_reserved(struct zpci_dev *zdev) { - /* - * Remove device from zpci_list as it is going away. This also - * makes sure we ignore subsequent zPCI events for this device. - */ - spin_lock(&zpci_list_lock); - list_del(&zdev->entry); - spin_unlock(&zpci_list_lock); + lockdep_assert_held(&zdev->state_lock); + /* We may declare the device reserved multiple times */ + if (zdev->state == ZPCI_FN_STATE_RESERVED) + return; zdev->state = ZPCI_FN_STATE_RESERVED; zpci_dbg(3, "rsv fid:%x\n", zdev->fid); + /* + * The underlying device is gone. Allow the zdev to be freed + * as soon as all other references are gone by accounting for + * the removal as a dropped reference. + */ zpci_zdev_put(zdev); } @@ -941,13 +959,14 @@ void zpci_release_device(struct kref *kref) { struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); + lockdep_assert_held(&zpci_add_remove_lock); WARN_ON(zdev->state != ZPCI_FN_STATE_RESERVED); - - if (zdev->zbus->bus) - zpci_bus_remove_device(zdev, false); - - if (zdev_enabled(zdev)) - zpci_disable_device(zdev); + /* + * We already hold zpci_list_lock thanks to kref_put_lock(). + * This makes sure no new reference can be taken from the list. 
+ */ + list_del(&zdev->entry); + spin_unlock(&zpci_list_lock); if (zdev->has_hp_slot) zpci_exit_slot(zdev); @@ -1073,7 +1092,7 @@ char * __init pcibios_setup(char *str) return NULL; } if (!strcmp(str, "nomio")) { - get_lowcore()->machine_flags &= ~MACHINE_FLAG_PCI_MIO; + clear_machine_feature(MFEATURE_PCI_MIO); return NULL; } if (!strcmp(str, "force_floating")) { @@ -1148,7 +1167,7 @@ static int __init pci_base_init(void) return 0; } - if (MACHINE_HAS_PCI_MIO) { + if (test_machine_feature(MFEATURE_PCI_MIO)) { static_branch_enable(&have_mio); system_ctl_set_bit(2, CR2_MIO_ADDRESSING_BIT); } diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 39a481ec4a40..81bdb54ad5e3 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -19,6 +19,7 @@ #include <linux/jump_label.h> #include <linux/pci.h> #include <linux/printk.h> +#include <linux/dma-direct.h> #include <asm/pci_clp.h> #include <asm/pci_dma.h> @@ -283,10 +284,32 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid) return zbus; } +static void pci_dma_range_setup(struct pci_dev *pdev) +{ + struct zpci_dev *zdev = to_zpci(pdev); + u64 aligned_end, size; + dma_addr_t dma_start; + int ret; + + dma_start = PAGE_ALIGN(zdev->start_dma); + aligned_end = PAGE_ALIGN_DOWN(zdev->end_dma + 1); + if (aligned_end >= dma_start) + size = aligned_end - dma_start; + else + size = 0; + WARN_ON_ONCE(size == 0); + + ret = dma_direct_set_offset(&pdev->dev, 0, dma_start, size); + if (ret) + pr_err("Failed to allocate DMA range map for %s\n", pci_name(pdev)); +} + void pcibios_bus_add_device(struct pci_dev *pdev) { struct zpci_dev *zdev = to_zpci(pdev); + pci_dma_range_setup(pdev); + /* * With pdev->no_vf_scan the common PCI probing code does not * perform PF/VF linking. 
@@ -335,6 +358,9 @@ static bool zpci_bus_is_isolated_vf(struct zpci_bus *zbus, struct zpci_dev *zdev { struct pci_dev *pdev; + if (!zdev->vfn) + return false; + pdev = zpci_iov_find_parent_pf(zbus, zdev); if (!pdev) return true; diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h index e86a9419d233..ae3d7a9159bd 100644 --- a/arch/s390/pci/pci_bus.h +++ b/arch/s390/pci/pci_bus.h @@ -21,11 +21,8 @@ int zpci_bus_scan_device(struct zpci_dev *zdev); void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error); void zpci_release_device(struct kref *kref); -static inline void zpci_zdev_put(struct zpci_dev *zdev) -{ - if (zdev) - kref_put(&zdev->kref, zpci_release_device); -} + +void zpci_zdev_put(struct zpci_dev *zdev); static inline void zpci_zdev_get(struct zpci_dev *zdev) { diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 14bf7e8d06b7..241f7251c873 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -56,7 +56,7 @@ static inline int clp_get_ilp(unsigned long *ilp) int cc, exception; exception = 1; - asm volatile ( + asm_inline volatile ( " .insn rrf,0xb9a00000,%[mask],%[cmd],8,0\n" "0: lhi %[exc],0\n" "1:\n" @@ -79,7 +79,7 @@ static __always_inline int clp_req(void *data, unsigned int lps) u64 ignored; exception = 1; - asm volatile ( + asm_inline volatile ( " .insn rrf,0xb9a00000,%[ign],%[req],0,%[lps]\n" "0: lhi %[exc],0\n" "1:\n" @@ -112,6 +112,7 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev, zdev->version = response->version; zdev->maxstbl = response->maxstbl; zdev->dtsm = response->dtsm; + zdev->rtr_avail = response->rtr; switch (response->version) { case 1: @@ -427,6 +428,8 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data) return; } zdev = zpci_create_device(entry->fid, entry->fh, entry->config_state); + if (IS_ERR(zdev)) + return; list_add_tail(&zdev->entry, scan_list); } diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 7bd7721c1239..d930416d4c90 
100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -54,6 +54,7 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res) case PCI_ERS_RESULT_CAN_RECOVER: case PCI_ERS_RESULT_RECOVERED: case PCI_ERS_RESULT_NEED_RESET: + case PCI_ERS_RESULT_NONE: return false; default: return true; @@ -78,10 +79,6 @@ static bool is_driver_supported(struct pci_driver *driver) return false; if (!driver->err_handler->error_detected) return false; - if (!driver->err_handler->slot_reset) - return false; - if (!driver->err_handler->resume) - return false; return true; } @@ -106,6 +103,10 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev, struct zpci_dev *zdev = to_zpci(pdev); int rc; + /* The underlying device may have been disabled by the event */ + if (!zdev_enabled(zdev)) + return PCI_ERS_RESULT_NEED_RESET; + pr_info("%s: Unblocking device access for examination\n", pci_name(pdev)); rc = zpci_reset_load_store_blocked(zdev); if (rc) { @@ -114,16 +115,18 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev, return PCI_ERS_RESULT_NEED_RESET; } - if (driver->err_handler->mmio_enabled) { + if (driver->err_handler->mmio_enabled) ers_res = driver->err_handler->mmio_enabled(pdev); - if (ers_result_indicates_abort(ers_res)) { - pr_info("%s: Automatic recovery failed after MMIO re-enable\n", - pci_name(pdev)); - return ers_res; - } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) { - pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); - return ers_res; - } + else + ers_res = PCI_ERS_RESULT_NONE; + + if (ers_result_indicates_abort(ers_res)) { + pr_info("%s: Automatic recovery failed after MMIO re-enable\n", + pci_name(pdev)); + return ers_res; + } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) { + pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); + return ers_res; } pr_debug("%s: Unblocking DMA\n", pci_name(pdev)); @@ -150,7 +153,12 @@ static pci_ers_result_t 
zpci_event_do_reset(struct pci_dev *pdev, return ers_res; } pdev->error_state = pci_channel_io_normal; - ers_res = driver->err_handler->slot_reset(pdev); + + if (driver->err_handler->slot_reset) + ers_res = driver->err_handler->slot_reset(pdev); + else + ers_res = PCI_ERS_RESULT_NONE; + if (ers_result_indicates_abort(ers_res)) { pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev)); return ers_res; @@ -214,7 +222,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) goto out_unlock; } - if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) { + if (ers_res != PCI_ERS_RESULT_NEED_RESET) { ers_res = zpci_event_do_error_state_clear(pdev, driver); if (ers_result_indicates_abort(ers_res)) { status_str = "failed (abort on MMIO enable)"; @@ -225,6 +233,16 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) if (ers_res == PCI_ERS_RESULT_NEED_RESET) ers_res = zpci_event_do_reset(pdev, driver); + /* + * ers_res can be PCI_ERS_RESULT_NONE either because the driver + * decided to return it, indicating that it abstains from voting + * on how to recover, or because it didn't implement the callback. + * Both cases assume, that if there is nothing else causing a + * disconnect, we recovered successfully. 
+ */ + if (ers_res == PCI_ERS_RESULT_NONE) + ers_res = PCI_ERS_RESULT_RECOVERED; + if (ers_res != PCI_ERS_RESULT_RECOVERED) { pr_err("%s: Automatic recovery failed; operator intervention is required\n", pci_name(pdev)); @@ -273,6 +291,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); struct pci_dev *pdev = NULL; pci_ers_result_t ers_res; + u32 fh = 0; + int rc; zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n", ccdf->fid, ccdf->fh, ccdf->pec); @@ -281,6 +301,15 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) if (zdev) { mutex_lock(&zdev->state_lock); + rc = clp_refresh_fh(zdev->fid, &fh); + if (rc) + goto no_pdev; + if (!fh || ccdf->fh != fh) { + /* Ignore events with stale handles */ + zpci_dbg(3, "err fid:%x, fh:%x (stale %x)\n", + ccdf->fid, fh, ccdf->fh); + goto no_pdev; + } zpci_update_fh(zdev, ccdf->fh); if (zdev->zbus->bus) pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); @@ -335,6 +364,22 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh) zdev->state = ZPCI_FN_STATE_STANDBY; } +static void zpci_event_reappear(struct zpci_dev *zdev) +{ + lockdep_assert_held(&zdev->state_lock); + /* + * The zdev is in the reserved state. This means that it was presumed to + * go away but there are still undropped references. Now, the platform + * announced its availability again. Bring back the lingering zdev + * to standby. This is safe because we hold a temporary reference + * now so that it won't go away. Account for the re-appearance of the + * underlying device by incrementing the reference count. 
+ */ + zdev->state = ZPCI_FN_STATE_STANDBY; + zpci_zdev_get(zdev); + zpci_dbg(1, "rea fid:%x, fh:%x\n", zdev->fid, zdev->fh); +} + static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); @@ -358,8 +403,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) break; } } else { + if (zdev->state == ZPCI_FN_STATE_RESERVED) + zpci_event_reappear(zdev); /* the configuration request may be stale */ - if (zdev->state != ZPCI_FN_STATE_STANDBY) + else if (zdev->state != ZPCI_FN_STATE_STANDBY) break; zdev->state = ZPCI_FN_STATE_CONFIGURED; } @@ -375,6 +422,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) break; } } else { + if (zdev->state == ZPCI_FN_STATE_RESERVED) + zpci_event_reappear(zdev); zpci_update_fh(zdev, ccdf->fh); } break; diff --git a/arch/s390/pci/pci_fixup.c b/arch/s390/pci/pci_fixup.c new file mode 100644 index 000000000000..35688b645098 --- /dev/null +++ b/arch/s390/pci/pci_fixup.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Exceptions for specific devices, + * + * Copyright IBM Corp. 2025 + * + * Author(s): + * Niklas Schnelle <schnelle@linux.ibm.com> + */ +#include <linux/pci.h> + +static void zpci_ism_bar_no_mmap(struct pci_dev *pdev) +{ + /* + * ISM's BAR is special. Drivers written for ISM know + * how to handle this but others need to be aware of their + * special nature e.g. to prevent attempts to mmap() it. 
+ */ + pdev->non_mappable_bars = 1; +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, + PCI_DEVICE_ID_IBM_ISM, + zpci_ism_bar_no_mmap); diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index f5a75ea7629a..eb978c8012be 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -160,7 +160,7 @@ static inline int ____pcilg(u64 *data, u64 req, u64 offset, u8 *status) u64 __data; exception = 1; - asm volatile ( + asm_inline volatile ( " .insn rre,0xb9d20000,%[data],%[req_off]\n" "0: lhi %[exc],0\n" "1:\n" @@ -229,7 +229,7 @@ static inline int __pcilg_mio(u64 *data, u64 ioaddr, u64 len, u8 *status) u64 __data; exception = 1; - asm volatile ( + asm_inline volatile ( " .insn rre,0xb9d60000,%[data],%[ioaddr_len]\n" "0: lhi %[exc],0\n" "1:\n" @@ -267,7 +267,7 @@ static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status) int cc, exception; exception = 1; - asm volatile ( + asm_inline volatile ( " .insn rre,0xb9d00000,%[data],%[req_off]\n" "0: lhi %[exc],0\n" "1:\n" @@ -321,7 +321,7 @@ static inline int __pcistg_mio(u64 data, u64 ioaddr, u64 len, u8 *status) int cc, exception; exception = 1; - asm volatile ( + asm_inline volatile ( " .insn rre,0xb9d40000,%[data],%[ioaddr_len]\n" "0: lhi %[exc],0\n" "1:\n" @@ -356,7 +356,7 @@ static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status) int cc, exception; exception = 1; - asm volatile ( + asm_inline volatile ( " .insn rsy,0xeb00000000d0,%[req],%[offset],%[data]\n" "0: lhi %[exc],0\n" "1:\n" @@ -410,7 +410,7 @@ static inline int __pcistb_mio(const u64 *data, u64 ioaddr, u64 len, u8 *status) int cc, exception; exception = 1; - asm volatile ( + asm_inline volatile ( " .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[data]\n" "0: lhi %[exc],0\n" "1:\n" diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 46f99dc164ad..51e7a28af899 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -32,9 +32,11 @@ static inline int __pcistb_mio_inuser( u64 len, u8 
*status) { int cc, exception; + bool sacf_flag; exception = 1; - asm volatile ( + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile ( " sacf 256\n" "0: .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[src]\n" "1: lhi %[exc],0\n" @@ -44,6 +46,7 @@ static inline int __pcistb_mio_inuser( : CC_OUT(cc, cc), [len] "+d" (len), [exc] "+d" (exception) : [ioaddr] "a" (ioaddr), [src] "Q" (*((u8 __force *)src)) : CC_CLOBBER_LIST("memory")); + disable_sacf_uaccess(sacf_flag); *status = len >> 24 & 0xff; return exception ? -ENXIO : CC_TRANSFORM(cc); } @@ -54,6 +57,7 @@ static inline int __pcistg_mio_inuser( { union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen}; int cc, exception; + bool sacf_flag; u64 val = 0; u64 cnt = ulen; u8 tmp; @@ -64,7 +68,8 @@ static inline int __pcistg_mio_inuser( * address space. pcistg then uses the user mappings. */ exception = 1; - asm volatile ( + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile ( " sacf 256\n" "0: llgc %[tmp],0(%[src])\n" "4: sllg %[val],%[val],8\n" @@ -81,6 +86,7 @@ static inline int __pcistg_mio_inuser( CC_OUT(cc, cc), [ioaddr_len] "+&d" (ioaddr_len.pair) : : CC_CLOBBER_LIST("memory")); + disable_sacf_uaccess(sacf_flag); *status = ioaddr_len.odd >> 24 & 0xff; cc = exception ? 
-ENXIO : CC_TRANSFORM(cc); @@ -175,8 +181,12 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, args.address = mmio_addr; args.vma = vma; ret = follow_pfnmap_start(&args); - if (ret) - goto out_unlock_mmap; + if (ret) { + fixup_user_fault(current->mm, mmio_addr, FAULT_FLAG_WRITE, NULL); + ret = follow_pfnmap_start(&args); + if (ret) + goto out_unlock_mmap; + } io_addr = (void __iomem *)((args.pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); @@ -200,6 +210,7 @@ static inline int __pcilg_mio_inuser( u64 ulen, u8 *status) { union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen}; + bool sacf_flag; u64 cnt = ulen; int shift = ulen * 8; int cc, exception; @@ -211,7 +222,8 @@ static inline int __pcilg_mio_inuser( * user address @dst */ exception = 1; - asm volatile ( + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile ( " sacf 256\n" "0: .insn rre,0xb9d60000,%[val],%[ioaddr_len]\n" "1: lhi %[exc],0\n" @@ -232,10 +244,10 @@ static inline int __pcilg_mio_inuser( : [ioaddr_len] "+&d" (ioaddr_len.pair), [exc] "+d" (exception), CC_OUT(cc, cc), [val] "=d" (val), [dst] "+a" (dst), [cnt] "+d" (cnt), [tmp] "=d" (tmp), - [shift] "+d" (shift) + [shift] "+a" (shift) : : CC_CLOBBER_LIST("memory")); - + disable_sacf_uaccess(sacf_flag); cc = exception ? -ENXIO : CC_TRANSFORM(cc); /* did we write everything to the user space buffer? 
*/ if (!cc && cnt != 0) @@ -315,14 +327,18 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) goto out_unlock_mmap; ret = -EACCES; - if (!(vma->vm_flags & VM_WRITE)) + if (!(vma->vm_flags & VM_READ)) goto out_unlock_mmap; args.vma = vma; args.address = mmio_addr; ret = follow_pfnmap_start(&args); - if (ret) - goto out_unlock_mmap; + if (ret) { + fixup_user_fault(current->mm, mmio_addr, 0, NULL); + ret = follow_pfnmap_start(&args); + if (ret) + goto out_unlock_mmap; + } io_addr = (void __iomem *)((args.pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index 2de1ea6c3a8c..0ecad08e1b1e 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -52,7 +52,6 @@ static DEVICE_ATTR_RO(mio_enabled); static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev) { - u8 status; int ret; pci_stop_and_remove_bus_device(pdev); @@ -70,16 +69,8 @@ static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev) return ret; } - ret = zpci_enable_device(zdev); - if (ret) - return ret; + ret = zpci_reenable_device(zdev); - if (zdev->dma_table) { - ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table), &status); - if (ret) - zpci_disable_device(zdev); - } return ret; } diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index 855f818deb98..d5c68ade71ab 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c @@ -54,6 +54,9 @@ static struct facility_def facility_defs[] = { #ifdef CONFIG_HAVE_MARCH_Z15_FEATURES 61, /* miscellaneous-instruction-extension 3 */ #endif +#ifdef CONFIG_HAVE_MARCH_Z17_FEATURES + 84, /* miscellaneous-instruction-extension 4 */ +#endif -1 /* END */ } }, |