Diffstat (limited to 'arch/s390')
35 files changed, 571 insertions(+), 261 deletions(-)
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index db20c1589a98..5b39918b7042 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -117,6 +117,7 @@ config S390 select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEBUG_PAGEALLOC select ARCH_SUPPORTS_HUGETLBFS + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && CC_IS_CLANG select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_SUPPORTS_PER_VMA_LOCK select ARCH_USE_BUILTIN_BSWAP @@ -469,19 +470,11 @@ config SCHED_SMT config SCHED_MC def_bool n -config SCHED_BOOK - def_bool n - -config SCHED_DRAWER - def_bool n - config SCHED_TOPOLOGY def_bool y prompt "Topology scheduler support" select SCHED_SMT select SCHED_MC - select SCHED_BOOK - select SCHED_DRAWER help Topology scheduler support improves the CPU scheduler's decision making when dealing with machines that have multi-threading, @@ -716,7 +709,6 @@ config EADM_SCH config VFIO_CCW def_tristate n prompt "Support for VFIO-CCW subchannels" - depends on S390_CCW_IOMMU depends on VFIO select VFIO_MDEV help @@ -728,7 +720,7 @@ config VFIO_CCW config VFIO_AP def_tristate n prompt "VFIO support for AP devices" - depends on S390_AP_IOMMU && KVM + depends on KVM depends on VFIO depends on ZCRYPT select VFIO_MDEV diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index acb1f8b53105..c67f59db7a51 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -45,6 +45,13 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat static pte_t pte_z; +static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode) +{ + start = PAGE_ALIGN_DOWN(__sha(start)); + end = PAGE_ALIGN(__sha(end)); + pgtable_populate(start, end, mode); +} + static void kasan_populate_shadow(void) { pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY); @@ -95,17 +102,17 @@ static void kasan_populate_shadow(void) */ for_each_physmem_usable_range(i, &start, &end) - pgtable_populate(__sha(start), __sha(end), POPULATE_KASAN_MAP_SHADOW); + kasan_populate(start, end, POPULATE_KASAN_MAP_SHADOW); if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { untracked_end = VMALLOC_START; /* shallowly populate kasan shadow for vmalloc and modules */ - pgtable_populate(__sha(VMALLOC_START), __sha(MODULES_END), POPULATE_KASAN_SHALLOW); + kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW); } else { untracked_end = MODULES_VADDR; } /* populate kasan shadow for untracked memory */ - pgtable_populate(__sha(ident_map_size), __sha(untracked_end), POPULATE_KASAN_ZERO_SHADOW); - pgtable_populate(__sha(MODULES_END), __sha(_REGION1_SIZE), POPULATE_KASAN_ZERO_SHADOW); + kasan_populate(ident_map_size, untracked_end, POPULATE_KASAN_ZERO_SHADOW); + kasan_populate(MODULES_END, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW); } static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr, diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 4ccf66d29fc2..aa95cf6dfabb 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -116,6 +116,7 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m +CONFIG_NET_TC_SKB_EXT=y CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y @@ -591,8 +592,6 @@ CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m -CONFIG_S390_CCW_IOMMU=y -CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -703,6 +702,7 @@ CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y 
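The kasan_populate() helper added in arch/s390/boot/vmem.c above centralizes the shadow translation: callers now pass real address ranges and the helper applies __sha() and page-aligns the resulting shadow range before calling pgtable_populate(). A minimal standalone sketch of that arithmetic follows; the shadow offset and the 1:8 scaling here are illustrative placeholders, not the kernel's actual values.

#include <stdio.h>

#define PAGE_SIZE          4096UL
#define PAGE_ALIGN(x)      (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#define PAGE_ALIGN_DOWN(x) ((x) & ~(PAGE_SIZE - 1))
/* Illustrative shadow mapping: one shadow byte per 8 bytes of memory. */
#define KASAN_SHADOW_OFFSET 0x1c000000000000UL	/* placeholder value */
#define __sha(x) (((unsigned long)(x) >> 3) + KASAN_SHADOW_OFFSET)

/* Mirrors the new helper: align the shadow range, not the input range. */
static void kasan_populate_sketch(unsigned long start, unsigned long end)
{
	start = PAGE_ALIGN_DOWN(__sha(start));
	end = PAGE_ALIGN(__sha(end));
	printf("populate shadow [%#lx, %#lx)\n", start, end);
}

int main(void)
{
	kasan_populate_sketch(0x10000UL, 0x2f000UL);
	return 0;
}

The same page-alignment idea reappears in the mm/vmem.c hunk further down, where the new set_memory_kasan() wraps set_memory_rwnx() the same way.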
CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" +CONFIG_INIT_STACK_NONE=y CONFIG_CRYPTO_USER=m # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set CONFIG_CRYPTO_PCRYPT=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 693297a2e897..f041945f9148 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -107,6 +107,7 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m +CONFIG_NET_TC_SKB_EXT=y CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y @@ -580,8 +581,6 @@ CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m -CONFIG_S390_CCW_IOMMU=y -CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -686,6 +685,7 @@ CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" +CONFIG_INIT_STACK_NONE=y CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_USER=m # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 33a232bb68af..6f68b39817ef 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -67,6 +67,7 @@ CONFIG_ZFCP=y # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_LSM="yama,loadpin,safesetid,integrity" +CONFIG_INIT_STACK_NONE=y # CONFIG_ZLIB_DFLTCC is not set CONFIG_XZ_DEC_MICROLZMA=y CONFIG_PRINTK_TIME=y diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c index 7752bd314558..5fae187f947a 100644 --- a/arch/s390/crypto/chacha-glue.c +++ b/arch/s390/crypto/chacha-glue.c @@ -82,7 +82,7 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, * it cannot handle a block of data or less, but otherwise * it can handle data of arbitrary size */ - if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20) + if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20 || !MACHINE_HAS_VX) chacha_crypt_generic(state, dst, src, bytes, nrounds); else chacha20_crypt_s390(state, dst, src, bytes, diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index 29dc827e0fe8..d29a9d908797 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -5,7 +5,7 @@ * s390 implementation of the AES Cipher Algorithm with protected keys. * * s390 Version: - * Copyright IBM Corp. 2017,2020 + * Copyright IBM Corp. 
2017, 2023 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> * Harald Freudenberger <freude@de.ibm.com> */ @@ -132,7 +132,8 @@ static inline int __paes_keyblob2pkey(struct key_blob *kb, if (i > 0 && ret == -EAGAIN && in_task()) if (msleep_interruptible(1000)) return -EINTR; - ret = pkey_keyblob2pkey(kb->key, kb->keylen, pk); + ret = pkey_keyblob2pkey(kb->key, kb->keylen, + pk->protkey, &pk->len, &pk->type); if (ret == 0) break; } @@ -145,6 +146,7 @@ static inline int __paes_convert_key(struct s390_paes_ctx *ctx) int ret; struct pkey_protkey pkey; + pkey.len = sizeof(pkey.protkey); ret = __paes_keyblob2pkey(&ctx->kb, &pkey); if (ret) return ret; @@ -414,6 +416,9 @@ static inline int __xts_paes_convert_key(struct s390_pxts_ctx *ctx) { struct pkey_protkey pkey0, pkey1; + pkey0.len = sizeof(pkey0.protkey); + pkey1.len = sizeof(pkey1.protkey); + if (__paes_keyblob2pkey(&ctx->kb[0], &pkey0) || __paes_keyblob2pkey(&ctx->kb[1], &pkey1)) return -EINVAL; diff --git a/arch/s390/include/asm/asm-prototypes.h b/arch/s390/include/asm/asm-prototypes.h index c37eb921bfbf..a873e873e1ee 100644 --- a/arch/s390/include/asm/asm-prototypes.h +++ b/arch/s390/include/asm/asm-prototypes.h @@ -6,4 +6,8 @@ #include <asm/fpu/api.h> #include <asm-generic/asm-prototypes.h> +__int128_t __ashlti3(__int128_t a, int b); +__int128_t __ashrti3(__int128_t a, int b); +__int128_t __lshrti3(__int128_t a, int b); + #endif /* _ASM_S390_PROTOTYPES_H */ diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h index 06e0e42f4eec..aae0315374de 100644 --- a/arch/s390/include/asm/cmpxchg.h +++ b/arch/s390/include/asm/cmpxchg.h @@ -190,38 +190,18 @@ static __always_inline unsigned long __cmpxchg(unsigned long address, #define arch_cmpxchg_local arch_cmpxchg #define arch_cmpxchg64_local arch_cmpxchg -#define system_has_cmpxchg_double() 1 +#define system_has_cmpxchg128() 1 -static __always_inline int __cmpxchg_double(unsigned long p1, unsigned long p2, - unsigned long o1, unsigned long o2, - unsigned long n1, unsigned long n2) +static __always_inline u128 arch_cmpxchg128(volatile u128 *ptr, u128 old, u128 new) { - union register_pair old = { .even = o1, .odd = o2, }; - union register_pair new = { .even = n1, .odd = n2, }; - int cc; - asm volatile( " cdsg %[old],%[new],%[ptr]\n" - " ipm %[cc]\n" - " srl %[cc],28\n" - : [cc] "=&d" (cc), [old] "+&d" (old.pair) - : [new] "d" (new.pair), - [ptr] "QS" (*(unsigned long *)p1), "Q" (*(unsigned long *)p2) + : [old] "+d" (old), [ptr] "+QS" (*ptr) + : [new] "d" (new) : "memory", "cc"); - return !cc; + return old; } -#define arch_cmpxchg_double(p1, p2, o1, o2, n1, n2) \ -({ \ - typeof(p1) __p1 = (p1); \ - typeof(p2) __p2 = (p2); \ - \ - BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \ - BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \ - VM_BUG_ON((unsigned long)((__p1) + 1) != (unsigned long)(__p2));\ - __cmpxchg_double((unsigned long)__p1, (unsigned long)__p2, \ - (unsigned long)(o1), (unsigned long)(o2), \ - (unsigned long)(n1), (unsigned long)(n2)); \ -}) +#define arch_cmpxchg128 arch_cmpxchg128 #endif /* __ASM_CMPXCHG_H */ diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index a386070f1d56..3cb9d813f022 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -112,7 +112,7 @@ struct compat_statfs64 { u32 f_namelen; u32 f_frsize; u32 f_flags; - u32 f_spare[4]; + u32 f_spare[5]; }; /* diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h index 646b12981f20..b378e2b57ad8 100644 --- 
a/arch/s390/include/asm/cpacf.h +++ b/arch/s390/include/asm/cpacf.h @@ -2,7 +2,7 @@ /* * CP Assist for Cryptographic Functions (CPACF) * - * Copyright IBM Corp. 2003, 2017 + * Copyright IBM Corp. 2003, 2023 * Author(s): Thomas Spatzier * Jan Glauber * Harald Freudenberger (freude@de.ibm.com) @@ -132,6 +132,11 @@ #define CPACF_PCKMO_ENC_AES_128_KEY 0x12 #define CPACF_PCKMO_ENC_AES_192_KEY 0x13 #define CPACF_PCKMO_ENC_AES_256_KEY 0x14 +#define CPACF_PCKMO_ENC_ECC_P256_KEY 0x20 +#define CPACF_PCKMO_ENC_ECC_P384_KEY 0x21 +#define CPACF_PCKMO_ENC_ECC_P521_KEY 0x22 +#define CPACF_PCKMO_ENC_ECC_ED25519_KEY 0x28 +#define CPACF_PCKMO_ENC_ECC_ED448_KEY 0x29 /* * Function codes for the PRNO (PERFORM RANDOM NUMBER OPERATION) diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 7e417d7de568..a0de5b9b02ea 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -140,7 +140,7 @@ union hws_trailer_header { unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */ unsigned long long overflow; /* 64 - Overflow Count */ }; - __uint128_t val; + u128 val; }; struct hws_trailer_entry { diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h index 0d1c74a7a650..a4d2e103f116 100644 --- a/arch/s390/include/asm/os_info.h +++ b/arch/s390/include/asm/os_info.h @@ -16,6 +16,9 @@ #define OS_INFO_VMCOREINFO 0 #define OS_INFO_REIPL_BLOCK 1 +#define OS_INFO_FLAGS_ENTRY 2 + +#define OS_INFO_FLAG_REIPL_CLEAR (1UL << 0) struct os_info_entry { u64 addr; @@ -30,8 +33,8 @@ struct os_info { u16 version_minor; u64 crashkernel_addr; u64 crashkernel_size; - struct os_info_entry entry[2]; - u8 reserved[4024]; + struct os_info_entry entry[3]; + u8 reserved[4004]; } __packed; void os_info_init(void); diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index 081837b391e3..264095dd84bc 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -148,6 +148,22 @@ #define this_cpu_cmpxchg_4(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) #define this_cpu_cmpxchg_8(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) +#define this_cpu_cmpxchg64(pcp, o, n) this_cpu_cmpxchg_8(pcp, o, n) + +#define this_cpu_cmpxchg128(pcp, oval, nval) \ +({ \ + typedef typeof(pcp) pcp_op_T__; \ + u128 old__, new__, ret__; \ + pcp_op_T__ *ptr__; \ + old__ = oval; \ + new__ = nval; \ + preempt_disable_notrace(); \ + ptr__ = raw_cpu_ptr(&(pcp)); \ + ret__ = cmpxchg128((void *)ptr__, old__, new__); \ + preempt_enable_notrace(); \ + ret__; \ +}) + #define arch_this_cpu_xchg(pcp, nval) \ ({ \ typeof(pcp) *ptr__; \ @@ -164,24 +180,6 @@ #define this_cpu_xchg_4(pcp, nval) arch_this_cpu_xchg(pcp, nval) #define this_cpu_xchg_8(pcp, nval) arch_this_cpu_xchg(pcp, nval) -#define arch_this_cpu_cmpxchg_double(pcp1, pcp2, o1, o2, n1, n2) \ -({ \ - typeof(pcp1) *p1__; \ - typeof(pcp2) *p2__; \ - int ret__; \ - \ - preempt_disable_notrace(); \ - p1__ = raw_cpu_ptr(&(pcp1)); \ - p2__ = raw_cpu_ptr(&(pcp2)); \ - ret__ = __cmpxchg_double((unsigned long)p1__, (unsigned long)p2__, \ - (unsigned long)(o1), (unsigned long)(o2), \ - (unsigned long)(n1), (unsigned long)(n2)); \ - preempt_enable_notrace(); \ - ret__; \ -}) - -#define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double - #include <asm-generic/percpu.h> #endif /* __ARCH_S390_PERCPU__ */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 6822a11c2c8a..c55f3c3365af 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ 
-42,9 +42,6 @@ static inline void update_page_count(int level, long count) atomic_long_add(count, &direct_pages_count[level]); } -struct seq_file; -void arch_report_meminfo(struct seq_file *m); - /* * The S390 doesn't have any external MMU info: the kernel page * tables contain all the necessary information. diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h index 8e9c582592b3..9e41a74fce9a 100644 --- a/arch/s390/include/asm/physmem_info.h +++ b/arch/s390/include/asm/physmem_info.h @@ -3,6 +3,7 @@ #define _ASM_S390_MEM_DETECT_H #include <linux/types.h> +#include <asm/page.h> enum physmem_info_source { MEM_DETECT_NONE = 0, @@ -133,7 +134,7 @@ static inline const char *get_rr_type_name(enum reserved_range_type t) #define for_each_physmem_reserved_type_range(t, range, p_start, p_end) \ for (range = &physmem_info.reserved[t], *p_start = range->start, *p_end = range->end; \ - range && range->end; range = range->chain, \ + range && range->end; range = range->chain ? __va(range->chain) : NULL, \ *p_start = range ? range->start : 0, *p_end = range ? range->end : 0) static inline struct reserved_range *__physmem_reserved_next(enum reserved_range_type *t, @@ -145,7 +146,7 @@ static inline struct reserved_range *__physmem_reserved_next(enum reserved_range return range; } if (range->chain) - return range->chain; + return __va(range->chain); while (++*t < RR_MAX) { range = &physmem_info.reserved[*t]; if (range->end) diff --git a/arch/s390/include/asm/pkey.h b/arch/s390/include/asm/pkey.h index dd3d20c332ac..47d80a7451a6 100644 --- a/arch/s390/include/asm/pkey.h +++ b/arch/s390/include/asm/pkey.h @@ -2,7 +2,7 @@ /* * Kernelspace interface to the pkey device driver * - * Copyright IBM Corp. 2016,2019 + * Copyright IBM Corp. 2016, 2023 * * Author: Harald Freudenberger <freude@de.ibm.com> * @@ -23,6 +23,6 @@ * @return 0 on success, negative errno value on failure */ int pkey_keyblob2pkey(const u8 *key, u32 keylen, - struct pkey_protkey *protkey); + u8 *protkey, u32 *protkeylen, u32 *protkeytype); #endif /* _KAPI_PKEY_H */ diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index ce878e85b6e4..4d646659a5f5 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -63,7 +63,7 @@ static inline int store_tod_clock_ext_cc(union tod_clock *clk) return cc; } -static inline void store_tod_clock_ext(union tod_clock *tod) +static __always_inline void store_tod_clock_ext(union tod_clock *tod) { asm volatile("stcke %0" : "=Q" (*tod) : : "cc"); } @@ -177,7 +177,7 @@ static inline void local_tick_enable(unsigned long comp) typedef unsigned long cycles_t; -static inline unsigned long get_tod_clock(void) +static __always_inline unsigned long get_tod_clock(void) { union tod_clock clk; @@ -204,6 +204,11 @@ void init_cpu_timer(void); extern union tod_clock tod_clock_base; +static __always_inline unsigned long __get_tod_clock_monotonic(void) +{ + return get_tod_clock() - tod_clock_base.tod; +} + /** * get_clock_monotonic - returns current time in clock rate units * @@ -216,7 +221,7 @@ static inline unsigned long get_tod_clock_monotonic(void) unsigned long tod; preempt_disable_notrace(); - tod = get_tod_clock() - tod_clock_base.tod; + tod = __get_tod_clock_monotonic(); preempt_enable_notrace(); return tod; } @@ -240,7 +245,7 @@ static inline unsigned long get_tod_clock_monotonic(void) * -> ns = (th * 125) + ((tl * 125) >> 9); * */ -static inline unsigned long tod_to_ns(unsigned long todval) +static __always_inline unsigned long 
tod_to_ns(unsigned long todval) { return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9); } diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h index 924b876f992c..f7bae1c63bd6 100644 --- a/arch/s390/include/uapi/asm/pkey.h +++ b/arch/s390/include/uapi/asm/pkey.h @@ -2,7 +2,7 @@ /* * Userspace interface to the pkey device driver * - * Copyright IBM Corp. 2017, 2019 + * Copyright IBM Corp. 2017, 2023 * * Author: Harald Freudenberger <freude@de.ibm.com> * @@ -32,10 +32,15 @@ #define MINKEYBLOBSIZE SECKEYBLOBSIZE /* defines for the type field within the pkey_protkey struct */ -#define PKEY_KEYTYPE_AES_128 1 -#define PKEY_KEYTYPE_AES_192 2 -#define PKEY_KEYTYPE_AES_256 3 -#define PKEY_KEYTYPE_ECC 4 +#define PKEY_KEYTYPE_AES_128 1 +#define PKEY_KEYTYPE_AES_192 2 +#define PKEY_KEYTYPE_AES_256 3 +#define PKEY_KEYTYPE_ECC 4 +#define PKEY_KEYTYPE_ECC_P256 5 +#define PKEY_KEYTYPE_ECC_P384 6 +#define PKEY_KEYTYPE_ECC_P521 7 +#define PKEY_KEYTYPE_ECC_ED25519 8 +#define PKEY_KEYTYPE_ECC_ED448 9 /* the newer ioctls use a pkey_key_type enum for type information */ enum pkey_key_type { diff --git a/arch/s390/include/uapi/asm/statfs.h b/arch/s390/include/uapi/asm/statfs.h index 72604f7792c3..f85b50723dd3 100644 --- a/arch/s390/include/uapi/asm/statfs.h +++ b/arch/s390/include/uapi/asm/statfs.h @@ -30,7 +30,7 @@ struct statfs { unsigned int f_namelen; unsigned int f_frsize; unsigned int f_flags; - unsigned int f_spare[4]; + unsigned int f_spare[5]; }; struct statfs64 { @@ -45,7 +45,7 @@ struct statfs64 { unsigned int f_namelen; unsigned int f_frsize; unsigned int f_flags; - unsigned int f_spare[4]; + unsigned int f_spare[5]; }; #endif diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 8983837b3565..6b2a051e1f8a 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -10,6 +10,7 @@ CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) # Do not trace early setup code CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_rethook.o = $(CC_FLAGS_FTRACE) endif diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 8a617be28bb4..7af69948b290 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -568,9 +568,9 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt) int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) { Elf64_Phdr *phdr_notes, *phdr_loads; + size_t alloc_size; int mem_chunk_cnt; void *ptr, *hdr; - u32 alloc_size; u64 hdr_off; /* If we are not in kdump or zfcp/nvme dump mode return */ diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 43de939b7af1..85a00d97a314 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -176,6 +176,8 @@ static bool reipl_fcp_clear; static bool reipl_ccw_clear; static bool reipl_eckd_clear; +static unsigned long os_info_flags; + static inline int __diag308(unsigned long subcode, unsigned long addr) { union register_pair r1; @@ -1935,14 +1937,27 @@ static struct shutdown_action __refdata dump_action = { static void dump_reipl_run(struct shutdown_trigger *trigger) { - unsigned long ipib = (unsigned long) reipl_block_actual; struct lowcore *abs_lc; unsigned int csum; + /* + * Set REIPL_CLEAR flag in os_info flags entry indicating + * 'clear' sysfs attribute has been set on the panicked system + * for specified reipl type. + * Always set for IPL_TYPE_NSS and IPL_TYPE_UNKNOWN. 
+ */ + if ((reipl_type == IPL_TYPE_CCW && reipl_ccw_clear) || + (reipl_type == IPL_TYPE_ECKD && reipl_eckd_clear) || + (reipl_type == IPL_TYPE_FCP && reipl_fcp_clear) || + (reipl_type == IPL_TYPE_NVME && reipl_nvme_clear) || + reipl_type == IPL_TYPE_NSS || + reipl_type == IPL_TYPE_UNKNOWN) + os_info_flags |= OS_INFO_FLAG_REIPL_CLEAR; + os_info_entry_add(OS_INFO_FLAGS_ENTRY, &os_info_flags, sizeof(os_info_flags)); csum = (__force unsigned int) csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0); abs_lc = get_abs_lowcore(); - abs_lc->ipib = ipib; + abs_lc->ipib = __pa(reipl_block_actual); abs_lc->ipib_checksum = csum; put_abs_lowcore(abs_lc); dump_run(trigger); diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index f1b35dcdf3eb..42215f9404af 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -352,7 +352,8 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, rc = apply_rela_bits(loc, val, 0, 64, 0, write); else if (r_type == R_390_GOTENT || r_type == R_390_GOTPLTENT) { - val += (Elf_Addr) me->mem[MOD_TEXT].base - loc; + val += (Elf_Addr)me->mem[MOD_TEXT].base + + me->arch.got_offset - loc; rc = apply_rela_bits(loc, val, 1, 32, 1, write); } break; diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index cf1b6e8a708d..90679143534b 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -76,6 +76,7 @@ static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest) } struct cpu_cf_events { + refcount_t refcnt; /* Reference count */ atomic_t ctr_set[CPUMF_CTR_SET_MAX]; u64 state; /* For perf_event_open SVC */ u64 dev_state; /* For /dev/hwctr */ @@ -88,9 +89,6 @@ struct cpu_cf_events { unsigned int sets; /* # Counter set saved in memory */ }; -/* Per-CPU event structure for the counter facility */ -static DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events); - static unsigned int cfdiag_cpu_speed; /* CPU speed for CF_DIAG trailer */ static debug_info_t *cf_dbg; @@ -103,6 +101,221 @@ static debug_info_t *cf_dbg; */ static struct cpumf_ctr_info cpumf_ctr_info; +struct cpu_cf_ptr { + struct cpu_cf_events *cpucf; +}; + +static struct cpu_cf_root { /* Anchor to per CPU data */ + refcount_t refcnt; /* Overall active events */ + struct cpu_cf_ptr __percpu *cfptr; +} cpu_cf_root; + +/* + * Serialize event initialization and event removal. Both are called from + * user space in task context with perf_event_open() and close() + * system calls. + * + * This mutex serializes functions cpum_cf_alloc_cpu() called at event + * initialization via cpumf_pmu_event_init() and function cpum_cf_free_cpu() + * called at event removal via call back function hw_perf_event_destroy() + * when the event is deleted. They are serialized to enforce correct + * bookkeeping of pointer and reference counts anchored by + * struct cpu_cf_root and the access to cpu_cf_root::refcnt and the + * per CPU pointers stored in cpu_cf_root::cfptr. + */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* + * Get pointer to per-cpu structure. + * + * Function get_cpu_cfhw() is called from + * - cfset_copy_all(): This function is protected by cpus_read_lock(), so + * CPU hot plug remove can not happen. Event removal requires a close() + * first. + * + * Function this_cpu_cfhw() is called from perf common code functions: + * - pmu_{en|dis}able(), pmu_{add|del}()and pmu_{start|stop}(): + * All functions execute with interrupts disabled on that particular CPU. 
+ * - cfset_ioctl_{on|off}, cfset_cpu_read(): see comment cfset_copy_all(). + * + * Therefore it is safe to access the CPU specific pointer to the event. + */ +static struct cpu_cf_events *get_cpu_cfhw(int cpu) +{ + struct cpu_cf_ptr __percpu *p = cpu_cf_root.cfptr; + + if (p) { + struct cpu_cf_ptr *q = per_cpu_ptr(p, cpu); + + return q->cpucf; + } + return NULL; +} + +static struct cpu_cf_events *this_cpu_cfhw(void) +{ + return get_cpu_cfhw(smp_processor_id()); +} + +/* Disable counter sets on dedicated CPU */ +static void cpum_cf_reset_cpu(void *flags) +{ + lcctl(0); +} + +/* Free per CPU data when the last event is removed. */ +static void cpum_cf_free_root(void) +{ + if (!refcount_dec_and_test(&cpu_cf_root.refcnt)) + return; + free_percpu(cpu_cf_root.cfptr); + cpu_cf_root.cfptr = NULL; + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + on_each_cpu(cpum_cf_reset_cpu, NULL, 1); + debug_sprintf_event(cf_dbg, 4, "%s2 root.refcnt %u cfptr %px\n", + __func__, refcount_read(&cpu_cf_root.refcnt), + cpu_cf_root.cfptr); +} + +/* + * On initialization of first event also allocate per CPU data dynamically. + * Start with an array of pointers, the array size is the maximum number of + * CPUs possible, which might be larger than the number of CPUs currently + * online. + */ +static int cpum_cf_alloc_root(void) +{ + int rc = 0; + + if (refcount_inc_not_zero(&cpu_cf_root.refcnt)) + return rc; + + /* The memory is already zeroed. */ + cpu_cf_root.cfptr = alloc_percpu(struct cpu_cf_ptr); + if (cpu_cf_root.cfptr) { + refcount_set(&cpu_cf_root.refcnt, 1); + on_each_cpu(cpum_cf_reset_cpu, NULL, 1); + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); + } else { + rc = -ENOMEM; + } + + return rc; +} + +/* Free CPU counter data structure for a PMU */ +static void cpum_cf_free_cpu(int cpu) +{ + struct cpu_cf_events *cpuhw; + struct cpu_cf_ptr *p; + + mutex_lock(&pmc_reserve_mutex); + /* + * When invoked via CPU hotplug handler, there might be no events + * installed or that particular CPU might not have an + * event installed. This anchor pointer can be NULL! + */ + if (!cpu_cf_root.cfptr) + goto out; + p = per_cpu_ptr(cpu_cf_root.cfptr, cpu); + cpuhw = p->cpucf; + /* + * Might be zero when called from CPU hotplug handler and no event + * installed on that CPU, but on different CPUs. + */ + if (!cpuhw) + goto out; + + if (refcount_dec_and_test(&cpuhw->refcnt)) { + kfree(cpuhw); + p->cpucf = NULL; + } + cpum_cf_free_root(); +out: + mutex_unlock(&pmc_reserve_mutex); +} + +/* Allocate CPU counter data structure for a PMU. Called under mutex lock. */ +static int cpum_cf_alloc_cpu(int cpu) +{ + struct cpu_cf_events *cpuhw; + struct cpu_cf_ptr *p; + int rc; + + mutex_lock(&pmc_reserve_mutex); + rc = cpum_cf_alloc_root(); + if (rc) + goto unlock; + p = per_cpu_ptr(cpu_cf_root.cfptr, cpu); + cpuhw = p->cpucf; + + if (!cpuhw) { + cpuhw = kzalloc(sizeof(*cpuhw), GFP_KERNEL); + if (cpuhw) { + p->cpucf = cpuhw; + refcount_set(&cpuhw->refcnt, 1); + } else { + rc = -ENOMEM; + } + } else { + refcount_inc(&cpuhw->refcnt); + } + if (rc) { + /* + * Error in allocation of event, decrement anchor. Since + * cpu_cf_event in not created, its destroy() function is not + * invoked. Adjust the reference counter for the anchor. + */ + cpum_cf_free_root(); + } +unlock: + mutex_unlock(&pmc_reserve_mutex); + return rc; +} + +/* + * Create/delete per CPU data structures for /dev/hwctr interface and events + * created by perf_event_open(). + * If cpu is -1, track task on all available CPUs. 
This requires + * allocation of hardware data structures for all CPUs. This setup handles + * perf_event_open() with task context and /dev/hwctr interface. + * If cpu is non-zero install event on this CPU only. This setup handles + * perf_event_open() with CPU context. + */ +static int cpum_cf_alloc(int cpu) +{ + cpumask_var_t mask; + int rc; + + if (cpu == -1) { + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + for_each_online_cpu(cpu) { + rc = cpum_cf_alloc_cpu(cpu); + if (rc) { + for_each_cpu(cpu, mask) + cpum_cf_free_cpu(cpu); + break; + } + cpumask_set_cpu(cpu, mask); + } + free_cpumask_var(mask); + } else { + rc = cpum_cf_alloc_cpu(cpu); + } + return rc; +} + +static void cpum_cf_free(int cpu) +{ + if (cpu == -1) { + for_each_online_cpu(cpu) + cpum_cf_free_cpu(cpu); + } else { + cpum_cf_free_cpu(cpu); + } +} + #define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */ /* interval in seconds */ @@ -451,10 +664,10 @@ static int validate_ctr_version(const u64 config, enum cpumf_ctr_set set) */ static void cpumf_pmu_enable(struct pmu *pmu) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); int err; - if (cpuhw->flags & PMU_F_ENABLED) + if (!cpuhw || (cpuhw->flags & PMU_F_ENABLED)) return; err = lcctl(cpuhw->state | cpuhw->dev_state); @@ -471,11 +684,11 @@ static void cpumf_pmu_enable(struct pmu *pmu) */ static void cpumf_pmu_disable(struct pmu *pmu) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); - int err; + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); u64 inactive; + int err; - if (!(cpuhw->flags & PMU_F_ENABLED)) + if (!cpuhw || !(cpuhw->flags & PMU_F_ENABLED)) return; inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); @@ -487,58 +700,10 @@ static void cpumf_pmu_disable(struct pmu *pmu) cpuhw->flags &= ~PMU_F_ENABLED; } -#define PMC_INIT 0UL -#define PMC_RELEASE 1UL - -static void cpum_cf_setup_cpu(void *flags) -{ - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); - - switch ((unsigned long)flags) { - case PMC_INIT: - cpuhw->flags |= PMU_F_RESERVED; - break; - - case PMC_RELEASE: - cpuhw->flags &= ~PMU_F_RESERVED; - break; - } - - /* Disable CPU counter sets */ - lcctl(0); - debug_sprintf_event(cf_dbg, 5, "%s flags %#x flags %#x state %#llx\n", - __func__, *(int *)flags, cpuhw->flags, - cpuhw->state); -} - -/* Initialize the CPU-measurement counter facility */ -static int __kernel_cpumcf_begin(void) -{ - on_each_cpu(cpum_cf_setup_cpu, (void *)PMC_INIT, 1); - irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); - - return 0; -} - -/* Release the CPU-measurement counter facility */ -static void __kernel_cpumcf_end(void) -{ - on_each_cpu(cpum_cf_setup_cpu, (void *)PMC_RELEASE, 1); - irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); -} - -/* Number of perf events counting hardware events */ -static atomic_t num_events = ATOMIC_INIT(0); -/* Used to avoid races in calling reserve/release_cpumf_hardware */ -static DEFINE_MUTEX(pmc_reserve_mutex); - /* Release the PMU if event is the last perf event */ static void hw_perf_event_destroy(struct perf_event *event) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_dec_return(&num_events) == 0) - __kernel_cpumcf_end(); - mutex_unlock(&pmc_reserve_mutex); + cpum_cf_free(event->cpu); } /* CPUMF <-> perf event mappings for kernel+userspace (basic set) */ @@ -562,14 +727,6 @@ static const int cpumf_generic_events_user[] = { [PERF_COUNT_HW_BUS_CYCLES] = -1, }; -static void cpumf_hw_inuse(void) -{ - 
mutex_lock(&pmc_reserve_mutex); - if (atomic_inc_return(&num_events) == 1) - __kernel_cpumcf_begin(); - mutex_unlock(&pmc_reserve_mutex); -} - static int is_userspace_event(u64 ev) { return cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev || @@ -653,7 +810,8 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type) } /* Initialize for using the CPU-measurement counter facility */ - cpumf_hw_inuse(); + if (cpum_cf_alloc(event->cpu)) + return -ENOMEM; event->destroy = hw_perf_event_destroy; /* @@ -756,7 +914,7 @@ static void cpumf_pmu_read(struct perf_event *event) static void cpumf_pmu_start(struct perf_event *event, int flags) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); struct hw_perf_event *hwc = &event->hw; int i; @@ -830,7 +988,7 @@ static int cfdiag_push_sample(struct perf_event *event, static void cpumf_pmu_stop(struct perf_event *event, int flags) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); struct hw_perf_event *hwc = &event->hw; int i; @@ -857,8 +1015,7 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags) false); if (cfdiag_diffctr(cpuhw, event->hw.config_base)) cfdiag_push_sample(event, cpuhw); - } else if (cpuhw->flags & PMU_F_RESERVED) { - /* Only update when PMU not hotplugged off */ + } else { hw_perf_event_update(event); } hwc->state |= PERF_HES_UPTODATE; @@ -867,7 +1024,7 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags) static int cpumf_pmu_add(struct perf_event *event, int flags) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); ctr_set_enable(&cpuhw->state, event->hw.config_base); event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; @@ -880,7 +1037,7 @@ static int cpumf_pmu_add(struct perf_event *event, int flags) static void cpumf_pmu_del(struct perf_event *event, int flags) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); int i; cpumf_pmu_stop(event, PERF_EF_UPDATE); @@ -912,29 +1069,83 @@ static struct pmu cpumf_pmu = { .read = cpumf_pmu_read, }; -static int cpum_cf_setup(unsigned int cpu, unsigned long flags) -{ - local_irq_disable(); - cpum_cf_setup_cpu((void *)flags); - local_irq_enable(); - return 0; -} +static struct cfset_session { /* CPUs and counter set bit mask */ + struct list_head head; /* Head of list of active processes */ +} cfset_session = { + .head = LIST_HEAD_INIT(cfset_session.head) +}; + +static refcount_t cfset_opencnt = REFCOUNT_INIT(0); /* Access count */ +/* + * Synchronize access to device /dev/hwc. This mutex protects against + * concurrent access to functions cfset_open() and cfset_release(). + * Same for CPU hotplug add and remove events triggering + * cpum_cf_online_cpu() and cpum_cf_offline_cpu(). + * It also serializes concurrent device ioctl access from multiple + * processes accessing /dev/hwc. + * + * The mutex protects concurrent access to the /dev/hwctr session management + * struct cfset_session and reference counting variable cfset_opencnt. + */ +static DEFINE_MUTEX(cfset_ctrset_mutex); +/* + * CPU hotplug handles only /dev/hwctr device. + * For perf_event_open() the CPU hotplug handling is done on kernel common + * code: + * - CPU add: Nothing is done since a file descriptor can not be created + * and returned to the user. 
+ * - CPU delete: Handled by common code via pmu_disable(), pmu_stop() and + * pmu_delete(). The event itself is removed when the file descriptor is + * closed. + */ static int cfset_online_cpu(unsigned int cpu); + static int cpum_cf_online_cpu(unsigned int cpu) { - debug_sprintf_event(cf_dbg, 4, "%s cpu %d in_irq %ld\n", __func__, - cpu, in_interrupt()); - cpum_cf_setup(cpu, PMC_INIT); - return cfset_online_cpu(cpu); + int rc = 0; + + debug_sprintf_event(cf_dbg, 4, "%s cpu %d root.refcnt %d " + "opencnt %d\n", __func__, cpu, + refcount_read(&cpu_cf_root.refcnt), + refcount_read(&cfset_opencnt)); + /* + * Ignore notification for perf_event_open(). + * Handle only /dev/hwctr device sessions. + */ + mutex_lock(&cfset_ctrset_mutex); + if (refcount_read(&cfset_opencnt)) { + rc = cpum_cf_alloc_cpu(cpu); + if (!rc) + cfset_online_cpu(cpu); + } + mutex_unlock(&cfset_ctrset_mutex); + return rc; } static int cfset_offline_cpu(unsigned int cpu); + static int cpum_cf_offline_cpu(unsigned int cpu) { - debug_sprintf_event(cf_dbg, 4, "%s cpu %d\n", __func__, cpu); - cfset_offline_cpu(cpu); - return cpum_cf_setup(cpu, PMC_RELEASE); + debug_sprintf_event(cf_dbg, 4, "%s cpu %d root.refcnt %d opencnt %d\n", + __func__, cpu, refcount_read(&cpu_cf_root.refcnt), + refcount_read(&cfset_opencnt)); + /* + * During task exit processing of grouped perf events triggered by CPU + * hotplug processing, pmu_disable() is called as part of perf context + * removal process. Therefore do not trigger event removal now for + * perf_event_open() created events. Perf common code triggers event + * destruction when the event file descriptor is closed. + * + * Handle only /dev/hwctr device sessions. + */ + mutex_lock(&cfset_ctrset_mutex); + if (refcount_read(&cfset_opencnt)) { + cfset_offline_cpu(cpu); + cpum_cf_free_cpu(cpu); + } + mutex_unlock(&cfset_ctrset_mutex); + return 0; } /* Return true if store counter set multiple instruction is available */ @@ -953,13 +1164,13 @@ static void cpumf_measurement_alert(struct ext_code ext_code, return; inc_irq_stat(IRQEXT_CMC); - cpuhw = this_cpu_ptr(&cpu_cf_events); /* * Measurement alerts are shared and might happen when the PMU * is not reserved. Ignore these alerts in this case. */ - if (!(cpuhw->flags & PMU_F_RESERVED)) + cpuhw = this_cpu_cfhw(); + if (!cpuhw) return; /* counter authorization change alert */ @@ -1039,19 +1250,11 @@ out1: * counter set via normal file operations. 
*/ -static atomic_t cfset_opencnt = ATOMIC_INIT(0); /* Access count */ -static DEFINE_MUTEX(cfset_ctrset_mutex);/* Synchronize access to hardware */ struct cfset_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */ unsigned int sets; /* Counter set bit mask */ atomic_t cpus_ack; /* # CPUs successfully executed func */ }; -static struct cfset_session { /* CPUs and counter set bit mask */ - struct list_head head; /* Head of list of active processes */ -} cfset_session = { - .head = LIST_HEAD_INIT(cfset_session.head) -}; - struct cfset_request { /* CPUs and counter set bit mask */ unsigned long ctrset; /* Bit mask of counter set to read */ cpumask_t mask; /* CPU mask to read from */ @@ -1113,11 +1316,11 @@ static void cfset_session_add(struct cfset_request *p) /* Stop all counter sets via ioctl interface */ static void cfset_ioctl_off(void *parm) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); struct cfset_call_on_cpu_parm *p = parm; int rc; - /* Check if any counter set used by /dev/hwc */ + /* Check if any counter set used by /dev/hwctr */ for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) if ((p->sets & cpumf_ctr_ctl[rc])) { if (!atomic_dec_return(&cpuhw->ctr_set[rc])) { @@ -1141,7 +1344,7 @@ static void cfset_ioctl_off(void *parm) /* Start counter sets on particular CPU */ static void cfset_ioctl_on(void *parm) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); struct cfset_call_on_cpu_parm *p = parm; int rc; @@ -1163,7 +1366,7 @@ static void cfset_ioctl_on(void *parm) static void cfset_release_cpu(void *p) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); int rc; debug_sprintf_event(cf_dbg, 4, "%s state %#llx dev_state %#llx\n", @@ -1203,27 +1406,41 @@ static int cfset_release(struct inode *inode, struct file *file) kfree(file->private_data); file->private_data = NULL; } - if (!atomic_dec_return(&cfset_opencnt)) + if (refcount_dec_and_test(&cfset_opencnt)) { /* Last close */ on_each_cpu(cfset_release_cpu, NULL, 1); + cpum_cf_free(-1); + } mutex_unlock(&cfset_ctrset_mutex); - - hw_perf_event_destroy(NULL); return 0; } +/* + * Open via /dev/hwctr device. Allocate all per CPU resources on the first + * open of the device. The last close releases all per CPU resources. + * Parallel perf_event_open system calls also use per CPU resources. + * These invocations are handled via reference counting on the per CPU data + * structures. 
+ */ static int cfset_open(struct inode *inode, struct file *file) { - if (!capable(CAP_SYS_ADMIN)) + int rc = 0; + + if (!perfmon_capable()) return -EPERM; + file->private_data = NULL; + mutex_lock(&cfset_ctrset_mutex); - if (atomic_inc_return(&cfset_opencnt) == 1) - cfset_session_init(); + if (!refcount_inc_not_zero(&cfset_opencnt)) { /* First open */ + rc = cpum_cf_alloc(-1); + if (!rc) { + cfset_session_init(); + refcount_set(&cfset_opencnt, 1); + } + } mutex_unlock(&cfset_ctrset_mutex); - cpumf_hw_inuse(); - file->private_data = NULL; /* nonseekable_open() never fails */ - return nonseekable_open(inode, file); + return rc ?: nonseekable_open(inode, file); } static int cfset_all_start(struct cfset_request *req) @@ -1280,7 +1497,7 @@ static int cfset_all_copy(unsigned long arg, cpumask_t *mask) ctrset_read = (struct s390_ctrset_read __user *)arg; uptr = ctrset_read->data; for_each_cpu(cpu, mask) { - struct cpu_cf_events *cpuhw = per_cpu_ptr(&cpu_cf_events, cpu); + struct cpu_cf_events *cpuhw = get_cpu_cfhw(cpu); struct s390_ctrset_cpudata __user *ctrset_cpudata; ctrset_cpudata = uptr; @@ -1324,7 +1541,7 @@ static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset, /* Read all counter sets. */ static void cfset_cpu_read(void *parm) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); struct cfset_call_on_cpu_parm *p = parm; int set, set_size; size_t space; @@ -1348,9 +1565,9 @@ static void cfset_cpu_read(void *parm) cpuhw->used += space; cpuhw->sets += 1; } + debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__, + cpuhw->sets, cpuhw->used); } - debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__, - cpuhw->sets, cpuhw->used); } static int cfset_all_read(unsigned long arg, struct cfset_request *req) @@ -1502,6 +1719,7 @@ static struct miscdevice cfset_dev = { .name = S390_HWCTR_DEVICE, .minor = MISC_DYNAMIC_MINOR, .fops = &cfset_fops, + .mode = 0666, }; /* Hotplug add of a CPU. Scan through all active processes and add @@ -1512,7 +1730,6 @@ static int cfset_online_cpu(unsigned int cpu) struct cfset_call_on_cpu_parm p; struct cfset_request *rp; - mutex_lock(&cfset_ctrset_mutex); if (!list_empty(&cfset_session.head)) { list_for_each_entry(rp, &cfset_session.head, node) { p.sets = rp->ctrset; @@ -1520,19 +1737,18 @@ static int cfset_online_cpu(unsigned int cpu) cpumask_set_cpu(cpu, &rp->mask); } } - mutex_unlock(&cfset_ctrset_mutex); return 0; } /* Hotplug remove of a CPU. Scan through all active processes and clear * that CPU from the list of CPUs supplied with ioctl(..., START, ...). + * Adjust reference counts. 
*/ static int cfset_offline_cpu(unsigned int cpu) { struct cfset_call_on_cpu_parm p; struct cfset_request *rp; - mutex_lock(&cfset_ctrset_mutex); if (!list_empty(&cfset_session.head)) { list_for_each_entry(rp, &cfset_session.head, node) { p.sets = rp->ctrset; @@ -1540,7 +1756,6 @@ static int cfset_offline_cpu(unsigned int cpu) cpumask_clear_cpu(cpu, &rp->mask); } } - mutex_unlock(&cfset_ctrset_mutex); return 0; } @@ -1618,7 +1833,8 @@ static int cfdiag_event_init(struct perf_event *event) } /* Initialize for using the CPU-measurement counter facility */ - cpumf_hw_inuse(); + if (cpum_cf_alloc(event->cpu)) + return -ENOMEM; event->destroy = hw_perf_event_destroy; err = cfdiag_event_init2(event); diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 7ef72f5ff52e..8ecfbce4ac92 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1271,16 +1271,6 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, } } -static inline __uint128_t __cdsg(__uint128_t *ptr, __uint128_t old, __uint128_t new) -{ - asm volatile( - " cdsg %[old],%[new],%[ptr]\n" - : [old] "+d" (old), [ptr] "+QS" (*ptr) - : [new] "d" (new) - : "memory", "cc"); - return old; -} - /* hw_perf_event_update() - Process sampling buffer * @event: The perf event * @flush_all: Flag to also flush partially filled sample-data-blocks @@ -1352,7 +1342,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) new.f = 0; new.a = 1; new.overflow = 0; - prev.val = __cdsg(&te->header.val, old.val, new.val); + prev.val = cmpxchg128(&te->header.val, old.val, new.val); } while (prev.val != old.val); /* Advance to next sample-data-block */ @@ -1562,7 +1552,7 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, } new.a = 1; new.overflow = 0; - prev.val = __cdsg(&te->header.val, old.val, new.val); + prev.val = cmpxchg128(&te->header.val, old.val, new.val); } while (prev.val != old.val); return true; } @@ -1636,7 +1626,7 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, new.a = 1; else new.a = 0; - prev.val = __cdsg(&te->header.val, old.val, new.val); + prev.val = cmpxchg128(&te->header.val, old.val, new.val); } while (prev.val != old.val); *overflow += orig_overflow; } diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index a7b339c4fd7c..fe7d1774ded1 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -36,7 +36,7 @@ struct paicrypt_map { unsigned long *page; /* Page for CPU to store counters */ struct pai_userdata *save; /* Page to store no-zero counters */ unsigned int active_events; /* # of PAI crypto users */ - unsigned int refcnt; /* Reference count mapped buffers */ + refcount_t refcnt; /* Reference count mapped buffers */ enum paievt_mode mode; /* Type of event */ struct perf_event *event; /* Perf event for sampling */ }; @@ -57,10 +57,11 @@ static void paicrypt_event_destroy(struct perf_event *event) static_branch_dec(&pai_key); mutex_lock(&pai_reserve_mutex); debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d" - " mode %d refcnt %d\n", __func__, + " mode %d refcnt %u\n", __func__, event->attr.config, event->cpu, - cpump->active_events, cpump->mode, cpump->refcnt); - if (!--cpump->refcnt) { + cpump->active_events, cpump->mode, + refcount_read(&cpump->refcnt)); + if (refcount_dec_and_test(&cpump->refcnt)) { debug_sprintf_event(cfm_dbg, 4, "%s page %#lx save %p\n", __func__, (unsigned long)cpump->page, 
cpump->save); @@ -149,8 +150,10 @@ static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump) /* Allocate memory for counter page and counter extraction. * Only the first counting event has to allocate a page. */ - if (cpump->page) + if (cpump->page) { + refcount_inc(&cpump->refcnt); goto unlock; + } rc = -ENOMEM; cpump->page = (unsigned long *)get_zeroed_page(GFP_KERNEL); @@ -164,18 +167,18 @@ static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump) goto unlock; } rc = 0; + refcount_set(&cpump->refcnt, 1); unlock: /* If rc is non-zero, do not set mode and reference count */ if (!rc) { - cpump->refcnt++; cpump->mode = a->sample_period ? PAI_MODE_SAMPLING : PAI_MODE_COUNTING; } debug_sprintf_event(cfm_dbg, 5, "%s sample_period %#llx users %d" - " mode %d refcnt %d page %#lx save %p rc %d\n", + " mode %d refcnt %u page %#lx save %p rc %d\n", __func__, a->sample_period, cpump->active_events, - cpump->mode, cpump->refcnt, + cpump->mode, refcount_read(&cpump->refcnt), (unsigned long)cpump->page, cpump->save, rc); mutex_unlock(&pai_reserve_mutex); return rc; diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index fcea307d7529..3b4f384f77f7 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -50,7 +50,7 @@ struct paiext_map { struct pai_userdata *save; /* Area to store non-zero counters */ enum paievt_mode mode; /* Type of event */ unsigned int active_events; /* # of PAI Extension users */ - unsigned int refcnt; + refcount_t refcnt; struct perf_event *event; /* Perf event for sampling */ struct paiext_cb *paiext_cb; /* PAI extension control block area */ }; @@ -60,14 +60,14 @@ struct paiext_mapptr { }; static struct paiext_root { /* Anchor to per CPU data */ - int refcnt; /* Overall active events */ + refcount_t refcnt; /* Overall active events */ struct paiext_mapptr __percpu *mapptr; } paiext_root; /* Free per CPU data when the last event is removed. */ static void paiext_root_free(void) { - if (!--paiext_root.refcnt) { + if (refcount_dec_and_test(&paiext_root.refcnt)) { free_percpu(paiext_root.mapptr); paiext_root.mapptr = NULL; } @@ -80,7 +80,7 @@ static void paiext_root_free(void) */ static int paiext_root_alloc(void) { - if (++paiext_root.refcnt == 1) { + if (!refcount_inc_not_zero(&paiext_root.refcnt)) { /* The memory is already zeroed. */ paiext_root.mapptr = alloc_percpu(struct paiext_mapptr); if (!paiext_root.mapptr) { @@ -91,6 +91,7 @@ static int paiext_root_alloc(void) */ return -ENOMEM; } + refcount_set(&paiext_root.refcnt, 1); } return 0; } @@ -122,7 +123,7 @@ static void paiext_event_destroy(struct perf_event *event) mutex_lock(&paiext_reserve_mutex); cpump->event = NULL; - if (!--cpump->refcnt) /* Last reference gone */ + if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */ paiext_free(mp); paiext_root_free(); mutex_unlock(&paiext_reserve_mutex); @@ -163,7 +164,7 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event) rc = -ENOMEM; cpump = kzalloc(sizeof(*cpump), GFP_KERNEL); if (!cpump) - goto unlock; + goto undo; /* Allocate memory for counter area and counter extraction. * These are @@ -183,8 +184,9 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event) GFP_KERNEL); if (!cpump->save || !cpump->area || !cpump->paiext_cb) { paiext_free(mp); - goto unlock; + goto undo; } + refcount_set(&cpump->refcnt, 1); cpump->mode = a->sample_period ? 
PAI_MODE_SAMPLING : PAI_MODE_COUNTING; } else { @@ -195,15 +197,15 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event) if (cpump->mode == PAI_MODE_SAMPLING || (cpump->mode == PAI_MODE_COUNTING && a->sample_period)) { rc = -EBUSY; - goto unlock; + goto undo; } + refcount_inc(&cpump->refcnt); } rc = 0; cpump->event = event; - ++cpump->refcnt; -unlock: +undo: if (rc) { /* Error in allocation of event, decrement anchor. Since * the event in not created, its destroy() function is never @@ -211,6 +213,7 @@ unlock: */ paiext_root_free(); } +unlock: mutex_unlock(&paiext_reserve_mutex); /* If rc is non-zero, no increment of counter/sampler was done. */ return rc; diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 6b7b6d5e3632..276278199c44 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -102,6 +102,11 @@ void __init time_early_init(void) ((long) qui.old_leap * 4096000000L); } +unsigned long long noinstr sched_clock_noinstr(void) +{ + return tod_to_ns(__get_tod_clock_monotonic()); +} + /* * Scheduler clock - returns current time in nanosec units. */ diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 9fd19530c9a5..68adf1de8888 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -95,7 +95,7 @@ out: static void cpu_thread_map(cpumask_t *dst, unsigned int cpu) { static cpumask_t mask; - int i; + unsigned int max_cpu; cpumask_clear(&mask); if (!cpumask_test_cpu(cpu, &cpu_setup_mask)) @@ -104,9 +104,10 @@ static void cpu_thread_map(cpumask_t *dst, unsigned int cpu) if (topology_mode != TOPOLOGY_MODE_HW) goto out; cpu -= cpu % (smp_cpu_mtid + 1); - for (i = 0; i <= smp_cpu_mtid; i++) { - if (cpumask_test_cpu(cpu + i, &cpu_setup_mask)) - cpumask_set_cpu(cpu + i, &mask); + max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1); + for (; cpu <= max_cpu; cpu++) { + if (cpumask_test_cpu(cpu, &cpu_setup_mask)) + cpumask_set_cpu(cpu, &mask); } out: cpumask_copy(dst, &mask); @@ -123,25 +124,26 @@ static void add_cpus_to_mask(struct topology_core *tl_core, unsigned int core; for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) { - unsigned int rcore; - int lcpu, i; + unsigned int max_cpu, rcore; + int cpu; rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin; - lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift); - if (lcpu < 0) + cpu = smp_find_processor_id(rcore << smp_cpu_mt_shift); + if (cpu < 0) continue; - for (i = 0; i <= smp_cpu_mtid; i++) { - topo = &cpu_topology[lcpu + i]; + max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1); + for (; cpu <= max_cpu; cpu++) { + topo = &cpu_topology[cpu]; topo->drawer_id = drawer->id; topo->book_id = book->id; topo->socket_id = socket->id; topo->core_id = rcore; - topo->thread_id = lcpu + i; + topo->thread_id = cpu; topo->dedicated = tl_core->d; - cpumask_set_cpu(lcpu + i, &drawer->mask); - cpumask_set_cpu(lcpu + i, &book->mask); - cpumask_set_cpu(lcpu + i, &socket->mask); - smp_cpu_set_polarization(lcpu + i, tl_core->pp); + cpumask_set_cpu(cpu, &drawer->mask); + cpumask_set_cpu(cpu, &book->mask); + cpumask_set_cpu(cpu, &socket->mask); + smp_cpu_set_polarization(cpu, tl_core->pp); } } } diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 580d2e3265cb..7c50eca85ca4 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -3,7 +3,7 @@ # Makefile for s390-specific library files.. 
# -lib-y += delay.o string.o uaccess.o find.o spinlock.o +lib-y += delay.o string.o uaccess.o find.o spinlock.o tishift.o obj-y += mem.o xor.o lib-$(CONFIG_KPROBES) += probes.o lib-$(CONFIG_UPROBES) += probes.o diff --git a/arch/s390/lib/tishift.S b/arch/s390/lib/tishift.S new file mode 100644 index 000000000000..de33cf02cfd2 --- /dev/null +++ b/arch/s390/lib/tishift.S @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <linux/linkage.h> +#include <asm/nospec-insn.h> +#include <asm/export.h> + + .section .noinstr.text, "ax" + + GEN_BR_THUNK %r14 + +SYM_FUNC_START(__ashlti3) + lmg %r0,%r1,0(%r3) + cije %r4,0,1f + lhi %r3,64 + sr %r3,%r4 + jnh 0f + srlg %r3,%r1,0(%r3) + sllg %r0,%r0,0(%r4) + sllg %r1,%r1,0(%r4) + ogr %r0,%r3 + j 1f +0: sllg %r0,%r1,-64(%r4) + lghi %r1,0 +1: stmg %r0,%r1,0(%r2) + BR_EX %r14 +SYM_FUNC_END(__ashlti3) +EXPORT_SYMBOL(__ashlti3) + +SYM_FUNC_START(__ashrti3) + lmg %r0,%r1,0(%r3) + cije %r4,0,1f + lhi %r3,64 + sr %r3,%r4 + jnh 0f + sllg %r3,%r0,0(%r3) + srlg %r1,%r1,0(%r4) + srag %r0,%r0,0(%r4) + ogr %r1,%r3 + j 1f +0: srag %r1,%r0,-64(%r4) + srag %r0,%r0,63 +1: stmg %r0,%r1,0(%r2) + BR_EX %r14 +SYM_FUNC_END(__ashrti3) +EXPORT_SYMBOL(__ashrti3) + +SYM_FUNC_START(__lshrti3) + lmg %r0,%r1,0(%r3) + cije %r4,0,1f + lhi %r3,64 + sr %r3,%r4 + jnh 0f + sllg %r3,%r0,0(%r3) + srlg %r1,%r1,0(%r4) + srlg %r0,%r0,0(%r4) + ogr %r1,%r3 + j 1f +0: srlg %r1,%r0,-64(%r4) + lghi %r0,0 +1: stmg %r0,%r1,0(%r2) + BR_EX %r14 +SYM_FUNC_END(__lshrti3) +EXPORT_SYMBOL(__lshrti3) diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 5ba3bd8a7b12..ca5a418c58a8 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -4,6 +4,7 @@ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> */ #include <linux/hugetlb.h> +#include <linux/proc_fs.h> #include <linux/vmalloc.h> #include <linux/mm.h> #include <asm/cacheflush.h> diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 5b22c6e24528..b9dcb4ae6c59 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -667,7 +667,15 @@ static void __init memblock_region_swap(void *a, void *b, int size) #ifdef CONFIG_KASAN #define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x)) + +static inline int set_memory_kasan(unsigned long start, unsigned long end) +{ + start = PAGE_ALIGN_DOWN(__sha(start)); + end = PAGE_ALIGN(__sha(end)); + return set_memory_rwnx(start, (end - start) >> PAGE_SHIFT); +} #endif + /* * map whole physical memory to virtual memory (identity mapping) * we reserve enough space in the vmalloc area for vmemmap to hotplug @@ -737,10 +745,8 @@ void __init vmem_map_init(void) } #ifdef CONFIG_KASAN - for_each_mem_range(i, &base, &end) { - set_memory_rwnx(__sha(base), - (__sha(end) - __sha(base)) >> PAGE_SHIFT); - } + for_each_mem_range(i, &base, &end) + set_memory_kasan(base, end); #endif set_memory_rox((unsigned long)_stext, (unsigned long)(_etext - _stext) >> PAGE_SHIFT); diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index cf14740abd1c..4e930f566878 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -26,6 +26,7 @@ KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding KBUILD_CFLAGS += -Os -m64 -msoft-float -fno-common KBUILD_CFLAGS += -fno-stack-protector +KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING KBUILD_CFLAGS += $(CLANG_FLAGS) KBUILD_CFLAGS += $(call cc-option,-fno-PIE) KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS)) |
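The tishift.S file added above provides the 128-bit shift helpers (__ashlti3, __ashrti3, __lshrti3) that the compiler emits calls to once ARCH_SUPPORTS_INT128 is selected by the Kconfig hunk at the top. As a readable cross-check of the assembly's two paths (shift count under 64 versus 64 and above), here is a hedged C rendering of __ashlti3 on explicit 64-bit halves; it is a sketch of the algorithm, not the kernel's implementation.

#include <assert.h>
#include <stdint.h>

struct ti { uint64_t hi, lo; };	/* the %r0/%r1 register pair in tishift.S */

static struct ti ashlti3(struct ti a, unsigned int b)
{
	struct ti r = a;

	if (b == 0)
		return r;		/* the "cije %r4,0,1f" early exit */
	if (b < 64) {			/* bits move across the two halves */
		r.hi = (a.hi << b) | (a.lo >> (64 - b));
		r.lo = a.lo << b;
	} else {			/* the "sllg %r0,%r1,-64(%r4)" path */
		r.hi = a.lo << (b - 64);
		r.lo = 0;
	}
	return r;
}

int main(void)
{
	struct ti v = { 0, 1 };

	assert(ashlti3(v, 65).hi == 2);
	assert(ashlti3(v, 3).lo == 8);
	return 0;
}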
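The cmpxchg.h hunk replaces the cdsg-based __cmpxchg_double() with arch_cmpxchg128(), which returns the previous 128-bit value instead of a condition code, and perf_cpum_sf.c drops its private __cdsg() copy in favor of the common cmpxchg128(). The retry loop on the sample trailer header keeps its usual shape; below is a userspace sketch of that shape, using GCC __int128 atomics in place of the CDSG instruction (link with -latomic on some targets).

#include <stdio.h>

static __int128 cmpxchg128_sketch(__int128 *ptr, __int128 old, __int128 new)
{
	/* On failure, __atomic_compare_exchange_n writes *ptr into old, so
	 * returning old mirrors cmpxchg128()'s "return previous value". */
	__atomic_compare_exchange_n(ptr, &old, new, 0,
				    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
	return old;
}

int main(void)
{
	__int128 val = 41, old, prev;

	do {	/* same retry shape as the trailer-header updates above */
		old = __atomic_load_n(&val, __ATOMIC_RELAXED);
		prev = cmpxchg128_sketch(&val, old, old + 1);
	} while (prev != old);
	printf("low 64 bits: %llu\n", (unsigned long long)val);
	return 0;
}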
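Several perf hunks (perf_cpum_cf.c, perf_pai_crypto.c, perf_pai_ext.c) convert plain integer reference counters to refcount_t and switch cpu_cf_events from a static per-CPU variable to storage allocated on first use and freed on last use, with pmc_reserve_mutex serializing the 0-to-1 and 1-to-0 transitions. A minimal userspace analogue of that bookkeeping, with illustrative names:

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t reserve_mutex = PTHREAD_MUTEX_INITIALIZER;
static unsigned int root_refcnt;	/* the kernel uses refcount_t here */
static void *root_data;			/* stands in for cpu_cf_root.cfptr */

static int root_get(void)
{
	int rc = 0;

	pthread_mutex_lock(&reserve_mutex);
	if (root_refcnt++ == 0) {	/* first event: allocate backing data */
		root_data = calloc(1, 4096);
		if (!root_data) {
			root_refcnt = 0;
			rc = -1;
		}
	}
	pthread_mutex_unlock(&reserve_mutex);
	return rc;
}

static void root_put(void)
{
	pthread_mutex_lock(&reserve_mutex);
	if (--root_refcnt == 0) {	/* last event: free backing data */
		free(root_data);
		root_data = NULL;
	}
	pthread_mutex_unlock(&reserve_mutex);
}

refcount_t additionally saturates on overflow and warns on underflow, which the removed plain counters such as num_events did not provide.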
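The timex.h comment reproduced above derives ns = (th * 125) + ((tl * 125) >> 9) by splitting todval at bit 9; because the high-part product is exactly divisible by 512, the split loses nothing relative to the naive (todval * 125) >> 9 while avoiding 64-bit overflow on large TOD values. A quick self-contained check of that identity over small values:

#include <assert.h>
#include <stdint.h>

static uint64_t tod_to_ns(uint64_t todval)
{
	/* Same split as arch/s390/include/asm/timex.h. */
	return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9);
}

int main(void)
{
	for (uint64_t tod = 0; tod < (1UL << 20); tod += 7)
		assert(tod_to_ns(tod) == (tod * 125) >> 9);
	return 0;
}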