Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig198
-rw-r--r--arch/s390/Kconfig.debug10
-rw-r--r--arch/s390/Makefile33
-rw-r--r--arch/s390/Makefile.postlink32
-rw-r--r--arch/s390/appldata/appldata_base.c16
-rw-r--r--arch/s390/appldata/appldata_os.c3
-rw-r--r--arch/s390/boot/.gitignore1
-rw-r--r--arch/s390/boot/Makefile41
-rw-r--r--arch/s390/boot/als.c49
-rw-r--r--arch/s390/boot/alternative.c138
-rw-r--r--arch/s390/boot/boot.h56
-rw-r--r--arch/s390/boot/decompressor.c10
-rw-r--r--arch/s390/boot/head.S29
-rwxr-xr-xarch/s390/boot/install.sh2
-rw-r--r--arch/s390/boot/ipl_data.c12
-rw-r--r--arch/s390/boot/ipl_parm.c67
-rw-r--r--arch/s390/boot/ipl_report.c5
-rw-r--r--arch/s390/boot/kaslr.c4
-rw-r--r--arch/s390/boot/kmsan.c6
-rw-r--r--arch/s390/boot/pgm_check.c92
-rw-r--r--arch/s390/boot/pgm_check_info.c181
-rw-r--r--arch/s390/boot/physmem_info.c214
-rw-r--r--arch/s390/boot/printk.c299
-rw-r--r--arch/s390/boot/stackprotector.c6
-rw-r--r--arch/s390/boot/startup.c289
-rw-r--r--arch/s390/boot/string.c28
-rw-r--r--arch/s390/boot/trampoline.S9
-rw-r--r--arch/s390/boot/uv.c15
-rw-r--r--arch/s390/boot/uv.h13
-rw-r--r--arch/s390/boot/vmem.c202
-rw-r--r--arch/s390/boot/vmlinux.lds.S9
-rw-r--r--arch/s390/configs/compat.config3
-rw-r--r--arch/s390/configs/debug_defconfig123
-rw-r--r--arch/s390/configs/defconfig117
-rw-r--r--arch/s390/configs/kasan.config2
-rw-r--r--arch/s390/configs/mmtypes.config2
-rw-r--r--arch/s390/configs/zfcpdump_defconfig7
-rw-r--r--arch/s390/crypto/Kconfig84
-rw-r--r--arch/s390/crypto/Makefile12
-rw-r--r--arch/s390/crypto/aes_s390.c170
-rw-r--r--arch/s390/crypto/arch_random.c1
-rw-r--r--arch/s390/crypto/chacha-glue.c130
-rw-r--r--arch/s390/crypto/chacha-s390.S908
-rw-r--r--arch/s390/crypto/chacha-s390.h14
-rw-r--r--arch/s390/crypto/crc32-vx.c305
-rw-r--r--arch/s390/crypto/crc32-vx.h12
-rw-r--r--arch/s390/crypto/crc32be-vx.c174
-rw-r--r--arch/s390/crypto/crc32le-vx.c240
-rw-r--r--arch/s390/crypto/ghash_s390.c104
-rw-r--r--arch/s390/crypto/hmac_s390.c426
-rw-r--r--arch/s390/crypto/paes_s390.c1862
-rw-r--r--arch/s390/crypto/phmac_s390.c1063
-rw-r--r--arch/s390/crypto/prng.c17
-rw-r--r--arch/s390/crypto/sha.h35
-rw-r--r--arch/s390/crypto/sha1_s390.c103
-rw-r--r--arch/s390/crypto/sha256_s390.c143
-rw-r--r--arch/s390/crypto/sha3_256_s390.c146
-rw-r--r--arch/s390/crypto/sha3_512_s390.c154
-rw-r--r--arch/s390/crypto/sha512_s390.c149
-rw-r--r--arch/s390/crypto/sha_common.c124
-rw-r--r--arch/s390/hypfs/hypfs.h9
-rw-r--r--arch/s390/hypfs/hypfs_dbfs.c24
-rw-r--r--arch/s390/hypfs/hypfs_diag.c27
-rw-r--r--arch/s390/hypfs/hypfs_diag.h2
-rw-r--r--arch/s390/hypfs/hypfs_diag0c.c5
-rw-r--r--arch/s390/hypfs/hypfs_diag_fs.c68
-rw-r--r--arch/s390/hypfs/hypfs_sprp.c8
-rw-r--r--arch/s390/hypfs/hypfs_vm.c5
-rw-r--r--arch/s390/hypfs/hypfs_vm_fs.c21
-rw-r--r--arch/s390/hypfs/inode.c91
-rw-r--r--arch/s390/include/asm/Kbuild1
-rw-r--r--arch/s390/include/asm/abs_lowcore.h1
-rw-r--r--arch/s390/include/asm/alternative-asm.h57
-rw-r--r--arch/s390/include/asm/alternative.h155
-rw-r--r--arch/s390/include/asm/ap.h50
-rw-r--r--arch/s390/include/asm/appldata.h3
-rw-r--r--arch/s390/include/asm/arch-stackprotector.h25
-rw-r--r--arch/s390/include/asm/arch_hweight.h77
-rw-r--r--arch/s390/include/asm/asce.h36
-rw-r--r--arch/s390/include/asm/asm-const.h2
-rw-r--r--arch/s390/include/asm/asm-extable.h22
-rw-r--r--arch/s390/include/asm/asm.h51
-rw-r--r--arch/s390/include/asm/atomic.h96
-rw-r--r--arch/s390/include/asm/atomic_ops.h178
-rw-r--r--arch/s390/include/asm/barrier.h12
-rw-r--r--arch/s390/include/asm/bitops.h302
-rw-r--r--arch/s390/include/asm/boot_data.h51
-rw-r--r--arch/s390/include/asm/bug.h102
-rw-r--r--arch/s390/include/asm/ccwdev.h2
-rw-r--r--arch/s390/include/asm/checksum.h6
-rw-r--r--arch/s390/include/asm/cio.h2
-rw-r--r--arch/s390/include/asm/cmpxchg.h376
-rw-r--r--arch/s390/include/asm/compat.h140
-rw-r--r--arch/s390/include/asm/cpacf.h256
-rw-r--r--arch/s390/include/asm/cpu.h4
-rw-r--r--arch/s390/include/asm/cpu_mf-insn.h4
-rw-r--r--arch/s390/include/asm/cpu_mf.h69
-rw-r--r--arch/s390/include/asm/cpufeature.h14
-rw-r--r--arch/s390/include/asm/css_chars.h2
-rw-r--r--arch/s390/include/asm/ctlreg.h17
-rw-r--r--arch/s390/include/asm/current.h18
-rw-r--r--arch/s390/include/asm/dat-bits.h170
-rw-r--r--arch/s390/include/asm/debug.h15
-rw-r--r--arch/s390/include/asm/diag.h21
-rw-r--r--arch/s390/include/asm/diag288.h41
-rw-r--r--arch/s390/include/asm/dwarf.h4
-rw-r--r--arch/s390/include/asm/ebcdic.h16
-rw-r--r--arch/s390/include/asm/elf.h87
-rw-r--r--arch/s390/include/asm/entry-common.h10
-rw-r--r--arch/s390/include/asm/extmem.h2
-rw-r--r--arch/s390/include/asm/facility.h60
-rw-r--r--arch/s390/include/asm/fprobe.h10
-rw-r--r--arch/s390/include/asm/fpu-insn-asm.h26
-rw-r--r--arch/s390/include/asm/fpu-insn.h208
-rw-r--r--arch/s390/include/asm/fpu.h7
-rw-r--r--arch/s390/include/asm/ftrace.h102
-rw-r--r--arch/s390/include/asm/futex.h113
-rw-r--r--arch/s390/include/asm/gmap.h28
-rw-r--r--arch/s390/include/asm/gmap_helpers.h15
-rw-r--r--arch/s390/include/asm/hardirq.h6
-rw-r--r--arch/s390/include/asm/hiperdispatch.h14
-rw-r--r--arch/s390/include/asm/hugetlb.h105
-rw-r--r--arch/s390/include/asm/idals.h76
-rw-r--r--arch/s390/include/asm/io.h6
-rw-r--r--arch/s390/include/asm/irq.h7
-rw-r--r--arch/s390/include/asm/irqflags.h17
-rw-r--r--arch/s390/include/asm/jump_label.h4
-rw-r--r--arch/s390/include/asm/kexec.h3
-rw-r--r--arch/s390/include/asm/kfence.h17
-rw-r--r--arch/s390/include/asm/kmsan.h59
-rw-r--r--arch/s390/include/asm/kvm_host.h369
-rw-r--r--arch/s390/include/asm/kvm_host_types.h348
-rw-r--r--arch/s390/include/asm/kvm_para.h2
-rw-r--r--arch/s390/include/asm/lowcore.h51
-rw-r--r--arch/s390/include/asm/machine.h104
-rw-r--r--arch/s390/include/asm/march.h42
-rw-r--r--arch/s390/include/asm/mem_encrypt.h4
-rw-r--r--arch/s390/include/asm/mmu.h3
-rw-r--r--arch/s390/include/asm/mmu_context.h28
-rw-r--r--arch/s390/include/asm/mmzone.h17
-rw-r--r--arch/s390/include/asm/module.h14
-rw-r--r--arch/s390/include/asm/nmi.h4
-rw-r--r--arch/s390/include/asm/nospec-branch.h17
-rw-r--r--arch/s390/include/asm/nospec-insn.h7
-rw-r--r--arch/s390/include/asm/page-states.h3
-rw-r--r--arch/s390/include/asm/page.h121
-rw-r--r--arch/s390/include/asm/pai.h28
-rw-r--r--arch/s390/include/asm/pci.h44
-rw-r--r--arch/s390/include/asm/pci_clp.h17
-rw-r--r--arch/s390/include/asm/pci_dma.h3
-rw-r--r--arch/s390/include/asm/pci_insn.h10
-rw-r--r--arch/s390/include/asm/pci_io.h6
-rw-r--r--arch/s390/include/asm/percpu.h26
-rw-r--r--arch/s390/include/asm/perf_event.h31
-rw-r--r--arch/s390/include/asm/pgalloc.h71
-rw-r--r--arch/s390/include/asm/pgtable.h434
-rw-r--r--arch/s390/include/asm/physmem_info.h7
-rw-r--r--arch/s390/include/asm/pkey.h17
-rw-r--r--arch/s390/include/asm/preempt.h121
-rw-r--r--arch/s390/include/asm/processor.h84
-rw-r--r--arch/s390/include/asm/ptrace.h60
-rw-r--r--arch/s390/include/asm/purgatory.h4
-rw-r--r--arch/s390/include/asm/runtime-const.h77
-rw-r--r--arch/s390/include/asm/rwonce.h2
-rw-r--r--arch/s390/include/asm/sclp.h43
-rw-r--r--arch/s390/include/asm/seccomp.h5
-rw-r--r--arch/s390/include/asm/set_memory.h2
-rw-r--r--arch/s390/include/asm/setup.h48
-rw-r--r--arch/s390/include/asm/sigp.h15
-rw-r--r--arch/s390/include/asm/skey.h32
-rw-r--r--arch/s390/include/asm/smp.h33
-rw-r--r--arch/s390/include/asm/softirq_stack.h2
-rw-r--r--arch/s390/include/asm/sparsemem.h18
-rw-r--r--arch/s390/include/asm/spinlock.h40
-rw-r--r--arch/s390/include/asm/spinlock_types.h2
-rw-r--r--arch/s390/include/asm/stackprotector.h16
-rw-r--r--arch/s390/include/asm/stacktrace.h6
-rw-r--r--arch/s390/include/asm/stp.h1
-rw-r--r--arch/s390/include/asm/string.h42
-rw-r--r--arch/s390/include/asm/syscall.h48
-rw-r--r--arch/s390/include/asm/syscall_wrapper.h95
-rw-r--r--arch/s390/include/asm/sysinfo.h28
-rw-r--r--arch/s390/include/asm/thread_info.h62
-rw-r--r--arch/s390/include/asm/timex.h68
-rw-r--r--arch/s390/include/asm/tlb.h19
-rw-r--r--arch/s390/include/asm/tlbflush.h22
-rw-r--r--arch/s390/include/asm/topology.h9
-rw-r--r--arch/s390/include/asm/tpi.h4
-rw-r--r--arch/s390/include/asm/trace/ap.h87
-rw-r--r--arch/s390/include/asm/trace/hiperdispatch.h58
-rw-r--r--arch/s390/include/asm/trace/zcrypt.h44
-rw-r--r--arch/s390/include/asm/types.h4
-rw-r--r--arch/s390/include/asm/uaccess.h735
-rw-r--r--arch/s390/include/asm/unistd.h9
-rw-r--r--arch/s390/include/asm/uv.h223
-rw-r--r--arch/s390/include/asm/vdso-symbols.h9
-rw-r--r--arch/s390/include/asm/vdso.h23
-rw-r--r--arch/s390/include/asm/vdso/data.h12
-rw-r--r--arch/s390/include/asm/vdso/getrandom.h28
-rw-r--r--arch/s390/include/asm/vdso/gettimeofday.h23
-rw-r--r--arch/s390/include/asm/vdso/time_data.h11
-rw-r--r--arch/s390/include/asm/vdso/vsyscall.h14
-rw-r--r--arch/s390/include/asm/vtime.h16
-rw-r--r--arch/s390/include/asm/word-at-a-time.h2
-rw-r--r--arch/s390/include/uapi/asm/bitsperlong.h4
-rw-r--r--arch/s390/include/uapi/asm/dasd.h2
-rw-r--r--arch/s390/include/uapi/asm/diag.h32
-rw-r--r--arch/s390/include/uapi/asm/ipcbuf.h3
-rw-r--r--arch/s390/include/uapi/asm/kvm.h3
-rw-r--r--arch/s390/include/uapi/asm/pkey.h41
-rw-r--r--arch/s390/include/uapi/asm/posix_types.h13
-rw-r--r--arch/s390/include/uapi/asm/ptrace.h129
-rw-r--r--arch/s390/include/uapi/asm/schid.h4
-rw-r--r--arch/s390/include/uapi/asm/sigcontext.h15
-rw-r--r--arch/s390/include/uapi/asm/stat.h70
-rw-r--r--arch/s390/include/uapi/asm/types.h4
-rw-r--r--arch/s390/include/uapi/asm/unistd.h4
-rw-r--r--arch/s390/include/uapi/asm/uvdevice.h32
-rw-r--r--arch/s390/kernel/Makefile25
-rw-r--r--arch/s390/kernel/abs_lowcore.c1
-rw-r--r--arch/s390/kernel/alternative.c120
-rw-r--r--arch/s390/kernel/asm-offsets.c41
-rw-r--r--arch/s390/kernel/audit.c16
-rw-r--r--arch/s390/kernel/audit.h16
-rw-r--r--arch/s390/kernel/cert_store.c4
-rw-r--r--arch/s390/kernel/compat_audit.c48
-rw-r--r--arch/s390/kernel/compat_linux.c289
-rw-r--r--arch/s390/kernel/compat_linux.h101
-rw-r--r--arch/s390/kernel/compat_ptrace.h64
-rw-r--r--arch/s390/kernel/compat_signal.c420
-rw-r--r--arch/s390/kernel/cpacf.c118
-rw-r--r--arch/s390/kernel/cpcmd.c13
-rw-r--r--arch/s390/kernel/cpufeature.c6
-rw-r--r--arch/s390/kernel/crash_dump.c113
-rw-r--r--arch/s390/kernel/ctlreg.c1
-rw-r--r--arch/s390/kernel/debug.c360
-rw-r--r--arch/s390/kernel/diag/Makefile1
-rw-r--r--arch/s390/kernel/diag/diag.c (renamed from arch/s390/kernel/diag.c)45
-rw-r--r--arch/s390/kernel/diag/diag310.c276
-rw-r--r--arch/s390/kernel/diag/diag324.c224
-rw-r--r--arch/s390/kernel/diag/diag_ioctl.h14
-rw-r--r--arch/s390/kernel/diag/diag_misc.c63
-rw-r--r--arch/s390/kernel/dis.c38
-rw-r--r--arch/s390/kernel/dumpstack.c24
-rw-r--r--arch/s390/kernel/early.c181
-rw-r--r--arch/s390/kernel/early_printk.c16
-rw-r--r--arch/s390/kernel/earlypgm.S23
-rw-r--r--arch/s390/kernel/entry.S374
-rw-r--r--arch/s390/kernel/entry.h3
-rw-r--r--arch/s390/kernel/facility.c1
-rw-r--r--arch/s390/kernel/fpu.c4
-rw-r--r--arch/s390/kernel/ftrace.c147
-rw-r--r--arch/s390/kernel/ftrace.h2
-rw-r--r--arch/s390/kernel/guarded_storage.c3
-rw-r--r--arch/s390/kernel/head.S (renamed from arch/s390/kernel/head64.S)8
-rw-r--r--arch/s390/kernel/hiperdispatch.c430
-rw-r--r--arch/s390/kernel/idle.c11
-rw-r--r--arch/s390/kernel/ipl.c262
-rw-r--r--arch/s390/kernel/irq.c41
-rw-r--r--arch/s390/kernel/kexec_elf.c2
-rw-r--r--arch/s390/kernel/kexec_image.c2
-rw-r--r--arch/s390/kernel/kprobes.c22
-rw-r--r--arch/s390/kernel/lgr.c2
-rw-r--r--arch/s390/kernel/machine_kexec.c12
-rw-r--r--arch/s390/kernel/machine_kexec_file.c6
-rw-r--r--arch/s390/kernel/mcount.S28
-rw-r--r--arch/s390/kernel/module.c21
-rw-r--r--arch/s390/kernel/nmi.c117
-rw-r--r--arch/s390/kernel/nospec-branch.c16
-rw-r--r--arch/s390/kernel/nospec-sysfs.c12
-rw-r--r--arch/s390/kernel/numa.c11
-rw-r--r--arch/s390/kernel/os_info.c6
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c59
-rw-r--r--arch/s390/kernel/perf_cpum_cf_events.c171
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c447
-rw-r--r--arch/s390/kernel/perf_event.c11
-rw-r--r--arch/s390/kernel/perf_pai.c1230
-rw-r--r--arch/s390/kernel/perf_pai_crypto.c804
-rw-r--r--arch/s390/kernel/perf_pai_ext.c695
-rw-r--r--arch/s390/kernel/perf_regs.c3
-rw-r--r--arch/s390/kernel/process.c18
-rw-r--r--arch/s390/kernel/processor.c53
-rw-r--r--arch/s390/kernel/ptrace.c600
-rw-r--r--arch/s390/kernel/reipl.S26
-rw-r--r--arch/s390/kernel/setup.c174
-rw-r--r--arch/s390/kernel/signal.c29
-rw-r--r--arch/s390/kernel/skey.c48
-rw-r--r--arch/s390/kernel/smp.c312
-rw-r--r--arch/s390/kernel/stackprotector.c156
-rw-r--r--arch/s390/kernel/stacktrace.c25
-rw-r--r--arch/s390/kernel/sthyi.c109
-rw-r--r--arch/s390/kernel/syscall.c63
-rw-r--r--arch/s390/kernel/syscalls/Makefile57
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl850
-rwxr-xr-xarch/s390/kernel/syscalls/syscalltbl232
-rw-r--r--arch/s390/kernel/sysinfo.c51
-rw-r--r--arch/s390/kernel/text_amode31.S3
-rw-r--r--arch/s390/kernel/time.c195
-rw-r--r--arch/s390/kernel/topology.c182
-rw-r--r--arch/s390/kernel/traps.c171
-rw-r--r--arch/s390/kernel/unwind_bc.c6
-rw-r--r--arch/s390/kernel/uprobes.c13
-rw-r--r--arch/s390/kernel/uv.c703
-rw-r--r--arch/s390/kernel/vdso.c159
-rw-r--r--arch/s390/kernel/vdso/.gitignore (renamed from arch/s390/kernel/vdso32/.gitignore)2
-rw-r--r--arch/s390/kernel/vdso/Makefile76
-rwxr-xr-xarch/s390/kernel/vdso/gen_vdso_offsets.sh (renamed from arch/s390/kernel/vdso64/gen_vdso_offsets.sh)2
-rw-r--r--arch/s390/kernel/vdso/getcpu.c (renamed from arch/s390/kernel/vdso64/getcpu.c)0
-rw-r--r--arch/s390/kernel/vdso/note.S (renamed from arch/s390/kernel/vdso32/note.S)0
-rw-r--r--arch/s390/kernel/vdso/vdso.h (renamed from arch/s390/kernel/vdso64/vdso.h)7
-rw-r--r--arch/s390/kernel/vdso/vdso.lds.S (renamed from arch/s390/kernel/vdso64/vdso64.lds.S)58
-rw-r--r--arch/s390/kernel/vdso/vdso_generic.c (renamed from arch/s390/kernel/vdso64/vdso64_generic.c)0
-rw-r--r--arch/s390/kernel/vdso/vdso_user_wrapper.S (renamed from arch/s390/kernel/vdso64/vdso_user_wrapper.S)14
-rw-r--r--arch/s390/kernel/vdso/vdso_wrapper.S (renamed from arch/s390/kernel/vdso32/vdso32_wrapper.S)8
-rw-r--r--arch/s390/kernel/vdso/vgetrandom-chacha.S181
-rw-r--r--arch/s390/kernel/vdso/vgetrandom.c14
-rw-r--r--arch/s390/kernel/vdso32/Makefile64
-rwxr-xr-xarch/s390/kernel/vdso32/gen_vdso_offsets.sh15
-rw-r--r--arch/s390/kernel/vdso32/vdso32.lds.S141
-rw-r--r--arch/s390/kernel/vdso32/vdso_user_wrapper.S22
-rw-r--r--arch/s390/kernel/vdso64/.gitignore2
-rw-r--r--arch/s390/kernel/vdso64/Makefile74
-rw-r--r--arch/s390/kernel/vdso64/note.S13
-rw-r--r--arch/s390/kernel/vdso64/vdso64_wrapper.S15
-rw-r--r--arch/s390/kernel/vmcore_info.c3
-rw-r--r--arch/s390/kernel/vmlinux.lds.S100
-rw-r--r--arch/s390/kernel/vtime.c82
-rw-r--r--arch/s390/kernel/wti.c215
-rw-r--r--arch/s390/kvm/Kconfig2
-rw-r--r--arch/s390/kvm/Makefile2
-rw-r--r--arch/s390/kvm/diag.c32
-rw-r--r--arch/s390/kvm/gaccess.c261
-rw-r--r--arch/s390/kvm/gaccess.h14
-rw-r--r--arch/s390/kvm/gmap-vsie.c141
-rw-r--r--arch/s390/kvm/intercept.c19
-rw-r--r--arch/s390/kvm/interrupt.c180
-rw-r--r--arch/s390/kvm/kvm-s390.c725
-rw-r--r--arch/s390/kvm/kvm-s390.h85
-rw-r--r--arch/s390/kvm/pci.c24
-rw-r--r--arch/s390/kvm/priv.c16
-rw-r--r--arch/s390/kvm/pv.c98
-rw-r--r--arch/s390/kvm/trace-s390.h4
-rw-r--r--arch/s390/kvm/vsie.c204
-rw-r--r--arch/s390/lib/delay.c1
-rw-r--r--arch/s390/lib/mem.S15
-rw-r--r--arch/s390/lib/spinlock.c85
-rw-r--r--arch/s390/lib/string.c65
-rw-r--r--arch/s390/lib/test_kprobes.c1
-rw-r--r--arch/s390/lib/test_modules.c1
-rw-r--r--arch/s390/lib/test_unwind.c11
-rw-r--r--arch/s390/lib/uaccess.c357
-rw-r--r--arch/s390/lib/xor.c63
-rw-r--r--arch/s390/mm/Makefile4
-rw-r--r--arch/s390/mm/cmm.c39
-rw-r--r--arch/s390/mm/dump_pagetables.c266
-rw-r--r--arch/s390/mm/extable.c77
-rw-r--r--arch/s390/mm/extmem.c58
-rw-r--r--arch/s390/mm/fault.c297
-rw-r--r--arch/s390/mm/gmap.c854
-rw-r--r--arch/s390/mm/gmap_helpers.c233
-rw-r--r--arch/s390/mm/hugetlbpage.c142
-rw-r--r--arch/s390/mm/init.c63
-rw-r--r--arch/s390/mm/maccess.c7
-rw-r--r--arch/s390/mm/mmap.c75
-rw-r--r--arch/s390/mm/pageattr.c47
-rw-r--r--arch/s390/mm/pfault.c8
-rw-r--r--arch/s390/mm/pgalloc.c112
-rw-r--r--arch/s390/mm/pgtable.c143
-rw-r--r--arch/s390/mm/vmem.c56
-rw-r--r--arch/s390/net/Makefile2
-rw-r--r--arch/s390/net/bpf_jit.h55
-rw-r--r--arch/s390/net/bpf_jit_comp.c415
-rw-r--r--arch/s390/net/bpf_timed_may_goto.S45
-rw-r--r--arch/s390/net/pnet.c1
-rw-r--r--arch/s390/pci/Makefile5
-rw-r--r--arch/s390/pci/pci.c216
-rw-r--r--arch/s390/pci/pci_bus.c101
-rw-r--r--arch/s390/pci/pci_bus.h12
-rw-r--r--arch/s390/pci/pci_clp.c61
-rw-r--r--arch/s390/pci/pci_debug.c13
-rw-r--r--arch/s390/pci/pci_event.c148
-rw-r--r--arch/s390/pci/pci_fixup.c23
-rw-r--r--arch/s390/pci/pci_insn.c122
-rw-r--r--arch/s390/pci/pci_iov.c59
-rw-r--r--arch/s390/pci/pci_iov.h9
-rw-r--r--arch/s390/pci/pci_irq.c122
-rw-r--r--arch/s390/pci/pci_kvm_hook.c2
-rw-r--r--arch/s390/pci/pci_mmio.c148
-rw-r--r--arch/s390/pci/pci_report.c157
-rw-r--r--arch/s390/pci/pci_report.h16
-rw-r--r--arch/s390/pci/pci_sysfs.c63
-rw-r--r--arch/s390/purgatory/Makefile7
-rw-r--r--arch/s390/purgatory/head.S2
-rw-r--r--arch/s390/purgatory/purgatory.c2
-rw-r--r--arch/s390/tools/gen_facilities.c6
-rw-r--r--arch/s390/tools/gen_opcode_table.c27
-rw-r--r--arch/s390/tools/opcodes.txt52
-rw-r--r--arch/s390/tools/relocs.c2
398 files changed, 19339 insertions, 17237 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index c59d2b54df49..938e5df75b2d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -21,7 +21,7 @@ config ARCH_PROC_KCORE_TEXT
def_bool y
config GENERIC_HWEIGHT
- def_bool y
+ def_bool !HAVE_MARCH_Z196_FEATURES
config GENERIC_BUG
def_bool y if BUG
@@ -41,9 +41,6 @@ config AUDIT_ARCH
config NO_IOPORT_MAP
def_bool y
-config PCI_QUIRKS
- def_bool n
-
config ARCH_SUPPORTS_UPROBES
def_bool y
@@ -52,6 +49,29 @@ config KASAN_SHADOW_OFFSET
depends on KASAN
default 0x1C000000000000
+config CC_HAS_BUILTIN_FFS
+ def_bool !(CC_IS_GCC && GCC_VERSION < 160000)
+ help
+ GCC versions before 16.0.0 generate library calls to ffs()
+ for __builtin_ffs() even when __has_builtin(__builtin_ffs)
+ is true.
+
+config CC_ASM_FLAG_OUTPUT_BROKEN
+ def_bool CC_IS_GCC && GCC_VERSION < 140200
+ help
+ GCC versions before 14.2.0 may die with an internal
+ compiler error in some configurations if flag output
+ operands are used within inline assemblies.
+
+config CC_HAS_ASM_AOR_FORMAT_FLAGS
+ def_bool !(CC_IS_CLANG && CLANG_VERSION < 190100)
+ help
+ Clang versions before 19.1.0 do not support A,
+ O, and R inline assembly format flags.
+
+config CC_HAS_STACKPROTECTOR_GLOBAL
+ def_bool $(cc-option, -mstack-protector-guard=global -mstack-protector-guard-record)
+
config S390
def_bool y
#
@@ -60,16 +80,18 @@ config S390
imply IMA_SECURE_AND_OR_TRUSTED_BOOT
select ALTERNATE_USER_ADDRESS_SPACE
select ARCH_32BIT_USTAT_F_TINODE
- select ARCH_BINFMT_ELF_STATE
select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM
select ARCH_ENABLE_MEMORY_HOTREMOVE
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
+ select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
+ select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DEBUG_WX
select ARCH_HAS_DEVMEM_IS_ALLOWED
+ select ARCH_HAS_DMA_OPS if PCI
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORCE_DMA_UNENCRYPTED
select ARCH_HAS_FORTIFY_SOURCE
@@ -79,6 +101,8 @@ config S390
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_MEM_ENCRYPT
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
+ select ARCH_HAS_PREEMPT_LAZY
+ select ARCH_HAS_PTDUMP
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SCALED_CPUTIME
select ARCH_HAS_SET_DIRECT_MAP
@@ -87,8 +111,9 @@ config S390
select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_SYSCALL_WRAPPER
select ARCH_HAS_UBSAN
- select ARCH_HAS_VDSO_DATA
+ select ARCH_HAS_VDSO_TIME_DATA
select ARCH_HAVE_NMI_SAFE_CMPXCHG
+ select ARCH_HAVE_TRACE_MMIO_ACCESS
select ARCH_INLINE_READ_LOCK
select ARCH_INLINE_READ_LOCK_BH
select ARCH_INLINE_READ_LOCK_IRQ
@@ -123,6 +148,7 @@ config S390
select ARCH_SUPPORTS_DEBUG_PAGEALLOC
select ARCH_SUPPORTS_HUGETLBFS
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && CC_IS_CLANG
+ select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_SUPPORTS_PER_VMA_LOCK
select ARCH_USE_BUILTIN_BSWAP
@@ -131,25 +157,24 @@ config S390
select ARCH_WANTS_NO_INSTR
select ARCH_WANT_DEFAULT_BPF_JIT
select ARCH_WANT_IPC_PARSE_VERSION
+ select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
select ARCH_WANT_KERNEL_PMD_MKWRITE
select ARCH_WANT_LD_ORPHAN_WARN
- select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
+ select ARCH_WANTS_THP_SWAP
select BUILDTIME_TABLE_SORT
select CLONE_BACKWARDS2
select DCACHE_WORD_ACCESS if !KMSAN
- select DMA_OPS if PCI
select DYNAMIC_FTRACE if FUNCTION_TRACER
select FUNCTION_ALIGNMENT_8B if CC_IS_GCC
select FUNCTION_ALIGNMENT_16B if !CC_IS_GCC
select GENERIC_ALLOCATOR
+ select GENERIC_CPU_DEVICES
select GENERIC_CPU_AUTOPROBE
select GENERIC_CPU_VULNERABILITIES
select GENERIC_ENTRY
select GENERIC_GETTIMEOFDAY
- select GENERIC_PTDUMP
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
- select GENERIC_VDSO_TIME_NS
select GENERIC_IOREMAP if PCI
select HAVE_ALIGNED_STRUCT_PAGE
select HAVE_ARCH_AUDITSYSCALL
@@ -158,31 +183,35 @@ config S390
select HAVE_ARCH_KASAN
select HAVE_ARCH_KASAN_VMALLOC
select HAVE_ARCH_KCSAN
+ select HAVE_ARCH_KMSAN
select HAVE_ARCH_KFENCE
+ select HAVE_ARCH_KSTACK_ERASE
select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_SOFT_DIRTY
- select HAVE_ARCH_STACKLEAK
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_ARCH_VMAP_STACK
select HAVE_ASM_MODVERSIONS
+ select HAVE_BUILDTIME_MCOUNT_SORT
select HAVE_CMPXCHG_DOUBLE
select HAVE_CMPXCHG_LOCAL
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_ARGS
+ select HAVE_FTRACE_REGS_HAVING_PT_REGS
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EBPF_JIT if HAVE_MARCH_Z196_FEATURES
select HAVE_EFFICIENT_UNALIGNED_ACCESS
+ select HAVE_GENERIC_TIF_BITS
select HAVE_GUP_FAST
select HAVE_FENTRY
- select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_FTRACE_GRAPH_FUNC
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUNCTION_ERROR_INJECTION
- select HAVE_FUNCTION_GRAPH_RETVAL
+ select HAVE_FUNCTION_GRAPH_FREGS
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
select HAVE_GCC_PLUGINS
@@ -209,6 +238,7 @@ config S390
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_PREEMPT_DYNAMIC_KEY
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE
select HAVE_RETHOOK
@@ -217,23 +247,29 @@ config S390
select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
select HAVE_SETUP_PER_CPU_AREA
select HAVE_SOFTIRQ_ON_OWN_STACK
+ select HAVE_STACKPROTECTOR if CC_HAS_STACKPROTECTOR_GLOBAL
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_VIRT_CPU_ACCOUNTING
select HAVE_VIRT_CPU_ACCOUNTING_IDLE
+ select HOTPLUG_SMT
select IOMMU_HELPER if PCI
select IOMMU_SUPPORT if PCI
+ select KASAN_VMALLOC if KASAN
+ select LOCK_MM_AND_FIND_VMA
select MMU_GATHER_MERGE_VMAS
select MMU_GATHER_NO_GATHER
select MMU_GATHER_RCU_TABLE_FREE
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE if PCI
select NEED_PER_CPU_EMBED_FIRST_CHUNK
+ select NEED_PROC_VMCORE_DEVICE_RAM if PROC_VMCORE
select NEED_SG_DMA_LENGTH if PCI
select OLD_SIGACTION
select OLD_SIGSUSPEND3
select PCI_DOMAINS if PCI
select PCI_MSI if PCI
select PCI_MSI_ARCH_FALLBACKS if PCI_MSI
+ select PCI_QUIRKS if PCI
select SPARSE_IRQ
select SWIOTLB
select SYSCTL_EXCEPTION_TRACE
@@ -241,7 +277,9 @@ config S390
select TRACE_IRQFLAGS_SUPPORT
select TTY
select USER_STACKTRACE_SUPPORT
+ select VDSO_GETRANDOM
select VIRT_CPU_ACCOUNTING
+ select VMAP_STACK
select ZONE_DMA
# Note: keep the above list sorted alphabetically
@@ -273,6 +311,9 @@ config ARCH_SUPPORTS_CRASH_DUMP
This option also enables s390 zfcpdump.
See also <file:Documentation/arch/s390/zfcpdump.rst>
+config ARCH_DEFAULT_CRASH_DUMP
+ def_bool y
+
menu "Processor type and features"
config HAVE_MARCH_Z10_FEATURES
@@ -302,6 +343,10 @@ config HAVE_MARCH_Z16_FEATURES
def_bool n
select HAVE_MARCH_Z15_FEATURES
+config HAVE_MARCH_Z17_FEATURES
+ def_bool n
+ select HAVE_MARCH_Z16_FEATURES
+
choice
prompt "Processor type"
default MARCH_Z196
@@ -367,6 +412,14 @@ config MARCH_Z16
Select this to enable optimizations for IBM z16 (3931 and
3932 series).
+config MARCH_Z17
+ bool "IBM z17"
+ select HAVE_MARCH_Z17_FEATURES
+ depends on $(cc-option,-march=z17)
+ help
+ Select this to enable optimizations for IBM z17 (9175 and
+ 9176 series).
+
endchoice
config MARCH_Z10_TUNE
@@ -390,6 +443,9 @@ config MARCH_Z15_TUNE
config MARCH_Z16_TUNE
def_bool TUNE_Z16 || MARCH_Z16 && TUNE_DEFAULT
+config MARCH_Z17_TUNE
+ def_bool TUNE_Z17 || MARCH_Z17 && TUNE_DEFAULT
+
choice
prompt "Tune code generation"
default TUNE_DEFAULT
@@ -434,6 +490,10 @@ config TUNE_Z16
bool "IBM z16"
depends on $(cc-option,-mtune=z16)
+config TUNE_Z17
+ bool "IBM z17"
+ depends on $(cc-option,-mtune=z17)
+
endchoice
config 64BIT
@@ -447,22 +507,6 @@ config COMMAND_LINE_SIZE
This allows you to specify the maximum length of the kernel command
line.
-config COMPAT
- def_bool n
- prompt "Kernel support for 31 bit emulation"
- select ARCH_WANT_OLD_COMPAT_IPC
- select COMPAT_OLD_SIGACTION
- select HAVE_UID16
- depends on MULTIUSER
- depends on !CC_IS_CLANG && !LD_IS_LLD
- help
- Select this option if you want to enable your system kernel to
- handle system-calls from ELF binaries for 31 bit ESA. This option
- (and some other stuff like libraries and such) is needed for
- executing 31 bit applications.
-
- If unsure say N.
-
config SMP
def_bool y
@@ -495,15 +539,11 @@ config NODES_SHIFT
depends on NUMA
default "1"
-config SCHED_SMT
- def_bool n
-
-config SCHED_MC
- def_bool n
-
config SCHED_TOPOLOGY
def_bool y
prompt "Topology scheduler support"
+ select ARCH_SUPPORTS_SCHED_SMT
+ select ARCH_SUPPORTS_SCHED_MC
select SCHED_SMT
select SCHED_MC
help
@@ -511,6 +551,26 @@ config SCHED_TOPOLOGY
making when dealing with machines that have multi-threading,
multiple cores or multiple books.
+config SCHED_TOPOLOGY_VERTICAL
+ def_bool y
+ bool "Use vertical CPU polarization by default"
+ depends on SCHED_TOPOLOGY
+ help
+ Use vertical CPU polarization by default if available.
+ The default CPU polarization is horizontal.
+
+config HIPERDISPATCH_ON
+ def_bool y
+ bool "Use hiperdispatch on vertical polarization by default"
+ depends on SCHED_TOPOLOGY
+ depends on PROC_SYSCTL
+ help
+ Hiperdispatch aims to improve the CPU scheduler's decision
+ making when using vertical polarization by adjusting CPU
+ capacities dynamically. Set this option to use hiperdispatch
+ on vertical polarization by default. This can be overwritten
+ by sysctl's s390.hiperdispatch attribute later on.
+
source "kernel/Kconfig.hz"
config CERT_STORE
@@ -555,17 +615,13 @@ config EXPOLINE
If unsure, say N.
config EXPOLINE_EXTERN
- def_bool y if EXPOLINE
- depends on EXPOLINE
- depends on CC_IS_GCC && GCC_VERSION >= 110200
- depends on $(success,$(srctree)/arch/s390/tools/gcc-thunk-extern.sh $(CC))
- prompt "Generate expolines as extern functions."
+ def_bool EXPOLINE && CC_IS_GCC && GCC_VERSION >= 110200 && \
+ $(success,$(srctree)/arch/s390/tools/gcc-thunk-extern.sh $(CC))
help
- This option is required for some tooling like kpatch. The kernel is
- compiled with -mindirect-branch=thunk-extern and requires a newer
- compiler.
-
- If unsure, say N.
+ Generate expolines as external functions if the compiler supports it.
+ This option is required for some tooling like kpatch, if expolines
+ are enabled. The kernel is compiled with
+ -mindirect-branch=thunk-extern, which requires a newer compiler.
choice
prompt "Expoline default"
@@ -585,6 +641,7 @@ endchoice
config RELOCATABLE
def_bool y
+ select ARCH_VMLINUX_NEEDS_RELOCS
help
This builds a kernel image that retains relocation information
so it can be loaded at an arbitrary address.
@@ -602,6 +659,19 @@ config RANDOMIZE_BASE
as a security feature that deters exploit attempts relying on
knowledge of the location of kernel internals.
+config RANDOMIZE_IDENTITY_BASE
+ bool "Randomize the address of the identity mapping base"
+ depends on RANDOMIZE_BASE
+ default DEBUG_VM
+ help
+ The identity mapping base address is pinned to zero by default.
+ Allow randomization of that base to expose otherwise missed
+ notion of physical and virtual addresses of data structures.
+ That does not have any impact on the base address at which the
+ kernel image is loaded.
+
+ If unsure, say N
+
config KERNEL_IMAGE_BASE
hex "Kernel image base address"
range 0x100000 0x1FFFFFE0000000 if !KASAN
@@ -628,7 +698,6 @@ menu "Memory setup"
config ARCH_SPARSEMEM_ENABLE
def_bool y
select SPARSEMEM_VMEMMAP_ENABLE
- select SPARSEMEM_VMEMMAP
config ARCH_SPARSEMEM_DEFAULT
def_bool y
@@ -643,32 +712,6 @@ config MAX_PHYSMEM_BITS
Increasing the number of bits also increases the kernel image size.
By default 46 bits (64TB) are supported.
-config CHECK_STACK
- def_bool y
- depends on !VMAP_STACK
- prompt "Detect kernel stack overflow"
- help
- This option enables the compiler option -mstack-guard and
- -mstack-size if they are available. If the compiler supports them
- it will emit additional code to each function prolog to trigger
- an illegal operation if the kernel stack is about to overflow.
-
- Say N if you are unsure.
-
-config STACK_GUARD
- int "Size of the guard area (128-1024)"
- range 128 1024
- depends on CHECK_STACK
- default "256"
- help
- This allows you to specify the size of the guard area at the lower
- end of the kernel stack. If the kernel stack points into the guard
- area on function entry an illegal operation is triggered. The size
- needs to be a power of 2. Please keep in mind that the size of an
- interrupt frame is 184 bytes for 31 bit and 328 bytes on 64 bit.
- The minimum size for the stack guard should be 256 for 31 bit and
- 512 for 64 bit.
-
endmenu
menu "I/O subsystem"
@@ -797,17 +840,6 @@ config HAVE_PNETID
menu "Virtualization"
-config PROTECTED_VIRTUALIZATION_GUEST
- def_bool n
- prompt "Protected virtualization guest support"
- help
- Select this option, if you want to be able to run this
- kernel as a protected virtualization KVM guest.
- Protected virtualization capable machines have a mini hypervisor
- located at machine level (an ultravisor). With help of the
- Ultravisor, KVM will be able to run "protected" VMs, special
- VMs whose memory and management data are unavailable to KVM.
-
config PFAULT
def_bool y
prompt "Pseudo page fault support"
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
index c4300ea4abf8..7955d7eee7d8 100644
--- a/arch/s390/Kconfig.debug
+++ b/arch/s390/Kconfig.debug
@@ -13,6 +13,16 @@ config DEBUG_ENTRY
If unsure, say N.
+config STRICT_MM_TYPECHECKS
+ bool "Strict Memory Management Type Checks"
+ depends on DEBUG_KERNEL
+ help
+ Enable strict type checking for memory management types like pte_t
+ and pmd_t. This generates slightly worse code and should be used
+ for debug builds.
+
+ If unsure, say N.
+
config CIO_INJECT
bool "CIO Inject interfaces"
depends on DEBUG_KERNEL && DEBUG_FS
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index f2b21c7a70ef..d78ad6885ca2 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -15,16 +15,17 @@ KBUILD_CFLAGS_MODULE += -fPIC
KBUILD_AFLAGS += -m64
KBUILD_CFLAGS += -m64
KBUILD_CFLAGS += -fPIC
-LDFLAGS_vmlinux := -no-pie --emit-relocs --discard-none
+LDFLAGS_vmlinux := $(call ld-option,-no-pie)
extra_tools := relocs
aflags_dwarf := -Wa,-gdwarf-2
KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__
ifndef CONFIG_AS_IS_LLVM
KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf))
endif
-KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack
+KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack -std=gnu11 -fms-extensions
KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY
KBUILD_CFLAGS_DECOMPRESSOR += -D__DECOMPRESSOR
+KBUILD_CFLAGS_DECOMPRESSOR += -Wno-pointer-sign
KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain
KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables
KBUILD_CFLAGS_DECOMPRESSOR += -ffreestanding
@@ -34,9 +35,10 @@ KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-membe
KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g)
KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,))
KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_CC_NO_ARRAY_BOUNDS),-Wno-array-bounds)
+KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_CC_IS_CLANG),-Wno-microsoft-anon-tag)
UTS_MACHINE := s390x
-STACK_SIZE := $(if $(CONFIG_KASAN),65536,16384)
+STACK_SIZE := $(if $(CONFIG_KASAN),65536,$(if $(CONFIG_KMSAN),65536,16384))
CHECKFLAGS += -D__s390__ -D__s390x__
export LD_BFD
@@ -48,6 +50,7 @@ mflags-$(CONFIG_MARCH_Z13) := -march=z13
mflags-$(CONFIG_MARCH_Z14) := -march=z14
mflags-$(CONFIG_MARCH_Z15) := -march=z15
mflags-$(CONFIG_MARCH_Z16) := -march=z16
+mflags-$(CONFIG_MARCH_Z17) := -march=z17
export CC_FLAGS_MARCH := $(mflags-y)
@@ -61,6 +64,7 @@ cflags-$(CONFIG_MARCH_Z13_TUNE) += -mtune=z13
cflags-$(CONFIG_MARCH_Z14_TUNE) += -mtune=z14
cflags-$(CONFIG_MARCH_Z15_TUNE) += -mtune=z15
cflags-$(CONFIG_MARCH_Z16_TUNE) += -mtune=z16
+cflags-$(CONFIG_MARCH_Z17_TUNE) += -mtune=z17
cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include
@@ -72,15 +76,6 @@ cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls
KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y)
KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y)
-ifneq ($(call cc-option,-mstack-size=8192 -mstack-guard=128),)
- CC_FLAGS_CHECK_STACK := -mstack-size=$(STACK_SIZE)
- ifeq ($(call cc-option,-mstack-size=8192),)
- CC_FLAGS_CHECK_STACK += -mstack-guard=$(CONFIG_STACK_GUARD)
- endif
- export CC_FLAGS_CHECK_STACK
- cflags-$(CONFIG_CHECK_STACK) += $(CC_FLAGS_CHECK_STACK)
-endif
-
ifdef CONFIG_EXPOLINE
ifdef CONFIG_EXPOLINE_EXTERN
CC_FLAGS_EXPOLINE := -mindirect-branch=thunk-extern
@@ -95,6 +90,10 @@ ifdef CONFIG_EXPOLINE
aflags-y += -DCC_USING_EXPOLINE
endif
+ifeq ($(CONFIG_STACKPROTECTOR),y)
+ KBUILD_CFLAGS += -mstack-protector-guard=global -mstack-protector-guard-record
+endif
+
ifdef CONFIG_FUNCTION_TRACER
ifeq ($(call cc-option,-mfentry -mnop-mcount),)
# make use of hotpatch feature if the compiler supports it
@@ -140,10 +139,9 @@ zfcpdump:
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
archheaders:
- $(Q)$(MAKE) $(build)=$(syscalls) uapi
+ $(Q)$(MAKE) $(build)=$(syscalls) all
archprepare:
- $(Q)$(MAKE) $(build)=$(syscalls) kapi
$(Q)$(MAKE) $(build)=$(tools) kapi $(extra_tools)
ifeq ($(KBUILD_EXTMOD),)
# We need to generate vdso-offsets.h before compiling certain files in kernel/.
@@ -154,12 +152,9 @@ ifeq ($(KBUILD_EXTMOD),)
# this hack.
prepare: vdso_prepare
vdso_prepare: prepare0
- $(Q)$(MAKE) $(build)=arch/s390/kernel/vdso64 include/generated/vdso64-offsets.h
- $(if $(CONFIG_COMPAT),$(Q)$(MAKE) \
- $(build)=arch/s390/kernel/vdso32 include/generated/vdso32-offsets.h)
+ $(Q)$(MAKE) $(build)=arch/s390/kernel/vdso include/generated/vdso-offsets.h
-vdso-install-y += arch/s390/kernel/vdso64/vdso64.so.dbg
-vdso-install-$(CONFIG_COMPAT) += arch/s390/kernel/vdso32/vdso32.so.dbg
+vdso-install-y += arch/s390/kernel/vdso/vdso.so.dbg
endif
diff --git a/arch/s390/Makefile.postlink b/arch/s390/Makefile.postlink
new file mode 100644
index 000000000000..c2b737500a91
--- /dev/null
+++ b/arch/s390/Makefile.postlink
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0
+# ===========================================================================
+# Post-link s390 pass
+# ===========================================================================
+#
+# 1. Separate relocations from vmlinux into relocs.S.
+# 2. Strip relocations from vmlinux.
+
+PHONY := __archpost
+__archpost:
+
+-include include/config/auto.conf
+include $(srctree)/scripts/Kbuild.include
+
+CMD_RELOCS=arch/s390/tools/relocs
+OUT_RELOCS = arch/s390/boot
+quiet_cmd_relocs = RELOCS $(OUT_RELOCS)/relocs.S
+ cmd_relocs = \
+ mkdir -p $(OUT_RELOCS); \
+ $(CMD_RELOCS) $@ > $(OUT_RELOCS)/relocs.S
+
+vmlinux.unstripped: FORCE
+ $(call cmd,relocs)
+
+clean:
+ @rm -f $(OUT_RELOCS)/relocs.S
+
+PHONY += FORCE clean
+
+FORCE:
+
+.PHONY: $(PHONY)
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index c2978cb03b36..feb43db63f30 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -9,9 +9,9 @@
* Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
*/
-#define KMSG_COMPONENT "appldata"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "appldata: " fmt
+#include <linux/export.h>
#include <linux/module.h>
#include <linux/sched/stat.h>
#include <linux/init.h>
@@ -46,13 +46,13 @@
* /proc entries (sysctl)
*/
static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata";
-static int appldata_timer_handler(struct ctl_table *ctl, int write,
+static int appldata_timer_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos);
-static int appldata_interval_handler(struct ctl_table *ctl, int write,
+static int appldata_interval_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos);
static struct ctl_table_header *appldata_sysctl_header;
-static struct ctl_table appldata_table[] = {
+static const struct ctl_table appldata_table[] = {
{
.procname = "timer",
.mode = S_IRUGO | S_IWUSR,
@@ -199,7 +199,7 @@ static void __appldata_vtimer_setup(int cmd)
* Start/Stop timer, show status of timer (0 = not active, 1 = active)
*/
static int
-appldata_timer_handler(struct ctl_table *ctl, int write,
+appldata_timer_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int timer_active = appldata_timer_active;
@@ -232,7 +232,7 @@ appldata_timer_handler(struct ctl_table *ctl, int write,
* current timer interval.
*/
static int
-appldata_interval_handler(struct ctl_table *ctl, int write,
+appldata_interval_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int interval = appldata_interval;
@@ -262,7 +262,7 @@ appldata_interval_handler(struct ctl_table *ctl, int write,
* monitoring (0 = not in process, 1 = in process)
*/
static int
-appldata_generic_handler(struct ctl_table *ctl, int write,
+appldata_generic_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct appldata_ops *ops = NULL, *tmp_ops;
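
The appldata handlers above are adjusted to the current proc_handler prototype, which takes a const struct ctl_table, and the table itself becomes const. A self-contained sketch of a handler and table written directly in that style (illustrative, with a hypothetical integer knob; not part of this patch):

#include <linux/printk.h>
#include <linux/sysctl.h>

static int demo_value;

/* proc_handler callbacks now receive a const ctl_table */
static int demo_handler(const struct ctl_table *ctl, int write,
			void *buffer, size_t *lenp, loff_t *ppos)
{
	int rc = proc_dointvec(ctl, write, buffer, lenp, ppos);

	if (!rc && write)
		pr_info("demo_value set to %d\n", demo_value);
	return rc;
}

/* registered e.g. with register_sysctl("demo", demo_table) */
static const struct ctl_table demo_table[] = {
	{
		.procname	= "demo",
		.data		= &demo_value,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= demo_handler,
	},
};
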
diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c
index a363d30ce739..137d4e7e1e9a 100644
--- a/arch/s390/appldata/appldata_os.c
+++ b/arch/s390/appldata/appldata_os.c
@@ -8,8 +8,7 @@
* Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
*/
-#define KMSG_COMPONENT "appldata"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "appldata: " fmt
#include <linux/module.h>
#include <linux/init.h>
diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore
index f5ef099e2fd3..af2a6a7bc028 100644
--- a/arch/s390/boot/.gitignore
+++ b/arch/s390/boot/.gitignore
@@ -5,4 +5,5 @@ relocs.S
section_cmp.*
vmlinux
vmlinux.lds
+vmlinux.map
vmlinux.syms
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index 070c9b2e905f..490167faba7a 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -3,45 +3,36 @@
# Makefile for the linux s390-specific parts of the memory manager.
#
+# Tooling runtimes are unavailable and cannot be linked for early boot code
KCOV_INSTRUMENT := n
GCOV_PROFILE := n
UBSAN_SANITIZE := n
KASAN_SANITIZE := n
KCSAN_SANITIZE := n
-
-KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR)
-KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR)
+KMSAN_SANITIZE := n
#
-# Use minimum architecture for als.c to be able to print an error
+# Use minimum architecture level so it is possible to print an error
# message if the kernel is started on a machine which is too old
#
-ifndef CONFIG_CC_IS_CLANG
-CC_FLAGS_MARCH_MINIMUM := -march=z900
-else
CC_FLAGS_MARCH_MINIMUM := -march=z10
-endif
-
-ifneq ($(CC_FLAGS_MARCH),$(CC_FLAGS_MARCH_MINIMUM))
-AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH)
-AFLAGS_head.o += $(CC_FLAGS_MARCH_MINIMUM)
-AFLAGS_REMOVE_mem.o += $(CC_FLAGS_MARCH)
-AFLAGS_mem.o += $(CC_FLAGS_MARCH_MINIMUM)
-CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH)
-CFLAGS_als.o += $(CC_FLAGS_MARCH_MINIMUM)
-CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_MARCH)
-CFLAGS_sclp_early_core.o += $(CC_FLAGS_MARCH_MINIMUM)
-endif
+
+KBUILD_AFLAGS := $(filter-out $(CC_FLAGS_MARCH),$(KBUILD_AFLAGS_DECOMPRESSOR))
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_MARCH),$(KBUILD_CFLAGS_DECOMPRESSOR))
+KBUILD_AFLAGS += $(CC_FLAGS_MARCH_MINIMUM) -D__DISABLE_EXPORTS
+KBUILD_CFLAGS += $(CC_FLAGS_MARCH_MINIMUM) -D__DISABLE_EXPORTS
CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
obj-y := head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
-obj-y += version.o pgm_check_info.o ctype.o ipl_data.o relocs.o
-obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
+obj-y += version.o pgm_check.o ctype.o ipl_data.o relocs.o alternative.o
+obj-y += uv.o printk.o trampoline.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o
+obj-$(CONFIG_KMSAN) += kmsan.o
+obj-$(CONFIG_STACKPROTECTOR) += stackprotector.o
obj-all := $(obj-y) piggy.o syms.o
targets := bzImage section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y)
@@ -107,11 +98,9 @@ OBJCOPYFLAGS_vmlinux.bin := -O binary --remove-section=.comment --remove-section
$(obj)/vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
-CMD_RELOCS=arch/s390/tools/relocs
-quiet_cmd_relocs = RELOCS $@
- cmd_relocs = $(CMD_RELOCS) $< > $@
-$(obj)/relocs.S: vmlinux FORCE
- $(call if_changed,relocs)
+# relocs.S is created by the vmlinux postlink step.
+$(obj)/relocs.S: vmlinux
+ @true
suffix-$(CONFIG_KERNEL_GZIP) := .gz
suffix-$(CONFIG_KERNEL_BZIP2) := .bz2
diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c
index 47c48fbfb563..25a20986b96e 100644
--- a/arch/s390/boot/als.c
+++ b/arch/s390/boot/als.c
@@ -9,42 +9,8 @@
#include <asm/sclp.h>
#include "boot.h"
-/*
- * The code within this file will be called very early. It may _not_
- * access anything within the bss section, since that is not cleared
- * yet and may contain data (e.g. initrd) that must be saved by other
- * code.
- * For temporary objects the stack (16k) should be used.
- */
-
static unsigned long als[] = { FACILITIES_ALS };
-static void u16_to_hex(char *str, u16 val)
-{
- int i, num;
-
- for (i = 1; i <= 4; i++) {
- num = (val >> (16 - 4 * i)) & 0xf;
- if (num >= 10)
- num += 7;
- *str++ = '0' + num;
- }
- *str = '\0';
-}
-
-static void print_machine_type(void)
-{
- static char mach_str[80] = "Detected machine-type number: ";
- char type_str[5];
- struct cpuid id;
-
- get_cpu_id(&id);
- u16_to_hex(type_str, id.machine);
- strcat(mach_str, type_str);
- strcat(mach_str, "\n");
- sclp_early_printk(mach_str);
-}
-
static void u16_to_decimal(char *str, u16 val)
{
int div = 1;
@@ -80,8 +46,7 @@ void print_missing_facilities(void)
* z/VM adds a four character prefix.
*/
if (strlen(als_str) > 70) {
- strcat(als_str, "\n");
- sclp_early_printk(als_str);
+ boot_emerg("%s\n", als_str);
*als_str = '\0';
}
u16_to_decimal(val_str, i * BITS_PER_LONG + j);
@@ -89,16 +54,18 @@ void print_missing_facilities(void)
first = 0;
}
}
- strcat(als_str, "\n");
- sclp_early_printk(als_str);
+ boot_emerg("%s\n", als_str);
}
static void facility_mismatch(void)
{
- sclp_early_printk("The Linux kernel requires more recent processor hardware\n");
- print_machine_type();
+ struct cpuid id;
+
+ get_cpu_id(&id);
+ boot_emerg("The Linux kernel requires more recent processor hardware\n");
+ boot_emerg("Detected machine-type number: %4x\n", id.machine);
print_missing_facilities();
- sclp_early_printk("See Principles of Operations for facility bits\n");
+ boot_emerg("See z/Architecture Principles of Operation - Facility Indications\n");
disabled_wait();
}
diff --git a/arch/s390/boot/alternative.c b/arch/s390/boot/alternative.c
new file mode 100644
index 000000000000..19ea7934b918
--- /dev/null
+++ b/arch/s390/boot/alternative.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+#define boot_fmt(fmt) "alt: " fmt
+#include "boot.h"
+
+#define a_debug boot_debug
+
+#include "../kernel/alternative.c"
+
+static void alt_debug_all(int type)
+{
+ int i;
+
+ switch (type) {
+ case ALT_TYPE_FACILITY:
+ for (i = 0; i < ARRAY_SIZE(alt_debug.facilities); i++)
+ alt_debug.facilities[i] = -1UL;
+ break;
+ case ALT_TYPE_FEATURE:
+ for (i = 0; i < ARRAY_SIZE(alt_debug.mfeatures); i++)
+ alt_debug.mfeatures[i] = -1UL;
+ break;
+ case ALT_TYPE_SPEC:
+ alt_debug.spec = 1;
+ break;
+ }
+}
+
+static void alt_debug_modify(int type, unsigned int nr, bool clear)
+{
+ switch (type) {
+ case ALT_TYPE_FACILITY:
+ if (clear)
+ __clear_facility(nr, alt_debug.facilities);
+ else
+ __set_facility(nr, alt_debug.facilities);
+ break;
+ case ALT_TYPE_FEATURE:
+ if (clear)
+ __clear_machine_feature(nr, alt_debug.mfeatures);
+ else
+ __set_machine_feature(nr, alt_debug.mfeatures);
+ break;
+ }
+}
+
+static char *alt_debug_parse(int type, char *str)
+{
+ unsigned long val, endval;
+ char *endp;
+ bool clear;
+ int i;
+
+ if (*str == ':') {
+ str++;
+ } else {
+ alt_debug_all(type);
+ return str;
+ }
+ clear = false;
+ if (*str == '!') {
+ alt_debug_all(type);
+ clear = true;
+ str++;
+ }
+ while (*str) {
+ val = simple_strtoull(str, &endp, 0);
+ if (str == endp)
+ break;
+ str = endp;
+ if (*str == '-') {
+ str++;
+ endval = simple_strtoull(str, &endp, 0);
+ if (str == endp)
+ break;
+ str = endp;
+ while (val <= endval) {
+ alt_debug_modify(type, val, clear);
+ val++;
+ }
+ } else {
+ alt_debug_modify(type, val, clear);
+ }
+ if (*str != ',')
+ break;
+ str++;
+ }
+ return str;
+}
+
+/*
+ * Use debug-alternative command line parameter for debugging:
+ * "debug-alternative"
+ * -> print debug message for every single alternative
+ *
+ * "debug-alternative=0;2"
+ * -> print debug message for all alternatives with type 0 and 2
+ *
+ * "debug-alternative=0:0-7"
+ * -> print debug message for all alternatives with type 0 and with
+ * facility numbers within the range of 0-7
+ * (if type 0 is ALT_TYPE_FACILITY)
+ *
+ * "debug-alternative=0:!8;1"
+ * -> print debug message for all alternatives with type 0, for all
+ * facility number, except facility 8, and in addition print all
+ * alternatives with type 1
+ */
+void alt_debug_setup(char *str)
+{
+ unsigned long type;
+ char *endp;
+ int i;
+
+ if (!str) {
+ alt_debug_all(ALT_TYPE_FACILITY);
+ alt_debug_all(ALT_TYPE_FEATURE);
+ alt_debug_all(ALT_TYPE_SPEC);
+ return;
+ }
+ while (*str) {
+ type = simple_strtoull(str, &endp, 0);
+ if (str == endp)
+ break;
+ str = endp;
+ switch (type) {
+ case ALT_TYPE_FACILITY:
+ case ALT_TYPE_FEATURE:
+ str = alt_debug_parse(type, str);
+ break;
+ case ALT_TYPE_SPEC:
+ alt_debug_all(ALT_TYPE_SPEC);
+ break;
+ }
+ if (*str != ';')
+ break;
+ str++;
+ }
+}
diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h
index 18027fdc92b0..61a205b489fb 100644
--- a/arch/s390/boot/boot.h
+++ b/arch/s390/boot/boot.h
@@ -6,15 +6,11 @@
#define IPL_START 0x200
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
+#include <linux/printk.h>
#include <asm/physmem_info.h>
-
-struct machine_info {
- unsigned char has_edat1 : 1;
- unsigned char has_edat2 : 1;
- unsigned char has_nx : 1;
-};
+#include <asm/stacktrace.h>
struct vmlinux_info {
unsigned long entry;
@@ -30,6 +26,12 @@ struct vmlinux_info {
unsigned long init_mm_off;
unsigned long swapper_pg_dir_off;
unsigned long invalid_pg_dir_off;
+ unsigned long alt_instructions;
+ unsigned long alt_instructions_end;
+#ifdef CONFIG_STACKPROTECTOR
+ unsigned long stack_prot_start;
+ unsigned long stack_prot_end;
+#endif
#ifdef CONFIG_KASAN
unsigned long kasan_early_shadow_page_off;
unsigned long kasan_early_shadow_pte_off;
@@ -46,13 +48,16 @@ void physmem_set_usable_limit(unsigned long limit);
void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size);
void physmem_free(enum reserved_range_type type);
/* for continuous/multiple allocations per type */
-unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size,
- unsigned long align);
+unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size,
+ unsigned long align);
+unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size,
+ unsigned long align, bool die_on_oom);
/* for single allocations, 1 per type */
unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size,
unsigned long align, unsigned long min, unsigned long max,
bool die_on_oom);
unsigned long get_physmem_alloc_pos(void);
+void dump_physmem_reserved(void);
bool ipl_report_certs_intersects(unsigned long addr, unsigned long size,
unsigned long *intersection_start);
bool is_ipl_block_dump(void);
@@ -64,16 +69,41 @@ void parse_boot_command_line(void);
void verify_facilities(void);
void print_missing_facilities(void);
void sclp_early_setup_buffer(void);
-void print_pgm_check_info(void);
+void alt_debug_setup(char *str);
+void do_pgm_check(struct pt_regs *regs);
unsigned long randomize_within_range(unsigned long size, unsigned long align,
unsigned long min, unsigned long max);
void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit);
-void __printf(1, 2) decompressor_printk(const char *fmt, ...);
+int __printf(1, 2) boot_printk(const char *fmt, ...);
void print_stacktrace(unsigned long sp);
void error(char *m);
int get_random(unsigned long limit, unsigned long *value);
+void boot_rb_dump(void);
+void __noreturn jump_to_kernel(psw_t *psw);
+
+#ifndef boot_fmt
+#define boot_fmt(fmt) fmt
+#endif
+
+#define boot_emerg(fmt, ...) boot_printk(KERN_EMERG boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_alert(fmt, ...) boot_printk(KERN_ALERT boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_crit(fmt, ...) boot_printk(KERN_CRIT boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_err(fmt, ...) boot_printk(KERN_ERR boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_warn(fmt, ...) boot_printk(KERN_WARNING boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_notice(fmt, ...) boot_printk(KERN_NOTICE boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_info(fmt, ...) boot_printk(KERN_INFO boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_debug(fmt, ...) boot_printk(KERN_DEBUG boot_fmt(fmt), ##__VA_ARGS__)
+
+#define boot_panic(...) do { \
+ boot_emerg(__VA_ARGS__); \
+ print_stacktrace(current_frame_address()); \
+ boot_emerg(" -- System halted\n"); \
+ disabled_wait(); \
+} while (0)
extern struct machine_info machine;
+extern int boot_console_loglevel;
+extern bool boot_ignore_loglevel;
/* Symbols defined by linker scripts */
extern const char kernel_version[];
@@ -89,8 +119,10 @@ extern char _end[], _decompressor_end[];
extern unsigned char _compressed_start[];
extern unsigned char _compressed_end[];
extern struct vmlinux_info _vmlinux_info;
+
#define vmlinux _vmlinux_info
+#define __lowcore_pa(x) ((unsigned long)(x) % sizeof(struct lowcore))
#define __abs_lowcore_pa(x) (((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore))
#define __kernel_va(x) ((void *)((unsigned long)(x) - __kaslr_offset_phys + __kaslr_offset))
#define __kernel_pa(x) ((unsigned long)(x) - __kaslr_offset + __kaslr_offset_phys)
@@ -102,5 +134,5 @@ static inline bool intersects(unsigned long addr0, unsigned long size0,
{
return addr0 + size0 > addr1 && addr1 + size1 > addr0;
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* BOOT_BOOT_H */
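
The boot_emerg() through boot_debug() macros wrap the new boot_printk() with a KERN_* level and a per-file boot_fmt() prefix, and boot_panic() bundles the final message, a stack trace and disabled_wait(). A short sketch of the intended usage pattern, mirroring what boot/alternative.c and boot/decompressor.c do (the file name and prefix below are made up):

/* arch/s390/boot/demo.c -- hypothetical example */
#define boot_fmt(fmt)	"demo: " fmt	/* prefix for all messages from this file */
#include "boot.h"

static void demo_map(unsigned long addr, unsigned long size)
{
	/* emitted via boot_printk() with KERN_DEBUG severity */
	boot_debug("Mapping 0x%lx-0x%lx\n", addr, addr + size);
	if (!size)
		boot_panic("Zero-sized range at 0x%lx\n", addr);
}
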
diff --git a/arch/s390/boot/decompressor.c b/arch/s390/boot/decompressor.c
index f478e8e9cbda..8d1bc25a6bf4 100644
--- a/arch/s390/boot/decompressor.c
+++ b/arch/s390/boot/decompressor.c
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/string.h>
+#include <asm/boot_data.h>
#include <asm/page.h>
#include "decompressor.h"
#include "boot.h"
@@ -63,6 +64,13 @@ static unsigned long free_mem_end_ptr = (unsigned long) _end + BOOT_HEAP_SIZE;
#include "../../../../lib/decompress_unzstd.c"
#endif
+static void decompress_error(char *m)
+{
+ if (bootdebug)
+ boot_rb_dump();
+ boot_panic("Decompression error: %s\n", m);
+}
+
unsigned long mem_safe_offset(void)
{
return ALIGN(free_mem_end_ptr, PAGE_SIZE);
@@ -71,5 +79,5 @@ unsigned long mem_safe_offset(void)
void deploy_kernel(void *output)
{
__decompress(_compressed_start, _compressed_end - _compressed_start,
- NULL, NULL, output, vmlinux.image_size, NULL, error);
+ NULL, NULL, output, vmlinux.image_size, NULL, decompress_error);
}
diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S
index 637c29c3f6e3..0b511d5c030b 100644
--- a/arch/s390/boot/head.S
+++ b/arch/s390/boot/head.S
@@ -254,8 +254,9 @@ SYM_CODE_START_LOCAL(startup_normal)
xc 0xf00(256),0xf00
larl %r13,.Lctl
lctlg %c0,%c15,0(%r13) # load control registers
- stcke __LC_BOOT_CLOCK
- mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1
+ larl %r13,tod_clock_base
+ stcke 0(%r13)
+ mvc __LC_LAST_UPDATE_CLOCK(8),1(%r13)
larl %r13,6f
spt 0(%r13)
mvc __LC_LAST_UPDATE_TIMER(8),0(%r13)
@@ -292,18 +293,12 @@ SYM_CODE_END(startup_normal)
#include "head_kdump.S"
-#
-# This program check is active immediately after kernel start
-# and until early_pgm_check_handler is set in kernel/early.c
-# It simply saves general/control registers and psw in
-# the save area and does disabled wait with a faulty address.
-#
SYM_CODE_START_LOCAL(startup_pgm_check_handler)
- stmg %r8,%r15,__LC_SAVE_AREA_SYNC
+ stmg %r8,%r15,__LC_SAVE_AREA
la %r8,4095
stctg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r8)
stmg %r0,%r7,__LC_GPREGS_SAVE_AREA-4095(%r8)
- mvc __LC_GPREGS_SAVE_AREA-4095+64(64,%r8),__LC_SAVE_AREA_SYNC
+ mvc __LC_GPREGS_SAVE_AREA-4095+64(64,%r8),__LC_SAVE_AREA
mvc __LC_PSW_SAVE_AREA-4095(16,%r8),__LC_PGM_OLD_PSW
mvc __LC_RETURN_PSW(16),__LC_PGM_OLD_PSW
ni __LC_RETURN_PSW,0xfc # remove IO and EX bits
@@ -311,8 +306,18 @@ SYM_CODE_START_LOCAL(startup_pgm_check_handler)
oi __LC_RETURN_PSW+1,0x2 # set wait state bit
larl %r9,.Lold_psw_disabled_wait
stg %r9,__LC_PGM_NEW_PSW+8
- larl %r15,_dump_info_stack_end-STACK_FRAME_OVERHEAD
- brasl %r14,print_pgm_check_info
+ larl %r15,_dump_info_stack_end-(STACK_FRAME_OVERHEAD+__PT_SIZE)
+ la %r2,STACK_FRAME_OVERHEAD(%r15)
+ mvc __PT_PSW(16,%r2),__LC_PSW_SAVE_AREA-4095(%r8)
+ mvc __PT_R0(128,%r2),__LC_GPREGS_SAVE_AREA-4095(%r8)
+ mvc __PT_LAST_BREAK(8,%r2),__LC_PGM_LAST_BREAK
+ mvc __PT_INT_CODE(4,%r2),__LC_PGM_INT_CODE
+ brasl %r14,do_pgm_check
+ larl %r9,startup_pgm_check_handler
+ stg %r9,__LC_PGM_NEW_PSW+8
+ mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+ lpswe __LC_RETURN_PSW
.Lold_psw_disabled_wait:
la %r8,4095
lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r8)
diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh
index a13dd2f2aa1c..fa41486258ee 100755
--- a/arch/s390/boot/install.sh
+++ b/arch/s390/boot/install.sh
@@ -15,6 +15,8 @@
# $3 - kernel map file
# $4 - default install path (blank if root directory)
+set -e
+
echo "Warning: '${INSTALLKERNEL}' command not available - additional " \
"bootloader config required" >&2
if [ -f "$4/vmlinuz-$1" ]; then mv -- "$4/vmlinuz-$1" "$4/vmlinuz-$1.old"; fi
diff --git a/arch/s390/boot/ipl_data.c b/arch/s390/boot/ipl_data.c
index 0846e2b249c6..b0fd8a526b42 100644
--- a/arch/s390/boot/ipl_data.c
+++ b/arch/s390/boot/ipl_data.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/compat.h>
#include <linux/ptrace.h>
#include <asm/cio.h>
#include <asm/asm-offsets.h>
@@ -12,11 +11,13 @@
#define PSW_MASK_DISABLED (PSW_MASK_WAIT | PSW_MASK_EA | PSW_MASK_BA)
struct ipl_lowcore {
- psw_t32 ipl_psw; /* 0x0000 */
+ psw32_t ipl_psw; /* 0x0000 */
struct ccw0 ccwpgm[2]; /* 0x0008 */
u8 fill[56]; /* 0x0018 */
struct ccw0 ccwpgmcc[20]; /* 0x0050 */
- u8 pad_0xf0[0x01a0-0x00f0]; /* 0x00f0 */
+ u8 pad_0xf0[0x0140-0x00f0]; /* 0x00f0 */
+ psw_t svc_old_psw; /* 0x0140 */
+ u8 pad_0x150[0x01a0-0x0150]; /* 0x0150 */
psw_t restart_psw; /* 0x01a0 */
psw_t external_new_psw; /* 0x01b0 */
psw_t svc_new_psw; /* 0x01c0 */
@@ -75,6 +76,11 @@ static struct ipl_lowcore ipl_lowcore __used __section(".ipldata") = {
[18] = CCW0(CCW_CMD_READ_IPL, 0x690, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
[19] = CCW0(CCW_CMD_READ_IPL, 0x6e0, 0x50, CCW_FLAG_SLI),
},
+ /*
+ * Let the GDB's lx-symbols command find the jump_to_kernel symbol
+ * without having to load decompressor symbols.
+ */
+ .svc_old_psw = { .mask = 0, .addr = (unsigned long)jump_to_kernel },
.restart_psw = { .mask = 0, .addr = IPL_START, },
.external_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_EXT_NEW_PSW, },
.svc_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_SVC_NEW_PSW, },
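
The padding that used to run from 0xf0 to 0x1a0 is split so that the new svc_old_psw slot lands at offset 0x140 while restart_psw stays at 0x1a0. A hedged standalone sketch that checks this layout with offsetof(); the psw_t, psw32_t and ccw0 sizes used here (16, 8 and 8 bytes) are assumptions for illustration, not taken from the kernel headers.

/* Hedged sketch: not the kernel's struct, just a compile-time layout check. */
#include <stddef.h>
#include <stdint.h>
#include <assert.h>

typedef struct { uint64_t mask, addr; } psw_t;     /* 16 bytes, assumed */
typedef struct { uint32_t mask, addr; } psw32_t;   /*  8 bytes, assumed */
struct ccw0 { uint64_t raw; };                     /*  8 bytes, assumed */

struct ipl_lowcore {
	psw32_t ipl_psw;                  /* 0x0000 */
	struct ccw0 ccwpgm[2];            /* 0x0008 */
	uint8_t fill[56];                 /* 0x0018 */
	struct ccw0 ccwpgmcc[20];         /* 0x0050 */
	uint8_t pad_0xf0[0x140 - 0xf0];   /* 0x00f0 */
	psw_t svc_old_psw;                /* 0x0140 */
	uint8_t pad_0x150[0x1a0 - 0x150]; /* 0x0150 */
	psw_t restart_psw;                /* 0x01a0 */
};

static_assert(offsetof(struct ipl_lowcore, svc_old_psw) == 0x140, "svc_old_psw");
static_assert(offsetof(struct ipl_lowcore, restart_psw) == 0x1a0, "restart_psw");

int main(void) { return 0; }
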
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
index b24de9aabf7d..6bc950b92be7 100644
--- a/arch/s390/boot/ipl_parm.c
+++ b/arch/s390/boot/ipl_parm.c
@@ -3,7 +3,10 @@
#include <linux/init.h>
#include <linux/ctype.h>
#include <linux/pgtable.h>
+#include <asm/arch-stackprotector.h>
+#include <asm/abs_lowcore.h>
#include <asm/page-states.h>
+#include <asm/machine.h>
#include <asm/ebcdic.h>
#include <asm/sclp.h>
#include <asm/sections.h>
@@ -33,29 +36,14 @@ int vmalloc_size_set;
static inline int __diag308(unsigned long subcode, void *addr)
{
- unsigned long reg1, reg2;
- union register_pair r1;
- psw_t old;
-
- r1.even = (unsigned long) addr;
- r1.odd = 0;
- asm volatile(
- " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n"
- " epsw %[reg1],%[reg2]\n"
- " st %[reg1],0(%[psw_pgm])\n"
- " st %[reg2],4(%[psw_pgm])\n"
- " larl %[reg1],1f\n"
- " stg %[reg1],8(%[psw_pgm])\n"
+ union register_pair r1 = { .even = (unsigned long)addr, .odd = 0 };
+
+ asm_inline volatile(
" diag %[r1],%[subcode],0x308\n"
- "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n"
- : [r1] "+&d" (r1.pair),
- [reg1] "=&d" (reg1),
- [reg2] "=&a" (reg2),
- "+Q" (S390_lowcore.program_new_psw),
- "=Q" (old)
- : [subcode] "d" (subcode),
- [psw_old] "a" (&old),
- [psw_pgm] "a" (&S390_lowcore.program_new_psw)
+ "0:\n"
+ EX_TABLE(0b, 0b)
+ : [r1] "+d" (r1.pair)
+ : [subcode] "d" (subcode)
: "cc", "memory");
return r1.odd;
}
@@ -192,7 +180,7 @@ void setup_boot_command_line(void)
if (has_ebcdic_char(parmarea.command_line))
EBCASC(parmarea.command_line, COMMAND_LINE_SIZE);
/* copy arch command line */
- strcpy(early_command_line, strim(parmarea.command_line));
+ strscpy(early_command_line, strim(parmarea.command_line));
/* append IPL PARM data to the boot command line */
if (!is_prot_virt_guest() && ipl_block_valid)
@@ -214,7 +202,7 @@ static void check_cleared_facilities(void)
for (i = 0; i < ARRAY_SIZE(als); i++) {
if ((stfle_fac_list[i] & als[i]) != als[i]) {
- sclp_early_printk("Warning: The Linux kernel requires facilities cleared via command line option\n");
+ boot_emerg("The Linux kernel requires facilities cleared via command line option\n");
print_missing_facilities();
break;
}
@@ -266,7 +254,8 @@ void parse_boot_command_line(void)
int rc;
__kaslr_enabled = IS_ENABLED(CONFIG_RANDOMIZE_BASE);
- args = strcpy(command_line_buf, early_command_line);
+ strscpy(command_line_buf, early_command_line);
+ args = command_line_buf;
while (*args) {
args = next_arg(args, &param, &val);
@@ -294,6 +283,9 @@ void parse_boot_command_line(void)
if (!strcmp(param, "facilities") && val)
modify_fac_list(val);
+ if (!strcmp(param, "debug-alternative"))
+ alt_debug_setup(val);
+
if (!strcmp(param, "nokaslr"))
__kaslr_enabled = 0;
@@ -303,6 +295,11 @@ void parse_boot_command_line(void)
cmma_flag = 0;
}
+#ifdef CONFIG_STACKPROTECTOR
+ if (!strcmp(param, "debug_stackprotector"))
+ stack_protector_debug = 1;
+#endif
+
#if IS_ENABLED(CONFIG_KVM)
if (!strcmp(param, "prot_virt")) {
rc = kstrtobool(val, &enabled);
@@ -310,5 +307,25 @@ void parse_boot_command_line(void)
prot_virt_host = 1;
}
#endif
+ if (!strcmp(param, "relocate_lowcore") && test_facility(193))
+ set_machine_feature(MFEATURE_LOWCORE);
+ if (!strcmp(param, "earlyprintk"))
+ boot_earlyprintk = true;
+ if (!strcmp(param, "debug"))
+ boot_console_loglevel = CONSOLE_LOGLEVEL_DEBUG;
+ if (!strcmp(param, "bootdebug")) {
+ bootdebug = true;
+ if (val)
+ strscpy(bootdebug_filter, val);
+ }
+ if (!strcmp(param, "quiet"))
+ boot_console_loglevel = CONSOLE_LOGLEVEL_QUIET;
+ if (!strcmp(param, "ignore_loglevel"))
+ boot_ignore_loglevel = true;
+ if (!strcmp(param, "loglevel")) {
+ boot_console_loglevel = simple_strtoull(val, NULL, 10);
+ if (boot_console_loglevel < CONSOLE_LOGLEVEL_MIN)
+ boot_console_loglevel = CONSOLE_LOGLEVEL_MIN;
+ }
}
}
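
parse_boot_command_line() keeps walking space-separated param=val tokens and now also recognizes earlyprintk, bootdebug, loglevel and related options. Below is a minimal standalone sketch of that parsing pattern; next_arg() here is a simplified stand-in for the kernel helper of the same name, not its real implementation.

/* Hedged sketch of the param=val parsing loop used above. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *next_arg(char *args, char **param, char **val)
{
	*param = args;
	*val = NULL;
	while (*args && *args != ' ') {
		if (*args == '=' && !*val) {
			*args = '\0';
			*val = args + 1;
		}
		args++;
	}
	while (*args == ' ')
		*args++ = '\0';
	return args;
}

int main(void)
{
	char cmdline[] = "quiet loglevel=7 bootdebug=vmem,startup";
	char *args = cmdline, *param, *val;
	long loglevel = 4;

	while (*args) {
		args = next_arg(args, &param, &val);
		if (!strcmp(param, "loglevel") && val)
			loglevel = strtol(val, NULL, 10);
		if (!strcmp(param, "bootdebug") && val)
			printf("bootdebug filter: %s\n", val);
	}
	printf("console loglevel: %ld\n", loglevel);
	return 0;
}
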
diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c
index 1803035e68d2..f73cd757a5f7 100644
--- a/arch/s390/boot/ipl_report.c
+++ b/arch/s390/boot/ipl_report.c
@@ -30,7 +30,6 @@ static unsigned long get_cert_comp_list_size(void)
{
struct ipl_rb_certificate_entry *cert;
struct ipl_rb_component_entry *comp;
- size_t size;
/*
* Find the length for the IPL report boot data
@@ -106,7 +105,7 @@ int read_ipl_report(void)
* the IPL parameter list, then align the address to a double
* word boundary.
*/
- tmp = (unsigned long) S390_lowcore.ipl_parmblock_ptr;
+ tmp = (unsigned long)get_lowcore()->ipl_parmblock_ptr;
pl_hdr = (struct ipl_pl_hdr *) tmp;
tmp = (tmp + pl_hdr->len + 7) & -8UL;
rl_hdr = (struct ipl_rl_hdr *) tmp;
@@ -155,7 +154,7 @@ void save_ipl_cert_comp_list(void)
return;
size = get_cert_comp_list_size();
- early_ipl_comp_list_addr = physmem_alloc_top_down(RR_CERT_COMP_LIST, size, sizeof(int));
+ early_ipl_comp_list_addr = physmem_alloc_or_die(RR_CERT_COMP_LIST, size, sizeof(int));
ipl_cert_list_addr = early_ipl_comp_list_addr + early_ipl_comp_list_size;
copy_components_bootdata();
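
read_ipl_report() advances through the report by rounding each header length up to a doubleword boundary with (tmp + len + 7) & -8UL. A tiny standalone sketch of that align-up idiom:

/* Hedged sketch: the 8-byte align-up used when walking the IPL report. */
#include <stdio.h>

static unsigned long align_up8(unsigned long x)
{
	return (x + 7) & -8UL;
}

int main(void)
{
	printf("%lu %lu %lu\n", align_up8(0), align_up8(1), align_up8(17));
	/* prints: 0 8 24 */
	return 0;
}
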
diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c
index bd3bf5ef472d..941f4c9e27cc 100644
--- a/arch/s390/boot/kaslr.c
+++ b/arch/s390/boot/kaslr.c
@@ -32,7 +32,7 @@ struct prng_parm {
static int check_prng(void)
{
if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG)) {
- sclp_early_printk("KASLR disabled: CPU has no PRNG\n");
+ boot_warn("KASLR disabled: CPU has no PRNG\n");
return 0;
}
if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
@@ -168,7 +168,7 @@ static unsigned long iterate_valid_positions(unsigned long size, unsigned long a
* cannot have chains.
*
* On the other hand, "dynamic" or "repetitive" allocations are done via
- * physmem_alloc_top_down(). These allocations are tightly packed together
+ * physmem_alloc_or_die(). These allocations are tightly packed together
* top down from the end of online memory. physmem_alloc_pos represents
* current position where those allocations start.
*
diff --git a/arch/s390/boot/kmsan.c b/arch/s390/boot/kmsan.c
new file mode 100644
index 000000000000..e7b3ac48143e
--- /dev/null
+++ b/arch/s390/boot/kmsan.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kmsan-checks.h>
+
+void kmsan_unpoison_memory(const void *address, size_t size)
+{
+}
diff --git a/arch/s390/boot/pgm_check.c b/arch/s390/boot/pgm_check.c
new file mode 100644
index 000000000000..fa621fa5bc02
--- /dev/null
+++ b/arch/s390/boot/pgm_check.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/stdarg.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <asm/stacktrace.h>
+#include <asm/boot_data.h>
+#include <asm/lowcore.h>
+#include <asm/setup.h>
+#include <asm/sclp.h>
+#include <asm/uv.h>
+#include "boot.h"
+
+void print_stacktrace(unsigned long sp)
+{
+ struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start,
+ (unsigned long)_stack_end };
+ bool first = true;
+
+ boot_emerg("Call Trace:\n");
+ while (!(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame))) {
+ struct stack_frame *sf = (struct stack_frame *)sp;
+
+ if (first)
+ boot_emerg("(sp:%016lx [<%016lx>] %pS)\n", sp, sf->gprs[8], (void *)sf->gprs[8]);
+ else
+ boot_emerg(" sp:%016lx [<%016lx>] %pS\n", sp, sf->gprs[8], (void *)sf->gprs[8]);
+ if (sf->back_chain <= sp)
+ break;
+ sp = sf->back_chain;
+ first = false;
+ }
+}
+
+extern struct exception_table_entry __start___ex_table[];
+extern struct exception_table_entry __stop___ex_table[];
+
+static inline unsigned long extable_insn(const struct exception_table_entry *x)
+{
+ return (unsigned long)&x->insn + x->insn;
+}
+
+static bool ex_handler(struct pt_regs *regs)
+{
+ const struct exception_table_entry *ex;
+
+ for (ex = __start___ex_table; ex < __stop___ex_table; ex++) {
+ if (extable_insn(ex) != regs->psw.addr)
+ continue;
+ if (ex->type != EX_TYPE_FIXUP)
+ return false;
+ regs->psw.addr = extable_fixup(ex);
+ return true;
+ }
+ return false;
+}
+
+void do_pgm_check(struct pt_regs *regs)
+{
+ struct psw_bits *psw = &psw_bits(regs->psw);
+ unsigned long *gpregs = regs->gprs;
+
+ if (ex_handler(regs))
+ return;
+ if (bootdebug)
+ boot_rb_dump();
+ boot_emerg("Linux version %s\n", kernel_version);
+ if (!is_prot_virt_guest() && early_command_line[0])
+ boot_emerg("Kernel command line: %s\n", early_command_line);
+ boot_emerg("Kernel fault: interruption code %04x ilc:%d\n",
+ regs->int_code & 0xffff, regs->int_code >> 17);
+ if (kaslr_enabled()) {
+ boot_emerg("Kernel random base: %lx\n", __kaslr_offset);
+ boot_emerg("Kernel random base phys: %lx\n", __kaslr_offset_phys);
+ }
+ boot_emerg("PSW : %016lx %016lx (%pS)\n",
+ regs->psw.mask, regs->psw.addr, (void *)regs->psw.addr);
+ boot_emerg(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n",
+ psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck,
+ psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri, psw->eaba);
+ boot_emerg("GPRS: %016lx %016lx %016lx %016lx\n", gpregs[0], gpregs[1], gpregs[2], gpregs[3]);
+ boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[4], gpregs[5], gpregs[6], gpregs[7]);
+ boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[8], gpregs[9], gpregs[10], gpregs[11]);
+ boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]);
+ print_stacktrace(gpregs[15]);
+ boot_emerg("Last Breaking-Event-Address:\n");
+ boot_emerg(" [<%016lx>] %pS\n", regs->last_break, (void *)regs->last_break);
+ /* Convert to disabled wait PSW */
+ psw->io = 0;
+ psw->ext = 0;
+ psw->wait = 1;
+}
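
print_stacktrace() above walks the s390 back chain: every frame stores the address of its caller's frame, and the loop stops once the chain leaves the boot stack or stops growing. Below is a hedged standalone model with a simplified two-field frame; it is not the real struct stack_frame layout.

/* Hedged sketch of the back-chain walk in print_stacktrace(). */
#include <stdio.h>

struct frame {
	unsigned long back_chain;   /* address of the caller's frame */
	unsigned long return_addr;  /* simplified stand-in for saved gprs[8] */
};

static void walk(unsigned long sp, unsigned long stack_lo, unsigned long stack_hi)
{
	while (!(sp & 0x7) && sp >= stack_lo && sp + sizeof(struct frame) <= stack_hi) {
		struct frame *sf = (struct frame *)sp;

		printf(" sp:%016lx [<%016lx>]\n", sp, sf->return_addr);
		if (sf->back_chain <= sp)   /* chain must point to an older frame */
			break;
		sp = sf->back_chain;
	}
}

int main(void)
{
	/* two fake chained frames inside a fake "stack" array */
	unsigned long stack[8] = { 0 };
	struct frame *f0 = (struct frame *)&stack[0];
	struct frame *f1 = (struct frame *)&stack[4];

	f0->back_chain = (unsigned long)f1;
	f0->return_addr = 0x1000;
	f1->back_chain = 0;
	f1->return_addr = 0x2000;
	walk((unsigned long)f0, (unsigned long)stack,
	     (unsigned long)stack + sizeof(stack));
	return 0;
}
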
diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c
deleted file mode 100644
index ea96275b0380..000000000000
--- a/arch/s390/boot/pgm_check_info.c
+++ /dev/null
@@ -1,181 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/kernel.h>
-#include <linux/stdarg.h>
-#include <linux/string.h>
-#include <linux/ctype.h>
-#include <asm/stacktrace.h>
-#include <asm/boot_data.h>
-#include <asm/lowcore.h>
-#include <asm/setup.h>
-#include <asm/sclp.h>
-#include <asm/uv.h>
-#include "boot.h"
-
-const char hex_asc[] = "0123456789abcdef";
-
-static char *as_hex(char *dst, unsigned long val, int pad)
-{
- char *p, *end = p = dst + max(pad, (int)__fls(val | 1) / 4 + 1);
-
- for (*p-- = 0; p >= dst; val >>= 4)
- *p-- = hex_asc[val & 0x0f];
- return end;
-}
-
-static char *symstart(char *p)
-{
- while (*p)
- p--;
- return p + 1;
-}
-
-static noinline char *findsym(unsigned long ip, unsigned short *off, unsigned short *len)
-{
- /* symbol entries are in a form "10000 c4 startup\0" */
- char *a = _decompressor_syms_start;
- char *b = _decompressor_syms_end;
- unsigned long start;
- unsigned long size;
- char *pivot;
- char *endp;
-
- while (a < b) {
- pivot = symstart(a + (b - a) / 2);
- start = simple_strtoull(pivot, &endp, 16);
- size = simple_strtoull(endp + 1, &endp, 16);
- if (ip < start) {
- b = pivot;
- continue;
- }
- if (ip > start + size) {
- a = pivot + strlen(pivot) + 1;
- continue;
- }
- *off = ip - start;
- *len = size;
- return endp + 1;
- }
- return NULL;
-}
-
-static noinline char *strsym(void *ip)
-{
- static char buf[64];
- unsigned short off;
- unsigned short len;
- char *p;
-
- p = findsym((unsigned long)ip, &off, &len);
- if (p) {
- strncpy(buf, p, sizeof(buf));
- /* reserve 15 bytes for offset/len in symbol+0x1234/0x1234 */
- p = buf + strnlen(buf, sizeof(buf) - 15);
- strcpy(p, "+0x");
- p = as_hex(p + 3, off, 0);
- strcpy(p, "/0x");
- as_hex(p + 3, len, 0);
- } else {
- as_hex(buf, (unsigned long)ip, 16);
- }
- return buf;
-}
-
-void decompressor_printk(const char *fmt, ...)
-{
- char buf[1024] = { 0 };
- char *end = buf + sizeof(buf) - 1; /* make sure buf is 0 terminated */
- unsigned long pad;
- char *p = buf;
- va_list args;
-
- va_start(args, fmt);
- for (; p < end && *fmt; fmt++) {
- if (*fmt != '%') {
- *p++ = *fmt;
- continue;
- }
- pad = isdigit(*++fmt) ? simple_strtol(fmt, (char **)&fmt, 10) : 0;
- switch (*fmt) {
- case 's':
- p = buf + strlcat(buf, va_arg(args, char *), sizeof(buf));
- break;
- case 'p':
- if (*++fmt != 'S')
- goto out;
- p = buf + strlcat(buf, strsym(va_arg(args, void *)), sizeof(buf));
- break;
- case 'l':
- if (*++fmt != 'x' || end - p <= max(sizeof(long) * 2, pad))
- goto out;
- p = as_hex(p, va_arg(args, unsigned long), pad);
- break;
- case 'x':
- if (end - p <= max(sizeof(int) * 2, pad))
- goto out;
- p = as_hex(p, va_arg(args, unsigned int), pad);
- break;
- default:
- goto out;
- }
- }
-out:
- va_end(args);
- sclp_early_printk(buf);
-}
-
-void print_stacktrace(unsigned long sp)
-{
- struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start,
- (unsigned long)_stack_end };
- bool first = true;
-
- decompressor_printk("Call Trace:\n");
- while (!(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame))) {
- struct stack_frame *sf = (struct stack_frame *)sp;
-
- decompressor_printk(first ? "(sp:%016lx [<%016lx>] %pS)\n" :
- " sp:%016lx [<%016lx>] %pS\n",
- sp, sf->gprs[8], (void *)sf->gprs[8]);
- if (sf->back_chain <= sp)
- break;
- sp = sf->back_chain;
- first = false;
- }
-}
-
-void print_pgm_check_info(void)
-{
- unsigned long *gpregs = (unsigned long *)S390_lowcore.gpregs_save_area;
- struct psw_bits *psw = &psw_bits(S390_lowcore.psw_save_area);
-
- decompressor_printk("Linux version %s\n", kernel_version);
- if (!is_prot_virt_guest() && early_command_line[0])
- decompressor_printk("Kernel command line: %s\n", early_command_line);
- decompressor_printk("Kernel fault: interruption code %04x ilc:%x\n",
- S390_lowcore.pgm_code, S390_lowcore.pgm_ilc >> 1);
- if (kaslr_enabled()) {
- decompressor_printk("Kernel random base: %lx\n", __kaslr_offset);
- decompressor_printk("Kernel random base phys: %lx\n", __kaslr_offset_phys);
- }
- decompressor_printk("PSW : %016lx %016lx (%pS)\n",
- S390_lowcore.psw_save_area.mask,
- S390_lowcore.psw_save_area.addr,
- (void *)S390_lowcore.psw_save_area.addr);
- decompressor_printk(
- " R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n",
- psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck,
- psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri,
- psw->eaba);
- decompressor_printk("GPRS: %016lx %016lx %016lx %016lx\n",
- gpregs[0], gpregs[1], gpregs[2], gpregs[3]);
- decompressor_printk(" %016lx %016lx %016lx %016lx\n",
- gpregs[4], gpregs[5], gpregs[6], gpregs[7]);
- decompressor_printk(" %016lx %016lx %016lx %016lx\n",
- gpregs[8], gpregs[9], gpregs[10], gpregs[11]);
- decompressor_printk(" %016lx %016lx %016lx %016lx\n",
- gpregs[12], gpregs[13], gpregs[14], gpregs[15]);
- print_stacktrace(S390_lowcore.gpregs_save_area[15]);
- decompressor_printk("Last Breaking-Event-Address:\n");
- decompressor_printk(" [<%016lx>] %pS\n", (unsigned long)S390_lowcore.pgm_last_break,
- (void *)S390_lowcore.pgm_last_break);
-}
diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c
index 0cf79826eef9..1f2ca5435838 100644
--- a/arch/s390/boot/physmem_info.c
+++ b/arch/s390/boot/physmem_info.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#define boot_fmt(fmt) "physmem: " fmt
#include <linux/processor.h>
#include <linux/errno.h>
#include <linux/init.h>
@@ -9,6 +10,7 @@
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sclp.h>
+#include <asm/asm.h>
#include <asm/uv.h>
#include "decompressor.h"
#include "boot.h"
@@ -27,7 +29,7 @@ static struct physmem_range *__get_physmem_range_ptr(u32 n)
return &physmem_info.online[n];
if (unlikely(!physmem_info.online_extended)) {
physmem_info.online_extended = (struct physmem_range *)physmem_alloc_range(
- RR_MEM_DETECT_EXTENDED, ENTRIES_EXTENDED_MAX, sizeof(long), 0,
+ RR_MEM_DETECT_EXT, ENTRIES_EXTENDED_MAX, sizeof(long), 0,
physmem_alloc_pos, true);
}
return &physmem_info.online_extended[n - MEM_INLINED_ENTRIES];
@@ -57,37 +59,25 @@ void add_physmem_online_range(u64 start, u64 end)
static int __diag260(unsigned long rx1, unsigned long rx2)
{
- unsigned long reg1, reg2, ry;
union register_pair rx;
- psw_t old;
- int rc;
+ int cc, exception;
+ unsigned long ry;
rx.even = rx1;
rx.odd = rx2;
ry = 0x10; /* storage configuration */
- rc = -1; /* fail */
- asm volatile(
- " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n"
- " epsw %[reg1],%[reg2]\n"
- " st %[reg1],0(%[psw_pgm])\n"
- " st %[reg2],4(%[psw_pgm])\n"
- " larl %[reg1],1f\n"
- " stg %[reg1],8(%[psw_pgm])\n"
+ exception = 1;
+ asm_inline volatile(
" diag %[rx],%[ry],0x260\n"
- " ipm %[rc]\n"
- " srl %[rc],28\n"
- "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n"
- : [reg1] "=&d" (reg1),
- [reg2] "=&a" (reg2),
- [rc] "+&d" (rc),
- [ry] "+&d" (ry),
- "+Q" (S390_lowcore.program_new_psw),
- "=Q" (old)
- : [rx] "d" (rx.pair),
- [psw_old] "a" (&old),
- [psw_pgm] "a" (&S390_lowcore.program_new_psw)
- : "cc", "memory");
- return rc == 0 ? ry : -1;
+ "0: lhi %[exc],0\n"
+ "1:\n"
+ CC_IPM(cc)
+ EX_TABLE(0b, 1b)
+ : CC_OUT(cc, cc), [exc] "+d" (exception), [ry] "+d" (ry)
+ : [rx] "d" (rx.pair)
+ : CC_CLOBBER_LIST("memory"));
+ cc = exception ? -1 : CC_TRANSFORM(cc);
+ return cc == 0 ? ry : -1;
}
static int diag260(void)
@@ -109,33 +99,44 @@ static int diag260(void)
return 0;
}
+#define DIAG500_SC_STOR_LIMIT 4
+
+static int diag500_storage_limit(unsigned long *max_physmem_end)
+{
+ unsigned long storage_limit;
+
+ asm_inline volatile(
+ " lghi %%r1,%[subcode]\n"
+ " lghi %%r2,0\n"
+ " diag %%r2,%%r4,0x500\n"
+ "0: lgr %[slimit],%%r2\n"
+ EX_TABLE(0b, 0b)
+ : [slimit] "=d" (storage_limit)
+ : [subcode] "i" (DIAG500_SC_STOR_LIMIT)
+ : "memory", "1", "2");
+ if (!storage_limit)
+ return -EINVAL;
+ /* Convert inclusive end to exclusive end */
+ *max_physmem_end = storage_limit + 1;
+ return 0;
+}
+
static int tprot(unsigned long addr)
{
- unsigned long reg1, reg2;
- int rc = -EFAULT;
- psw_t old;
-
- asm volatile(
- " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n"
- " epsw %[reg1],%[reg2]\n"
- " st %[reg1],0(%[psw_pgm])\n"
- " st %[reg2],4(%[psw_pgm])\n"
- " larl %[reg1],1f\n"
- " stg %[reg1],8(%[psw_pgm])\n"
+ int cc, exception;
+
+ exception = 1;
+ asm_inline volatile(
" tprot 0(%[addr]),0\n"
- " ipm %[rc]\n"
- " srl %[rc],28\n"
- "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n"
- : [reg1] "=&d" (reg1),
- [reg2] "=&a" (reg2),
- [rc] "+&d" (rc),
- "=Q" (S390_lowcore.program_new_psw.addr),
- "=Q" (old)
- : [psw_old] "a" (&old),
- [psw_pgm] "a" (&S390_lowcore.program_new_psw),
- [addr] "a" (addr)
- : "cc", "memory");
- return rc;
+ "0: lhi %[exc],0\n"
+ "1:\n"
+ CC_IPM(cc)
+ EX_TABLE(0b, 1b)
+ : CC_OUT(cc, cc), [exc] "+d" (exception)
+ : [addr] "a" (addr)
+ : CC_CLOBBER_LIST("memory"));
+ cc = exception ? -EFAULT : CC_TRANSFORM(cc);
+ return cc;
}
static unsigned long search_mem_end(void)
@@ -157,30 +158,48 @@ unsigned long detect_max_physmem_end(void)
{
unsigned long max_physmem_end = 0;
- if (!sclp_early_get_memsize(&max_physmem_end)) {
+ if (!diag500_storage_limit(&max_physmem_end)) {
+ physmem_info.info_source = MEM_DETECT_DIAG500_STOR_LIMIT;
+ } else if (!sclp_early_get_memsize(&max_physmem_end)) {
physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO;
} else {
max_physmem_end = search_mem_end();
physmem_info.info_source = MEM_DETECT_BIN_SEARCH;
}
+ boot_debug("Max physical memory: 0x%016lx (info source: %s)\n", max_physmem_end,
+ get_physmem_info_source());
return max_physmem_end;
}
void detect_physmem_online_ranges(unsigned long max_physmem_end)
{
+ unsigned long start, end;
+ int i;
+
if (!sclp_early_read_storage_info()) {
physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO;
+ } else if (physmem_info.info_source == MEM_DETECT_DIAG500_STOR_LIMIT) {
+ unsigned long online_end;
+
+ if (!sclp_early_get_memsize(&online_end)) {
+ physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO;
+ add_physmem_online_range(0, online_end);
+ }
} else if (!diag260()) {
physmem_info.info_source = MEM_DETECT_DIAG260;
} else if (max_physmem_end) {
add_physmem_online_range(0, max_physmem_end);
}
+ boot_debug("Online memory ranges (info source: %s):\n", get_physmem_info_source());
+ for_each_physmem_online_range(i, &start, &end)
+ boot_debug(" online [%d]: 0x%016lx-0x%016lx\n", i, start, end);
}
void physmem_set_usable_limit(unsigned long limit)
{
physmem_info.usable = limit;
physmem_alloc_pos = limit;
+ boot_debug("Usable memory limit: 0x%016lx\n", limit);
}
static void die_oom(unsigned long size, unsigned long align, unsigned long min, unsigned long max)
@@ -190,38 +209,45 @@ static void die_oom(unsigned long size, unsigned long align, unsigned long min,
enum reserved_range_type t;
int i;
- decompressor_printk("Linux version %s\n", kernel_version);
+ boot_emerg("Linux version %s\n", kernel_version);
if (!is_prot_virt_guest() && early_command_line[0])
- decompressor_printk("Kernel command line: %s\n", early_command_line);
- decompressor_printk("Out of memory allocating %lx bytes %lx aligned in range %lx:%lx\n",
- size, align, min, max);
- decompressor_printk("Reserved memory ranges:\n");
+ boot_emerg("Kernel command line: %s\n", early_command_line);
+ boot_emerg("Out of memory allocating %lu bytes 0x%lx aligned in range %lx:%lx\n",
+ size, align, min, max);
+ boot_emerg("Reserved memory ranges:\n");
for_each_physmem_reserved_range(t, range, &start, &end) {
- decompressor_printk("%016lx %016lx %s\n", start, end, get_rr_type_name(t));
+ boot_emerg("%016lx %016lx %s\n", start, end, get_rr_type_name(t));
total_reserved_mem += end - start;
}
- decompressor_printk("Usable online memory ranges (info source: %s [%x]):\n",
- get_physmem_info_source(), physmem_info.info_source);
+ boot_emerg("Usable online memory ranges (info source: %s [%d]):\n",
+ get_physmem_info_source(), physmem_info.info_source);
for_each_physmem_usable_range(i, &start, &end) {
- decompressor_printk("%016lx %016lx\n", start, end);
+ boot_emerg("%016lx %016lx\n", start, end);
total_mem += end - start;
}
- decompressor_printk("Usable online memory total: %lx Reserved: %lx Free: %lx\n",
- total_mem, total_reserved_mem,
- total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0);
- print_stacktrace(current_frame_address());
- sclp_early_printk("\n\n -- System halted\n");
- disabled_wait();
+ boot_emerg("Usable online memory total: %lu Reserved: %lu Free: %lu\n",
+ total_mem, total_reserved_mem,
+ total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0);
+ boot_panic("Oom\n");
}
-void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size)
+static void _physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size)
{
physmem_info.reserved[type].start = addr;
physmem_info.reserved[type].end = addr + size;
}
+void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size)
+{
+ _physmem_reserve(type, addr, size);
+ boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Reserve:", addr, addr + size,
+ get_rr_type_name(type));
+}
+
void physmem_free(enum reserved_range_type type)
{
+ boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Free:", physmem_info.reserved[type].start,
+ physmem_info.reserved[type].end, get_rr_type_name(type));
physmem_info.reserved[type].start = 0;
physmem_info.reserved[type].end = 0;
}
@@ -288,41 +314,73 @@ unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long s
max = min(max, physmem_alloc_pos);
addr = __physmem_alloc_range(size, align, min, max, 0, NULL, die_on_oom);
if (addr)
- physmem_reserve(type, addr, size);
+ _physmem_reserve(type, addr, size);
+ boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Alloc range:", addr, addr + size,
+ get_rr_type_name(type));
return addr;
}
-unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size,
- unsigned long align)
+unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size,
+ unsigned long align, bool die_on_oom)
{
struct reserved_range *range = &physmem_info.reserved[type];
- struct reserved_range *new_range;
+ struct reserved_range *new_range = NULL;
unsigned int ranges_left;
unsigned long addr;
addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, physmem_alloc_ranges,
- &ranges_left, true);
+ &ranges_left, die_on_oom);
+ if (!addr)
+ return 0;
/* if not a consecutive allocation of the same type or first allocation */
if (range->start != addr + size) {
if (range->end) {
- physmem_alloc_pos = __physmem_alloc_range(
- sizeof(struct reserved_range), 0, 0, physmem_alloc_pos,
- physmem_alloc_ranges, &ranges_left, true);
- new_range = (struct reserved_range *)physmem_alloc_pos;
+ addr = __physmem_alloc_range(sizeof(struct reserved_range), 0, 0,
+ physmem_alloc_pos, physmem_alloc_ranges,
+ &ranges_left, true);
+ new_range = (struct reserved_range *)addr;
+ addr = __physmem_alloc_range(size, align, 0, addr, ranges_left,
+ &ranges_left, die_on_oom);
+ if (!addr)
+ return 0;
*new_range = *range;
range->chain = new_range;
- addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos,
- ranges_left, &ranges_left, true);
}
range->end = addr + size;
}
+ if (type != RR_VMEM) {
+ boot_debug("%-14s 0x%016lx-0x%016lx %-20s align 0x%lx split %d\n", "Alloc topdown:",
+ addr, addr + size, get_rr_type_name(type), align, !!new_range);
+ }
range->start = addr;
physmem_alloc_pos = addr;
physmem_alloc_ranges = ranges_left;
return addr;
}
+unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size,
+ unsigned long align)
+{
+ return physmem_alloc(type, size, align, true);
+}
+
unsigned long get_physmem_alloc_pos(void)
{
return physmem_alloc_pos;
}
+
+void dump_physmem_reserved(void)
+{
+ struct reserved_range *range;
+ enum reserved_range_type t;
+ unsigned long start, end;
+
+ boot_debug("Reserved memory ranges:\n");
+ for_each_physmem_reserved_range(t, range, &start, &end) {
+ if (end) {
+ boot_debug("%-14s 0x%016lx-0x%016lx @%012lx chain %012lx\n",
+ get_rr_type_name(t), start, end, (unsigned long)range,
+ (unsigned long)range->chain);
+ }
+ }
+}
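
physmem_alloc() hands out memory top down from physmem_alloc_pos and merges a consecutive allocation of the same type into the existing reserved range. Below is a simplified standalone model of that bump allocator; it omits the range chaining and OOM handling shown above, and the starting position is an arbitrary assumption.

/* Hedged sketch of the top-down bump allocation behind physmem_alloc(). */
#include <stdio.h>

struct range { unsigned long start, end; };

static unsigned long alloc_pos = 0x10000000;  /* end of usable memory, assumed */

static unsigned long alloc_top_down(struct range *r, unsigned long size, unsigned long align)
{
	unsigned long addr = (alloc_pos - size) & ~(align - 1);

	if (r->start != addr + size)  /* not adjacent: start a new range */
		r->end = addr + size;
	r->start = addr;
	alloc_pos = addr;
	return addr;
}

int main(void)
{
	struct range r = { 0, 0 };

	printf("%lx\n", alloc_top_down(&r, 0x1000, 0x1000));
	printf("%lx\n", alloc_top_down(&r, 0x800, 0x100));   /* extends the range */
	printf("range %lx-%lx\n", r.start, r.end);
	return 0;
}
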
diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c
new file mode 100644
index 000000000000..4bb6bc95704e
--- /dev/null
+++ b/arch/s390/boot/printk.c
@@ -0,0 +1,299 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/stdarg.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <asm/stacktrace.h>
+#include <asm/boot_data.h>
+#include <asm/sections.h>
+#include <asm/lowcore.h>
+#include <asm/setup.h>
+#include <asm/timex.h>
+#include <asm/sclp.h>
+#include <asm/uv.h>
+#include "boot.h"
+
+int boot_console_loglevel = CONFIG_CONSOLE_LOGLEVEL_DEFAULT;
+bool boot_ignore_loglevel;
+char __bootdata(boot_rb)[PAGE_SIZE * 2];
+bool __bootdata(boot_earlyprintk);
+size_t __bootdata(boot_rb_off);
+char __bootdata(bootdebug_filter)[128];
+bool __bootdata(bootdebug);
+
+static void boot_rb_add(const char *str, size_t len)
+{
+ /* leave double '\0' in the end */
+ size_t avail = sizeof(boot_rb) - boot_rb_off - 1;
+
+ /* store strings separated by '\0' */
+ if (len + 1 > avail)
+ boot_rb_off = 0;
+ avail = sizeof(boot_rb) - boot_rb_off - 1;
+ strscpy(boot_rb + boot_rb_off, str, avail);
+ boot_rb_off += len + 1;
+}
+
+static void print_rb_entry(const char *str)
+{
+ sclp_early_printk(printk_skip_level(str));
+}
+
+static bool debug_messages_printed(void)
+{
+ return boot_earlyprintk && (boot_ignore_loglevel || boot_console_loglevel > LOGLEVEL_DEBUG);
+}
+
+void boot_rb_dump(void)
+{
+ if (debug_messages_printed())
+ return;
+ sclp_early_printk("Boot messages ring buffer:\n");
+ boot_rb_foreach(print_rb_entry);
+}
+
+const char hex_asc[] = "0123456789abcdef";
+
+static char *as_hex(char *dst, unsigned long val, int pad)
+{
+ char *p = dst + max(pad, (int)__fls(val | 1) / 4 + 1);
+
+ for (*p-- = '\0'; p >= dst; val >>= 4)
+ *p-- = hex_asc[val & 0x0f];
+ return dst;
+}
+
+#define MAX_NUMLEN 21
+static char *as_dec(char *buf, unsigned long val, bool is_signed)
+{
+ bool negative = false;
+ char *p = buf + MAX_NUMLEN;
+
+ if (is_signed && (long)val < 0) {
+ val = (val == LONG_MIN ? LONG_MIN : -(long)val);
+ negative = true;
+ }
+
+ *--p = '\0';
+ do {
+ *--p = '0' + (val % 10);
+ val /= 10;
+ } while (val);
+
+ if (negative)
+ *--p = '-';
+ return p;
+}
+
+static ssize_t strpad(char *dst, size_t dst_size, const char *src,
+ int _pad, bool zero_pad, bool decimal)
+{
+ ssize_t len = strlen(src), pad = _pad;
+ char *p = dst;
+
+ if (max(len, abs(pad)) >= dst_size)
+ return -E2BIG;
+
+ if (pad > len) {
+ if (decimal && zero_pad && *src == '-') {
+ *p++ = '-';
+ src++;
+ len--;
+ pad--;
+ }
+ memset(p, zero_pad ? '0' : ' ', pad - len);
+ p += pad - len;
+ }
+ memcpy(p, src, len);
+ p += len;
+ if (pad < 0 && -pad > len) {
+ memset(p, ' ', -pad - len);
+ p += -pad - len;
+ }
+ *p = '\0';
+ return p - dst;
+}
+
+static char *symstart(char *p)
+{
+ while (*p)
+ p--;
+ return p + 1;
+}
+
+static noinline char *findsym(unsigned long ip, unsigned short *off, unsigned short *len)
+{
+ /* symbol entries are in a form "10000 c4 startup\0" */
+ char *a = _decompressor_syms_start;
+ char *b = _decompressor_syms_end;
+ unsigned long start;
+ unsigned long size;
+ char *pivot;
+ char *endp;
+
+ while (a < b) {
+ pivot = symstart(a + (b - a) / 2);
+ start = simple_strtoull(pivot, &endp, 16);
+ size = simple_strtoull(endp + 1, &endp, 16);
+ if (ip < start) {
+ b = pivot;
+ continue;
+ }
+ if (ip > start + size) {
+ a = pivot + strlen(pivot) + 1;
+ continue;
+ }
+ *off = ip - start;
+ *len = size;
+ return endp + 1;
+ }
+ return NULL;
+}
+
+#define MAX_SYMLEN 64
+static noinline char *strsym(char *buf, void *ip)
+{
+ unsigned short off;
+ unsigned short len;
+ char *p;
+
+ p = findsym((unsigned long)ip, &off, &len);
+ if (p) {
+ strscpy(buf, p, MAX_SYMLEN);
+ /* reserve 15 bytes for offset/len in symbol+0x1234/0x1234 */
+ p = buf + strnlen(buf, MAX_SYMLEN - 15);
+ strscpy(p, "+0x", MAX_SYMLEN - (p - buf));
+ as_hex(p + 3, off, 0);
+ strcat(p, "/0x");
+ as_hex(p + strlen(p), len, 0);
+ } else {
+ as_hex(buf, (unsigned long)ip, 16);
+ }
+ return buf;
+}
+
+static inline int printk_loglevel(const char *buf)
+{
+ if (buf[0] == KERN_SOH_ASCII && buf[1]) {
+ switch (buf[1]) {
+ case '0' ... '7':
+ return buf[1] - '0';
+ }
+ }
+ return MESSAGE_LOGLEVEL_DEFAULT;
+}
+
+static void boot_console_earlyprintk(const char *buf)
+{
+ int level = printk_loglevel(buf);
+
+ /* always print emergency messages */
+ if (level > LOGLEVEL_EMERG && !boot_earlyprintk)
+ return;
+ buf = printk_skip_level(buf);
+ /* print debug messages only when bootdebug is enabled */
+ if (level == LOGLEVEL_DEBUG && (!bootdebug || !bootdebug_filter_match(skip_timestamp(buf))))
+ return;
+ if (boot_ignore_loglevel || level < boot_console_loglevel)
+ sclp_early_printk(buf);
+}
+
+static char *add_timestamp(char *buf)
+{
+#ifdef CONFIG_PRINTK_TIME
+ unsigned long ns = tod_to_ns(__get_tod_clock_monotonic());
+ char ts[MAX_NUMLEN];
+
+ *buf++ = '[';
+ buf += strpad(buf, MAX_NUMLEN, as_dec(ts, ns / NSEC_PER_SEC, 0), 5, 0, 0);
+ *buf++ = '.';
+ buf += strpad(buf, MAX_NUMLEN, as_dec(ts, (ns % NSEC_PER_SEC) / NSEC_PER_USEC, 0), 6, 1, 0);
+ *buf++ = ']';
+ *buf++ = ' ';
+#endif
+ return buf;
+}
+
+#define va_arg_len_type(args, lenmod, typemod) \
+ ((lenmod == 'l') ? va_arg(args, typemod long) : \
+ (lenmod == 'h') ? (typemod short)va_arg(args, typemod int) : \
+ (lenmod == 'H') ? (typemod char)va_arg(args, typemod int) : \
+ (lenmod == 'z') ? va_arg(args, typemod long) : \
+ va_arg(args, typemod int))
+
+int boot_printk(const char *fmt, ...)
+{
+ char buf[1024] = { 0 };
+ char *end = buf + sizeof(buf) - 1; /* make sure buf is 0 terminated */
+ bool zero_pad, decimal;
+ char *strval, *p = buf;
+ char valbuf[MAX(MAX_SYMLEN, MAX_NUMLEN)];
+ va_list args;
+ char lenmod;
+ ssize_t len;
+ int pad;
+
+ *p++ = KERN_SOH_ASCII;
+ *p++ = printk_get_level(fmt) ?: '0' + MESSAGE_LOGLEVEL_DEFAULT;
+ p = add_timestamp(p);
+ fmt = printk_skip_level(fmt);
+
+ va_start(args, fmt);
+ for (; p < end && *fmt; fmt++) {
+ if (*fmt != '%') {
+ *p++ = *fmt;
+ continue;
+ }
+ if (*++fmt == '%') {
+ *p++ = '%';
+ continue;
+ }
+ zero_pad = (*fmt == '0');
+ pad = simple_strtol(fmt, (char **)&fmt, 10);
+ lenmod = (*fmt == 'h' || *fmt == 'l' || *fmt == 'z') ? *fmt++ : 0;
+ if (lenmod == 'h' && *fmt == 'h') {
+ lenmod = 'H';
+ fmt++;
+ }
+ decimal = false;
+ switch (*fmt) {
+ case 's':
+ if (lenmod)
+ goto out;
+ strval = va_arg(args, char *);
+ zero_pad = false;
+ break;
+ case 'p':
+ if (*++fmt != 'S' || lenmod)
+ goto out;
+ strval = strsym(valbuf, va_arg(args, void *));
+ zero_pad = false;
+ break;
+ case 'd':
+ case 'i':
+ strval = as_dec(valbuf, va_arg_len_type(args, lenmod, signed), 1);
+ decimal = true;
+ break;
+ case 'u':
+ strval = as_dec(valbuf, va_arg_len_type(args, lenmod, unsigned), 0);
+ break;
+ case 'x':
+ strval = as_hex(valbuf, va_arg_len_type(args, lenmod, unsigned), 0);
+ break;
+ default:
+ goto out;
+ }
+ len = strpad(p, end - p, strval, pad, zero_pad, decimal);
+ if (len == -E2BIG)
+ break;
+ p += len;
+ }
+out:
+ va_end(args);
+ len = strlen(buf);
+ if (len) {
+ boot_rb_add(buf, len);
+ boot_console_earlyprintk(buf);
+ }
+ return len;
+}
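
boot_printk() formats numbers with as_dec()/as_hex() into a small scratch buffer and then pads the result to the requested field width. A hedged standalone sketch of the decimal conversion follows; the padding here uses printf field widths instead of the strpad() helper, and the LONG_MIN corner case is left out.

/* Hedged sketch of the number formatting used by boot_printk(). */
#include <stdio.h>

#define MAX_NUMLEN 21

static char *as_dec(char *buf, unsigned long val, int is_signed)
{
	char *p = buf + MAX_NUMLEN;
	int negative = 0;

	if (is_signed && (long)val < 0) {
		val = -(long)val;
		negative = 1;
	}
	*--p = '\0';
	do {
		*--p = '0' + (val % 10);
		val /= 10;
	} while (val);
	if (negative)
		*--p = '-';
	return p;
}

int main(void)
{
	char num[MAX_NUMLEN];

	/* "[    5.042000]"-style timestamp: seconds padded to 5, usecs to 6 */
	printf("[%5s.%06lu]\n", as_dec(num, 5, 0), 42000UL);
	printf("%s\n", as_dec(num, (unsigned long)-123L, 1));
	return 0;
}
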
diff --git a/arch/s390/boot/stackprotector.c b/arch/s390/boot/stackprotector.c
new file mode 100644
index 000000000000..68494940c12a
--- /dev/null
+++ b/arch/s390/boot/stackprotector.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define boot_fmt(fmt) "stackprot: " fmt
+
+#include "boot.h"
+#include "../kernel/stackprotector.c"
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 5a36d5538dae..f77067dfc2a8 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -1,13 +1,18 @@
// SPDX-License-Identifier: GPL-2.0
+#define boot_fmt(fmt) "startup: " fmt
#include <linux/string.h>
#include <linux/elf.h>
#include <asm/page-states.h>
#include <asm/boot_data.h>
#include <asm/extmem.h>
#include <asm/sections.h>
+#include <asm/diag288.h>
#include <asm/maccess.h>
+#include <asm/machine.h>
+#include <asm/sysinfo.h>
#include <asm/cpu_mf.h>
#include <asm/setup.h>
+#include <asm/timex.h>
#include <asm/kasan.h>
#include <asm/kexec.h>
#include <asm/sclp.h>
@@ -15,6 +20,9 @@
#include <asm/uv.h>
#include <asm/abs_lowcore.h>
#include <asm/physmem_info.h>
+#include <asm/stacktrace.h>
+#include <asm/asm-offsets.h>
+#include <asm/arch-stackprotector.h>
#include "decompressor.h"
#include "boot.h"
#include "uv.h"
@@ -30,59 +38,124 @@ unsigned long __bootdata_preserved(vmemmap_size);
unsigned long __bootdata_preserved(MODULES_VADDR);
unsigned long __bootdata_preserved(MODULES_END);
unsigned long __bootdata_preserved(max_mappable);
+unsigned long __bootdata_preserved(page_noexec_mask);
+unsigned long __bootdata_preserved(segment_noexec_mask);
+unsigned long __bootdata_preserved(region_noexec_mask);
+union tod_clock __bootdata_preserved(tod_clock_base);
+u64 __bootdata_preserved(clock_comparator_max) = -1UL;
u64 __bootdata_preserved(stfle_fac_list[16]);
struct oldmem_data __bootdata_preserved(oldmem_data);
-struct machine_info machine;
+static char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE);
-void error(char *x)
+static void detect_machine_type(void)
{
- sclp_early_printk("\n\n");
- sclp_early_printk(x);
- sclp_early_printk("\n\n -- System halted");
+ struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page;
- disabled_wait();
+ /* Check current-configuration-level */
+ if (stsi(NULL, 0, 0, 0) <= 2) {
+ set_machine_feature(MFEATURE_LPAR);
+ return;
+ }
+ /* Get virtual-machine cpu information. */
+ if (stsi(vmms, 3, 2, 2) || !vmms->count)
+ return;
+ /* Detect known hypervisors */
+ if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3))
+ set_machine_feature(MFEATURE_KVM);
+ else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4))
+ set_machine_feature(MFEATURE_VM);
+}
+
+static void detect_diag288(void)
+{
+ /* "BEGIN" in EBCDIC character set */
+ static const char cmd[] = "\xc2\xc5\xc7\xc9\xd5";
+ unsigned long action, len;
+
+ action = machine_is_vm() ? (unsigned long)cmd : LPARWDT_RESTART;
+ len = machine_is_vm() ? sizeof(cmd) : 0;
+ if (__diag288(WDT_FUNC_INIT, MIN_INTERVAL, action, len))
+ return;
+ __diag288(WDT_FUNC_CANCEL, 0, 0, 0);
+ set_machine_feature(MFEATURE_DIAG288);
+}
+
+static void detect_diag9c(void)
+{
+ unsigned int cpu;
+ int rc = 1;
+
+ cpu = stap();
+ asm_inline volatile(
+ " diag %[cpu],%%r0,0x9c\n"
+ "0: lhi %[rc],0\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [rc] "+d" (rc)
+ : [cpu] "d" (cpu)
+ : "cc", "memory");
+ if (!rc)
+ set_machine_feature(MFEATURE_DIAG9C);
+}
+
+static void reset_tod_clock(void)
+{
+ union tod_clock clk;
+
+ if (store_tod_clock_ext_cc(&clk) == 0)
+ return;
+ /* TOD clock not running. Set the clock to Unix Epoch. */
+ if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock_ext_cc(&clk))
+ disabled_wait();
+ memset(&tod_clock_base, 0, sizeof(tod_clock_base));
+ tod_clock_base.tod = TOD_UNIX_EPOCH;
+ get_lowcore()->last_update_clock = TOD_UNIX_EPOCH;
}
static void detect_facilities(void)
{
- if (test_facility(8)) {
- machine.has_edat1 = 1;
+ if (cpu_has_edat1())
local_ctl_set_bit(0, CR0_EDAT_BIT);
+ page_noexec_mask = -1UL;
+ segment_noexec_mask = -1UL;
+ region_noexec_mask = -1UL;
+ if (!cpu_has_nx()) {
+ page_noexec_mask &= ~_PAGE_NOEXEC;
+ segment_noexec_mask &= ~_SEGMENT_ENTRY_NOEXEC;
+ region_noexec_mask &= ~_REGION_ENTRY_NOEXEC;
+ }
+ if (IS_ENABLED(CONFIG_PCI) && test_facility(153))
+ set_machine_feature(MFEATURE_PCI_MIO);
+ reset_tod_clock();
+ if (test_facility(139) && (tod_clock_base.tod >> 63)) {
+ /* Enable signed clock comparator comparisons */
+ set_machine_feature(MFEATURE_SCC);
+ clock_comparator_max = -1UL >> 1;
+ local_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SIGN_BIT);
+ }
+ if (test_facility(50) && test_facility(73)) {
+ set_machine_feature(MFEATURE_TX);
+ local_ctl_set_bit(0, CR0_TRANSACTIONAL_EXECUTION_BIT);
}
- if (test_facility(78))
- machine.has_edat2 = 1;
- if (test_facility(130))
- machine.has_nx = 1;
+ if (cpu_has_vx())
+ local_ctl_set_bit(0, CR0_VECTOR_BIT);
}
static int cmma_test_essa(void)
{
- unsigned long reg1, reg2, tmp = 0;
+ unsigned long tmp = 0;
int rc = 1;
- psw_t old;
/* Test ESSA_GET_STATE */
- asm volatile(
- " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n"
- " epsw %[reg1],%[reg2]\n"
- " st %[reg1],0(%[psw_pgm])\n"
- " st %[reg2],4(%[psw_pgm])\n"
- " larl %[reg1],1f\n"
- " stg %[reg1],8(%[psw_pgm])\n"
+ asm_inline volatile(
" .insn rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n"
- " la %[rc],0\n"
- "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n"
- : [reg1] "=&d" (reg1),
- [reg2] "=&a" (reg2),
- [rc] "+&d" (rc),
- [tmp] "=&d" (tmp),
- "+Q" (S390_lowcore.program_new_psw),
- "=Q" (old)
- : [psw_old] "a" (&old),
- [psw_pgm] "a" (&S390_lowcore.program_new_psw),
- [cmd] "i" (ESSA_GET_STATE)
+ "0: lhi %[rc],0\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [rc] "+d" (rc), [tmp] "+d" (tmp)
+ : [cmd] "i" (ESSA_GET_STATE)
: "cc", "memory");
return rc;
}
@@ -101,10 +174,10 @@ static void cmma_init(void)
static void setup_lpp(void)
{
- S390_lowcore.current_pid = 0;
- S390_lowcore.lpp = LPP_MAGIC;
+ get_lowcore()->current_pid = 0;
+ get_lowcore()->lpp = LPP_MAGIC;
if (test_facility(40))
- lpp(&S390_lowcore.lpp);
+ lpp(&get_lowcore()->lpp);
}
#ifdef CONFIG_KERNEL_UNCOMPRESSED
@@ -136,17 +209,17 @@ static void rescue_initrd(unsigned long min, unsigned long max)
return;
old_addr = addr;
physmem_free(RR_INITRD);
- addr = physmem_alloc_top_down(RR_INITRD, size, 0);
+ addr = physmem_alloc_or_die(RR_INITRD, size, 0);
memmove((void *)addr, (void *)old_addr, size);
}
static void copy_bootdata(void)
{
if (__boot_data_end - __boot_data_start != vmlinux.bootdata_size)
- error(".boot.data section size mismatch");
+ boot_panic(".boot.data section size mismatch\n");
memcpy((void *)vmlinux.bootdata_off, __boot_data_start, vmlinux.bootdata_size);
if (__boot_data_preserved_end - __boot_data_preserved_start != vmlinux.bootdata_preserved_size)
- error(".boot.preserved.data section size mismatch");
+ boot_panic(".boot.preserved.data section size mismatch\n");
memcpy((void *)vmlinux.bootdata_preserved_off, __boot_data_preserved_start, vmlinux.bootdata_preserved_size);
}
@@ -160,8 +233,8 @@ static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr,
for (reloc = (int *)__vmlinux_relocs_64_start; reloc < (int *)__vmlinux_relocs_64_end; reloc++) {
loc = (long)*reloc + phys_offset;
if (loc < min_addr || loc > max_addr)
- error("64-bit relocation outside of kernel!\n");
- *(u64 *)loc += offset - __START_KERNEL;
+ boot_panic("64-bit relocation outside of kernel!\n");
+ *(u64 *)loc += offset;
}
}
@@ -176,7 +249,7 @@ static void kaslr_adjust_got(unsigned long offset)
*/
for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++) {
if (*entry)
- *entry += offset - __START_KERNEL;
+ *entry += offset;
}
}
@@ -184,12 +257,15 @@ static void kaslr_adjust_got(unsigned long offset)
* Merge information from several sources into a single ident_map_size value.
* "ident_map_size" represents the upper limit of physical memory we may ever
* reach. It might not be all online memory, but also include standby (offline)
- * memory. "ident_map_size" could be lower then actual standby or even online
+ * memory or memory areas reserved for other means (e.g., memory devices such as
+ * virtio-mem).
+ *
+ * "ident_map_size" could be lower than actual standby/reserved or even online
* memory present, due to limiting factors. We should never go above this limit.
* It is the size of our identity mapping.
*
* Consider the following factors:
- * 1. max_physmem_end - end of physical memory online or standby.
+ * 1. max_physmem_end - end of physical memory online, standby or reserved.
* Always >= end of the last online memory range (get_physmem_online_end()).
* 2. CONFIG_MAX_PHYSMEM_BITS - the maximum size of physical memory the
* kernel is able to support.
@@ -212,12 +288,16 @@ static void setup_ident_map_size(unsigned long max_physmem_end)
if (oldmem_data.start) {
__kaslr_enabled = 0;
ident_map_size = min(ident_map_size, oldmem_data.size);
+ boot_debug("kdump memory limit: 0x%016lx\n", oldmem_data.size);
} else if (ipl_block_valid && is_ipl_block_dump()) {
__kaslr_enabled = 0;
- if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size)
+ if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) {
ident_map_size = min(ident_map_size, hsa_size);
+ boot_debug("Stand-alone dump limit: 0x%016lx\n", hsa_size);
+ }
}
#endif
+ boot_debug("Identity map size: 0x%016lx\n", ident_map_size);
}
#define FIXMAP_SIZE round_up(MEMCPY_REAL_SIZE + ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore))
@@ -233,6 +313,8 @@ static unsigned long get_vmem_size(unsigned long identity_size,
vsize = round_up(SZ_2G + max_mappable, rte_size) +
round_up(vmemmap_size, rte_size) +
FIXMAP_SIZE + MODULES_LEN + KASLR_LEN;
+ if (IS_ENABLED(CONFIG_KMSAN))
+ vsize += MODULES_LEN * 2;
return size_add(vsize, vmalloc_size);
}
@@ -251,10 +333,11 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page);
/* choose kernel address space layout: 4 or 3 levels. */
- BUILD_BUG_ON(!IS_ALIGNED(__START_KERNEL, THREAD_SIZE));
+ BUILD_BUG_ON(!IS_ALIGNED(TEXT_OFFSET, THREAD_SIZE));
BUILD_BUG_ON(!IS_ALIGNED(__NO_KASLR_START_KERNEL, THREAD_SIZE));
BUILD_BUG_ON(__NO_KASLR_END_KERNEL > _REGION1_SIZE);
vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION3_SIZE);
+ boot_debug("vmem size estimated: 0x%016lx\n", vsize);
if (IS_ENABLED(CONFIG_KASAN) || __NO_KASLR_END_KERNEL > _REGION2_SIZE ||
(vsize > _REGION2_SIZE && kaslr_enabled())) {
asce_limit = _REGION1_SIZE;
@@ -278,8 +361,10 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
* otherwise asce_limit and rte_size would have been adjusted.
*/
vmax = adjust_to_uv_max(asce_limit);
+ boot_debug("%d level paging 0x%016lx vmax\n", vmax == _REGION1_SIZE ? 4 : 3, vmax);
#ifdef CONFIG_KASAN
BUILD_BUG_ON(__NO_KASLR_END_KERNEL > KASAN_SHADOW_START);
+ boot_debug("KASAN shadow area: 0x%016lx-0x%016lx\n", KASAN_SHADOW_START, KASAN_SHADOW_END);
/* force vmalloc and modules below kasan shadow */
vmax = min(vmax, KASAN_SHADOW_START);
#endif
@@ -293,27 +378,47 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
pos = 0;
kernel_end = vmax - pos * THREAD_SIZE;
kernel_start = round_down(kernel_end - kernel_size, THREAD_SIZE);
+ boot_debug("Randomization range: 0x%016lx-0x%016lx\n", vmax - kaslr_len, vmax);
+ boot_debug("kernel image: 0x%016lx-0x%016lx (kaslr)\n", kernel_start,
+ kernel_start + kernel_size);
} else if (vmax < __NO_KASLR_END_KERNEL || vsize > __NO_KASLR_END_KERNEL) {
kernel_start = round_down(vmax - kernel_size, THREAD_SIZE);
- decompressor_printk("The kernel base address is forced to %lx\n", kernel_start);
+ boot_debug("kernel image: 0x%016lx-0x%016lx (constrained)\n", kernel_start,
+ kernel_start + kernel_size);
} else {
kernel_start = __NO_KASLR_START_KERNEL;
+ boot_debug("kernel image: 0x%016lx-0x%016lx (nokaslr)\n", kernel_start,
+ kernel_start + kernel_size);
}
__kaslr_offset = kernel_start;
+ boot_debug("__kaslr_offset: 0x%016lx\n", __kaslr_offset);
MODULES_END = round_down(kernel_start, _SEGMENT_SIZE);
MODULES_VADDR = MODULES_END - MODULES_LEN;
VMALLOC_END = MODULES_VADDR;
+ if (IS_ENABLED(CONFIG_KMSAN))
+ VMALLOC_END -= MODULES_LEN * 2;
+ boot_debug("modules area: 0x%016lx-0x%016lx\n", MODULES_VADDR, MODULES_END);
/* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */
vsize = (VMALLOC_END - FIXMAP_SIZE) / 2;
vsize = round_down(vsize, _SEGMENT_SIZE);
vmalloc_size = min(vmalloc_size, vsize);
+ if (IS_ENABLED(CONFIG_KMSAN)) {
+ /* take 2/3 of vmalloc area for KMSAN shadow and origins */
+ vmalloc_size = round_down(vmalloc_size / 3, _SEGMENT_SIZE);
+ VMALLOC_END -= vmalloc_size * 2;
+ }
VMALLOC_START = VMALLOC_END - vmalloc_size;
+ boot_debug("vmalloc area: 0x%016lx-0x%016lx\n", VMALLOC_START, VMALLOC_END);
__memcpy_real_area = round_down(VMALLOC_START - MEMCPY_REAL_SIZE, PAGE_SIZE);
+ boot_debug("memcpy real area: 0x%016lx-0x%016lx\n", __memcpy_real_area,
+ __memcpy_real_area + MEMCPY_REAL_SIZE);
__abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE,
sizeof(struct lowcore));
+ boot_debug("abs lowcore: 0x%016lx-0x%016lx\n", __abs_lowcore,
+ __abs_lowcore + ABS_LOWCORE_MAP_SIZE);
/* split remaining virtual space between 1:1 mapping & vmemmap array */
pages = __abs_lowcore / (PAGE_SIZE + sizeof(struct page));
@@ -333,7 +438,11 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
BUILD_BUG_ON(MAX_DCSS_ADDR > (1UL << MAX_PHYSMEM_BITS));
max_mappable = max(ident_map_size, MAX_DCSS_ADDR);
max_mappable = min(max_mappable, vmemmap_start);
+#ifdef CONFIG_RANDOMIZE_IDENTITY_BASE
__identity_base = round_down(vmemmap_start - max_mappable, rte_size);
+#endif
+ boot_debug("identity map: 0x%016lx-0x%016lx\n", __identity_base,
+ __identity_base + ident_map_size);
return asce_limit;
}
@@ -369,6 +478,12 @@ static void kaslr_adjust_vmlinux_info(long offset)
vmlinux.init_mm_off += offset;
vmlinux.swapper_pg_dir_off += offset;
vmlinux.invalid_pg_dir_off += offset;
+ vmlinux.alt_instructions += offset;
+ vmlinux.alt_instructions_end += offset;
+#ifdef CONFIG_STACKPROTECTOR
+ vmlinux.stack_prot_start += offset;
+ vmlinux.stack_prot_end += offset;
+#endif
#ifdef CONFIG_KASAN
vmlinux.kasan_early_shadow_page_off += offset;
vmlinux.kasan_early_shadow_pte_off += offset;
@@ -378,31 +493,29 @@ static void kaslr_adjust_vmlinux_info(long offset)
#endif
}
-static void fixup_vmlinux_info(void)
-{
- vmlinux.entry -= __START_KERNEL;
- kaslr_adjust_vmlinux_info(-__START_KERNEL);
-}
-
void startup_kernel(void)
{
- unsigned long kernel_size = vmlinux.image_size + vmlinux.bss_size;
- unsigned long nokaslr_offset_phys, kaslr_large_page_offset;
- unsigned long amode31_lma = 0;
+ unsigned long vmlinux_size = vmlinux.image_size + vmlinux.bss_size;
+ unsigned long nokaslr_text_lma, text_lma = 0, amode31_lma = 0;
+ unsigned long kernel_size = TEXT_OFFSET + vmlinux_size;
+ unsigned long kaslr_large_page_offset;
unsigned long max_physmem_end;
unsigned long asce_limit;
unsigned long safe_addr;
psw_t psw;
- fixup_vmlinux_info();
setup_lpp();
+ store_ipl_parmblock();
+ uv_query_info();
+ setup_boot_command_line();
+ parse_boot_command_line();
/*
* Non-randomized kernel physical start address must be _SEGMENT_SIZE
 	 * aligned (see below).
*/
- nokaslr_offset_phys = ALIGN(mem_safe_offset(), _SEGMENT_SIZE);
- safe_addr = PAGE_ALIGN(nokaslr_offset_phys + kernel_size);
+ nokaslr_text_lma = ALIGN(mem_safe_offset(), _SEGMENT_SIZE);
+ safe_addr = PAGE_ALIGN(nokaslr_text_lma + vmlinux_size);
/*
* Reserve decompressor memory together with decompression heap,
@@ -415,13 +528,14 @@ void startup_kernel(void)
oldmem_data.start = parmarea.oldmem_base;
oldmem_data.size = parmarea.oldmem_size;
- store_ipl_parmblock();
read_ipl_report();
- uv_query_info();
sclp_early_read_info();
- setup_boot_command_line();
- parse_boot_command_line();
+ sclp_early_detect_machine_features();
detect_facilities();
+ detect_diag9c();
+ detect_machine_type();
+ /* detect_diag288() needs machine type */
+ detect_diag288();
cmma_init();
sanitize_prot_virt_host();
max_physmem_end = detect_max_physmem_end();
@@ -446,16 +560,27 @@ void startup_kernel(void)
*/
kaslr_large_page_offset = __kaslr_offset & ~_SEGMENT_MASK;
if (kaslr_enabled()) {
- unsigned long end = ident_map_size - kaslr_large_page_offset;
+ unsigned long size = vmlinux_size + kaslr_large_page_offset;
- __kaslr_offset_phys = randomize_within_range(kernel_size, _SEGMENT_SIZE, 0, end);
+ text_lma = randomize_within_range(size, _SEGMENT_SIZE, TEXT_OFFSET, ident_map_size);
}
- if (!__kaslr_offset_phys)
- __kaslr_offset_phys = nokaslr_offset_phys;
- __kaslr_offset_phys |= kaslr_large_page_offset;
+ if (!text_lma)
+ text_lma = nokaslr_text_lma;
+ text_lma |= kaslr_large_page_offset;
+
+ /*
+ * [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region is
+ * never accessed via the kernel image mapping as per the linker script:
+ *
+ * . = TEXT_OFFSET;
+ *
+ * Therefore, this region could be used for something else and does
+ * not need to be reserved. See how it is skipped in setup_vmem().
+ */
+ __kaslr_offset_phys = text_lma - TEXT_OFFSET;
kaslr_adjust_vmlinux_info(__kaslr_offset_phys);
- physmem_reserve(RR_VMLINUX, __kaslr_offset_phys, kernel_size);
- deploy_kernel((void *)__kaslr_offset_phys);
+ physmem_reserve(RR_VMLINUX, text_lma, vmlinux_size);
+ deploy_kernel((void *)text_lma);
/* vmlinux decompression is done, shrink reserved low memory */
physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end);
@@ -467,14 +592,18 @@ void startup_kernel(void)
* __vmlinux_relocs_64_end as the lower range address. However,
* .amode31 section is written to by the decompressed kernel - at
* that time the contents of .vmlinux.relocs is not needed anymore.
- * Conversly, .vmlinux.relocs is read only by the decompressor, even
+ * Conversely, .vmlinux.relocs is read only by the decompressor, even
* before the kernel started. Therefore, in case the two sections
* overlap there is no risk of corrupting any data.
*/
- if (kaslr_enabled())
- amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G);
+ if (kaslr_enabled()) {
+ unsigned long amode31_min;
+
+ amode31_min = (unsigned long)_decompressor_end;
+ amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, amode31_min, SZ_2G);
+ }
if (!amode31_lma)
- amode31_lma = __kaslr_offset_phys - vmlinux.amode31_size;
+ amode31_lma = text_lma - vmlinux.amode31_size;
physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size);
/*
@@ -490,23 +619,29 @@ void startup_kernel(void)
* - copy_bootdata() must follow setup_vmem() to propagate changes
* to bootdata made by setup_vmem()
*/
- clear_bss_section(__kaslr_offset_phys);
- kaslr_adjust_relocs(__kaslr_offset_phys, __kaslr_offset_phys + vmlinux.image_size,
+ clear_bss_section(text_lma);
+ kaslr_adjust_relocs(text_lma, text_lma + vmlinux.image_size,
__kaslr_offset, __kaslr_offset_phys);
kaslr_adjust_got(__kaslr_offset);
setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit);
+ dump_physmem_reserved();
copy_bootdata();
+ __apply_alternatives((struct alt_instr *)_vmlinux_info.alt_instructions,
+ (struct alt_instr *)_vmlinux_info.alt_instructions_end,
+ ALT_CTX_EARLY);
+ stack_protector_apply_early(text_lma);
/*
* Save KASLR offset for early dumps, before vmcore_info is set.
* Mark as uneven to distinguish from real vmcore_info pointer.
*/
- S390_lowcore.vmcore_info = __kaslr_offset_phys ? __kaslr_offset_phys | 0x1UL : 0;
+ get_lowcore()->vmcore_info = __kaslr_offset_phys ? __kaslr_offset_phys | 0x1UL : 0;
/*
* Jump to the decompressed kernel entry point and switch DAT mode on.
*/
psw.addr = __kaslr_offset + vmlinux.entry;
psw.mask = PSW_KERNEL_BITS;
- __load_psw(psw);
+ boot_debug("Starting kernel at: 0x%016lx\n", psw.addr);
+ jump_to_kernel(&psw);
}
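
When KASLR is enabled, the physical text address is segment aligned and then ORed with the virtual address's offset within a segment, so both stay congruent modulo the 1 MB segment size and the kernel text can be mapped with large pages. A standalone sketch with made-up addresses; only the congruence trick is real, the values are assumptions.

/* Hedged sketch of the large-page congruence between virtual and physical text. */
#include <stdio.h>

#define SEGMENT_SIZE 0x100000UL
#define SEGMENT_MASK (~(SEGMENT_SIZE - 1))

int main(void)
{
	unsigned long kaslr_offset = 0x3ffe7280000UL;      /* virtual, assumed */
	unsigned long large_page_offset = kaslr_offset & ~SEGMENT_MASK;
	unsigned long text_lma = 0x2500000UL;              /* segment aligned  */

	text_lma |= large_page_offset;
	printf("virt %%seg = %lx, phys %%seg = %lx\n",
	       kaslr_offset % SEGMENT_SIZE, text_lma % SEGMENT_SIZE);
	return 0;
}
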
diff --git a/arch/s390/boot/string.c b/arch/s390/boot/string.c
index faccb33b462c..bd68161434a6 100644
--- a/arch/s390/boot/string.c
+++ b/arch/s390/boot/string.c
@@ -1,11 +1,18 @@
// SPDX-License-Identifier: GPL-2.0
+#define IN_BOOT_STRING_C 1
#include <linux/ctype.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#undef CONFIG_KASAN
#undef CONFIG_KASAN_GENERIC
+#undef CONFIG_KMSAN
#include "../lib/string.c"
+/*
+ * Duplicate some functions from the common lib/string.c
+ * instead of fully including it.
+ */
+
int strncmp(const char *cs, const char *ct, size_t count)
{
unsigned char c1, c2;
@@ -22,6 +29,27 @@ int strncmp(const char *cs, const char *ct, size_t count)
return 0;
}
+ssize_t sized_strscpy(char *dst, const char *src, size_t count)
+{
+ size_t len;
+
+ if (count == 0)
+ return -E2BIG;
+ len = strnlen(src, count - 1);
+ memcpy(dst, src, len);
+ dst[len] = '\0';
+ return src[len] ? -E2BIG : len;
+}
+
+void *memset64(uint64_t *s, uint64_t v, size_t count)
+{
+ uint64_t *xs = s;
+
+ while (count--)
+ *xs++ = v;
+ return s;
+}
+
char *skip_spaces(const char *str)
{
while (isspace(*str))
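
sized_strscpy() above always NUL-terminates the destination and reports -E2BIG when the source does not fit. Below is a standalone model of that contract, using a local E2BIG constant instead of the kernel's errno definition.

/* Hedged sketch: same semantics as the strscpy() implementation above. */
#include <stdio.h>
#include <string.h>

#define E2BIG 7

static long strscpy_model(char *dst, const char *src, size_t count)
{
	size_t len;

	if (count == 0)
		return -E2BIG;
	len = strnlen(src, count - 1);
	memcpy(dst, src, len);
	dst[len] = '\0';
	return src[len] ? -E2BIG : (long)len;
}

int main(void)
{
	char small[4];

	printf("%ld '%s'\n", strscpy_model(small, "ok", sizeof(small)), small);
	printf("%ld '%s'\n", strscpy_model(small, "too long", sizeof(small)), small);
	return 0;
}
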
diff --git a/arch/s390/boot/trampoline.S b/arch/s390/boot/trampoline.S
new file mode 100644
index 000000000000..1cb5adf005ea
--- /dev/null
+++ b/arch/s390/boot/trampoline.S
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/linkage.h>
+
+# This function is identical to __load_psw(), but the lx-symbols GDB command
+# puts a breakpoint on it, so it needs to be kept separate.
+SYM_CODE_START(jump_to_kernel)
+ lpswe 0(%r2)
+SYM_CODE_END(jump_to_kernel)
diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c
index 1e66d2cbb096..4568e8f81dac 100644
--- a/arch/s390/boot/uv.c
+++ b/arch/s390/boot/uv.c
@@ -8,12 +8,8 @@
#include "uv.h"
/* will be used in arch/s390/kernel/uv.c */
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
int __bootdata_preserved(prot_virt_guest);
-#endif
-#if IS_ENABLED(CONFIG_KVM)
int __bootdata_preserved(prot_virt_host);
-#endif
struct uv_info __bootdata_preserved(uv_info);
void uv_query_info(void)
@@ -26,8 +22,8 @@ void uv_query_info(void)
if (!test_facility(158))
return;
- /* rc==0x100 means that there is additional data we do not process */
- if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != 0x100)
+ /* Ignore that there might be more data we do not process */
+ if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != UVC_RC_MORE_DATA)
return;
if (IS_ENABLED(CONFIG_KVM)) {
@@ -50,17 +46,15 @@ void uv_query_info(void)
uv_info.supp_add_secret_req_ver = uvcb.supp_add_secret_req_ver;
uv_info.supp_add_secret_pcf = uvcb.supp_add_secret_pcf;
uv_info.supp_secret_types = uvcb.supp_secret_types;
- uv_info.max_secrets = uvcb.max_secrets;
+ uv_info.max_assoc_secrets = uvcb.max_assoc_secrets;
+ uv_info.max_retr_secrets = uvcb.max_retr_secrets;
}
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) &&
test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list))
prot_virt_guest = 1;
-#endif
}
-#if IS_ENABLED(CONFIG_KVM)
unsigned long adjust_to_uv_max(unsigned long limit)
{
if (is_prot_virt_host() && uv_info.max_sec_stor_addr)
@@ -92,4 +86,3 @@ void sanitize_prot_virt_host(void)
{
prot_virt_host = is_prot_virt_host_capable();
}
-#endif
diff --git a/arch/s390/boot/uv.h b/arch/s390/boot/uv.h
index 0f3070856f8d..da4a4a8d48e0 100644
--- a/arch/s390/boot/uv.h
+++ b/arch/s390/boot/uv.h
@@ -2,21 +2,8 @@
#ifndef BOOT_UV_H
#define BOOT_UV_H
-#if IS_ENABLED(CONFIG_KVM)
unsigned long adjust_to_uv_max(unsigned long limit);
void sanitize_prot_virt_host(void);
-#else
-static inline unsigned long adjust_to_uv_max(unsigned long limit)
-{
- return limit;
-}
-static inline void sanitize_prot_virt_host(void) {}
-#endif
-
-#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
void uv_query_info(void);
-#else
-static inline void uv_query_info(void) {}
-#endif
#endif /* BOOT_UV_H */
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index 40cfce2687c4..fbe64ffdfb96 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -1,4 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
+#define boot_fmt(fmt) "vmem: " fmt
+#include <linux/cpufeature.h>
#include <linux/sched/task.h>
#include <linux/pgtable.h>
#include <linux/kasan.h>
@@ -9,6 +11,7 @@
#include <asm/ctlreg.h>
#include <asm/physmem_info.h>
#include <asm/maccess.h>
+#include <asm/machine.h>
#include <asm/abs_lowcore.h>
#include "decompressor.h"
#include "boot.h"
@@ -26,16 +29,47 @@ atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
enum populate_mode {
POPULATE_NONE,
POPULATE_DIRECT,
+ POPULATE_LOWCORE,
POPULATE_ABS_LOWCORE,
POPULATE_IDENTITY,
POPULATE_KERNEL,
#ifdef CONFIG_KASAN
+ /* KASAN modes should be last and grouped together, see is_kasan_populate_mode() */
POPULATE_KASAN_MAP_SHADOW,
POPULATE_KASAN_ZERO_SHADOW,
POPULATE_KASAN_SHALLOW
#endif
};
+#define POPULATE_MODE_NAME(t) case POPULATE_ ## t: return #t
+static inline const char *get_populate_mode_name(enum populate_mode t)
+{
+ switch (t) {
+ POPULATE_MODE_NAME(NONE);
+ POPULATE_MODE_NAME(DIRECT);
+ POPULATE_MODE_NAME(LOWCORE);
+ POPULATE_MODE_NAME(ABS_LOWCORE);
+ POPULATE_MODE_NAME(IDENTITY);
+ POPULATE_MODE_NAME(KERNEL);
+#ifdef CONFIG_KASAN
+ POPULATE_MODE_NAME(KASAN_MAP_SHADOW);
+ POPULATE_MODE_NAME(KASAN_ZERO_SHADOW);
+ POPULATE_MODE_NAME(KASAN_SHALLOW);
+#endif
+ default:
+ return "UNKNOWN";
+ }
+}
+
+static bool is_kasan_populate_mode(enum populate_mode mode)
+{
+#ifdef CONFIG_KASAN
+ return mode >= POPULATE_KASAN_MAP_SHADOW;
+#else
+ return false;
+#endif
+}
+
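The POPULATE_MODE_NAME() macro above pairs token pasting (##) for the case label with stringification (#) for the returned name, so the two can never drift apart; the comment about keeping the KASAN modes last matters because is_kasan_populate_mode() only does a >= comparison. A stand-alone sketch of the same macro technique (the color enum is invented purely for illustration):

	/* Illustrative sketch of the '##'/'#' trick used by POPULATE_MODE_NAME(). */
	#include <stdio.h>

	enum color { COLOR_RED, COLOR_GREEN, COLOR_BLUE };

	#define COLOR_NAME(t) case COLOR_ ## t: return #t

	static const char *color_name(enum color c)
	{
		switch (c) {
		COLOR_NAME(RED);	/* expands to: case COLOR_RED: return "RED"; */
		COLOR_NAME(GREEN);
		COLOR_NAME(BLUE);
		default:
			return "UNKNOWN";
		}
	}

	int main(void)
	{
		printf("%s\n", color_name(COLOR_GREEN));	/* prints GREEN */
		return 0;
	}
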
static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode);
#ifdef CONFIG_KASAN
@@ -51,9 +85,12 @@ static pte_t pte_z;
static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode)
{
- start = PAGE_ALIGN_DOWN(__sha(start));
- end = PAGE_ALIGN(__sha(end));
- pgtable_populate(start, end, mode);
+ unsigned long sha_start = PAGE_ALIGN_DOWN(__sha(start));
+ unsigned long sha_end = PAGE_ALIGN(__sha(end));
+
+ boot_debug("%-17s 0x%016lx-0x%016lx >> 0x%016lx-0x%016lx\n", get_populate_mode_name(mode),
+ start, end, sha_start, sha_end);
+ pgtable_populate(sha_start, sha_end, mode);
}
static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end)
@@ -62,13 +99,10 @@ static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kern
pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY);
p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY);
unsigned long memgap_start = 0;
- unsigned long untracked_end;
unsigned long start, end;
int i;
pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO));
- if (!machine.has_nx)
- pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC));
crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z));
crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
@@ -89,18 +123,13 @@ static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kern
}
memgap_start = end;
}
- kasan_populate(kernel_start, kernel_end, POPULATE_KASAN_MAP_SHADOW);
+ kasan_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KASAN_MAP_SHADOW);
kasan_populate(0, (unsigned long)__identity_va(0), POPULATE_KASAN_ZERO_SHADOW);
kasan_populate(AMODE31_START, AMODE31_END, POPULATE_KASAN_ZERO_SHADOW);
- if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
- untracked_end = VMALLOC_START;
- /* shallowly populate kasan shadow for vmalloc and modules */
- kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW);
- } else {
- untracked_end = MODULES_VADDR;
- }
+ /* shallowly populate kasan shadow for vmalloc and modules */
+ kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW);
/* populate kasan shadow for untracked memory */
- kasan_populate((unsigned long)__identity_va(ident_map_size), untracked_end,
+ kasan_populate((unsigned long)__identity_va(ident_map_size), VMALLOC_START,
POPULATE_KASAN_ZERO_SHADOW);
kasan_populate(kernel_end, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW);
}
@@ -207,7 +236,7 @@ static void *boot_crst_alloc(unsigned long val)
unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER;
unsigned long *table;
- table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size);
+ table = (unsigned long *)physmem_alloc_or_die(RR_VMEM, size, size);
crst_table_init(table, val);
__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
return table;
@@ -223,7 +252,7 @@ static pte_t *boot_pte_alloc(void)
* during POPULATE_KASAN_MAP_SHADOW when EDAT is off
*/
if (!pte_leftover) {
- pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
+ pte_leftover = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
pte = pte_leftover + _PAGE_TABLE_SIZE;
__arch_set_page_dat(pte, 1);
} else {
@@ -235,13 +264,16 @@ static pte_t *boot_pte_alloc(void)
return pte;
}
-static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_mode mode)
+static unsigned long resolve_pa_may_alloc(unsigned long addr, unsigned long size,
+ enum populate_mode mode)
{
switch (mode) {
case POPULATE_NONE:
- return -1;
+ return INVALID_PHYS_ADDR;
case POPULATE_DIRECT:
return addr;
+ case POPULATE_LOWCORE:
+ return __lowcore_pa(addr);
case POPULATE_ABS_LOWCORE:
return __abs_lowcore_pa(addr);
case POPULATE_KERNEL:
@@ -250,38 +282,64 @@ static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_m
return __identity_pa(addr);
#ifdef CONFIG_KASAN
case POPULATE_KASAN_MAP_SHADOW:
- addr = physmem_alloc_top_down(RR_VMEM, size, size);
- memset((void *)addr, 0, size);
- return addr;
+ /* Large page allocations are allowed to fail; this falls back to 1MB/4K pages */
+ addr = physmem_alloc(RR_VMEM, size, size, size == PAGE_SIZE);
+ if (addr) {
+ memset((void *)addr, 0, size);
+ return addr;
+ }
+ return INVALID_PHYS_ADDR;
#endif
default:
- return -1;
+ return INVALID_PHYS_ADDR;
}
}
-static bool large_allowed(enum populate_mode mode)
+static bool large_page_mapping_allowed(enum populate_mode mode)
{
- return (mode == POPULATE_DIRECT) || (mode == POPULATE_IDENTITY) || (mode == POPULATE_KERNEL);
+ switch (mode) {
+ case POPULATE_DIRECT:
+ case POPULATE_IDENTITY:
+ case POPULATE_KERNEL:
+#ifdef CONFIG_KASAN
+ case POPULATE_KASAN_MAP_SHADOW:
+#endif
+ return true;
+ default:
+ return false;
+ }
}
-static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end,
- enum populate_mode mode)
+static unsigned long try_get_large_pud_pa(pud_t *pu_dir, unsigned long addr, unsigned long end,
+ enum populate_mode mode)
{
- unsigned long size = end - addr;
+ unsigned long pa, size = end - addr;
+
+ if (!cpu_has_edat2() || !large_page_mapping_allowed(mode) ||
+ !IS_ALIGNED(addr, PUD_SIZE) || (size < PUD_SIZE))
+ return INVALID_PHYS_ADDR;
- return machine.has_edat2 && large_allowed(mode) &&
- IS_ALIGNED(addr, PUD_SIZE) && (size >= PUD_SIZE) &&
- IS_ALIGNED(_pa(addr, size, mode), PUD_SIZE);
+ pa = resolve_pa_may_alloc(addr, size, mode);
+ if (!IS_ALIGNED(pa, PUD_SIZE))
+ return INVALID_PHYS_ADDR;
+
+ return pa;
}
-static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end,
- enum populate_mode mode)
+static unsigned long try_get_large_pmd_pa(pmd_t *pm_dir, unsigned long addr, unsigned long end,
+ enum populate_mode mode)
{
- unsigned long size = end - addr;
+ unsigned long pa, size = end - addr;
+
+ if (!cpu_has_edat1() || !large_page_mapping_allowed(mode) ||
+ !IS_ALIGNED(addr, PMD_SIZE) || (size < PMD_SIZE))
+ return INVALID_PHYS_ADDR;
+
+ pa = resolve_pa_may_alloc(addr, size, mode);
+ if (!IS_ALIGNED(pa, PMD_SIZE))
+ return INVALID_PHYS_ADDR;
- return machine.has_edat1 && large_allowed(mode) &&
- IS_ALIGNED(addr, PMD_SIZE) && (size >= PMD_SIZE) &&
- IS_ALIGNED(_pa(addr, size, mode), PMD_SIZE);
+ return pa;
}
static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
@@ -295,22 +353,20 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e
if (pte_none(*pte)) {
if (kasan_pte_populate_zero_shadow(pte, mode))
continue;
- entry = __pte(_pa(addr, PAGE_SIZE, mode));
+ entry = __pte(resolve_pa_may_alloc(addr, PAGE_SIZE, mode));
entry = set_pte_bit(entry, PAGE_KERNEL);
- if (!machine.has_nx)
- entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC));
set_pte(pte, entry);
pages++;
}
}
- if (mode == POPULATE_DIRECT)
+ if (mode == POPULATE_IDENTITY)
update_page_count(PG_DIRECT_MAP_4K, pages);
}
static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end,
enum populate_mode mode)
{
- unsigned long next, pages = 0;
+ unsigned long pa, next, pages = 0;
pmd_t *pmd, entry;
pte_t *pte;
@@ -320,11 +376,10 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
if (pmd_none(*pmd)) {
if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode))
continue;
- if (can_large_pmd(pmd, addr, next, mode)) {
- entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode));
+ pa = try_get_large_pmd_pa(pmd, addr, next, mode);
+ if (pa != INVALID_PHYS_ADDR) {
+ entry = __pmd(pa);
entry = set_pmd_bit(entry, SEGMENT_KERNEL);
- if (!machine.has_nx)
- entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC));
set_pmd(pmd, entry);
pages++;
continue;
@@ -336,14 +391,14 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
}
pgtable_pte_populate(pmd, addr, next, mode);
}
- if (mode == POPULATE_DIRECT)
+ if (mode == POPULATE_IDENTITY)
update_page_count(PG_DIRECT_MAP_1M, pages);
}
static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end,
enum populate_mode mode)
{
- unsigned long next, pages = 0;
+ unsigned long pa, next, pages = 0;
pud_t *pud, entry;
pmd_t *pmd;
@@ -353,11 +408,10 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
if (pud_none(*pud)) {
if (kasan_pud_populate_zero_shadow(pud, addr, next, mode))
continue;
- if (can_large_pud(pud, addr, next, mode)) {
- entry = __pud(_pa(addr, _REGION3_SIZE, mode));
+ pa = try_get_large_pud_pa(pud, addr, next, mode);
+ if (pa != INVALID_PHYS_ADDR) {
+ entry = __pud(pa);
entry = set_pud_bit(entry, REGION3_KERNEL);
- if (!machine.has_nx)
- entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC));
set_pud(pud, entry);
pages++;
continue;
@@ -369,7 +423,7 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
}
pgtable_pmd_populate(pud, addr, next, mode);
}
- if (mode == POPULATE_DIRECT)
+ if (mode == POPULATE_IDENTITY)
update_page_count(PG_DIRECT_MAP_2G, pages);
}
@@ -399,6 +453,13 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat
pgd_t *pgd;
p4d_t *p4d;
+ if (!is_kasan_populate_mode(mode)) {
+ boot_debug("%-17s 0x%016lx-0x%016lx -> 0x%016lx-0x%016lx\n",
+ get_populate_mode_name(mode), addr, end,
+ resolve_pa_may_alloc(addr, 0, mode),
+ resolve_pa_may_alloc(end - 1, 0, mode) + 1);
+ }
+
pgd = pgd_offset(&init_mm, addr);
for (; addr < end; addr = next, pgd++) {
next = pgd_addr_end(addr, end);
@@ -418,6 +479,7 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat
void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit)
{
+ unsigned long lowcore_address = 0;
unsigned long start, end;
unsigned long asce_type;
unsigned long asce_bits;
@@ -455,18 +517,36 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l
__arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER);
__arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER);
+ if (machine_has_relocated_lowcore())
+ lowcore_address = LOWCORE_ALT_ADDRESS;
+
/*
* To allow prefixing the lowcore must be mapped with 4KB pages.
* To prevent creation of a large page at address 0 first map
* the lowcore and create the identity mapping only afterwards.
*/
- pgtable_populate(0, sizeof(struct lowcore), POPULATE_DIRECT);
+ pgtable_populate(lowcore_address,
+ lowcore_address + sizeof(struct lowcore),
+ POPULATE_LOWCORE);
for_each_physmem_usable_range(i, &start, &end) {
+ /* Do not map lowcore with identity mapping */
+ if (!start)
+ start = sizeof(struct lowcore);
pgtable_populate((unsigned long)__identity_va(start),
(unsigned long)__identity_va(end),
POPULATE_IDENTITY);
}
- pgtable_populate(kernel_start, kernel_end, POPULATE_KERNEL);
+
+ /*
+ * [kernel_start..kernel_start + TEXT_OFFSET] region is never
+ * accessed as per the linker script:
+ *
+ * . = TEXT_OFFSET;
+ *
+ * Therefore, skip mapping TEXT_OFFSET bytes to prevent access to
+ * [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region.
+ */
+ pgtable_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KERNEL);
pgtable_populate(AMODE31_START, AMODE31_END, POPULATE_DIRECT);
pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
POPULATE_ABS_LOWCORE);
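
The TEXT_OFFSET handling above simply shifts the start of the kernel mapping past the unused hole at the beginning of the image, so any stray access into that hole faults instead of silently succeeding. A worked example with made-up numbers (the addresses and the TEXT_OFFSET value are purely illustrative):

	/* Illustrative values only. */
	unsigned long kernel_start = 0x40000000UL;	/* __kaslr_offset_phys, say */
	unsigned long kernel_end   = 0x42000000UL;
	unsigned long text_off     = 0x100000UL;	/* hypothetical TEXT_OFFSET */

	/* Mapped:   [0x40100000, 0x42000000) via POPULATE_KERNEL
	 * Unmapped: [0x40000000, 0x40100000) -- never accessed per the linker script */
	pgtable_populate(kernel_start + text_off, kernel_end, POPULATE_KERNEL);
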
@@ -476,13 +556,13 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l
kasan_populate_shadow(kernel_start, kernel_end);
- S390_lowcore.kernel_asce.val = swapper_pg_dir | asce_bits;
- S390_lowcore.user_asce = s390_invalid_asce;
+ get_lowcore()->kernel_asce.val = swapper_pg_dir | asce_bits;
+ get_lowcore()->user_asce = s390_invalid_asce;
- local_ctl_load(1, &S390_lowcore.kernel_asce);
- local_ctl_load(7, &S390_lowcore.user_asce);
- local_ctl_load(13, &S390_lowcore.kernel_asce);
+ local_ctl_load(1, &get_lowcore()->kernel_asce);
+ local_ctl_load(7, &get_lowcore()->user_asce);
+ local_ctl_load(13, &get_lowcore()->kernel_asce);
- init_mm.context.asce = S390_lowcore.kernel_asce.val;
+ init_mm.context.asce = get_lowcore()->kernel_asce.val;
init_mm.pgd = init_mm_pgd;
}
diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S
index a750711d44c8..50988022f9ea 100644
--- a/arch/s390/boot/vmlinux.lds.S
+++ b/arch/s390/boot/vmlinux.lds.S
@@ -40,6 +40,7 @@ SECTIONS
*(.rodata.*)
_erodata = . ;
}
+ EXCEPTION_TABLE(16)
.got : {
*(.got)
}
@@ -109,7 +110,12 @@ SECTIONS
#ifdef CONFIG_KERNEL_UNCOMPRESSED
. = ALIGN(PAGE_SIZE);
. += AMODE31_SIZE; /* .amode31 section */
- . = ALIGN(1 << 20); /* _SEGMENT_SIZE */
+
+ /*
+ * Make sure the location counter is not less than TEXT_OFFSET.
+ * _SEGMENT_SIZE is not available, use ALIGN(1 << 20) instead.
+ */
+ . = MAX(TEXT_OFFSET, ALIGN(1 << 20));
#else
. = ALIGN(8);
#endif
@@ -160,7 +166,6 @@ SECTIONS
/DISCARD/ : {
COMMON_DISCARDS
*(.eh_frame)
- *(__ex_table)
*(*__ksymtab*)
*(___kcrctab*)
}
diff --git a/arch/s390/configs/compat.config b/arch/s390/configs/compat.config
deleted file mode 100644
index 6fd051453ae8..000000000000
--- a/arch/s390/configs/compat.config
+++ /dev/null
@@ -1,3 +0,0 @@
-# Help: Enable compat support
-CONFIG_COMPAT=y
-CONFIG_COMPAT_32BIT_TIME=y
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index f3602414a961..0713914b25b4 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -5,6 +5,7 @@ CONFIG_WATCH_QUEUE=y
CONFIG_AUDIT=y
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
+CONFIG_POSIX_AUX_CLOCKS=y
CONFIG_BPF_SYSCALL=y
CONFIG_BPF_JIT=y
CONFIG_BPF_JIT_ALWAYS_ON=y
@@ -19,6 +20,7 @@ CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
+CONFIG_SCHED_PROXY_EXEC=y
CONFIG_NUMA_BALANCING=y
CONFIG_MEMCG=y
CONFIG_BLK_CGROUP=y
@@ -38,11 +40,11 @@ CONFIG_USER_NS=y
CONFIG_CHECKPOINT_RESTORE=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_EXPERT=y
-# CONFIG_SYSFS_SYSCALL is not set
CONFIG_PROFILING=y
CONFIG_KEXEC=y
CONFIG_KEXEC_FILE=y
CONFIG_KEXEC_SIG=y
+CONFIG_CRASH_DM_CRYPT=y
CONFIG_LIVEPATCH=y
CONFIG_MARCH_Z13=y
CONFIG_NR_CPUS=512
@@ -50,16 +52,15 @@ CONFIG_NUMA=y
CONFIG_HZ_100=y
CONFIG_CERT_STORE=y
CONFIG_EXPOLINE=y
-# CONFIG_EXPOLINE_EXTERN is not set
CONFIG_EXPOLINE_AUTO=y
CONFIG_CHSC_SCH=y
CONFIG_VFIO_CCW=m
CONFIG_VFIO_AP=m
-CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y
CONFIG_CMM=m
CONFIG_APPLDATA_BASE=y
CONFIG_S390_HYPFS_FS=y
CONFIG_KVM=m
+CONFIG_KVM_S390_UCONTROL=y
CONFIG_S390_UNWIND_SELFTEST=m
CONFIG_S390_KPROBES_SANITY_TEST=m
CONFIG_S390_MODULES_SANITY_TEST=m
@@ -93,13 +94,14 @@ CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_IOSCHED_BFQ=y
CONFIG_BINFMT_MISC=m
CONFIG_ZSWAP=y
-CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y
CONFIG_ZSMALLOC_STAT=y
+CONFIG_SLAB_BUCKETS=y
CONFIG_SLUB_STATS=y
# CONFIG_COMPAT_BRK is not set
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
+CONFIG_PERSISTENT_HUGE_ZERO_FOLIO=y
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_CMA_DEBUGFS=y
CONFIG_CMA_SYSFS=y
@@ -107,6 +109,7 @@ CONFIG_CMA_AREAS=7
CONFIG_MEM_SOFT_DIRTY=y
CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_ZONE_DEVICE=y
CONFIG_PERCPU_STATS=y
CONFIG_GUP_TEST=y
CONFIG_ANON_VMA_NAME=y
@@ -116,10 +119,17 @@ CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
+CONFIG_TLS_DEVICE=y
+CONFIG_TLS_TOE=y
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
+CONFIG_SMC=m
CONFIG_SMC_DIAG=m
-CONFIG_SMC_LO=y
+CONFIG_DIBS=y
+CONFIG_DIBS_LO=y
+CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_ADVANCED_ROUTER=y
@@ -225,17 +235,19 @@ CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
CONFIG_NETFILTER_XT_TARGET_CT=m
CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_HL=m
CONFIG_NETFILTER_XT_TARGET_HMARK=m
CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
CONFIG_NETFILTER_XT_TARGET_LOG=m
CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_NAT=m
CONFIG_NETFILTER_XT_TARGET_NETMAP=m
CONFIG_NETFILTER_XT_TARGET_NFLOG=m
CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
+CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m
CONFIG_NETFILTER_XT_TARGET_TEE=m
CONFIG_NETFILTER_XT_TARGET_TPROXY=m
-CONFIG_NETFILTER_XT_TARGET_TRACE=m
CONFIG_NETFILTER_XT_TARGET_SECMARK=m
CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
@@ -321,16 +333,8 @@ CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
CONFIG_IP_NF_MATCH_RPFILTER=m
CONFIG_IP_NF_MATCH_TTL=m
-CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_NAT=m
-CONFIG_IP_NF_TARGET_MASQUERADE=m
-CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_ECN=m
-CONFIG_IP_NF_TARGET_TTL=m
-CONFIG_IP_NF_RAW=m
-CONFIG_IP_NF_SECURITY=m
-CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
@@ -343,15 +347,9 @@ CONFIG_IP6_NF_MATCH_IPV6HEADER=m
CONFIG_IP6_NF_MATCH_MH=m
CONFIG_IP6_NF_MATCH_RPFILTER=m
CONFIG_IP6_NF_MATCH_RT=m
-CONFIG_IP6_NF_TARGET_HL=m
-CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_TARGET_REJECT=m
-CONFIG_IP6_NF_MANGLE=m
-CONFIG_IP6_NF_RAW=m
-CONFIG_IP6_NF_SECURITY=m
-CONFIG_IP6_NF_NAT=m
-CONFIG_IP6_NF_TARGET_MASQUERADE=m
CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_IP_SCTP=m
CONFIG_RDS=m
CONFIG_RDS_RDMA=m
CONFIG_RDS_TCP=m
@@ -386,6 +384,7 @@ CONFIG_NET_SCH_FQ_CODEL=m
CONFIG_NET_SCH_INGRESS=m
CONFIG_NET_SCH_PLUG=m
CONFIG_NET_SCH_ETS=m
+CONFIG_NET_SCH_DUALPI2=m
CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_ROUTE4=m
CONFIG_NET_CLS_FW=m
@@ -395,6 +394,9 @@ CONFIG_CLS_U32_MARK=y
CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_CGROUP=y
CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NET_EMATCH=y
CONFIG_NET_CLS_ACT=y
CONFIG_NET_ACT_POLICE=m
CONFIG_NET_ACT_GACT=m
@@ -405,6 +407,9 @@ CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
CONFIG_NET_ACT_SKBEDIT=m
CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_VLAN=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
+CONFIG_NET_ACT_CT=m
CONFIG_NET_ACT_GATE=m
CONFIG_NET_TC_SKB_EXT=y
CONFIG_DNS_RESOLVER=y
@@ -426,6 +431,13 @@ CONFIG_DEVTMPFS_SAFE=y
# CONFIG_FW_LOADER is not set
CONFIG_CONNECTOR=y
CONFIG_ZRAM=y
+CONFIG_ZRAM_BACKEND_LZ4=y
+CONFIG_ZRAM_BACKEND_LZ4HC=y
+CONFIG_ZRAM_BACKEND_ZSTD=y
+CONFIG_ZRAM_BACKEND_DEFLATE=y
+CONFIG_ZRAM_BACKEND_842=y
+CONFIG_ZRAM_BACKEND_LZO=y
+CONFIG_ZRAM_DEF_COMP_DEFLATE=y
CONFIG_BLK_DEV_LOOP=m
CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
@@ -461,7 +473,9 @@ CONFIG_SCSI_DH_EMC=m
CONFIG_SCSI_DH_ALUA=m
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LLBITMAP=y
# CONFIG_MD_BITMAP_FILE is not set
+CONFIG_MD_LINEAR=m
CONFIG_MD_CLUSTER=m
CONFIG_BCACHE=m
CONFIG_BLK_DEV_DM=y
@@ -486,12 +500,14 @@ CONFIG_DM_UEVENT=y
CONFIG_DM_FLAKEY=m
CONFIG_DM_VERITY=m
CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y
+CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG_PLATFORM_KEYRING=y
CONFIG_DM_SWITCH=m
CONFIG_DM_INTEGRITY=m
CONFIG_DM_VDO=m
CONFIG_NETDEVICES=y
CONFIG_BONDING=m
CONFIG_DUMMY=m
+CONFIG_OVPN=m
CONFIG_EQUALIZER=m
CONFIG_IFB=m
CONFIG_MACVLAN=m
@@ -535,6 +551,8 @@ CONFIG_NLMON=m
CONFIG_MLX4_EN=m
CONFIG_MLX5_CORE=m
CONFIG_MLX5_CORE_EN=y
+CONFIG_MLX5_SF=y
+# CONFIG_NET_VENDOR_META is not set
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_MICROCHIP is not set
# CONFIG_NET_VENDOR_MICROSEMI is not set
@@ -616,9 +634,19 @@ CONFIG_VFIO_PCI=m
CONFIG_MLX5_VFIO_PCI=m
CONFIG_VIRTIO_PCI=m
CONFIG_VIRTIO_BALLOON=m
+CONFIG_VIRTIO_MEM=m
CONFIG_VIRTIO_INPUT=y
+CONFIG_VDPA=m
+CONFIG_VDPA_SIM=m
+CONFIG_VDPA_SIM_NET=m
+CONFIG_VDPA_SIM_BLOCK=m
+CONFIG_VDPA_USER=m
+CONFIG_MLX5_VDPA_NET=m
+CONFIG_VP_VDPA=m
CONFIG_VHOST_NET=m
CONFIG_VHOST_VSOCK=m
+CONFIG_VHOST_VDPA=m
+CONFIG_DEV_DAX=m
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
@@ -628,9 +656,12 @@ CONFIG_JFS_POSIX_ACL=y
CONFIG_JFS_SECURITY=y
CONFIG_JFS_STATISTICS=y
CONFIG_XFS_FS=y
+CONFIG_XFS_SUPPORT_V4=y
+CONFIG_XFS_SUPPORT_ASCII_CI=y
CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
CONFIG_XFS_RT=y
+# CONFIG_XFS_ONLINE_SCRUB is not set
CONFIG_XFS_DEBUG=y
CONFIG_GFS2_FS=m
CONFIG_GFS2_FS_LOCKING_DLM=y
@@ -640,10 +671,6 @@ CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_BTRFS_DEBUG=y
CONFIG_BTRFS_ASSERT=y
CONFIG_NILFS2_FS=m
-CONFIG_BCACHEFS_FS=y
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-CONFIG_FS_DAX=y
CONFIG_EXPORTFS_BLOCK_OPS=y
CONFIG_FS_ENCRYPTION=y
CONFIG_FS_VERITY=y
@@ -695,6 +722,7 @@ CONFIG_NFSD=m
CONFIG_NFSD_V3_ACL=y
CONFIG_NFSD_V4=y
CONFIG_NFSD_V4_SECURITY_LABEL=y
+# CONFIG_NFSD_LEGACY_CLIENT_TRACKING is not set
CONFIG_CIFS=m
CONFIG_CIFS_UPCALL=y
CONFIG_CIFS_XATTR=y
@@ -712,11 +740,10 @@ CONFIG_NLS_UTF8=m
CONFIG_DLM=m
CONFIG_UNICODE=y
CONFIG_PERSISTENT_KEYRINGS=y
+CONFIG_BIG_KEYS=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_KEY_NOTIFICATIONS=y
CONFIG_SECURITY=y
-CONFIG_HARDENED_USERCOPY=y
-CONFIG_FORTIFY_SOURCE=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_LOCKDOWN_LSM=y
@@ -729,24 +756,24 @@ CONFIG_IMA=y
CONFIG_IMA_DEFAULT_HASH_SHA256=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA_APPRAISE=y
-CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
+CONFIG_FORTIFY_SOURCE=y
+CONFIG_HARDENED_USERCOPY=y
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_CRYPTO_USER=m
-# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_SELFTESTS=y
+CONFIG_CRYPTO_SELFTESTS_FULL=y
+CONFIG_CRYPTO_NULL=y
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECDSA=m
CONFIG_CRYPTO_ECRDSA=m
-CONFIG_CRYPTO_SM2=m
-CONFIG_CRYPTO_CURVE25519=m
CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
CONFIG_CRYPTO_ARIA=m
CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_CAST5=m
CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_DES=m
@@ -760,19 +787,17 @@ CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_ARC4=m
CONFIG_CRYPTO_HCTR2=m
-CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_GCM=y
CONFIG_CRYPTO_SEQIV=y
CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3_GENERIC=m
-CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_CRC32=m
@@ -780,33 +805,28 @@ CONFIG_CRYPTO_842=m
CONFIG_CRYPTO_LZ4=m
CONFIG_CRYPTO_LZ4HC=m
CONFIG_CRYPTO_ZSTD=m
-CONFIG_CRYPTO_ANSI_CPRNG=m
CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
-CONFIG_CRYPTO_CRC32_S390=y
-CONFIG_CRYPTO_SHA512_S390=m
-CONFIG_CRYPTO_SHA1_S390=m
-CONFIG_CRYPTO_SHA256_S390=m
-CONFIG_CRYPTO_SHA3_256_S390=m
-CONFIG_CRYPTO_SHA3_512_S390=m
CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_DES_S390=m
-CONFIG_CRYPTO_CHACHA_S390=m
+CONFIG_CRYPTO_HMAC_S390=m
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
+CONFIG_PKEY_CCA=m
+CONFIG_PKEY_EP11=m
+CONFIG_PKEY_PCKMO=m
+CONFIG_PKEY_UV=m
CONFIG_CRYPTO_PAES_S390=m
+CONFIG_CRYPTO_PHMAC_S390=m
CONFIG_CRYPTO_DEV_VIRTIO=m
CONFIG_SYSTEM_BLACKLIST_KEYRING=y
+CONFIG_CRYPTO_KRB5=m
+CONFIG_CRYPTO_KRB5_SELFTESTS=y
CONFIG_CORDIC=m
-CONFIG_CRYPTO_LIB_CURVE25519=m
-CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
-CONFIG_CRC32_SELFTEST=y
-CONFIG_CRC4=m
-CONFIG_CRC7=m
-CONFIG_CRC8=m
+CONFIG_TRACE_MMIO_ACCESS=y
CONFIG_RANDOM32_SELFTEST=y
CONFIG_XZ_DEC_MICROLZMA=y
CONFIG_DMA_CMA=y
@@ -864,6 +884,7 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=300
CONFIG_LATENCYTOP=y
CONFIG_BOOTTIME_TRACING=y
CONFIG_FUNCTION_GRAPH_RETVAL=y
+CONFIG_FUNCTION_GRAPH_RETADDR=y
CONFIG_FPROBE=y
CONFIG_FUNCTION_PROFILER=y
CONFIG_STACK_TRACER=y
@@ -876,12 +897,14 @@ CONFIG_USER_EVENTS=y
CONFIG_HIST_TRIGGERS=y
CONFIG_FTRACE_STARTUP_TEST=y
# CONFIG_EVENT_TRACE_STARTUP_TEST is not set
+CONFIG_FTRACE_SORT_STARTUP_TEST=y
CONFIG_SAMPLES=y
CONFIG_SAMPLE_TRACE_PRINTK=m
CONFIG_SAMPLE_FTRACE_DIRECT=m
CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m
CONFIG_SAMPLE_FTRACE_OPS=m
CONFIG_DEBUG_ENTRY=y
+CONFIG_STRICT_MM_TYPECHECKS=y
CONFIG_CIO_INJECT=y
CONFIG_KUNIT=m
CONFIG_KUNIT_DEBUGFS=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index d0d8925fdf09..c064e0cacc98 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -4,6 +4,7 @@ CONFIG_WATCH_QUEUE=y
CONFIG_AUDIT=y
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
+CONFIG_POSIX_AUX_CLOCKS=y
CONFIG_BPF_SYSCALL=y
CONFIG_BPF_JIT=y
CONFIG_BPF_JIT_ALWAYS_ON=y
@@ -17,6 +18,7 @@ CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
+CONFIG_SCHED_PROXY_EXEC=y
CONFIG_NUMA_BALANCING=y
CONFIG_MEMCG=y
CONFIG_BLK_CGROUP=y
@@ -36,24 +38,22 @@ CONFIG_USER_NS=y
CONFIG_CHECKPOINT_RESTORE=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_EXPERT=y
-# CONFIG_SYSFS_SYSCALL is not set
CONFIG_PROFILING=y
CONFIG_KEXEC=y
CONFIG_KEXEC_FILE=y
CONFIG_KEXEC_SIG=y
+CONFIG_CRASH_DM_CRYPT=y
CONFIG_LIVEPATCH=y
CONFIG_MARCH_Z13=y
CONFIG_NR_CPUS=512
CONFIG_NUMA=y
-CONFIG_HZ_100=y
+CONFIG_HZ_1000=y
CONFIG_CERT_STORE=y
CONFIG_EXPOLINE=y
-# CONFIG_EXPOLINE_EXTERN is not set
CONFIG_EXPOLINE_AUTO=y
CONFIG_CHSC_SCH=y
CONFIG_VFIO_CCW=m
CONFIG_VFIO_AP=m
-CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y
CONFIG_CMM=m
CONFIG_APPLDATA_BASE=y
CONFIG_S390_HYPFS_FS=y
@@ -88,18 +88,20 @@ CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_IOSCHED_BFQ=y
CONFIG_BINFMT_MISC=m
CONFIG_ZSWAP=y
-CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y
CONFIG_ZSMALLOC_STAT=y
+CONFIG_SLAB_BUCKETS=y
# CONFIG_COMPAT_BRK is not set
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
+CONFIG_PERSISTENT_HUGE_ZERO_FOLIO=y
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_CMA_SYSFS=y
CONFIG_CMA_AREAS=7
CONFIG_MEM_SOFT_DIRTY=y
CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_ZONE_DEVICE=y
CONFIG_PERCPU_STATS=y
CONFIG_ANON_VMA_NAME=y
CONFIG_USERFAULTFD=y
@@ -108,10 +110,17 @@ CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
+CONFIG_TLS_DEVICE=y
+CONFIG_TLS_TOE=y
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
+CONFIG_SMC=m
CONFIG_SMC_DIAG=m
-CONFIG_SMC_LO=y
+CONFIG_DIBS=y
+CONFIG_DIBS_LO=y
+CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_ADVANCED_ROUTER=y
@@ -217,17 +226,19 @@ CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
CONFIG_NETFILTER_XT_TARGET_CT=m
CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_HL=m
CONFIG_NETFILTER_XT_TARGET_HMARK=m
CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
CONFIG_NETFILTER_XT_TARGET_LOG=m
CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_NAT=m
CONFIG_NETFILTER_XT_TARGET_NETMAP=m
CONFIG_NETFILTER_XT_TARGET_NFLOG=m
CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
+CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m
CONFIG_NETFILTER_XT_TARGET_TEE=m
CONFIG_NETFILTER_XT_TARGET_TPROXY=m
-CONFIG_NETFILTER_XT_TARGET_TRACE=m
CONFIG_NETFILTER_XT_TARGET_SECMARK=m
CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
@@ -313,16 +324,8 @@ CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
CONFIG_IP_NF_MATCH_RPFILTER=m
CONFIG_IP_NF_MATCH_TTL=m
-CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_NAT=m
-CONFIG_IP_NF_TARGET_MASQUERADE=m
-CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_ECN=m
-CONFIG_IP_NF_TARGET_TTL=m
-CONFIG_IP_NF_RAW=m
-CONFIG_IP_NF_SECURITY=m
-CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
@@ -335,15 +338,9 @@ CONFIG_IP6_NF_MATCH_IPV6HEADER=m
CONFIG_IP6_NF_MATCH_MH=m
CONFIG_IP6_NF_MATCH_RPFILTER=m
CONFIG_IP6_NF_MATCH_RT=m
-CONFIG_IP6_NF_TARGET_HL=m
-CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_TARGET_REJECT=m
-CONFIG_IP6_NF_MANGLE=m
-CONFIG_IP6_NF_RAW=m
-CONFIG_IP6_NF_SECURITY=m
-CONFIG_IP6_NF_NAT=m
-CONFIG_IP6_NF_TARGET_MASQUERADE=m
CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_IP_SCTP=m
CONFIG_RDS=m
CONFIG_RDS_RDMA=m
CONFIG_RDS_TCP=m
@@ -377,6 +374,7 @@ CONFIG_NET_SCH_FQ_CODEL=m
CONFIG_NET_SCH_INGRESS=m
CONFIG_NET_SCH_PLUG=m
CONFIG_NET_SCH_ETS=m
+CONFIG_NET_SCH_DUALPI2=m
CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_ROUTE4=m
CONFIG_NET_CLS_FW=m
@@ -386,6 +384,9 @@ CONFIG_CLS_U32_MARK=y
CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_CGROUP=y
CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NET_EMATCH=y
CONFIG_NET_CLS_ACT=y
CONFIG_NET_ACT_POLICE=m
CONFIG_NET_ACT_GACT=m
@@ -396,6 +397,9 @@ CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
CONFIG_NET_ACT_SKBEDIT=m
CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_VLAN=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
+CONFIG_NET_ACT_CT=m
CONFIG_NET_ACT_GATE=m
CONFIG_NET_TC_SKB_EXT=y
CONFIG_DNS_RESOLVER=y
@@ -417,6 +421,13 @@ CONFIG_DEVTMPFS_SAFE=y
# CONFIG_FW_LOADER is not set
CONFIG_CONNECTOR=y
CONFIG_ZRAM=y
+CONFIG_ZRAM_BACKEND_LZ4=y
+CONFIG_ZRAM_BACKEND_LZ4HC=y
+CONFIG_ZRAM_BACKEND_ZSTD=y
+CONFIG_ZRAM_BACKEND_DEFLATE=y
+CONFIG_ZRAM_BACKEND_842=y
+CONFIG_ZRAM_BACKEND_LZO=y
+CONFIG_ZRAM_DEF_COMP_DEFLATE=y
CONFIG_BLK_DEV_LOOP=m
CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
@@ -452,7 +463,9 @@ CONFIG_SCSI_DH_EMC=m
CONFIG_SCSI_DH_ALUA=m
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LLBITMAP=y
# CONFIG_MD_BITMAP_FILE is not set
+CONFIG_MD_LINEAR=m
CONFIG_MD_CLUSTER=m
CONFIG_BCACHE=m
CONFIG_BLK_DEV_DM=y
@@ -477,12 +490,14 @@ CONFIG_DM_UEVENT=y
CONFIG_DM_FLAKEY=m
CONFIG_DM_VERITY=m
CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y
+CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG_PLATFORM_KEYRING=y
CONFIG_DM_SWITCH=m
CONFIG_DM_INTEGRITY=m
CONFIG_DM_VDO=m
CONFIG_NETDEVICES=y
CONFIG_BONDING=m
CONFIG_DUMMY=m
+CONFIG_OVPN=m
CONFIG_EQUALIZER=m
CONFIG_IFB=m
CONFIG_MACVLAN=m
@@ -526,6 +541,8 @@ CONFIG_NLMON=m
CONFIG_MLX4_EN=m
CONFIG_MLX5_CORE=m
CONFIG_MLX5_CORE_EN=y
+CONFIG_MLX5_SF=y
+# CONFIG_NET_VENDOR_META is not set
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_MICROCHIP is not set
# CONFIG_NET_VENDOR_MICROSEMI is not set
@@ -607,9 +624,19 @@ CONFIG_VFIO_PCI=m
CONFIG_MLX5_VFIO_PCI=m
CONFIG_VIRTIO_PCI=m
CONFIG_VIRTIO_BALLOON=m
+CONFIG_VIRTIO_MEM=m
CONFIG_VIRTIO_INPUT=y
+CONFIG_VDPA=m
+CONFIG_VDPA_SIM=m
+CONFIG_VDPA_SIM_NET=m
+CONFIG_VDPA_SIM_BLOCK=m
+CONFIG_VDPA_USER=m
+CONFIG_MLX5_VDPA_NET=m
+CONFIG_VP_VDPA=m
CONFIG_VHOST_NET=m
CONFIG_VHOST_VSOCK=m
+CONFIG_VHOST_VDPA=m
+CONFIG_DEV_DAX=m
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
@@ -619,19 +646,18 @@ CONFIG_JFS_POSIX_ACL=y
CONFIG_JFS_SECURITY=y
CONFIG_JFS_STATISTICS=y
CONFIG_XFS_FS=y
+CONFIG_XFS_SUPPORT_V4=y
+CONFIG_XFS_SUPPORT_ASCII_CI=y
CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
CONFIG_XFS_RT=y
+# CONFIG_XFS_ONLINE_SCRUB is not set
CONFIG_GFS2_FS=m
CONFIG_GFS2_FS_LOCKING_DLM=y
CONFIG_OCFS2_FS=m
CONFIG_BTRFS_FS=y
CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_NILFS2_FS=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-CONFIG_FS_DAX=y
CONFIG_EXPORTFS_BLOCK_OPS=y
CONFIG_FS_ENCRYPTION=y
CONFIG_FS_VERITY=y
@@ -663,7 +689,6 @@ CONFIG_TMPFS_POSIX_ACL=y
CONFIG_TMPFS_INODE64=y
CONFIG_TMPFS_QUOTA=y
CONFIG_HUGETLBFS=y
-CONFIG_CONFIGFS_FS=m
CONFIG_ECRYPT_FS=m
CONFIG_CRAMFS=m
CONFIG_SQUASHFS=m
@@ -683,6 +708,7 @@ CONFIG_NFSD=m
CONFIG_NFSD_V3_ACL=y
CONFIG_NFSD_V4=y
CONFIG_NFSD_V4_SECURITY_LABEL=y
+# CONFIG_NFSD_LEGACY_CLIENT_TRACKING is not set
CONFIG_CIFS=m
CONFIG_CIFS_UPCALL=y
CONFIG_CIFS_XATTR=y
@@ -700,6 +726,7 @@ CONFIG_NLS_UTF8=m
CONFIG_DLM=m
CONFIG_UNICODE=y
CONFIG_PERSISTENT_KEYRINGS=y
+CONFIG_BIG_KEYS=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_KEY_NOTIFICATIONS=y
CONFIG_SECURITY=y
@@ -715,25 +742,22 @@ CONFIG_IMA=y
CONFIG_IMA_DEFAULT_HASH_SHA256=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA_APPRAISE=y
-CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_CRYPTO_FIPS=y
CONFIG_CRYPTO_USER=m
-# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_SELFTESTS=y
+CONFIG_CRYPTO_NULL=y
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECDSA=m
CONFIG_CRYPTO_ECRDSA=m
-CONFIG_CRYPTO_SM2=m
-CONFIG_CRYPTO_CURVE25519=m
CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
CONFIG_CRYPTO_ARIA=m
CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_CAST5=m
CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_DES=m
@@ -747,19 +771,17 @@ CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_ARC4=m
CONFIG_CRYPTO_HCTR2=m
-CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_GCM=y
CONFIG_CRYPTO_SEQIV=y
CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3_GENERIC=m
-CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_CRC32=m
@@ -767,33 +789,29 @@ CONFIG_CRYPTO_842=m
CONFIG_CRYPTO_LZ4=m
CONFIG_CRYPTO_LZ4HC=m
CONFIG_CRYPTO_ZSTD=m
-CONFIG_CRYPTO_ANSI_CPRNG=m
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
-CONFIG_CRYPTO_CRC32_S390=y
-CONFIG_CRYPTO_SHA512_S390=m
-CONFIG_CRYPTO_SHA1_S390=m
-CONFIG_CRYPTO_SHA256_S390=m
-CONFIG_CRYPTO_SHA3_256_S390=m
-CONFIG_CRYPTO_SHA3_512_S390=m
CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_DES_S390=m
-CONFIG_CRYPTO_CHACHA_S390=m
+CONFIG_CRYPTO_HMAC_S390=m
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
+CONFIG_PKEY_CCA=m
+CONFIG_PKEY_EP11=m
+CONFIG_PKEY_PCKMO=m
+CONFIG_PKEY_UV=m
CONFIG_CRYPTO_PAES_S390=m
+CONFIG_CRYPTO_PHMAC_S390=m
CONFIG_CRYPTO_DEV_VIRTIO=m
CONFIG_SYSTEM_BLACKLIST_KEYRING=y
+CONFIG_CRYPTO_KRB5=m
+CONFIG_CRYPTO_KRB5_SELFTESTS=y
CONFIG_CORDIC=m
CONFIG_PRIME_NUMBERS=m
-CONFIG_CRYPTO_LIB_CURVE25519=m
-CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
-CONFIG_CRC4=m
-CONFIG_CRC7=m
-CONFIG_CRC8=m
CONFIG_XZ_DEC_MICROLZMA=y
CONFIG_DMA_CMA=y
CONFIG_CMA_SIZE_MBYTES=0
@@ -814,6 +832,7 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_LATENCYTOP=y
CONFIG_BOOTTIME_TRACING=y
CONFIG_FUNCTION_GRAPH_RETVAL=y
+CONFIG_FUNCTION_GRAPH_RETADDR=y
CONFIG_FPROBE=y
CONFIG_FUNCTION_PROFILER=y
CONFIG_STACK_TRACER=y
diff --git a/arch/s390/configs/kasan.config b/arch/s390/configs/kasan.config
index 84c2b551e992..cefbe2ba1228 100644
--- a/arch/s390/configs/kasan.config
+++ b/arch/s390/configs/kasan.config
@@ -1,4 +1,4 @@
# Help: Enable KASan for debugging
CONFIG_KASAN=y
CONFIG_KASAN_INLINE=y
-CONFIG_KASAN_VMALLOC=y
+CONFIG_KERNEL_IMAGE_BASE=0x7FFFE0000000
diff --git a/arch/s390/configs/mmtypes.config b/arch/s390/configs/mmtypes.config
new file mode 100644
index 000000000000..fe32b442d789
--- /dev/null
+++ b/arch/s390/configs/mmtypes.config
@@ -0,0 +1,2 @@
+# Help: Enable strict memory management typechecks
+CONFIG_STRICT_MM_TYPECHECKS=y
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index 8c2b61363bab..b5478267d6a7 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -1,5 +1,6 @@
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
+CONFIG_POSIX_AUX_CLOCKS=y
CONFIG_BPF_SYSCALL=y
# CONFIG_CPU_ISOLATION is not set
# CONFIG_UTS_NS is not set
@@ -11,7 +12,7 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_KEXEC=y
CONFIG_MARCH_Z13=y
CONFIG_NR_CPUS=2
-CONFIG_HZ_100=y
+CONFIG_HZ_1000=y
# CONFIG_CHSC_SCH is not set
# CONFIG_SCM_BUS is not set
# CONFIG_AP is not set
@@ -32,7 +33,6 @@ CONFIG_NET=y
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_SAFE=y
CONFIG_BLK_DEV_RAM=y
-# CONFIG_DCSSBLK is not set
# CONFIG_DASD is not set
CONFIG_ENCLOSURE_SERVICES=y
CONFIG_SCSI=y
@@ -49,6 +49,7 @@ CONFIG_ZFCP=y
# CONFIG_HVC_IUCV is not set
# CONFIG_HW_RANDOM_S390 is not set
# CONFIG_HMC_DRV is not set
+# CONFIG_S390_UV_UAPI is not set
# CONFIG_S390_TAPE is not set
# CONFIG_VMCP is not set
# CONFIG_MONWRITER is not set
@@ -61,7 +62,6 @@ CONFIG_ZFCP=y
# CONFIG_INOTIFY_USER is not set
# CONFIG_MISC_FILESYSTEMS is not set
# CONFIG_NETWORK_FILESYSTEMS is not set
-CONFIG_LSM="yama,loadpin,safesetid,integrity"
# CONFIG_ZLIB_DFLTCC is not set
CONFIG_XZ_DEC_MICROLZMA=y
CONFIG_PRINTK_TIME=y
@@ -70,7 +70,6 @@ CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_INFO_DWARF4=y
CONFIG_DEBUG_FS=y
CONFIG_PANIC_ON_OOPS=y
-# CONFIG_SCHED_DEBUG is not set
CONFIG_RCU_CPU_STALL_TIMEOUT=60
# CONFIG_RCU_TRACE is not set
# CONFIG_FTRACE is not set
diff --git a/arch/s390/crypto/Kconfig b/arch/s390/crypto/Kconfig
index 06ee706b0d78..f838ca055f6d 100644
--- a/arch/s390/crypto/Kconfig
+++ b/arch/s390/crypto/Kconfig
@@ -2,76 +2,8 @@
menu "Accelerated Cryptographic Algorithms for CPU (s390)"
-config CRYPTO_CRC32_S390
- tristate "CRC32c and CRC32"
- depends on S390
- select CRYPTO_HASH
- select CRC32
- help
- CRC32c and CRC32 CRC algorithms
-
- Architecture: s390
-
- It is available with IBM z13 or later.
-
-config CRYPTO_SHA512_S390
- tristate "Hash functions: SHA-384 and SHA-512"
- depends on S390
- select CRYPTO_HASH
- help
- SHA-384 and SHA-512 secure hash algorithms (FIPS 180)
-
- Architecture: s390
-
- It is available as of z10.
-
-config CRYPTO_SHA1_S390
- tristate "Hash functions: SHA-1"
- depends on S390
- select CRYPTO_HASH
- help
- SHA-1 secure hash algorithm (FIPS 180)
-
- Architecture: s390
-
- It is available as of z990.
-
-config CRYPTO_SHA256_S390
- tristate "Hash functions: SHA-224 and SHA-256"
- depends on S390
- select CRYPTO_HASH
- help
- SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
-
- Architecture: s390
-
- It is available as of z9.
-
-config CRYPTO_SHA3_256_S390
- tristate "Hash functions: SHA3-224 and SHA3-256"
- depends on S390
- select CRYPTO_HASH
- help
- SHA3-224 and SHA3-256 secure hash algorithms (FIPS 202)
-
- Architecture: s390
-
- It is available as of z14.
-
-config CRYPTO_SHA3_512_S390
- tristate "Hash functions: SHA3-384 and SHA3-512"
- depends on S390
- select CRYPTO_HASH
- help
- SHA3-384 and SHA3-512 secure hash algorithms (FIPS 202)
-
- Architecture: s390
-
- It is available as of z14.
-
config CRYPTO_GHASH_S390
tristate "Hash functions: GHASH"
- depends on S390
select CRYPTO_HASH
help
GCM GHASH hash function (NIST SP800-38D)
@@ -82,7 +14,6 @@ config CRYPTO_GHASH_S390
config CRYPTO_AES_S390
tristate "Ciphers: AES, modes: ECB, CBC, CTR, XTS, GCM"
- depends on S390
select CRYPTO_ALGAPI
select CRYPTO_SKCIPHER
help
@@ -104,7 +35,6 @@ config CRYPTO_AES_S390
config CRYPTO_DES_S390
tristate "Ciphers: DES and Triple DES EDE, modes: ECB, CBC, CTR"
- depends on S390
select CRYPTO_ALGAPI
select CRYPTO_SKCIPHER
select CRYPTO_LIB_DES
@@ -119,17 +49,13 @@ config CRYPTO_DES_S390
As of z990 the ECB and CBC mode are hardware accelerated.
As of z196 the CTR mode is hardware accelerated.
-config CRYPTO_CHACHA_S390
- tristate "Ciphers: ChaCha20"
- depends on S390
- select CRYPTO_SKCIPHER
- select CRYPTO_LIB_CHACHA_GENERIC
- select CRYPTO_ARCH_HAVE_LIB_CHACHA
+config CRYPTO_HMAC_S390
+ tristate "Keyed-hash message authentication code: HMAC"
+ select CRYPTO_HASH
help
- Length-preserving cipher: ChaCha20 stream cipher (RFC 7539)
+ s390 specific HMAC hardware support for SHA224, SHA256, SHA384 and
+ SHA512.
Architecture: s390
- It is available as of z13.
-
endmenu
diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile
index 1b1cc478fa94..387a229e1038 100644
--- a/arch/s390/crypto/Makefile
+++ b/arch/s390/crypto/Makefile
@@ -3,19 +3,11 @@
# Cryptographic API
#
-obj-$(CONFIG_CRYPTO_SHA1_S390) += sha1_s390.o sha_common.o
-obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256_s390.o sha_common.o
-obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o
-obj-$(CONFIG_CRYPTO_SHA3_256_S390) += sha3_256_s390.o sha_common.o
-obj-$(CONFIG_CRYPTO_SHA3_512_S390) += sha3_512_s390.o sha_common.o
obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o
obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o
obj-$(CONFIG_CRYPTO_PAES_S390) += paes_s390.o
-obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o
obj-$(CONFIG_S390_PRNG) += prng.o
obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
-obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o
+obj-$(CONFIG_CRYPTO_HMAC_S390) += hmac_s390.o
+obj-$(CONFIG_CRYPTO_PHMAC_S390) += phmac_s390.o
obj-y += arch_random.o
-
-crc32-vx_s390-y := crc32-vx.o crc32le-vx.o crc32be-vx.o
-chacha_s390-y := chacha-glue.o chacha-s390.o
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index c6fe5405de4a..d0a295435680 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -14,8 +14,7 @@
* Derived from "crypto/aes_generic.c"
*/
-#define KMSG_COMPONENT "aes_s390"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "aes_s390: " fmt
#include <crypto/aes.h>
#include <crypto/algapi.h>
@@ -51,8 +50,13 @@ struct s390_aes_ctx {
};
struct s390_xts_ctx {
- u8 key[32];
- u8 pcc_key[32];
+ union {
+ u8 keys[64];
+ struct {
+ u8 key[32];
+ u8 pcc_key[32];
+ };
+ };
int key_len;
unsigned long fc;
struct crypto_skcipher *fallback;
@@ -61,7 +65,6 @@ struct s390_xts_ctx {
struct gcm_sg_walk {
struct scatter_walk walk;
unsigned int walk_bytes;
- u8 *walk_ptr;
unsigned int walk_bytes_remain;
u8 buf[AES_BLOCK_SIZE];
unsigned int buf_bytes;
@@ -526,6 +529,108 @@ static struct skcipher_alg xts_aes_alg = {
.decrypt = xts_aes_decrypt,
};
+static int fullxts_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
+ unsigned long fc;
+ int err;
+
+ err = xts_fallback_setkey(tfm, in_key, key_len);
+ if (err)
+ return err;
+
+ /* Pick the correct function code based on the key length */
+ fc = (key_len == 32) ? CPACF_KM_XTS_128_FULL :
+ (key_len == 64) ? CPACF_KM_XTS_256_FULL : 0;
+
+ /* Check if the function code is available */
+ xts_ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+ if (!xts_ctx->fc)
+ return 0;
+
+ /* Store double-key */
+ memcpy(xts_ctx->keys, in_key, key_len);
+ xts_ctx->key_len = key_len;
+ return 0;
+}
+
+static int fullxts_aes_crypt(struct skcipher_request *req, unsigned long modifier)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
+ unsigned int offset, nbytes, n;
+ struct skcipher_walk walk;
+ int ret;
+ struct {
+ __u8 key[64];
+ __u8 tweak[16];
+ __u8 nap[16];
+ } fxts_param = {
+ .nap = {0},
+ };
+
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
+
+ if (unlikely(!xts_ctx->fc || (req->cryptlen % AES_BLOCK_SIZE) != 0)) {
+ struct skcipher_request *subreq = skcipher_request_ctx(req);
+
+ *subreq = *req;
+ skcipher_request_set_tfm(subreq, xts_ctx->fallback);
+ return (modifier & CPACF_DECRYPT) ?
+ crypto_skcipher_decrypt(subreq) :
+ crypto_skcipher_encrypt(subreq);
+ }
+
+ ret = skcipher_walk_virt(&walk, req, false);
+ if (ret)
+ return ret;
+
+ offset = xts_ctx->key_len & 0x20;
+ memcpy(fxts_param.key + offset, xts_ctx->keys, xts_ctx->key_len);
+ memcpy(fxts_param.tweak, req->iv, AES_BLOCK_SIZE);
+ fxts_param.nap[0] = 0x01; /* initial alpha power (1, little-endian) */
+
+ while ((nbytes = walk.nbytes) != 0) {
+ /* only use complete blocks */
+ n = nbytes & ~(AES_BLOCK_SIZE - 1);
+ cpacf_km(xts_ctx->fc | modifier, fxts_param.key + offset,
+ walk.dst.virt.addr, walk.src.virt.addr, n);
+ ret = skcipher_walk_done(&walk, nbytes - n);
+ }
+ memzero_explicit(&fxts_param, sizeof(fxts_param));
+ return ret;
+}
+
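The offset = key_len & 0x20 computation in fullxts_aes_crypt() above deserves a worked example: the parameter block handed to cpacf_km() must be the key, the tweak and the initial alpha power laid out back to back, so a 32-byte double key is copied into the upper half of the 64-byte key field to stay adjacent to the tweak. A small stand-alone illustration of the arithmetic (a sketch derived from the code above, not an independent description of the CPACF interface):

	#include <stdio.h>

	int main(void)
	{
		unsigned int key_lens[] = { 32, 64 };	/* 2*AES-128 and 2*AES-256 */

		for (int i = 0; i < 2; i++) {
			unsigned int key_len = key_lens[i];
			unsigned int offset = key_len & 0x20;

			/* The block passed to the instruction starts at key + offset,
			 * so the key bytes always end exactly where the tweak begins. */
			printf("key_len=%2u -> offset=%2u, param = key[%2u..63] | tweak | nap\n",
			       key_len, offset, offset);
		}
		return 0;
	}
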
+static int fullxts_aes_encrypt(struct skcipher_request *req)
+{
+ return fullxts_aes_crypt(req, 0);
+}
+
+static int fullxts_aes_decrypt(struct skcipher_request *req)
+{
+ return fullxts_aes_crypt(req, CPACF_DECRYPT);
+}
+
+static struct skcipher_alg fullxts_aes_alg = {
+ .base.cra_name = "xts(aes)",
+ .base.cra_driver_name = "full-xts-aes-s390",
+ .base.cra_priority = 403, /* aes-xts-s390 + 1 */
+ .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_xts_ctx),
+ .base.cra_module = THIS_MODULE,
+ .init = xts_fallback_init,
+ .exit = xts_fallback_exit,
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = fullxts_aes_set_key,
+ .encrypt = fullxts_aes_encrypt,
+ .decrypt = fullxts_aes_decrypt,
+};
+
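Because full-xts-aes-s390 registers with priority 403, one above the existing xts-aes-s390, any in-kernel user that allocates "xts(aes)" by name picks it up automatically on machines that provide the full-XTS CPACF function codes. A minimal sketch of such a caller using the standard crypto API (error handling and data setup are abbreviated; the buffer and key handling are illustrative only):

	/* Illustrative caller, not part of the patch. */
	static int xts_encrypt_one_sector(u8 *data, u8 *key, u8 *iv)
	{
		struct crypto_skcipher *tfm;
		struct skcipher_request *req;
		struct scatterlist sg;
		int ret;

		tfm = crypto_alloc_skcipher("xts(aes)", 0, 0);	/* highest priority wins */
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		ret = crypto_skcipher_setkey(tfm, key, 64);	/* double AES-256 key */
		if (ret)
			goto out_free_tfm;

		req = skcipher_request_alloc(tfm, GFP_KERNEL);
		if (!req) {
			ret = -ENOMEM;
			goto out_free_tfm;
		}
		sg_init_one(&sg, data, 512);
		skcipher_request_set_crypt(req, &sg, &sg, 512, iv);	/* in place */
		ret = crypto_skcipher_encrypt(req);	/* async tfms would need crypto_wait_req() */

		skcipher_request_free(req);
	out_free_tfm:
		crypto_free_skcipher(tfm);
		return ret;
	}
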
static int ctr_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
@@ -680,29 +785,20 @@ static void gcm_walk_start(struct gcm_sg_walk *gw, struct scatterlist *sg,
static inline unsigned int _gcm_sg_clamp_and_map(struct gcm_sg_walk *gw)
{
- struct scatterlist *nextsg;
-
- gw->walk_bytes = scatterwalk_clamp(&gw->walk, gw->walk_bytes_remain);
- while (!gw->walk_bytes) {
- nextsg = sg_next(gw->walk.sg);
- if (!nextsg)
- return 0;
- scatterwalk_start(&gw->walk, nextsg);
- gw->walk_bytes = scatterwalk_clamp(&gw->walk,
- gw->walk_bytes_remain);
- }
- gw->walk_ptr = scatterwalk_map(&gw->walk);
+ if (gw->walk_bytes_remain == 0)
+ return 0;
+ gw->walk_bytes = scatterwalk_next(&gw->walk, gw->walk_bytes_remain);
return gw->walk_bytes;
}
static inline void _gcm_sg_unmap_and_advance(struct gcm_sg_walk *gw,
- unsigned int nbytes)
+ unsigned int nbytes, bool out)
{
gw->walk_bytes_remain -= nbytes;
- scatterwalk_unmap(gw->walk_ptr);
- scatterwalk_advance(&gw->walk, nbytes);
- scatterwalk_done(&gw->walk, 0, gw->walk_bytes_remain);
- gw->walk_ptr = NULL;
+ if (out)
+ scatterwalk_done_dst(&gw->walk, nbytes);
+ else
+ scatterwalk_done_src(&gw->walk, nbytes);
}
static int gcm_in_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded)
@@ -728,16 +824,16 @@ static int gcm_in_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded)
}
if (!gw->buf_bytes && gw->walk_bytes >= minbytesneeded) {
- gw->ptr = gw->walk_ptr;
+ gw->ptr = gw->walk.addr;
gw->nbytes = gw->walk_bytes;
goto out;
}
while (1) {
n = min(gw->walk_bytes, AES_BLOCK_SIZE - gw->buf_bytes);
- memcpy(gw->buf + gw->buf_bytes, gw->walk_ptr, n);
+ memcpy(gw->buf + gw->buf_bytes, gw->walk.addr, n);
gw->buf_bytes += n;
- _gcm_sg_unmap_and_advance(gw, n);
+ _gcm_sg_unmap_and_advance(gw, n, false);
if (gw->buf_bytes >= minbytesneeded) {
gw->ptr = gw->buf;
gw->nbytes = gw->buf_bytes;
@@ -769,13 +865,12 @@ static int gcm_out_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded)
}
if (gw->walk_bytes >= minbytesneeded) {
- gw->ptr = gw->walk_ptr;
+ gw->ptr = gw->walk.addr;
gw->nbytes = gw->walk_bytes;
goto out;
}
- scatterwalk_unmap(gw->walk_ptr);
- gw->walk_ptr = NULL;
+ scatterwalk_unmap(&gw->walk);
gw->ptr = gw->buf;
gw->nbytes = sizeof(gw->buf);
@@ -797,7 +892,7 @@ static int gcm_in_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone)
} else
gw->buf_bytes = 0;
} else
- _gcm_sg_unmap_and_advance(gw, bytesdone);
+ _gcm_sg_unmap_and_advance(gw, bytesdone, false);
return bytesdone;
}
@@ -814,11 +909,11 @@ static int gcm_out_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone)
if (!_gcm_sg_clamp_and_map(gw))
return i;
n = min(gw->walk_bytes, bytesdone - i);
- memcpy(gw->walk_ptr, gw->buf + i, n);
- _gcm_sg_unmap_and_advance(gw, n);
+ memcpy(gw->walk.addr, gw->buf + i, n);
+ _gcm_sg_unmap_and_advance(gw, n, true);
}
} else
- _gcm_sg_unmap_and_advance(gw, bytesdone);
+ _gcm_sg_unmap_and_advance(gw, bytesdone, true);
return bytesdone;
}
@@ -955,7 +1050,7 @@ static struct aead_alg gcm_aes_aead = {
};
static struct crypto_alg *aes_s390_alg;
-static struct skcipher_alg *aes_s390_skcipher_algs[4];
+static struct skcipher_alg *aes_s390_skcipher_algs[5];
static int aes_s390_skciphers_num;
static struct aead_alg *aes_s390_aead_alg;
@@ -1012,6 +1107,13 @@ static int __init aes_s390_init(void)
goto out_err;
}
+ if (cpacf_test_func(&km_functions, CPACF_KM_XTS_128_FULL) ||
+ cpacf_test_func(&km_functions, CPACF_KM_XTS_256_FULL)) {
+ ret = aes_s390_register_skcipher(&fullxts_aes_alg);
+ if (ret)
+ goto out_err;
+ }
+
if (cpacf_test_func(&km_functions, CPACF_KM_XTS_128) ||
cpacf_test_func(&km_functions, CPACF_KM_XTS_256)) {
ret = aes_s390_register_skcipher(&xts_aes_alg);
@@ -1054,4 +1156,4 @@ MODULE_ALIAS_CRYPTO("aes-all");
MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm");
MODULE_LICENSE("GPL");
-MODULE_IMPORT_NS(CRYPTO_INTERNAL);
+MODULE_IMPORT_NS("CRYPTO_INTERNAL");
diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c
index a8a2407381af..083e8d5eada2 100644
--- a/arch/s390/crypto/arch_random.c
+++ b/arch/s390/crypto/arch_random.c
@@ -6,6 +6,7 @@
* Author(s): Harald Freudenberger
*/
+#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/random.h>
diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c
deleted file mode 100644
index f8b0c52e77a4..000000000000
--- a/arch/s390/crypto/chacha-glue.c
+++ /dev/null
@@ -1,130 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * s390 ChaCha stream cipher.
- *
- * Copyright IBM Corp. 2021
- */
-
-#define KMSG_COMPONENT "chacha_s390"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <crypto/internal/chacha.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/algapi.h>
-#include <linux/cpufeature.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/sizes.h>
-#include <asm/fpu.h>
-#include "chacha-s390.h"
-
-static void chacha20_crypt_s390(u32 *state, u8 *dst, const u8 *src,
- unsigned int nbytes, const u32 *key,
- u32 *counter)
-{
- DECLARE_KERNEL_FPU_ONSTACK32(vxstate);
-
- kernel_fpu_begin(&vxstate, KERNEL_VXR);
- chacha20_vx(dst, src, nbytes, key, counter);
- kernel_fpu_end(&vxstate, KERNEL_VXR);
-
- *counter += round_up(nbytes, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE;
-}
-
-static int chacha20_s390(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- u32 state[CHACHA_STATE_WORDS] __aligned(16);
- struct skcipher_walk walk;
- unsigned int nbytes;
- int rc;
-
- rc = skcipher_walk_virt(&walk, req, false);
- chacha_init_generic(state, ctx->key, req->iv);
-
- while (walk.nbytes > 0) {
- nbytes = walk.nbytes;
- if (nbytes < walk.total)
- nbytes = round_down(nbytes, walk.stride);
-
- if (nbytes <= CHACHA_BLOCK_SIZE) {
- chacha_crypt_generic(state, walk.dst.virt.addr,
- walk.src.virt.addr, nbytes,
- ctx->nrounds);
- } else {
- chacha20_crypt_s390(state, walk.dst.virt.addr,
- walk.src.virt.addr, nbytes,
- &state[4], &state[12]);
- }
- rc = skcipher_walk_done(&walk, walk.nbytes - nbytes);
- }
- return rc;
-}
-
-void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
-{
- /* TODO: implement hchacha_block_arch() in assembly */
- hchacha_block_generic(state, stream, nrounds);
-}
-EXPORT_SYMBOL(hchacha_block_arch);
-
-void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
-{
- chacha_init_generic(state, key, iv);
-}
-EXPORT_SYMBOL(chacha_init_arch);
-
-void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
- unsigned int bytes, int nrounds)
-{
- /* s390 chacha20 implementation has 20 rounds hard-coded,
- * it cannot handle a block of data or less, but otherwise
- * it can handle data of arbitrary size
- */
- if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20 || !cpu_has_vx())
- chacha_crypt_generic(state, dst, src, bytes, nrounds);
- else
- chacha20_crypt_s390(state, dst, src, bytes,
- &state[4], &state[12]);
-}
-EXPORT_SYMBOL(chacha_crypt_arch);
-
-static struct skcipher_alg chacha_algs[] = {
- {
- .base.cra_name = "chacha20",
- .base.cra_driver_name = "chacha20-s390",
- .base.cra_priority = 900,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = CHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .setkey = chacha20_setkey,
- .encrypt = chacha20_s390,
- .decrypt = chacha20_s390,
- }
-};
-
-static int __init chacha_mod_init(void)
-{
- return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ?
- crypto_register_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs)) : 0;
-}
-
-static void __exit chacha_mod_fini(void)
-{
- if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER))
- crypto_unregister_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs));
-}
-
-module_cpu_feature_match(S390_CPU_FEATURE_VXRS, chacha_mod_init);
-module_exit(chacha_mod_fini);
-
-MODULE_DESCRIPTION("ChaCha20 stream cipher");
-MODULE_LICENSE("GPL v2");
-
-MODULE_ALIAS_CRYPTO("chacha20");
diff --git a/arch/s390/crypto/chacha-s390.S b/arch/s390/crypto/chacha-s390.S
deleted file mode 100644
index 63f3102678c0..000000000000
--- a/arch/s390/crypto/chacha-s390.S
+++ /dev/null
@@ -1,908 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Original implementation written by Andy Polyakov, @dot-asm.
- * This is an adaptation of the original code for kernel use.
- *
- * Copyright (C) 2006-2019 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved.
- */
-
-#include <linux/linkage.h>
-#include <asm/nospec-insn.h>
-#include <asm/fpu-insn.h>
-
-#define SP %r15
-#define FRAME (16 * 8 + 4 * 8)
-
- .data
- .balign 32
-
-SYM_DATA_START_LOCAL(sigma)
- .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral
- .long 1,0,0,0
- .long 2,0,0,0
- .long 3,0,0,0
- .long 0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap
-
- .long 0,1,2,3
- .long 0x61707865,0x61707865,0x61707865,0x61707865 # smashed sigma
- .long 0x3320646e,0x3320646e,0x3320646e,0x3320646e
- .long 0x79622d32,0x79622d32,0x79622d32,0x79622d32
- .long 0x6b206574,0x6b206574,0x6b206574,0x6b206574
-SYM_DATA_END(sigma)
-
- .previous
-
- GEN_BR_THUNK %r14
-
- .text
-
-#############################################################################
-# void chacha20_vx_4x(u8 *out, counst u8 *inp, size_t len,
-# counst u32 *key, const u32 *counter)
-
-#define OUT %r2
-#define INP %r3
-#define LEN %r4
-#define KEY %r5
-#define COUNTER %r6
-
-#define BEPERM %v31
-#define CTR %v26
-
-#define K0 %v16
-#define K1 %v17
-#define K2 %v18
-#define K3 %v19
-
-#define XA0 %v0
-#define XA1 %v1
-#define XA2 %v2
-#define XA3 %v3
-
-#define XB0 %v4
-#define XB1 %v5
-#define XB2 %v6
-#define XB3 %v7
-
-#define XC0 %v8
-#define XC1 %v9
-#define XC2 %v10
-#define XC3 %v11
-
-#define XD0 %v12
-#define XD1 %v13
-#define XD2 %v14
-#define XD3 %v15
-
-#define XT0 %v27
-#define XT1 %v28
-#define XT2 %v29
-#define XT3 %v30
-
-SYM_FUNC_START(chacha20_vx_4x)
- stmg %r6,%r7,6*8(SP)
-
- larl %r7,sigma
- lhi %r0,10
- lhi %r1,0
-
- VL K0,0,,%r7 # load sigma
- VL K1,0,,KEY # load key
- VL K2,16,,KEY
- VL K3,0,,COUNTER # load counter
-
- VL BEPERM,0x40,,%r7
- VL CTR,0x50,,%r7
-
- VLM XA0,XA3,0x60,%r7,4 # load [smashed] sigma
-
- VREPF XB0,K1,0 # smash the key
- VREPF XB1,K1,1
- VREPF XB2,K1,2
- VREPF XB3,K1,3
-
- VREPF XD0,K3,0
- VREPF XD1,K3,1
- VREPF XD2,K3,2
- VREPF XD3,K3,3
- VAF XD0,XD0,CTR
-
- VREPF XC0,K2,0
- VREPF XC1,K2,1
- VREPF XC2,K2,2
- VREPF XC3,K2,3
-
-.Loop_4x:
- VAF XA0,XA0,XB0
- VX XD0,XD0,XA0
- VERLLF XD0,XD0,16
-
- VAF XA1,XA1,XB1
- VX XD1,XD1,XA1
- VERLLF XD1,XD1,16
-
- VAF XA2,XA2,XB2
- VX XD2,XD2,XA2
- VERLLF XD2,XD2,16
-
- VAF XA3,XA3,XB3
- VX XD3,XD3,XA3
- VERLLF XD3,XD3,16
-
- VAF XC0,XC0,XD0
- VX XB0,XB0,XC0
- VERLLF XB0,XB0,12
-
- VAF XC1,XC1,XD1
- VX XB1,XB1,XC1
- VERLLF XB1,XB1,12
-
- VAF XC2,XC2,XD2
- VX XB2,XB2,XC2
- VERLLF XB2,XB2,12
-
- VAF XC3,XC3,XD3
- VX XB3,XB3,XC3
- VERLLF XB3,XB3,12
-
- VAF XA0,XA0,XB0
- VX XD0,XD0,XA0
- VERLLF XD0,XD0,8
-
- VAF XA1,XA1,XB1
- VX XD1,XD1,XA1
- VERLLF XD1,XD1,8
-
- VAF XA2,XA2,XB2
- VX XD2,XD2,XA2
- VERLLF XD2,XD2,8
-
- VAF XA3,XA3,XB3
- VX XD3,XD3,XA3
- VERLLF XD3,XD3,8
-
- VAF XC0,XC0,XD0
- VX XB0,XB0,XC0
- VERLLF XB0,XB0,7
-
- VAF XC1,XC1,XD1
- VX XB1,XB1,XC1
- VERLLF XB1,XB1,7
-
- VAF XC2,XC2,XD2
- VX XB2,XB2,XC2
- VERLLF XB2,XB2,7
-
- VAF XC3,XC3,XD3
- VX XB3,XB3,XC3
- VERLLF XB3,XB3,7
-
- VAF XA0,XA0,XB1
- VX XD3,XD3,XA0
- VERLLF XD3,XD3,16
-
- VAF XA1,XA1,XB2
- VX XD0,XD0,XA1
- VERLLF XD0,XD0,16
-
- VAF XA2,XA2,XB3
- VX XD1,XD1,XA2
- VERLLF XD1,XD1,16
-
- VAF XA3,XA3,XB0
- VX XD2,XD2,XA3
- VERLLF XD2,XD2,16
-
- VAF XC2,XC2,XD3
- VX XB1,XB1,XC2
- VERLLF XB1,XB1,12
-
- VAF XC3,XC3,XD0
- VX XB2,XB2,XC3
- VERLLF XB2,XB2,12
-
- VAF XC0,XC0,XD1
- VX XB3,XB3,XC0
- VERLLF XB3,XB3,12
-
- VAF XC1,XC1,XD2
- VX XB0,XB0,XC1
- VERLLF XB0,XB0,12
-
- VAF XA0,XA0,XB1
- VX XD3,XD3,XA0
- VERLLF XD3,XD3,8
-
- VAF XA1,XA1,XB2
- VX XD0,XD0,XA1
- VERLLF XD0,XD0,8
-
- VAF XA2,XA2,XB3
- VX XD1,XD1,XA2
- VERLLF XD1,XD1,8
-
- VAF XA3,XA3,XB0
- VX XD2,XD2,XA3
- VERLLF XD2,XD2,8
-
- VAF XC2,XC2,XD3
- VX XB1,XB1,XC2
- VERLLF XB1,XB1,7
-
- VAF XC3,XC3,XD0
- VX XB2,XB2,XC3
- VERLLF XB2,XB2,7
-
- VAF XC0,XC0,XD1
- VX XB3,XB3,XC0
- VERLLF XB3,XB3,7
-
- VAF XC1,XC1,XD2
- VX XB0,XB0,XC1
- VERLLF XB0,XB0,7
- brct %r0,.Loop_4x
-
- VAF XD0,XD0,CTR
-
- VMRHF XT0,XA0,XA1 # transpose data
- VMRHF XT1,XA2,XA3
- VMRLF XT2,XA0,XA1
- VMRLF XT3,XA2,XA3
- VPDI XA0,XT0,XT1,0b0000
- VPDI XA1,XT0,XT1,0b0101
- VPDI XA2,XT2,XT3,0b0000
- VPDI XA3,XT2,XT3,0b0101
-
- VMRHF XT0,XB0,XB1
- VMRHF XT1,XB2,XB3
- VMRLF XT2,XB0,XB1
- VMRLF XT3,XB2,XB3
- VPDI XB0,XT0,XT1,0b0000
- VPDI XB1,XT0,XT1,0b0101
- VPDI XB2,XT2,XT3,0b0000
- VPDI XB3,XT2,XT3,0b0101
-
- VMRHF XT0,XC0,XC1
- VMRHF XT1,XC2,XC3
- VMRLF XT2,XC0,XC1
- VMRLF XT3,XC2,XC3
- VPDI XC0,XT0,XT1,0b0000
- VPDI XC1,XT0,XT1,0b0101
- VPDI XC2,XT2,XT3,0b0000
- VPDI XC3,XT2,XT3,0b0101
-
- VMRHF XT0,XD0,XD1
- VMRHF XT1,XD2,XD3
- VMRLF XT2,XD0,XD1
- VMRLF XT3,XD2,XD3
- VPDI XD0,XT0,XT1,0b0000
- VPDI XD1,XT0,XT1,0b0101
- VPDI XD2,XT2,XT3,0b0000
- VPDI XD3,XT2,XT3,0b0101
-
- VAF XA0,XA0,K0
- VAF XB0,XB0,K1
- VAF XC0,XC0,K2
- VAF XD0,XD0,K3
-
- VPERM XA0,XA0,XA0,BEPERM
- VPERM XB0,XB0,XB0,BEPERM
- VPERM XC0,XC0,XC0,BEPERM
- VPERM XD0,XD0,XD0,BEPERM
-
- VLM XT0,XT3,0,INP,0
-
- VX XT0,XT0,XA0
- VX XT1,XT1,XB0
- VX XT2,XT2,XC0
- VX XT3,XT3,XD0
-
- VSTM XT0,XT3,0,OUT,0
-
- la INP,0x40(INP)
- la OUT,0x40(OUT)
- aghi LEN,-0x40
-
- VAF XA0,XA1,K0
- VAF XB0,XB1,K1
- VAF XC0,XC1,K2
- VAF XD0,XD1,K3
-
- VPERM XA0,XA0,XA0,BEPERM
- VPERM XB0,XB0,XB0,BEPERM
- VPERM XC0,XC0,XC0,BEPERM
- VPERM XD0,XD0,XD0,BEPERM
-
- clgfi LEN,0x40
- jl .Ltail_4x
-
- VLM XT0,XT3,0,INP,0
-
- VX XT0,XT0,XA0
- VX XT1,XT1,XB0
- VX XT2,XT2,XC0
- VX XT3,XT3,XD0
-
- VSTM XT0,XT3,0,OUT,0
-
- la INP,0x40(INP)
- la OUT,0x40(OUT)
- aghi LEN,-0x40
- je .Ldone_4x
-
- VAF XA0,XA2,K0
- VAF XB0,XB2,K1
- VAF XC0,XC2,K2
- VAF XD0,XD2,K3
-
- VPERM XA0,XA0,XA0,BEPERM
- VPERM XB0,XB0,XB0,BEPERM
- VPERM XC0,XC0,XC0,BEPERM
- VPERM XD0,XD0,XD0,BEPERM
-
- clgfi LEN,0x40
- jl .Ltail_4x
-
- VLM XT0,XT3,0,INP,0
-
- VX XT0,XT0,XA0
- VX XT1,XT1,XB0
- VX XT2,XT2,XC0
- VX XT3,XT3,XD0
-
- VSTM XT0,XT3,0,OUT,0
-
- la INP,0x40(INP)
- la OUT,0x40(OUT)
- aghi LEN,-0x40
- je .Ldone_4x
-
- VAF XA0,XA3,K0
- VAF XB0,XB3,K1
- VAF XC0,XC3,K2
- VAF XD0,XD3,K3
-
- VPERM XA0,XA0,XA0,BEPERM
- VPERM XB0,XB0,XB0,BEPERM
- VPERM XC0,XC0,XC0,BEPERM
- VPERM XD0,XD0,XD0,BEPERM
-
- clgfi LEN,0x40
- jl .Ltail_4x
-
- VLM XT0,XT3,0,INP,0
-
- VX XT0,XT0,XA0
- VX XT1,XT1,XB0
- VX XT2,XT2,XC0
- VX XT3,XT3,XD0
-
- VSTM XT0,XT3,0,OUT,0
-
-.Ldone_4x:
- lmg %r6,%r7,6*8(SP)
- BR_EX %r14
-
-.Ltail_4x:
- VLR XT0,XC0
- VLR XT1,XD0
-
- VST XA0,8*8+0x00,,SP
- VST XB0,8*8+0x10,,SP
- VST XT0,8*8+0x20,,SP
- VST XT1,8*8+0x30,,SP
-
- lghi %r1,0
-
-.Loop_tail_4x:
- llgc %r5,0(%r1,INP)
- llgc %r6,8*8(%r1,SP)
- xr %r6,%r5
- stc %r6,0(%r1,OUT)
- la %r1,1(%r1)
- brct LEN,.Loop_tail_4x
-
- lmg %r6,%r7,6*8(SP)
- BR_EX %r14
-SYM_FUNC_END(chacha20_vx_4x)
-
-#undef OUT
-#undef INP
-#undef LEN
-#undef KEY
-#undef COUNTER
-
-#undef BEPERM
-
-#undef K0
-#undef K1
-#undef K2
-#undef K3
-
-
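For orientation while reading the vector sequences above: each VAF/VX/VERLLF group is one step of a ChaCha quarter-round, and the code interleaves several independent quarter-rounds across vector registers to keep the pipelines busy. The following plain C sketch of a single quarter-round is illustrative only and is not part of the kernel sources; the names rotl32 and chacha_qr are invented for this note.

#include <stdint.h>

static inline uint32_t rotl32(uint32_t v, int n)
{
	return (v << n) | (v >> (32 - n));
}

/* One ChaCha quarter-round; the assembly above runs many of these in parallel. */
static void chacha_qr(uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
{
	*a += *b; *d ^= *a; *d = rotl32(*d, 16);	/* VAF / VX / VERLLF ...,16 */
	*c += *d; *b ^= *c; *b = rotl32(*b, 12);	/* VAF / VX / VERLLF ...,12 */
	*a += *b; *d ^= *a; *d = rotl32(*d, 8);		/* VAF / VX / VERLLF ...,8  */
	*c += *d; *b ^= *c; *b = rotl32(*b, 7);		/* VAF / VX / VERLLF ...,7  */
}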
-#############################################################################
-# void chacha20_vx(u8 *out, const u8 *inp, size_t len,
-# const u32 *key, const u32 *counter)
-
-#define OUT %r2
-#define INP %r3
-#define LEN %r4
-#define KEY %r5
-#define COUNTER %r6
-
-#define BEPERM %v31
-
-#define K0 %v27
-#define K1 %v24
-#define K2 %v25
-#define K3 %v26
-
-#define A0 %v0
-#define B0 %v1
-#define C0 %v2
-#define D0 %v3
-
-#define A1 %v4
-#define B1 %v5
-#define C1 %v6
-#define D1 %v7
-
-#define A2 %v8
-#define B2 %v9
-#define C2 %v10
-#define D2 %v11
-
-#define A3 %v12
-#define B3 %v13
-#define C3 %v14
-#define D3 %v15
-
-#define A4 %v16
-#define B4 %v17
-#define C4 %v18
-#define D4 %v19
-
-#define A5 %v20
-#define B5 %v21
-#define C5 %v22
-#define D5 %v23
-
-#define T0 %v27
-#define T1 %v28
-#define T2 %v29
-#define T3 %v30
-
-SYM_FUNC_START(chacha20_vx)
- clgfi LEN,256
- jle chacha20_vx_4x
- stmg %r6,%r7,6*8(SP)
-
- lghi %r1,-FRAME
- lgr %r0,SP
- la SP,0(%r1,SP)
- stg %r0,0(SP) # back-chain
-
- larl %r7,sigma
- lhi %r0,10
-
- VLM K1,K2,0,KEY,0 # load key
- VL K3,0,,COUNTER # load counter
-
- VLM K0,BEPERM,0,%r7,4 # load sigma, increments, ...
-
-.Loop_outer_vx:
- VLR A0,K0
- VLR B0,K1
- VLR A1,K0
- VLR B1,K1
- VLR A2,K0
- VLR B2,K1
- VLR A3,K0
- VLR B3,K1
- VLR A4,K0
- VLR B4,K1
- VLR A5,K0
- VLR B5,K1
-
- VLR D0,K3
- VAF D1,K3,T1 # K[3]+1
- VAF D2,K3,T2 # K[3]+2
- VAF D3,K3,T3 # K[3]+3
- VAF D4,D2,T2 # K[3]+4
- VAF D5,D2,T3 # K[3]+5
-
- VLR C0,K2
- VLR C1,K2
- VLR C2,K2
- VLR C3,K2
- VLR C4,K2
- VLR C5,K2
-
- VLR T1,D1
- VLR T2,D2
- VLR T3,D3
-
-.Loop_vx:
- VAF A0,A0,B0
- VAF A1,A1,B1
- VAF A2,A2,B2
- VAF A3,A3,B3
- VAF A4,A4,B4
- VAF A5,A5,B5
- VX D0,D0,A0
- VX D1,D1,A1
- VX D2,D2,A2
- VX D3,D3,A3
- VX D4,D4,A4
- VX D5,D5,A5
- VERLLF D0,D0,16
- VERLLF D1,D1,16
- VERLLF D2,D2,16
- VERLLF D3,D3,16
- VERLLF D4,D4,16
- VERLLF D5,D5,16
-
- VAF C0,C0,D0
- VAF C1,C1,D1
- VAF C2,C2,D2
- VAF C3,C3,D3
- VAF C4,C4,D4
- VAF C5,C5,D5
- VX B0,B0,C0
- VX B1,B1,C1
- VX B2,B2,C2
- VX B3,B3,C3
- VX B4,B4,C4
- VX B5,B5,C5
- VERLLF B0,B0,12
- VERLLF B1,B1,12
- VERLLF B2,B2,12
- VERLLF B3,B3,12
- VERLLF B4,B4,12
- VERLLF B5,B5,12
-
- VAF A0,A0,B0
- VAF A1,A1,B1
- VAF A2,A2,B2
- VAF A3,A3,B3
- VAF A4,A4,B4
- VAF A5,A5,B5
- VX D0,D0,A0
- VX D1,D1,A1
- VX D2,D2,A2
- VX D3,D3,A3
- VX D4,D4,A4
- VX D5,D5,A5
- VERLLF D0,D0,8
- VERLLF D1,D1,8
- VERLLF D2,D2,8
- VERLLF D3,D3,8
- VERLLF D4,D4,8
- VERLLF D5,D5,8
-
- VAF C0,C0,D0
- VAF C1,C1,D1
- VAF C2,C2,D2
- VAF C3,C3,D3
- VAF C4,C4,D4
- VAF C5,C5,D5
- VX B0,B0,C0
- VX B1,B1,C1
- VX B2,B2,C2
- VX B3,B3,C3
- VX B4,B4,C4
- VX B5,B5,C5
- VERLLF B0,B0,7
- VERLLF B1,B1,7
- VERLLF B2,B2,7
- VERLLF B3,B3,7
- VERLLF B4,B4,7
- VERLLF B5,B5,7
-
- VSLDB C0,C0,C0,8
- VSLDB C1,C1,C1,8
- VSLDB C2,C2,C2,8
- VSLDB C3,C3,C3,8
- VSLDB C4,C4,C4,8
- VSLDB C5,C5,C5,8
- VSLDB B0,B0,B0,4
- VSLDB B1,B1,B1,4
- VSLDB B2,B2,B2,4
- VSLDB B3,B3,B3,4
- VSLDB B4,B4,B4,4
- VSLDB B5,B5,B5,4
- VSLDB D0,D0,D0,12
- VSLDB D1,D1,D1,12
- VSLDB D2,D2,D2,12
- VSLDB D3,D3,D3,12
- VSLDB D4,D4,D4,12
- VSLDB D5,D5,D5,12
-
- VAF A0,A0,B0
- VAF A1,A1,B1
- VAF A2,A2,B2
- VAF A3,A3,B3
- VAF A4,A4,B4
- VAF A5,A5,B5
- VX D0,D0,A0
- VX D1,D1,A1
- VX D2,D2,A2
- VX D3,D3,A3
- VX D4,D4,A4
- VX D5,D5,A5
- VERLLF D0,D0,16
- VERLLF D1,D1,16
- VERLLF D2,D2,16
- VERLLF D3,D3,16
- VERLLF D4,D4,16
- VERLLF D5,D5,16
-
- VAF C0,C0,D0
- VAF C1,C1,D1
- VAF C2,C2,D2
- VAF C3,C3,D3
- VAF C4,C4,D4
- VAF C5,C5,D5
- VX B0,B0,C0
- VX B1,B1,C1
- VX B2,B2,C2
- VX B3,B3,C3
- VX B4,B4,C4
- VX B5,B5,C5
- VERLLF B0,B0,12
- VERLLF B1,B1,12
- VERLLF B2,B2,12
- VERLLF B3,B3,12
- VERLLF B4,B4,12
- VERLLF B5,B5,12
-
- VAF A0,A0,B0
- VAF A1,A1,B1
- VAF A2,A2,B2
- VAF A3,A3,B3
- VAF A4,A4,B4
- VAF A5,A5,B5
- VX D0,D0,A0
- VX D1,D1,A1
- VX D2,D2,A2
- VX D3,D3,A3
- VX D4,D4,A4
- VX D5,D5,A5
- VERLLF D0,D0,8
- VERLLF D1,D1,8
- VERLLF D2,D2,8
- VERLLF D3,D3,8
- VERLLF D4,D4,8
- VERLLF D5,D5,8
-
- VAF C0,C0,D0
- VAF C1,C1,D1
- VAF C2,C2,D2
- VAF C3,C3,D3
- VAF C4,C4,D4
- VAF C5,C5,D5
- VX B0,B0,C0
- VX B1,B1,C1
- VX B2,B2,C2
- VX B3,B3,C3
- VX B4,B4,C4
- VX B5,B5,C5
- VERLLF B0,B0,7
- VERLLF B1,B1,7
- VERLLF B2,B2,7
- VERLLF B3,B3,7
- VERLLF B4,B4,7
- VERLLF B5,B5,7
-
- VSLDB C0,C0,C0,8
- VSLDB C1,C1,C1,8
- VSLDB C2,C2,C2,8
- VSLDB C3,C3,C3,8
- VSLDB C4,C4,C4,8
- VSLDB C5,C5,C5,8
- VSLDB B0,B0,B0,12
- VSLDB B1,B1,B1,12
- VSLDB B2,B2,B2,12
- VSLDB B3,B3,B3,12
- VSLDB B4,B4,B4,12
- VSLDB B5,B5,B5,12
- VSLDB D0,D0,D0,4
- VSLDB D1,D1,D1,4
- VSLDB D2,D2,D2,4
- VSLDB D3,D3,D3,4
- VSLDB D4,D4,D4,4
- VSLDB D5,D5,D5,4
- brct %r0,.Loop_vx
-
- VAF A0,A0,K0
- VAF B0,B0,K1
- VAF C0,C0,K2
- VAF D0,D0,K3
- VAF A1,A1,K0
- VAF D1,D1,T1 # +K[3]+1
-
- VPERM A0,A0,A0,BEPERM
- VPERM B0,B0,B0,BEPERM
- VPERM C0,C0,C0,BEPERM
- VPERM D0,D0,D0,BEPERM
-
- clgfi LEN,0x40
- jl .Ltail_vx
-
- VAF D2,D2,T2 # +K[3]+2
- VAF D3,D3,T3 # +K[3]+3
- VLM T0,T3,0,INP,0
-
- VX A0,A0,T0
- VX B0,B0,T1
- VX C0,C0,T2
- VX D0,D0,T3
-
- VLM K0,T3,0,%r7,4 # re-load sigma and increments
-
- VSTM A0,D0,0,OUT,0
-
- la INP,0x40(INP)
- la OUT,0x40(OUT)
- aghi LEN,-0x40
- je .Ldone_vx
-
- VAF B1,B1,K1
- VAF C1,C1,K2
-
- VPERM A0,A1,A1,BEPERM
- VPERM B0,B1,B1,BEPERM
- VPERM C0,C1,C1,BEPERM
- VPERM D0,D1,D1,BEPERM
-
- clgfi LEN,0x40
- jl .Ltail_vx
-
- VLM A1,D1,0,INP,0
-
- VX A0,A0,A1
- VX B0,B0,B1
- VX C0,C0,C1
- VX D0,D0,D1
-
- VSTM A0,D0,0,OUT,0
-
- la INP,0x40(INP)
- la OUT,0x40(OUT)
- aghi LEN,-0x40
- je .Ldone_vx
-
- VAF A2,A2,K0
- VAF B2,B2,K1
- VAF C2,C2,K2
-
- VPERM A0,A2,A2,BEPERM
- VPERM B0,B2,B2,BEPERM
- VPERM C0,C2,C2,BEPERM
- VPERM D0,D2,D2,BEPERM
-
- clgfi LEN,0x40
- jl .Ltail_vx
-
- VLM A1,D1,0,INP,0
-
- VX A0,A0,A1
- VX B0,B0,B1
- VX C0,C0,C1
- VX D0,D0,D1
-
- VSTM A0,D0,0,OUT,0
-
- la INP,0x40(INP)
- la OUT,0x40(OUT)
- aghi LEN,-0x40
- je .Ldone_vx
-
- VAF A3,A3,K0
- VAF B3,B3,K1
- VAF C3,C3,K2
- VAF D2,K3,T3 # K[3]+3
-
- VPERM A0,A3,A3,BEPERM
- VPERM B0,B3,B3,BEPERM
- VPERM C0,C3,C3,BEPERM
- VPERM D0,D3,D3,BEPERM
-
- clgfi LEN,0x40
- jl .Ltail_vx
-
- VAF D3,D2,T1 # K[3]+4
- VLM A1,D1,0,INP,0
-
- VX A0,A0,A1
- VX B0,B0,B1
- VX C0,C0,C1
- VX D0,D0,D1
-
- VSTM A0,D0,0,OUT,0
-
- la INP,0x40(INP)
- la OUT,0x40(OUT)
- aghi LEN,-0x40
- je .Ldone_vx
-
- VAF A4,A4,K0
- VAF B4,B4,K1
- VAF C4,C4,K2
- VAF D4,D4,D3 # +K[3]+4
- VAF D3,D3,T1 # K[3]+5
- VAF K3,D2,T3 # K[3]+=6
-
- VPERM A0,A4,A4,BEPERM
- VPERM B0,B4,B4,BEPERM
- VPERM C0,C4,C4,BEPERM
- VPERM D0,D4,D4,BEPERM
-
- clgfi LEN,0x40
- jl .Ltail_vx
-
- VLM A1,D1,0,INP,0
-
- VX A0,A0,A1
- VX B0,B0,B1
- VX C0,C0,C1
- VX D0,D0,D1
-
- VSTM A0,D0,0,OUT,0
-
- la INP,0x40(INP)
- la OUT,0x40(OUT)
- aghi LEN,-0x40
- je .Ldone_vx
-
- VAF A5,A5,K0
- VAF B5,B5,K1
- VAF C5,C5,K2
- VAF D5,D5,D3 # +K[3]+5
-
- VPERM A0,A5,A5,BEPERM
- VPERM B0,B5,B5,BEPERM
- VPERM C0,C5,C5,BEPERM
- VPERM D0,D5,D5,BEPERM
-
- clgfi LEN,0x40
- jl .Ltail_vx
-
- VLM A1,D1,0,INP,0
-
- VX A0,A0,A1
- VX B0,B0,B1
- VX C0,C0,C1
- VX D0,D0,D1
-
- VSTM A0,D0,0,OUT,0
-
- la INP,0x40(INP)
- la OUT,0x40(OUT)
- lhi %r0,10
- aghi LEN,-0x40
- jne .Loop_outer_vx
-
-.Ldone_vx:
- lmg %r6,%r7,FRAME+6*8(SP)
- la SP,FRAME(SP)
- BR_EX %r14
-
-.Ltail_vx:
- VSTM A0,D0,8*8,SP,3
- lghi %r1,0
-
-.Loop_tail_vx:
- llgc %r5,0(%r1,INP)
- llgc %r6,8*8(%r1,SP)
- xr %r6,%r5
- stc %r6,0(%r1,OUT)
- la %r1,1(%r1)
- brct LEN,.Loop_tail_vx
-
- lmg %r6,%r7,FRAME+6*8(SP)
- la SP,FRAME(SP)
- BR_EX %r14
-SYM_FUNC_END(chacha20_vx)
-
-.previous
diff --git a/arch/s390/crypto/chacha-s390.h b/arch/s390/crypto/chacha-s390.h
deleted file mode 100644
index 733744ce30f5..000000000000
--- a/arch/s390/crypto/chacha-s390.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * s390 ChaCha stream cipher.
- *
- * Copyright IBM Corp. 2021
- */
-
-#ifndef _CHACHA_S390_H
-#define _CHACHA_S390_H
-
-void chacha20_vx(u8 *out, const u8 *inp, size_t len, const u32 *key,
- const u32 *counter);
-
-#endif /* _CHACHA_S390_H */
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
deleted file mode 100644
index 74f17c905d12..000000000000
--- a/arch/s390/crypto/crc32-vx.c
+++ /dev/null
@@ -1,305 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Crypto-API module for CRC-32 algorithms implemented with the
- * z/Architecture Vector Extension Facility.
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-#define KMSG_COMPONENT "crc32-vx"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/module.h>
-#include <linux/cpufeature.h>
-#include <linux/crc32.h>
-#include <crypto/internal/hash.h>
-#include <asm/fpu.h>
-#include "crc32-vx.h"
-
-#define CRC32_BLOCK_SIZE 1
-#define CRC32_DIGEST_SIZE 4
-
-#define VX_MIN_LEN 64
-#define VX_ALIGNMENT 16L
-#define VX_ALIGN_MASK (VX_ALIGNMENT - 1)
-
-struct crc_ctx {
- u32 key;
-};
-
-struct crc_desc_ctx {
- u32 crc;
-};
-
-/*
- * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension
- *
- * Creates a function to perform a particular CRC-32 computation. Depending
- * on the message buffer, the hardware-accelerated or software implementation
- * is used. Note that the message buffer is aligned to improve fetch
- * operations of VECTOR LOAD MULTIPLE instructions.
- *
- */
-#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \
- static u32 __pure ___fname(u32 crc, \
- unsigned char const *data, size_t datalen) \
- { \
- unsigned long prealign, aligned, remaining; \
- DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \
- \
- if (datalen < VX_MIN_LEN + VX_ALIGN_MASK) \
- return ___crc32_sw(crc, data, datalen); \
- \
- if ((unsigned long)data & VX_ALIGN_MASK) { \
- prealign = VX_ALIGNMENT - \
- ((unsigned long)data & VX_ALIGN_MASK); \
- datalen -= prealign; \
- crc = ___crc32_sw(crc, data, prealign); \
- data = (void *)((unsigned long)data + prealign); \
- } \
- \
- aligned = datalen & ~VX_ALIGN_MASK; \
- remaining = datalen & VX_ALIGN_MASK; \
- \
- kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \
- crc = ___crc32_vx(crc, data, aligned); \
- kernel_fpu_end(&vxstate, KERNEL_VXR_LOW); \
- \
- if (remaining) \
- crc = ___crc32_sw(crc, data + aligned, remaining); \
- \
- return crc; \
- }
-
-DEFINE_CRC32_VX(crc32_le_vx, crc32_le_vgfm_16, crc32_le)
-DEFINE_CRC32_VX(crc32_be_vx, crc32_be_vgfm_16, crc32_be)
-DEFINE_CRC32_VX(crc32c_le_vx, crc32c_le_vgfm_16, __crc32c_le)
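As an illustration of the splitting logic described in the macro comment above, consider a hypothetical call with a buffer that starts 3 bytes past a 16-byte boundary and datalen = 100 (numbers invented for this note). Since 100 is not below VX_MIN_LEN + VX_ALIGN_MASK (79), the vector path is taken and the buffer is split as:

	prealign  = 16 - 3           = 13  -> handled by the software fallback
	aligned   = (100 - 13) & ~15 = 80  -> handled by the vector routine inside kernel_fpu_begin/end
	remaining = (100 - 13) & 15  = 7   -> handled by the software fallback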
-
-
-static int crc32_vx_cra_init_zero(struct crypto_tfm *tfm)
-{
- struct crc_ctx *mctx = crypto_tfm_ctx(tfm);
-
- mctx->key = 0;
- return 0;
-}
-
-static int crc32_vx_cra_init_invert(struct crypto_tfm *tfm)
-{
- struct crc_ctx *mctx = crypto_tfm_ctx(tfm);
-
- mctx->key = ~0;
- return 0;
-}
-
-static int crc32_vx_init(struct shash_desc *desc)
-{
- struct crc_ctx *mctx = crypto_shash_ctx(desc->tfm);
- struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
-
- ctx->crc = mctx->key;
- return 0;
-}
-
-static int crc32_vx_setkey(struct crypto_shash *tfm, const u8 *newkey,
- unsigned int newkeylen)
-{
- struct crc_ctx *mctx = crypto_shash_ctx(tfm);
-
- if (newkeylen != sizeof(mctx->key))
- return -EINVAL;
- mctx->key = le32_to_cpu(*(__le32 *)newkey);
- return 0;
-}
-
-static int crc32be_vx_setkey(struct crypto_shash *tfm, const u8 *newkey,
- unsigned int newkeylen)
-{
- struct crc_ctx *mctx = crypto_shash_ctx(tfm);
-
- if (newkeylen != sizeof(mctx->key))
- return -EINVAL;
- mctx->key = be32_to_cpu(*(__be32 *)newkey);
- return 0;
-}
-
-static int crc32le_vx_final(struct shash_desc *desc, u8 *out)
-{
- struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
-
- *(__le32 *)out = cpu_to_le32p(&ctx->crc);
- return 0;
-}
-
-static int crc32be_vx_final(struct shash_desc *desc, u8 *out)
-{
- struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
-
- *(__be32 *)out = cpu_to_be32p(&ctx->crc);
- return 0;
-}
-
-static int crc32c_vx_final(struct shash_desc *desc, u8 *out)
-{
- struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
-
- /*
- * Perform a final XOR with 0xFFFFFFFF to be in sync
- * with the generic crc32c shash implementation.
- */
- *(__le32 *)out = ~cpu_to_le32p(&ctx->crc);
- return 0;
-}
-
-static int __crc32le_vx_finup(u32 *crc, const u8 *data, unsigned int len,
- u8 *out)
-{
- *(__le32 *)out = cpu_to_le32(crc32_le_vx(*crc, data, len));
- return 0;
-}
-
-static int __crc32be_vx_finup(u32 *crc, const u8 *data, unsigned int len,
- u8 *out)
-{
- *(__be32 *)out = cpu_to_be32(crc32_be_vx(*crc, data, len));
- return 0;
-}
-
-static int __crc32c_vx_finup(u32 *crc, const u8 *data, unsigned int len,
- u8 *out)
-{
- /*
- * Perform a final XOR with 0xFFFFFFFF to be in sync
- * with the generic crc32c shash implementation.
- */
- *(__le32 *)out = ~cpu_to_le32(crc32c_le_vx(*crc, data, len));
- return 0;
-}
-
-
-#define CRC32_VX_FINUP(alg, func) \
- static int alg ## _vx_finup(struct shash_desc *desc, const u8 *data, \
- unsigned int datalen, u8 *out) \
- { \
- return __ ## alg ## _vx_finup(shash_desc_ctx(desc), \
- data, datalen, out); \
- }
-
-CRC32_VX_FINUP(crc32le, crc32_le_vx)
-CRC32_VX_FINUP(crc32be, crc32_be_vx)
-CRC32_VX_FINUP(crc32c, crc32c_le_vx)
-
-#define CRC32_VX_DIGEST(alg, func) \
- static int alg ## _vx_digest(struct shash_desc *desc, const u8 *data, \
- unsigned int len, u8 *out) \
- { \
- return __ ## alg ## _vx_finup(crypto_shash_ctx(desc->tfm), \
- data, len, out); \
- }
-
-CRC32_VX_DIGEST(crc32le, crc32_le_vx)
-CRC32_VX_DIGEST(crc32be, crc32_be_vx)
-CRC32_VX_DIGEST(crc32c, crc32c_le_vx)
-
-#define CRC32_VX_UPDATE(alg, func) \
- static int alg ## _vx_update(struct shash_desc *desc, const u8 *data, \
- unsigned int datalen) \
- { \
- struct crc_desc_ctx *ctx = shash_desc_ctx(desc); \
- ctx->crc = func(ctx->crc, data, datalen); \
- return 0; \
- }
-
-CRC32_VX_UPDATE(crc32le, crc32_le_vx)
-CRC32_VX_UPDATE(crc32be, crc32_be_vx)
-CRC32_VX_UPDATE(crc32c, crc32c_le_vx)
-
-
-static struct shash_alg crc32_vx_algs[] = {
- /* CRC-32 LE */
- {
- .init = crc32_vx_init,
- .setkey = crc32_vx_setkey,
- .update = crc32le_vx_update,
- .final = crc32le_vx_final,
- .finup = crc32le_vx_finup,
- .digest = crc32le_vx_digest,
- .descsize = sizeof(struct crc_desc_ctx),
- .digestsize = CRC32_DIGEST_SIZE,
- .base = {
- .cra_name = "crc32",
- .cra_driver_name = "crc32-vx",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .cra_blocksize = CRC32_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crc_ctx),
- .cra_module = THIS_MODULE,
- .cra_init = crc32_vx_cra_init_zero,
- },
- },
- /* CRC-32 BE */
- {
- .init = crc32_vx_init,
- .setkey = crc32be_vx_setkey,
- .update = crc32be_vx_update,
- .final = crc32be_vx_final,
- .finup = crc32be_vx_finup,
- .digest = crc32be_vx_digest,
- .descsize = sizeof(struct crc_desc_ctx),
- .digestsize = CRC32_DIGEST_SIZE,
- .base = {
- .cra_name = "crc32be",
- .cra_driver_name = "crc32be-vx",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .cra_blocksize = CRC32_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crc_ctx),
- .cra_module = THIS_MODULE,
- .cra_init = crc32_vx_cra_init_zero,
- },
- },
- /* CRC-32C LE */
- {
- .init = crc32_vx_init,
- .setkey = crc32_vx_setkey,
- .update = crc32c_vx_update,
- .final = crc32c_vx_final,
- .finup = crc32c_vx_finup,
- .digest = crc32c_vx_digest,
- .descsize = sizeof(struct crc_desc_ctx),
- .digestsize = CRC32_DIGEST_SIZE,
- .base = {
- .cra_name = "crc32c",
- .cra_driver_name = "crc32c-vx",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .cra_blocksize = CRC32_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crc_ctx),
- .cra_module = THIS_MODULE,
- .cra_init = crc32_vx_cra_init_invert,
- },
- },
-};
-
-
-static int __init crc_vx_mod_init(void)
-{
- return crypto_register_shashes(crc32_vx_algs,
- ARRAY_SIZE(crc32_vx_algs));
-}
-
-static void __exit crc_vx_mod_exit(void)
-{
- crypto_unregister_shashes(crc32_vx_algs, ARRAY_SIZE(crc32_vx_algs));
-}
-
-module_cpu_feature_match(S390_CPU_FEATURE_VXRS, crc_vx_mod_init);
-module_exit(crc_vx_mod_exit);
-
-MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>");
-MODULE_LICENSE("GPL");
-
-MODULE_ALIAS_CRYPTO("crc32");
-MODULE_ALIAS_CRYPTO("crc32-vx");
-MODULE_ALIAS_CRYPTO("crc32c");
-MODULE_ALIAS_CRYPTO("crc32c-vx");
diff --git a/arch/s390/crypto/crc32-vx.h b/arch/s390/crypto/crc32-vx.h
deleted file mode 100644
index 652c96e1a822..000000000000
--- a/arch/s390/crypto/crc32-vx.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef _CRC32_VX_S390_H
-#define _CRC32_VX_S390_H
-
-#include <linux/types.h>
-
-u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
-u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
-u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
-
-#endif /* _CRC32_VX_S390_H */
diff --git a/arch/s390/crypto/crc32be-vx.c b/arch/s390/crypto/crc32be-vx.c
deleted file mode 100644
index fed7c9c70d05..000000000000
--- a/arch/s390/crypto/crc32be-vx.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Hardware-accelerated CRC-32 variants for Linux on z Systems
- *
- * Use the z/Architecture Vector Extension Facility to accelerate the
- * computing of CRC-32 checksums.
- *
- * This CRC-32 implementation algorithm processes the most-significant
- * bit first (BE).
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-
-#include <linux/types.h>
-#include <asm/fpu.h>
-#include "crc32-vx.h"
-
-/* Vector register range containing CRC-32 constants */
-#define CONST_R1R2 9
-#define CONST_R3R4 10
-#define CONST_R5 11
-#define CONST_R6 12
-#define CONST_RU_POLY 13
-#define CONST_CRC_POLY 14
-
-/*
- * The CRC-32 constant block contains reduction constants to fold and
- * process particular chunks of the input data stream in parallel.
- *
- * For the CRC-32 variants, the constants are precomputed according to
- * these definitions:
- *
- * R1 = x4*128+64 mod P(x)
- * R2 = x4*128 mod P(x)
- * R3 = x128+64 mod P(x)
- * R4 = x128 mod P(x)
- * R5 = x96 mod P(x)
- * R6 = x64 mod P(x)
- *
- * Barrett reduction constant, u, is defined as floor(x**64 / P(x)).
- *
- * where P(x) is the polynomial in the normal domain and the P'(x) is the
- * polynomial in the reversed (bitreflected) domain.
- *
- * Note that the constant definitions below are extended in order to compute
- * intermediate results with a single VECTOR GALOIS FIELD MULTIPLY instruction.
- * The rightmost doubleword can be 0 to prevent contribution to the result or
- * can be multiplied by 1 to perform an XOR without the need for a separate
- * VECTOR EXCLUSIVE OR instruction.
- *
- * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials:
- *
- * P(x) = 0x04C11DB7
- * P'(x) = 0xEDB88320
- */
-
-static unsigned long constants_CRC_32_BE[] = {
- 0x08833794c, 0x0e6228b11, /* R1, R2 */
- 0x0c5b9cd4c, 0x0e8a45605, /* R3, R4 */
- 0x0f200aa66, 1UL << 32, /* R5, x32 */
- 0x0490d678d, 1, /* R6, 1 */
- 0x104d101df, 0, /* u */
- 0x104C11DB7, 0, /* P(x) */
-};
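The R1..R6 entries above can be cross-checked with a few lines of plain C that evaluate x^n mod P(x) by repeated shift-and-reduce. This is only a verification sketch, not part of the driver; the helper name xpow_mod_p is invented here.

#include <stdint.h>

/* x^n mod P(x) over GF(2); bit i of the result is the coefficient of x^i. */
static uint64_t xpow_mod_p(unsigned int n)
{
	uint64_t r = 1;				/* start with x^0 */

	while (n--) {
		r <<= 1;			/* multiply by x */
		if (r & (1ULL << 32))		/* degree reached 32 ... */
			r ^= 0x104C11DB7ULL;	/* ... reduce mod P(x)  */
	}
	return r;
}

For example, xpow_mod_p(128) should reproduce the R4 entry and xpow_mod_p(64) the R6 entry of the table above.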
-
-/**
- * crc32_be_vgfm_16 - Compute CRC-32 (BE variant) with vector registers
- * @crc: Initial CRC value, typically ~0.
- * @buf: Input buffer pointer, performance might be improved if the
- * buffer is on a doubleword boundary.
- * @size: Size of the buffer, must be 64 bytes or greater.
- *
- * Register usage:
- * V0: Initial CRC value and intermediate constants and results.
- * V1..V4: Data for CRC computation.
- * V5..V8: Next data chunks that are fetched from the input buffer.
- * V9..V14: CRC-32 constants.
- */
-u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size)
-{
- /* Load CRC-32 constants */
- fpu_vlm(CONST_R1R2, CONST_CRC_POLY, &constants_CRC_32_BE);
- fpu_vzero(0);
-
- /* Load the initial CRC value into the leftmost word of V0. */
- fpu_vlvgf(0, crc, 0);
-
- /* Load a 64-byte data chunk and XOR with CRC */
- fpu_vlm(1, 4, buf);
- fpu_vx(1, 0, 1);
- buf += 64;
- size -= 64;
-
- while (size >= 64) {
- /* Load the next 64-byte data chunk into V5 to V8 */
- fpu_vlm(5, 8, buf);
-
- /*
- * Perform a GF(2) multiplication of the doublewords in V1 with
- * the reduction constants in V0. The intermediate result is
- * then folded (accumulated) with the next data chunk in V5 and
- * stored in V1. Repeat this step for the register contents
- * in V2, V3, and V4 respectively.
- */
- fpu_vgfmag(1, CONST_R1R2, 1, 5);
- fpu_vgfmag(2, CONST_R1R2, 2, 6);
- fpu_vgfmag(3, CONST_R1R2, 3, 7);
- fpu_vgfmag(4, CONST_R1R2, 4, 8);
- buf += 64;
- size -= 64;
- }
-
- /* Fold V1 to V4 into a single 128-bit value in V1 */
- fpu_vgfmag(1, CONST_R3R4, 1, 2);
- fpu_vgfmag(1, CONST_R3R4, 1, 3);
- fpu_vgfmag(1, CONST_R3R4, 1, 4);
-
- while (size >= 16) {
- fpu_vl(2, buf);
- fpu_vgfmag(1, CONST_R3R4, 1, 2);
- buf += 16;
- size -= 16;
- }
-
- /*
- * The R5 constant is used to fold a 128-bit value into a 96-bit value
- * that is XORed with the next 96-bit input data chunk. To use a single
- * VGFMG instruction, multiply the rightmost 64-bit with x^32 (1<<32) to
- * form an intermediate 96-bit value (with appended zeros) which is then
- * XORed with the intermediate reduction result.
- */
- fpu_vgfmg(1, CONST_R5, 1);
-
- /*
- * Further reduce the remaining 96-bit value to a 64-bit value using a
- * single VGFMG, the rightmost doubleword is multiplied with 0x1. The
- * intermediate result is then XORed with the product of the leftmost
- * doubleword with R6. The result is a 64-bit value and is subject to
- * the Barrett reduction.
- */
- fpu_vgfmg(1, CONST_R6, 1);
-
- /*
- * The input values to the Barrett reduction are the degree-63 polynomial
- * in V1 (R(x)), degree-32 generator polynomial, and the reduction
- * constant u. The Barrett reduction result is the CRC value of R(x) mod
- * P(x).
- *
- * The Barrett reduction algorithm is defined as:
- *
- * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u
- * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x)
- * 3. C(x) = R(x) XOR T2(x) mod x^32
- *
- * Note: To compensate the division by x^32, use the vector unpack
- * instruction to move the leftmost word into the leftmost doubleword
- * of the vector register. The rightmost doubleword is multiplied
- * with zero to not contribute to the intermediate results.
- */
-
- /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
- fpu_vupllf(2, 1);
- fpu_vgfmg(2, CONST_RU_POLY, 2);
-
- /*
- * Compute the GF(2) product of the CRC polynomial in V0 with T1(x) in
- * V2 and XOR the intermediate result, T2(x), with the value in V1.
- * The final result is in the rightmost word of V2.
- */
- fpu_vupllf(2, 2);
- fpu_vgfmag(2, CONST_CRC_POLY, 2, 1);
- return fpu_vlgvf(2, 3);
-}
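A scalar model of the Barrett reduction steps documented above may help when reading the final VGFMG/VGFMAG sequence. This is a sketch only, assuming the three-step algorithm exactly as stated in the comment; clmul() and barrett32_be() are names invented for this note and do not exist in the kernel.

#include <stdint.h>

/* Carry-less (GF(2)) multiply; the operand degrees used here fit in 64 bits. */
static uint64_t clmul(uint64_t a, uint64_t b)
{
	uint64_t r = 0;

	while (a) {
		if (a & 1)
			r ^= b;
		a >>= 1;
		b <<= 1;
	}
	return r;
}

/* Reduce the degree-63 remainder R(x) to the final 32-bit CRC value. */
static uint32_t barrett32_be(uint64_t rx)
{
	const uint64_t u    = 0x104d101dfULL;	/* floor(x^64 / P(x)) */
	const uint64_t poly = 0x104c11db7ULL;	/* P(x)               */
	uint64_t t1, t2;

	t1 = clmul(rx >> 32, u);	/* 1. T1(x) = floor(R(x) / x^32) GF2MUL u     */
	t2 = clmul(t1 >> 32, poly);	/* 2. T2(x) = floor(T1(x) / x^32) GF2MUL P(x) */
	return (uint32_t)(rx ^ t2);	/* 3. C(x)  = R(x) XOR T2(x) mod x^32         */
}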
diff --git a/arch/s390/crypto/crc32le-vx.c b/arch/s390/crypto/crc32le-vx.c
deleted file mode 100644
index 2f629f394df7..000000000000
--- a/arch/s390/crypto/crc32le-vx.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Hardware-accelerated CRC-32 variants for Linux on z Systems
- *
- * Use the z/Architecture Vector Extension Facility to accelerate the
- * computing of bitreflected CRC-32 checksums for IEEE 802.3 Ethernet
- * and Castagnoli.
- *
- * This CRC-32 implementation algorithm is bitreflected and processes
- * the least-significant bit first (Little-Endian).
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-
-#include <linux/types.h>
-#include <asm/fpu.h>
-#include "crc32-vx.h"
-
-/* Vector register range containing CRC-32 constants */
-#define CONST_PERM_LE2BE 9
-#define CONST_R2R1 10
-#define CONST_R4R3 11
-#define CONST_R5 12
-#define CONST_RU_POLY 13
-#define CONST_CRC_POLY 14
-
-/*
- * The CRC-32 constant block contains reduction constants to fold and
- * process particular chunks of the input data stream in parallel.
- *
- * For the CRC-32 variants, the constants are precomputed according to
- * these definitions:
- *
- * R1 = [(x4*128+32 mod P'(x) << 32)]' << 1
- * R2 = [(x4*128-32 mod P'(x) << 32)]' << 1
- * R3 = [(x128+32 mod P'(x) << 32)]' << 1
- * R4 = [(x128-32 mod P'(x) << 32)]' << 1
- * R5 = [(x64 mod P'(x) << 32)]' << 1
- * R6 = [(x32 mod P'(x) << 32)]' << 1
- *
- * The bitreflected Barrett reduction constant, u', is defined as
- * the bit reversal of floor(x**64 / P(x)).
- *
- * where P(x) is the polynomial in the normal domain and the P'(x) is the
- * polynomial in the reversed (bitreflected) domain.
- *
- * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials:
- *
- * P(x) = 0x04C11DB7
- * P'(x) = 0xEDB88320
- *
- * CRC-32C (Castagnoli) polynomials:
- *
- * P(x) = 0x1EDC6F41
- * P'(x) = 0x82F63B78
- */
-
-static unsigned long constants_CRC_32_LE[] = {
- 0x0f0e0d0c0b0a0908, 0x0706050403020100, /* BE->LE mask */
- 0x1c6e41596, 0x154442bd4, /* R2, R1 */
- 0x0ccaa009e, 0x1751997d0, /* R4, R3 */
- 0x0, 0x163cd6124, /* R5 */
- 0x0, 0x1f7011641, /* u' */
- 0x0, 0x1db710641 /* P'(x) << 1 */
-};
-
-static unsigned long constants_CRC_32C_LE[] = {
- 0x0f0e0d0c0b0a0908, 0x0706050403020100, /* BE->LE mask */
- 0x09e4addf8, 0x740eef02, /* R2, R1 */
- 0x14cd00bd6, 0xf20c0dfe, /* R4, R3 */
- 0x0, 0x0dd45aab8, /* R5 */
- 0x0, 0x0dea713f1, /* u' */
- 0x0, 0x105ec76f0 /* P'(x) << 1 */
-};
-
-/**
- * crc32_le_vgfm_generic - Compute CRC-32 (LE variant) with vector registers
- * @crc: Initial CRC value, typically ~0.
- * @buf: Input buffer pointer, performance might be improved if the
- * buffer is on a doubleword boundary.
- * @size: Size of the buffer, must be 64 bytes or greater.
- * @constants: CRC-32 constant pool base pointer.
- *
- * Register usage:
- * V0: Initial CRC value and intermediate constants and results.
- * V1..V4: Data for CRC computation.
- * V5..V8: Next data chunks that are fetched from the input buffer.
- * V9: Constant for BE->LE conversion and shift operations
- * V10..V14: CRC-32 constants.
- */
-static u32 crc32_le_vgfm_generic(u32 crc, unsigned char const *buf, size_t size, unsigned long *constants)
-{
- /* Load CRC-32 constants */
- fpu_vlm(CONST_PERM_LE2BE, CONST_CRC_POLY, constants);
-
- /*
- * Load the initial CRC value.
- *
- * The CRC value is loaded into the rightmost word of the
- * vector register and is later XORed with the LSB portion
- * of the loaded input data.
- */
- fpu_vzero(0); /* Clear V0 */
- fpu_vlvgf(0, crc, 3); /* Load CRC into rightmost word */
-
- /* Load a 64-byte data chunk and XOR with CRC */
- fpu_vlm(1, 4, buf);
- fpu_vperm(1, 1, 1, CONST_PERM_LE2BE);
- fpu_vperm(2, 2, 2, CONST_PERM_LE2BE);
- fpu_vperm(3, 3, 3, CONST_PERM_LE2BE);
- fpu_vperm(4, 4, 4, CONST_PERM_LE2BE);
-
- fpu_vx(1, 0, 1); /* V1 ^= CRC */
- buf += 64;
- size -= 64;
-
- while (size >= 64) {
- fpu_vlm(5, 8, buf);
- fpu_vperm(5, 5, 5, CONST_PERM_LE2BE);
- fpu_vperm(6, 6, 6, CONST_PERM_LE2BE);
- fpu_vperm(7, 7, 7, CONST_PERM_LE2BE);
- fpu_vperm(8, 8, 8, CONST_PERM_LE2BE);
- /*
- * Perform a GF(2) multiplication of the doublewords in V1 with
- * the R1 and R2 reduction constants in V0. The intermediate
- * result is then folded (accumulated) with the next data chunk
- * in V5 and stored in V1. Repeat this step for the register
- * contents in V2, V3, and V4 respectively.
- */
- fpu_vgfmag(1, CONST_R2R1, 1, 5);
- fpu_vgfmag(2, CONST_R2R1, 2, 6);
- fpu_vgfmag(3, CONST_R2R1, 3, 7);
- fpu_vgfmag(4, CONST_R2R1, 4, 8);
- buf += 64;
- size -= 64;
- }
-
- /*
- * Fold V1 to V4 into a single 128-bit value in V1. Multiply V1 with R3
- * and R4 and accumulate the next 128-bit chunk until a single 128-bit
- * value remains.
- */
- fpu_vgfmag(1, CONST_R4R3, 1, 2);
- fpu_vgfmag(1, CONST_R4R3, 1, 3);
- fpu_vgfmag(1, CONST_R4R3, 1, 4);
-
- while (size >= 16) {
- fpu_vl(2, buf);
- fpu_vperm(2, 2, 2, CONST_PERM_LE2BE);
- fpu_vgfmag(1, CONST_R4R3, 1, 2);
- buf += 16;
- size -= 16;
- }
-
- /*
- * Set up a vector register for byte shifts. The shift value must
- * be loaded in bits 1-4 in byte element 7 of a vector register.
- * Shift by 8 bytes: 0x40
- * Shift by 4 bytes: 0x20
- */
- fpu_vleib(9, 0x40, 7);
-
- /*
- * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes
- * to move R4 into the rightmost doubleword and set the leftmost
- * doubleword to 0x1.
- */
- fpu_vsrlb(0, CONST_R4R3, 9);
- fpu_vleig(0, 1, 0);
-
- /*
- * Compute GF(2) product of V1 and V0. The rightmost doubleword
- * of V1 is multiplied with R4. The leftmost doubleword of V1 is
- * multiplied by 0x1 and is then XORed with rightmost product.
- * Implicitly, the intermediate leftmost product becomes padded
- */
- fpu_vgfmg(1, 0, 1);
-
- /*
- * Now do the final 32-bit fold by multiplying the rightmost word
- * in V1 with R5 and XOR the result with the remaining bits in V1.
- *
- * To achieve this by a single VGFMAG, right shift V1 by a word
- * and store the result in V2 which is then accumulated. Use the
- * vector unpack instruction to load the rightmost half of the
- * doubleword into the rightmost doubleword element of V1; the other
- * half is loaded in the leftmost doubleword.
- * The vector register with CONST_R5 contains the R5 constant in the
- * rightmost doubleword and the leftmost doubleword is zero to ignore
- * the leftmost product of V1.
- */
- fpu_vleib(9, 0x20, 7); /* Shift by words */
- fpu_vsrlb(2, 1, 9); /* Store remaining bits in V2 */
- fpu_vupllf(1, 1); /* Split rightmost doubleword */
- fpu_vgfmag(1, CONST_R5, 1, 2); /* V1 = (V1 * R5) XOR V2 */
-
- /*
- * Apply a Barrett reduction to compute the final 32-bit CRC value.
- *
- * The input values to the Barrett reduction are the degree-63 polynomial
- * in V1 (R(x)), degree-32 generator polynomial, and the reduction
- * constant u. The Barrett reduction result is the CRC value of R(x) mod
- * P(x).
- *
- * The Barrett reduction algorithm is defined as:
- *
- * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u
- * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x)
- * 3. C(x) = R(x) XOR T2(x) mod x^32
- *
- * Note: The leftmost doubleword of vector register containing
- * CONST_RU_POLY is zero and, thus, the intermediate GF(2) product
- * is zero and does not contribute to the final result.
- */
-
- /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
- fpu_vupllf(2, 1);
- fpu_vgfmg(2, CONST_RU_POLY, 2);
-
- /*
- * Compute the GF(2) product of the CRC polynomial with T1(x) in
- * V2 and XOR the intermediate result, T2(x), with the value in V1.
- * The final result is stored in word element 2 of V2.
- */
- fpu_vupllf(2, 2);
- fpu_vgfmag(2, CONST_CRC_POLY, 2, 1);
-
- return fpu_vlgvf(2, 2);
-}
-
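The VGFMAG folding used in the 64-byte loop above can be modelled in scalar C as follows. As I read the instruction, each doubleword of the accumulator is carry-less multiplied with the corresponding doubleword of the constant register, the two 128-bit products are XORed, and the next data chunk is XORed on top. The struct gf128, clmul64() and fold_step() names are invented for this sketch; it is not part of the driver.

#include <stdint.h>

struct gf128 { uint64_t dw0, dw1; };	/* dw0 = leftmost (most significant) doubleword */

/* 64x64 -> 128 bit carry-less multiply, modelling one VGFM doubleword product. */
static struct gf128 clmul64(uint64_t a, uint64_t b)
{
	struct gf128 r = { 0, 0 };
	int i;

	for (i = 0; i < 64; i++) {
		if (b & (1ULL << i)) {
			r.dw1 ^= a << i;
			if (i)
				r.dw0 ^= a >> (64 - i);
		}
	}
	return r;
}

/* One folding step: acc = clmul(acc.dw0, k.dw0) ^ clmul(acc.dw1, k.dw1) ^ chunk */
static struct gf128 fold_step(struct gf128 acc, struct gf128 k, struct gf128 chunk)
{
	struct gf128 p0 = clmul64(acc.dw0, k.dw0);
	struct gf128 p1 = clmul64(acc.dw1, k.dw1);

	acc.dw0 = p0.dw0 ^ p1.dw0 ^ chunk.dw0;
	acc.dw1 = p0.dw1 ^ p1.dw1 ^ chunk.dw1;
	return acc;
}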
-u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size)
-{
- return crc32_le_vgfm_generic(crc, buf, size, &constants_CRC_32_LE[0]);
-}
-
-u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size)
-{
- return crc32_le_vgfm_generic(crc, buf, size, &constants_CRC_32C_LE[0]);
-}
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index 0800a2a5799f..dcbcee37cb63 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -8,29 +8,28 @@
* Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
*/
+#include <asm/cpacf.h>
+#include <crypto/ghash.h>
#include <crypto/internal/hash.h>
-#include <linux/module.h>
#include <linux/cpufeature.h>
-#include <asm/cpacf.h>
-
-#define GHASH_BLOCK_SIZE 16
-#define GHASH_DIGEST_SIZE 16
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
-struct ghash_ctx {
+struct s390_ghash_ctx {
u8 key[GHASH_BLOCK_SIZE];
};
-struct ghash_desc_ctx {
+struct s390_ghash_desc_ctx {
u8 icv[GHASH_BLOCK_SIZE];
u8 key[GHASH_BLOCK_SIZE];
- u8 buffer[GHASH_BLOCK_SIZE];
- u32 bytes;
};
static int ghash_init(struct shash_desc *desc)
{
- struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+ struct s390_ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+ struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
memset(dctx, 0, sizeof(*dctx));
memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE);
@@ -41,7 +40,7 @@ static int ghash_init(struct shash_desc *desc)
static int ghash_setkey(struct crypto_shash *tfm,
const u8 *key, unsigned int keylen)
{
- struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
+ struct s390_ghash_ctx *ctx = crypto_shash_ctx(tfm);
if (keylen != GHASH_BLOCK_SIZE)
return -EINVAL;
@@ -54,80 +53,71 @@ static int ghash_setkey(struct crypto_shash *tfm,
static int ghash_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
{
- struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+ struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
unsigned int n;
- u8 *buf = dctx->buffer;
-
- if (dctx->bytes) {
- u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
- n = min(srclen, dctx->bytes);
- dctx->bytes -= n;
- srclen -= n;
-
- memcpy(pos, src, n);
- src += n;
+ n = srclen & ~(GHASH_BLOCK_SIZE - 1);
+ cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n);
+ return srclen - n;
+}
- if (!dctx->bytes) {
- cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf,
- GHASH_BLOCK_SIZE);
- }
- }
+static void ghash_flush(struct s390_ghash_desc_ctx *dctx, const u8 *src,
+ unsigned int len)
+{
+ if (len) {
+ u8 buf[GHASH_BLOCK_SIZE] = {};
- n = srclen & ~(GHASH_BLOCK_SIZE - 1);
- if (n) {
- cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n);
- src += n;
- srclen -= n;
+ memcpy(buf, src, len);
+ cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
+ memzero_explicit(buf, sizeof(buf));
}
+}
- if (srclen) {
- dctx->bytes = GHASH_BLOCK_SIZE - srclen;
- memcpy(buf, src, srclen);
- }
+static int ghash_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *dst)
+{
+ struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+ ghash_flush(dctx, src, len);
+ memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE);
return 0;
}
-static int ghash_flush(struct ghash_desc_ctx *dctx)
+static int ghash_export(struct shash_desc *desc, void *out)
{
- u8 *buf = dctx->buffer;
-
- if (dctx->bytes) {
- u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
-
- memset(pos, 0, dctx->bytes);
- cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
- dctx->bytes = 0;
- }
+ struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+ memcpy(out, dctx->icv, GHASH_DIGEST_SIZE);
return 0;
}
-static int ghash_final(struct shash_desc *desc, u8 *dst)
+static int ghash_import(struct shash_desc *desc, const void *in)
{
- struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- int ret;
+ struct s390_ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+ struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- ret = ghash_flush(dctx);
- if (!ret)
- memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE);
- return ret;
+ memcpy(dctx->icv, in, GHASH_DIGEST_SIZE);
+ memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE);
+ return 0;
}
static struct shash_alg ghash_alg = {
.digestsize = GHASH_DIGEST_SIZE,
.init = ghash_init,
.update = ghash_update,
- .final = ghash_final,
+ .finup = ghash_finup,
.setkey = ghash_setkey,
- .descsize = sizeof(struct ghash_desc_ctx),
+ .export = ghash_export,
+ .import = ghash_import,
+ .statesize = sizeof(struct ghash_desc_ctx),
+ .descsize = sizeof(struct s390_ghash_desc_ctx),
.base = {
.cra_name = "ghash",
.cra_driver_name = "ghash-s390",
.cra_priority = 300,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = GHASH_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct ghash_ctx),
+ .cra_ctxsize = sizeof(struct s390_ghash_ctx),
.cra_module = THIS_MODULE,
},
};
diff --git a/arch/s390/crypto/hmac_s390.c b/arch/s390/crypto/hmac_s390.c
new file mode 100644
index 000000000000..f8cd09f341d4
--- /dev/null
+++ b/arch/s390/crypto/hmac_s390.c
@@ -0,0 +1,426 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * s390 specific HMAC support.
+ */
+
+#define pr_fmt(fmt) "hmac_s390: " fmt
+
+#include <asm/cpacf.h>
+#include <crypto/internal/hash.h>
+#include <crypto/hmac.h>
+#include <crypto/sha2.h>
+#include <linux/cpufeature.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+/*
+ * KMAC param block layout for sha2 function codes:
+ * The layout of the param block for the KMAC instruction depends on the
+ * blocksize of the sha2 hash algorithm selected by the function code. The param block
+ * contains the hash chaining value (cv), the input message bit-length (imbl)
+ * and the hmac-secret (key). To prevent code duplication, the sizes of all
+ * these are calculated based on the blocksize.
+ *
+ * param-block:
+ * +-------+
+ * | cv |
+ * +-------+
+ * | imbl |
+ * +-------+
+ * | key |
+ * +-------+
+ *
+ * sizes:
+ * part | sh2-alg | calculation | size | type
+ * -----+---------+-------------+------+--------
+ * cv | 224/256 | blocksize/2 | 32 | u64[8]
+ * | 384/512 | | 64 | u128[8]
+ * imbl | 224/256 | blocksize/8 | 8 | u64
+ * | 384/512 | | 16 | u128
+ * key | 224/256 | blocksize | 64 | u8[64]
+ * | 384/512 | | 128 | u8[128]
+ */
+
+#define MAX_DIGEST_SIZE SHA512_DIGEST_SIZE
+#define MAX_IMBL_SIZE sizeof(u128)
+#define MAX_BLOCK_SIZE SHA512_BLOCK_SIZE
+
+#define SHA2_CV_SIZE(bs) ((bs) >> 1)
+#define SHA2_IMBL_SIZE(bs) ((bs) >> 3)
+
+#define SHA2_IMBL_OFFSET(bs) (SHA2_CV_SIZE(bs))
+#define SHA2_KEY_OFFSET(bs) (SHA2_CV_SIZE(bs) + SHA2_IMBL_SIZE(bs))
+
+struct s390_hmac_ctx {
+ u8 key[MAX_BLOCK_SIZE];
+};
+
+union s390_kmac_gr0 {
+ unsigned long reg;
+ struct {
+ unsigned long : 48;
+ unsigned long ikp : 1;
+ unsigned long iimp : 1;
+ unsigned long ccup : 1;
+ unsigned long : 6;
+ unsigned long fc : 7;
+ };
+};
+
+struct s390_kmac_sha2_ctx {
+ u8 param[MAX_DIGEST_SIZE + MAX_IMBL_SIZE + MAX_BLOCK_SIZE];
+ union s390_kmac_gr0 gr0;
+ u64 buflen[2];
+};
+
+/*
+ * kmac_sha2_set_imbl - sets the input message bit-length based on the blocksize
+ */
+static inline void kmac_sha2_set_imbl(u8 *param, u64 buflen_lo,
+ u64 buflen_hi, unsigned int blocksize)
+{
+ u8 *imbl = param + SHA2_IMBL_OFFSET(blocksize);
+
+ switch (blocksize) {
+ case SHA256_BLOCK_SIZE:
+ *(u64 *)imbl = buflen_lo * BITS_PER_BYTE;
+ break;
+ case SHA512_BLOCK_SIZE:
+ *(u128 *)imbl = (((u128)buflen_hi << 64) + buflen_lo) << 3;
+ break;
+ default:
+ break;
+ }
+}
+
+static int hash_data(const u8 *in, unsigned int inlen,
+ u8 *digest, unsigned int digestsize, bool final)
+{
+ unsigned long func;
+ union {
+ struct sha256_paramblock {
+ u32 h[8];
+ u64 mbl;
+ } sha256;
+ struct sha512_paramblock {
+ u64 h[8];
+ u128 mbl;
+ } sha512;
+ } __packed param;
+
+#define PARAM_INIT(x, y, z) \
+ param.sha##x.h[0] = SHA##y ## _H0; \
+ param.sha##x.h[1] = SHA##y ## _H1; \
+ param.sha##x.h[2] = SHA##y ## _H2; \
+ param.sha##x.h[3] = SHA##y ## _H3; \
+ param.sha##x.h[4] = SHA##y ## _H4; \
+ param.sha##x.h[5] = SHA##y ## _H5; \
+ param.sha##x.h[6] = SHA##y ## _H6; \
+ param.sha##x.h[7] = SHA##y ## _H7; \
+ param.sha##x.mbl = (z)
+
+ switch (digestsize) {
+ case SHA224_DIGEST_SIZE:
+ func = final ? CPACF_KLMD_SHA_256 : CPACF_KIMD_SHA_256;
+ PARAM_INIT(256, 224, inlen * 8);
+ if (!final)
+ digestsize = SHA256_DIGEST_SIZE;
+ break;
+ case SHA256_DIGEST_SIZE:
+ func = final ? CPACF_KLMD_SHA_256 : CPACF_KIMD_SHA_256;
+ PARAM_INIT(256, 256, inlen * 8);
+ break;
+ case SHA384_DIGEST_SIZE:
+ func = final ? CPACF_KLMD_SHA_512 : CPACF_KIMD_SHA_512;
+ PARAM_INIT(512, 384, inlen * 8);
+ if (!final)
+ digestsize = SHA512_DIGEST_SIZE;
+ break;
+ case SHA512_DIGEST_SIZE:
+ func = final ? CPACF_KLMD_SHA_512 : CPACF_KIMD_SHA_512;
+ PARAM_INIT(512, 512, inlen * 8);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+#undef PARAM_INIT
+
+ cpacf_klmd(func, &param, in, inlen);
+
+ memcpy(digest, &param, digestsize);
+
+ return 0;
+}
+
+static int hash_key(const u8 *in, unsigned int inlen,
+ u8 *digest, unsigned int digestsize)
+{
+ return hash_data(in, inlen, digest, digestsize, true);
+}
+
+static int s390_hmac_sha2_setkey(struct crypto_shash *tfm,
+ const u8 *key, unsigned int keylen)
+{
+ struct s390_hmac_ctx *tfm_ctx = crypto_shash_ctx(tfm);
+ unsigned int ds = crypto_shash_digestsize(tfm);
+ unsigned int bs = crypto_shash_blocksize(tfm);
+
+ memset(tfm_ctx, 0, sizeof(*tfm_ctx));
+
+ if (keylen > bs)
+ return hash_key(key, keylen, tfm_ctx->key, ds);
+
+ memcpy(tfm_ctx->key, key, keylen);
+ return 0;
+}
+
+static int s390_hmac_sha2_init(struct shash_desc *desc)
+{
+ struct s390_hmac_ctx *tfm_ctx = crypto_shash_ctx(desc->tfm);
+ struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int bs = crypto_shash_blocksize(desc->tfm);
+
+ memcpy(ctx->param + SHA2_KEY_OFFSET(bs),
+ tfm_ctx->key, bs);
+
+ ctx->buflen[0] = 0;
+ ctx->buflen[1] = 0;
+ ctx->gr0.reg = 0;
+ switch (crypto_shash_digestsize(desc->tfm)) {
+ case SHA224_DIGEST_SIZE:
+ ctx->gr0.fc = CPACF_KMAC_HMAC_SHA_224;
+ break;
+ case SHA256_DIGEST_SIZE:
+ ctx->gr0.fc = CPACF_KMAC_HMAC_SHA_256;
+ break;
+ case SHA384_DIGEST_SIZE:
+ ctx->gr0.fc = CPACF_KMAC_HMAC_SHA_384;
+ break;
+ case SHA512_DIGEST_SIZE:
+ ctx->gr0.fc = CPACF_KMAC_HMAC_SHA_512;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int s390_hmac_sha2_update(struct shash_desc *desc,
+ const u8 *data, unsigned int len)
+{
+ struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int bs = crypto_shash_blocksize(desc->tfm);
+ unsigned int n = round_down(len, bs);
+
+ ctx->buflen[0] += n;
+ if (ctx->buflen[0] < n)
+ ctx->buflen[1]++;
+
+ /* process as many blocks as possible */
+ ctx->gr0.iimp = 1;
+ _cpacf_kmac(&ctx->gr0.reg, ctx->param, data, n);
+ return len - n;
+}
+
+static int s390_hmac_sha2_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *out)
+{
+ struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int bs = crypto_shash_blocksize(desc->tfm);
+
+ ctx->buflen[0] += len;
+ if (ctx->buflen[0] < len)
+ ctx->buflen[1]++;
+
+ ctx->gr0.iimp = 0;
+ kmac_sha2_set_imbl(ctx->param, ctx->buflen[0], ctx->buflen[1], bs);
+ _cpacf_kmac(&ctx->gr0.reg, ctx->param, src, len);
+ memcpy(out, ctx->param, crypto_shash_digestsize(desc->tfm));
+
+ return 0;
+}
+
+static int s390_hmac_sha2_digest(struct shash_desc *desc,
+ const u8 *data, unsigned int len, u8 *out)
+{
+ struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int ds = crypto_shash_digestsize(desc->tfm);
+ int rc;
+
+ rc = s390_hmac_sha2_init(desc);
+ if (rc)
+ return rc;
+
+ ctx->gr0.iimp = 0;
+ kmac_sha2_set_imbl(ctx->param, len, 0,
+ crypto_shash_blocksize(desc->tfm));
+ _cpacf_kmac(&ctx->gr0.reg, ctx->param, data, len);
+ memcpy(out, ctx->param, ds);
+
+ return 0;
+}
+
+static int s390_hmac_export_zero(struct shash_desc *desc, void *out)
+{
+ struct crypto_shash *tfm = desc->tfm;
+ u8 ipad[SHA512_BLOCK_SIZE];
+ struct s390_hmac_ctx *ctx;
+ unsigned int bs;
+ int err, i;
+
+ ctx = crypto_shash_ctx(tfm);
+ bs = crypto_shash_blocksize(tfm);
+ for (i = 0; i < bs; i++)
+ ipad[i] = ctx->key[i] ^ HMAC_IPAD_VALUE;
+
+ err = hash_data(ipad, bs, out, crypto_shash_digestsize(tfm), false);
+ memzero_explicit(ipad, sizeof(ipad));
+ return err;
+}
+
+static int s390_hmac_export(struct shash_desc *desc, void *out)
+{
+ struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int bs = crypto_shash_blocksize(desc->tfm);
+ unsigned int ds = bs / 2;
+ u64 lo = ctx->buflen[0];
+ union {
+ u8 *u8;
+ u64 *u64;
+ } p = { .u8 = out };
+ int err = 0;
+
+ if (!ctx->gr0.ikp)
+ err = s390_hmac_export_zero(desc, out);
+ else
+ memcpy(p.u8, ctx->param, ds);
+ p.u8 += ds;
+ lo += bs;
+ put_unaligned(lo, p.u64++);
+ if (ds == SHA512_DIGEST_SIZE)
+ put_unaligned(ctx->buflen[1] + (lo < bs), p.u64);
+ return err;
+}
+
+static int s390_hmac_import(struct shash_desc *desc, const void *in)
+{
+ struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int bs = crypto_shash_blocksize(desc->tfm);
+ unsigned int ds = bs / 2;
+ union {
+ const u8 *u8;
+ const u64 *u64;
+ } p = { .u8 = in };
+ u64 lo;
+ int err;
+
+ err = s390_hmac_sha2_init(desc);
+ memcpy(ctx->param, p.u8, ds);
+ p.u8 += ds;
+ lo = get_unaligned(p.u64++);
+ ctx->buflen[0] = lo - bs;
+ if (ds == SHA512_DIGEST_SIZE)
+ ctx->buflen[1] = get_unaligned(p.u64) - (lo < bs);
+ if (ctx->buflen[0] | ctx->buflen[1])
+ ctx->gr0.ikp = 1;
+ return err;
+}
+
+#define S390_HMAC_SHA2_ALG(x, ss) { \
+ .fc = CPACF_KMAC_HMAC_SHA_##x, \
+ .alg = { \
+ .init = s390_hmac_sha2_init, \
+ .update = s390_hmac_sha2_update, \
+ .finup = s390_hmac_sha2_finup, \
+ .digest = s390_hmac_sha2_digest, \
+ .setkey = s390_hmac_sha2_setkey, \
+ .export = s390_hmac_export, \
+ .import = s390_hmac_import, \
+ .descsize = sizeof(struct s390_kmac_sha2_ctx), \
+ .halg = { \
+ .statesize = ss, \
+ .digestsize = SHA##x##_DIGEST_SIZE, \
+ .base = { \
+ .cra_name = "hmac(sha" #x ")", \
+ .cra_driver_name = "hmac_s390_sha" #x, \
+ .cra_blocksize = SHA##x##_BLOCK_SIZE, \
+ .cra_priority = 400, \
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | \
+ CRYPTO_AHASH_ALG_FINUP_MAX, \
+ .cra_ctxsize = sizeof(struct s390_hmac_ctx), \
+ .cra_module = THIS_MODULE, \
+ }, \
+ }, \
+ }, \
+}
+
+static struct s390_hmac_alg {
+ bool registered;
+ unsigned int fc;
+ struct shash_alg alg;
+} s390_hmac_algs[] = {
+ S390_HMAC_SHA2_ALG(224, sizeof(struct crypto_sha256_state)),
+ S390_HMAC_SHA2_ALG(256, sizeof(struct crypto_sha256_state)),
+ S390_HMAC_SHA2_ALG(384, SHA512_STATE_SIZE),
+ S390_HMAC_SHA2_ALG(512, SHA512_STATE_SIZE),
+};
+
+static __always_inline void _s390_hmac_algs_unregister(void)
+{
+ struct s390_hmac_alg *hmac;
+ int i;
+
+ for (i = ARRAY_SIZE(s390_hmac_algs) - 1; i >= 0; i--) {
+ hmac = &s390_hmac_algs[i];
+ if (!hmac->registered)
+ continue;
+ crypto_unregister_shash(&hmac->alg);
+ }
+}
+
+static int __init hmac_s390_init(void)
+{
+ struct s390_hmac_alg *hmac;
+ int i, rc = -ENODEV;
+
+ if (!cpacf_query_func(CPACF_KLMD, CPACF_KLMD_SHA_256))
+ return -ENODEV;
+ if (!cpacf_query_func(CPACF_KLMD, CPACF_KLMD_SHA_512))
+ return -ENODEV;
+
+ for (i = 0; i < ARRAY_SIZE(s390_hmac_algs); i++) {
+ hmac = &s390_hmac_algs[i];
+ if (!cpacf_query_func(CPACF_KMAC, hmac->fc))
+ continue;
+
+ rc = crypto_register_shash(&hmac->alg);
+ if (rc) {
+ pr_err("unable to register %s\n",
+ hmac->alg.halg.base.cra_name);
+ goto out;
+ }
+ hmac->registered = true;
+ pr_debug("registered %s\n", hmac->alg.halg.base.cra_name);
+ }
+ return rc;
+out:
+ _s390_hmac_algs_unregister();
+ return rc;
+}
+
+static void __exit hmac_s390_exit(void)
+{
+ _s390_hmac_algs_unregister();
+}
+
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, hmac_s390_init);
+module_exit(hmac_s390_exit);
+
+MODULE_DESCRIPTION("S390 HMAC driver");
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index 99ea3f12c5d2..64aef7eb2030 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -5,24 +5,25 @@
* s390 implementation of the AES Cipher Algorithm with protected keys.
*
* s390 Version:
- * Copyright IBM Corp. 2017, 2023
+ * Copyright IBM Corp. 2017, 2025
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
* Harald Freudenberger <freude@de.ibm.com>
*/
-#define KMSG_COMPONENT "paes_s390"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "paes_s390: " fmt
-#include <crypto/aes.h>
-#include <crypto/algapi.h>
-#include <linux/bug.h>
-#include <linux/err.h>
-#include <linux/module.h>
+#include <linux/atomic.h>
#include <linux/cpufeature.h>
+#include <linux/delay.h>
+#include <linux/err.h>
#include <linux/init.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
-#include <linux/delay.h>
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/engine.h>
#include <crypto/internal/skcipher.h>
#include <crypto/xts.h>
#include <asm/cpacf.h>
@@ -34,215 +35,464 @@
* is called. As paes can handle different kinds of key blobs
* and padding is also possible, the limits need to be generous.
*/
-#define PAES_MIN_KEYSIZE 16
-#define PAES_MAX_KEYSIZE MAXEP11AESKEYBLOBSIZE
+#define PAES_MIN_KEYSIZE 16
+#define PAES_MAX_KEYSIZE MAXEP11AESKEYBLOBSIZE
+#define PAES_256_PROTKEY_SIZE (32 + 32) /* key + verification pattern */
+#define PXTS_256_PROTKEY_SIZE (32 + 32 + 32) /* k1 + k2 + verification pattern */
static u8 *ctrblk;
static DEFINE_MUTEX(ctrblk_lock);
static cpacf_mask_t km_functions, kmc_functions, kmctr_functions;
-struct key_blob {
- /*
- * Small keys will be stored in the keybuf. Larger keys are
- * stored in extra allocated memory. In both cases does
- * key point to the memory where the key is stored.
- * The code distinguishes by checking keylen against
- * sizeof(keybuf). See the two following helper functions.
- */
- u8 *key;
- u8 keybuf[128];
+static struct crypto_engine *paes_crypto_engine;
+#define MAX_QLEN 10
+
+/*
+ * protected key specific stuff
+ */
+
+struct paes_protkey {
+ u32 type;
+ u32 len;
+ u8 protkey[PXTS_256_PROTKEY_SIZE];
+};
+
+#define PK_STATE_NO_KEY 0
+#define PK_STATE_CONVERT_IN_PROGRESS 1
+#define PK_STATE_VALID 2
+
+struct s390_paes_ctx {
+ /* source key material used to derive a protected key from */
+ u8 keybuf[PAES_MAX_KEYSIZE];
+ unsigned int keylen;
+
+ /* cpacf function code to use with this protected key type */
+ long fc;
+
+ /* nr of requests enqueued via crypto engine which use this tfm ctx */
+ atomic_t via_engine_ctr;
+
+ /* spinlock to atomic read/update all the following fields */
+ spinlock_t pk_lock;
+
+ /* see PK_STATE* defines above, < 0 holds convert failure rc */
+ int pk_state;
+ /* if state is valid, pk holds the protected key */
+ struct paes_protkey pk;
+};
+
+struct s390_pxts_ctx {
+ /* source key material used to derive a protected key from */
+ u8 keybuf[2 * PAES_MAX_KEYSIZE];
unsigned int keylen;
+
+ /* cpacf function code to use with this protected key type */
+ long fc;
+
+ /* nr of requests enqueued via crypto engine which use this tfm ctx */
+ atomic_t via_engine_ctr;
+
+ /* spinlock to atomic read/update all the following fields */
+ spinlock_t pk_lock;
+
+ /* see PK_STATE* defines above, < 0 holds convert failure rc */
+ int pk_state;
+ /* if state is valid, pk[] hold(s) the protected key(s) */
+ struct paes_protkey pk[2];
};
-static inline int _key_to_kb(struct key_blob *kb,
- const u8 *key,
- unsigned int keylen)
+/*
+ * make_clrkey_token() - wrap the raw key ck with pkey clearkey token
+ * information.
+ * @returns the size of the clearkey token
+ */
+static inline u32 make_clrkey_token(const u8 *ck, size_t cklen, u8 *dest)
{
- struct clearkey_header {
+ struct clrkey_token {
u8 type;
u8 res0[3];
u8 version;
u8 res1[3];
u32 keytype;
u32 len;
- } __packed * h;
+ u8 key[];
+ } __packed *token = (struct clrkey_token *)dest;
+
+ token->type = 0x00;
+ token->version = 0x02;
+ token->keytype = (cklen - 8) >> 3;
+ token->len = cklen;
+ memcpy(token->key, ck, cklen);
+
+ return sizeof(*token) + cklen;
+}
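For illustration, wrapping a raw AES-256 clear key (cklen = 32) with the function above works out as follows; treating the keytype encoding as an assumption of this note, the value 3 is what the formula yields and should correspond to PKEY_KEYTYPE_AES_256 in the pkey uapi:

	keytype             = (32 - 8) >> 3 = 3
	token->len          = 32
	returned token size = sizeof(struct clrkey_token) + 32 = 16 + 32 = 48 bytes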
+
+/*
+ * paes_ctx_setkey() - Set key value into context, maybe construct
+ * a clear key token digestible by pkey from a clear key value.
+ */
+static inline int paes_ctx_setkey(struct s390_paes_ctx *ctx,
+ const u8 *key, unsigned int keylen)
+{
+ if (keylen > sizeof(ctx->keybuf))
+ return -EINVAL;
switch (keylen) {
case 16:
case 24:
case 32:
/* clear key value, prepare pkey clear key token in keybuf */
- memset(kb->keybuf, 0, sizeof(kb->keybuf));
- h = (struct clearkey_header *) kb->keybuf;
- h->version = 0x02; /* TOKVER_CLEAR_KEY */
- h->keytype = (keylen - 8) >> 3;
- h->len = keylen;
- memcpy(kb->keybuf + sizeof(*h), key, keylen);
- kb->keylen = sizeof(*h) + keylen;
- kb->key = kb->keybuf;
+ memset(ctx->keybuf, 0, sizeof(ctx->keybuf));
+ ctx->keylen = make_clrkey_token(key, keylen, ctx->keybuf);
break;
default:
/* other key material, let pkey handle this */
- if (keylen <= sizeof(kb->keybuf))
- kb->key = kb->keybuf;
- else {
- kb->key = kmalloc(keylen, GFP_KERNEL);
- if (!kb->key)
- return -ENOMEM;
- }
- memcpy(kb->key, key, keylen);
- kb->keylen = keylen;
+ memcpy(ctx->keybuf, key, keylen);
+ ctx->keylen = keylen;
break;
}
return 0;
}
-static inline void _free_kb_keybuf(struct key_blob *kb)
+/*
+ * pxts_ctx_setkey() - Set key value into context, maybe construct
+ * a clear key token digestible by pkey from a clear key value.
+ */
+static inline int pxts_ctx_setkey(struct s390_pxts_ctx *ctx,
+ const u8 *key, unsigned int keylen)
{
- if (kb->key && kb->key != kb->keybuf
- && kb->keylen > sizeof(kb->keybuf)) {
- kfree_sensitive(kb->key);
- kb->key = NULL;
- }
-}
+ size_t cklen = keylen / 2;
-struct s390_paes_ctx {
- struct key_blob kb;
- struct pkey_protkey pk;
- spinlock_t pk_lock;
- unsigned long fc;
-};
+ if (keylen > sizeof(ctx->keybuf))
+ return -EINVAL;
-struct s390_pxts_ctx {
- struct key_blob kb[2];
- struct pkey_protkey pk[2];
- spinlock_t pk_lock;
- unsigned long fc;
-};
+ switch (keylen) {
+ case 32:
+ case 64:
+ /* clear key value, prepare pkey clear key tokens in keybuf */
+ memset(ctx->keybuf, 0, sizeof(ctx->keybuf));
+ ctx->keylen = make_clrkey_token(key, cklen, ctx->keybuf);
+ ctx->keylen += make_clrkey_token(key + cklen, cklen,
+ ctx->keybuf + ctx->keylen);
+ break;
+ default:
+ /* other key material, let pkey handle this */
+ memcpy(ctx->keybuf, key, keylen);
+ ctx->keylen = keylen;
+ break;
+ }
-static inline int __paes_keyblob2pkey(struct key_blob *kb,
- struct pkey_protkey *pk)
+ return 0;
+}
+
+/*
+ * Convert the raw key material into a protected key via PKEY api.
+ * This function may sleep - don't call in non-sleeping context.
+ */
+static inline int convert_key(const u8 *key, unsigned int keylen,
+ struct paes_protkey *pk)
{
- int i, ret = -EIO;
+ int rc, i;
- /* try three times in case of busy card */
- for (i = 0; ret && i < 3; i++) {
- if (ret == -EBUSY && in_task()) {
- if (msleep_interruptible(1000))
- return -EINTR;
+ pk->len = sizeof(pk->protkey);
+
+ /*
+ * In case of a busy card retry with increasing delay
+ * of 200, 400, 800 and 1600 ms - in total 3 s.
+ */
+ for (rc = -EIO, i = 0; rc && i < 5; i++) {
+ if (rc == -EBUSY && msleep_interruptible((1 << i) * 100)) {
+ rc = -EINTR;
+ goto out;
}
- ret = pkey_keyblob2pkey(kb->key, kb->keylen,
- pk->protkey, &pk->len, &pk->type);
+ rc = pkey_key2protkey(key, keylen,
+ pk->protkey, &pk->len, &pk->type,
+ PKEY_XFLAG_NOMEMALLOC);
}
- return ret;
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-static inline int __paes_convert_key(struct s390_paes_ctx *ctx)
+/*
+ * (Re-)Convert the raw key material from the ctx into a protected key
+ * via the convert_key() function. Update the pk_state, pk_type, pk_len
+ * and the protected key in the tfm context.
+ * Please note this function may be invoked concurrently with the very
+ * same tfm context. The pk_lock spinlock in the context ensures an
+ * atomic update of the pk and the pk state but does not guarantee any
+ * order of update. So a freshly converted valid protected key may get
+ * overwritten with an 'old' expired key value. This does no harm: the
+ * cpacf instructions detect the invalid key and refuse to operate with
+ * it, and the calling code then triggers a (re-)conversion. This may
+ * lead to an unnecessary additional conversion but never to invalid
+ * data on en- or decrypt operations.
+ */
+static int paes_convert_key(struct s390_paes_ctx *ctx)
{
- int ret;
- struct pkey_protkey pkey;
+ struct paes_protkey pk;
+ int rc;
+
+ spin_lock_bh(&ctx->pk_lock);
+ ctx->pk_state = PK_STATE_CONVERT_IN_PROGRESS;
+ spin_unlock_bh(&ctx->pk_lock);
- pkey.len = sizeof(pkey.protkey);
- ret = __paes_keyblob2pkey(&ctx->kb, &pkey);
- if (ret)
- return ret;
+ rc = convert_key(ctx->keybuf, ctx->keylen, &pk);
+ /* update context */
spin_lock_bh(&ctx->pk_lock);
- memcpy(&ctx->pk, &pkey, sizeof(pkey));
+ if (rc) {
+ ctx->pk_state = rc;
+ } else {
+ ctx->pk_state = PK_STATE_VALID;
+ ctx->pk = pk;
+ }
spin_unlock_bh(&ctx->pk_lock);
- return 0;
+ memzero_explicit(&pk, sizeof(pk));
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-static int ecb_paes_init(struct crypto_skcipher *tfm)
+/*
+ * (Re-)Convert the raw xts key material from the ctx into a
+ * protected key via the convert_key() function. Update the pk_state,
+ * pk_type, pk_len and the protected key in the tfm context.
+ * See also comments on function paes_convert_key.
+ */
+static int pxts_convert_key(struct s390_pxts_ctx *ctx)
{
- struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct paes_protkey pk0, pk1;
+ size_t split_keylen;
+ int rc;
- ctx->kb.key = NULL;
- spin_lock_init(&ctx->pk_lock);
+ spin_lock_bh(&ctx->pk_lock);
+ ctx->pk_state = PK_STATE_CONVERT_IN_PROGRESS;
+ spin_unlock_bh(&ctx->pk_lock);
- return 0;
-}
+ rc = convert_key(ctx->keybuf, ctx->keylen, &pk0);
+ if (rc)
+ goto out;
+
+ switch (pk0.type) {
+ case PKEY_KEYTYPE_AES_128:
+ case PKEY_KEYTYPE_AES_256:
+ /* second keytoken required */
+ if (ctx->keylen % 2) {
+ rc = -EINVAL;
+ goto out;
+ }
+ split_keylen = ctx->keylen / 2;
+ rc = convert_key(ctx->keybuf + split_keylen,
+ split_keylen, &pk1);
+ if (rc)
+ goto out;
+ if (pk0.type != pk1.type) {
+ rc = -EINVAL;
+ goto out;
+ }
+ break;
+ case PKEY_KEYTYPE_AES_XTS_128:
+ case PKEY_KEYTYPE_AES_XTS_256:
+ /* single key */
+ pk1.type = 0;
+ break;
+ default:
+ /* unsupported protected keytype */
+ rc = -EINVAL;
+ goto out;
+ }
-static void ecb_paes_exit(struct crypto_skcipher *tfm)
-{
- struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+out:
+ /* update context */
+ spin_lock_bh(&ctx->pk_lock);
+ if (rc) {
+ ctx->pk_state = rc;
+ } else {
+ ctx->pk_state = PK_STATE_VALID;
+ ctx->pk[0] = pk0;
+ ctx->pk[1] = pk1;
+ }
+ spin_unlock_bh(&ctx->pk_lock);
- _free_kb_keybuf(&ctx->kb);
+ memzero_explicit(&pk0, sizeof(pk0));
+ memzero_explicit(&pk1, sizeof(pk1));
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-static inline int __ecb_paes_set_key(struct s390_paes_ctx *ctx)
+/*
+ * PAES ECB implementation
+ */
+
+struct ecb_param {
+ u8 key[PAES_256_PROTKEY_SIZE];
+} __packed;
+
+struct s390_pecb_req_ctx {
+ unsigned long modifier;
+ struct skcipher_walk walk;
+ bool param_init_done;
+ struct ecb_param param;
+};
+
+static int ecb_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+ unsigned int key_len)
{
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ long fc;
int rc;
- unsigned long fc;
- rc = __paes_convert_key(ctx);
+ /* set raw key into context */
+ rc = paes_ctx_setkey(ctx, in_key, key_len);
if (rc)
- return rc;
+ goto out;
- /* Pick the correct function code based on the protected key type */
- fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KM_PAES_128 :
- (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KM_PAES_192 :
- (ctx->pk.type == PKEY_KEYTYPE_AES_256) ? CPACF_KM_PAES_256 : 0;
+ /* convert key into protected key */
+ rc = paes_convert_key(ctx);
+ if (rc)
+ goto out;
- /* Check if the function code is available */
+ /* Pick the correct function code based on the protected key type */
+ switch (ctx->pk.type) {
+ case PKEY_KEYTYPE_AES_128:
+ fc = CPACF_KM_PAES_128;
+ break;
+ case PKEY_KEYTYPE_AES_192:
+ fc = CPACF_KM_PAES_192;
+ break;
+ case PKEY_KEYTYPE_AES_256:
+ fc = CPACF_KM_PAES_256;
+ break;
+ default:
+ fc = 0;
+ break;
+ }
ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
- return ctx->fc ? 0 : -EINVAL;
+ rc = fc ? 0 : -EINVAL;
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-static int ecb_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
- unsigned int key_len)
+static int ecb_paes_do_crypt(struct s390_paes_ctx *ctx,
+ struct s390_pecb_req_ctx *req_ctx,
+ bool maysleep)
{
- int rc;
- struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- _free_kb_keybuf(&ctx->kb);
- rc = _key_to_kb(&ctx->kb, in_key, key_len);
+ struct ecb_param *param = &req_ctx->param;
+ struct skcipher_walk *walk = &req_ctx->walk;
+ unsigned int nbytes, n, k;
+ int pk_state, rc = 0;
+
+ if (!req_ctx->param_init_done) {
+ /* fetch and check protected key state */
+ spin_lock_bh(&ctx->pk_lock);
+ pk_state = ctx->pk_state;
+ switch (pk_state) {
+ case PK_STATE_NO_KEY:
+ rc = -ENOKEY;
+ break;
+ case PK_STATE_CONVERT_IN_PROGRESS:
+ rc = -EKEYEXPIRED;
+ break;
+ case PK_STATE_VALID:
+ memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
+ req_ctx->param_init_done = true;
+ break;
+ default:
+ rc = pk_state < 0 ? pk_state : -EIO;
+ break;
+ }
+ spin_unlock_bh(&ctx->pk_lock);
+ }
if (rc)
- return rc;
+ goto out;
- return __ecb_paes_set_key(ctx);
+ /*
+ * Note that in case of partial processing or failure the walk
+	 * is NOT unmapped here. So a follow-up task may reuse the walk,
+	 * or in case of unrecoverable failure it needs to be unmapped.
+ */
+ while ((nbytes = walk->nbytes) != 0) {
+ /* only use complete blocks */
+ n = nbytes & ~(AES_BLOCK_SIZE - 1);
+ k = cpacf_km(ctx->fc | req_ctx->modifier, param,
+ walk->dst.virt.addr, walk->src.virt.addr, n);
+ if (k)
+ rc = skcipher_walk_done(walk, nbytes - k);
+ if (k < n) {
+ if (!maysleep) {
+ rc = -EKEYEXPIRED;
+ goto out;
+ }
+ rc = paes_convert_key(ctx);
+ if (rc)
+ goto out;
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
+ spin_unlock_bh(&ctx->pk_lock);
+ }
+ }
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
static int ecb_paes_crypt(struct skcipher_request *req, unsigned long modifier)
{
+ struct s390_pecb_req_ctx *req_ctx = skcipher_request_ctx(req);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- unsigned int nbytes, n, k;
- int ret;
- struct {
- u8 key[MAXPROTKEYSIZE];
- } param;
+ struct skcipher_walk *walk = &req_ctx->walk;
+ int rc;
- ret = skcipher_walk_virt(&walk, req, false);
- if (ret)
- return ret;
+ /*
+ * Attempt synchronous encryption first. If it fails, schedule the request
+ * asynchronously via the crypto engine. To preserve execution order,
+ * once a request is queued to the engine, further requests using the same
+ * tfm will also be routed through the engine.
+ */
- spin_lock_bh(&ctx->pk_lock);
- memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
- spin_unlock_bh(&ctx->pk_lock);
+ rc = skcipher_walk_virt(walk, req, false);
+ if (rc)
+ goto out;
- while ((nbytes = walk.nbytes) != 0) {
- /* only use complete blocks */
- n = nbytes & ~(AES_BLOCK_SIZE - 1);
- k = cpacf_km(ctx->fc | modifier, &param,
- walk.dst.virt.addr, walk.src.virt.addr, n);
- if (k)
- ret = skcipher_walk_done(&walk, nbytes - k);
- if (k < n) {
- if (__paes_convert_key(ctx))
- return skcipher_walk_done(&walk, -EIO);
- spin_lock_bh(&ctx->pk_lock);
- memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
- spin_unlock_bh(&ctx->pk_lock);
- }
+ req_ctx->modifier = modifier;
+ req_ctx->param_init_done = false;
+
+ /* Try synchronous operation if no active engine usage */
+ if (!atomic_read(&ctx->via_engine_ctr)) {
+ rc = ecb_paes_do_crypt(ctx, req_ctx, false);
+ if (rc == 0)
+ goto out;
+ }
+
+ /*
+	 * If the sync operation failed or the key expired or there are
+	 * already requests enqueued via the engine, fall back to async.
+	 * Mark the tfm as using the engine to serialize requests.
+ */
+ if (rc == 0 || rc == -EKEYEXPIRED) {
+ atomic_inc(&ctx->via_engine_ctr);
+ rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req);
+ if (rc != -EINPROGRESS)
+ atomic_dec(&ctx->via_engine_ctr);
}
- return ret;
+
+ if (rc != -EINPROGRESS)
+ skcipher_walk_done(walk, rc);
+
+out:
+ if (rc != -EINPROGRESS)
+ memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
static int ecb_paes_encrypt(struct skcipher_request *req)
@@ -255,113 +505,257 @@ static int ecb_paes_decrypt(struct skcipher_request *req)
return ecb_paes_crypt(req, CPACF_DECRYPT);
}
-static struct skcipher_alg ecb_paes_alg = {
- .base.cra_name = "ecb(paes)",
- .base.cra_driver_name = "ecb-paes-s390",
- .base.cra_priority = 401, /* combo: aes + ecb + 1 */
- .base.cra_blocksize = AES_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
- .base.cra_module = THIS_MODULE,
- .base.cra_list = LIST_HEAD_INIT(ecb_paes_alg.base.cra_list),
- .init = ecb_paes_init,
- .exit = ecb_paes_exit,
- .min_keysize = PAES_MIN_KEYSIZE,
- .max_keysize = PAES_MAX_KEYSIZE,
- .setkey = ecb_paes_set_key,
- .encrypt = ecb_paes_encrypt,
- .decrypt = ecb_paes_decrypt,
-};
-
-static int cbc_paes_init(struct crypto_skcipher *tfm)
+static int ecb_paes_init(struct crypto_skcipher *tfm)
{
struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- ctx->kb.key = NULL;
+ memset(ctx, 0, sizeof(*ctx));
spin_lock_init(&ctx->pk_lock);
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pecb_req_ctx));
+
return 0;
}
-static void cbc_paes_exit(struct crypto_skcipher *tfm)
+static void ecb_paes_exit(struct crypto_skcipher *tfm)
{
struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- _free_kb_keybuf(&ctx->kb);
+ memzero_explicit(ctx, sizeof(*ctx));
}
-static inline int __cbc_paes_set_key(struct s390_paes_ctx *ctx)
+static int ecb_paes_do_one_request(struct crypto_engine *engine, void *areq)
{
+ struct skcipher_request *req = skcipher_request_cast(areq);
+ struct s390_pecb_req_ctx *req_ctx = skcipher_request_ctx(req);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk *walk = &req_ctx->walk;
int rc;
- unsigned long fc;
- rc = __paes_convert_key(ctx);
- if (rc)
- return rc;
+ /* walk has already been prepared */
+
+ rc = ecb_paes_do_crypt(ctx, req_ctx, true);
+ if (rc == -EKEYEXPIRED) {
+ /*
+		 * Protected key expired, conversion is in progress.
+		 * Trigger a re-schedule of this request by returning
+		 * -ENOSPC ("hardware queue is full") to the crypto engine.
+		 * To avoid immediate re-invocation of this callback,
+		 * tell the scheduler to voluntarily give up the CPU here.
+ */
+ cond_resched();
+ pr_debug("rescheduling request\n");
+ return -ENOSPC;
+ } else if (rc) {
+ skcipher_walk_done(walk, rc);
+ }
- /* Pick the correct function code based on the protected key type */
- fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMC_PAES_128 :
- (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KMC_PAES_192 :
- (ctx->pk.type == PKEY_KEYTYPE_AES_256) ? CPACF_KMC_PAES_256 : 0;
+ memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+ pr_debug("request complete with rc=%d\n", rc);
+ local_bh_disable();
+ atomic_dec(&ctx->via_engine_ctr);
+ crypto_finalize_skcipher_request(engine, req, rc);
+ local_bh_enable();
+ return rc;
+}
- /* Check if the function code is available */
- ctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0;
+static struct skcipher_engine_alg ecb_paes_alg = {
+ .base = {
+ .base.cra_name = "ecb(paes)",
+ .base.cra_driver_name = "ecb-paes-s390",
+ .base.cra_priority = 401, /* combo: aes + ecb + 1 */
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_list = LIST_HEAD_INIT(ecb_paes_alg.base.base.cra_list),
+ .init = ecb_paes_init,
+ .exit = ecb_paes_exit,
+ .min_keysize = PAES_MIN_KEYSIZE,
+ .max_keysize = PAES_MAX_KEYSIZE,
+ .setkey = ecb_paes_setkey,
+ .encrypt = ecb_paes_encrypt,
+ .decrypt = ecb_paes_decrypt,
+ },
+ .op = {
+ .do_one_request = ecb_paes_do_one_request,
+ },
+};
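
Illustration only, not part of the patch: a minimal sketch of how an in-kernel caller might drive the ecb(paes) skcipher registered above. The helper name, the key blob source and the buffer are assumptions for this example; the buffer is assumed to be kmalloc'ed and a multiple of AES_BLOCK_SIZE. crypto_wait_req() covers both the synchronous path and the -EINPROGRESS path through the crypto engine.

#include <crypto/skcipher.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>

static int example_ecb_paes_encrypt(const u8 *keyblob, unsigned int kblen,
				    u8 *buf, unsigned int len)
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	DECLARE_CRYPTO_WAIT(wait);
	struct scatterlist sg;
	int rc;

	tfm = crypto_alloc_skcipher("ecb(paes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	rc = crypto_skcipher_setkey(tfm, keyblob, kblen);
	if (rc)
		goto out_free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		rc = -ENOMEM;
		goto out_free_tfm;
	}

	sg_init_one(&sg, buf, len);
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
				      CRYPTO_TFM_REQ_MAY_SLEEP,
				      crypto_req_done, &wait);
	skcipher_request_set_crypt(req, &sg, &sg, len, NULL);

	/* encrypt in place; may complete sync or async via the engine */
	rc = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return rc;
}
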
- return ctx->fc ? 0 : -EINVAL;
-}
+/*
+ * PAES CBC implementation
+ */
-static int cbc_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
- unsigned int key_len)
+struct cbc_param {
+ u8 iv[AES_BLOCK_SIZE];
+ u8 key[PAES_256_PROTKEY_SIZE];
+} __packed;
+
+struct s390_pcbc_req_ctx {
+ unsigned long modifier;
+ struct skcipher_walk walk;
+ bool param_init_done;
+ struct cbc_param param;
+};
+
+static int cbc_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+ unsigned int key_len)
{
- int rc;
struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ long fc;
+ int rc;
- _free_kb_keybuf(&ctx->kb);
- rc = _key_to_kb(&ctx->kb, in_key, key_len);
+ /* set raw key into context */
+ rc = paes_ctx_setkey(ctx, in_key, key_len);
if (rc)
- return rc;
+ goto out;
+
+ /* convert raw key into protected key */
+ rc = paes_convert_key(ctx);
+ if (rc)
+ goto out;
- return __cbc_paes_set_key(ctx);
+ /* Pick the correct function code based on the protected key type */
+ switch (ctx->pk.type) {
+ case PKEY_KEYTYPE_AES_128:
+ fc = CPACF_KMC_PAES_128;
+ break;
+ case PKEY_KEYTYPE_AES_192:
+ fc = CPACF_KMC_PAES_192;
+ break;
+ case PKEY_KEYTYPE_AES_256:
+ fc = CPACF_KMC_PAES_256;
+ break;
+ default:
+ fc = 0;
+ break;
+ }
+ ctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0;
+
+ rc = fc ? 0 : -EINVAL;
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier)
+static int cbc_paes_do_crypt(struct s390_paes_ctx *ctx,
+ struct s390_pcbc_req_ctx *req_ctx,
+ bool maysleep)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
+ struct cbc_param *param = &req_ctx->param;
+ struct skcipher_walk *walk = &req_ctx->walk;
unsigned int nbytes, n, k;
- int ret;
- struct {
- u8 iv[AES_BLOCK_SIZE];
- u8 key[MAXPROTKEYSIZE];
- } param;
-
- ret = skcipher_walk_virt(&walk, req, false);
- if (ret)
- return ret;
+ int pk_state, rc = 0;
+
+ if (!req_ctx->param_init_done) {
+ /* fetch and check protected key state */
+ spin_lock_bh(&ctx->pk_lock);
+ pk_state = ctx->pk_state;
+ switch (pk_state) {
+ case PK_STATE_NO_KEY:
+ rc = -ENOKEY;
+ break;
+ case PK_STATE_CONVERT_IN_PROGRESS:
+ rc = -EKEYEXPIRED;
+ break;
+ case PK_STATE_VALID:
+ memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
+ req_ctx->param_init_done = true;
+ break;
+ default:
+ rc = pk_state < 0 ? pk_state : -EIO;
+ break;
+ }
+ spin_unlock_bh(&ctx->pk_lock);
+ }
+ if (rc)
+ goto out;
- memcpy(param.iv, walk.iv, AES_BLOCK_SIZE);
- spin_lock_bh(&ctx->pk_lock);
- memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
- spin_unlock_bh(&ctx->pk_lock);
+ memcpy(param->iv, walk->iv, AES_BLOCK_SIZE);
- while ((nbytes = walk.nbytes) != 0) {
+ /*
+ * Note that in case of partial processing or failure the walk
+	 * is NOT unmapped here. So a follow-up task may reuse the walk,
+	 * or in case of unrecoverable failure it needs to be unmapped.
+ */
+ while ((nbytes = walk->nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
- k = cpacf_kmc(ctx->fc | modifier, &param,
- walk.dst.virt.addr, walk.src.virt.addr, n);
+ k = cpacf_kmc(ctx->fc | req_ctx->modifier, param,
+ walk->dst.virt.addr, walk->src.virt.addr, n);
if (k) {
- memcpy(walk.iv, param.iv, AES_BLOCK_SIZE);
- ret = skcipher_walk_done(&walk, nbytes - k);
+ memcpy(walk->iv, param->iv, AES_BLOCK_SIZE);
+ rc = skcipher_walk_done(walk, nbytes - k);
}
if (k < n) {
- if (__paes_convert_key(ctx))
- return skcipher_walk_done(&walk, -EIO);
+ if (!maysleep) {
+ rc = -EKEYEXPIRED;
+ goto out;
+ }
+ rc = paes_convert_key(ctx);
+ if (rc)
+ goto out;
spin_lock_bh(&ctx->pk_lock);
- memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+ memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
spin_unlock_bh(&ctx->pk_lock);
}
}
- return ret;
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier)
+{
+ struct s390_pcbc_req_ctx *req_ctx = skcipher_request_ctx(req);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk *walk = &req_ctx->walk;
+ int rc;
+
+ /*
+ * Attempt synchronous encryption first. If it fails, schedule the request
+ * asynchronously via the crypto engine. To preserve execution order,
+ * once a request is queued to the engine, further requests using the same
+ * tfm will also be routed through the engine.
+ */
+
+ rc = skcipher_walk_virt(walk, req, false);
+ if (rc)
+ goto out;
+
+ req_ctx->modifier = modifier;
+ req_ctx->param_init_done = false;
+
+ /* Try synchronous operation if no active engine usage */
+ if (!atomic_read(&ctx->via_engine_ctr)) {
+ rc = cbc_paes_do_crypt(ctx, req_ctx, false);
+ if (rc == 0)
+ goto out;
+ }
+
+ /*
+	 * If the sync operation failed or the key expired or there are
+	 * already requests enqueued via the engine, fall back to async.
+	 * Mark the tfm as using the engine to serialize requests.
+ */
+ if (rc == 0 || rc == -EKEYEXPIRED) {
+ atomic_inc(&ctx->via_engine_ctr);
+ rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req);
+ if (rc != -EINPROGRESS)
+ atomic_dec(&ctx->via_engine_ctr);
+ }
+
+ if (rc != -EINPROGRESS)
+ skcipher_walk_done(walk, rc);
+
+out:
+ if (rc != -EINPROGRESS)
+ memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
static int cbc_paes_encrypt(struct skcipher_request *req)
@@ -374,383 +768,881 @@ static int cbc_paes_decrypt(struct skcipher_request *req)
return cbc_paes_crypt(req, CPACF_DECRYPT);
}
-static struct skcipher_alg cbc_paes_alg = {
- .base.cra_name = "cbc(paes)",
- .base.cra_driver_name = "cbc-paes-s390",
- .base.cra_priority = 402, /* ecb-paes-s390 + 1 */
- .base.cra_blocksize = AES_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
- .base.cra_module = THIS_MODULE,
- .base.cra_list = LIST_HEAD_INIT(cbc_paes_alg.base.cra_list),
- .init = cbc_paes_init,
- .exit = cbc_paes_exit,
- .min_keysize = PAES_MIN_KEYSIZE,
- .max_keysize = PAES_MAX_KEYSIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = cbc_paes_set_key,
- .encrypt = cbc_paes_encrypt,
- .decrypt = cbc_paes_decrypt,
-};
-
-static int xts_paes_init(struct crypto_skcipher *tfm)
+static int cbc_paes_init(struct crypto_skcipher *tfm)
{
- struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- ctx->kb[0].key = NULL;
- ctx->kb[1].key = NULL;
+ memset(ctx, 0, sizeof(*ctx));
spin_lock_init(&ctx->pk_lock);
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pcbc_req_ctx));
+
return 0;
}
-static void xts_paes_exit(struct crypto_skcipher *tfm)
+static void cbc_paes_exit(struct crypto_skcipher *tfm)
{
- struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- _free_kb_keybuf(&ctx->kb[0]);
- _free_kb_keybuf(&ctx->kb[1]);
+ memzero_explicit(ctx, sizeof(*ctx));
}
-static inline int __xts_paes_convert_key(struct s390_pxts_ctx *ctx)
+static int cbc_paes_do_one_request(struct crypto_engine *engine, void *areq)
{
- struct pkey_protkey pkey0, pkey1;
+ struct skcipher_request *req = skcipher_request_cast(areq);
+ struct s390_pcbc_req_ctx *req_ctx = skcipher_request_ctx(req);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk *walk = &req_ctx->walk;
+ int rc;
- pkey0.len = sizeof(pkey0.protkey);
- pkey1.len = sizeof(pkey1.protkey);
+ /* walk has already been prepared */
+
+ rc = cbc_paes_do_crypt(ctx, req_ctx, true);
+ if (rc == -EKEYEXPIRED) {
+ /*
+		 * Protected key expired, conversion is in progress.
+		 * Trigger a re-schedule of this request by returning
+		 * -ENOSPC ("hardware queue is full") to the crypto engine.
+		 * To avoid immediate re-invocation of this callback,
+		 * tell the scheduler to voluntarily give up the CPU here.
+ */
+ cond_resched();
+ pr_debug("rescheduling request\n");
+ return -ENOSPC;
+ } else if (rc) {
+ skcipher_walk_done(walk, rc);
+ }
- if (__paes_keyblob2pkey(&ctx->kb[0], &pkey0) ||
- __paes_keyblob2pkey(&ctx->kb[1], &pkey1))
- return -EINVAL;
+ memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+ pr_debug("request complete with rc=%d\n", rc);
+ local_bh_disable();
+ atomic_dec(&ctx->via_engine_ctr);
+ crypto_finalize_skcipher_request(engine, req, rc);
+ local_bh_enable();
+ return rc;
+}
- spin_lock_bh(&ctx->pk_lock);
- memcpy(&ctx->pk[0], &pkey0, sizeof(pkey0));
- memcpy(&ctx->pk[1], &pkey1, sizeof(pkey1));
- spin_unlock_bh(&ctx->pk_lock);
+static struct skcipher_engine_alg cbc_paes_alg = {
+ .base = {
+ .base.cra_name = "cbc(paes)",
+ .base.cra_driver_name = "cbc-paes-s390",
+		.base.cra_priority = 402,	/* ecb-paes-s390 + 1 */
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_list = LIST_HEAD_INIT(cbc_paes_alg.base.base.cra_list),
+ .init = cbc_paes_init,
+ .exit = cbc_paes_exit,
+ .min_keysize = PAES_MIN_KEYSIZE,
+ .max_keysize = PAES_MAX_KEYSIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = cbc_paes_setkey,
+ .encrypt = cbc_paes_encrypt,
+ .decrypt = cbc_paes_decrypt,
+ },
+ .op = {
+ .do_one_request = cbc_paes_do_one_request,
+ },
+};
- return 0;
-}
+/*
+ * PAES CTR implementation
+ */
+
+struct ctr_param {
+ u8 key[PAES_256_PROTKEY_SIZE];
+} __packed;
-static inline int __xts_paes_set_key(struct s390_pxts_ctx *ctx)
+struct s390_pctr_req_ctx {
+ unsigned long modifier;
+ struct skcipher_walk walk;
+ bool param_init_done;
+ struct ctr_param param;
+};
+
+static int ctr_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+ unsigned int key_len)
{
- unsigned long fc;
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ long fc;
+ int rc;
- if (__xts_paes_convert_key(ctx))
- return -EINVAL;
+ /* set raw key into context */
+ rc = paes_ctx_setkey(ctx, in_key, key_len);
+ if (rc)
+ goto out;
- if (ctx->pk[0].type != ctx->pk[1].type)
- return -EINVAL;
+ /* convert raw key into protected key */
+ rc = paes_convert_key(ctx);
+ if (rc)
+ goto out;
/* Pick the correct function code based on the protected key type */
- fc = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? CPACF_KM_PXTS_128 :
- (ctx->pk[0].type == PKEY_KEYTYPE_AES_256) ?
- CPACF_KM_PXTS_256 : 0;
+ switch (ctx->pk.type) {
+ case PKEY_KEYTYPE_AES_128:
+ fc = CPACF_KMCTR_PAES_128;
+ break;
+ case PKEY_KEYTYPE_AES_192:
+ fc = CPACF_KMCTR_PAES_192;
+ break;
+ case PKEY_KEYTYPE_AES_256:
+ fc = CPACF_KMCTR_PAES_256;
+ break;
+ default:
+ fc = 0;
+ break;
+ }
+ ctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0;
- /* Check if the function code is available */
- ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+ rc = fc ? 0 : -EINVAL;
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static inline unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
+{
+ unsigned int i, n;
+
+ /* only use complete blocks, max. PAGE_SIZE */
+ memcpy(ctrptr, iv, AES_BLOCK_SIZE);
+ n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1);
+ for (i = (n / AES_BLOCK_SIZE) - 1; i > 0; i--) {
+ memcpy(ctrptr + AES_BLOCK_SIZE, ctrptr, AES_BLOCK_SIZE);
+ crypto_inc(ctrptr + AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+ ctrptr += AES_BLOCK_SIZE;
+ }
+ return n;
+}
+
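
A worked example of __ctrblk_init() above (illustration only): for nbytes = 100 and AES_BLOCK_SIZE = 16 it returns n = 96 and writes six consecutive counter blocks into ctrptr - the IV itself followed by IV+1 through IV+5 - so that the subsequent cpacf_kmctr() call can process all six blocks in one invocation.
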
+static int ctr_paes_do_crypt(struct s390_paes_ctx *ctx,
+ struct s390_pctr_req_ctx *req_ctx,
+ bool maysleep)
+{
+ struct ctr_param *param = &req_ctx->param;
+ struct skcipher_walk *walk = &req_ctx->walk;
+ u8 buf[AES_BLOCK_SIZE], *ctrptr;
+ unsigned int nbytes, n, k;
+ int pk_state, locked, rc = 0;
+
+ if (!req_ctx->param_init_done) {
+ /* fetch and check protected key state */
+ spin_lock_bh(&ctx->pk_lock);
+ pk_state = ctx->pk_state;
+ switch (pk_state) {
+ case PK_STATE_NO_KEY:
+ rc = -ENOKEY;
+ break;
+ case PK_STATE_CONVERT_IN_PROGRESS:
+ rc = -EKEYEXPIRED;
+ break;
+ case PK_STATE_VALID:
+ memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
+ req_ctx->param_init_done = true;
+ break;
+ default:
+ rc = pk_state < 0 ? pk_state : -EIO;
+ break;
+ }
+ spin_unlock_bh(&ctx->pk_lock);
+ }
+ if (rc)
+ goto out;
+
+ locked = mutex_trylock(&ctrblk_lock);
+
+ /*
+ * Note that in case of partial processing or failure the walk
+	 * is NOT unmapped here. So a follow-up task may reuse the walk,
+	 * or in case of unrecoverable failure it needs to be unmapped.
+ */
+ while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ n = AES_BLOCK_SIZE;
+ if (nbytes >= 2 * AES_BLOCK_SIZE && locked)
+ n = __ctrblk_init(ctrblk, walk->iv, nbytes);
+ ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv;
+ k = cpacf_kmctr(ctx->fc, param, walk->dst.virt.addr,
+ walk->src.virt.addr, n, ctrptr);
+ if (k) {
+ if (ctrptr == ctrblk)
+ memcpy(walk->iv, ctrptr + k - AES_BLOCK_SIZE,
+ AES_BLOCK_SIZE);
+ crypto_inc(walk->iv, AES_BLOCK_SIZE);
+ rc = skcipher_walk_done(walk, nbytes - k);
+ }
+ if (k < n) {
+ if (!maysleep) {
+ if (locked)
+ mutex_unlock(&ctrblk_lock);
+ rc = -EKEYEXPIRED;
+ goto out;
+ }
+ rc = paes_convert_key(ctx);
+ if (rc) {
+ if (locked)
+ mutex_unlock(&ctrblk_lock);
+ goto out;
+ }
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
+ spin_unlock_bh(&ctx->pk_lock);
+ }
+ }
+ if (locked)
+ mutex_unlock(&ctrblk_lock);
+
+ /* final block may be < AES_BLOCK_SIZE, copy only nbytes */
+ if (nbytes) {
+ memset(buf, 0, AES_BLOCK_SIZE);
+ memcpy(buf, walk->src.virt.addr, nbytes);
+ while (1) {
+ if (cpacf_kmctr(ctx->fc, param, buf,
+ buf, AES_BLOCK_SIZE,
+ walk->iv) == AES_BLOCK_SIZE)
+ break;
+ if (!maysleep) {
+ rc = -EKEYEXPIRED;
+ goto out;
+ }
+ rc = paes_convert_key(ctx);
+ if (rc)
+ goto out;
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
+ spin_unlock_bh(&ctx->pk_lock);
+ }
+ memcpy(walk->dst.virt.addr, buf, nbytes);
+ crypto_inc(walk->iv, AES_BLOCK_SIZE);
+ rc = skcipher_walk_done(walk, 0);
+ }
- return ctx->fc ? 0 : -EINVAL;
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-static int xts_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
- unsigned int xts_key_len)
+static int ctr_paes_crypt(struct skcipher_request *req)
{
+ struct s390_pctr_req_ctx *req_ctx = skcipher_request_ctx(req);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk *walk = &req_ctx->walk;
int rc;
+
+ /*
+ * Attempt synchronous encryption first. If it fails, schedule the request
+ * asynchronously via the crypto engine. To preserve execution order,
+ * once a request is queued to the engine, further requests using the same
+ * tfm will also be routed through the engine.
+ */
+
+ rc = skcipher_walk_virt(walk, req, false);
+ if (rc)
+ goto out;
+
+ req_ctx->param_init_done = false;
+
+ /* Try synchronous operation if no active engine usage */
+ if (!atomic_read(&ctx->via_engine_ctr)) {
+ rc = ctr_paes_do_crypt(ctx, req_ctx, false);
+ if (rc == 0)
+ goto out;
+ }
+
+ /*
+	 * If the sync operation failed or the key expired or there are
+	 * already requests enqueued via the engine, fall back to async.
+	 * Mark the tfm as using the engine to serialize requests.
+ */
+ if (rc == 0 || rc == -EKEYEXPIRED) {
+ atomic_inc(&ctx->via_engine_ctr);
+ rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req);
+ if (rc != -EINPROGRESS)
+ atomic_dec(&ctx->via_engine_ctr);
+ }
+
+ if (rc != -EINPROGRESS)
+ skcipher_walk_done(walk, rc);
+
+out:
+ if (rc != -EINPROGRESS)
+ memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static int ctr_paes_init(struct crypto_skcipher *tfm)
+{
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ memset(ctx, 0, sizeof(*ctx));
+ spin_lock_init(&ctx->pk_lock);
+
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pctr_req_ctx));
+
+ return 0;
+}
+
+static void ctr_paes_exit(struct crypto_skcipher *tfm)
+{
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ memzero_explicit(ctx, sizeof(*ctx));
+}
+
+static int ctr_paes_do_one_request(struct crypto_engine *engine, void *areq)
+{
+ struct skcipher_request *req = skcipher_request_cast(areq);
+ struct s390_pctr_req_ctx *req_ctx = skcipher_request_ctx(req);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk *walk = &req_ctx->walk;
+ int rc;
+
+ /* walk has already been prepared */
+
+ rc = ctr_paes_do_crypt(ctx, req_ctx, true);
+ if (rc == -EKEYEXPIRED) {
+ /*
+		 * Protected key expired, conversion is in progress.
+		 * Trigger a re-schedule of this request by returning
+		 * -ENOSPC ("hardware queue is full") to the crypto engine.
+		 * To avoid immediate re-invocation of this callback,
+		 * tell the scheduler to voluntarily give up the CPU here.
+ */
+ cond_resched();
+ pr_debug("rescheduling request\n");
+ return -ENOSPC;
+ } else if (rc) {
+ skcipher_walk_done(walk, rc);
+ }
+
+ memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+ pr_debug("request complete with rc=%d\n", rc);
+ local_bh_disable();
+ atomic_dec(&ctx->via_engine_ctr);
+ crypto_finalize_skcipher_request(engine, req, rc);
+ local_bh_enable();
+ return rc;
+}
+
+static struct skcipher_engine_alg ctr_paes_alg = {
+ .base = {
+ .base.cra_name = "ctr(paes)",
+ .base.cra_driver_name = "ctr-paes-s390",
+ .base.cra_priority = 402, /* ecb-paes-s390 + 1 */
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_list = LIST_HEAD_INIT(ctr_paes_alg.base.base.cra_list),
+ .init = ctr_paes_init,
+ .exit = ctr_paes_exit,
+ .min_keysize = PAES_MIN_KEYSIZE,
+ .max_keysize = PAES_MAX_KEYSIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ctr_paes_setkey,
+ .encrypt = ctr_paes_crypt,
+ .decrypt = ctr_paes_crypt,
+ .chunksize = AES_BLOCK_SIZE,
+ },
+ .op = {
+ .do_one_request = ctr_paes_do_one_request,
+ },
+};
+
+/*
+ * PAES XTS implementation
+ */
+
+struct xts_full_km_param {
+ u8 key[64];
+ u8 tweak[16];
+ u8 nap[16];
+ u8 wkvp[32];
+} __packed;
+
+struct xts_km_param {
+ u8 key[PAES_256_PROTKEY_SIZE];
+ u8 init[16];
+} __packed;
+
+struct xts_pcc_param {
+ u8 key[PAES_256_PROTKEY_SIZE];
+ u8 tweak[16];
+ u8 block[16];
+ u8 bit[16];
+ u8 xts[16];
+} __packed;
+
+struct s390_pxts_req_ctx {
+ unsigned long modifier;
+ struct skcipher_walk walk;
+ bool param_init_done;
+ union {
+ struct xts_full_km_param full_km_param;
+ struct xts_km_param km_param;
+ } param;
+};
+
+static int xts_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+ unsigned int in_keylen)
+{
struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
u8 ckey[2 * AES_MAX_KEY_SIZE];
- unsigned int ckey_len, key_len;
+ unsigned int ckey_len;
+ long fc;
+ int rc;
- if (xts_key_len % 2)
+ if ((in_keylen == 32 || in_keylen == 64) &&
+ xts_verify_key(tfm, in_key, in_keylen))
return -EINVAL;
- key_len = xts_key_len / 2;
-
- _free_kb_keybuf(&ctx->kb[0]);
- _free_kb_keybuf(&ctx->kb[1]);
- rc = _key_to_kb(&ctx->kb[0], in_key, key_len);
+ /* set raw key into context */
+ rc = pxts_ctx_setkey(ctx, in_key, in_keylen);
if (rc)
- return rc;
- rc = _key_to_kb(&ctx->kb[1], in_key + key_len, key_len);
- if (rc)
- return rc;
+ goto out;
- rc = __xts_paes_set_key(ctx);
+ /* convert raw key(s) into protected key(s) */
+ rc = pxts_convert_key(ctx);
if (rc)
- return rc;
+ goto out;
/*
* xts_verify_key verifies the key length is not odd and makes
* sure that the two keys are not the same. This can be done
- * on the two protected keys as well
+ * on the two protected keys as well - but not for full xts keys.
*/
- ckey_len = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ?
- AES_KEYSIZE_128 : AES_KEYSIZE_256;
- memcpy(ckey, ctx->pk[0].protkey, ckey_len);
- memcpy(ckey + ckey_len, ctx->pk[1].protkey, ckey_len);
- return xts_verify_key(tfm, ckey, 2*ckey_len);
+ if (ctx->pk[0].type == PKEY_KEYTYPE_AES_128 ||
+ ctx->pk[0].type == PKEY_KEYTYPE_AES_256) {
+ ckey_len = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ?
+ AES_KEYSIZE_128 : AES_KEYSIZE_256;
+ memcpy(ckey, ctx->pk[0].protkey, ckey_len);
+ memcpy(ckey + ckey_len, ctx->pk[1].protkey, ckey_len);
+ rc = xts_verify_key(tfm, ckey, 2 * ckey_len);
+ memzero_explicit(ckey, sizeof(ckey));
+ if (rc)
+ goto out;
+ }
+
+ /* Pick the correct function code based on the protected key type */
+ switch (ctx->pk[0].type) {
+ case PKEY_KEYTYPE_AES_128:
+ fc = CPACF_KM_PXTS_128;
+ break;
+ case PKEY_KEYTYPE_AES_256:
+ fc = CPACF_KM_PXTS_256;
+ break;
+ case PKEY_KEYTYPE_AES_XTS_128:
+ fc = CPACF_KM_PXTS_128_FULL;
+ break;
+ case PKEY_KEYTYPE_AES_XTS_256:
+ fc = CPACF_KM_PXTS_256_FULL;
+ break;
+ default:
+ fc = 0;
+ break;
+ }
+ ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+
+ rc = fc ? 0 : -EINVAL;
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-static int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier)
+static int xts_paes_do_crypt_fullkey(struct s390_pxts_ctx *ctx,
+ struct s390_pxts_req_ctx *req_ctx,
+ bool maysleep)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
+ struct xts_full_km_param *param = &req_ctx->param.full_km_param;
+ struct skcipher_walk *walk = &req_ctx->walk;
unsigned int keylen, offset, nbytes, n, k;
- int ret;
- struct {
- u8 key[MAXPROTKEYSIZE]; /* key + verification pattern */
- u8 tweak[16];
- u8 block[16];
- u8 bit[16];
- u8 xts[16];
- } pcc_param;
- struct {
- u8 key[MAXPROTKEYSIZE]; /* key + verification pattern */
- u8 init[16];
- } xts_param;
-
- ret = skcipher_walk_virt(&walk, req, false);
- if (ret)
- return ret;
+ int rc = 0;
- keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 48 : 64;
- offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 16 : 0;
+ /*
+ * The calling function xts_paes_do_crypt() ensures the
+ * protected key state is always PK_STATE_VALID when this
+ * function is invoked.
+ */
- memset(&pcc_param, 0, sizeof(pcc_param));
- memcpy(pcc_param.tweak, walk.iv, sizeof(pcc_param.tweak));
- spin_lock_bh(&ctx->pk_lock);
- memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen);
- memcpy(xts_param.key + offset, ctx->pk[0].protkey, keylen);
- spin_unlock_bh(&ctx->pk_lock);
- cpacf_pcc(ctx->fc, pcc_param.key + offset);
- memcpy(xts_param.init, pcc_param.xts, 16);
+ keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_128) ? 32 : 64;
+ offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_128) ? 32 : 0;
+
+ if (!req_ctx->param_init_done) {
+ memset(param, 0, sizeof(*param));
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(param->key + offset, ctx->pk[0].protkey, keylen);
+ memcpy(param->wkvp, ctx->pk[0].protkey + keylen, sizeof(param->wkvp));
+ spin_unlock_bh(&ctx->pk_lock);
+ memcpy(param->tweak, walk->iv, sizeof(param->tweak));
+ param->nap[0] = 0x01; /* initial alpha power (1, little-endian) */
+ req_ctx->param_init_done = true;
+ }
- while ((nbytes = walk.nbytes) != 0) {
+ /*
+ * Note that in case of partial processing or failure the walk
+	 * is NOT unmapped here. So a follow-up task may reuse the walk,
+	 * or in case of unrecoverable failure it needs to be unmapped.
+ */
+ while ((nbytes = walk->nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
- k = cpacf_km(ctx->fc | modifier, xts_param.key + offset,
- walk.dst.virt.addr, walk.src.virt.addr, n);
+ k = cpacf_km(ctx->fc | req_ctx->modifier, param->key + offset,
+ walk->dst.virt.addr, walk->src.virt.addr, n);
if (k)
- ret = skcipher_walk_done(&walk, nbytes - k);
+ rc = skcipher_walk_done(walk, nbytes - k);
if (k < n) {
- if (__xts_paes_convert_key(ctx))
- return skcipher_walk_done(&walk, -EIO);
+ if (!maysleep) {
+ rc = -EKEYEXPIRED;
+ goto out;
+ }
+ rc = pxts_convert_key(ctx);
+ if (rc)
+ goto out;
spin_lock_bh(&ctx->pk_lock);
- memcpy(xts_param.key + offset,
- ctx->pk[0].protkey, keylen);
+ memcpy(param->key + offset, ctx->pk[0].protkey, keylen);
+ memcpy(param->wkvp, ctx->pk[0].protkey + keylen, sizeof(param->wkvp));
spin_unlock_bh(&ctx->pk_lock);
}
}
- return ret;
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-static int xts_paes_encrypt(struct skcipher_request *req)
+static inline int __xts_2keys_prep_param(struct s390_pxts_ctx *ctx,
+ struct xts_km_param *param,
+ struct skcipher_walk *walk,
+ unsigned int keylen,
+ unsigned int offset, bool maysleep)
{
- return xts_paes_crypt(req, 0);
+ struct xts_pcc_param pcc_param;
+ unsigned long cc = 1;
+ int rc = 0;
+
+ while (cc) {
+ memset(&pcc_param, 0, sizeof(pcc_param));
+ memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen);
+ memcpy(param->key + offset, ctx->pk[0].protkey, keylen);
+ spin_unlock_bh(&ctx->pk_lock);
+ cc = cpacf_pcc(ctx->fc, pcc_param.key + offset);
+ if (cc) {
+ if (!maysleep) {
+ rc = -EKEYEXPIRED;
+ break;
+ }
+ rc = pxts_convert_key(ctx);
+ if (rc)
+ break;
+ continue;
+ }
+ memcpy(param->init, pcc_param.xts, 16);
+ }
+
+ memzero_explicit(pcc_param.key, sizeof(pcc_param.key));
+ return rc;
}
-static int xts_paes_decrypt(struct skcipher_request *req)
+static int xts_paes_do_crypt_2keys(struct s390_pxts_ctx *ctx,
+ struct s390_pxts_req_ctx *req_ctx,
+ bool maysleep)
{
- return xts_paes_crypt(req, CPACF_DECRYPT);
-}
+ struct xts_km_param *param = &req_ctx->param.km_param;
+ struct skcipher_walk *walk = &req_ctx->walk;
+ unsigned int keylen, offset, nbytes, n, k;
+ int rc = 0;
-static struct skcipher_alg xts_paes_alg = {
- .base.cra_name = "xts(paes)",
- .base.cra_driver_name = "xts-paes-s390",
- .base.cra_priority = 402, /* ecb-paes-s390 + 1 */
- .base.cra_blocksize = AES_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct s390_pxts_ctx),
- .base.cra_module = THIS_MODULE,
- .base.cra_list = LIST_HEAD_INIT(xts_paes_alg.base.cra_list),
- .init = xts_paes_init,
- .exit = xts_paes_exit,
- .min_keysize = 2 * PAES_MIN_KEYSIZE,
- .max_keysize = 2 * PAES_MAX_KEYSIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = xts_paes_set_key,
- .encrypt = xts_paes_encrypt,
- .decrypt = xts_paes_decrypt,
-};
+ /*
+ * The calling function xts_paes_do_crypt() ensures the
+ * protected key state is always PK_STATE_VALID when this
+ * function is invoked.
+ */
-static int ctr_paes_init(struct crypto_skcipher *tfm)
-{
- struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 48 : 64;
+ offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 16 : 0;
- ctx->kb.key = NULL;
- spin_lock_init(&ctx->pk_lock);
+ if (!req_ctx->param_init_done) {
+ rc = __xts_2keys_prep_param(ctx, param, walk,
+ keylen, offset, maysleep);
+ if (rc)
+ goto out;
+ req_ctx->param_init_done = true;
+ }
- return 0;
+ /*
+ * Note that in case of partial processing or failure the walk
+	 * is NOT unmapped here. So a follow-up task may reuse the walk,
+	 * or in case of unrecoverable failure it needs to be unmapped.
+ */
+ while ((nbytes = walk->nbytes) != 0) {
+ /* only use complete blocks */
+ n = nbytes & ~(AES_BLOCK_SIZE - 1);
+ k = cpacf_km(ctx->fc | req_ctx->modifier, param->key + offset,
+ walk->dst.virt.addr, walk->src.virt.addr, n);
+ if (k)
+ rc = skcipher_walk_done(walk, nbytes - k);
+ if (k < n) {
+ if (!maysleep) {
+ rc = -EKEYEXPIRED;
+ goto out;
+ }
+ rc = pxts_convert_key(ctx);
+ if (rc)
+ goto out;
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(param->key + offset, ctx->pk[0].protkey, keylen);
+ spin_unlock_bh(&ctx->pk_lock);
+ }
+ }
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-static void ctr_paes_exit(struct crypto_skcipher *tfm)
+static int xts_paes_do_crypt(struct s390_pxts_ctx *ctx,
+ struct s390_pxts_req_ctx *req_ctx,
+ bool maysleep)
{
- struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int pk_state, rc = 0;
- _free_kb_keybuf(&ctx->kb);
+ /* fetch and check protected key state */
+ spin_lock_bh(&ctx->pk_lock);
+ pk_state = ctx->pk_state;
+ switch (pk_state) {
+ case PK_STATE_NO_KEY:
+ rc = -ENOKEY;
+ break;
+ case PK_STATE_CONVERT_IN_PROGRESS:
+ rc = -EKEYEXPIRED;
+ break;
+ case PK_STATE_VALID:
+ break;
+ default:
+ rc = pk_state < 0 ? pk_state : -EIO;
+ break;
+ }
+ spin_unlock_bh(&ctx->pk_lock);
+ if (rc)
+ goto out;
+
+ /* Call the 'real' crypt function based on the xts prot key type. */
+ switch (ctx->fc) {
+ case CPACF_KM_PXTS_128:
+ case CPACF_KM_PXTS_256:
+ rc = xts_paes_do_crypt_2keys(ctx, req_ctx, maysleep);
+ break;
+ case CPACF_KM_PXTS_128_FULL:
+ case CPACF_KM_PXTS_256_FULL:
+ rc = xts_paes_do_crypt_fullkey(ctx, req_ctx, maysleep);
+ break;
+ default:
+ rc = -EINVAL;
+ }
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-static inline int __ctr_paes_set_key(struct s390_paes_ctx *ctx)
+static inline int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier)
{
+ struct s390_pxts_req_ctx *req_ctx = skcipher_request_ctx(req);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk *walk = &req_ctx->walk;
int rc;
- unsigned long fc;
- rc = __paes_convert_key(ctx);
+ /*
+ * Attempt synchronous encryption first. If it fails, schedule the request
+ * asynchronously via the crypto engine. To preserve execution order,
+ * once a request is queued to the engine, further requests using the same
+ * tfm will also be routed through the engine.
+ */
+
+ rc = skcipher_walk_virt(walk, req, false);
if (rc)
- return rc;
+ goto out;
- /* Pick the correct function code based on the protected key type */
- fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMCTR_PAES_128 :
- (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KMCTR_PAES_192 :
- (ctx->pk.type == PKEY_KEYTYPE_AES_256) ?
- CPACF_KMCTR_PAES_256 : 0;
+ req_ctx->modifier = modifier;
+ req_ctx->param_init_done = false;
- /* Check if the function code is available */
- ctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0;
+ /* Try synchronous operation if no active engine usage */
+ if (!atomic_read(&ctx->via_engine_ctr)) {
+ rc = xts_paes_do_crypt(ctx, req_ctx, false);
+ if (rc == 0)
+ goto out;
+ }
+
+ /*
+	 * If the sync operation failed or the key expired or there are
+	 * already requests enqueued via the engine, fall back to async.
+	 * Mark the tfm as using the engine to serialize requests.
+ */
+ if (rc == 0 || rc == -EKEYEXPIRED) {
+ atomic_inc(&ctx->via_engine_ctr);
+ rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req);
+ if (rc != -EINPROGRESS)
+ atomic_dec(&ctx->via_engine_ctr);
+ }
- return ctx->fc ? 0 : -EINVAL;
+ if (rc != -EINPROGRESS)
+ skcipher_walk_done(walk, rc);
+
+out:
+ if (rc != -EINPROGRESS)
+ memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-static int ctr_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
- unsigned int key_len)
+static int xts_paes_encrypt(struct skcipher_request *req)
{
- int rc;
- struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ return xts_paes_crypt(req, 0);
+}
- _free_kb_keybuf(&ctx->kb);
- rc = _key_to_kb(&ctx->kb, in_key, key_len);
- if (rc)
- return rc;
+static int xts_paes_decrypt(struct skcipher_request *req)
+{
+ return xts_paes_crypt(req, CPACF_DECRYPT);
+}
- return __ctr_paes_set_key(ctx);
+static int xts_paes_init(struct crypto_skcipher *tfm)
+{
+ struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ memset(ctx, 0, sizeof(*ctx));
+ spin_lock_init(&ctx->pk_lock);
+
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pxts_req_ctx));
+
+ return 0;
}
-static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
+static void xts_paes_exit(struct crypto_skcipher *tfm)
{
- unsigned int i, n;
+ struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
- /* only use complete blocks, max. PAGE_SIZE */
- memcpy(ctrptr, iv, AES_BLOCK_SIZE);
- n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1);
- for (i = (n / AES_BLOCK_SIZE) - 1; i > 0; i--) {
- memcpy(ctrptr + AES_BLOCK_SIZE, ctrptr, AES_BLOCK_SIZE);
- crypto_inc(ctrptr + AES_BLOCK_SIZE, AES_BLOCK_SIZE);
- ctrptr += AES_BLOCK_SIZE;
- }
- return n;
+ memzero_explicit(ctx, sizeof(*ctx));
}
-static int ctr_paes_crypt(struct skcipher_request *req)
+static int xts_paes_do_one_request(struct crypto_engine *engine, void *areq)
{
+ struct skcipher_request *req = skcipher_request_cast(areq);
+ struct s390_pxts_req_ctx *req_ctx = skcipher_request_ctx(req);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- u8 buf[AES_BLOCK_SIZE], *ctrptr;
- struct skcipher_walk walk;
- unsigned int nbytes, n, k;
- int ret, locked;
- struct {
- u8 key[MAXPROTKEYSIZE];
- } param;
+ struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk *walk = &req_ctx->walk;
+ int rc;
- ret = skcipher_walk_virt(&walk, req, false);
- if (ret)
- return ret;
+ /* walk has already been prepared */
+
+ rc = xts_paes_do_crypt(ctx, req_ctx, true);
+ if (rc == -EKEYEXPIRED) {
+ /*
+		 * Protected key expired, conversion is in progress.
+		 * Trigger a re-schedule of this request by returning
+		 * -ENOSPC ("hardware queue is full") to the crypto engine.
+		 * To avoid immediate re-invocation of this callback,
+		 * tell the scheduler to voluntarily give up the CPU here.
+ */
+ cond_resched();
+ pr_debug("rescheduling request\n");
+ return -ENOSPC;
+ } else if (rc) {
+ skcipher_walk_done(walk, rc);
+ }
- spin_lock_bh(&ctx->pk_lock);
- memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
- spin_unlock_bh(&ctx->pk_lock);
+ memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+ pr_debug("request complete with rc=%d\n", rc);
+ local_bh_disable();
+ atomic_dec(&ctx->via_engine_ctr);
+ crypto_finalize_skcipher_request(engine, req, rc);
+ local_bh_enable();
+ return rc;
+}
- locked = mutex_trylock(&ctrblk_lock);
+static struct skcipher_engine_alg xts_paes_alg = {
+ .base = {
+ .base.cra_name = "xts(paes)",
+ .base.cra_driver_name = "xts-paes-s390",
+ .base.cra_priority = 402, /* ecb-paes-s390 + 1 */
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_pxts_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_list = LIST_HEAD_INIT(xts_paes_alg.base.base.cra_list),
+ .init = xts_paes_init,
+ .exit = xts_paes_exit,
+ .min_keysize = 2 * PAES_MIN_KEYSIZE,
+ .max_keysize = 2 * PAES_MAX_KEYSIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = xts_paes_setkey,
+ .encrypt = xts_paes_encrypt,
+ .decrypt = xts_paes_decrypt,
+ },
+ .op = {
+ .do_one_request = xts_paes_do_one_request,
+ },
+};
- while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
- n = AES_BLOCK_SIZE;
- if (nbytes >= 2*AES_BLOCK_SIZE && locked)
- n = __ctrblk_init(ctrblk, walk.iv, nbytes);
- ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk.iv;
- k = cpacf_kmctr(ctx->fc, &param, walk.dst.virt.addr,
- walk.src.virt.addr, n, ctrptr);
- if (k) {
- if (ctrptr == ctrblk)
- memcpy(walk.iv, ctrptr + k - AES_BLOCK_SIZE,
- AES_BLOCK_SIZE);
- crypto_inc(walk.iv, AES_BLOCK_SIZE);
- ret = skcipher_walk_done(&walk, nbytes - k);
- }
- if (k < n) {
- if (__paes_convert_key(ctx)) {
- if (locked)
- mutex_unlock(&ctrblk_lock);
- return skcipher_walk_done(&walk, -EIO);
- }
- spin_lock_bh(&ctx->pk_lock);
- memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
- spin_unlock_bh(&ctx->pk_lock);
- }
- }
- if (locked)
- mutex_unlock(&ctrblk_lock);
- /*
- * final block may be < AES_BLOCK_SIZE, copy only nbytes
- */
- if (nbytes) {
- memset(buf, 0, AES_BLOCK_SIZE);
- memcpy(buf, walk.src.virt.addr, nbytes);
- while (1) {
- if (cpacf_kmctr(ctx->fc, &param, buf,
- buf, AES_BLOCK_SIZE,
- walk.iv) == AES_BLOCK_SIZE)
- break;
- if (__paes_convert_key(ctx))
- return skcipher_walk_done(&walk, -EIO);
- spin_lock_bh(&ctx->pk_lock);
- memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
- spin_unlock_bh(&ctx->pk_lock);
- }
- memcpy(walk.dst.virt.addr, buf, nbytes);
- crypto_inc(walk.iv, AES_BLOCK_SIZE);
- ret = skcipher_walk_done(&walk, nbytes);
- }
-
- return ret;
-}
-
-static struct skcipher_alg ctr_paes_alg = {
- .base.cra_name = "ctr(paes)",
- .base.cra_driver_name = "ctr-paes-s390",
- .base.cra_priority = 402, /* ecb-paes-s390 + 1 */
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
- .base.cra_module = THIS_MODULE,
- .base.cra_list = LIST_HEAD_INIT(ctr_paes_alg.base.cra_list),
- .init = ctr_paes_init,
- .exit = ctr_paes_exit,
- .min_keysize = PAES_MIN_KEYSIZE,
- .max_keysize = PAES_MAX_KEYSIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ctr_paes_set_key,
- .encrypt = ctr_paes_crypt,
- .decrypt = ctr_paes_crypt,
- .chunksize = AES_BLOCK_SIZE,
+/*
+ * alg register, unregister, module init, exit
+ */
+
+static struct miscdevice paes_dev = {
+ .name = "paes",
+ .minor = MISC_DYNAMIC_MINOR,
};
-static inline void __crypto_unregister_skcipher(struct skcipher_alg *alg)
+static inline void __crypto_unregister_skcipher(struct skcipher_engine_alg *alg)
{
- if (!list_empty(&alg->base.cra_list))
- crypto_unregister_skcipher(alg);
+ if (!list_empty(&alg->base.base.cra_list))
+ crypto_engine_unregister_skcipher(alg);
}
static void paes_s390_fini(void)
{
+ if (paes_crypto_engine) {
+ crypto_engine_stop(paes_crypto_engine);
+ crypto_engine_exit(paes_crypto_engine);
+ }
__crypto_unregister_skcipher(&ctr_paes_alg);
__crypto_unregister_skcipher(&xts_paes_alg);
__crypto_unregister_skcipher(&cbc_paes_alg);
__crypto_unregister_skcipher(&ecb_paes_alg);
if (ctrblk)
- free_page((unsigned long) ctrblk);
+ free_page((unsigned long)ctrblk);
+ misc_deregister(&paes_dev);
}
static int __init paes_s390_init(void)
{
- int ret;
+ int rc;
+
+ /* register a simple paes pseudo misc device */
+ rc = misc_register(&paes_dev);
+ if (rc)
+ return rc;
+
+	/* with this pseudo device allocate and start a crypto engine */
+ paes_crypto_engine =
+ crypto_engine_alloc_init_and_set(paes_dev.this_device,
+ true, false, MAX_QLEN);
+ if (!paes_crypto_engine) {
+ rc = -ENOMEM;
+ goto out_err;
+ }
+ rc = crypto_engine_start(paes_crypto_engine);
+ if (rc) {
+ crypto_engine_exit(paes_crypto_engine);
+ paes_crypto_engine = NULL;
+ goto out_err;
+ }
/* Query available functions for KM, KMC and KMCTR */
cpacf_query(CPACF_KM, &km_functions);
@@ -760,49 +1652,57 @@ static int __init paes_s390_init(void)
if (cpacf_test_func(&km_functions, CPACF_KM_PAES_128) ||
cpacf_test_func(&km_functions, CPACF_KM_PAES_192) ||
cpacf_test_func(&km_functions, CPACF_KM_PAES_256)) {
- ret = crypto_register_skcipher(&ecb_paes_alg);
- if (ret)
+ rc = crypto_engine_register_skcipher(&ecb_paes_alg);
+ if (rc)
goto out_err;
+ pr_debug("%s registered\n", ecb_paes_alg.base.base.cra_driver_name);
}
if (cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) ||
cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) ||
cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256)) {
- ret = crypto_register_skcipher(&cbc_paes_alg);
- if (ret)
+ rc = crypto_engine_register_skcipher(&cbc_paes_alg);
+ if (rc)
goto out_err;
+ pr_debug("%s registered\n", cbc_paes_alg.base.base.cra_driver_name);
}
if (cpacf_test_func(&km_functions, CPACF_KM_PXTS_128) ||
cpacf_test_func(&km_functions, CPACF_KM_PXTS_256)) {
- ret = crypto_register_skcipher(&xts_paes_alg);
- if (ret)
+ rc = crypto_engine_register_skcipher(&xts_paes_alg);
+ if (rc)
goto out_err;
+ pr_debug("%s registered\n", xts_paes_alg.base.base.cra_driver_name);
}
if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_128) ||
cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_192) ||
cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_256)) {
- ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
+ ctrblk = (u8 *)__get_free_page(GFP_KERNEL);
if (!ctrblk) {
- ret = -ENOMEM;
+ rc = -ENOMEM;
goto out_err;
}
- ret = crypto_register_skcipher(&ctr_paes_alg);
- if (ret)
+ rc = crypto_engine_register_skcipher(&ctr_paes_alg);
+ if (rc)
goto out_err;
+ pr_debug("%s registered\n", ctr_paes_alg.base.base.cra_driver_name);
}
return 0;
+
out_err:
paes_s390_fini();
- return ret;
+ return rc;
}
module_init(paes_s390_init);
module_exit(paes_s390_fini);
-MODULE_ALIAS_CRYPTO("paes");
+MODULE_ALIAS_CRYPTO("ecb(paes)");
+MODULE_ALIAS_CRYPTO("cbc(paes)");
+MODULE_ALIAS_CRYPTO("ctr(paes)");
+MODULE_ALIAS_CRYPTO("xts(paes)");
MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm with protected keys");
MODULE_LICENSE("GPL");
diff --git a/arch/s390/crypto/phmac_s390.c b/arch/s390/crypto/phmac_s390.c
new file mode 100644
index 000000000000..88342bd4c37a
--- /dev/null
+++ b/arch/s390/crypto/phmac_s390.c
@@ -0,0 +1,1063 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright IBM Corp. 2025
+ *
+ * s390 specific HMAC support for protected keys.
+ */
+
+#define pr_fmt(fmt) "phmac_s390: " fmt
+
+#include <asm/cpacf.h>
+#include <asm/pkey.h>
+#include <crypto/engine.h>
+#include <crypto/hash.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha2.h>
+#include <linux/atomic.h>
+#include <linux/cpufeature.h>
+#include <linux/delay.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+
+static struct crypto_engine *phmac_crypto_engine;
+#define MAX_QLEN 10
+
+/*
+ * A simple hash walk helper
+ */
+
+struct hash_walk_helper {
+ struct crypto_hash_walk walk;
+ const u8 *walkaddr;
+ int walkbytes;
+};
+
+/*
+ * Prepare hash walk helper.
+ * Set up the base hash walk, fill walkaddr and walkbytes.
+ * Returns 0 on success or negative value on error.
+ */
+static inline int hwh_prepare(struct ahash_request *req,
+ struct hash_walk_helper *hwh)
+{
+ hwh->walkbytes = crypto_hash_walk_first(req, &hwh->walk);
+ if (hwh->walkbytes < 0)
+ return hwh->walkbytes;
+ hwh->walkaddr = hwh->walk.data;
+ return 0;
+}
+
+/*
+ * Advance hash walk helper by n bytes.
+ * Progress the walkbytes and walkaddr fields by n bytes.
+ * If walkbytes is then 0, pull next hunk from hash walk
+ * and update walkbytes and walkaddr.
+ * If n is negative, unmap hash walk and return error.
+ * Returns 0 on success or negative value on error.
+ */
+static inline int hwh_advance(struct hash_walk_helper *hwh, int n)
+{
+ if (n < 0)
+ return crypto_hash_walk_done(&hwh->walk, n);
+
+ hwh->walkbytes -= n;
+ hwh->walkaddr += n;
+ if (hwh->walkbytes > 0)
+ return 0;
+
+ hwh->walkbytes = crypto_hash_walk_done(&hwh->walk, 0);
+ if (hwh->walkbytes < 0)
+ return hwh->walkbytes;
+
+ hwh->walkaddr = hwh->walk.data;
+ return 0;
+}
+
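
Illustration only, not part of the patch: a minimal sketch of the loop the two helpers above are meant to drive. The consume_hunk() callback is an assumption standing in for the CPACF processing done by the real update code; it returns the number of bytes it consumed, or a negative error.

static int example_hwh_loop(struct ahash_request *req,
			    struct hash_walk_helper *hwh,
			    int (*consume_hunk)(const u8 *addr, int len))
{
	int rc = hwh_prepare(req, hwh);

	while (!rc && hwh->walkbytes > 0) {
		int n = consume_hunk(hwh->walkaddr, hwh->walkbytes);

		/* a negative n unmaps the walk and ends the loop */
		rc = hwh_advance(hwh, n);
	}
	return rc;
}
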
+/*
+ * KMAC param block layout for sha2 function codes:
+ * The layout of the param block for the KMAC instruction depends on the
+ * block size of the sha2 algorithm behind the used function code. The param
+ * block contains the hash chaining value (cv), the input message bit-length
+ * (imbl) and the protected hmac key (key). To prevent code duplication, the
+ * sizes of all these are calculated based on the block size.
+ *
+ * param-block:
+ * +-------+
+ * | cv |
+ * +-------+
+ * | imbl |
+ * +-------+
+ * | key |
+ * +-------+
+ *
+ * sizes:
+ * part | sh2-alg | calculation  | size | type
+ * -----+---------+--------------+------+--------
+ * cv   | 224/256 | blocksize/2  |  32  | u32[8]
+ *      | 384/512 |              |  64  | u64[8]
+ * imbl | 224/256 | blocksize/8  |   8  | u64
+ *      | 384/512 |              |  16  | u128
+ * key  | 224/256 | blocksize+32 |  96  | u8[96]
+ *      | 384/512 |              | 160  | u8[160]
+ */
+
+#define MAX_DIGEST_SIZE SHA512_DIGEST_SIZE
+#define MAX_IMBL_SIZE sizeof(u128)
+#define MAX_BLOCK_SIZE SHA512_BLOCK_SIZE
+
+#define SHA2_CV_SIZE(bs) ((bs) >> 1)
+#define SHA2_IMBL_SIZE(bs) ((bs) >> 3)
+
+#define SHA2_IMBL_OFFSET(bs) (SHA2_CV_SIZE(bs))
+#define SHA2_KEY_OFFSET(bs) (SHA2_CV_SIZE(bs) + SHA2_IMBL_SIZE(bs))
+
+#define PHMAC_MAX_KEYSIZE 256
+#define PHMAC_SHA256_PK_SIZE (SHA256_BLOCK_SIZE + 32)
+#define PHMAC_SHA512_PK_SIZE (SHA512_BLOCK_SIZE + 32)
+#define PHMAC_MAX_PK_SIZE PHMAC_SHA512_PK_SIZE
+
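
Illustration only, not part of the patch: for the SHA-224/256 case (64-byte block size) the macros above pin the param block layout down to cv at offset 0 (32 bytes), imbl at offset 32 (8 bytes) and the protected key at offset 40 (96 bytes), which could be expressed as compile-time checks:

static_assert(SHA2_CV_SIZE(SHA256_BLOCK_SIZE) == 32);
static_assert(SHA2_IMBL_OFFSET(SHA256_BLOCK_SIZE) == 32);
static_assert(SHA2_IMBL_SIZE(SHA256_BLOCK_SIZE) == 8);
static_assert(SHA2_KEY_OFFSET(SHA256_BLOCK_SIZE) == 40);
static_assert(PHMAC_SHA256_PK_SIZE == 96);
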
+/* phmac protected key struct */
+struct phmac_protkey {
+ u32 type;
+ u32 len;
+ u8 protkey[PHMAC_MAX_PK_SIZE];
+};
+
+#define PK_STATE_NO_KEY 0
+#define PK_STATE_CONVERT_IN_PROGRESS 1
+#define PK_STATE_VALID 2
+
+/* phmac tfm context */
+struct phmac_tfm_ctx {
+ /* source key material used to derive a protected key from */
+ u8 keybuf[PHMAC_MAX_KEYSIZE];
+ unsigned int keylen;
+
+ /* cpacf function code to use with this protected key type */
+ long fc;
+
+ /* nr of requests enqueued via crypto engine which use this tfm ctx */
+ atomic_t via_engine_ctr;
+
+ /* spinlock to atomic read/update all the following fields */
+ spinlock_t pk_lock;
+
+ /* see PK_STATE* defines above, < 0 holds convert failure rc */
+ int pk_state;
+ /* if state is valid, pk holds the protected key */
+ struct phmac_protkey pk;
+};
+
+union kmac_gr0 {
+ unsigned long reg;
+ struct {
+ unsigned long : 48;
+ unsigned long ikp : 1;
+ unsigned long iimp : 1;
+ unsigned long ccup : 1;
+ unsigned long : 6;
+ unsigned long fc : 7;
+ };
+};
+
+struct kmac_sha2_ctx {
+ u8 param[MAX_DIGEST_SIZE + MAX_IMBL_SIZE + PHMAC_MAX_PK_SIZE];
+ union kmac_gr0 gr0;
+ u8 buf[MAX_BLOCK_SIZE];
+ u64 buflen[2];
+};
+
+enum async_op {
+ OP_NOP = 0,
+ OP_UPDATE,
+ OP_FINAL,
+ OP_FINUP,
+};
+
+/* phmac request context */
+struct phmac_req_ctx {
+ struct hash_walk_helper hwh;
+ struct kmac_sha2_ctx kmac_ctx;
+ enum async_op async_op;
+};
+
+/*
+ * Pkey 'token' struct used to derive a protected key value from a clear key.
+ */
+struct hmac_clrkey_token {
+ u8 type;
+ u8 res0[3];
+ u8 version;
+ u8 res1[3];
+ u32 keytype;
+ u32 len;
+ u8 key[];
+} __packed;
+
+static int hash_key(const u8 *in, unsigned int inlen,
+ u8 *digest, unsigned int digestsize)
+{
+ unsigned long func;
+ union {
+ struct sha256_paramblock {
+ u32 h[8];
+ u64 mbl;
+ } sha256;
+ struct sha512_paramblock {
+ u64 h[8];
+ u128 mbl;
+ } sha512;
+ } __packed param;
+
+#define PARAM_INIT(x, y, z) \
+ param.sha##x.h[0] = SHA##y ## _H0; \
+ param.sha##x.h[1] = SHA##y ## _H1; \
+ param.sha##x.h[2] = SHA##y ## _H2; \
+ param.sha##x.h[3] = SHA##y ## _H3; \
+ param.sha##x.h[4] = SHA##y ## _H4; \
+ param.sha##x.h[5] = SHA##y ## _H5; \
+ param.sha##x.h[6] = SHA##y ## _H6; \
+ param.sha##x.h[7] = SHA##y ## _H7; \
+ param.sha##x.mbl = (z)
+
+ switch (digestsize) {
+ case SHA224_DIGEST_SIZE:
+ func = CPACF_KLMD_SHA_256;
+ PARAM_INIT(256, 224, inlen * 8);
+ break;
+ case SHA256_DIGEST_SIZE:
+ func = CPACF_KLMD_SHA_256;
+ PARAM_INIT(256, 256, inlen * 8);
+ break;
+ case SHA384_DIGEST_SIZE:
+ func = CPACF_KLMD_SHA_512;
+ PARAM_INIT(512, 384, inlen * 8);
+ break;
+ case SHA512_DIGEST_SIZE:
+ func = CPACF_KLMD_SHA_512;
+ PARAM_INIT(512, 512, inlen * 8);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+#undef PARAM_INIT
+
+ cpacf_klmd(func, &param, in, inlen);
+
+ memcpy(digest, &param, digestsize);
+
+ return 0;
+}
+
+/*
+ * make_clrkey_token() - wrap the clear key into a pkey clearkey token.
+ */
+static inline int make_clrkey_token(const u8 *clrkey, size_t clrkeylen,
+ unsigned int digestsize, u8 *dest)
+{
+ struct hmac_clrkey_token *token = (struct hmac_clrkey_token *)dest;
+ unsigned int blocksize;
+ int rc;
+
+ token->type = 0x00;
+ token->version = 0x02;
+ switch (digestsize) {
+ case SHA224_DIGEST_SIZE:
+ case SHA256_DIGEST_SIZE:
+ token->keytype = PKEY_KEYTYPE_HMAC_512;
+ blocksize = 64;
+ break;
+ case SHA384_DIGEST_SIZE:
+ case SHA512_DIGEST_SIZE:
+ token->keytype = PKEY_KEYTYPE_HMAC_1024;
+ blocksize = 128;
+ break;
+ default:
+ return -EINVAL;
+ }
+ token->len = blocksize;
+
+ if (clrkeylen > blocksize) {
+ rc = hash_key(clrkey, clrkeylen, token->key, digestsize);
+ if (rc)
+ return rc;
+ } else {
+ memcpy(token->key, clrkey, clrkeylen);
+ }
+
+ return 0;
+}
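
A hedged usage sketch (editor's illustration, not part of the patch; user_key/user_keylen are hypothetical): for the sha256 variants the token carries a 64-byte key field, so a shorter key is copied into the zeroed buffer as-is and a longer one is first hashed down to 32 bytes, mirroring the usual HMAC key preprocessing:

	/* buffer sized for an HMAC-512 type token: header plus 64-byte key field */
	u8 tokbuf[sizeof(struct hmac_clrkey_token) + SHA256_BLOCK_SIZE] = {};
	int rc;

	rc = make_clrkey_token(user_key, user_keylen, SHA256_DIGEST_SIZE, tokbuf);
	/*
	 * rc == 0: tokbuf now holds type 0x00, version 0x02,
	 * keytype PKEY_KEYTYPE_HMAC_512, len 64 and the (possibly hashed,
	 * zero-padded) key material, ready to be handed to the pkey layer.
	 */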
+
+/*
+ * phmac_tfm_ctx_setkey() - Set key value into tfm context, maybe construct
+ * a clear key token digestible by pkey from a clear key value.
+ */
+static inline int phmac_tfm_ctx_setkey(struct phmac_tfm_ctx *tfm_ctx,
+ const u8 *key, unsigned int keylen)
+{
+ if (keylen > sizeof(tfm_ctx->keybuf))
+ return -EINVAL;
+
+ memcpy(tfm_ctx->keybuf, key, keylen);
+ tfm_ctx->keylen = keylen;
+
+ return 0;
+}
+
+/*
+ * Convert the raw key material into a protected key via PKEY api.
+ * This function may sleep - don't call in non-sleeping context.
+ */
+static inline int convert_key(const u8 *key, unsigned int keylen,
+ struct phmac_protkey *pk)
+{
+ int rc, i;
+
+ pk->len = sizeof(pk->protkey);
+
+ /*
+ * In case of a busy card, retry with increasing delays
+ * of 200, 400, 800 and 1600 ms - 3 s in total.
+ */
+ for (rc = -EIO, i = 0; rc && i < 5; i++) {
+ if (rc == -EBUSY && msleep_interruptible((1 << i) * 100)) {
+ rc = -EINTR;
+ goto out;
+ }
+ rc = pkey_key2protkey(key, keylen,
+ pk->protkey, &pk->len, &pk->type,
+ PKEY_XFLAG_NOMEMALLOC);
+ }
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+/*
+ * (Re-)Convert the raw key material from the tfm ctx into a protected
+ * key via convert_key() function. Update the pk_state, pk_type, pk_len
+ * and the protected key in the tfm context.
+ * Please note that this function may run concurrently on the very
+ * same tfm context. The pk_lock spinlock in the context ensures an
+ * atomic update of the pk and the pk state, but it does not
+ * guarantee any particular order of updates. So a freshly converted,
+ * valid protected key may get overwritten with an 'old', expired key
+ * value. This does no harm, because the cpacf instructions detect the
+ * invalid key, refuse to operate with it, and the calling code then
+ * triggers a (re-)conversion. This may lead to an unnecessary extra
+ * conversion, but never to invalid data in the hash operation.
+ */
+static int phmac_convert_key(struct phmac_tfm_ctx *tfm_ctx)
+{
+ struct phmac_protkey pk;
+ int rc;
+
+ spin_lock_bh(&tfm_ctx->pk_lock);
+ tfm_ctx->pk_state = PK_STATE_CONVERT_IN_PROGRESS;
+ spin_unlock_bh(&tfm_ctx->pk_lock);
+
+ rc = convert_key(tfm_ctx->keybuf, tfm_ctx->keylen, &pk);
+
+ /* update context */
+ spin_lock_bh(&tfm_ctx->pk_lock);
+ if (rc) {
+ tfm_ctx->pk_state = rc;
+ } else {
+ tfm_ctx->pk_state = PK_STATE_VALID;
+ tfm_ctx->pk = pk;
+ }
+ spin_unlock_bh(&tfm_ctx->pk_lock);
+
+ memzero_explicit(&pk, sizeof(pk));
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+/*
+ * kmac_sha2_set_imbl - sets the input message bit-length based on the blocksize
+ */
+static inline void kmac_sha2_set_imbl(u8 *param, u64 buflen_lo,
+ u64 buflen_hi, unsigned int blocksize)
+{
+ u8 *imbl = param + SHA2_IMBL_OFFSET(blocksize);
+
+ switch (blocksize) {
+ case SHA256_BLOCK_SIZE:
+ *(u64 *)imbl = buflen_lo * BITS_PER_BYTE;
+ break;
+ case SHA512_BLOCK_SIZE:
+ *(u128 *)imbl = (((u128)buflen_hi << 64) + buflen_lo) << 3;
+ break;
+ default:
+ break;
+ }
+}
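
As a quick sanity check on the helper above (editor's example, not in the patch), a 200-byte total message length maps to a bit length of 1600:

	/* SHA-512 family: stores (u128)1600 at ctx->param + 64 */
	kmac_sha2_set_imbl(ctx->param, 200, 0, SHA512_BLOCK_SIZE);
	/* SHA-256 family: would store (u64)1600 at ctx->param + 32 instead */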
+
+static int phmac_kmac_update(struct ahash_request *req, bool maysleep)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+ struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+ struct kmac_sha2_ctx *ctx = &req_ctx->kmac_ctx;
+ struct hash_walk_helper *hwh = &req_ctx->hwh;
+ unsigned int bs = crypto_ahash_blocksize(tfm);
+ unsigned int offset, k, n;
+ int rc = 0;
+
+ /*
+ * The walk is always mapped when this function is called.
+ * Note that in case of partial processing or failure the walk
+ * is NOT unmapped here. So a follow-up task may reuse the walk,
+ * or, in case of an unrecoverable failure, needs to unmap it.
+ */
+
+ while (hwh->walkbytes > 0) {
+ /* check sha2 context buffer */
+ offset = ctx->buflen[0] % bs;
+ if (offset + hwh->walkbytes < bs)
+ goto store;
+
+ if (offset) {
+ /* fill ctx buffer up to blocksize and process this block */
+ n = bs - offset;
+ memcpy(ctx->buf + offset, hwh->walkaddr, n);
+ ctx->gr0.iimp = 1;
+ for (;;) {
+ k = _cpacf_kmac(&ctx->gr0.reg, ctx->param, ctx->buf, bs);
+ if (likely(k == bs))
+ break;
+ if (unlikely(k > 0)) {
+ /*
+ * Can't deal with hunks smaller than blocksize.
+ * And kmac should always return the nr of
+ * processed bytes as 0 or a multiple of the
+ * blocksize.
+ */
+ rc = -EIO;
+ goto out;
+ }
+ /* protected key is invalid and needs re-conversion */
+ if (!maysleep) {
+ rc = -EKEYEXPIRED;
+ goto out;
+ }
+ rc = phmac_convert_key(tfm_ctx);
+ if (rc)
+ goto out;
+ spin_lock_bh(&tfm_ctx->pk_lock);
+ memcpy(ctx->param + SHA2_KEY_OFFSET(bs),
+ tfm_ctx->pk.protkey, tfm_ctx->pk.len);
+ spin_unlock_bh(&tfm_ctx->pk_lock);
+ }
+ ctx->buflen[0] += n;
+ if (ctx->buflen[0] < n)
+ ctx->buflen[1]++;
+ rc = hwh_advance(hwh, n);
+ if (unlikely(rc))
+ goto out;
+ offset = 0;
+ }
+
+ /* process as many blocks as possible from the walk */
+ while (hwh->walkbytes >= bs) {
+ n = (hwh->walkbytes / bs) * bs;
+ ctx->gr0.iimp = 1;
+ k = _cpacf_kmac(&ctx->gr0.reg, ctx->param, hwh->walkaddr, n);
+ if (likely(k > 0)) {
+ ctx->buflen[0] += k;
+ if (ctx->buflen[0] < k)
+ ctx->buflen[1]++;
+ rc = hwh_advance(hwh, k);
+ if (unlikely(rc))
+ goto out;
+ }
+ if (unlikely(k < n)) {
+ /* protected key is invalid and needs re-conversion */
+ if (!maysleep) {
+ rc = -EKEYEXPIRED;
+ goto out;
+ }
+ rc = phmac_convert_key(tfm_ctx);
+ if (rc)
+ goto out;
+ spin_lock_bh(&tfm_ctx->pk_lock);
+ memcpy(ctx->param + SHA2_KEY_OFFSET(bs),
+ tfm_ctx->pk.protkey, tfm_ctx->pk.len);
+ spin_unlock_bh(&tfm_ctx->pk_lock);
+ }
+ }
+
+store:
+ /* store incomplete block in context buffer */
+ if (hwh->walkbytes) {
+ memcpy(ctx->buf + offset, hwh->walkaddr, hwh->walkbytes);
+ ctx->buflen[0] += hwh->walkbytes;
+ if (ctx->buflen[0] < hwh->walkbytes)
+ ctx->buflen[1]++;
+ rc = hwh_advance(hwh, hwh->walkbytes);
+ if (unlikely(rc))
+ goto out;
+ }
+
+ } /* end of while (hwh->walkbytes > 0) */
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static int phmac_kmac_final(struct ahash_request *req, bool maysleep)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+ struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+ struct kmac_sha2_ctx *ctx = &req_ctx->kmac_ctx;
+ unsigned int ds = crypto_ahash_digestsize(tfm);
+ unsigned int bs = crypto_ahash_blocksize(tfm);
+ unsigned int k, n;
+ int rc = 0;
+
+ n = ctx->buflen[0] % bs;
+ ctx->gr0.iimp = 0;
+ kmac_sha2_set_imbl(ctx->param, ctx->buflen[0], ctx->buflen[1], bs);
+ for (;;) {
+ k = _cpacf_kmac(&ctx->gr0.reg, ctx->param, ctx->buf, n);
+ if (likely(k == n))
+ break;
+ if (unlikely(k > 0)) {
+ /* Can't deal with hunks smaller than blocksize. */
+ rc = -EIO;
+ goto out;
+ }
+ /* protected key is invalid and needs re-conversion */
+ if (!maysleep) {
+ rc = -EKEYEXPIRED;
+ goto out;
+ }
+ rc = phmac_convert_key(tfm_ctx);
+ if (rc)
+ goto out;
+ spin_lock_bh(&tfm_ctx->pk_lock);
+ memcpy(ctx->param + SHA2_KEY_OFFSET(bs),
+ tfm_ctx->pk.protkey, tfm_ctx->pk.len);
+ spin_unlock_bh(&tfm_ctx->pk_lock);
+ }
+
+ memcpy(req->result, ctx->param, ds);
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static int phmac_init(struct ahash_request *req)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+ struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+ struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx;
+ unsigned int bs = crypto_ahash_blocksize(tfm);
+ int rc = 0;
+
+ /* zero request context (includes the kmac sha2 context) */
+ memset(req_ctx, 0, sizeof(*req_ctx));
+
+ /*
+ * setkey() should have set a valid fc into the tfm context.
+ * Copy this function code into the gr0 field of the kmac context.
+ */
+ if (!tfm_ctx->fc) {
+ rc = -ENOKEY;
+ goto out;
+ }
+ kmac_ctx->gr0.fc = tfm_ctx->fc;
+
+ /*
+ * Copy the pk from tfm ctx into kmac ctx. The protected key
+ * may be outdated but update() and final() will handle this.
+ */
+ spin_lock_bh(&tfm_ctx->pk_lock);
+ memcpy(kmac_ctx->param + SHA2_KEY_OFFSET(bs),
+ tfm_ctx->pk.protkey, tfm_ctx->pk.len);
+ spin_unlock_bh(&tfm_ctx->pk_lock);
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static int phmac_update(struct ahash_request *req)
+{
+ struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+ struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx;
+ struct hash_walk_helper *hwh = &req_ctx->hwh;
+ int rc;
+
+ /* prep the walk in the request context */
+ rc = hwh_prepare(req, hwh);
+ if (rc)
+ goto out;
+
+ /* Try synchronous operation if no active engine usage */
+ if (!atomic_read(&tfm_ctx->via_engine_ctr)) {
+ rc = phmac_kmac_update(req, false);
+ if (rc == 0)
+ goto out;
+ }
+
+ /*
+ * If the sync operation failed, or the key expired, or there are
+ * already requests enqueued via the engine, fall back to async.
+ * Mark the tfm as using the engine to serialize requests.
+ */
+ if (rc == 0 || rc == -EKEYEXPIRED) {
+ req_ctx->async_op = OP_UPDATE;
+ atomic_inc(&tfm_ctx->via_engine_ctr);
+ rc = crypto_transfer_hash_request_to_engine(phmac_crypto_engine, req);
+ if (rc != -EINPROGRESS)
+ atomic_dec(&tfm_ctx->via_engine_ctr);
+ }
+
+ if (rc != -EINPROGRESS) {
+ hwh_advance(hwh, rc);
+ memzero_explicit(kmac_ctx, sizeof(*kmac_ctx));
+ }
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static int phmac_final(struct ahash_request *req)
+{
+ struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+ struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx;
+ int rc = 0;
+
+ /* Try synchronous operation if no active engine usage */
+ if (!atomic_read(&tfm_ctx->via_engine_ctr)) {
+ rc = phmac_kmac_final(req, false);
+ if (rc == 0)
+ goto out;
+ }
+
+ /*
+ * If the sync operation failed, or the key expired, or there are
+ * already requests enqueued via the engine, fall back to async.
+ * Mark the tfm as using the engine to serialize requests.
+ */
+ if (rc == 0 || rc == -EKEYEXPIRED) {
+ req_ctx->async_op = OP_FINAL;
+ atomic_inc(&tfm_ctx->via_engine_ctr);
+ rc = crypto_transfer_hash_request_to_engine(phmac_crypto_engine, req);
+ if (rc != -EINPROGRESS)
+ atomic_dec(&tfm_ctx->via_engine_ctr);
+ }
+
+out:
+ if (rc != -EINPROGRESS)
+ memzero_explicit(kmac_ctx, sizeof(*kmac_ctx));
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static int phmac_finup(struct ahash_request *req)
+{
+ struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+ struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx;
+ struct hash_walk_helper *hwh = &req_ctx->hwh;
+ int rc;
+
+ /* prep the walk in the request context */
+ rc = hwh_prepare(req, hwh);
+ if (rc)
+ goto out;
+
+ req_ctx->async_op = OP_FINUP;
+
+ /* Try synchronous operations if no active engine usage */
+ if (!atomic_read(&tfm_ctx->via_engine_ctr)) {
+ rc = phmac_kmac_update(req, false);
+ if (rc == 0)
+ req_ctx->async_op = OP_FINAL;
+ }
+ if (!rc && req_ctx->async_op == OP_FINAL &&
+ !atomic_read(&tfm_ctx->via_engine_ctr)) {
+ rc = phmac_kmac_final(req, false);
+ if (rc == 0)
+ goto out;
+ }
+
+ /*
+ * If the sync operation failed, or the key expired, or there are
+ * already requests enqueued via the engine, fall back to async.
+ * Mark the tfm as using the engine to serialize requests.
+ */
+ if (rc == 0 || rc == -EKEYEXPIRED) {
+ /* req_ctx->async_op has been set to either OP_FINUP or OP_FINAL */
+ atomic_inc(&tfm_ctx->via_engine_ctr);
+ rc = crypto_transfer_hash_request_to_engine(phmac_crypto_engine, req);
+ if (rc != -EINPROGRESS)
+ atomic_dec(&tfm_ctx->via_engine_ctr);
+ }
+
+ if (rc != -EINPROGRESS)
+ hwh_advance(hwh, rc);
+
+out:
+ if (rc != -EINPROGRESS)
+ memzero_explicit(kmac_ctx, sizeof(*kmac_ctx));
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static int phmac_digest(struct ahash_request *req)
+{
+ int rc;
+
+ rc = phmac_init(req);
+ if (rc)
+ goto out;
+
+ rc = phmac_finup(req);
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static int phmac_setkey(struct crypto_ahash *tfm,
+ const u8 *key, unsigned int keylen)
+{
+ struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+ unsigned int ds = crypto_ahash_digestsize(tfm);
+ unsigned int bs = crypto_ahash_blocksize(tfm);
+ unsigned int tmpkeylen;
+ u8 *tmpkey = NULL;
+ int rc = 0;
+
+ if (!crypto_ahash_tested(tfm)) {
+ /*
+ * selftest running: the key is a raw hmac clear key and needs
+ * to be embedded into a 'clear key token' in order to have
+ * it correctly processed by the pkey module.
+ */
+ tmpkeylen = sizeof(struct hmac_clrkey_token) + bs;
+ tmpkey = kzalloc(tmpkeylen, GFP_KERNEL);
+ if (!tmpkey) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ rc = make_clrkey_token(key, keylen, ds, tmpkey);
+ if (rc)
+ goto out;
+ keylen = tmpkeylen;
+ key = tmpkey;
+ }
+
+ /* copy raw key into tfm context */
+ rc = phmac_tfm_ctx_setkey(tfm_ctx, key, keylen);
+ if (rc)
+ goto out;
+
+ /* convert raw key into protected key */
+ rc = phmac_convert_key(tfm_ctx);
+ if (rc)
+ goto out;
+
+ /* set function code in tfm context, check for valid pk type */
+ switch (ds) {
+ case SHA224_DIGEST_SIZE:
+ if (tfm_ctx->pk.type != PKEY_KEYTYPE_HMAC_512)
+ rc = -EINVAL;
+ else
+ tfm_ctx->fc = CPACF_KMAC_PHMAC_SHA_224;
+ break;
+ case SHA256_DIGEST_SIZE:
+ if (tfm_ctx->pk.type != PKEY_KEYTYPE_HMAC_512)
+ rc = -EINVAL;
+ else
+ tfm_ctx->fc = CPACF_KMAC_PHMAC_SHA_256;
+ break;
+ case SHA384_DIGEST_SIZE:
+ if (tfm_ctx->pk.type != PKEY_KEYTYPE_HMAC_1024)
+ rc = -EINVAL;
+ else
+ tfm_ctx->fc = CPACF_KMAC_PHMAC_SHA_384;
+ break;
+ case SHA512_DIGEST_SIZE:
+ if (tfm_ctx->pk.type != PKEY_KEYTYPE_HMAC_1024)
+ rc = -EINVAL;
+ else
+ tfm_ctx->fc = CPACF_KMAC_PHMAC_SHA_512;
+ break;
+ default:
+ tfm_ctx->fc = 0;
+ rc = -EINVAL;
+ }
+
+out:
+ kfree(tmpkey);
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static int phmac_export(struct ahash_request *req, void *out)
+{
+ struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+ struct kmac_sha2_ctx *ctx = &req_ctx->kmac_ctx;
+
+ memcpy(out, ctx, sizeof(*ctx));
+
+ return 0;
+}
+
+static int phmac_import(struct ahash_request *req, const void *in)
+{
+ struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+ struct kmac_sha2_ctx *ctx = &req_ctx->kmac_ctx;
+
+ memset(req_ctx, 0, sizeof(*req_ctx));
+ memcpy(ctx, in, sizeof(*ctx));
+
+ return 0;
+}
+
+static int phmac_init_tfm(struct crypto_ahash *tfm)
+{
+ struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+
+ memset(tfm_ctx, 0, sizeof(*tfm_ctx));
+ spin_lock_init(&tfm_ctx->pk_lock);
+
+ crypto_ahash_set_reqsize(tfm, sizeof(struct phmac_req_ctx));
+
+ return 0;
+}
+
+static void phmac_exit_tfm(struct crypto_ahash *tfm)
+{
+ struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+
+ memzero_explicit(tfm_ctx->keybuf, sizeof(tfm_ctx->keybuf));
+ memzero_explicit(&tfm_ctx->pk, sizeof(tfm_ctx->pk));
+}
+
+static int phmac_do_one_request(struct crypto_engine *engine, void *areq)
+{
+ struct ahash_request *req = ahash_request_cast(areq);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+ struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+ struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx;
+ struct hash_walk_helper *hwh = &req_ctx->hwh;
+ int rc = -EINVAL;
+
+ /*
+ * Three kinds of requests come in here:
+ * 1. req_ctx->async_op == OP_UPDATE with req->nbytes > 0
+ * 2. req_ctx->async_op == OP_FINUP with req->nbytes > 0
+ * 3. req_ctx->async_op == OP_FINAL
+ * For update and finup the hwh walk has already been prepared
+ * by the caller. For final there is no hwh walk needed.
+ */
+
+ switch (req_ctx->async_op) {
+ case OP_UPDATE:
+ case OP_FINUP:
+ rc = phmac_kmac_update(req, true);
+ if (rc == -EKEYEXPIRED) {
+ /*
+ * Protected key expired, conversion is in progress.
+ * Trigger a re-schedule of this request by returning
+ * -ENOSPC ("hardware queue full") to the crypto engine.
+ * To avoid immediate re-invocation of this callback, tell
+ * the scheduler to voluntarily give up the CPU here.
+ */
+ pr_debug("rescheduling request\n");
+ cond_resched();
+ return -ENOSPC;
+ } else if (rc) {
+ hwh_advance(hwh, rc);
+ goto out;
+ }
+ if (req_ctx->async_op == OP_UPDATE)
+ break;
+ req_ctx->async_op = OP_FINAL;
+ fallthrough;
+ case OP_FINAL:
+ rc = phmac_kmac_final(req, true);
+ if (rc == -EKEYEXPIRED) {
+ /*
+ * Protected key expired, conversion is in progress.
+ * Trigger a re-schedule of this request by returning
+ * -ENOSPC ("hardware queue full") to the crypto engine.
+ * To avoid immediate re-invocation of this callback, tell
+ * the scheduler to voluntarily give up the CPU here.
+ */
+ pr_debug("rescheduling request\n");
+ cond_resched();
+ return -ENOSPC;
+ }
+ break;
+ default:
+ /* unknown/unsupported/unimplemented async op */
+ return -EOPNOTSUPP;
+ }
+
+out:
+ if (rc || req_ctx->async_op == OP_FINAL)
+ memzero_explicit(kmac_ctx, sizeof(*kmac_ctx));
+ pr_debug("request complete with rc=%d\n", rc);
+ local_bh_disable();
+ atomic_dec(&tfm_ctx->via_engine_ctr);
+ crypto_finalize_hash_request(engine, req, rc);
+ local_bh_enable();
+ return rc;
+}
+
+#define S390_ASYNC_PHMAC_ALG(x) \
+{ \
+ .base = { \
+ .init = phmac_init, \
+ .update = phmac_update, \
+ .final = phmac_final, \
+ .finup = phmac_finup, \
+ .digest = phmac_digest, \
+ .setkey = phmac_setkey, \
+ .import = phmac_import, \
+ .export = phmac_export, \
+ .init_tfm = phmac_init_tfm, \
+ .exit_tfm = phmac_exit_tfm, \
+ .halg = { \
+ .digestsize = SHA##x##_DIGEST_SIZE, \
+ .statesize = sizeof(struct kmac_sha2_ctx), \
+ .base = { \
+ .cra_name = "phmac(sha" #x ")", \
+ .cra_driver_name = "phmac_s390_sha" #x, \
+ .cra_blocksize = SHA##x##_BLOCK_SIZE, \
+ .cra_priority = 400, \
+ .cra_flags = CRYPTO_ALG_ASYNC | \
+ CRYPTO_ALG_NO_FALLBACK, \
+ .cra_ctxsize = sizeof(struct phmac_tfm_ctx), \
+ .cra_module = THIS_MODULE, \
+ }, \
+ }, \
+ }, \
+ .op = { \
+ .do_one_request = phmac_do_one_request, \
+ }, \
+}
+
+static struct phmac_alg {
+ unsigned int fc;
+ struct ahash_engine_alg alg;
+ bool registered;
+} phmac_algs[] = {
+ {
+ .fc = CPACF_KMAC_PHMAC_SHA_224,
+ .alg = S390_ASYNC_PHMAC_ALG(224),
+ }, {
+ .fc = CPACF_KMAC_PHMAC_SHA_256,
+ .alg = S390_ASYNC_PHMAC_ALG(256),
+ }, {
+ .fc = CPACF_KMAC_PHMAC_SHA_384,
+ .alg = S390_ASYNC_PHMAC_ALG(384),
+ }, {
+ .fc = CPACF_KMAC_PHMAC_SHA_512,
+ .alg = S390_ASYNC_PHMAC_ALG(512),
+ }
+};
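
For context, a minimal sketch of a kernel consumer of the transforms registered above, using only the standard crypto API (added by the editor, not part of the patch). Note that outside of the selftests crypto_ahash_setkey() expects pkey key material (for example a secure key token), not a raw HMAC key, as phmac_setkey() above shows; error handling is kept minimal:

	#include <crypto/hash.h>
	#include <linux/scatterlist.h>

	static int phmac_sha256_demo(const u8 *key, unsigned int keylen,
				     const u8 *data, unsigned int len, u8 *digest)
	{
		struct crypto_ahash *tfm;
		struct ahash_request *req;
		struct scatterlist sg;
		DECLARE_CRYPTO_WAIT(wait);
		int rc;

		tfm = crypto_alloc_ahash("phmac(sha256)", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);
		req = ahash_request_alloc(tfm, GFP_KERNEL);
		if (!req) {
			crypto_free_ahash(tfm);
			return -ENOMEM;
		}
		rc = crypto_ahash_setkey(tfm, key, keylen);
		if (!rc) {
			sg_init_one(&sg, data, len);
			ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
						   crypto_req_done, &wait);
			ahash_request_set_crypt(req, &sg, digest, len);
			/* digest() may complete via the crypto engine; wait for it */
			rc = crypto_wait_req(crypto_ahash_digest(req), &wait);
		}
		ahash_request_free(req);
		crypto_free_ahash(tfm);
		return rc;
	}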
+
+static struct miscdevice phmac_dev = {
+ .name = "phmac",
+ .minor = MISC_DYNAMIC_MINOR,
+};
+
+static void s390_phmac_exit(void)
+{
+ struct phmac_alg *phmac;
+ int i;
+
+ if (phmac_crypto_engine) {
+ crypto_engine_stop(phmac_crypto_engine);
+ crypto_engine_exit(phmac_crypto_engine);
+ }
+
+ for (i = ARRAY_SIZE(phmac_algs) - 1; i >= 0; i--) {
+ phmac = &phmac_algs[i];
+ if (phmac->registered)
+ crypto_engine_unregister_ahash(&phmac->alg);
+ }
+
+ misc_deregister(&phmac_dev);
+}
+
+static int __init s390_phmac_init(void)
+{
+ struct phmac_alg *phmac;
+ int i, rc;
+
+ /* for the selftests the cpacf klmd subfunctions are needed */
+ if (!cpacf_query_func(CPACF_KLMD, CPACF_KLMD_SHA_256))
+ return -ENODEV;
+ if (!cpacf_query_func(CPACF_KLMD, CPACF_KLMD_SHA_512))
+ return -ENODEV;
+
+ /* register a simple phmac pseudo misc device */
+ rc = misc_register(&phmac_dev);
+ if (rc)
+ return rc;
+
+ /* with this pseudo device alloc and start a crypto engine */
+ phmac_crypto_engine =
+ crypto_engine_alloc_init_and_set(phmac_dev.this_device,
+ true, false, MAX_QLEN);
+ if (!phmac_crypto_engine) {
+ rc = -ENOMEM;
+ goto out_err;
+ }
+ rc = crypto_engine_start(phmac_crypto_engine);
+ if (rc) {
+ crypto_engine_exit(phmac_crypto_engine);
+ phmac_crypto_engine = NULL;
+ goto out_err;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(phmac_algs); i++) {
+ phmac = &phmac_algs[i];
+ if (!cpacf_query_func(CPACF_KMAC, phmac->fc))
+ continue;
+ rc = crypto_engine_register_ahash(&phmac->alg);
+ if (rc)
+ goto out_err;
+ phmac->registered = true;
+ pr_debug("%s registered\n", phmac->alg.base.halg.base.cra_name);
+ }
+
+ return 0;
+
+out_err:
+ s390_phmac_exit();
+ return rc;
+}
+
+module_init(s390_phmac_init);
+module_exit(s390_phmac_exit);
+
+MODULE_ALIAS_CRYPTO("phmac(sha224)");
+MODULE_ALIAS_CRYPTO("phmac(sha256)");
+MODULE_ALIAS_CRYPTO("phmac(sha384)");
+MODULE_ALIAS_CRYPTO("phmac(sha512)");
+
+MODULE_DESCRIPTION("S390 HMAC driver for protected keys");
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index a077087bc6cc..84e3d3c6ba09 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -6,8 +6,7 @@
* Driver for the s390 pseudo random number generator
*/
-#define KMSG_COMPONENT "prng"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "prng: " fmt
#include <linux/fs.h>
#include <linux/fips.h>
@@ -679,7 +678,7 @@ static ssize_t prng_chunksize_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- return scnprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size);
+ return sysfs_emit(buf, "%u\n", prng_chunk_size);
}
static DEVICE_ATTR(chunksize, 0444, prng_chunksize_show, NULL);
@@ -698,7 +697,7 @@ static ssize_t prng_counter_show(struct device *dev,
counter = prng_data->prngws.byte_counter;
mutex_unlock(&prng_data->mutex);
- return scnprintf(buf, PAGE_SIZE, "%llu\n", counter);
+ return sysfs_emit(buf, "%llu\n", counter);
}
static DEVICE_ATTR(byte_counter, 0444, prng_counter_show, NULL);
@@ -707,7 +706,7 @@ static ssize_t prng_errorflag_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- return scnprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag);
+ return sysfs_emit(buf, "%d\n", prng_errorflag);
}
static DEVICE_ATTR(errorflag, 0444, prng_errorflag_show, NULL);
@@ -717,9 +716,9 @@ static ssize_t prng_mode_show(struct device *dev,
char *buf)
{
if (prng_mode == PRNG_MODE_TDES)
- return scnprintf(buf, PAGE_SIZE, "TDES\n");
+ return sysfs_emit(buf, "TDES\n");
else
- return scnprintf(buf, PAGE_SIZE, "SHA512\n");
+ return sysfs_emit(buf, "SHA512\n");
}
static DEVICE_ATTR(mode, 0444, prng_mode_show, NULL);
@@ -742,7 +741,7 @@ static ssize_t prng_reseed_limit_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- return scnprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit);
+ return sysfs_emit(buf, "%u\n", prng_reseed_limit);
}
static ssize_t prng_reseed_limit_store(struct device *dev,
struct device_attribute *attr,
@@ -773,7 +772,7 @@ static ssize_t prng_strength_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- return scnprintf(buf, PAGE_SIZE, "256\n");
+ return sysfs_emit(buf, "256\n");
}
static DEVICE_ATTR(strength, 0444, prng_strength_show, NULL);
diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h
deleted file mode 100644
index 65ea12fc87a1..000000000000
--- a/arch/s390/crypto/sha.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ */
-/*
- * Cryptographic API.
- *
- * s390 generic implementation of the SHA Secure Hash Algorithms.
- *
- * Copyright IBM Corp. 2007
- * Author(s): Jan Glauber (jang@de.ibm.com)
- */
-#ifndef _CRYPTO_ARCH_S390_SHA_H
-#define _CRYPTO_ARCH_S390_SHA_H
-
-#include <linux/crypto.h>
-#include <crypto/sha1.h>
-#include <crypto/sha2.h>
-#include <crypto/sha3.h>
-
-/* must be big enough for the largest SHA variant */
-#define SHA3_STATE_SIZE 200
-#define CPACF_MAX_PARMBLOCK_SIZE SHA3_STATE_SIZE
-#define SHA_MAX_BLOCK_SIZE SHA3_224_BLOCK_SIZE
-
-struct s390_sha_ctx {
- u64 count; /* message length in bytes */
- u32 state[CPACF_MAX_PARMBLOCK_SIZE / sizeof(u32)];
- u8 buf[SHA_MAX_BLOCK_SIZE];
- int func; /* KIMD function to use */
-};
-
-struct shash_desc;
-
-int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len);
-int s390_sha_final(struct shash_desc *desc, u8 *out);
-
-#endif
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
deleted file mode 100644
index bc3a22704e09..000000000000
--- a/arch/s390/crypto/sha1_s390.c
+++ /dev/null
@@ -1,103 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Cryptographic API.
- *
- * s390 implementation of the SHA1 Secure Hash Algorithm.
- *
- * Derived from cryptoapi implementation, adapted for in-place
- * scatterlist interface. Originally based on the public domain
- * implementation written by Steve Reid.
- *
- * s390 Version:
- * Copyright IBM Corp. 2003, 2007
- * Author(s): Thomas Spatzier
- * Jan Glauber (jan.glauber@de.ibm.com)
- *
- * Derived from "crypto/sha1_generic.c"
- * Copyright (c) Alan Smithee.
- * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
- * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
- */
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/cpufeature.h>
-#include <crypto/sha1.h>
-#include <asm/cpacf.h>
-
-#include "sha.h"
-
-static int s390_sha1_init(struct shash_desc *desc)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-
- sctx->state[0] = SHA1_H0;
- sctx->state[1] = SHA1_H1;
- sctx->state[2] = SHA1_H2;
- sctx->state[3] = SHA1_H3;
- sctx->state[4] = SHA1_H4;
- sctx->count = 0;
- sctx->func = CPACF_KIMD_SHA_1;
-
- return 0;
-}
-
-static int s390_sha1_export(struct shash_desc *desc, void *out)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- struct sha1_state *octx = out;
-
- octx->count = sctx->count;
- memcpy(octx->state, sctx->state, sizeof(octx->state));
- memcpy(octx->buffer, sctx->buf, sizeof(octx->buffer));
- return 0;
-}
-
-static int s390_sha1_import(struct shash_desc *desc, const void *in)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- const struct sha1_state *ictx = in;
-
- sctx->count = ictx->count;
- memcpy(sctx->state, ictx->state, sizeof(ictx->state));
- memcpy(sctx->buf, ictx->buffer, sizeof(ictx->buffer));
- sctx->func = CPACF_KIMD_SHA_1;
- return 0;
-}
-
-static struct shash_alg alg = {
- .digestsize = SHA1_DIGEST_SIZE,
- .init = s390_sha1_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
- .export = s390_sha1_export,
- .import = s390_sha1_import,
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha1_state),
- .base = {
- .cra_name = "sha1",
- .cra_driver_name= "sha1-s390",
- .cra_priority = 300,
- .cra_blocksize = SHA1_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-};
-
-static int __init sha1_s390_init(void)
-{
- if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_1))
- return -ENODEV;
- return crypto_register_shash(&alg);
-}
-
-static void __exit sha1_s390_fini(void)
-{
- crypto_unregister_shash(&alg);
-}
-
-module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha1_s390_init);
-module_exit(sha1_s390_fini);
-
-MODULE_ALIAS_CRYPTO("sha1");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
deleted file mode 100644
index 6f1ccdf93d3e..000000000000
--- a/arch/s390/crypto/sha256_s390.c
+++ /dev/null
@@ -1,143 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Cryptographic API.
- *
- * s390 implementation of the SHA256 and SHA224 Secure Hash Algorithm.
- *
- * s390 Version:
- * Copyright IBM Corp. 2005, 2011
- * Author(s): Jan Glauber (jang@de.ibm.com)
- */
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/cpufeature.h>
-#include <crypto/sha2.h>
-#include <asm/cpacf.h>
-
-#include "sha.h"
-
-static int s390_sha256_init(struct shash_desc *desc)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-
- sctx->state[0] = SHA256_H0;
- sctx->state[1] = SHA256_H1;
- sctx->state[2] = SHA256_H2;
- sctx->state[3] = SHA256_H3;
- sctx->state[4] = SHA256_H4;
- sctx->state[5] = SHA256_H5;
- sctx->state[6] = SHA256_H6;
- sctx->state[7] = SHA256_H7;
- sctx->count = 0;
- sctx->func = CPACF_KIMD_SHA_256;
-
- return 0;
-}
-
-static int sha256_export(struct shash_desc *desc, void *out)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- struct sha256_state *octx = out;
-
- octx->count = sctx->count;
- memcpy(octx->state, sctx->state, sizeof(octx->state));
- memcpy(octx->buf, sctx->buf, sizeof(octx->buf));
- return 0;
-}
-
-static int sha256_import(struct shash_desc *desc, const void *in)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- const struct sha256_state *ictx = in;
-
- sctx->count = ictx->count;
- memcpy(sctx->state, ictx->state, sizeof(ictx->state));
- memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
- sctx->func = CPACF_KIMD_SHA_256;
- return 0;
-}
-
-static struct shash_alg sha256_alg = {
- .digestsize = SHA256_DIGEST_SIZE,
- .init = s390_sha256_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
- .export = sha256_export,
- .import = sha256_import,
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha256",
- .cra_driver_name= "sha256-s390",
- .cra_priority = 300,
- .cra_blocksize = SHA256_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-};
-
-static int s390_sha224_init(struct shash_desc *desc)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-
- sctx->state[0] = SHA224_H0;
- sctx->state[1] = SHA224_H1;
- sctx->state[2] = SHA224_H2;
- sctx->state[3] = SHA224_H3;
- sctx->state[4] = SHA224_H4;
- sctx->state[5] = SHA224_H5;
- sctx->state[6] = SHA224_H6;
- sctx->state[7] = SHA224_H7;
- sctx->count = 0;
- sctx->func = CPACF_KIMD_SHA_256;
-
- return 0;
-}
-
-static struct shash_alg sha224_alg = {
- .digestsize = SHA224_DIGEST_SIZE,
- .init = s390_sha224_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
- .export = sha256_export,
- .import = sha256_import,
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha224",
- .cra_driver_name= "sha224-s390",
- .cra_priority = 300,
- .cra_blocksize = SHA224_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-};
-
-static int __init sha256_s390_init(void)
-{
- int ret;
-
- if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256))
- return -ENODEV;
- ret = crypto_register_shash(&sha256_alg);
- if (ret < 0)
- goto out;
- ret = crypto_register_shash(&sha224_alg);
- if (ret < 0)
- crypto_unregister_shash(&sha256_alg);
-out:
- return ret;
-}
-
-static void __exit sha256_s390_fini(void)
-{
- crypto_unregister_shash(&sha224_alg);
- crypto_unregister_shash(&sha256_alg);
-}
-
-module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha256_s390_init);
-module_exit(sha256_s390_fini);
-
-MODULE_ALIAS_CRYPTO("sha256");
-MODULE_ALIAS_CRYPTO("sha224");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA256 and SHA224 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha3_256_s390.c b/arch/s390/crypto/sha3_256_s390.c
deleted file mode 100644
index e1350e033a32..000000000000
--- a/arch/s390/crypto/sha3_256_s390.c
+++ /dev/null
@@ -1,146 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Cryptographic API.
- *
- * s390 implementation of the SHA256 and SHA224 Secure Hash Algorithm.
- *
- * s390 Version:
- * Copyright IBM Corp. 2019
- * Author(s): Joerg Schmidbauer (jschmidb@de.ibm.com)
- */
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/cpufeature.h>
-#include <crypto/sha3.h>
-#include <asm/cpacf.h>
-
-#include "sha.h"
-
-static int sha3_256_init(struct shash_desc *desc)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-
- memset(sctx->state, 0, sizeof(sctx->state));
- sctx->count = 0;
- sctx->func = CPACF_KIMD_SHA3_256;
-
- return 0;
-}
-
-static int sha3_256_export(struct shash_desc *desc, void *out)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- struct sha3_state *octx = out;
-
- octx->rsiz = sctx->count;
- memcpy(octx->st, sctx->state, sizeof(octx->st));
- memcpy(octx->buf, sctx->buf, sizeof(octx->buf));
-
- return 0;
-}
-
-static int sha3_256_import(struct shash_desc *desc, const void *in)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- const struct sha3_state *ictx = in;
-
- sctx->count = ictx->rsiz;
- memcpy(sctx->state, ictx->st, sizeof(ictx->st));
- memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
- sctx->func = CPACF_KIMD_SHA3_256;
-
- return 0;
-}
-
-static int sha3_224_import(struct shash_desc *desc, const void *in)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- const struct sha3_state *ictx = in;
-
- sctx->count = ictx->rsiz;
- memcpy(sctx->state, ictx->st, sizeof(ictx->st));
- memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
- sctx->func = CPACF_KIMD_SHA3_224;
-
- return 0;
-}
-
-static struct shash_alg sha3_256_alg = {
- .digestsize = SHA3_256_DIGEST_SIZE, /* = 32 */
- .init = sha3_256_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
- .export = sha3_256_export,
- .import = sha3_256_import,
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha3_state),
- .base = {
- .cra_name = "sha3-256",
- .cra_driver_name = "sha3-256-s390",
- .cra_priority = 300,
- .cra_blocksize = SHA3_256_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-};
-
-static int sha3_224_init(struct shash_desc *desc)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-
- memset(sctx->state, 0, sizeof(sctx->state));
- sctx->count = 0;
- sctx->func = CPACF_KIMD_SHA3_224;
-
- return 0;
-}
-
-static struct shash_alg sha3_224_alg = {
- .digestsize = SHA3_224_DIGEST_SIZE,
- .init = sha3_224_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
- .export = sha3_256_export, /* same as for 256 */
- .import = sha3_224_import, /* function code different! */
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha3_state),
- .base = {
- .cra_name = "sha3-224",
- .cra_driver_name = "sha3-224-s390",
- .cra_priority = 300,
- .cra_blocksize = SHA3_224_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-};
-
-static int __init sha3_256_s390_init(void)
-{
- int ret;
-
- if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA3_256))
- return -ENODEV;
-
- ret = crypto_register_shash(&sha3_256_alg);
- if (ret < 0)
- goto out;
-
- ret = crypto_register_shash(&sha3_224_alg);
- if (ret < 0)
- crypto_unregister_shash(&sha3_256_alg);
-out:
- return ret;
-}
-
-static void __exit sha3_256_s390_fini(void)
-{
- crypto_unregister_shash(&sha3_224_alg);
- crypto_unregister_shash(&sha3_256_alg);
-}
-
-module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha3_256_s390_init);
-module_exit(sha3_256_s390_fini);
-
-MODULE_ALIAS_CRYPTO("sha3-256");
-MODULE_ALIAS_CRYPTO("sha3-224");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA3-256 and SHA3-224 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha3_512_s390.c b/arch/s390/crypto/sha3_512_s390.c
deleted file mode 100644
index 06c142ed9bb1..000000000000
--- a/arch/s390/crypto/sha3_512_s390.c
+++ /dev/null
@@ -1,154 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Cryptographic API.
- *
- * s390 implementation of the SHA512 and SHA384 Secure Hash Algorithm.
- *
- * Copyright IBM Corp. 2019
- * Author(s): Joerg Schmidbauer (jschmidb@de.ibm.com)
- */
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/cpufeature.h>
-#include <crypto/sha3.h>
-#include <asm/cpacf.h>
-
-#include "sha.h"
-
-static int sha3_512_init(struct shash_desc *desc)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-
- memset(sctx->state, 0, sizeof(sctx->state));
- sctx->count = 0;
- sctx->func = CPACF_KIMD_SHA3_512;
-
- return 0;
-}
-
-static int sha3_512_export(struct shash_desc *desc, void *out)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- struct sha3_state *octx = out;
-
- octx->rsiz = sctx->count;
- octx->rsizw = sctx->count >> 32;
-
- memcpy(octx->st, sctx->state, sizeof(octx->st));
- memcpy(octx->buf, sctx->buf, sizeof(octx->buf));
-
- return 0;
-}
-
-static int sha3_512_import(struct shash_desc *desc, const void *in)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- const struct sha3_state *ictx = in;
-
- if (unlikely(ictx->rsizw))
- return -ERANGE;
- sctx->count = ictx->rsiz;
-
- memcpy(sctx->state, ictx->st, sizeof(ictx->st));
- memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
- sctx->func = CPACF_KIMD_SHA3_512;
-
- return 0;
-}
-
-static int sha3_384_import(struct shash_desc *desc, const void *in)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- const struct sha3_state *ictx = in;
-
- if (unlikely(ictx->rsizw))
- return -ERANGE;
- sctx->count = ictx->rsiz;
-
- memcpy(sctx->state, ictx->st, sizeof(ictx->st));
- memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
- sctx->func = CPACF_KIMD_SHA3_384;
-
- return 0;
-}
-
-static struct shash_alg sha3_512_alg = {
- .digestsize = SHA3_512_DIGEST_SIZE,
- .init = sha3_512_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
- .export = sha3_512_export,
- .import = sha3_512_import,
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha3_state),
- .base = {
- .cra_name = "sha3-512",
- .cra_driver_name = "sha3-512-s390",
- .cra_priority = 300,
- .cra_blocksize = SHA3_512_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-};
-
-MODULE_ALIAS_CRYPTO("sha3-512");
-
-static int sha3_384_init(struct shash_desc *desc)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-
- memset(sctx->state, 0, sizeof(sctx->state));
- sctx->count = 0;
- sctx->func = CPACF_KIMD_SHA3_384;
-
- return 0;
-}
-
-static struct shash_alg sha3_384_alg = {
- .digestsize = SHA3_384_DIGEST_SIZE,
- .init = sha3_384_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
- .export = sha3_512_export, /* same as for 512 */
- .import = sha3_384_import, /* function code different! */
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha3_state),
- .base = {
- .cra_name = "sha3-384",
- .cra_driver_name = "sha3-384-s390",
- .cra_priority = 300,
- .cra_blocksize = SHA3_384_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_sha_ctx),
- .cra_module = THIS_MODULE,
- }
-};
-
-MODULE_ALIAS_CRYPTO("sha3-384");
-
-static int __init init(void)
-{
- int ret;
-
- if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA3_512))
- return -ENODEV;
- ret = crypto_register_shash(&sha3_512_alg);
- if (ret < 0)
- goto out;
- ret = crypto_register_shash(&sha3_384_alg);
- if (ret < 0)
- crypto_unregister_shash(&sha3_512_alg);
-out:
- return ret;
-}
-
-static void __exit fini(void)
-{
- crypto_unregister_shash(&sha3_512_alg);
- crypto_unregister_shash(&sha3_384_alg);
-}
-
-module_cpu_feature_match(S390_CPU_FEATURE_MSA, init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA3-512 and SHA3-384 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
deleted file mode 100644
index 04f11c407763..000000000000
--- a/arch/s390/crypto/sha512_s390.c
+++ /dev/null
@@ -1,149 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Cryptographic API.
- *
- * s390 implementation of the SHA512 and SHA38 Secure Hash Algorithm.
- *
- * Copyright IBM Corp. 2007
- * Author(s): Jan Glauber (jang@de.ibm.com)
- */
-#include <crypto/internal/hash.h>
-#include <crypto/sha2.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/cpufeature.h>
-#include <asm/cpacf.h>
-
-#include "sha.h"
-
-static int sha512_init(struct shash_desc *desc)
-{
- struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
-
- *(__u64 *)&ctx->state[0] = SHA512_H0;
- *(__u64 *)&ctx->state[2] = SHA512_H1;
- *(__u64 *)&ctx->state[4] = SHA512_H2;
- *(__u64 *)&ctx->state[6] = SHA512_H3;
- *(__u64 *)&ctx->state[8] = SHA512_H4;
- *(__u64 *)&ctx->state[10] = SHA512_H5;
- *(__u64 *)&ctx->state[12] = SHA512_H6;
- *(__u64 *)&ctx->state[14] = SHA512_H7;
- ctx->count = 0;
- ctx->func = CPACF_KIMD_SHA_512;
-
- return 0;
-}
-
-static int sha512_export(struct shash_desc *desc, void *out)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- struct sha512_state *octx = out;
-
- octx->count[0] = sctx->count;
- octx->count[1] = 0;
- memcpy(octx->state, sctx->state, sizeof(octx->state));
- memcpy(octx->buf, sctx->buf, sizeof(octx->buf));
- return 0;
-}
-
-static int sha512_import(struct shash_desc *desc, const void *in)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- const struct sha512_state *ictx = in;
-
- if (unlikely(ictx->count[1]))
- return -ERANGE;
- sctx->count = ictx->count[0];
-
- memcpy(sctx->state, ictx->state, sizeof(ictx->state));
- memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
- sctx->func = CPACF_KIMD_SHA_512;
- return 0;
-}
-
-static struct shash_alg sha512_alg = {
- .digestsize = SHA512_DIGEST_SIZE,
- .init = sha512_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
- .export = sha512_export,
- .import = sha512_import,
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha512_state),
- .base = {
- .cra_name = "sha512",
- .cra_driver_name= "sha512-s390",
- .cra_priority = 300,
- .cra_blocksize = SHA512_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-};
-
-MODULE_ALIAS_CRYPTO("sha512");
-
-static int sha384_init(struct shash_desc *desc)
-{
- struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
-
- *(__u64 *)&ctx->state[0] = SHA384_H0;
- *(__u64 *)&ctx->state[2] = SHA384_H1;
- *(__u64 *)&ctx->state[4] = SHA384_H2;
- *(__u64 *)&ctx->state[6] = SHA384_H3;
- *(__u64 *)&ctx->state[8] = SHA384_H4;
- *(__u64 *)&ctx->state[10] = SHA384_H5;
- *(__u64 *)&ctx->state[12] = SHA384_H6;
- *(__u64 *)&ctx->state[14] = SHA384_H7;
- ctx->count = 0;
- ctx->func = CPACF_KIMD_SHA_512;
-
- return 0;
-}
-
-static struct shash_alg sha384_alg = {
- .digestsize = SHA384_DIGEST_SIZE,
- .init = sha384_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
- .export = sha512_export,
- .import = sha512_import,
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha512_state),
- .base = {
- .cra_name = "sha384",
- .cra_driver_name= "sha384-s390",
- .cra_priority = 300,
- .cra_blocksize = SHA384_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_sha_ctx),
- .cra_module = THIS_MODULE,
- }
-};
-
-MODULE_ALIAS_CRYPTO("sha384");
-
-static int __init init(void)
-{
- int ret;
-
- if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_512))
- return -ENODEV;
- if ((ret = crypto_register_shash(&sha512_alg)) < 0)
- goto out;
- if ((ret = crypto_register_shash(&sha384_alg)) < 0)
- crypto_unregister_shash(&sha512_alg);
-out:
- return ret;
-}
-
-static void __exit fini(void)
-{
- crypto_unregister_shash(&sha512_alg);
- crypto_unregister_shash(&sha384_alg);
-}
-
-module_cpu_feature_match(S390_CPU_FEATURE_MSA, init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA512 and SHA-384 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c
deleted file mode 100644
index 686fe7aa192f..000000000000
--- a/arch/s390/crypto/sha_common.c
+++ /dev/null
@@ -1,124 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Cryptographic API.
- *
- * s390 generic implementation of the SHA Secure Hash Algorithms.
- *
- * Copyright IBM Corp. 2007
- * Author(s): Jan Glauber (jang@de.ibm.com)
- */
-
-#include <crypto/internal/hash.h>
-#include <linux/module.h>
-#include <asm/cpacf.h>
-#include "sha.h"
-
-int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len)
-{
- struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
- unsigned int bsize = crypto_shash_blocksize(desc->tfm);
- unsigned int index, n;
-
- /* how much is already in the buffer? */
- index = ctx->count % bsize;
- ctx->count += len;
-
- if ((index + len) < bsize)
- goto store;
-
- /* process one stored block */
- if (index) {
- memcpy(ctx->buf + index, data, bsize - index);
- cpacf_kimd(ctx->func, ctx->state, ctx->buf, bsize);
- data += bsize - index;
- len -= bsize - index;
- index = 0;
- }
-
- /* process as many blocks as possible */
- if (len >= bsize) {
- n = (len / bsize) * bsize;
- cpacf_kimd(ctx->func, ctx->state, data, n);
- data += n;
- len -= n;
- }
-store:
- if (len)
- memcpy(ctx->buf + index , data, len);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(s390_sha_update);
-
-static int s390_crypto_shash_parmsize(int func)
-{
- switch (func) {
- case CPACF_KLMD_SHA_1:
- return 20;
- case CPACF_KLMD_SHA_256:
- return 32;
- case CPACF_KLMD_SHA_512:
- return 64;
- case CPACF_KLMD_SHA3_224:
- case CPACF_KLMD_SHA3_256:
- case CPACF_KLMD_SHA3_384:
- case CPACF_KLMD_SHA3_512:
- return 200;
- default:
- return -EINVAL;
- }
-}
-
-int s390_sha_final(struct shash_desc *desc, u8 *out)
-{
- struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
- unsigned int bsize = crypto_shash_blocksize(desc->tfm);
- u64 bits;
- unsigned int n;
- int mbl_offset;
-
- n = ctx->count % bsize;
- bits = ctx->count * 8;
- mbl_offset = s390_crypto_shash_parmsize(ctx->func);
- if (mbl_offset < 0)
- return -EINVAL;
-
- mbl_offset = mbl_offset / sizeof(u32);
-
- /* set total msg bit length (mbl) in CPACF parmblock */
- switch (ctx->func) {
- case CPACF_KLMD_SHA_1:
- case CPACF_KLMD_SHA_256:
- memcpy(ctx->state + mbl_offset, &bits, sizeof(bits));
- break;
- case CPACF_KLMD_SHA_512:
- /*
- * the SHA512 parmblock has a 128-bit mbl field, clear
- * high-order u64 field, copy bits to low-order u64 field
- */
- memset(ctx->state + mbl_offset, 0x00, sizeof(bits));
- mbl_offset += sizeof(u64) / sizeof(u32);
- memcpy(ctx->state + mbl_offset, &bits, sizeof(bits));
- break;
- case CPACF_KLMD_SHA3_224:
- case CPACF_KLMD_SHA3_256:
- case CPACF_KLMD_SHA3_384:
- case CPACF_KLMD_SHA3_512:
- break;
- default:
- return -EINVAL;
- }
-
- cpacf_klmd(ctx->func, ctx->state, ctx->buf, n);
-
- /* copy digest to out */
- memcpy(out, ctx->state, crypto_shash_digestsize(desc->tfm));
- /* wipe context */
- memset(ctx, 0, sizeof *ctx);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(s390_sha_final);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("s390 SHA cipher common functions");
diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h
index 65f4036fd541..2bb7104124ca 100644
--- a/arch/s390/hypfs/hypfs.h
+++ b/arch/s390/hypfs/hypfs.h
@@ -22,11 +22,9 @@
extern struct dentry *hypfs_mkdir(struct dentry *parent, const char *name);
-extern struct dentry *hypfs_create_u64(struct dentry *dir, const char *name,
- __u64 value);
+extern int hypfs_create_u64(struct dentry *dir, const char *name, __u64 value);
-extern struct dentry *hypfs_create_str(struct dentry *dir, const char *name,
- char *string);
+extern int hypfs_create_str(struct dentry *dir, const char *name, char *string);
/* LPAR Hypervisor */
extern int hypfs_diag_init(void);
@@ -48,7 +46,7 @@ void hypfs_sprp_exit(void);
int __hypfs_fs_init(void);
-static inline int hypfs_fs_init(void)
+static __always_inline int hypfs_fs_init(void)
{
if (IS_ENABLED(CONFIG_S390_HYPFS_FS))
return __hypfs_fs_init();
@@ -78,7 +76,6 @@ struct hypfs_dbfs_file {
struct dentry *dentry;
};
-extern void hypfs_dbfs_exit(void);
extern void hypfs_dbfs_create_file(struct hypfs_dbfs_file *df);
extern void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df);
diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c
index 4024599eb448..41a0d2066fa0 100644
--- a/arch/s390/hypfs/hypfs_dbfs.c
+++ b/arch/s390/hypfs/hypfs_dbfs.c
@@ -6,6 +6,7 @@
* Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
*/
+#include <linux/security.h>
#include <linux/slab.h>
#include "hypfs.h"
@@ -39,7 +40,9 @@ static ssize_t dbfs_read(struct file *file, char __user *buf,
return 0;
df = file_inode(file)->i_private;
- mutex_lock(&df->lock);
+ if (mutex_lock_interruptible(&df->lock))
+ return -ERESTARTSYS;
+
data = hypfs_dbfs_data_alloc(df);
if (!data) {
mutex_unlock(&df->lock);
@@ -64,24 +67,27 @@ static long dbfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
long rc;
mutex_lock(&df->lock);
- if (df->unlocked_ioctl)
- rc = df->unlocked_ioctl(file, cmd, arg);
- else
- rc = -ENOTTY;
+ rc = df->unlocked_ioctl(file, cmd, arg);
mutex_unlock(&df->lock);
return rc;
}
-static const struct file_operations dbfs_ops = {
+static const struct file_operations dbfs_ops_ioctl = {
.read = dbfs_read,
- .llseek = no_llseek,
.unlocked_ioctl = dbfs_ioctl,
};
+static const struct file_operations dbfs_ops = {
+ .read = dbfs_read,
+};
+
void hypfs_dbfs_create_file(struct hypfs_dbfs_file *df)
{
- df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df,
- &dbfs_ops);
+ const struct file_operations *fops = &dbfs_ops;
+
+ if (df->unlocked_ioctl && !security_locked_down(LOCKDOWN_DEBUGFS))
+ fops = &dbfs_ops_ioctl;
+ df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df, fops);
mutex_init(&df->lock);
}
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index 279b7bba4d43..08777f57bd24 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -7,8 +7,7 @@
* Author(s): Michael Holzheu <holzheu@de.ibm.com>
*/
-#define KMSG_COMPONENT "hypfs"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "hypfs: " fmt
#include <linux/types.h>
#include <linux/errno.h>
@@ -29,8 +28,6 @@ static enum diag204_format diag204_info_type; /* used diag 204 data format */
static void *diag204_buf; /* 4K aligned buffer for diag204 data */
static int diag204_buf_pages; /* number of pages for diag204 data */
-static struct dentry *dbfs_d204_file;
-
enum diag204_format diag204_get_info_type(void)
{
return diag204_info_type;
@@ -140,11 +137,22 @@ fail_alloc:
int diag204_store(void *buf, int pages)
{
+ unsigned long subcode;
int rc;
- rc = diag204((unsigned long)diag204_store_sc |
- (unsigned long)diag204_get_info_type(), pages, buf);
- return rc < 0 ? -EOPNOTSUPP : 0;
+ subcode = diag204_get_info_type();
+ subcode |= diag204_store_sc;
+ if (diag204_has_bif())
+ subcode |= DIAG204_BIF_BIT;
+ while (1) {
+ rc = diag204(subcode, pages, buf);
+ if (rc != -EBUSY)
+ break;
+ if (signal_pending(current))
+ return -ERESTARTSYS;
+ schedule_timeout_interruptible(DIAG204_BUSY_WAIT);
+ }
+ return rc < 0 ? rc : 0;
}
struct dbfs_d204_hdr {
@@ -203,16 +211,13 @@ __init int hypfs_diag_init(void)
hypfs_dbfs_create_file(&dbfs_file_d204);
rc = hypfs_diag_fs_init();
- if (rc) {
+ if (rc)
pr_err("The hardware system does not provide all functions required by hypfs\n");
- debugfs_remove(dbfs_d204_file);
- }
return rc;
}
void hypfs_diag_exit(void)
{
- debugfs_remove(dbfs_d204_file);
hypfs_diag_fs_exit();
diag204_free_buffer();
hypfs_dbfs_remove_file(&dbfs_file_d204);
diff --git a/arch/s390/hypfs/hypfs_diag.h b/arch/s390/hypfs/hypfs_diag.h
index 7090eff27fef..b5218135b8fe 100644
--- a/arch/s390/hypfs/hypfs_diag.h
+++ b/arch/s390/hypfs/hypfs_diag.h
@@ -19,7 +19,7 @@ int diag204_store(void *buf, int pages);
int __hypfs_diag_fs_init(void);
void __hypfs_diag_fs_exit(void);
-static inline int hypfs_diag_fs_init(void)
+static __always_inline int hypfs_diag_fs_init(void)
{
if (IS_ENABLED(CONFIG_S390_HYPFS_FS))
return __hypfs_diag_fs_init();
diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c
index 4131f0daa5ea..61220e717af0 100644
--- a/arch/s390/hypfs/hypfs_diag0c.c
+++ b/arch/s390/hypfs/hypfs_diag0c.c
@@ -9,6 +9,7 @@
#include <linux/slab.h>
#include <linux/cpu.h>
+#include <asm/machine.h>
#include <asm/diag.h>
#include <asm/hypfs.h>
#include "hypfs.h"
@@ -107,7 +108,7 @@ static struct hypfs_dbfs_file dbfs_file_0c = {
*/
int __init hypfs_diag0c_init(void)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return 0;
hypfs_dbfs_create_file(&dbfs_file_0c);
return 0;
@@ -118,7 +119,7 @@ int __init hypfs_diag0c_init(void)
*/
void hypfs_diag0c_exit(void)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return;
hypfs_dbfs_remove_file(&dbfs_file_0c);
}
diff --git a/arch/s390/hypfs/hypfs_diag_fs.c b/arch/s390/hypfs/hypfs_diag_fs.c
index 00a6d370a280..39621c326010 100644
--- a/arch/s390/hypfs/hypfs_diag_fs.c
+++ b/arch/s390/hypfs/hypfs_diag_fs.c
@@ -7,8 +7,7 @@
* Author(s): Michael Holzheu <holzheu@de.ibm.com>
*/
-#define KMSG_COMPONENT "hypfs"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "hypfs: " fmt
#include <linux/types.h>
#include <linux/errno.h>
@@ -16,6 +15,7 @@
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
+#include <asm/machine.h>
#include <asm/diag.h>
#include <asm/ebcdic.h>
#include "hypfs_diag.h"
@@ -203,30 +203,31 @@ static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info)
{
struct dentry *cpu_dir;
char buffer[TMP_SIZE];
- void *rc;
+ int rc;
snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_get_info_type(),
cpu_info));
cpu_dir = hypfs_mkdir(cpus_dir, buffer);
+ if (IS_ERR(cpu_dir))
+ return PTR_ERR(cpu_dir);
rc = hypfs_create_u64(cpu_dir, "mgmtime",
cpu_info__acc_time(diag204_get_info_type(), cpu_info) -
cpu_info__lp_time(diag204_get_info_type(), cpu_info));
- if (IS_ERR(rc))
- return PTR_ERR(rc);
+ if (rc)
+ return rc;
rc = hypfs_create_u64(cpu_dir, "cputime",
cpu_info__lp_time(diag204_get_info_type(), cpu_info));
- if (IS_ERR(rc))
- return PTR_ERR(rc);
+ if (rc)
+ return rc;
if (diag204_get_info_type() == DIAG204_INFO_EXT) {
rc = hypfs_create_u64(cpu_dir, "onlinetime",
cpu_info__online_time(diag204_get_info_type(),
cpu_info));
- if (IS_ERR(rc))
- return PTR_ERR(rc);
+ if (rc)
+ return rc;
}
diag224_idx2name(cpu_info__ctidx(diag204_get_info_type(), cpu_info), buffer);
- rc = hypfs_create_str(cpu_dir, "type", buffer);
- return PTR_ERR_OR_ZERO(rc);
+ return hypfs_create_str(cpu_dir, "type", buffer);
}
static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr)
@@ -261,7 +262,7 @@ static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info)
{
struct dentry *cpu_dir;
char buffer[TMP_SIZE];
- void *rc;
+ int rc;
snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_get_info_type(),
cpu_info));
@@ -270,11 +271,10 @@ static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info)
return PTR_ERR(cpu_dir);
rc = hypfs_create_u64(cpu_dir, "mgmtime",
phys_cpu__mgm_time(diag204_get_info_type(), cpu_info));
- if (IS_ERR(rc))
- return PTR_ERR(rc);
+ if (rc)
+ return rc;
diag224_idx2name(phys_cpu__ctidx(diag204_get_info_type(), cpu_info), buffer);
- rc = hypfs_create_str(cpu_dir, "type", buffer);
- return PTR_ERR_OR_ZERO(rc);
+ return hypfs_create_str(cpu_dir, "type", buffer);
}
static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr)
@@ -313,41 +313,25 @@ int hypfs_diag_create_files(struct dentry *root)
return rc;
systems_dir = hypfs_mkdir(root, "systems");
- if (IS_ERR(systems_dir)) {
- rc = PTR_ERR(systems_dir);
- goto err_out;
- }
+ if (IS_ERR(systems_dir))
+ return PTR_ERR(systems_dir);
time_hdr = (struct x_info_blk_hdr *)buffer;
part_hdr = time_hdr + info_blk_hdr__size(diag204_get_info_type());
for (i = 0; i < info_blk_hdr__npar(diag204_get_info_type(), time_hdr); i++) {
part_hdr = hypfs_create_lpar_files(systems_dir, part_hdr);
- if (IS_ERR(part_hdr)) {
- rc = PTR_ERR(part_hdr);
- goto err_out;
- }
+ if (IS_ERR(part_hdr))
+ return PTR_ERR(part_hdr);
}
if (info_blk_hdr__flags(diag204_get_info_type(), time_hdr) &
DIAG204_LPAR_PHYS_FLG) {
ptr = hypfs_create_phys_files(root, part_hdr);
- if (IS_ERR(ptr)) {
- rc = PTR_ERR(ptr);
- goto err_out;
- }
+ if (IS_ERR(ptr))
+ return PTR_ERR(ptr);
}
hyp_dir = hypfs_mkdir(root, "hyp");
- if (IS_ERR(hyp_dir)) {
- rc = PTR_ERR(hyp_dir);
- goto err_out;
- }
- ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor");
- if (IS_ERR(ptr)) {
- rc = PTR_ERR(ptr);
- goto err_out;
- }
- rc = 0;
-
-err_out:
- return rc;
+ if (IS_ERR(hyp_dir))
+ return PTR_ERR(hyp_dir);
+ return hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor");
}
/* Diagnose 224 functions */
@@ -382,7 +366,7 @@ static void diag224_delete_name_table(void)
int __init __hypfs_diag_fs_init(void)
{
- if (MACHINE_IS_LPAR)
+ if (machine_is_lpar())
return diag224_get_name_table();
return 0;
}
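
Aside (not part of the patch): the recurring change in this file is that hypfs_create_u64()/hypfs_create_str() now return an int instead of a dentry pointer. A minimal, hypothetical before/after caller fragment — cpu_dir and lp_time are stand-in names — shows why the IS_ERR()/PTR_ERR() handling disappears:

	/* before: helpers returned struct dentry *, errors came back as ERR_PTR() */
	void *rc = hypfs_create_u64(cpu_dir, "cputime", lp_time);
	if (IS_ERR(rc))
		return PTR_ERR(rc);

	/* after: helpers return 0 or -errno, callers simply propagate rc */
	int rc = hypfs_create_u64(cpu_dir, "cputime", lp_time);
	if (rc)
		return rc;
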
diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c
index 9fc3f0dae8f0..a72576221cab 100644
--- a/arch/s390/hypfs/hypfs_sprp.c
+++ b/arch/s390/hypfs/hypfs_sprp.c
@@ -7,7 +7,6 @@
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
*/
-#include <linux/compat.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/string.h>
@@ -27,7 +26,7 @@ static inline unsigned long __hypfs_sprp_diag304(void *data, unsigned long cmd)
{
union register_pair r1 = { .even = virt_to_phys(data), };
- asm volatile("diag %[r1],%[r3],0x304\n"
+ asm volatile("diag %[r1],%[r3],0x304"
: [r1] "+&d" (r1.pair)
: [r3] "d" (cmd)
: "memory");
@@ -116,10 +115,7 @@ static long hypfs_sprp_ioctl(struct file *file, unsigned int cmd,
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
- if (is_compat_task())
- argp = compat_ptr(arg);
- else
- argp = (void __user *) arg;
+ argp = (void __user *)arg;
switch (cmd) {
case HYPFS_DIAG304:
return __hypfs_sprp_ioctl(argp);
diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c
index 3db40ad853e0..4db2895e4da3 100644
--- a/arch/s390/hypfs/hypfs_vm.c
+++ b/arch/s390/hypfs/hypfs_vm.c
@@ -11,6 +11,7 @@
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <asm/extable.h>
+#include <asm/machine.h>
#include <asm/diag.h>
#include <asm/ebcdic.h>
#include <asm/timex.h>
@@ -121,7 +122,7 @@ static struct hypfs_dbfs_file dbfs_file_2fc = {
int hypfs_vm_init(void)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return 0;
if (diag2fc(0, all_guests, NULL) > 0)
diag2fc_guest_query = all_guests;
@@ -135,7 +136,7 @@ int hypfs_vm_init(void)
void hypfs_vm_exit(void)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return;
hypfs_dbfs_remove_file(&dbfs_file_2fc);
}
diff --git a/arch/s390/hypfs/hypfs_vm_fs.c b/arch/s390/hypfs/hypfs_vm_fs.c
index 6011289afa8c..a149a9f92e40 100644
--- a/arch/s390/hypfs/hypfs_vm_fs.c
+++ b/arch/s390/hypfs/hypfs_vm_fs.c
@@ -19,10 +19,9 @@
#define ATTRIBUTE(dir, name, member) \
do { \
- void *rc; \
- rc = hypfs_create_u64(dir, name, member); \
- if (IS_ERR(rc)) \
- return PTR_ERR(rc); \
+ int rc = hypfs_create_u64(dir, name, member); \
+ if (rc) \
+ return rc; \
} while (0)
static int hypfs_vm_create_guest(struct dentry *systems_dir,
@@ -85,7 +84,7 @@ static int hypfs_vm_create_guest(struct dentry *systems_dir,
int hypfs_vm_create_files(struct dentry *root)
{
- struct dentry *dir, *file;
+ struct dentry *dir;
struct diag2fc_data *data;
unsigned int count = 0;
int rc, i;
@@ -100,11 +99,9 @@ int hypfs_vm_create_files(struct dentry *root)
rc = PTR_ERR(dir);
goto failed;
}
- file = hypfs_create_str(dir, "type", "z/VM Hypervisor");
- if (IS_ERR(file)) {
- rc = PTR_ERR(file);
+ rc = hypfs_create_str(dir, "type", "z/VM Hypervisor");
+ if (rc)
goto failed;
- }
/* physical cpus */
dir = hypfs_mkdir(root, "cpus");
@@ -112,11 +109,9 @@ int hypfs_vm_create_files(struct dentry *root)
rc = PTR_ERR(dir);
goto failed;
}
- file = hypfs_create_u64(dir, "count", data->lcpus);
- if (IS_ERR(file)) {
- rc = PTR_ERR(file);
+ rc = hypfs_create_u64(dir, "count", data->lcpus);
+ if (rc)
goto failed;
- }
/* guests */
dir = hypfs_mkdir(root, "systems");
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 858beaf4a8cb..3a47c2e24b6e 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -6,8 +6,7 @@
* Author(s): Michael Holzheu <holzheu@de.ibm.com>
*/
-#define KMSG_COMPONENT "hypfs"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "hypfs: " fmt
#include <linux/types.h>
#include <linux/errno.h>
@@ -24,6 +23,7 @@
#include <linux/kobject.h>
#include <linux/seq_file.h>
#include <linux/uio.h>
+#include <asm/machine.h>
#include <asm/ebcdic.h>
#include "hypfs.h"
@@ -60,33 +60,17 @@ static void hypfs_update_update(struct super_block *sb)
static void hypfs_add_dentry(struct dentry *dentry)
{
- dentry->d_fsdata = hypfs_last_dentry;
- hypfs_last_dentry = dentry;
-}
-
-static void hypfs_remove(struct dentry *dentry)
-{
- struct dentry *parent;
-
- parent = dentry->d_parent;
- inode_lock(d_inode(parent));
- if (simple_positive(dentry)) {
- if (d_is_dir(dentry))
- simple_rmdir(d_inode(parent), dentry);
- else
- simple_unlink(d_inode(parent), dentry);
+ if (IS_ROOT(dentry->d_parent)) {
+ dentry->d_fsdata = hypfs_last_dentry;
+ hypfs_last_dentry = dentry;
}
- d_drop(dentry);
- dput(dentry);
- inode_unlock(d_inode(parent));
}
-static void hypfs_delete_tree(struct dentry *root)
+static void hypfs_delete_tree(void)
{
while (hypfs_last_dentry) {
- struct dentry *next_dentry;
- next_dentry = hypfs_last_dentry->d_fsdata;
- hypfs_remove(hypfs_last_dentry);
+ struct dentry *next_dentry = hypfs_last_dentry->d_fsdata;
+ simple_recursive_removal(hypfs_last_dentry, NULL);
hypfs_last_dentry = next_dentry;
}
}
@@ -183,14 +167,14 @@ static ssize_t hypfs_write_iter(struct kiocb *iocb, struct iov_iter *from)
rc = -EBUSY;
goto out;
}
- hypfs_delete_tree(sb->s_root);
- if (MACHINE_IS_VM)
+ hypfs_delete_tree();
+ if (machine_is_vm())
rc = hypfs_vm_create_files(sb->s_root);
else
rc = hypfs_diag_create_files(sb->s_root);
if (rc) {
pr_err("Updating the hypfs tree failed\n");
- hypfs_delete_tree(sb->s_root);
+ hypfs_delete_tree();
goto out;
}
hypfs_update_update(sb);
@@ -273,7 +257,7 @@ static int hypfs_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_root = root_dentry = d_make_root(root_inode);
if (!root_dentry)
return -ENOMEM;
- if (MACHINE_IS_VM)
+ if (machine_is_vm())
rc = hypfs_vm_create_files(root_dentry);
else
rc = hypfs_diag_create_files(root_dentry);
@@ -325,13 +309,9 @@ static void hypfs_kill_super(struct super_block *sb)
{
struct hypfs_sb_info *sb_info = sb->s_fs_info;
- if (sb->s_root)
- hypfs_delete_tree(sb->s_root);
- if (sb_info && sb_info->update_file)
- hypfs_remove(sb_info->update_file);
- kfree(sb->s_fs_info);
- sb->s_fs_info = NULL;
- kill_litter_super(sb);
+ hypfs_last_dentry = NULL;
+ kill_anon_super(sb);
+ kfree(sb_info);
}
static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
@@ -340,17 +320,13 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
struct dentry *dentry;
struct inode *inode;
- inode_lock(d_inode(parent));
- dentry = lookup_one_len(name, parent, strlen(name));
- if (IS_ERR(dentry)) {
- dentry = ERR_PTR(-ENOMEM);
- goto fail;
- }
+ dentry = simple_start_creating(parent, name);
+ if (IS_ERR(dentry))
+ return ERR_PTR(-ENOMEM);
inode = hypfs_make_inode(parent->d_sb, mode);
if (!inode) {
- dput(dentry);
- dentry = ERR_PTR(-ENOMEM);
- goto fail;
+ simple_done_creating(dentry);
+ return ERR_PTR(-ENOMEM);
}
if (S_ISREG(mode)) {
inode->i_fop = &hypfs_file_ops;
@@ -365,11 +341,9 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
} else
BUG();
inode->i_private = data;
- d_instantiate(dentry, inode);
- dget(dentry);
-fail:
- inode_unlock(d_inode(parent));
- return dentry;
+ d_make_persistent(dentry, inode);
+ simple_done_creating(dentry);
+ return dentry; // borrowed
}
struct dentry *hypfs_mkdir(struct dentry *parent, const char *name)
@@ -397,8 +371,7 @@ static struct dentry *hypfs_create_update_file(struct dentry *dir)
return dentry;
}
-struct dentry *hypfs_create_u64(struct dentry *dir,
- const char *name, __u64 value)
+int hypfs_create_u64(struct dentry *dir, const char *name, __u64 value)
{
char *buffer;
char tmp[TMP_SIZE];
@@ -407,35 +380,34 @@ struct dentry *hypfs_create_u64(struct dentry *dir,
snprintf(tmp, TMP_SIZE, "%llu\n", (unsigned long long int)value);
buffer = kstrdup(tmp, GFP_KERNEL);
if (!buffer)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
dentry =
hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE);
if (IS_ERR(dentry)) {
kfree(buffer);
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
}
hypfs_add_dentry(dentry);
- return dentry;
+ return 0;
}
-struct dentry *hypfs_create_str(struct dentry *dir,
- const char *name, char *string)
+int hypfs_create_str(struct dentry *dir, const char *name, char *string)
{
char *buffer;
struct dentry *dentry;
buffer = kmalloc(strlen(string) + 2, GFP_KERNEL);
if (!buffer)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
sprintf(buffer, "%s\n", string);
dentry =
hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE);
if (IS_ERR(dentry)) {
kfree(buffer);
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
}
hypfs_add_dentry(dentry);
- return dentry;
+ return 0;
}
static const struct file_operations hypfs_file_ops = {
@@ -443,7 +415,6 @@ static const struct file_operations hypfs_file_ops = {
.release = hypfs_release,
.read_iter = hypfs_read_iter,
.write_iter = hypfs_write_iter,
- .llseek = no_llseek,
};
static struct file_system_type hypfs_type = {
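
For orientation, a standalone sketch (hypothetical names, not kernel code) of the list handling hypfs_delete_tree() now relies on: top-level dentries are chained through d_fsdata, and each entry popped off the list is handed to simple_recursive_removal(), which takes down the whole subtree, so the per-file hypfs_remove() walk is no longer needed.

	#include <stdio.h>

	struct node { const char *name; struct node *next; };	/* 'next' plays the role of d_fsdata */

	static struct node *last;	/* like hypfs_last_dentry */

	static void delete_tree(void)
	{
		while (last) {
			struct node *next = last->next;

			printf("recursively removing %s\n", last->name);	/* stands in for simple_recursive_removal() */
			last = next;
		}
	}

	int main(void)
	{
		static struct node hyp = { "hyp", NULL };
		static struct node systems = { "systems", &hyp };

		last = &systems;
		delete_tree();
		return 0;
	}
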
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 4b904110d27c..297bf7157968 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -7,3 +7,4 @@ generated-y += unistd_nr.h
generic-y += asm-offsets.h
generic-y += kvm_types.h
generic-y += mcs_spinlock.h
+generic-y += mmzone.h
diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h
index 6f264b79e377..317c07c09ae4 100644
--- a/arch/s390/include/asm/abs_lowcore.h
+++ b/arch/s390/include/asm/abs_lowcore.h
@@ -2,6 +2,7 @@
#ifndef _ASM_S390_ABS_LOWCORE_H
#define _ASM_S390_ABS_LOWCORE_H
+#include <linux/smp.h>
#include <asm/lowcore.h>
#define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore))
diff --git a/arch/s390/include/asm/alternative-asm.h b/arch/s390/include/asm/alternative-asm.h
deleted file mode 100644
index 608f6287ca9c..000000000000
--- a/arch/s390/include/asm/alternative-asm.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_S390_ALTERNATIVE_ASM_H
-#define _ASM_S390_ALTERNATIVE_ASM_H
-
-#ifdef __ASSEMBLY__
-
-/*
- * Issue one struct alt_instr descriptor entry (need to put it into
- * the section .altinstructions, see below). This entry contains
- * enough information for the alternatives patching code to patch an
- * instruction. See apply_alternatives().
- */
-.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature
- .long \orig_start - .
- .long \alt_start - .
- .word \feature
- .byte \orig_end - \orig_start
- .org . - ( \orig_end - \orig_start ) & 1
- .org . - ( \orig_end - \orig_start ) + ( \alt_end - \alt_start )
- .org . - ( \alt_end - \alt_start ) + ( \orig_end - \orig_start )
-.endm
-
-/*
- * Define an alternative between two instructions. If @feature is
- * present, early code in apply_alternatives() replaces @oldinstr with
- * @newinstr.
- */
-.macro ALTERNATIVE oldinstr, newinstr, feature
- .pushsection .altinstr_replacement,"ax"
-770: \newinstr
-771: .popsection
-772: \oldinstr
-773: .pushsection .altinstructions,"a"
- alt_entry 772b, 773b, 770b, 771b, \feature
- .popsection
-.endm
-
-/*
- * Define an alternative between two instructions. If @feature is
- * present, early code in apply_alternatives() replaces @oldinstr with
- * @newinstr.
- */
-.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
- .pushsection .altinstr_replacement,"ax"
-770: \newinstr1
-771: \newinstr2
-772: .popsection
-773: \oldinstr
-774: .pushsection .altinstructions,"a"
- alt_entry 773b, 774b, 770b, 771b,\feature1
- alt_entry 773b, 774b, 771b, 772b,\feature2
- .popsection
-.endm
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _ASM_S390_ALTERNATIVE_ASM_H */
diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h
index dd93b92c3ab6..1c56480def9e 100644
--- a/arch/s390/include/asm/alternative.h
+++ b/arch/s390/include/asm/alternative.h
@@ -2,7 +2,56 @@
#ifndef _ASM_S390_ALTERNATIVE_H
#define _ASM_S390_ALTERNATIVE_H
-#ifndef __ASSEMBLY__
+/*
+ * Each alternative comes with a 32 bit feature field:
+ * union {
+ * u32 feature;
+ * struct {
+ * u32 ctx : 4;
+ * u32 type : 8;
+ * u32 data : 20;
+ * };
+ * }
+ *
+ * @ctx is a bitfield, where only one bit must be set. Each bit defines
+ * in which context an alternative is supposed to be applied to the
+ * kernel image:
+ *
+ * - from the decompressor before the kernel itself is executed
+ * - from early kernel code from within the kernel
+ *
+ * @type is a number which defines the type and with that the type
+ * specific alternative patching.
+ *
+ * @data is additional type specific information which defines if an
+ * alternative should be applied.
+ */
+
+#define ALT_CTX_EARLY 1
+#define ALT_CTX_LATE 2
+#define ALT_CTX_ALL (ALT_CTX_EARLY | ALT_CTX_LATE)
+
+#define ALT_TYPE_FACILITY 0
+#define ALT_TYPE_FEATURE 1
+#define ALT_TYPE_SPEC 2
+
+#define ALT_DATA_SHIFT 0
+#define ALT_TYPE_SHIFT 20
+#define ALT_CTX_SHIFT 28
+
+#define ALT_FACILITY(facility) (ALT_CTX_EARLY << ALT_CTX_SHIFT | \
+ ALT_TYPE_FACILITY << ALT_TYPE_SHIFT | \
+ (facility) << ALT_DATA_SHIFT)
+
+#define ALT_FEATURE(feature) (ALT_CTX_EARLY << ALT_CTX_SHIFT | \
+ ALT_TYPE_FEATURE << ALT_TYPE_SHIFT | \
+ (feature) << ALT_DATA_SHIFT)
+
+#define ALT_SPEC(facility) (ALT_CTX_LATE << ALT_CTX_SHIFT | \
+ ALT_TYPE_SPEC << ALT_TYPE_SHIFT | \
+ (facility) << ALT_DATA_SHIFT)
+
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/stddef.h>
@@ -11,12 +60,30 @@
struct alt_instr {
s32 instr_offset; /* original instruction */
s32 repl_offset; /* offset to replacement instruction */
- u16 facility; /* facility bit set for replacement */
+ union {
+ u32 feature; /* feature required for replacement */
+ struct {
+ u32 ctx : 4; /* context */
+ u32 type : 8; /* type of alternative */
+ u32 data : 20; /* patching information */
+ };
+ };
u8 instrlen; /* length of original instruction */
} __packed;
-void apply_alternative_instructions(void);
-void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+
+void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx);
+
+static inline void apply_alternative_instructions(void)
+{
+ __apply_alternatives(__alt_instructions, __alt_instructions_end, ALT_CTX_LATE);
+}
+
+static inline void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
+{
+ __apply_alternatives(start, end, ALT_CTX_ALL);
+}
/*
* +---------------------------------+
@@ -48,10 +115,10 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
#define OLDINSTR(oldinstr) \
"661:\n\t" oldinstr "\n662:\n"
-#define ALTINSTR_ENTRY(facility, num) \
+#define ALTINSTR_ENTRY(feature, num) \
"\t.long 661b - .\n" /* old instruction */ \
"\t.long " b_altinstr(num)"b - .\n" /* alt instruction */ \
- "\t.word " __stringify(facility) "\n" /* facility bit */ \
+ "\t.long " __stringify(feature) "\n" /* feature */ \
"\t.byte " oldinstr_len "\n" /* instruction len */ \
"\t.org . - (" oldinstr_len ") & 1\n" \
"\t.org . - (" oldinstr_len ") + (" altinstr_len(num) ")\n" \
@@ -61,24 +128,24 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n"
/* alternative assembly primitive: */
-#define ALTERNATIVE(oldinstr, altinstr, facility) \
+#define ALTERNATIVE(oldinstr, altinstr, feature) \
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(altinstr, 1) \
".popsection\n" \
OLDINSTR(oldinstr) \
".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(facility, 1) \
+ ALTINSTR_ENTRY(feature, 1) \
".popsection\n"
-#define ALTERNATIVE_2(oldinstr, altinstr1, facility1, altinstr2, facility2)\
+#define ALTERNATIVE_2(oldinstr, altinstr1, feature1, altinstr2, feature2)\
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(altinstr1, 1) \
ALTINSTR_REPLACEMENT(altinstr2, 2) \
".popsection\n" \
OLDINSTR(oldinstr) \
".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(facility1, 1) \
- ALTINSTR_ENTRY(facility2, 2) \
+ ALTINSTR_ENTRY(feature1, 1) \
+ ALTINSTR_ENTRY(feature2, 2) \
".popsection\n"
/*
@@ -93,12 +160,12 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
* For non barrier like inlines please define new variants
* without volatile and memory clobber.
*/
-#define alternative(oldinstr, altinstr, facility) \
- asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory")
+#define alternative(oldinstr, altinstr, feature) \
+ asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, feature) : : : "memory")
-#define alternative_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \
- asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1, \
- altinstr2, facility2) ::: "memory")
+#define alternative_2(oldinstr, altinstr1, feature1, altinstr2, feature2) \
+ asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, feature1, \
+ altinstr2, feature2) ::: "memory")
/* Alternative inline assembly with input. */
#define alternative_input(oldinstr, newinstr, feature, input...) \
@@ -106,8 +173,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
: : input)
/* Like alternative_input, but with a single output argument */
-#define alternative_io(oldinstr, altinstr, facility, output, input...) \
- asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) \
+#define alternative_io(oldinstr, altinstr, feature, output, input...) \
+ asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, feature) \
: output : input)
/* Use this macro if more than one output parameter is needed. */
@@ -116,6 +183,56 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
/* Use this macro if clobbers are needed without inputs. */
#define ASM_NO_INPUT_CLOBBER(clobber...) : clobber
-#endif /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
+
+/*
+ * Issue one struct alt_instr descriptor entry (need to put it into
+ * the section .altinstructions, see below). This entry contains
+ * enough information for the alternatives patching code to patch an
+ * instruction. See apply_alternatives().
+ */
+.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature
+ .long \orig_start - .
+ .long \alt_start - .
+ .long \feature
+ .byte \orig_end - \orig_start
+ .org . - ( \orig_end - \orig_start ) & 1
+ .org . - ( \orig_end - \orig_start ) + ( \alt_end - \alt_start )
+ .org . - ( \alt_end - \alt_start ) + ( \orig_end - \orig_start )
+.endm
+
+/*
+ * Define an alternative between two instructions. If @feature is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr.
+ */
+.macro ALTERNATIVE oldinstr, newinstr, feature
+ .pushsection .altinstr_replacement,"ax"
+770: \newinstr
+771: .popsection
+772: \oldinstr
+773: .pushsection .altinstructions,"a"
+ alt_entry 772b, 773b, 770b, 771b, \feature
+ .popsection
+.endm
+
+/*
+ * Define an alternative between two instructions. If @feature is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr.
+ */
+.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+ .pushsection .altinstr_replacement,"ax"
+770: \newinstr1
+771: \newinstr2
+772: .popsection
+773: \oldinstr
+774: .pushsection .altinstructions,"a"
+ alt_entry 773b, 774b, 770b, 771b,\feature1
+ alt_entry 773b, 774b, 771b, 772b,\feature2
+ .popsection
+.endm
+
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_S390_ALTERNATIVE_H */
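
A small standalone decode of the packed feature word may help when reading the new layout; the shift values and the ALT_CTX_EARLY/ALT_TYPE_FACILITY constants are taken from the hunk above, the program itself is only an illustration (it assumes the big-endian bitfield layout of struct alt_instr, where ctx/type/data map to bits 31..28, 27..20 and 19..0):

	#include <stdio.h>

	int main(void)
	{
		/* ALT_FACILITY(193): ctx = ALT_CTX_EARLY (1), type = ALT_TYPE_FACILITY (0), data = 193 */
		unsigned int feature = (1u << 28) | (0u << 20) | (193u << 0);

		printf("feature=0x%08x ctx=%u type=%u data=%u\n",
		       feature, feature >> 28, (feature >> 20) & 0xff, feature & 0xfffff);
		return 0;	/* prints: feature=0x100000c1 ctx=1 type=0 data=193 */
	}
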
diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h
index 395b02d6a133..b24459f692fa 100644
--- a/arch/s390/include/asm/ap.h
+++ b/arch/s390/include/asm/ap.h
@@ -38,16 +38,30 @@ typedef unsigned int ap_qid_t;
* The ap queue status word is returned by all three AP functions
* (PQAP, NQAP and DQAP). There's a set of flags in the first
* byte, followed by a 1 byte response code.
+ *
+ * For convenience the 'value' field is a 32 bit access of the
+ * whole status and the 'status_bits' and 'rc' fields comprise
+ * the leftmost 8 status bits and the response_code.
*/
struct ap_queue_status {
- unsigned int queue_empty : 1;
- unsigned int replies_waiting : 1;
- unsigned int queue_full : 1;
- unsigned int : 3;
- unsigned int async : 1;
- unsigned int irq_enabled : 1;
- unsigned int response_code : 8;
- unsigned int : 16;
+ union {
+ unsigned int value : 32;
+ struct {
+ unsigned int status_bits : 8;
+ unsigned int rc : 8;
+ unsigned int : 16;
+ };
+ struct {
+ unsigned int queue_empty : 1;
+ unsigned int replies_waiting : 1;
+ unsigned int queue_full : 1;
+ unsigned int : 3;
+ unsigned int async : 1;
+ unsigned int irq_enabled : 1;
+ unsigned int response_code : 8;
+ unsigned int : 16;
+ };
+ };
};
/*
@@ -103,7 +117,7 @@ struct ap_tapq_hwinfo {
unsigned int accel : 1; /* A */
unsigned int ep11 : 1; /* X */
unsigned int apxa : 1; /* APXA */
- unsigned int : 1;
+ unsigned int slcf : 1; /* Cmd filtering avail. */
unsigned int class : 8;
unsigned int bs : 2; /* SE bind/assoc */
unsigned int : 14;
@@ -143,7 +157,7 @@ static inline struct ap_queue_status ap_tapq(ap_qid_t qid,
" lghi 2,0\n" /* 0 into gr2 */
" .insn rre,0xb2af0000,0,0\n" /* PQAP(TAPQ) */
" lgr %[reg1],1\n" /* gr1 (status) into reg1 */
- " lgr %[reg2],2\n" /* gr2 into reg2 */
+ " lgr %[reg2],2" /* gr2 into reg2 */
: [reg1] "=&d" (reg1.value), [reg2] "=&d" (reg2)
: [qid] "d" (qid)
: "cc", "0", "1", "2");
@@ -186,7 +200,7 @@ static inline struct ap_queue_status ap_rapq(ap_qid_t qid, int fbit)
asm volatile(
" lgr 0,%[reg0]\n" /* qid arg into gr0 */
" .insn rre,0xb2af0000,0,0\n" /* PQAP(RAPQ) */
- " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ " lgr %[reg1],1" /* gr1 (status) into reg1 */
: [reg1] "=&d" (reg1.value)
: [reg0] "d" (reg0)
: "cc", "0", "1");
@@ -211,7 +225,7 @@ static inline struct ap_queue_status ap_zapq(ap_qid_t qid, int fbit)
asm volatile(
" lgr 0,%[reg0]\n" /* qid arg into gr0 */
" .insn rre,0xb2af0000,0,0\n" /* PQAP(ZAPQ) */
- " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ " lgr %[reg1],1" /* gr1 (status) into reg1 */
: [reg1] "=&d" (reg1.value)
: [reg0] "d" (reg0)
: "cc", "0", "1");
@@ -315,7 +329,7 @@ static inline struct ap_queue_status ap_aqic(ap_qid_t qid,
" lgr 1,%[reg1]\n" /* irq ctrl into gr1 */
" lgr 2,%[reg2]\n" /* ni addr into gr2 */
" .insn rre,0xb2af0000,0,0\n" /* PQAP(AQIC) */
- " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ " lgr %[reg1],1" /* gr1 (status) into reg1 */
: [reg1] "+&d" (reg1.value)
: [reg0] "d" (reg0), [reg2] "d" (reg2)
: "cc", "memory", "0", "1", "2");
@@ -363,7 +377,7 @@ static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit,
" lgr 1,%[reg1]\n" /* qact in info into gr1 */
" .insn rre,0xb2af0000,0,0\n" /* PQAP(QACT) */
" lgr %[reg1],1\n" /* gr1 (status) into reg1 */
- " lgr %[reg2],2\n" /* qact out info into reg2 */
+ " lgr %[reg2],2" /* qact out info into reg2 */
: [reg1] "+&d" (reg1.value), [reg2] "=&d" (reg2)
: [reg0] "d" (reg0)
: "cc", "0", "1", "2");
@@ -388,7 +402,7 @@ static inline struct ap_queue_status ap_bapq(ap_qid_t qid)
asm volatile(
" lgr 0,%[reg0]\n" /* qid arg into gr0 */
" .insn rre,0xb2af0000,0,0\n" /* PQAP(BAPQ) */
- " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ " lgr %[reg1],1" /* gr1 (status) into reg1 */
: [reg1] "=&d" (reg1.value)
: [reg0] "d" (reg0)
: "cc", "0", "1");
@@ -416,7 +430,7 @@ static inline struct ap_queue_status ap_aapq(ap_qid_t qid, unsigned int sec_idx)
" lgr 0,%[reg0]\n" /* qid arg into gr0 */
" lgr 2,%[reg2]\n" /* secret index into gr2 */
" .insn rre,0xb2af0000,0,0\n" /* PQAP(AAPQ) */
- " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ " lgr %[reg1],1" /* gr1 (status) into reg1 */
: [reg1] "=&d" (reg1.value)
: [reg0] "d" (reg0), [reg2] "d" (reg2)
: "cc", "0", "1", "2");
@@ -453,7 +467,7 @@ static inline struct ap_queue_status ap_nqap(ap_qid_t qid,
" lgr 0,%[reg0]\n" /* qid param in gr0 */
"0: .insn rre,0xb2ad0000,%[nqap_r1],%[nqap_r2]\n"
" brc 2,0b\n" /* handle partial completion */
- " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ " lgr %[reg1],1" /* gr1 (status) into reg1 */
: [reg0] "+&d" (reg0), [reg1] "=&d" (reg1.value),
[nqap_r2] "+&d" (nqap_r2.pair)
: [nqap_r1] "d" (nqap_r1.pair)
@@ -518,7 +532,7 @@ static inline struct ap_queue_status ap_dqap(ap_qid_t qid,
" brc 6,0b\n" /* handle partial complete */
"2: lgr %[reg0],0\n" /* gr0 (qid + info) into reg0 */
" lgr %[reg1],1\n" /* gr1 (status) into reg1 */
- " lgr %[reg2],2\n" /* gr2 (res length) into reg2 */
+ " lgr %[reg2],2" /* gr2 (res length) into reg2 */
: [reg0] "+&d" (reg0), [reg1] "=&d" (reg1.value),
[reg2] "=&d" (reg2), [rp1] "+&d" (rp1.pair),
[rp2] "+&d" (rp2.pair)
diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h
index a92ebbc7aa7a..99b2902c10fd 100644
--- a/arch/s390/include/asm/appldata.h
+++ b/arch/s390/include/asm/appldata.h
@@ -9,6 +9,7 @@
#define _ASM_S390_APPLDATA_H
#include <linux/io.h>
+#include <asm/machine.h>
#include <asm/diag.h>
#define APPLDATA_START_INTERVAL_REC 0x80
@@ -48,7 +49,7 @@ static inline int appldata_asm(struct appldata_parameter_list *parm_list,
{
int ry;
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return -EOPNOTSUPP;
parm_list->diag = 0xdc;
parm_list->function = fn;
diff --git a/arch/s390/include/asm/arch-stackprotector.h b/arch/s390/include/asm/arch-stackprotector.h
new file mode 100644
index 000000000000..953627259e91
--- /dev/null
+++ b/arch/s390/include/asm/arch-stackprotector.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_ARCH_STACKPROTECTOR_H
+#define _ASM_S390_ARCH_STACKPROTECTOR_H
+
+extern unsigned long __stack_chk_guard;
+extern int stack_protector_debug;
+
+void __stack_protector_apply_early(unsigned long kernel_start);
+int __stack_protector_apply(unsigned long *start, unsigned long *end, unsigned long kernel_start);
+
+static inline void stack_protector_apply_early(unsigned long kernel_start)
+{
+ if (IS_ENABLED(CONFIG_STACKPROTECTOR))
+ __stack_protector_apply_early(kernel_start);
+}
+
+static inline int stack_protector_apply(unsigned long *start, unsigned long *end)
+{
+ if (IS_ENABLED(CONFIG_STACKPROTECTOR))
+ return __stack_protector_apply(start, end, 0);
+ return 0;
+}
+
+#endif /* _ASM_S390_ARCH_STACKPROTECTOR_H */
diff --git a/arch/s390/include/asm/arch_hweight.h b/arch/s390/include/asm/arch_hweight.h
new file mode 100644
index 000000000000..aca08b0acbc1
--- /dev/null
+++ b/arch/s390/include/asm/arch_hweight.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_ARCH_HWEIGHT_H
+#define _ASM_S390_ARCH_HWEIGHT_H
+
+#include <linux/types.h>
+#include <asm/march.h>
+
+static __always_inline unsigned long popcnt_z196(unsigned long w)
+{
+ unsigned long cnt;
+
+ asm volatile(".insn rrf,0xb9e10000,%[cnt],%[w],0,0"
+ : [cnt] "=d" (cnt)
+ : [w] "d" (w)
+ : "cc");
+ return cnt;
+}
+
+static __always_inline unsigned long popcnt_z15(unsigned long w)
+{
+ unsigned long cnt;
+
+ asm volatile(".insn rrf,0xb9e10000,%[cnt],%[w],8,0"
+ : [cnt] "=d" (cnt)
+ : [w] "d" (w)
+ : "cc");
+ return cnt;
+}
+
+static __always_inline unsigned long __arch_hweight64(__u64 w)
+{
+ if (__is_defined(MARCH_HAS_Z15_FEATURES))
+ return popcnt_z15(w);
+ if (__is_defined(MARCH_HAS_Z196_FEATURES)) {
+ w = popcnt_z196(w);
+ w += w >> 32;
+ w += w >> 16;
+ w += w >> 8;
+ return w & 0xff;
+ }
+ return __sw_hweight64(w);
+}
+
+static __always_inline unsigned int __arch_hweight32(unsigned int w)
+{
+ if (__is_defined(MARCH_HAS_Z15_FEATURES))
+ return popcnt_z15(w);
+ if (__is_defined(MARCH_HAS_Z196_FEATURES)) {
+ w = popcnt_z196(w);
+ w += w >> 16;
+ w += w >> 8;
+ return w & 0xff;
+ }
+ return __sw_hweight32(w);
+}
+
+static __always_inline unsigned int __arch_hweight16(unsigned int w)
+{
+ if (__is_defined(MARCH_HAS_Z15_FEATURES))
+ return popcnt_z15((unsigned short)w);
+ if (__is_defined(MARCH_HAS_Z196_FEATURES)) {
+ w = popcnt_z196(w);
+ w += w >> 8;
+ return w & 0xff;
+ }
+ return __sw_hweight16(w);
+}
+
+static __always_inline unsigned int __arch_hweight8(unsigned int w)
+{
+ if (__is_defined(MARCH_HAS_Z196_FEATURES))
+ return popcnt_z196((unsigned char)w);
+ return __sw_hweight8(w);
+}
+
+#endif /* _ASM_S390_ARCH_HWEIGHT_H */
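
The z196 variant of popcnt yields a population count per byte, which is why __arch_hweight64() follows it with the shift-and-add folding. A standalone model of that folding (per_byte_popcnt() is a hypothetical stand-in for the instruction, not kernel code):

	#include <stdio.h>
	#include <stdint.h>

	static uint64_t per_byte_popcnt(uint64_t w)
	{
		uint64_t r = 0;
		int i;

		/* each result byte holds the popcount of the corresponding input byte */
		for (i = 0; i < 8; i++)
			r |= (uint64_t)__builtin_popcountll((w >> (8 * i)) & 0xff) << (8 * i);
		return r;
	}

	int main(void)
	{
		uint64_t w = 0xff00ff00ff00ff00ULL;	/* 32 bits set */

		w = per_byte_popcnt(w);	/* models popcnt_z196() */
		w += w >> 32;		/* fold byte counts into the low byte */
		w += w >> 16;
		w += w >> 8;
		printf("%u\n", (unsigned int)(w & 0xff));	/* prints 32 */
		return 0;
	}
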
diff --git a/arch/s390/include/asm/asce.h b/arch/s390/include/asm/asce.h
new file mode 100644
index 000000000000..f6dfaaba735a
--- /dev/null
+++ b/arch/s390/include/asm/asce.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_ASCE_H
+#define _ASM_S390_ASCE_H
+
+#include <linux/thread_info.h>
+#include <linux/irqflags.h>
+#include <asm/lowcore.h>
+#include <asm/ctlreg.h>
+
+static inline bool enable_sacf_uaccess(void)
+{
+ unsigned long flags;
+
+ if (test_thread_flag(TIF_ASCE_PRIMARY))
+ return true;
+ local_irq_save(flags);
+ local_ctl_load(1, &get_lowcore()->kernel_asce);
+ set_thread_flag(TIF_ASCE_PRIMARY);
+ local_irq_restore(flags);
+ return false;
+}
+
+static inline void disable_sacf_uaccess(bool previous)
+{
+ unsigned long flags;
+
+ if (previous)
+ return;
+ local_irq_save(flags);
+ local_ctl_load(1, &get_lowcore()->user_asce);
+ clear_thread_flag(TIF_ASCE_PRIMARY);
+ local_irq_restore(flags);
+}
+
+#endif /* _ASM_S390_ASCE_H */
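
Usage sketch for the pairing (hypothetical caller, not part of the patch): the bool returned by enable_sacf_uaccess() records whether TIF_ASCE_PRIMARY was already set, so a nested call restores the previous state instead of unconditionally switching control register 1 back to the user ASCE.

	bool sacf_flag;

	sacf_flag = enable_sacf_uaccess();
	/* ... run code that needs the kernel ASCE loaded in control register 1 ... */
	disable_sacf_uaccess(sacf_flag);
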
diff --git a/arch/s390/include/asm/asm-const.h b/arch/s390/include/asm/asm-const.h
index 11f615eb0066..1cfffad9eea0 100644
--- a/arch/s390/include/asm/asm-const.h
+++ b/arch/s390/include/asm/asm-const.h
@@ -2,7 +2,7 @@
#ifndef _ASM_S390_ASM_CONST_H
#define _ASM_S390_ASM_CONST_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
# define stringify_in_c(...) __VA_ARGS__
#else
/* This version of stringify will deal with commas... */
diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h
index 4a6b0a8b6412..d23ea0c94e4e 100644
--- a/arch/s390/include/asm/asm-extable.h
+++ b/arch/s390/include/asm/asm-extable.h
@@ -9,11 +9,13 @@
#define EX_TYPE_NONE 0
#define EX_TYPE_FIXUP 1
#define EX_TYPE_BPF 2
-#define EX_TYPE_UA_STORE 3
-#define EX_TYPE_UA_LOAD_MEM 4
+#define EX_TYPE_UA_FAULT 3
#define EX_TYPE_UA_LOAD_REG 5
#define EX_TYPE_UA_LOAD_REGPAIR 6
#define EX_TYPE_ZEROPAD 7
+#define EX_TYPE_FPC 8
+#define EX_TYPE_UA_MVCOS_TO 9
+#define EX_TYPE_UA_MVCOS_FROM 10
#define EX_DATA_REG_ERR_SHIFT 0
#define EX_DATA_REG_ERR GENMASK(3, 0)
@@ -69,11 +71,8 @@
#define EX_TABLE_AMODE31(_fault, _target) \
__EX_TABLE(.amode31.ex_table, _fault, _target, EX_TYPE_FIXUP, __stringify(%%r0), __stringify(%%r0), 0)
-#define EX_TABLE_UA_STORE(_fault, _target, _regerr) \
- __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_STORE, _regerr, _regerr, 0)
-
-#define EX_TABLE_UA_LOAD_MEM(_fault, _target, _regerr, _regmem, _len) \
- __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_MEM, _regerr, _regmem, _len)
+#define EX_TABLE_UA_FAULT(_fault, _target, _regerr) \
+ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_FAULT, _regerr, _regerr, 0)
#define EX_TABLE_UA_LOAD_REG(_fault, _target, _regerr, _regzero) \
__EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REG, _regerr, _regzero, 0)
@@ -84,4 +83,13 @@
#define EX_TABLE_ZEROPAD(_fault, _target, _regdata, _regaddr) \
__EX_TABLE(__ex_table, _fault, _target, EX_TYPE_ZEROPAD, _regdata, _regaddr, 0)
+#define EX_TABLE_FPC(_fault, _target) \
+ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_FPC, __stringify(%%r0), __stringify(%%r0), 0)
+
+#define EX_TABLE_UA_MVCOS_TO(_fault, _target) \
+ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_MVCOS_TO, __stringify(%%r0), __stringify(%%r0), 0)
+
+#define EX_TABLE_UA_MVCOS_FROM(_fault, _target) \
+ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_MVCOS_FROM, __stringify(%%r0), __stringify(%%r0), 0)
+
#endif /* __ASM_EXTABLE_H */
diff --git a/arch/s390/include/asm/asm.h b/arch/s390/include/asm/asm.h
new file mode 100644
index 000000000000..e9062b01e2a2
--- /dev/null
+++ b/arch/s390/include/asm/asm.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_ASM_H
+#define _ASM_S390_ASM_H
+
+#include <linux/stringify.h>
+
+/*
+ * Helper macros to be used for flag output operand handling.
+ * Inline assemblies must use four of the five supplied macros:
+ *
+ * Use CC_IPM(sym) at the end of the inline assembly; this extracts the
+ * condition code and program mask with the ipm instruction and writes it to
+ * the variable with symbolic name [sym] if the compiler has no support for
+ * flag output operands. If the compiler has support for flag output operands
+ * this generates no code.
+ *
+ * Use CC_OUT(sym, var) at the output operand list of an inline assembly. This
+ * defines an output operand with symbolic name [sym] for the variable
+ * [var]. [var] must be an int variable and [sym] must be identical with [sym]
+ * used with CC_IPM().
+ *
+ * Use either CC_CLOBBER or CC_CLOBBER_LIST() for the clobber list. Use
+ * CC_CLOBBER if the clobber list contains only "cc", otherwise use
+ * CC_CLOBBER_LIST() and add all clobbers as argument to the macro.
+ *
+ * Use CC_TRANSFORM() to convert the variable [var] which contains the
+ * extracted condition code. If the condition code is extracted with ipm, the
+ * [var] also contains the program mask. CC_TRANSFORM() moves the condition
+ * code to the two least significant bits and sets all other bits to zero.
+ */
+#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_CC_ASM_FLAG_OUTPUT_BROKEN))
+
+#define __HAVE_ASM_FLAG_OUTPUTS__
+
+#define CC_IPM(sym)
+#define CC_OUT(sym, var) "=@cc" (var)
+#define CC_TRANSFORM(cc) ({ cc; })
+#define CC_CLOBBER
+#define CC_CLOBBER_LIST(...) __VA_ARGS__
+
+#else
+
+#define CC_IPM(sym) " ipm %[" __stringify(sym) "]\n"
+#define CC_OUT(sym, var) [sym] "=d" (var)
+#define CC_TRANSFORM(cc) ({ (cc) >> 28; })
+#define CC_CLOBBER "cc"
+#define CC_CLOBBER_LIST(...) "cc", __VA_ARGS__
+
+#endif
+
+#endif /* _ASM_S390_ASM_H */
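
A hedged usage sketch of the four macros (hypothetical inline assembly, not taken from the patch): with flag output support CC_OUT() becomes "=@cc" while CC_IPM() and the "cc" clobber vanish; otherwise the condition code is fetched with ipm and CC_TRANSFORM() shifts it into the two low bits.

	static inline int compare_logical_cc(unsigned long a, unsigned long b)
	{
		int cc;

		asm volatile(
			"	clgr	%[a],%[b]\n"
			CC_IPM(cc)
			: CC_OUT(cc, cc)
			: [a] "d" (a), [b] "d" (b)
			: CC_CLOBBER);
		return CC_TRANSFORM(cc);	/* 0: equal, 1: a < b, 2: a > b */
	}
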
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 0c4cad7d5a5b..b36dd6a1d652 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -17,13 +17,13 @@
static __always_inline int arch_atomic_read(const atomic_t *v)
{
- return __atomic_read(v);
+ return __atomic_read(&v->counter);
}
#define arch_atomic_read arch_atomic_read
static __always_inline void arch_atomic_set(atomic_t *v, int i)
{
- __atomic_set(v, i);
+ __atomic_set(&v->counter, i);
}
#define arch_atomic_set arch_atomic_set
@@ -45,6 +45,36 @@ static __always_inline void arch_atomic_add(int i, atomic_t *v)
}
#define arch_atomic_add arch_atomic_add
+static __always_inline void arch_atomic_inc(atomic_t *v)
+{
+ __atomic_add_const(1, &v->counter);
+}
+#define arch_atomic_inc arch_atomic_inc
+
+static __always_inline void arch_atomic_dec(atomic_t *v)
+{
+ __atomic_add_const(-1, &v->counter);
+}
+#define arch_atomic_dec arch_atomic_dec
+
+static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
+{
+ return __atomic_add_and_test_barrier(-i, &v->counter);
+}
+#define arch_atomic_sub_and_test arch_atomic_sub_and_test
+
+static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
+{
+ return __atomic_add_const_and_test_barrier(-1, &v->counter);
+}
+#define arch_atomic_dec_and_test arch_atomic_dec_and_test
+
+static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
+{
+ return __atomic_add_const_and_test_barrier(1, &v->counter);
+}
+#define arch_atomic_inc_and_test arch_atomic_inc_and_test
+
#define arch_atomic_sub(_i, _v) arch_atomic_add(-(int)(_i), _v)
#define arch_atomic_sub_return(_i, _v) arch_atomic_add_return(-(int)(_i), _v)
#define arch_atomic_fetch_sub(_i, _v) arch_atomic_fetch_add(-(int)(_i), _v)
@@ -72,25 +102,35 @@ ATOMIC_OPS(xor)
#define arch_atomic_fetch_or arch_atomic_fetch_or
#define arch_atomic_fetch_xor arch_atomic_fetch_xor
-#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
+static __always_inline int arch_atomic_xchg(atomic_t *v, int new)
+{
+ return arch_xchg(&v->counter, new);
+}
+#define arch_atomic_xchg arch_atomic_xchg
static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
{
- return __atomic_cmpxchg(&v->counter, old, new);
+ return arch_cmpxchg(&v->counter, old, new);
}
#define arch_atomic_cmpxchg arch_atomic_cmpxchg
+static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int new)
+{
+ return arch_try_cmpxchg(&v->counter, old, new);
+}
+#define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg
+
#define ATOMIC64_INIT(i) { (i) }
static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
{
- return __atomic64_read(v);
+ return __atomic64_read((long *)&v->counter);
}
#define arch_atomic64_read arch_atomic64_read
static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
- __atomic64_set(v, i);
+ __atomic64_set((long *)&v->counter, i);
}
#define arch_atomic64_set arch_atomic64_set
@@ -112,14 +152,54 @@ static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
}
#define arch_atomic64_add arch_atomic64_add
-#define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), new))
+static __always_inline void arch_atomic64_inc(atomic64_t *v)
+{
+ __atomic64_add_const(1, (long *)&v->counter);
+}
+#define arch_atomic64_inc arch_atomic64_inc
+
+static __always_inline void arch_atomic64_dec(atomic64_t *v)
+{
+ __atomic64_add_const(-1, (long *)&v->counter);
+}
+#define arch_atomic64_dec arch_atomic64_dec
+
+static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v)
+{
+ return __atomic64_add_and_test_barrier(-i, (long *)&v->counter);
+}
+#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test
+
+static __always_inline bool arch_atomic64_dec_and_test(atomic64_t *v)
+{
+ return __atomic64_add_const_and_test_barrier(-1, (long *)&v->counter);
+}
+#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test
+
+static __always_inline bool arch_atomic64_inc_and_test(atomic64_t *v)
+{
+ return __atomic64_add_const_and_test_barrier(1, (long *)&v->counter);
+}
+#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
+
+static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new)
+{
+ return arch_xchg(&v->counter, new);
+}
+#define arch_atomic64_xchg arch_atomic64_xchg
static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{
- return __atomic64_cmpxchg((long *)&v->counter, old, new);
+ return arch_cmpxchg(&v->counter, old, new);
}
#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
+static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
+{
+ return arch_try_cmpxchg(&v->counter, old, new);
+}
+#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
+
#define ATOMIC64_OPS(op) \
static __always_inline void arch_atomic64_##op(s64 i, atomic64_t *v) \
{ \
diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h
index 7fa5f96a553a..845b77864412 100644
--- a/arch/s390/include/asm/atomic_ops.h
+++ b/arch/s390/include/asm/atomic_ops.h
@@ -8,41 +8,57 @@
#ifndef __ARCH_S390_ATOMIC_OPS__
#define __ARCH_S390_ATOMIC_OPS__
-static __always_inline int __atomic_read(const atomic_t *v)
+#include <linux/limits.h>
+#include <asm/march.h>
+#include <asm/asm.h>
+
+static __always_inline int __atomic_read(const int *ptr)
{
- int c;
+ int val;
asm volatile(
- " l %0,%1\n"
- : "=d" (c) : "R" (v->counter));
- return c;
+ " l %[val],%[ptr]"
+ : [val] "=d" (val) : [ptr] "R" (*ptr));
+ return val;
}
-static __always_inline void __atomic_set(atomic_t *v, int i)
+static __always_inline void __atomic_set(int *ptr, int val)
{
- asm volatile(
- " st %1,%0\n"
- : "=R" (v->counter) : "d" (i));
+ if (__builtin_constant_p(val) && val >= S16_MIN && val <= S16_MAX) {
+ asm volatile(
+ " mvhi %[ptr],%[val]"
+ : [ptr] "=Q" (*ptr) : [val] "K" (val));
+ } else {
+ asm volatile(
+ " st %[val],%[ptr]"
+ : [ptr] "=R" (*ptr) : [val] "d" (val));
+ }
}
-static __always_inline s64 __atomic64_read(const atomic64_t *v)
+static __always_inline long __atomic64_read(const long *ptr)
{
- s64 c;
+ long val;
asm volatile(
- " lg %0,%1\n"
- : "=d" (c) : "RT" (v->counter));
- return c;
+ " lg %[val],%[ptr]"
+ : [val] "=d" (val) : [ptr] "RT" (*ptr));
+ return val;
}
-static __always_inline void __atomic64_set(atomic64_t *v, s64 i)
+static __always_inline void __atomic64_set(long *ptr, long val)
{
- asm volatile(
- " stg %1,%0\n"
- : "=RT" (v->counter) : "d" (i));
+ if (__builtin_constant_p(val) && val >= S16_MIN && val <= S16_MAX) {
+ asm volatile(
+ " mvghi %[ptr],%[val]"
+ : [ptr] "=Q" (*ptr) : [val] "K" (val));
+ } else {
+ asm volatile(
+ " stg %[val],%[ptr]"
+ : [ptr] "=RT" (*ptr) : [val] "d" (val));
+ }
}
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+#ifdef MARCH_HAS_Z196_FEATURES
#define __ATOMIC_OP(op_name, op_type, op_string, op_barrier) \
static __always_inline op_type op_name(op_type val, op_type *ptr) \
@@ -50,7 +66,7 @@ static __always_inline op_type op_name(op_type val, op_type *ptr) \
op_type old; \
\
asm volatile( \
- op_string " %[old],%[val],%[ptr]\n" \
+ op_string " %[old],%[val],%[ptr]" \
op_barrier \
: [old] "=d" (old), [ptr] "+QS" (*ptr) \
: [val] "d" (val) : "cc", "memory"); \
@@ -58,8 +74,8 @@ static __always_inline op_type op_name(op_type val, op_type *ptr) \
} \
#define __ATOMIC_OPS(op_name, op_type, op_string) \
- __ATOMIC_OP(op_name, op_type, op_string, "\n") \
- __ATOMIC_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
+ __ATOMIC_OP(op_name, op_type, op_string, "") \
+ __ATOMIC_OP(op_name##_barrier, op_type, op_string, "\nbcr 14,0")
__ATOMIC_OPS(__atomic_add, int, "laa")
__ATOMIC_OPS(__atomic_and, int, "lan")
@@ -78,14 +94,14 @@ __ATOMIC_OPS(__atomic64_xor, long, "laxg")
static __always_inline void op_name(op_type val, op_type *ptr) \
{ \
asm volatile( \
- op_string " %[ptr],%[val]\n" \
+ op_string " %[ptr],%[val]" \
op_barrier \
: [ptr] "+QS" (*ptr) : [val] "i" (val) : "cc", "memory");\
}
#define __ATOMIC_CONST_OPS(op_name, op_type, op_string) \
- __ATOMIC_CONST_OP(op_name, op_type, op_string, "\n") \
- __ATOMIC_CONST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
+ __ATOMIC_CONST_OP(op_name, op_type, op_string, "") \
+ __ATOMIC_CONST_OP(op_name##_barrier, op_type, op_string, "\nbcr 14,0")
__ATOMIC_CONST_OPS(__atomic_add_const, int, "asi")
__ATOMIC_CONST_OPS(__atomic64_add_const, long, "agsi")
@@ -93,7 +109,7 @@ __ATOMIC_CONST_OPS(__atomic64_add_const, long, "agsi")
#undef __ATOMIC_CONST_OPS
#undef __ATOMIC_CONST_OP
-#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+#else /* MARCH_HAS_Z196_FEATURES */
#define __ATOMIC_OP(op_name, op_string) \
static __always_inline int op_name(int val, int *ptr) \
@@ -147,55 +163,83 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr")
#undef __ATOMIC64_OPS
-#define __atomic_add_const(val, ptr) __atomic_add(val, ptr)
-#define __atomic_add_const_barrier(val, ptr) __atomic_add(val, ptr)
-#define __atomic64_add_const(val, ptr) __atomic64_add(val, ptr)
-#define __atomic64_add_const_barrier(val, ptr) __atomic64_add(val, ptr)
+#define __atomic_add_const(val, ptr) ((void)__atomic_add(val, ptr))
+#define __atomic_add_const_barrier(val, ptr) ((void)__atomic_add(val, ptr))
+#define __atomic64_add_const(val, ptr) ((void)__atomic64_add(val, ptr))
+#define __atomic64_add_const_barrier(val, ptr) ((void)__atomic64_add(val, ptr))
-#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+#endif /* MARCH_HAS_Z196_FEATURES */
-static __always_inline int __atomic_cmpxchg(int *ptr, int old, int new)
-{
- asm volatile(
- " cs %[old],%[new],%[ptr]"
- : [old] "+d" (old), [ptr] "+Q" (*ptr)
- : [new] "d" (new)
- : "cc", "memory");
- return old;
-}
+#if defined(MARCH_HAS_Z196_FEATURES) && defined(__HAVE_ASM_FLAG_OUTPUTS__)
-static __always_inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new)
-{
- int old_expected = old;
+#define __ATOMIC_TEST_OP(op_name, op_type, op_string, op_barrier) \
+static __always_inline bool op_name(op_type val, op_type *ptr) \
+{ \
+ op_type tmp; \
+ int cc; \
+ \
+ asm volatile( \
+ op_string " %[tmp],%[val],%[ptr]" \
+ op_barrier \
+ : "=@cc" (cc), [tmp] "=d" (tmp), [ptr] "+QS" (*ptr) \
+ : [val] "d" (val) \
+ : "memory"); \
+ return (cc == 0) || (cc == 2); \
+} \
- asm volatile(
- " cs %[old],%[new],%[ptr]"
- : [old] "+d" (old), [ptr] "+Q" (*ptr)
- : [new] "d" (new)
- : "cc", "memory");
- return old == old_expected;
-}
+#define __ATOMIC_TEST_OPS(op_name, op_type, op_string) \
+ __ATOMIC_TEST_OP(op_name, op_type, op_string, "") \
+ __ATOMIC_TEST_OP(op_name##_barrier, op_type, op_string, "\nbcr 14,0")
-static __always_inline long __atomic64_cmpxchg(long *ptr, long old, long new)
-{
- asm volatile(
- " csg %[old],%[new],%[ptr]"
- : [old] "+d" (old), [ptr] "+QS" (*ptr)
- : [new] "d" (new)
- : "cc", "memory");
- return old;
+__ATOMIC_TEST_OPS(__atomic_add_and_test, int, "laal")
+__ATOMIC_TEST_OPS(__atomic64_add_and_test, long, "laalg")
+
+#undef __ATOMIC_TEST_OPS
+#undef __ATOMIC_TEST_OP
+
+#define __ATOMIC_CONST_TEST_OP(op_name, op_type, op_string, op_barrier) \
+static __always_inline bool op_name(op_type val, op_type *ptr) \
+{ \
+ int cc; \
+ \
+ asm volatile( \
+ op_string " %[ptr],%[val]" \
+ op_barrier \
+ : "=@cc" (cc), [ptr] "+QS" (*ptr) \
+ : [val] "i" (val) \
+ : "memory"); \
+ return (cc == 0) || (cc == 2); \
}
-static __always_inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new)
-{
- long old_expected = old;
+#define __ATOMIC_CONST_TEST_OPS(op_name, op_type, op_string) \
+ __ATOMIC_CONST_TEST_OP(op_name, op_type, op_string, "") \
+ __ATOMIC_CONST_TEST_OP(op_name##_barrier, op_type, op_string, "\nbcr 14,0")
- asm volatile(
- " csg %[old],%[new],%[ptr]"
- : [old] "+d" (old), [ptr] "+QS" (*ptr)
- : [new] "d" (new)
- : "cc", "memory");
- return old == old_expected;
+__ATOMIC_CONST_TEST_OPS(__atomic_add_const_and_test, int, "alsi")
+__ATOMIC_CONST_TEST_OPS(__atomic64_add_const_and_test, long, "algsi")
+
+#undef __ATOMIC_CONST_TEST_OPS
+#undef __ATOMIC_CONST_TEST_OP
+
+#else /* defined(MARCH_HAS_Z196_FEATURES) && defined(__HAVE_ASM_FLAG_OUTPUTS__) */
+
+#define __ATOMIC_TEST_OP(op_name, op_func, op_type) \
+static __always_inline bool op_name(op_type val, op_type *ptr) \
+{ \
+ return op_func(val, ptr) == -val; \
}
+__ATOMIC_TEST_OP(__atomic_add_and_test, __atomic_add, int)
+__ATOMIC_TEST_OP(__atomic_add_and_test_barrier, __atomic_add_barrier, int)
+__ATOMIC_TEST_OP(__atomic_add_const_and_test, __atomic_add, int)
+__ATOMIC_TEST_OP(__atomic_add_const_and_test_barrier, __atomic_add_barrier, int)
+__ATOMIC_TEST_OP(__atomic64_add_and_test, __atomic64_add, long)
+__ATOMIC_TEST_OP(__atomic64_add_and_test_barrier, __atomic64_add_barrier, long)
+__ATOMIC_TEST_OP(__atomic64_add_const_and_test, __atomic64_add, long)
+__ATOMIC_TEST_OP(__atomic64_add_const_and_test_barrier, __atomic64_add_barrier, long)
+
+#undef __ATOMIC_TEST_OP
+
+#endif /* defined(MARCH_HAS_Z196_FEATURES) && defined(__HAVE_ASM_FLAG_OUTPUTS__) */
+
#endif /* __ARCH_S390_ATOMIC_OPS__ */
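
The fallback __ATOMIC_TEST_OP() above builds the "add and test" helpers from the fetch-add primitives. A standalone model (not kernel code) of why old == -val is the right test:

	#include <stdio.h>

	static int fetch_add(int val, int *ptr)	/* models __atomic_add(): returns the old value */
	{
		int old = *ptr;

		*ptr += val;
		return old;
	}

	static int add_and_test(int val, int *ptr)
	{
		/* old == -val  <=>  old + val == 0, i.e. the new counter value is zero */
		return fetch_add(val, ptr) == -val;
	}

	int main(void)
	{
		int counter = 1;

		printf("%d\n", add_and_test(-1, &counter));	/* 1: counter hit zero */
		printf("%d\n", add_and_test(-1, &counter));	/* 0: counter is now -1 */
		return 0;
	}
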
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 82de2a7c4160..f3184073e754 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -8,17 +8,19 @@
#ifndef __ASM_BARRIER_H
#define __ASM_BARRIER_H
+#include <asm/march.h>
+
/*
* Force strict CPU ordering.
* And yes, this is required on UP too when we're talking
* to devices.
*/
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+#ifdef MARCH_HAS_Z196_FEATURES
/* Fast-BCR without checkpoint synchronization */
-#define __ASM_BCR_SERIALIZE "bcr 14,0\n"
+#define __ASM_BCR_SERIALIZE "bcr 14,0"
#else
-#define __ASM_BCR_SERIALIZE "bcr 15,0\n"
+#define __ASM_BCR_SERIALIZE "bcr 15,0"
#endif
static __always_inline void bcr_serialize(void)
@@ -67,12 +69,12 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
if (__builtin_constant_p(size) && size > 0) {
asm(" clgr %2,%1\n"
- " slbgr %0,%0\n"
+ " slbgr %0,%0"
:"=d" (mask) : "d" (size-1), "d" (index) :"cc");
return mask;
}
asm(" clgr %1,%2\n"
- " slbgr %0,%0\n"
+ " slbgr %0,%0"
:"=d" (mask) : "d" (size), "d" (index) :"cc");
return ~mask;
}
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index c467dffa8c12..5f10074665b0 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -36,184 +36,45 @@
#include <linux/typecheck.h>
#include <linux/compiler.h>
#include <linux/types.h>
-#include <asm/atomic_ops.h>
-#include <asm/barrier.h>
-
-#define __BITOPS_WORDS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG)
-
-static inline unsigned long *
-__bitops_word(unsigned long nr, const volatile unsigned long *ptr)
-{
- unsigned long addr;
-
- addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - 1))) >> 3);
- return (unsigned long *)addr;
-}
-
-static inline unsigned long __bitops_mask(unsigned long nr)
-{
- return 1UL << (nr & (BITS_PER_LONG - 1));
-}
-
-static __always_inline void arch_set_bit(unsigned long nr, volatile unsigned long *ptr)
-{
- unsigned long *addr = __bitops_word(nr, ptr);
- unsigned long mask = __bitops_mask(nr);
-
- __atomic64_or(mask, (long *)addr);
-}
-
-static __always_inline void arch_clear_bit(unsigned long nr, volatile unsigned long *ptr)
-{
- unsigned long *addr = __bitops_word(nr, ptr);
- unsigned long mask = __bitops_mask(nr);
-
- __atomic64_and(~mask, (long *)addr);
-}
-
-static __always_inline void arch_change_bit(unsigned long nr,
- volatile unsigned long *ptr)
-{
- unsigned long *addr = __bitops_word(nr, ptr);
- unsigned long mask = __bitops_mask(nr);
-
- __atomic64_xor(mask, (long *)addr);
-}
-
-static inline bool arch_test_and_set_bit(unsigned long nr,
- volatile unsigned long *ptr)
-{
- unsigned long *addr = __bitops_word(nr, ptr);
- unsigned long mask = __bitops_mask(nr);
- unsigned long old;
-
- old = __atomic64_or_barrier(mask, (long *)addr);
- return old & mask;
-}
-
-static inline bool arch_test_and_clear_bit(unsigned long nr,
- volatile unsigned long *ptr)
-{
- unsigned long *addr = __bitops_word(nr, ptr);
- unsigned long mask = __bitops_mask(nr);
- unsigned long old;
-
- old = __atomic64_and_barrier(~mask, (long *)addr);
- return old & mask;
-}
-
-static inline bool arch_test_and_change_bit(unsigned long nr,
- volatile unsigned long *ptr)
-{
- unsigned long *addr = __bitops_word(nr, ptr);
- unsigned long mask = __bitops_mask(nr);
- unsigned long old;
-
- old = __atomic64_xor_barrier(mask, (long *)addr);
- return old & mask;
-}
-
-static __always_inline void
-arch___set_bit(unsigned long nr, volatile unsigned long *addr)
-{
- unsigned long *p = __bitops_word(nr, addr);
- unsigned long mask = __bitops_mask(nr);
-
- *p |= mask;
-}
-
-static __always_inline void
-arch___clear_bit(unsigned long nr, volatile unsigned long *addr)
-{
- unsigned long *p = __bitops_word(nr, addr);
- unsigned long mask = __bitops_mask(nr);
-
- *p &= ~mask;
-}
-
-static __always_inline void
-arch___change_bit(unsigned long nr, volatile unsigned long *addr)
-{
- unsigned long *p = __bitops_word(nr, addr);
- unsigned long mask = __bitops_mask(nr);
-
- *p ^= mask;
-}
-
-static __always_inline bool
-arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
-{
- unsigned long *p = __bitops_word(nr, addr);
- unsigned long mask = __bitops_mask(nr);
- unsigned long old;
-
- old = *p;
- *p |= mask;
- return old & mask;
-}
-
-static __always_inline bool
-arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
-{
- unsigned long *p = __bitops_word(nr, addr);
- unsigned long mask = __bitops_mask(nr);
- unsigned long old;
-
- old = *p;
- *p &= ~mask;
- return old & mask;
-}
-
-static __always_inline bool
-arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
-{
- unsigned long *p = __bitops_word(nr, addr);
- unsigned long mask = __bitops_mask(nr);
- unsigned long old;
-
- old = *p;
- *p ^= mask;
- return old & mask;
-}
-
-#define arch_test_bit generic_test_bit
-#define arch_test_bit_acquire generic_test_bit_acquire
-
-static inline bool arch_test_and_set_bit_lock(unsigned long nr,
- volatile unsigned long *ptr)
-{
- if (arch_test_bit(nr, ptr))
- return true;
- return arch_test_and_set_bit(nr, ptr);
-}
-
-static inline void arch_clear_bit_unlock(unsigned long nr,
- volatile unsigned long *ptr)
-{
- smp_mb__before_atomic();
- arch_clear_bit(nr, ptr);
-}
-
-static inline void arch___clear_bit_unlock(unsigned long nr,
- volatile unsigned long *ptr)
-{
- smp_mb();
- arch___clear_bit(nr, ptr);
-}
-
-static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
- volatile unsigned long *ptr)
-{
- unsigned long old;
-
- old = __atomic64_xor_barrier(mask, (long *)ptr);
- return old & BIT(7);
+#include <asm/asm.h>
+
+#define arch___set_bit generic___set_bit
+#define arch___clear_bit generic___clear_bit
+#define arch___change_bit generic___change_bit
+#define arch___test_and_set_bit generic___test_and_set_bit
+#define arch___test_and_clear_bit generic___test_and_clear_bit
+#define arch___test_and_change_bit generic___test_and_change_bit
+#define arch_test_bit_acquire generic_test_bit_acquire
+
+static __always_inline bool arch_test_bit(unsigned long nr, const volatile unsigned long *ptr)
+{
+#ifdef __HAVE_ASM_FLAG_OUTPUTS__
+ const volatile unsigned char *addr;
+ unsigned long mask;
+ int cc;
+
+ /*
+ * With CONFIG_PROFILE_ALL_BRANCHES enabled gcc fails to
+ * handle __builtin_constant_p() in some cases.
+ */
+ if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && __builtin_constant_p(nr)) {
+ addr = (const volatile unsigned char *)ptr;
+ addr += (nr ^ (BITS_PER_LONG - BITS_PER_BYTE)) / BITS_PER_BYTE;
+ mask = 1UL << (nr & (BITS_PER_BYTE - 1));
+ asm volatile(
+ " tm %[addr],%[mask]"
+ : "=@cc" (cc)
+ : [addr] "Q" (*addr), [mask] "I" (mask)
+ );
+ return cc == 3;
+ }
+#endif
+ return generic_test_bit(nr, ptr);
}
-#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte
-#include <asm-generic/bitops/instrumented-atomic.h>
-#include <asm-generic/bitops/instrumented-non-atomic.h>
-#include <asm-generic/bitops/instrumented-lock.h>
+#include <asm-generic/bitops/atomic.h>
+#include <asm-generic/bitops/non-instrumented-non-atomic.h>
+#include <asm-generic/bitops/lock.h>
/*
* Functions which use MSB0 bit numbering.
@@ -261,6 +122,8 @@ static inline bool test_bit_inv(unsigned long nr,
return test_bit(nr ^ (BITS_PER_LONG - 1), ptr);
}
+#ifndef CONFIG_CC_HAS_BUILTIN_FFS
+
/**
* __flogr - find leftmost one
* @word - The word to search
@@ -269,11 +132,12 @@ static inline bool test_bit_inv(unsigned long nr,
* where the most significant bit has bit number 0.
* If no bit is set this function returns 64.
*/
-static inline unsigned char __flogr(unsigned long word)
+static __always_inline __attribute_const__ unsigned long __flogr(unsigned long word)
{
- if (__builtin_constant_p(word)) {
- unsigned long bit = 0;
+ unsigned long bit;
+ if (__builtin_constant_p(word)) {
+ bit = 0;
if (!word)
return 64;
if (!(word & 0xffffffff00000000UL)) {
@@ -302,85 +166,49 @@ static inline unsigned char __flogr(unsigned long word)
}
return bit;
} else {
- union register_pair rp;
+ union register_pair rp __uninitialized;
rp.even = word;
- asm volatile(
- " flogr %[rp],%[rp]\n"
- : [rp] "+d" (rp.pair) : : "cc");
- return rp.even;
+ asm("flogr %[rp],%[rp]"
+ : [rp] "+d" (rp.pair) : : "cc");
+ bit = rp.even;
+ /*
+ * The result of the flogr instruction is a value in the range
+ * of 0..64. Let the compiler know that the AND operation can
+ * be optimized away.
+ */
+ __assume(bit <= 64);
+ return bit & 127;
}
}
/**
- * __ffs - find first bit in word.
- * @word: The word to search
- *
- * Undefined if no bit exists, so code should check against 0 first.
- */
-static inline unsigned long __ffs(unsigned long word)
-{
- return __flogr(-word & word) ^ (BITS_PER_LONG - 1);
-}
-
-/**
* ffs - find first bit set
* @word: the word to search
*
* This is defined the same way as the libc and
* compiler builtin ffs routines (man ffs).
*/
-static inline int ffs(int word)
+static __always_inline __flatten __attribute_const__ int ffs(int word)
{
- unsigned long mask = 2 * BITS_PER_LONG - 1;
unsigned int val = (unsigned int)word;
- return (1 + (__flogr(-val & val) ^ (BITS_PER_LONG - 1))) & mask;
+ return BITS_PER_LONG - __flogr(-val & val);
}
-/**
- * __fls - find last (most-significant) set bit in a long word
- * @word: the word to search
- *
- * Undefined if no set bit exists, so code should check against 0 first.
- */
-static inline unsigned long __fls(unsigned long word)
-{
- return __flogr(word) ^ (BITS_PER_LONG - 1);
-}
+#else /* CONFIG_CC_HAS_BUILTIN_FFS */
-/**
- * fls64 - find last set bit in a 64-bit word
- * @word: the word to search
- *
- * This is defined in a similar way as the libc and compiler builtin
- * ffsll, but returns the position of the most significant set bit.
- *
- * fls64(value) returns 0 if value is 0 or the position of the last
- * set bit if value is nonzero. The last (most significant) bit is
- * at position 64.
- */
-static inline int fls64(unsigned long word)
-{
- unsigned long mask = 2 * BITS_PER_LONG - 1;
-
- return (1 + (__flogr(word) ^ (BITS_PER_LONG - 1))) & mask;
-}
+#include <asm-generic/bitops/builtin-ffs.h>
-/**
- * fls - find last (most-significant) bit set
- * @word: the word to search
- *
- * This is defined the same way as ffs.
- * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
- */
-static inline int fls(unsigned int word)
-{
- return fls64(word);
-}
+#endif /* CONFIG_CC_HAS_BUILTIN_FFS */
+#include <asm-generic/bitops/builtin-__ffs.h>
#include <asm-generic/bitops/ffz.h>
-#include <asm-generic/bitops/hweight.h>
+#include <asm-generic/bitops/builtin-__fls.h>
+#include <asm-generic/bitops/builtin-fls.h>
+#include <asm-generic/bitops/fls64.h>
+#include <asm/arch_hweight.h>
+#include <asm-generic/bitops/const_hweight.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic-setbit.h>
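
The ffs() implementation above maps the flogr result (a count of leading zero bits over the 64-bit word, 64 if the word is zero) to the usual 1-based bit number by isolating the lowest set bit first. A minimal user-space sketch of that mapping, using __builtin_clzl() as a stand-in for the flogr instruction; the helper names are made up for illustration:

#include <stdio.h>

/* Stand-in for flogr: number of leading zero bits, 64 if word is 0. */
static unsigned long flogr_sw(unsigned long word)
{
	return word ? (unsigned long)__builtin_clzl(word) : 64;
}

/* Mirrors ffs() above: isolate the lowest set bit, then turn its MSB0
 * bit number into a 1-based LSB0 bit number. */
static int ffs_sw(unsigned int word)
{
	unsigned long val = word;

	return 64 - flogr_sw(-val & val);
}

int main(void)
{
	printf("%d %d %d\n", ffs_sw(0), ffs_sw(1), ffs_sw(0x80000000u)); /* 0 1 32 */
	return 0;
}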
diff --git a/arch/s390/include/asm/boot_data.h b/arch/s390/include/asm/boot_data.h
index f7eed27b3220..f55f8227058e 100644
--- a/arch/s390/include/asm/boot_data.h
+++ b/arch/s390/include/asm/boot_data.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_S390_BOOT_DATA_H
+#include <linux/string.h>
#include <asm/setup.h>
#include <asm/ipl.h>
@@ -15,4 +16,54 @@ extern unsigned long ipl_cert_list_size;
extern unsigned long early_ipl_comp_list_addr;
extern unsigned long early_ipl_comp_list_size;
+extern char boot_rb[PAGE_SIZE * 2];
+extern bool boot_earlyprintk;
+extern size_t boot_rb_off;
+extern char bootdebug_filter[128];
+extern bool bootdebug;
+
+#define boot_rb_foreach(cb) \
+ do { \
+ size_t off = boot_rb_off + strlen(boot_rb + boot_rb_off) + 1; \
+ size_t len; \
+ for (; off < sizeof(boot_rb) && (len = strlen(boot_rb + off)); off += len + 1) \
+ cb(boot_rb + off); \
+ for (off = 0; off < boot_rb_off && (len = strlen(boot_rb + off)); off += len + 1) \
+ cb(boot_rb + off); \
+ } while (0)
+
+/*
+ * bootdebug_filter is a comma-separated list of strings,
+ * where each string can be a prefix of the message.
+ */
+static inline bool bootdebug_filter_match(const char *buf)
+{
+ char *p = bootdebug_filter, *s;
+ char *end;
+
+ if (!*p)
+ return true;
+
+ end = p + strlen(p);
+ while (p < end) {
+ p = skip_spaces(p);
+ s = memscan(p, ',', end - p);
+ if (!strncmp(p, buf, s - p))
+ return true;
+ p = s + 1;
+ }
+ return false;
+}
+
+static inline const char *skip_timestamp(const char *buf)
+{
+#ifdef CONFIG_PRINTK_TIME
+ const char *p = memchr(buf, ']', strlen(buf));
+
+ if (p && p[1] == ' ')
+ return p + 2;
+#endif
+ return buf;
+}
+
#endif /* _ASM_S390_BOOT_DATA_H */
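
bootdebug_filter_match() above accepts a message when it starts with any of the comma-separated filter entries. A small user-space sketch of the same matching logic, assuming standard C library routines in place of the kernel's skip_spaces()/memscan(); names and sample messages are made up:

#include <stdio.h>
#include <string.h>
#include <ctype.h>

static int filter_match(const char *filter, const char *msg)
{
	const char *p = filter, *end = filter + strlen(filter);

	if (!*p)
		return 1;	/* empty filter matches everything */
	while (p < end) {
		const char *s;

		while (isspace((unsigned char)*p))
			p++;
		s = memchr(p, ',', end - p);
		if (!s)
			s = end;
		if (!strncmp(p, msg, s - p))
			return 1;
		p = s + 1;
	}
	return 0;
}

int main(void)
{
	printf("%d\n", filter_match("vmem,startup", "startup: kaslr enabled")); /* 1 */
	printf("%d\n", filter_match("vmem,startup", "cmdline parsed"));         /* 0 */
	return 0;
}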
diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h
index c500d45fb465..acb4b13d98c5 100644
--- a/arch/s390/include/asm/bug.h
+++ b/arch/s390/include/asm/bug.h
@@ -2,69 +2,55 @@
#ifndef _ASM_S390_BUG_H
#define _ASM_S390_BUG_H
-#include <linux/compiler.h>
-
-#ifdef CONFIG_BUG
-
-#ifdef CONFIG_DEBUG_BUGVERBOSE
-
-#define __EMIT_BUG(x) do { \
- asm_inline volatile( \
- "0: mc 0,0\n" \
- ".section .rodata.str,\"aMS\",@progbits,1\n" \
- "1: .asciz \""__FILE__"\"\n" \
- ".previous\n" \
- ".section __bug_table,\"aw\"\n" \
- "2: .long 0b-.\n" \
- " .long 1b-.\n" \
- " .short %0,%1\n" \
- " .org 2b+%2\n" \
- ".previous\n" \
- : : "i" (__LINE__), \
- "i" (x), \
- "i" (sizeof(struct bug_entry))); \
-} while (0)
-
-#else /* CONFIG_DEBUG_BUGVERBOSE */
-
-#define __EMIT_BUG(x) do { \
- asm_inline volatile( \
- "0: mc 0,0\n" \
- ".section __bug_table,\"aw\"\n" \
- "1: .long 0b-.\n" \
- " .short %0\n" \
- " .org 1b+%1\n" \
- ".previous\n" \
- : : "i" (x), \
- "i" (sizeof(struct bug_entry))); \
+#include <linux/stringify.h>
+
+#ifndef CONFIG_DEBUG_BUGVERBOSE
+#define _BUGVERBOSE_LOCATION(file, line)
+#else
+#define __BUGVERBOSE_LOCATION(file, line) \
+ .pushsection .rodata.str, "aMS", @progbits, 1; \
+ 10002: .ascii file "\0"; \
+ .popsection; \
+ \
+ .long 10002b - .; \
+ .short line;
+#define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line)
+#endif
+
+#ifndef CONFIG_GENERIC_BUG
+#define __BUG_ENTRY(cond_str, flags)
+#else
+#define __BUG_ENTRY(cond_str, flags) \
+ .pushsection __bug_table, "aw"; \
+ .align 4; \
+ 10000: .long 10001f - .; \
+ _BUGVERBOSE_LOCATION(WARN_CONDITION_STR(cond_str) __FILE__, __LINE__) \
+ .short flags; \
+ .popsection; \
+ 10001:
+#endif
+
+#define ASM_BUG_FLAGS(cond_str, flags) \
+ __BUG_ENTRY(cond_str, flags) \
+ mc 0,0
+
+#define ASM_BUG() ASM_BUG_FLAGS("", 0)
+
+#define __BUG_FLAGS(cond_str, flags) \
+ asm_inline volatile(__stringify(ASM_BUG_FLAGS(cond_str, flags)));
+
+#define __WARN_FLAGS(cond_str, flags) \
+do { \
+ __BUG_FLAGS(cond_str, BUGFLAG_WARNING|(flags)); \
} while (0)
-#endif /* CONFIG_DEBUG_BUGVERBOSE */
-
-#define BUG() do { \
- __EMIT_BUG(0); \
- unreachable(); \
+#define BUG() \
+do { \
+ __BUG_FLAGS("", 0); \
+ unreachable(); \
} while (0)
-#define __WARN_FLAGS(flags) do { \
- __EMIT_BUG(BUGFLAG_WARNING|(flags)); \
-} while (0)
-
-#define WARN_ON(x) ({ \
- int __ret_warn_on = !!(x); \
- if (__builtin_constant_p(__ret_warn_on)) { \
- if (__ret_warn_on) \
- __WARN(); \
- } else { \
- if (unlikely(__ret_warn_on)) \
- __WARN(); \
- } \
- unlikely(__ret_warn_on); \
-})
-
#define HAVE_ARCH_BUG
-#define HAVE_ARCH_WARN_ON
-#endif /* CONFIG_BUG */
#include <asm-generic/bug.h>
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index 436365ff6c19..e3afcece375e 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -210,7 +210,7 @@ extern void ccw_device_get_id(struct ccw_device *, struct ccw_dev_id *);
#define get_ccwdev_lock(x) (x)->ccwlock
#define to_ccwdev(n) container_of(n, struct ccw_device, dev)
-#define to_ccwdrv(n) container_of(n, struct ccw_driver, driver)
+#define to_ccwdrv(n) container_of_const(n, struct ccw_driver, driver)
extern struct ccw_device *ccw_device_create_console(struct ccw_driver *);
extern void ccw_device_destroy_console(struct ccw_device *);
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h
index b89159591ca0..7e83dc2d3b06 100644
--- a/arch/s390/include/asm/checksum.h
+++ b/arch/s390/include/asm/checksum.h
@@ -13,6 +13,7 @@
#define _S390_CHECKSUM_H
#include <linux/instrumented.h>
+#include <linux/kmsan-checks.h>
#include <linux/in6.h>
static inline __wsum cksm(const void *buff, int len, __wsum sum)
@@ -23,9 +24,10 @@ static inline __wsum cksm(const void *buff, int len, __wsum sum)
};
instrument_read(buff, len);
- asm volatile("\n"
+ kmsan_check_memory(buff, len);
+ asm volatile(
"0: cksm %[sum],%[rp]\n"
- " jo 0b\n"
+ " jo 0b"
: [sum] "+&d" (sum), [rp] "+&d" (rp.pair) : : "cc", "memory");
return sum;
}
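
For orientation, a rough user-space approximation of what the cksm loop computes: a 32-bit checksum accumulated word by word with end-around carry, the final partial word padded with zero bytes. This is a sketch of the idea under those assumptions, not a drop-in replacement for the instruction:

#include <stdint.h>
#include <stddef.h>
#include <string.h>

static uint32_t cksm_sw(const void *buff, size_t len, uint32_t sum)
{
	const uint8_t *p = buff;
	uint64_t acc = sum;
	uint32_t w;

	while (len >= 4) {
		memcpy(&w, p, 4);	/* native (big-endian on s390) byte order */
		acc += w;
		p += 4;
		len -= 4;
	}
	if (len) {
		uint8_t tail[4] = { 0 };

		memcpy(tail, p, len);	/* pad the final partial word with zeros */
		memcpy(&w, tail, 4);
		acc += w;
	}
	while (acc >> 32)		/* fold carries back in (end-around carry) */
		acc = (acc & 0xffffffff) + (acc >> 32);
	return (uint32_t)acc;
}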
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index b6b619f340a5..0a82ae2300b6 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -18,6 +18,8 @@
#include <asm/scsw.h>
+#define CCW_MAX_BYTE_COUNT 65535
+
/**
* struct ccw1 - channel command word
* @cmd_code: command code
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index aae0315374de..008357996262 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -11,191 +11,237 @@
#include <linux/mmdebug.h>
#include <linux/types.h>
#include <linux/bug.h>
+#include <asm/asm.h>
-void __xchg_called_with_bad_pointer(void);
+void __cmpxchg_called_with_bad_pointer(void);
+
+static __always_inline u32 __cs_asm(u64 ptr, u32 old, u32 new)
+{
+ asm volatile(
+ " cs %[old],%[new],%[ptr]"
+ : [old] "+d" (old), [ptr] "+Q" (*(u32 *)ptr)
+ : [new] "d" (new)
+ : "memory", "cc");
+ return old;
+}
+
+static __always_inline u64 __csg_asm(u64 ptr, u64 old, u64 new)
+{
+ asm volatile(
+ " csg %[old],%[new],%[ptr]"
+ : [old] "+d" (old), [ptr] "+QS" (*(u64 *)ptr)
+ : [new] "d" (new)
+ : "memory", "cc");
+ return old;
+}
-static __always_inline unsigned long
-__arch_xchg(unsigned long x, unsigned long address, int size)
+static inline u8 __arch_cmpxchg1(u64 ptr, u8 old, u8 new)
{
- unsigned long old;
- int shift;
+ union {
+ u8 b[4];
+ u32 w;
+ } old32, new32;
+ u32 prev;
+ int i;
+ i = ptr & 3;
+ ptr &= ~0x3;
+ prev = READ_ONCE(*(u32 *)ptr);
+ do {
+ old32.w = prev;
+ if (old32.b[i] != old)
+ return old32.b[i];
+ new32.w = old32.w;
+ new32.b[i] = new;
+ prev = __cs_asm(ptr, old32.w, new32.w);
+ } while (prev != old32.w);
+ return old;
+}
+
+static inline u16 __arch_cmpxchg2(u64 ptr, u16 old, u16 new)
+{
+ union {
+ u16 b[2];
+ u32 w;
+ } old32, new32;
+ u32 prev;
+ int i;
+
+ i = (ptr & 3) >> 1;
+ ptr &= ~0x3;
+ prev = READ_ONCE(*(u32 *)ptr);
+ do {
+ old32.w = prev;
+ if (old32.b[i] != old)
+ return old32.b[i];
+ new32.w = old32.w;
+ new32.b[i] = new;
+ prev = __cs_asm(ptr, old32.w, new32.w);
+ } while (prev != old32.w);
+ return old;
+}
+
+static __always_inline u64 __arch_cmpxchg(u64 ptr, u64 old, u64 new, int size)
+{
switch (size) {
- case 1:
- shift = (3 ^ (address & 3)) << 3;
- address ^= address & 3;
- asm volatile(
- " l %0,%1\n"
- "0: lr 0,%0\n"
- " nr 0,%3\n"
- " or 0,%2\n"
- " cs %0,0,%1\n"
- " jl 0b\n"
- : "=&d" (old), "+Q" (*(int *) address)
- : "d" ((x & 0xff) << shift), "d" (~(0xff << shift))
- : "memory", "cc", "0");
- return old >> shift;
- case 2:
- shift = (2 ^ (address & 2)) << 3;
- address ^= address & 2;
- asm volatile(
- " l %0,%1\n"
- "0: lr 0,%0\n"
- " nr 0,%3\n"
- " or 0,%2\n"
- " cs %0,0,%1\n"
- " jl 0b\n"
- : "=&d" (old), "+Q" (*(int *) address)
- : "d" ((x & 0xffff) << shift), "d" (~(0xffff << shift))
- : "memory", "cc", "0");
- return old >> shift;
- case 4:
- asm volatile(
- " l %0,%1\n"
- "0: cs %0,%2,%1\n"
- " jl 0b\n"
- : "=&d" (old), "+Q" (*(int *) address)
- : "d" (x)
- : "memory", "cc");
- return old;
- case 8:
- asm volatile(
- " lg %0,%1\n"
- "0: csg %0,%2,%1\n"
- " jl 0b\n"
- : "=&d" (old), "+QS" (*(long *) address)
- : "d" (x)
- : "memory", "cc");
- return old;
+ case 1: return __arch_cmpxchg1(ptr, old & 0xff, new & 0xff);
+ case 2: return __arch_cmpxchg2(ptr, old & 0xffff, new & 0xffff);
+ case 4: return __cs_asm(ptr, old & 0xffffffff, new & 0xffffffff);
+ case 8: return __csg_asm(ptr, old, new);
+ default: __cmpxchg_called_with_bad_pointer();
}
- __xchg_called_with_bad_pointer();
- return x;
+ return old;
}
-#define arch_xchg(ptr, x) \
+#define arch_cmpxchg(ptr, o, n) \
+({ \
+ (__typeof__(*(ptr)))__arch_cmpxchg((unsigned long)(ptr), \
+ (unsigned long)(o), \
+ (unsigned long)(n), \
+ sizeof(*(ptr))); \
+})
+
+#define arch_cmpxchg64 arch_cmpxchg
+#define arch_cmpxchg_local arch_cmpxchg
+#define arch_cmpxchg64_local arch_cmpxchg
+
+#ifdef __HAVE_ASM_FLAG_OUTPUTS__
+
+#define arch_try_cmpxchg(ptr, oldp, new) \
({ \
- __typeof__(*(ptr)) __ret; \
+ __typeof__(ptr) __oldp = (__typeof__(ptr))(oldp); \
+ __typeof__(*(ptr)) __old = *__oldp; \
+ __typeof__(*(ptr)) __new = (new); \
+ __typeof__(*(ptr)) __prev; \
+ int __cc; \
\
- __ret = (__typeof__(*(ptr))) \
- __arch_xchg((unsigned long)(x), (unsigned long)(ptr), \
- sizeof(*(ptr))); \
- __ret; \
+ switch (sizeof(*(ptr))) { \
+ case 1: \
+ case 2: { \
+ __prev = arch_cmpxchg((ptr), (__old), (__new)); \
+ __cc = (__prev != __old); \
+ if (unlikely(__cc)) \
+ *__oldp = __prev; \
+ break; \
+ } \
+ case 4: { \
+ asm volatile( \
+ " cs %[__old],%[__new],%[__ptr]" \
+ : [__old] "+d" (*__oldp), \
+ [__ptr] "+Q" (*(ptr)), \
+ "=@cc" (__cc) \
+ : [__new] "d" (__new) \
+ : "memory"); \
+ break; \
+ } \
+ case 8: { \
+ asm volatile( \
+ " csg %[__old],%[__new],%[__ptr]" \
+ : [__old] "+d" (*__oldp), \
+ [__ptr] "+QS" (*(ptr)), \
+ "=@cc" (__cc) \
+ : [__new] "d" (__new) \
+ : "memory"); \
+ break; \
+ } \
+ default: \
+ __cmpxchg_called_with_bad_pointer(); \
+ } \
+ likely(__cc == 0); \
})
-void __cmpxchg_called_with_bad_pointer(void);
+#else /* __HAVE_ASM_FLAG_OUTPUTS__ */
-static __always_inline unsigned long __cmpxchg(unsigned long address,
- unsigned long old,
- unsigned long new, int size)
+#define arch_try_cmpxchg(ptr, oldp, new) \
+({ \
+ __typeof__((ptr)) __oldp = (__typeof__(ptr))(oldp); \
+ __typeof__(*(ptr)) __old = *__oldp; \
+ __typeof__(*(ptr)) __new = (new); \
+ __typeof__(*(ptr)) __prev; \
+ \
+ __prev = arch_cmpxchg((ptr), (__old), (__new)); \
+ if (unlikely(__prev != __old)) \
+ *__oldp = __prev; \
+ likely(__prev == __old); \
+})
+
+#endif /* __HAVE_ASM_FLAG_OUTPUTS__ */
+
+#define arch_try_cmpxchg64 arch_try_cmpxchg
+#define arch_try_cmpxchg_local arch_try_cmpxchg
+#define arch_try_cmpxchg64_local arch_try_cmpxchg
+
+void __xchg_called_with_bad_pointer(void);
+
+static inline u8 __arch_xchg1(u64 ptr, u8 x)
+{
+ int shift = (3 ^ (ptr & 3)) << 3;
+ u32 mask, old, new;
+
+ ptr &= ~0x3;
+ mask = ~(0xff << shift);
+ old = READ_ONCE(*(u32 *)ptr);
+ do {
+ new = old & mask;
+ new |= x << shift;
+ } while (!arch_try_cmpxchg((u32 *)ptr, &old, new));
+ return old >> shift;
+}
+
+static inline u16 __arch_xchg2(u64 ptr, u16 x)
+{
+ int shift = (2 ^ (ptr & 2)) << 3;
+ u32 mask, old, new;
+
+ ptr &= ~0x3;
+ mask = ~(0xffff << shift);
+ old = READ_ONCE(*(u32 *)ptr);
+ do {
+ new = old & mask;
+ new |= x << shift;
+ } while (!arch_try_cmpxchg((u32 *)ptr, &old, new));
+ return old >> shift;
+}
+
+static __always_inline u64 __arch_xchg(u64 ptr, u64 x, int size)
{
switch (size) {
- case 1: {
- unsigned int prev, shift, mask;
-
- shift = (3 ^ (address & 3)) << 3;
- address ^= address & 3;
- old = (old & 0xff) << shift;
- new = (new & 0xff) << shift;
- mask = ~(0xff << shift);
- asm volatile(
- " l %[prev],%[address]\n"
- " nr %[prev],%[mask]\n"
- " xilf %[mask],0xffffffff\n"
- " or %[new],%[prev]\n"
- " or %[prev],%[tmp]\n"
- "0: lr %[tmp],%[prev]\n"
- " cs %[prev],%[new],%[address]\n"
- " jnl 1f\n"
- " xr %[tmp],%[prev]\n"
- " xr %[new],%[tmp]\n"
- " nr %[tmp],%[mask]\n"
- " jz 0b\n"
- "1:"
- : [prev] "=&d" (prev),
- [address] "+Q" (*(int *)address),
- [tmp] "+&d" (old),
- [new] "+&d" (new),
- [mask] "+&d" (mask)
- :: "memory", "cc");
- return prev >> shift;
- }
- case 2: {
- unsigned int prev, shift, mask;
-
- shift = (2 ^ (address & 2)) << 3;
- address ^= address & 2;
- old = (old & 0xffff) << shift;
- new = (new & 0xffff) << shift;
- mask = ~(0xffff << shift);
- asm volatile(
- " l %[prev],%[address]\n"
- " nr %[prev],%[mask]\n"
- " xilf %[mask],0xffffffff\n"
- " or %[new],%[prev]\n"
- " or %[prev],%[tmp]\n"
- "0: lr %[tmp],%[prev]\n"
- " cs %[prev],%[new],%[address]\n"
- " jnl 1f\n"
- " xr %[tmp],%[prev]\n"
- " xr %[new],%[tmp]\n"
- " nr %[tmp],%[mask]\n"
- " jz 0b\n"
- "1:"
- : [prev] "=&d" (prev),
- [address] "+Q" (*(int *)address),
- [tmp] "+&d" (old),
- [new] "+&d" (new),
- [mask] "+&d" (mask)
- :: "memory", "cc");
- return prev >> shift;
- }
+ case 1:
+ return __arch_xchg1(ptr, x & 0xff);
+ case 2:
+ return __arch_xchg2(ptr, x & 0xffff);
case 4: {
- unsigned int prev = old;
-
- asm volatile(
- " cs %[prev],%[new],%[address]\n"
- : [prev] "+&d" (prev),
- [address] "+Q" (*(int *)address)
- : [new] "d" (new)
- : "memory", "cc");
- return prev;
+ u32 old = READ_ONCE(*(u32 *)ptr);
+
+ do {
+ } while (!arch_try_cmpxchg((u32 *)ptr, &old, x & 0xffffffff));
+ return old;
}
case 8: {
- unsigned long prev = old;
-
- asm volatile(
- " csg %[prev],%[new],%[address]\n"
- : [prev] "+&d" (prev),
- [address] "+QS" (*(long *)address)
- : [new] "d" (new)
- : "memory", "cc");
- return prev;
+ u64 old = READ_ONCE(*(u64 *)ptr);
+
+ do {
+ } while (!arch_try_cmpxchg((u64 *)ptr, &old, x));
+ return old;
}
}
- __cmpxchg_called_with_bad_pointer();
- return old;
+ __xchg_called_with_bad_pointer();
+ return x;
}
-#define arch_cmpxchg(ptr, o, n) \
+#define arch_xchg(ptr, x) \
({ \
- __typeof__(*(ptr)) __ret; \
- \
- __ret = (__typeof__(*(ptr))) \
- __cmpxchg((unsigned long)(ptr), (unsigned long)(o), \
- (unsigned long)(n), sizeof(*(ptr))); \
- __ret; \
+ (__typeof__(*(ptr)))__arch_xchg((unsigned long)(ptr), \
+ (unsigned long)(x), \
+ sizeof(*(ptr))); \
})
-#define arch_cmpxchg64 arch_cmpxchg
-#define arch_cmpxchg_local arch_cmpxchg
-#define arch_cmpxchg64_local arch_cmpxchg
-
#define system_has_cmpxchg128() 1
static __always_inline u128 arch_cmpxchg128(volatile u128 *ptr, u128 old, u128 new)
{
asm volatile(
- " cdsg %[old],%[new],%[ptr]\n"
+ " cdsg %[old],%[new],%[ptr]"
: [old] "+d" (old), [ptr] "+QS" (*ptr)
: [new] "d" (new)
: "memory", "cc");
@@ -203,5 +249,25 @@ static __always_inline u128 arch_cmpxchg128(volatile u128 *ptr, u128 old, u128 n
}
#define arch_cmpxchg128 arch_cmpxchg128
+#define arch_cmpxchg128_local arch_cmpxchg128
+
+#ifdef __HAVE_ASM_FLAG_OUTPUTS__
+
+static __always_inline bool arch_try_cmpxchg128(volatile u128 *ptr, u128 *oldp, u128 new)
+{
+ int cc;
+
+ asm volatile(
+ " cdsg %[old],%[new],%[ptr]"
+ : [old] "+d" (*oldp), [ptr] "+QS" (*ptr), "=@cc" (cc)
+ : [new] "d" (new)
+ : "memory");
+ return likely(cc == 0);
+}
+
+#define arch_try_cmpxchg128 arch_try_cmpxchg128
+#define arch_try_cmpxchg128_local arch_try_cmpxchg128
+
+#endif /* __HAVE_ASM_FLAG_OUTPUTS__ */
#endif /* __ASM_CMPXCHG_H */
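
The 1- and 2-byte cmpxchg variants above are emulated with a compare-and-swap loop on the aligned 4-byte word containing the target. A portable sketch of the same pattern, using a GCC __atomic builtin in place of the cs instruction and assuming big-endian byte indexing as on s390:

#include <stdint.h>

static uint8_t cmpxchg1_emulated(volatile uint8_t *ptr, uint8_t old, uint8_t new)
{
	uintptr_t addr = (uintptr_t)ptr;
	volatile uint32_t *word = (volatile uint32_t *)(addr & ~(uintptr_t)3);
	unsigned int i = addr & 3;	/* big-endian byte index within the word */
	union { uint8_t b[4]; uint32_t w; } o, n;
	uint32_t prev = *word;

	do {
		o.w = prev;
		if (o.b[i] != old)	/* target byte changed: report it */
			return o.b[i];
		n.w = o.w;
		n.b[i] = new;
	} while (!__atomic_compare_exchange_n(word, &prev, n.w, 0,
					      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
	return old;
}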
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
deleted file mode 100644
index 3cb9d813f022..000000000000
--- a/arch/s390/include/asm/compat.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_S390X_COMPAT_H
-#define _ASM_S390X_COMPAT_H
-/*
- * Architecture specific compatibility types
- */
-#include <linux/types.h>
-#include <linux/sched.h>
-#include <linux/sched/task_stack.h>
-#include <linux/thread_info.h>
-#include <asm/ptrace.h>
-
-#define compat_mode_t compat_mode_t
-typedef u16 compat_mode_t;
-
-#define __compat_uid_t __compat_uid_t
-typedef u16 __compat_uid_t;
-typedef u16 __compat_gid_t;
-
-#define compat_dev_t compat_dev_t
-typedef u16 compat_dev_t;
-
-#define compat_ipc_pid_t compat_ipc_pid_t
-typedef u16 compat_ipc_pid_t;
-
-#define compat_statfs compat_statfs
-
-#include <asm-generic/compat.h>
-
-#define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p( \
- typeof(0?(__force t)0:0ULL), u64))
-
-#define __SC_DELOUSE(t,v) ({ \
- BUILD_BUG_ON(sizeof(t) > 4 && !__TYPE_IS_PTR(t)); \
- (__force t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v)); \
-})
-
-#define PSW32_MASK_USER 0x0000FF00UL
-
-#define PSW32_USER_BITS (PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | \
- PSW32_DEFAULT_KEY | PSW32_MASK_BASE | \
- PSW32_MASK_MCHECK | PSW32_MASK_PSTATE | \
- PSW32_ASC_PRIMARY)
-
-#define COMPAT_UTS_MACHINE "s390\0\0\0\0"
-
-typedef u16 compat_nlink_t;
-
-typedef struct {
- u32 mask;
- u32 addr;
-} __aligned(8) psw_compat_t;
-
-typedef struct {
- psw_compat_t psw;
- u32 gprs[NUM_GPRS];
- u32 acrs[NUM_ACRS];
- u32 orig_gpr2;
-} s390_compat_regs;
-
-typedef struct {
- u32 gprs_high[NUM_GPRS];
-} s390_compat_regs_high;
-
-struct compat_stat {
- compat_dev_t st_dev;
- u16 __pad1;
- compat_ino_t st_ino;
- compat_mode_t st_mode;
- compat_nlink_t st_nlink;
- __compat_uid_t st_uid;
- __compat_gid_t st_gid;
- compat_dev_t st_rdev;
- u16 __pad2;
- u32 st_size;
- u32 st_blksize;
- u32 st_blocks;
- u32 st_atime;
- u32 st_atime_nsec;
- u32 st_mtime;
- u32 st_mtime_nsec;
- u32 st_ctime;
- u32 st_ctime_nsec;
- u32 __unused4;
- u32 __unused5;
-};
-
-struct compat_statfs {
- u32 f_type;
- u32 f_bsize;
- u32 f_blocks;
- u32 f_bfree;
- u32 f_bavail;
- u32 f_files;
- u32 f_ffree;
- compat_fsid_t f_fsid;
- u32 f_namelen;
- u32 f_frsize;
- u32 f_flags;
- u32 f_spare[4];
-};
-
-struct compat_statfs64 {
- u32 f_type;
- u32 f_bsize;
- u64 f_blocks;
- u64 f_bfree;
- u64 f_bavail;
- u64 f_files;
- u64 f_ffree;
- compat_fsid_t f_fsid;
- u32 f_namelen;
- u32 f_frsize;
- u32 f_flags;
- u32 f_spare[5];
-};
-
-/*
- * A pointer passed in from user mode. This should not
- * be used for syscall parameters, just declare them
- * as pointers because the syscall entry code will have
- * appropriately converted them already.
- */
-
-static inline void __user *compat_ptr(compat_uptr_t uptr)
-{
- return (void __user *)(unsigned long)(uptr & 0x7fffffffUL);
-}
-#define compat_ptr(uptr) compat_ptr(uptr)
-
-#ifdef CONFIG_COMPAT
-
-static inline int is_compat_task(void)
-{
- return test_thread_flag(TIF_31BIT);
-}
-
-#endif
-
-#endif /* _ASM_S390X_COMPAT_H */
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index c786538e397c..a83683169d98 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -12,6 +12,7 @@
#define _ASM_S390_CPACF_H
#include <asm/facility.h>
+#include <linux/kmsan-checks.h>
/*
* Instruction opcodes for the CPACF instructions
@@ -53,6 +54,10 @@
#define CPACF_KM_XTS_256 0x34
#define CPACF_KM_PXTS_128 0x3a
#define CPACF_KM_PXTS_256 0x3c
+#define CPACF_KM_XTS_128_FULL 0x52
+#define CPACF_KM_XTS_256_FULL 0x54
+#define CPACF_KM_PXTS_128_FULL 0x5a
+#define CPACF_KM_PXTS_256_FULL 0x5c
/*
* Function codes for the KMC (CIPHER MESSAGE WITH CHAINING)
@@ -120,23 +125,35 @@
#define CPACF_KMAC_DEA 0x01
#define CPACF_KMAC_TDEA_128 0x02
#define CPACF_KMAC_TDEA_192 0x03
+#define CPACF_KMAC_HMAC_SHA_224 0x70
+#define CPACF_KMAC_HMAC_SHA_256 0x71
+#define CPACF_KMAC_HMAC_SHA_384 0x72
+#define CPACF_KMAC_HMAC_SHA_512 0x73
+#define CPACF_KMAC_PHMAC_SHA_224 0x78
+#define CPACF_KMAC_PHMAC_SHA_256 0x79
+#define CPACF_KMAC_PHMAC_SHA_384 0x7a
+#define CPACF_KMAC_PHMAC_SHA_512 0x7b
/*
* Function codes for the PCKMO (PERFORM CRYPTOGRAPHIC KEY MANAGEMENT)
* instruction
*/
-#define CPACF_PCKMO_QUERY 0x00
-#define CPACF_PCKMO_ENC_DES_KEY 0x01
-#define CPACF_PCKMO_ENC_TDES_128_KEY 0x02
-#define CPACF_PCKMO_ENC_TDES_192_KEY 0x03
-#define CPACF_PCKMO_ENC_AES_128_KEY 0x12
-#define CPACF_PCKMO_ENC_AES_192_KEY 0x13
-#define CPACF_PCKMO_ENC_AES_256_KEY 0x14
-#define CPACF_PCKMO_ENC_ECC_P256_KEY 0x20
-#define CPACF_PCKMO_ENC_ECC_P384_KEY 0x21
-#define CPACF_PCKMO_ENC_ECC_P521_KEY 0x22
-#define CPACF_PCKMO_ENC_ECC_ED25519_KEY 0x28
-#define CPACF_PCKMO_ENC_ECC_ED448_KEY 0x29
+#define CPACF_PCKMO_QUERY 0x00
+#define CPACF_PCKMO_ENC_DES_KEY 0x01
+#define CPACF_PCKMO_ENC_TDES_128_KEY 0x02
+#define CPACF_PCKMO_ENC_TDES_192_KEY 0x03
+#define CPACF_PCKMO_ENC_AES_128_KEY 0x12
+#define CPACF_PCKMO_ENC_AES_192_KEY 0x13
+#define CPACF_PCKMO_ENC_AES_256_KEY 0x14
+#define CPACF_PCKMO_ENC_AES_XTS_128_DOUBLE_KEY 0x15
+#define CPACF_PCKMO_ENC_AES_XTS_256_DOUBLE_KEY 0x16
+#define CPACF_PCKMO_ENC_ECC_P256_KEY 0x20
+#define CPACF_PCKMO_ENC_ECC_P384_KEY 0x21
+#define CPACF_PCKMO_ENC_ECC_P521_KEY 0x22
+#define CPACF_PCKMO_ENC_ECC_ED25519_KEY 0x28
+#define CPACF_PCKMO_ENC_ECC_ED448_KEY 0x29
+#define CPACF_PCKMO_ENC_HMAC_512_KEY 0x76
+#define CPACF_PCKMO_ENC_HMAC_1024_KEY 0x7a
/*
* Function codes for the PRNO (PERFORM RANDOM NUMBER OPERATION)
@@ -164,7 +181,40 @@
#define CPACF_KMA_LAAD 0x200 /* Last-AAD */
#define CPACF_KMA_HS 0x400 /* Hash-subkey Supplied */
+/*
+ * Flags for the KIMD/KLMD (COMPUTE INTERMEDIATE/LAST MESSAGE DIGEST)
+ * instructions
+ */
+#define CPACF_KIMD_NIP 0x8000
+#define CPACF_KLMD_DUFOP 0x4000
+#define CPACF_KLMD_NIP 0x8000
+
+/*
+ * Function codes for KDSA (COMPUTE DIGITAL SIGNATURE AUTHENTICATION)
+ * instruction
+ */
+#define CPACF_KDSA_QUERY 0x00
+#define CPACF_KDSA_ECDSA_VERIFY_P256 0x01
+#define CPACF_KDSA_ECDSA_VERIFY_P384 0x02
+#define CPACF_KDSA_ECDSA_VERIFY_P521 0x03
+#define CPACF_KDSA_ECDSA_SIGN_P256 0x09
+#define CPACF_KDSA_ECDSA_SIGN_P384 0x0a
+#define CPACF_KDSA_ECDSA_SIGN_P521 0x0b
+#define CPACF_KDSA_ENC_ECDSA_SIGN_P256 0x11
+#define CPACF_KDSA_ENC_ECDSA_SIGN_P384 0x12
+#define CPACF_KDSA_ENC_ECDSA_SIGN_P521 0x13
+#define CPACF_KDSA_EDDSA_VERIFY_ED25519 0x20
+#define CPACF_KDSA_EDDSA_VERIFY_ED448 0x24
+#define CPACF_KDSA_EDDSA_SIGN_ED25519 0x28
+#define CPACF_KDSA_EDDSA_SIGN_ED448 0x2c
+#define CPACF_KDSA_ENC_EDDSA_SIGN_ED25519 0x30
+#define CPACF_KDSA_ENC_EDDSA_SIGN_ED448 0x34
+
+#define CPACF_FC_QUERY 0x00
+#define CPACF_FC_QUERY_AUTH_INFO 0x7F
+
typedef struct { unsigned char bytes[16]; } cpacf_mask_t;
+typedef struct { unsigned char bytes[256]; } cpacf_qai_t;
/*
* Prototype for a not existing function to produce a link
@@ -174,80 +224,85 @@ typedef struct { unsigned char bytes[16]; } cpacf_mask_t;
void __cpacf_bad_opcode(void);
static __always_inline void __cpacf_query_rre(u32 opc, u8 r1, u8 r2,
- cpacf_mask_t *mask)
+ u8 *pb, u8 fc)
{
asm volatile(
- " la %%r1,%[mask]\n"
- " xgr %%r0,%%r0\n"
- " .insn rre,%[opc] << 16,%[r1],%[r2]\n"
- : [mask] "=R" (*mask)
- : [opc] "i" (opc),
+ " la %%r1,%[pb]\n"
+ " lghi %%r0,%[fc]\n"
+ " .insn rre,%[opc] << 16,%[r1],%[r2]"
+ : [pb] "=R" (*pb)
+ : [opc] "i" (opc), [fc] "i" (fc),
[r1] "i" (r1), [r2] "i" (r2)
- : "cc", "r0", "r1");
+ : "cc", "memory", "r0", "r1");
}
-static __always_inline void __cpacf_query_rrf(u32 opc,
- u8 r1, u8 r2, u8 r3, u8 m4,
- cpacf_mask_t *mask)
+static __always_inline void __cpacf_query_rrf(u32 opc, u8 r1, u8 r2, u8 r3,
+ u8 m4, u8 *pb, u8 fc)
{
asm volatile(
- " la %%r1,%[mask]\n"
- " xgr %%r0,%%r0\n"
- " .insn rrf,%[opc] << 16,%[r1],%[r2],%[r3],%[m4]\n"
- : [mask] "=R" (*mask)
- : [opc] "i" (opc), [r1] "i" (r1), [r2] "i" (r2),
- [r3] "i" (r3), [m4] "i" (m4)
- : "cc", "r0", "r1");
+ " la %%r1,%[pb]\n"
+ " lghi %%r0,%[fc]\n"
+ " .insn rrf,%[opc] << 16,%[r1],%[r2],%[r3],%[m4]"
+ : [pb] "=R" (*pb)
+ : [opc] "i" (opc), [fc] "i" (fc), [r1] "i" (r1),
+ [r2] "i" (r2), [r3] "i" (r3), [m4] "i" (m4)
+ : "cc", "memory", "r0", "r1");
}
-static __always_inline void __cpacf_query(unsigned int opcode,
- cpacf_mask_t *mask)
+static __always_inline void __cpacf_query_insn(unsigned int opcode, void *pb,
+ u8 fc)
{
switch (opcode) {
case CPACF_KDSA:
- __cpacf_query_rre(CPACF_KDSA, 0, 2, mask);
+ __cpacf_query_rre(CPACF_KDSA, 0, 2, pb, fc);
break;
case CPACF_KIMD:
- __cpacf_query_rre(CPACF_KIMD, 0, 2, mask);
+ __cpacf_query_rre(CPACF_KIMD, 0, 2, pb, fc);
break;
case CPACF_KLMD:
- __cpacf_query_rre(CPACF_KLMD, 0, 2, mask);
+ __cpacf_query_rre(CPACF_KLMD, 0, 2, pb, fc);
break;
case CPACF_KM:
- __cpacf_query_rre(CPACF_KM, 2, 4, mask);
+ __cpacf_query_rre(CPACF_KM, 2, 4, pb, fc);
break;
case CPACF_KMA:
- __cpacf_query_rrf(CPACF_KMA, 2, 4, 6, 0, mask);
+ __cpacf_query_rrf(CPACF_KMA, 2, 4, 6, 0, pb, fc);
break;
case CPACF_KMAC:
- __cpacf_query_rre(CPACF_KMAC, 0, 2, mask);
+ __cpacf_query_rre(CPACF_KMAC, 0, 2, pb, fc);
break;
case CPACF_KMC:
- __cpacf_query_rre(CPACF_KMC, 2, 4, mask);
+ __cpacf_query_rre(CPACF_KMC, 2, 4, pb, fc);
break;
case CPACF_KMCTR:
- __cpacf_query_rrf(CPACF_KMCTR, 2, 4, 6, 0, mask);
+ __cpacf_query_rrf(CPACF_KMCTR, 2, 4, 6, 0, pb, fc);
break;
case CPACF_KMF:
- __cpacf_query_rre(CPACF_KMF, 2, 4, mask);
+ __cpacf_query_rre(CPACF_KMF, 2, 4, pb, fc);
break;
case CPACF_KMO:
- __cpacf_query_rre(CPACF_KMO, 2, 4, mask);
+ __cpacf_query_rre(CPACF_KMO, 2, 4, pb, fc);
break;
case CPACF_PCC:
- __cpacf_query_rre(CPACF_PCC, 0, 0, mask);
+ __cpacf_query_rre(CPACF_PCC, 0, 0, pb, fc);
break;
case CPACF_PCKMO:
- __cpacf_query_rre(CPACF_PCKMO, 0, 0, mask);
+ __cpacf_query_rre(CPACF_PCKMO, 0, 0, pb, fc);
break;
case CPACF_PRNO:
- __cpacf_query_rre(CPACF_PRNO, 2, 4, mask);
+ __cpacf_query_rre(CPACF_PRNO, 2, 4, pb, fc);
break;
default:
__cpacf_bad_opcode();
}
}
+static __always_inline void __cpacf_query(unsigned int opcode,
+ cpacf_mask_t *mask)
+{
+ __cpacf_query_insn(opcode, mask, CPACF_FC_QUERY);
+}
+
static __always_inline int __cpacf_check_opcode(unsigned int opcode)
{
switch (opcode) {
@@ -268,6 +323,8 @@ static __always_inline int __cpacf_check_opcode(unsigned int opcode)
return test_facility(57); /* check for MSA5 */
case CPACF_KMA:
return test_facility(146); /* check for MSA8 */
+ case CPACF_KDSA:
+ return test_facility(155); /* check for MSA9 */
default:
__cpacf_bad_opcode();
return 0;
@@ -275,14 +332,15 @@ static __always_inline int __cpacf_check_opcode(unsigned int opcode)
}
/**
- * cpacf_query() - check if a specific CPACF function is available
+ * cpacf_query() - Query the function code mask for this CPACF opcode
* @opcode: the opcode of the crypto instruction
- * @func: the function code to test for
+ * @mask: ptr to struct cpacf_mask_t
*
* Executes the query function for the given crypto instruction @opcode
* and checks if @func is available
*
- * Returns 1 if @func is available for @opcode, 0 otherwise
+ * On success 1 is returned and the mask is filled with the function
+ * code mask for this CPACF opcode, otherwise 0 is returned.
*/
static __always_inline int cpacf_query(unsigned int opcode, cpacf_mask_t *mask)
{
@@ -299,7 +357,8 @@ static inline int cpacf_test_func(cpacf_mask_t *mask, unsigned int func)
return (mask->bytes[func >> 3] & (0x80 >> (func & 7))) != 0;
}
-static __always_inline int cpacf_query_func(unsigned int opcode, unsigned int func)
+static __always_inline int cpacf_query_func(unsigned int opcode,
+ unsigned int func)
{
cpacf_mask_t mask;
@@ -308,6 +367,32 @@ static __always_inline int cpacf_query_func(unsigned int opcode, unsigned int fu
return 0;
}
+static __always_inline void __cpacf_qai(unsigned int opcode, cpacf_qai_t *qai)
+{
+ __cpacf_query_insn(opcode, qai, CPACF_FC_QUERY_AUTH_INFO);
+}
+
+/**
+ * cpacf_qai() - Get the query authentication information for a CPACF opcode
+ * @opcode: the opcode of the crypto instruction
+ * @qai: ptr to struct cpacf_qai_t
+ *
+ * Executes the query authentication information function for the given
+ * crypto instruction @opcode.
+ *
+ * On success 1 is returned and @qai is filled with the query authentication
+ * information for this CPACF opcode, otherwise 0 is returned.
+ */
+static __always_inline int cpacf_qai(unsigned int opcode, cpacf_qai_t *qai)
+{
+ if (cpacf_query_func(opcode, CPACF_FC_QUERY_AUTH_INFO)) {
+ __cpacf_qai(opcode, qai);
+ return 1;
+ }
+ memset(qai, 0, sizeof(*qai));
+ return 0;
+}
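
A hedged usage sketch of the query helpers above (kernel context assumed, the function name is made up): probing whether the KM instruction offers the new full-XTS function code before registering a cipher:

static bool km_xts256_full_available(void)
{
	cpacf_mask_t mask;

	if (!cpacf_query(CPACF_KM, &mask))
		return false;
	return cpacf_test_func(&mask, CPACF_KM_XTS_256_FULL);
}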
+
/**
* cpacf_km() - executes the KM (CIPHER MESSAGE) instruction
* @func: the function code passed to KM; see CPACF_KM_xxx defines
@@ -331,7 +416,7 @@ static inline int cpacf_km(unsigned long func, void *param,
" lgr 0,%[fc]\n"
" lgr 1,%[pba]\n"
"0: .insn rre,%[opc] << 16,%[dst],%[src]\n"
- " brc 1,0b\n" /* handle partial completion */
+ " brc 1,0b" /* handle partial completion */
: [src] "+&d" (s.pair), [dst] "+&d" (d.pair)
: [fc] "d" (func), [pba] "d" ((unsigned long)param),
[opc] "i" (CPACF_KM)
@@ -363,7 +448,7 @@ static inline int cpacf_kmc(unsigned long func, void *param,
" lgr 0,%[fc]\n"
" lgr 1,%[pba]\n"
"0: .insn rre,%[opc] << 16,%[dst],%[src]\n"
- " brc 1,0b\n" /* handle partial completion */
+ " brc 1,0b" /* handle partial completion */
: [src] "+&d" (s.pair), [dst] "+&d" (d.pair)
: [fc] "d" (func), [pba] "d" ((unsigned long)param),
[opc] "i" (CPACF_KMC)
@@ -390,8 +475,8 @@ static inline void cpacf_kimd(unsigned long func, void *param,
asm volatile(
" lgr 0,%[fc]\n"
" lgr 1,%[pba]\n"
- "0: .insn rre,%[opc] << 16,0,%[src]\n"
- " brc 1,0b\n" /* handle partial completion */
+ "0: .insn rrf,%[opc] << 16,0,%[src],8,0\n"
+ " brc 1,0b" /* handle partial completion */
: [src] "+&d" (s.pair)
: [fc] "d" (func), [pba] "d" ((unsigned long)(param)),
[opc] "i" (CPACF_KIMD)
@@ -415,8 +500,8 @@ static inline void cpacf_klmd(unsigned long func, void *param,
asm volatile(
" lgr 0,%[fc]\n"
" lgr 1,%[pba]\n"
- "0: .insn rre,%[opc] << 16,0,%[src]\n"
- " brc 1,0b\n" /* handle partial completion */
+ "0: .insn rrf,%[opc] << 16,0,%[src],8,0\n"
+ " brc 1,0b" /* handle partial completion */
: [src] "+&d" (s.pair)
: [fc] "d" (func), [pba] "d" ((unsigned long)param),
[opc] "i" (CPACF_KLMD)
@@ -424,29 +509,30 @@ static inline void cpacf_klmd(unsigned long func, void *param,
}
/**
- * cpacf_kmac() - executes the KMAC (COMPUTE MESSAGE AUTHENTICATION CODE)
- * instruction
- * @func: the function code passed to KM; see CPACF_KMAC_xxx defines
+ * _cpacf_kmac() - executes the KMAC (COMPUTE MESSAGE AUTHENTICATION CODE)
+ * instruction and updates flags in gr0
+ * @gr0: pointer to gr0 (fc and flags) passed to KMAC; see CPACF_KMAC_xxx defines
* @param: address of parameter block; see POP for details on each func
* @src: address of source memory area
* @src_len: length of src operand in bytes
*
* Returns 0 for the query func, number of processed bytes for digest funcs
*/
-static inline int cpacf_kmac(unsigned long func, void *param,
- const u8 *src, long src_len)
+static inline int _cpacf_kmac(unsigned long *gr0, void *param,
+ const u8 *src, long src_len)
{
union register_pair s;
s.even = (unsigned long)src;
s.odd = (unsigned long)src_len;
asm volatile(
- " lgr 0,%[fc]\n"
+ " lgr 0,%[r0]\n"
" lgr 1,%[pba]\n"
"0: .insn rre,%[opc] << 16,0,%[src]\n"
" brc 1,0b\n" /* handle partial completion */
- : [src] "+&d" (s.pair)
- : [fc] "d" (func), [pba] "d" ((unsigned long)param),
+ " lgr %[r0],0"
+ : [r0] "+d" (*gr0), [src] "+&d" (s.pair)
+ : [pba] "d" ((unsigned long)param),
[opc] "i" (CPACF_KMAC)
: "cc", "memory", "0", "1");
@@ -454,6 +540,22 @@ static inline int cpacf_kmac(unsigned long func, void *param,
}
/**
+ * cpacf_kmac() - executes the KMAC (COMPUTE MESSAGE AUTHENTICATION CODE)
+ * instruction
+ * @func: function code passed to KMAC; see CPACF_KMAC_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ *
+ * Returns 0 for the query func, number of processed bytes for digest funcs
+ */
+static inline int cpacf_kmac(unsigned long func, void *param,
+ const u8 *src, long src_len)
+{
+ return _cpacf_kmac(&func, param, src, src_len);
+}
+
+/**
* cpacf_kmctr() - executes the KMCTR (CIPHER MESSAGE WITH COUNTER) instruction
* @func: the function code passed to KMCTR; see CPACF_KMCTR_xxx defines
* @param: address of parameter block; see POP for details on each func
@@ -478,7 +580,7 @@ static inline int cpacf_kmctr(unsigned long func, void *param, u8 *dest,
" lgr 0,%[fc]\n"
" lgr 1,%[pba]\n"
"0: .insn rrf,%[opc] << 16,%[dst],%[src],%[ctr],0\n"
- " brc 1,0b\n" /* handle partial completion */
+ " brc 1,0b" /* handle partial completion */
: [src] "+&d" (s.pair), [dst] "+&d" (d.pair),
[ctr] "+&d" (c.pair)
: [fc] "d" (func), [pba] "d" ((unsigned long)param),
@@ -512,7 +614,7 @@ static inline void cpacf_prno(unsigned long func, void *param,
" lgr 0,%[fc]\n"
" lgr 1,%[pba]\n"
"0: .insn rre,%[opc] << 16,%[dst],%[seed]\n"
- " brc 1,0b\n" /* handle partial completion */
+ " brc 1,0b" /* handle partial completion */
: [dst] "+&d" (d.pair)
: [fc] "d" (func), [pba] "d" ((unsigned long)param),
[seed] "d" (s.pair), [opc] "i" (CPACF_PRNO)
@@ -538,10 +640,12 @@ static inline void cpacf_trng(u8 *ucbuf, unsigned long ucbuf_len,
asm volatile (
" lghi 0,%[fc]\n"
"0: .insn rre,%[opc] << 16,%[ucbuf],%[cbuf]\n"
- " brc 1,0b\n" /* handle partial completion */
+ " brc 1,0b" /* handle partial completion */
: [ucbuf] "+&d" (u.pair), [cbuf] "+&d" (c.pair)
: [fc] "K" (CPACF_PRNO_TRNG), [opc] "i" (CPACF_PRNO)
: "cc", "memory", "0");
+ kmsan_unpoison_memory(ucbuf, ucbuf_len);
+ kmsan_unpoison_memory(cbuf, cbuf_len);
}
/**
@@ -549,18 +653,30 @@ static inline void cpacf_trng(u8 *ucbuf, unsigned long ucbuf_len,
* instruction
* @func: the function code passed to PCC; see CPACF_KM_xxx defines
* @param: address of parameter block; see POP for details on each func
+ *
+ * Returns the condition code, this is
+ * 0 - cc code 0 (normal completion)
+ * 1 - cc code 1 (protected key wkvp mismatch or src operand out of range)
+ * 2 - cc code 2 (something invalid, scalar multiply infinity, ...)
+ * Condition code 3 (partial completion) is handled within the asm code
+ * and never returned.
*/
-static inline void cpacf_pcc(unsigned long func, void *param)
+static inline int cpacf_pcc(unsigned long func, void *param)
{
+ int cc;
+
asm volatile(
" lgr 0,%[fc]\n"
" lgr 1,%[pba]\n"
"0: .insn rre,%[opc] << 16,0,0\n" /* PCC opcode */
" brc 1,0b\n" /* handle partial completion */
- :
+ CC_IPM(cc)
+ : CC_OUT(cc, cc)
: [fc] "d" (func), [pba] "d" ((unsigned long)param),
[opc] "i" (CPACF_PCC)
- : "cc", "memory", "0", "1");
+ : CC_CLOBBER_LIST("memory", "0", "1"));
+
+ return CC_TRANSFORM(cc);
}
/**
@@ -576,7 +692,7 @@ static inline void cpacf_pckmo(long func, void *param)
asm volatile(
" lgr 0,%[fc]\n"
" lgr 1,%[pba]\n"
- " .insn rre,%[opc] << 16,0,0\n" /* PCKMO opcode */
+ " .insn rre,%[opc] << 16,0,0" /* PCKMO opcode */
:
: [fc] "d" (func), [pba] "d" ((unsigned long)param),
[opc] "i" (CPACF_PCKMO)
@@ -609,7 +725,7 @@ static inline void cpacf_kma(unsigned long func, void *param, u8 *dest,
" lgr 0,%[fc]\n"
" lgr 1,%[pba]\n"
"0: .insn rrf,%[opc] << 16,%[dst],%[src],%[aad],0\n"
- " brc 1,0b\n" /* handle partial completion */
+ " brc 1,0b" /* handle partial completion */
: [dst] "+&d" (d.pair), [src] "+&d" (s.pair),
[aad] "+&d" (a.pair)
: [fc] "d" (func), [pba] "d" ((unsigned long)param),
diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h
index 26c710cd3485..5672e3fab52b 100644
--- a/arch/s390/include/asm/cpu.h
+++ b/arch/s390/include/asm/cpu.h
@@ -9,7 +9,7 @@
#ifndef _ASM_S390_CPU_H
#define _ASM_S390_CPU_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/jump_label.h>
@@ -24,5 +24,5 @@ struct cpuid
DECLARE_STATIC_KEY_FALSE(cpu_has_bear);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_S390_CPU_H */
diff --git a/arch/s390/include/asm/cpu_mf-insn.h b/arch/s390/include/asm/cpu_mf-insn.h
index a68b362e0964..941663939cc7 100644
--- a/arch/s390/include/asm/cpu_mf-insn.h
+++ b/arch/s390/include/asm/cpu_mf-insn.h
@@ -8,7 +8,7 @@
#ifndef _ASM_S390_CPU_MF_INSN_H
#define _ASM_S390_CPU_MF_INSN_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
/* Macro to generate the STCCTM instruction with a customized
* M3 field designating the counter set.
@@ -17,6 +17,6 @@
.insn rsy,0xeb0000000017,\r1,\m3 & 0xf,\db2
.endm
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index a0de5b9b02ea..1798fbd59068 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -10,8 +10,10 @@
#define _ASM_S390_CPU_MF_H
#include <linux/errno.h>
+#include <linux/kmsan-checks.h>
#include <asm/asm-extable.h>
#include <asm/facility.h>
+#include <asm/asm.h>
asm(".include \"asm/cpu_mf-insn.h\"\n");
@@ -169,7 +171,7 @@ static inline int qctri(struct cpumf_ctr_info *info)
{
int rc = -EINVAL;
- asm volatile (
+ asm_inline volatile (
"0: qctri %1\n"
"1: lhi %0,0\n"
"2:\n"
@@ -183,12 +185,13 @@ static inline int lcctl(u64 ctl)
{
int cc;
- asm volatile (
- " lcctl %1\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (cc) : "Q" (ctl) : "cc");
- return cc;
+ asm_inline volatile (
+ " lcctl %[ctl]\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc)
+ : [ctl] "Q" (ctl)
+ : CC_CLOBBER);
+ return CC_TRANSFORM(cc);
}
/* Extract CPU counter */
@@ -197,13 +200,14 @@ static inline int __ecctr(u64 ctr, u64 *content)
u64 _content;
int cc;
- asm volatile (
- " ecctr %0,%2\n"
- " ipm %1\n"
- " srl %1,28\n"
- : "=d" (_content), "=d" (cc) : "d" (ctr) : "cc");
+ asm_inline volatile (
+ " ecctr %[_content],%[ctr]\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc), [_content] "=d" (_content)
+ : [ctr] "d" (ctr)
+ : CC_CLOBBER);
*content = _content;
- return cc;
+ return CC_TRANSFORM(cc);
}
/* Extract CPU counter */
@@ -233,13 +237,17 @@ static __always_inline int stcctm(enum stcctm_ctr_set set, u64 range, u64 *dest)
int cc;
asm volatile (
- " STCCTM %2,%3,%1\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (cc)
- : "Q" (*dest), "d" (range), "i" (set)
- : "cc", "memory");
- return cc;
+ " STCCTM %[range],%[set],%[dest]\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc)
+ : [dest] "Q" (*dest), [range] "d" (range), [set] "i" (set)
+ : CC_CLOBBER_LIST("memory"));
+ /*
+ * If cc == 2, fewer than RANGE counters are stored, but it's not easy
+ * to tell how many. Always unpoison the whole range for simplicity.
+ */
+ kmsan_unpoison_memory(dest, range * sizeof(u64));
+ return CC_TRANSFORM(cc);
}
/* Query sampling information */
@@ -259,19 +267,20 @@ static inline int qsi(struct hws_qsi_info_block *info)
/* Load sampling controls */
static inline int lsctl(struct hws_lsctl_request_block *req)
{
- int cc;
+ int cc, exception;
- cc = 1;
+ exception = 1;
asm volatile(
- "0: lsctl 0(%1)\n"
- "1: ipm %0\n"
- " srl %0,28\n"
+ "0: lsctl %[req]\n"
+ "1: lhi %[exc],0\n"
"2:\n"
+ CC_IPM(cc)
EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
- : "+d" (cc), "+a" (req)
- : "m" (*req)
- : "cc", "memory");
-
- return cc ? -EINVAL : 0;
+ : CC_OUT(cc, cc), [exc] "+d" (exception)
+ : [req] "Q" (*req)
+ : CC_CLOBBER);
+ if (exception || CC_TRANSFORM(cc))
+ return -EINVAL;
+ return 0;
}
#endif /* _ASM_S390_CPU_MF_H */
diff --git a/arch/s390/include/asm/cpufeature.h b/arch/s390/include/asm/cpufeature.h
index 931204613753..d6fb999c8c6d 100644
--- a/arch/s390/include/asm/cpufeature.h
+++ b/arch/s390/include/asm/cpufeature.h
@@ -9,10 +9,13 @@
#ifndef __ASM_S390_CPUFEATURE_H
#define __ASM_S390_CPUFEATURE_H
+#include <asm/facility.h>
+
enum {
S390_CPU_FEATURE_MSA,
S390_CPU_FEATURE_VXRS,
S390_CPU_FEATURE_UV,
+ S390_CPU_FEATURE_D288,
MAX_CPU_FEATURES
};
@@ -20,4 +23,15 @@ enum {
int cpu_have_feature(unsigned int nr);
+#define cpu_has_bear() test_facility(193)
+#define cpu_has_edat1() test_facility(8)
+#define cpu_has_edat2() test_facility(78)
+#define cpu_has_gs() test_facility(133)
+#define cpu_has_nx() test_facility(130)
+#define cpu_has_rdp() test_facility(194)
+#define cpu_has_seq_insn() test_facility(85)
+#define cpu_has_tlb_lc() test_facility(51)
+#define cpu_has_topology() test_facility(11)
+#define cpu_has_vx() test_facility(129)
+
#endif /* __ASM_S390_CPUFEATURE_H */
diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
index 638137d46c85..a03f64033760 100644
--- a/arch/s390/include/asm/css_chars.h
+++ b/arch/s390/include/asm/css_chars.h
@@ -25,7 +25,7 @@ struct css_general_char {
u64 : 2;
u64 : 3;
- u64 aif_osa : 1; /* bit 67 */
+ u64 aif_qdio : 1;/* bit 67 */
u64 : 12;
u64 eadm_rf : 1; /* bit 80 */
u64 : 1;
diff --git a/arch/s390/include/asm/ctlreg.h b/arch/s390/include/asm/ctlreg.h
index 72a9556d04f3..1765a0320933 100644
--- a/arch/s390/include/asm/ctlreg.h
+++ b/arch/s390/include/asm/ctlreg.h
@@ -80,7 +80,7 @@
#define CR14_EXTERNAL_DAMAGE_SUBMASK BIT(CR14_EXTERNAL_DAMAGE_SUBMASK_BIT)
#define CR14_WARNING_SUBMASK BIT(CR14_WARNING_SUBMASK_BIT)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/bug.h>
@@ -100,7 +100,7 @@ struct ctlreg {
BUILD_BUG_ON(sizeof(struct addrtype) != _esize); \
typecheck(struct ctlreg, array[0]); \
asm volatile( \
- " lctlg %[_low],%[_high],%[_arr]\n" \
+ " lctlg %[_low],%[_high],%[_arr]" \
: \
: [_arr] "Q" (*(struct addrtype *)(&array)), \
[_low] "i" (low), [_high] "i" (high) \
@@ -119,7 +119,7 @@ struct ctlreg {
BUILD_BUG_ON(sizeof(struct addrtype) != _esize); \
typecheck(struct ctlreg, array[0]); \
asm volatile( \
- " stctg %[_low],%[_high],%[_arr]\n" \
+ " stctg %[_low],%[_high],%[_arr]" \
: [_arr] "=Q" (*(struct addrtype *)(&array)) \
: [_low] "i" (low), [_high] "i" (high)); \
} while (0)
@@ -127,7 +127,7 @@ struct ctlreg {
static __always_inline void local_ctl_load(unsigned int cr, struct ctlreg *reg)
{
asm volatile(
- " lctlg %[cr],%[cr],%[reg]\n"
+ " lctlg %[cr],%[cr],%[reg]"
:
: [reg] "Q" (*reg), [cr] "i" (cr)
: "memory");
@@ -136,7 +136,7 @@ static __always_inline void local_ctl_load(unsigned int cr, struct ctlreg *reg)
static __always_inline void local_ctl_store(unsigned int cr, struct ctlreg *reg)
{
asm volatile(
- " stctg %[cr],%[cr],%[reg]\n"
+ " stctg %[cr],%[cr],%[reg]"
: [reg] "=Q" (*reg)
: [cr] "i" (cr));
}
@@ -202,8 +202,9 @@ union ctlreg0 {
unsigned long : 3;
unsigned long ccc : 1; /* Cryptography counter control */
unsigned long pec : 1; /* PAI extension control */
- unsigned long : 17;
- unsigned long : 3;
+ unsigned long : 15;
+ unsigned long wti : 1; /* Warning-track */
+ unsigned long : 4;
unsigned long lap : 1; /* Low-address-protection control */
unsigned long : 4;
unsigned long edat : 1; /* Enhanced-DAT-enablement control */
@@ -251,5 +252,5 @@ union ctlreg15 {
};
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_S390_CTLREG_H */
diff --git a/arch/s390/include/asm/current.h b/arch/s390/include/asm/current.h
index 68f84315277c..f9529f7cf62c 100644
--- a/arch/s390/include/asm/current.h
+++ b/arch/s390/include/asm/current.h
@@ -11,9 +11,25 @@
#define _S390_CURRENT_H
#include <asm/lowcore.h>
+#include <asm/machine.h>
struct task_struct;
-#define current ((struct task_struct *const)S390_lowcore.current_task)
+static __always_inline struct task_struct *get_current(void)
+{
+ unsigned long ptr, lc_current;
+
+ lc_current = offsetof(struct lowcore, current_task);
+ asm_inline(
+ ALTERNATIVE(" lg %[ptr],%[offzero](%%r0)\n",
+ " lg %[ptr],%[offalt](%%r0)\n",
+ ALT_FEATURE(MFEATURE_LOWCORE))
+ : [ptr] "=d" (ptr)
+ : [offzero] "i" (lc_current),
+ [offalt] "i" (lc_current + LOWCORE_ALT_ADDRESS));
+ return (struct task_struct *)ptr;
+}
+
+#define current get_current()
#endif /* !(_S390_CURRENT_H) */
diff --git a/arch/s390/include/asm/dat-bits.h b/arch/s390/include/asm/dat-bits.h
new file mode 100644
index 000000000000..8d65eec2f124
--- /dev/null
+++ b/arch/s390/include/asm/dat-bits.h
@@ -0,0 +1,170 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * DAT table and related structures
+ *
+ * Copyright IBM Corp. 2024
+ *
+ */
+
+#ifndef _S390_DAT_BITS_H
+#define _S390_DAT_BITS_H
+
+union asce {
+ unsigned long val;
+ struct {
+ unsigned long rsto: 52;/* Region- or Segment-Table Origin */
+ unsigned long : 2;
+ unsigned long g : 1; /* Subspace Group control */
+ unsigned long p : 1; /* Private Space control */
+ unsigned long s : 1; /* Storage-Alteration-Event control */
+ unsigned long x : 1; /* Space-Switch-Event control */
+ unsigned long r : 1; /* Real-Space control */
+ unsigned long : 1;
+ unsigned long dt : 2; /* Designation-Type control */
+ unsigned long tl : 2; /* Region- or Segment-Table Length */
+ };
+};
+
+enum {
+ ASCE_TYPE_SEGMENT = 0,
+ ASCE_TYPE_REGION3 = 1,
+ ASCE_TYPE_REGION2 = 2,
+ ASCE_TYPE_REGION1 = 3
+};
+
+union region1_table_entry {
+ unsigned long val;
+ struct {
+ unsigned long rto: 52;/* Region-Table Origin */
+ unsigned long : 2;
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long : 1;
+ unsigned long tf : 2; /* Region-Second-Table Offset */
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long : 1;
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long tl : 2; /* Region-Second-Table Length */
+ };
+};
+
+union region2_table_entry {
+ unsigned long val;
+ struct {
+ unsigned long rto: 52;/* Region-Table Origin */
+ unsigned long : 2;
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long : 1;
+ unsigned long tf : 2; /* Region-Third-Table Offset */
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long : 1;
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long tl : 2; /* Region-Third-Table Length */
+ };
+};
+
+struct region3_table_entry_fc0 {
+ unsigned long sto: 52;/* Segment-Table Origin */
+ unsigned long : 1;
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long : 1;
+ unsigned long tf : 2; /* Segment-Table Offset */
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long cr : 1; /* Common-Region Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long tl : 2; /* Segment-Table Length */
+};
+
+struct region3_table_entry_fc1 {
+ unsigned long rfaa: 33;/* Region-Frame Absolute Address */
+ unsigned long : 14;
+ unsigned long av : 1; /* ACCF-Validity Control */
+ unsigned long acc : 4; /* Access-Control Bits */
+ unsigned long f : 1; /* Fetch-Protection Bit */
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long iep : 1; /* Instruction-Execution-Protection */
+ unsigned long : 2;
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long cr : 1; /* Common-Region Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+};
+
+union region3_table_entry {
+ unsigned long val;
+ struct region3_table_entry_fc0 fc0;
+ struct region3_table_entry_fc1 fc1;
+ struct {
+ unsigned long : 53;
+ unsigned long fc: 1; /* Format-Control */
+ unsigned long : 4;
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long cr: 1; /* Common-Region Bit */
+ unsigned long tt: 2; /* Table-Type Bits */
+ unsigned long : 2;
+ };
+};
+
+struct segment_table_entry_fc0 {
+ unsigned long pto: 53;/* Page-Table Origin */
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long : 3;
+ unsigned long i : 1; /* Segment-Invalid Bit */
+ unsigned long cs : 1; /* Common-Segment Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+};
+
+struct segment_table_entry_fc1 {
+ unsigned long sfaa: 44;/* Segment-Frame Absolute Address */
+ unsigned long : 3;
+ unsigned long av : 1; /* ACCF-Validity Control */
+ unsigned long acc : 4; /* Access-Control Bits */
+ unsigned long f : 1; /* Fetch-Protection Bit */
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long iep : 1; /* Instruction-Execution-Protection */
+ unsigned long : 2;
+ unsigned long i : 1; /* Segment-Invalid Bit */
+ unsigned long cs : 1; /* Common-Segment Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+};
+
+union segment_table_entry {
+ unsigned long val;
+ struct segment_table_entry_fc0 fc0;
+ struct segment_table_entry_fc1 fc1;
+ struct {
+ unsigned long : 53;
+ unsigned long fc: 1; /* Format-Control */
+ unsigned long : 4;
+ unsigned long i : 1; /* Segment-Invalid Bit */
+ unsigned long cs: 1; /* Common-Segment Bit */
+ unsigned long tt: 2; /* Table-Type Bits */
+ unsigned long : 2;
+ };
+};
+
+union page_table_entry {
+ unsigned long val;
+ struct {
+ unsigned long pfra: 52;/* Page-Frame Real Address */
+ unsigned long z : 1; /* Zero Bit */
+ unsigned long i : 1; /* Page-Invalid Bit */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long iep : 1; /* Instruction-Execution-Protection */
+ unsigned long : 8;
+ };
+};
+
+enum {
+ TABLE_TYPE_SEGMENT = 0,
+ TABLE_TYPE_REGION3 = 1,
+ TABLE_TYPE_REGION2 = 2,
+ TABLE_TYPE_REGION1 = 3
+};
+
+#endif /* _S390_DAT_BITS_H */
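
A small sketch of how these unions are meant to be read, assuming the big-endian bitfield layout where the first-declared field occupies the most significant bits; the helper names are illustrative only:

static inline unsigned long asce_origin(union asce asce)
{
	/* rsto holds ASCE bits 0-51; shift it back into a byte address */
	return (unsigned long)asce.rsto << 12;
}

static inline int asce_is_region1(union asce asce)
{
	return asce.dt == ASCE_TYPE_REGION1;
}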
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
index ccd4e148b5ed..6375276d94ea 100644
--- a/arch/s390/include/asm/debug.h
+++ b/arch/s390/include/asm/debug.h
@@ -66,14 +66,15 @@ typedef int (debug_header_proc_t) (debug_info_t *id,
struct debug_view *view,
int area,
debug_entry_t *entry,
- char *out_buf);
+ char *out_buf, size_t out_buf_size);
typedef int (debug_format_proc_t) (debug_info_t *id,
struct debug_view *view, char *out_buf,
+ size_t out_buf_size,
const char *in_buf);
typedef int (debug_prolog_proc_t) (debug_info_t *id,
struct debug_view *view,
- char *out_buf);
+ char *out_buf, size_t out_buf_size);
typedef int (debug_input_proc_t) (debug_info_t *id,
struct debug_view *view,
struct file *file,
@@ -81,8 +82,13 @@ typedef int (debug_input_proc_t) (debug_info_t *id,
size_t in_buf_size, loff_t *offset);
int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
- int area, debug_entry_t *entry, char *out_buf);
+ int area, debug_entry_t *entry,
+ char *out_buf, size_t out_buf_size);
+#define DEBUG_SPRINTF_MAX_ARGS 10
+int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
+ char *out_buf, size_t out_buf_size,
+ const char *inbuf);
struct debug_view {
char name[DEBUG_MAX_NAME_LEN];
debug_prolog_proc_t *prolog_proc;
@@ -112,6 +118,9 @@ debug_info_t *debug_register_mode(const char *name, int pages, int nr_areas,
int buf_size, umode_t mode, uid_t uid,
gid_t gid);
+ssize_t debug_dump(debug_info_t *id, struct debug_view *view,
+ char *buf, size_t buf_size, bool reverse);
+
void debug_unregister(debug_info_t *id);
void debug_set_level(debug_info_t *id, int new_level);
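
With the buffer size now passed to the callbacks, a format callback is expected to stay within out_buf_size. A minimal sketch of such a callback, assuming the debug area holds NUL-terminated strings; the function name is made up:

static int my_format_fn(debug_info_t *id, struct debug_view *view,
			char *out_buf, size_t out_buf_size,
			const char *in_buf)
{
	return scnprintf(out_buf, out_buf_size, "%s\n", in_buf);
}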
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index 20b94220113b..8db8db3b1018 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -12,6 +12,7 @@
#include <linux/if_ether.h>
#include <linux/percpu.h>
#include <asm/asm-extable.h>
+#include <asm/sclp.h>
#include <asm/cio.h>
enum diag_stat_enum {
@@ -35,8 +36,11 @@ enum diag_stat_enum {
DIAG_STAT_X2FC,
DIAG_STAT_X304,
DIAG_STAT_X308,
+ DIAG_STAT_X310,
DIAG_STAT_X318,
DIAG_STAT_X320,
+ DIAG_STAT_X324,
+ DIAG_STAT_X49C,
DIAG_STAT_X500,
NR_DIAG_STAT
};
@@ -62,7 +66,7 @@ static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn)
end_addr = pfn_to_phys(start_pfn + num_pfn - 1);
diag_stat_inc(DIAG_STAT_X010);
- asm volatile(
+ asm_inline volatile(
"0: diag %0,%1,0x10\n"
"1: nopr %%r7\n"
EX_TABLE(0b, 1b)
@@ -117,6 +121,8 @@ enum diag204_sc {
};
#define DIAG204_SUBCODE_MASK 0xffff
+#define DIAG204_BIF_BIT 0x80000000
+#define DIAG204_BUSY_WAIT (HZ / 10)
/* The two available diag 204 data formats */
enum diag204_format {
@@ -326,6 +332,11 @@ union diag318_info {
};
};
+static inline bool diag204_has_bif(void)
+{
+ return sclp.has_diag204_bif;
+}
+
int diag204(unsigned long subcode, unsigned long size, void *addr);
int diag224(void *ptr);
int diag26c(void *req, void *resp, enum diag26c_sc subcode);
@@ -355,4 +366,12 @@ void _diag0c_amode31(unsigned long rx);
void _diag308_reset_amode31(void);
int _diag8c_amode31(struct diag8c *addr, struct ccw_dev_id *devno, size_t len);
+/* diag 49c subcodes */
+enum diag49c_sc {
+ DIAG49C_SUBC_ACK = 0,
+ DIAG49C_SUBC_REG = 1
+};
+
+int diag49c(unsigned long subcode);
+
#endif /* _ASM_S390_DIAG_H */
diff --git a/arch/s390/include/asm/diag288.h b/arch/s390/include/asm/diag288.h
new file mode 100644
index 000000000000..5e1b43cea9d6
--- /dev/null
+++ b/arch/s390/include/asm/diag288.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_DIAG288_H
+#define _ASM_S390_DIAG288_H
+
+#include <asm/asm-extable.h>
+#include <asm/types.h>
+
+#define MIN_INTERVAL 15 /* Minimal time supported by diag288 */
+#define MAX_INTERVAL 3600 /* One hour should be enough - pure estimation */
+
+#define WDT_DEFAULT_TIMEOUT 30
+
+/* Function codes - init, change, cancel */
+#define WDT_FUNC_INIT 0
+#define WDT_FUNC_CHANGE 1
+#define WDT_FUNC_CANCEL 2
+#define WDT_FUNC_CONCEAL 0x80000000
+
+/* Action codes for LPAR watchdog */
+#define LPARWDT_RESTART 0
+
+static inline int __diag288(unsigned int func, unsigned int timeout,
+ unsigned long action, unsigned int len)
+{
+ union register_pair r1 = { .even = func, .odd = timeout, };
+ union register_pair r3 = { .even = action, .odd = len, };
+ int rc = -EINVAL;
+
+ asm volatile(
+ " diag %[r1],%[r3],0x288\n"
+ "0: lhi %[rc],0\n"
+ "1:"
+ EX_TABLE(0b, 1b)
+ : [rc] "+d" (rc)
+ : [r1] "d" (r1.pair), [r3] "d" (r3.pair)
+ : "cc", "memory");
+ return rc;
+}
+
+#endif /* _ASM_S390_DIAG288_H */
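
A usage sketch of the wrapper above, along the lines of what an LPAR watchdog driver would issue to arm the watchdog with the default timeout (error handling omitted):

static int lpar_wdt_arm_default(void)
{
	return __diag288(WDT_FUNC_INIT, WDT_DEFAULT_TIMEOUT, LPARWDT_RESTART, 0);
}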
diff --git a/arch/s390/include/asm/dwarf.h b/arch/s390/include/asm/dwarf.h
index 390906b8e386..e3ad6798d0cd 100644
--- a/arch/s390/include/asm/dwarf.h
+++ b/arch/s390/include/asm/dwarf.h
@@ -2,7 +2,7 @@
#ifndef _ASM_S390_DWARF_H
#define _ASM_S390_DWARF_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define CFI_STARTPROC .cfi_startproc
#define CFI_ENDPROC .cfi_endproc
@@ -33,6 +33,6 @@
.cfi_sections .eh_frame, .debug_frame
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_S390_DWARF_H */
diff --git a/arch/s390/include/asm/ebcdic.h b/arch/s390/include/asm/ebcdic.h
index efb50fc6866c..7164cb658435 100644
--- a/arch/s390/include/asm/ebcdic.h
+++ b/arch/s390/include/asm/ebcdic.h
@@ -22,18 +22,18 @@ extern __u8 _ebc_toupper[256]; /* EBCDIC -> uppercase */
static inline void
codepage_convert(const __u8 *codepage, volatile char *addr, unsigned long nr)
{
- if (nr-- <= 0)
+ if (!nr--)
return;
asm volatile(
- " bras 1,1f\n"
- " tr 0(1,%0),0(%2)\n"
- "0: tr 0(256,%0),0(%2)\n"
+ " j 2f\n"
+ "0: tr 0(1,%0),0(%2)\n"
+ "1: tr 0(256,%0),0(%2)\n"
" la %0,256(%0)\n"
- "1: ahi %1,-256\n"
- " jnm 0b\n"
- " ex %1,0(1)"
+ "2: aghi %1,-256\n"
+ " jnm 1b\n"
+ " exrl %1,0b"
: "+&a" (addr), "+&a" (nr)
- : "a" (codepage) : "cc", "memory", "1");
+ : "a" (codepage) : "cc", "memory");
}
#define ASCEBC(addr,nr) codepage_convert(_ascebc, addr, nr)
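
A short usage sketch of the converted routine: ASCEBC() translates a buffer in place from ASCII to EBCDIC, typically before a string is handed to an EBCDIC-expecting interface (the wrapper function here is made up):

static void to_ebcdic_in_place(char *buf)
{
	ASCEBC(buf, strlen(buf));
}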
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 70a30ae258b7..bb63fa4d20bb 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -91,6 +91,14 @@
/* Keep this the last entry. */
#define R_390_NUM 61
+/*
+ * HWCAP flags - for AT_HWCAP
+ *
+ * Bits 32-63 are reserved for use by libc.
+ * Bit 31 is reserved and will be used by libc to determine if a second
+ * argument is passed to IFUNC resolvers. This will be implemented when
+ * there is a need for AT_HWCAP2.
+ */
enum {
HWCAP_NR_ESAN3 = 0,
HWCAP_NR_ZARCH = 1,
@@ -150,15 +158,10 @@ enum {
#define ELF_DATA ELFDATA2MSB
#define ELF_ARCH EM_S390
-/* s390 specific phdr types */
-#define PT_S390_PGSTE 0x70000000
-
/*
* ELF register definitions..
*/
-#include <linux/compat.h>
-
#include <asm/ptrace.h>
#include <asm/syscall.h>
#include <asm/user.h>
@@ -166,9 +169,6 @@ enum {
typedef s390_fp_regs elf_fpregset_t;
typedef s390_regs elf_gregset_t;
-typedef s390_fp_regs compat_elf_fpregset_t;
-typedef s390_compat_regs compat_elf_gregset_t;
-
#include <linux/sched/mm.h> /* for task_struct */
#include <asm/mmu_context.h>
@@ -178,39 +178,6 @@ typedef s390_compat_regs compat_elf_gregset_t;
#define elf_check_arch(x) \
(((x)->e_machine == EM_S390 || (x)->e_machine == EM_S390_OLD) \
&& (x)->e_ident[EI_CLASS] == ELF_CLASS)
-#define compat_elf_check_arch(x) \
- (((x)->e_machine == EM_S390 || (x)->e_machine == EM_S390_OLD) \
- && (x)->e_ident[EI_CLASS] == ELF_CLASS)
-#define compat_start_thread start_thread31
-
-struct arch_elf_state {
- int rc;
-};
-
-#define INIT_ARCH_ELF_STATE { .rc = 0 }
-
-#define arch_check_elf(ehdr, interp, interp_ehdr, state) (0)
-#ifdef CONFIG_PGSTE
-#define arch_elf_pt_proc(ehdr, phdr, elf, interp, state) \
-({ \
- struct arch_elf_state *_state = state; \
- if ((phdr)->p_type == PT_S390_PGSTE && \
- !page_table_allocate_pgste && \
- !test_thread_flag(TIF_PGSTE) && \
- !current->mm->context.alloc_pgste) { \
- set_thread_flag(TIF_PGSTE); \
- set_pt_regs_flag(task_pt_regs(current), \
- PIF_EXECVE_PGSTE_RESTART); \
- _state->rc = -EAGAIN; \
- } \
- _state->rc; \
-})
-#else
-#define arch_elf_pt_proc(ehdr, phdr, elf, interp, state) \
-({ \
- (state)->rc; \
-})
-#endif
/* For SVR4/S390 the function pointer to be registered with `atexit` is
passed in R14. */
@@ -227,9 +194,7 @@ struct arch_elf_state {
the loader. We need to make sure that it is out of the way of the program
that it will "exec", and that there is sufficient room for the brk. 64-bit
tasks are aligned to 4GB. */
-#define ELF_ET_DYN_BASE (is_compat_task() ? \
- (STACK_TOP / 3 * 2) : \
- (STACK_TOP / 3 * 2) & ~((1UL << 32) - 1))
+#define ELF_ET_DYN_BASE ((STACK_TOP / 3 * 2) & ~((1UL << 32) - 1))
/* This yields a mask that user programs can use to figure out what
instruction set this CPU supports. */
@@ -248,43 +213,21 @@ extern unsigned long elf_hwcap;
extern char elf_platform[];
#define ELF_PLATFORM (elf_platform)
-#ifndef CONFIG_COMPAT
#define SET_PERSONALITY(ex) \
do { \
set_personality(PER_LINUX | \
(current->personality & (~PER_MASK))); \
- current->thread.sys_call_table = sys_call_table; \
-} while (0)
-#else /* CONFIG_COMPAT */
-#define SET_PERSONALITY(ex) \
-do { \
- if (personality(current->personality) != PER_LINUX32) \
- set_personality(PER_LINUX | \
- (current->personality & ~PER_MASK)); \
- if ((ex).e_ident[EI_CLASS] == ELFCLASS32) { \
- set_thread_flag(TIF_31BIT); \
- current->thread.sys_call_table = \
- sys_call_table_emu; \
- } else { \
- clear_thread_flag(TIF_31BIT); \
- current->thread.sys_call_table = \
- sys_call_table; \
- } \
} while (0)
-#endif /* CONFIG_COMPAT */
/*
* Cache aliasing on the latest machines calls for a mapping granularity
- * of 512KB for the anonymous mapping base. For 64-bit processes use a
- * 512KB alignment and a randomization of up to 1GB. For 31-bit processes
- * the virtual address space is limited, use no alignment and limit the
- * randomization to 8MB.
- * For the additional randomization of the program break use 32MB for
- * 64-bit and 8MB for 31-bit.
+ * of 512KB for the anonymous mapping base. Use a 512KB alignment and a
+ * randomization of up to 1GB.
+ * For the additional randomization of the program break use 32MB.
*/
-#define BRK_RND_MASK (is_compat_task() ? 0x7ffUL : 0x1fffUL)
-#define MMAP_RND_MASK (is_compat_task() ? 0x7ffUL : 0x3ff80UL)
-#define MMAP_ALIGN_MASK (is_compat_task() ? 0 : 0x7fUL)
+#define BRK_RND_MASK (0x1fffUL)
+#define MMAP_RND_MASK (0x3ff80UL)
+#define MMAP_ALIGN_MASK (0x7fUL)
#define STACK_RND_MASK MMAP_RND_MASK
/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h
index 35555c944630..979af986a8fe 100644
--- a/arch/s390/include/asm/entry-common.h
+++ b/arch/s390/include/asm/entry-common.h
@@ -59,4 +59,14 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
+static __always_inline bool arch_in_rcu_eqs(void)
+{
+ if (IS_ENABLED(CONFIG_KVM))
+ return current->flags & PF_VCPU;
+
+ return false;
+}
+
+#define arch_in_rcu_eqs arch_in_rcu_eqs
+
#endif
diff --git a/arch/s390/include/asm/extmem.h b/arch/s390/include/asm/extmem.h
index e0a06060afdd..225ee89c3f5e 100644
--- a/arch/s390/include/asm/extmem.h
+++ b/arch/s390/include/asm/extmem.h
@@ -6,7 +6,7 @@
#ifndef _ASM_S390X_DCSS_H
#define _ASM_S390X_DCSS_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* DCSS segment is defined as a contiguous range of pages using DEFSEG command.
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h
index 796007125dff..5f5b1aa6c233 100644
--- a/arch/s390/include/asm/facility.h
+++ b/arch/s390/include/asm/facility.h
@@ -14,13 +14,12 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/preempt.h>
-
+#include <asm/alternative.h>
#include <asm/lowcore.h>
#define MAX_FACILITY_BIT (sizeof(stfle_fac_list) * 8)
extern u64 stfle_fac_list[16];
-extern u64 alt_stfle_fac_list[16];
static inline void __set_facility(unsigned long nr, void *facilities)
{
@@ -40,33 +39,56 @@ static inline void __clear_facility(unsigned long nr, void *facilities)
ptr[nr >> 3] &= ~(0x80 >> (nr & 7));
}
-static inline int __test_facility(unsigned long nr, void *facilities)
+static __always_inline bool __test_facility(unsigned long nr, void *facilities)
{
unsigned char *ptr;
if (nr >= MAX_FACILITY_BIT)
- return 0;
+ return false;
ptr = (unsigned char *) facilities + (nr >> 3);
return (*ptr & (0x80 >> (nr & 7))) != 0;
}
/*
+ * __test_facility_constant() generates a single instruction branch. If the
+ * tested facility is available (likely) the branch is patched into a nop.
+ *
+ * Do not use this function unless you know what you are doing. All users are
+ * supposed to use test_facility() which will do the right thing.
+ */
+static __always_inline bool __test_facility_constant(unsigned long nr)
+{
+ asm goto(
+ ALTERNATIVE("brcl 15,%l[l_no]", "brcl 0,0", ALT_FACILITY(%[nr]))
+ :
+ : [nr] "i" (nr)
+ :
+ : l_no);
+ return true;
+l_no:
+ return false;
+}
+
+/*
* The test_facility function uses the bit ordering where the MSB is bit 0.
* That makes it easier to query facility bits with the bit number as
* documented in the Principles of Operation.
*/
-static inline int test_facility(unsigned long nr)
+static __always_inline bool test_facility(unsigned long nr)
{
unsigned long facilities_als[] = { FACILITIES_ALS };
- if (__builtin_constant_p(nr) && nr < sizeof(facilities_als) * 8) {
- if (__test_facility(nr, &facilities_als))
- return 1;
+ if (!__is_defined(__DECOMPRESSOR) && __builtin_constant_p(nr)) {
+ if (nr < sizeof(facilities_als) * 8) {
+ if (__test_facility(nr, &facilities_als))
+ return true;
+ }
+ return __test_facility_constant(nr);
}
return __test_facility(nr, &stfle_fac_list);
}
-static inline unsigned long __stfle_asm(u64 *stfle_fac_list, int size)
+static inline unsigned long __stfle_asm(u64 *fac_list, int size)
{
unsigned long reg0 = size - 1;
@@ -74,7 +96,7 @@ static inline unsigned long __stfle_asm(u64 *stfle_fac_list, int size)
" lgr 0,%[reg0]\n"
" .insn s,0xb2b00000,%[list]\n" /* stfle */
" lgr %[reg0],0\n"
- : [reg0] "+&d" (reg0), [list] "+Q" (*stfle_fac_list)
+ : [reg0] "+&d" (reg0), [list] "+Q" (*fac_list)
:
: "memory", "cc", "0");
return reg0;
@@ -82,32 +104,32 @@ static inline unsigned long __stfle_asm(u64 *stfle_fac_list, int size)
/**
* stfle - Store facility list extended
- * @stfle_fac_list: array where facility list can be stored
+ * @fac_list: array where facility list can be stored
* @size: size of passed in array in double words
*/
-static inline void __stfle(u64 *stfle_fac_list, int size)
+static inline void __stfle(u64 *fac_list, int size)
{
unsigned long nr;
u32 stfl_fac_list;
asm volatile(
" stfl 0(0)\n"
- : "=m" (S390_lowcore.stfl_fac_list));
- stfl_fac_list = S390_lowcore.stfl_fac_list;
- memcpy(stfle_fac_list, &stfl_fac_list, 4);
+ : "=m" (get_lowcore()->stfl_fac_list));
+ stfl_fac_list = get_lowcore()->stfl_fac_list;
+ memcpy(fac_list, &stfl_fac_list, 4);
nr = 4; /* bytes stored by stfl */
if (stfl_fac_list & 0x01000000) {
/* More facility bits available with stfle */
- nr = __stfle_asm(stfle_fac_list, size);
+ nr = __stfle_asm(fac_list, size);
nr = min_t(unsigned long, (nr + 1) * 8, size * 8);
}
- memset((char *) stfle_fac_list + nr, 0, size * 8 - nr);
+ memset((char *)fac_list + nr, 0, size * 8 - nr);
}
-static inline void stfle(u64 *stfle_fac_list, int size)
+static inline void stfle(u64 *fac_list, int size)
{
preempt_disable();
- __stfle(stfle_fac_list, size);
+ __stfle(fac_list, size);
preempt_enable();
}
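
For clarity, a small sketch of the MSB-first bit numbering that __test_facility() relies on: facility bit N sits in byte N / 8 of the facility list, at mask 0x80 >> (N % 8). The helper name is illustrative only:

static inline bool example_facility_bit(const unsigned char *fac_list,
					unsigned long nr)
{
	/* e.g. facility 129 (vector facility) -> byte 16, mask 0x40 */
	return (fac_list[nr >> 3] & (0x80 >> (nr & 7))) != 0;
}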
diff --git a/arch/s390/include/asm/fprobe.h b/arch/s390/include/asm/fprobe.h
new file mode 100644
index 000000000000..5ef600b372f4
--- /dev/null
+++ b/arch/s390/include/asm/fprobe.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_FPROBE_H
+#define _ASM_S390_FPROBE_H
+
+#include <asm-generic/fprobe.h>
+
+#undef FPROBE_HEADER_MSB_PATTERN
+#define FPROBE_HEADER_MSB_PATTERN 0
+
+#endif /* _ASM_S390_FPROBE_H */
diff --git a/arch/s390/include/asm/fpu-insn-asm.h b/arch/s390/include/asm/fpu-insn-asm.h
index 02ccfe46050a..cc0468fdf2d0 100644
--- a/arch/s390/include/asm/fpu-insn-asm.h
+++ b/arch/s390/include/asm/fpu-insn-asm.h
@@ -16,7 +16,7 @@
#error only <asm/fpu-insn.h> can be included directly
#endif
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
/* Macros to generate vector instruction byte code */
@@ -407,6 +407,28 @@
MRXBOPC 0, 0x0E, v1
.endm
+/* VECTOR STORE BYTE REVERSED ELEMENTS */
+ .macro VSTBR vr1, disp, index="%r0", base, m
+ VX_NUM v1, \vr1
+ GR_NUM x2, \index
+ GR_NUM b2, \base
+ .word 0xE600 | ((v1&15) << 4) | (x2&15)
+ .word (b2 << 12) | (\disp)
+ MRXBOPC \m, 0x0E, v1
+.endm
+.macro VSTBRH vr1, disp, index="%r0", base
+ VSTBR \vr1, \disp, \index, \base, 1
+.endm
+.macro VSTBRF vr1, disp, index="%r0", base
+ VSTBR \vr1, \disp, \index, \base, 2
+.endm
+.macro VSTBRG vr1, disp, index="%r0", base
+ VSTBR \vr1, \disp, \index, \base, 3
+.endm
+.macro VSTBRQ vr1, disp, index="%r0", base
+ VSTBR \vr1, \disp, \index, \base, 4
+.endm
+
/* VECTOR STORE MULTIPLE */
.macro VSTM vfrom, vto, disp, base, hint=3
VX_NUM v1, \vfrom
@@ -728,5 +750,5 @@
MRXBOPC 0, 0x77, v1, v2, v3
.endm
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_S390_FPU_INSN_ASM_H */
diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h
index c1e2e521d9af..96727f3bd0dc 100644
--- a/arch/s390/include/asm/fpu-insn.h
+++ b/arch/s390/include/asm/fpu-insn.h
@@ -9,9 +9,10 @@
#include <asm/fpu-insn-asm.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/instrumented.h>
+#include <linux/kmsan.h>
#include <asm/asm-extable.h>
asm(".include \"asm/fpu-insn-asm.h\"\n");
@@ -38,7 +39,7 @@ asm(".include \"asm/fpu-insn-asm.h\"\n");
static __always_inline void fpu_cefbr(u8 f1, s32 val)
{
- asm volatile("cefbr %[f1],%[val]\n"
+ asm volatile("cefbr %[f1],%[val]"
:
: [f1] "I" (f1), [val] "d" (val)
: "memory");
@@ -48,7 +49,7 @@ static __always_inline unsigned long fpu_cgebr(u8 f2, u8 mode)
{
unsigned long val;
- asm volatile("cgebr %[val],%[mode],%[f2]\n"
+ asm volatile("cgebr %[val],%[mode],%[f2]"
: [val] "=d" (val)
: [f2] "I" (f2), [mode] "I" (mode)
: "memory");
@@ -57,7 +58,7 @@ static __always_inline unsigned long fpu_cgebr(u8 f2, u8 mode)
static __always_inline void fpu_debr(u8 f1, u8 f2)
{
- asm volatile("debr %[f1],%[f2]\n"
+ asm volatile("debr %[f1],%[f2]"
:
: [f1] "I" (f1), [f2] "I" (f2)
: "memory");
@@ -66,7 +67,7 @@ static __always_inline void fpu_debr(u8 f1, u8 f2)
static __always_inline void fpu_ld(unsigned short fpr, freg_t *reg)
{
instrument_read(reg, sizeof(*reg));
- asm volatile("ld %[fpr],%[reg]\n"
+ asm volatile("ld %[fpr],%[reg]"
:
: [fpr] "I" (fpr), [reg] "Q" (reg->ui)
: "memory");
@@ -74,7 +75,7 @@ static __always_inline void fpu_ld(unsigned short fpr, freg_t *reg)
static __always_inline void fpu_ldgr(u8 f1, u32 val)
{
- asm volatile("ldgr %[f1],%[val]\n"
+ asm volatile("ldgr %[f1],%[val]"
:
: [f1] "I" (f1), [val] "d" (val)
: "memory");
@@ -100,19 +101,12 @@ static __always_inline void fpu_lfpc(unsigned int *fpc)
*/
static inline void fpu_lfpc_safe(unsigned int *fpc)
{
- u32 tmp;
-
instrument_read(fpc, sizeof(*fpc));
- asm volatile("\n"
- "0: lfpc %[fpc]\n"
- "1: nopr %%r7\n"
- ".pushsection .fixup, \"ax\"\n"
- "2: lghi %[tmp],0\n"
- " sfpc %[tmp]\n"
- " jg 1b\n"
- ".popsection\n"
- EX_TABLE(1b, 2b)
- : [tmp] "=d" (tmp)
+ asm_inline volatile(
+ " lfpc %[fpc]\n"
+ "0: nopr %%r7\n"
+ EX_TABLE_FPC(0b, 0b)
+ :
: [fpc] "Q" (*fpc)
: "memory");
}
@@ -120,7 +114,7 @@ static inline void fpu_lfpc_safe(unsigned int *fpc)
static __always_inline void fpu_std(unsigned short fpr, freg_t *reg)
{
instrument_write(reg, sizeof(*reg));
- asm volatile("std %[fpr],%[reg]\n"
+ asm volatile("std %[fpr],%[reg]"
: [reg] "=Q" (reg->ui)
: [fpr] "I" (fpr)
: "memory");
@@ -183,33 +177,33 @@ static __always_inline void fpu_vgfmg(u8 v1, u8 v2, u8 v3)
: "memory");
}
-#ifdef CONFIG_CC_IS_CLANG
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
static __always_inline void fpu_vl(u8 v1, const void *vxr)
{
instrument_read(vxr, sizeof(__vector128));
- asm volatile("\n"
- " la 1,%[vxr]\n"
- " VL %[v1],0,,1\n"
- :
- : [vxr] "R" (*(__vector128 *)vxr),
- [v1] "I" (v1)
- : "memory", "1");
+ asm volatile("VL %[v1],%O[vxr],,%R[vxr]"
+ :
+ : [vxr] "Q" (*(__vector128 *)vxr),
+ [v1] "I" (v1)
+ : "memory");
}
-#else /* CONFIG_CC_IS_CLANG */
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vl(u8 v1, const void *vxr)
{
instrument_read(vxr, sizeof(__vector128));
- asm volatile("VL %[v1],%O[vxr],,%R[vxr]\n"
- :
- : [vxr] "Q" (*(__vector128 *)vxr),
- [v1] "I" (v1)
- : "memory");
+ asm volatile(
+ " la 1,%[vxr]\n"
+ " VL %[v1],0,,1"
+ :
+ : [vxr] "R" (*(__vector128 *)vxr),
+ [v1] "I" (v1)
+ : "memory", "1");
}
-#endif /* CONFIG_CC_IS_CLANG */
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vleib(u8 v, s16 val, u8 index)
{
@@ -238,7 +232,7 @@ static __always_inline u64 fpu_vlgvf(u8 v, u16 index)
return val;
}
-#ifdef CONFIG_CC_IS_CLANG
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
{
@@ -246,17 +240,15 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
size = min(index + 1, sizeof(__vector128));
instrument_read(vxr, size);
- asm volatile("\n"
- " la 1,%[vxr]\n"
- " VLL %[v1],%[index],0,1\n"
- :
- : [vxr] "R" (*(u8 *)vxr),
- [index] "d" (index),
- [v1] "I" (v1)
- : "memory", "1");
+ asm volatile("VLL %[v1],%[index],%O[vxr],%R[vxr]"
+ :
+ : [vxr] "Q" (*(u8 *)vxr),
+ [index] "d" (index),
+ [v1] "I" (v1)
+ : "memory");
}
-#else /* CONFIG_CC_IS_CLANG */
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
{
@@ -264,17 +256,19 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
size = min(index + 1, sizeof(__vector128));
instrument_read(vxr, size);
- asm volatile("VLL %[v1],%[index],%O[vxr],%R[vxr]\n"
- :
- : [vxr] "Q" (*(u8 *)vxr),
- [index] "d" (index),
- [v1] "I" (v1)
- : "memory");
+ asm volatile(
+ " la 1,%[vxr]\n"
+ " VLL %[v1],%[index],0,1"
+ :
+ : [vxr] "R" (*(u8 *)vxr),
+ [index] "d" (index),
+ [v1] "I" (v1)
+ : "memory", "1");
}
-#endif /* CONFIG_CC_IS_CLANG */
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
-#ifdef CONFIG_CC_IS_CLANG
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
#define fpu_vlm(_v1, _v3, _vxrs) \
({ \
@@ -284,17 +278,15 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
} *_v = (void *)(_vxrs); \
\
instrument_read(_v, size); \
- asm volatile("\n" \
- " la 1,%[vxrs]\n" \
- " VLM %[v1],%[v3],0,1\n" \
- : \
- : [vxrs] "R" (*_v), \
- [v1] "I" (_v1), [v3] "I" (_v3) \
- : "memory", "1"); \
+ asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]" \
+ : \
+ : [vxrs] "Q" (*_v), \
+ [v1] "I" (_v1), [v3] "I" (_v3) \
+ : "memory"); \
(_v3) - (_v1) + 1; \
})
-#else /* CONFIG_CC_IS_CLANG */
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
#define fpu_vlm(_v1, _v3, _vxrs) \
({ \
@@ -304,15 +296,17 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
} *_v = (void *)(_vxrs); \
\
instrument_read(_v, size); \
- asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \
- : \
- : [vxrs] "Q" (*_v), \
- [v1] "I" (_v1), [v3] "I" (_v3) \
- : "memory"); \
+ asm volatile( \
+ " la 1,%[vxrs]\n" \
+ " VLM %[v1],%[v3],0,1" \
+ : \
+ : [vxrs] "R" (*_v), \
+ [v1] "I" (_v1), [v3] "I" (_v3) \
+ : "memory", "1"); \
(_v3) - (_v1) + 1; \
})
-#endif /* CONFIG_CC_IS_CLANG */
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vlr(u8 v1, u8 v2)
{
@@ -362,33 +356,33 @@ static __always_inline void fpu_vsrlb(u8 v1, u8 v2, u8 v3)
: "memory");
}
-#ifdef CONFIG_CC_IS_CLANG
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
static __always_inline void fpu_vst(u8 v1, const void *vxr)
{
instrument_write(vxr, sizeof(__vector128));
- asm volatile("\n"
- " la 1,%[vxr]\n"
- " VST %[v1],0,,1\n"
- : [vxr] "=R" (*(__vector128 *)vxr)
- : [v1] "I" (v1)
- : "memory", "1");
+ asm volatile("VST %[v1],%O[vxr],,%R[vxr]"
+ : [vxr] "=Q" (*(__vector128 *)vxr)
+ : [v1] "I" (v1)
+ : "memory");
}
-#else /* CONFIG_CC_IS_CLANG */
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vst(u8 v1, const void *vxr)
{
instrument_write(vxr, sizeof(__vector128));
- asm volatile("VST %[v1],%O[vxr],,%R[vxr]\n"
- : [vxr] "=Q" (*(__vector128 *)vxr)
- : [v1] "I" (v1)
- : "memory");
+ asm volatile(
+ " la 1,%[vxr]\n"
+ " VST %[v1],0,,1"
+ : [vxr] "=R" (*(__vector128 *)vxr)
+ : [v1] "I" (v1)
+ : "memory", "1");
}
-#endif /* CONFIG_CC_IS_CLANG */
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
-#ifdef CONFIG_CC_IS_CLANG
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
{
@@ -396,15 +390,14 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
size = min(index + 1, sizeof(__vector128));
instrument_write(vxr, size);
- asm volatile("\n"
- " la 1,%[vxr]\n"
- " VSTL %[v1],%[index],0,1\n"
- : [vxr] "=R" (*(u8 *)vxr)
- : [index] "d" (index), [v1] "I" (v1)
- : "memory", "1");
+ asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]"
+ : [vxr] "=Q" (*(u8 *)vxr)
+ : [index] "d" (index), [v1] "I" (v1)
+ : "memory");
+ kmsan_unpoison_memory(vxr, size);
}
-#else /* CONFIG_CC_IS_CLANG */
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
{
@@ -412,15 +405,18 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
size = min(index + 1, sizeof(__vector128));
instrument_write(vxr, size);
- asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]\n"
- : [vxr] "=Q" (*(u8 *)vxr)
- : [index] "d" (index), [v1] "I" (v1)
- : "memory");
+ asm volatile(
+ " la 1,%[vxr]\n"
+ " VSTL %[v1],%[index],0,1"
+ : [vxr] "=R" (*(u8 *)vxr)
+ : [index] "d" (index), [v1] "I" (v1)
+ : "memory", "1");
+ kmsan_unpoison_memory(vxr, size);
}
-#endif /* CONFIG_CC_IS_CLANG */
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
-#ifdef CONFIG_CC_IS_CLANG
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
#define fpu_vstm(_v1, _v3, _vxrs) \
({ \
@@ -430,16 +426,14 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
} *_v = (void *)(_vxrs); \
\
instrument_write(_v, size); \
- asm volatile("\n" \
- " la 1,%[vxrs]\n" \
- " VSTM %[v1],%[v3],0,1\n" \
- : [vxrs] "=R" (*_v) \
- : [v1] "I" (_v1), [v3] "I" (_v3) \
- : "memory", "1"); \
+ asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]" \
+ : [vxrs] "=Q" (*_v) \
+ : [v1] "I" (_v1), [v3] "I" (_v3) \
+ : "memory"); \
(_v3) - (_v1) + 1; \
})
-#else /* CONFIG_CC_IS_CLANG */
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
#define fpu_vstm(_v1, _v3, _vxrs) \
({ \
@@ -449,14 +443,16 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
} *_v = (void *)(_vxrs); \
\
instrument_write(_v, size); \
- asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \
- : [vxrs] "=Q" (*_v) \
- : [v1] "I" (_v1), [v3] "I" (_v3) \
- : "memory"); \
+ asm volatile( \
+ " la 1,%[vxrs]\n" \
+ " VSTM %[v1],%[v3],0,1" \
+ : [vxrs] "=R" (*_v) \
+ : [v1] "I" (_v1), [v3] "I" (_v3) \
+ : "memory", "1"); \
(_v3) - (_v1) + 1; \
})
-#endif /* CONFIG_CC_IS_CLANG */
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vupllf(u8 v1, u8 v2)
{
@@ -482,5 +478,5 @@ static __always_inline void fpu_vzero(u8 v)
: "memory");
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_S390_FPU_INSN_H */
diff --git a/arch/s390/include/asm/fpu.h b/arch/s390/include/asm/fpu.h
index c84cb33913e2..960c6c67ad6c 100644
--- a/arch/s390/include/asm/fpu.h
+++ b/arch/s390/include/asm/fpu.h
@@ -44,6 +44,7 @@
#ifndef _ASM_S390_FPU_H
#define _ASM_S390_FPU_H
+#include <linux/cpufeature.h>
#include <linux/processor.h>
#include <linux/preempt.h>
#include <linux/string.h>
@@ -51,12 +52,6 @@
#include <asm/sigcontext.h>
#include <asm/fpu-types.h>
#include <asm/fpu-insn.h>
-#include <asm/facility.h>
-
-static inline bool cpu_has_vx(void)
-{
- return likely(test_facility(129));
-}
enum {
KERNEL_FPC_BIT = 0,
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 77e479d44f1e..692c484ec163 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -2,13 +2,27 @@
#ifndef _ASM_S390_FTRACE_H
#define _ASM_S390_FTRACE_H
-#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
#define ARCH_SUPPORTS_FTRACE_OPS 1
#define MCOUNT_INSN_SIZE 6
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
+#include <asm/stacktrace.h>
-unsigned long return_address(unsigned int n);
+static __always_inline unsigned long return_address(unsigned int n)
+{
+ struct stack_frame *sf;
+
+ if (!n)
+ return (unsigned long)__builtin_return_address(0);
+
+ sf = (struct stack_frame *)current_frame_address();
+ do {
+ sf = (struct stack_frame *)sf->back_chain;
+ if (!sf)
+ return 0;
+ } while (--n);
+ return sf->gprs[8];
+}
#define ftrace_return_address(n) return_address(n)
void ftrace_caller(void);
@@ -25,6 +39,7 @@ struct dyn_arch_ftrace { };
struct module;
struct dyn_ftrace;
+struct ftrace_ops;
bool ftrace_need_init_nop(void);
#define ftrace_need_init_nop ftrace_need_init_nop
@@ -36,62 +51,44 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
return addr;
}
+#define ftrace_get_symaddr(fentry_ip) ((unsigned long)(fentry_ip))
-struct ftrace_regs {
- struct pt_regs regs;
-};
+#include <linux/ftrace_regs.h>
static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs)
{
- struct pt_regs *regs = &fregs->regs;
+ struct pt_regs *regs = &arch_ftrace_regs(fregs)->regs;
if (test_pt_regs_flag(regs, PIF_FTRACE_FULL_REGS))
return regs;
return NULL;
}
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-struct fgraph_ret_regs {
- unsigned long gpr2;
- unsigned long fp;
-};
-
-static __always_inline unsigned long fgraph_ret_regs_return_value(struct fgraph_ret_regs *ret_regs)
-{
- return ret_regs->gpr2;
-}
-
-static __always_inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs *ret_regs)
+static __always_inline void
+ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
+ unsigned long ip)
{
- return ret_regs->fp;
+ arch_ftrace_regs(fregs)->regs.psw.addr = ip;
}
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+#undef ftrace_regs_get_frame_pointer
static __always_inline unsigned long
-ftrace_regs_get_instruction_pointer(const struct ftrace_regs *fregs)
+ftrace_regs_get_frame_pointer(struct ftrace_regs *fregs)
{
- return fregs->regs.psw.addr;
+ return ftrace_regs_get_stack_pointer(fregs);
}
-static __always_inline void
-ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
- unsigned long ip)
+static __always_inline unsigned long
+ftrace_regs_get_return_address(const struct ftrace_regs *fregs)
{
- fregs->regs.psw.addr = ip;
+ return arch_ftrace_regs(fregs)->regs.gprs[14];
}
-#define ftrace_regs_get_argument(fregs, n) \
- regs_get_kernel_argument(&(fregs)->regs, n)
-#define ftrace_regs_get_stack_pointer(fregs) \
- kernel_stack_pointer(&(fregs)->regs)
-#define ftrace_regs_return_value(fregs) \
- regs_return_value(&(fregs)->regs)
-#define ftrace_regs_set_return_value(fregs, ret) \
- regs_set_return_value(&(fregs)->regs, ret)
-#define ftrace_override_function_with_return(fregs) \
- override_function_with_return(&(fregs)->regs)
-#define ftrace_regs_query_register_offset(name) \
- regs_query_register_offset(name)
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do { \
+ (_regs)->psw.mask = 0; \
+ (_regs)->psw.addr = arch_ftrace_regs(fregs)->regs.psw.addr; \
+ (_regs)->gprs[15] = arch_ftrace_regs(fregs)->regs.gprs[15]; \
+ } while (0)
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
/*
@@ -103,37 +100,24 @@ ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
*/
static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr)
{
- struct pt_regs *regs = &fregs->regs;
+ struct pt_regs *regs = &arch_ftrace_regs(fregs)->regs;
regs->orig_gpr2 = addr;
}
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
-/*
- * Even though the system call numbers are identical for s390/s390x a
- * different system call table is used for compat tasks. This may lead
- * to e.g. incorrect or missing trace event sysfs files.
- * Therefore simply do not trace compat system calls at all.
- * See kernel/trace/trace_syscalls.c.
- */
-#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
-static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
-{
- return is_compat_task();
-}
-
#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
static inline bool arch_syscall_match_sym_name(const char *sym,
const char *name)
{
- /*
- * Skip __s390_ and __s390x_ prefix - due to compat wrappers
- * and aliasing some symbols of 64 bit system call functions
- * may get the __s390_ prefix instead of the __s390x_ prefix.
- */
+ /* Skip the __s390x_ prefix. */
return !strcmp(sym + 7, name) || !strcmp(sym + 8, name);
}
-#endif /* __ASSEMBLY__ */
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs);
+#define ftrace_graph_func ftrace_graph_func
+
+#endif /* __ASSEMBLER__ */
#ifdef CONFIG_FUNCTION_TRACER
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index eaeaeb3ff0be..942f21c39697 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -2,80 +2,101 @@
#ifndef _ASM_S390_FUTEX_H
#define _ASM_S390_FUTEX_H
+#include <linux/instrumented.h>
#include <linux/uaccess.h>
#include <linux/futex.h>
#include <asm/asm-extable.h>
#include <asm/mmu_context.h>
#include <asm/errno.h>
-#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \
- asm volatile( \
- " sacf 256\n" \
- "0: l %1,0(%6)\n" \
- "1:"insn \
- "2: cs %1,%2,0(%6)\n" \
- "3: jl 1b\n" \
- " lhi %0,0\n" \
- "4: sacf 768\n" \
- EX_TABLE(0b,4b) EX_TABLE(1b,4b) \
- EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
- : "=d" (ret), "=&d" (oldval), "=&d" (newval), \
- "=m" (*uaddr) \
- : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
- "m" (*uaddr) : "cc");
+#define FUTEX_OP_FUNC(name, insn) \
+static uaccess_kmsan_or_inline int \
+__futex_atomic_##name(int oparg, int *old, u32 __user *uaddr) \
+{ \
+ bool sacf_flag; \
+ int rc, new; \
+ \
+ instrument_copy_from_user_before(old, uaddr, sizeof(*old)); \
+ sacf_flag = enable_sacf_uaccess(); \
+ asm_inline volatile( \
+ " sacf 256\n" \
+ "0: l %[old],%[uaddr]\n" \
+ "1:"insn \
+ "2: cs %[old],%[new],%[uaddr]\n" \
+ "3: jl 1b\n" \
+ " lhi %[rc],0\n" \
+ "4: sacf 768\n" \
+ EX_TABLE_UA_FAULT(0b, 4b, %[rc]) \
+ EX_TABLE_UA_FAULT(1b, 4b, %[rc]) \
+ EX_TABLE_UA_FAULT(2b, 4b, %[rc]) \
+ EX_TABLE_UA_FAULT(3b, 4b, %[rc]) \
+ : [rc] "=d" (rc), [old] "=&d" (*old), \
+ [new] "=&d" (new), [uaddr] "+Q" (*uaddr) \
+ : [oparg] "d" (oparg) \
+ : "cc"); \
+ disable_sacf_uaccess(sacf_flag); \
+ if (!rc) \
+ instrument_copy_from_user_after(old, uaddr, sizeof(*old), 0); \
+ return rc; \
+}
+
+FUTEX_OP_FUNC(set, "lr %[new],%[oparg]\n")
+FUTEX_OP_FUNC(add, "lr %[new],%[old]\n ar %[new],%[oparg]\n")
+FUTEX_OP_FUNC(or, "lr %[new],%[old]\n or %[new],%[oparg]\n")
+FUTEX_OP_FUNC(and, "lr %[new],%[old]\n nr %[new],%[oparg]\n")
+FUTEX_OP_FUNC(xor, "lr %[new],%[old]\n xr %[new],%[oparg]\n")
-static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
- u32 __user *uaddr)
+static inline
+int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
- int oldval = 0, newval, ret;
+ int old, rc;
switch (op) {
case FUTEX_OP_SET:
- __futex_atomic_op("lr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
+ rc = __futex_atomic_set(oparg, &old, uaddr);
break;
case FUTEX_OP_ADD:
- __futex_atomic_op("lr %2,%1\nar %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
+ rc = __futex_atomic_add(oparg, &old, uaddr);
break;
case FUTEX_OP_OR:
- __futex_atomic_op("lr %2,%1\nor %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
+ rc = __futex_atomic_or(oparg, &old, uaddr);
break;
case FUTEX_OP_ANDN:
- __futex_atomic_op("lr %2,%1\nnr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
+ rc = __futex_atomic_and(~oparg, &old, uaddr);
break;
case FUTEX_OP_XOR:
- __futex_atomic_op("lr %2,%1\nxr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
+ rc = __futex_atomic_xor(oparg, &old, uaddr);
break;
default:
- ret = -ENOSYS;
+ rc = -ENOSYS;
}
-
- if (!ret)
- *oval = oldval;
-
- return ret;
+ if (!rc)
+ *oval = old;
+ return rc;
}
-static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
- u32 oldval, u32 newval)
+static uaccess_kmsan_or_inline
+int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval)
{
- int ret;
+ bool sacf_flag;
+ int rc;
- asm volatile(
- " sacf 256\n"
- "0: cs %1,%4,0(%5)\n"
- "1: la %0,0\n"
- "2: sacf 768\n"
- EX_TABLE(0b,2b) EX_TABLE(1b,2b)
- : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
- : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
+ instrument_copy_from_user_before(uval, uaddr, sizeof(*uval));
+ sacf_flag = enable_sacf_uaccess();
+ asm_inline volatile(
+ " sacf 256\n"
+ "0: cs %[old],%[new],%[uaddr]\n"
+ "1: lhi %[rc],0\n"
+ "2: sacf 768\n"
+ EX_TABLE_UA_FAULT(0b, 2b, %[rc])
+ EX_TABLE_UA_FAULT(1b, 2b, %[rc])
+ : [rc] "=d" (rc), [old] "+d" (oldval), [uaddr] "+Q" (*uaddr)
+ : [new] "d" (newval)
: "cc", "memory");
+ disable_sacf_uaccess(sacf_flag);
*uval = oldval;
- return ret;
+ instrument_copy_from_user_after(uval, uaddr, sizeof(*uval), 0);
+ return rc;
}
#endif /* _ASM_S390_FUTEX_H */
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index 9725586f4259..66c5808fd011 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -17,13 +17,12 @@
#define GMAP_NOTIFY_MPROT 0x1
/* Status bits only for huge segment entries */
-#define _SEGMENT_ENTRY_GMAP_IN 0x8000 /* invalidation notify bit */
-#define _SEGMENT_ENTRY_GMAP_UC 0x4000 /* dirty (migration) */
+#define _SEGMENT_ENTRY_GMAP_IN 0x0800 /* invalidation notify bit */
+#define _SEGMENT_ENTRY_GMAP_UC 0x0002 /* dirty (migration) */
/**
* struct gmap_struct - guest address space
* @list: list head for the mm->context gmap list
- * @crst_list: list of all crst tables used in the guest address space
* @mm: pointer to the parent mm_struct
* @guest_to_host: radix tree with guest to host address translation
* @host_to_guest: radix tree with pointer to segment table entries
@@ -35,7 +34,6 @@
* @guest_handle: protected virtual machine handle for the ultravisor
* @host_to_rmap: radix tree with gmap_rmap lists
* @children: list of shadow gmap structures
- * @pt_list: list of all page tables used in the shadow guest address space
* @shadow_lock: spinlock to protect the shadow gmap list
* @parent: pointer to the parent gmap for shadow guest address spaces
* @orig_asce: ASCE for which the shadow page table has been created
@@ -45,7 +43,6 @@
*/
struct gmap {
struct list_head list;
- struct list_head crst_list;
struct mm_struct *mm;
struct radix_tree_root guest_to_host;
struct radix_tree_root host_to_guest;
@@ -61,7 +58,6 @@ struct gmap {
/* Additional data for shadow guest address spaces */
struct radix_tree_root host_to_rmap;
struct list_head children;
- struct list_head pt_list;
spinlock_t shadow_lock;
struct gmap *parent;
unsigned long orig_asce;
@@ -106,26 +102,20 @@ struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit);
void gmap_remove(struct gmap *gmap);
struct gmap *gmap_get(struct gmap *gmap);
void gmap_put(struct gmap *gmap);
+void gmap_free(struct gmap *gmap);
+struct gmap *gmap_alloc(unsigned long limit);
-void gmap_enable(struct gmap *gmap);
-void gmap_disable(struct gmap *gmap);
-struct gmap *gmap_get_enabled(void);
int gmap_map_segment(struct gmap *gmap, unsigned long from,
unsigned long to, unsigned long len);
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
-unsigned long gmap_translate(struct gmap *, unsigned long gaddr);
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
-int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
-void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
void __gmap_zap(struct gmap *, unsigned long gaddr);
void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr);
int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val);
-struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
- int edat_level);
-int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level);
+void gmap_unshadow(struct gmap *sg);
int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
int fake);
int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
@@ -134,24 +124,20 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
int fake);
int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
int fake);
-int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
- unsigned long *pgt, int *dat_protection, int *fake);
int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte);
void gmap_register_pte_notifier(struct gmap_notifier *);
void gmap_unregister_pte_notifier(struct gmap_notifier *);
-int gmap_mprotect_notify(struct gmap *, unsigned long start,
- unsigned long len, int prot);
+int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits);
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
unsigned long gaddr, unsigned long vmaddr);
-int s390_disable_cow_sharing(void);
-void s390_unlist_old_asce(struct gmap *gmap);
int s390_replace_asce(struct gmap *gmap);
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
unsigned long end, bool interruptible);
+unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level);
/**
* s390_uv_destroy_range - Destroy a range of pages in the given mm.
diff --git a/arch/s390/include/asm/gmap_helpers.h b/arch/s390/include/asm/gmap_helpers.h
new file mode 100644
index 000000000000..5356446a61c4
--- /dev/null
+++ b/arch/s390/include/asm/gmap_helpers.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Helper functions for KVM guest address space mapping code
+ *
+ * Copyright IBM Corp. 2025
+ */
+
+#ifndef _ASM_S390_GMAP_HELPERS_H
+#define _ASM_S390_GMAP_HELPERS_H
+
+void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr);
+void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end);
+int gmap_helper_disable_cow_sharing(void);
+
+#endif /* _ASM_S390_GMAP_HELPERS_H */
diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h
index 58668ffb5488..a5b45388c91f 100644
--- a/arch/s390/include/asm/hardirq.h
+++ b/arch/s390/include/asm/hardirq.h
@@ -13,9 +13,9 @@
#include <asm/lowcore.h>
-#define local_softirq_pending() (S390_lowcore.softirq_pending)
-#define set_softirq_pending(x) (S390_lowcore.softirq_pending = (x))
-#define or_softirq_pending(x) (S390_lowcore.softirq_pending |= (x))
+#define local_softirq_pending() (get_lowcore()->softirq_pending)
+#define set_softirq_pending(x) (get_lowcore()->softirq_pending = (x))
+#define or_softirq_pending(x) (get_lowcore()->softirq_pending |= (x))
#define __ARCH_IRQ_STAT
#define __ARCH_IRQ_EXIT_IRQS_DISABLED
diff --git a/arch/s390/include/asm/hiperdispatch.h b/arch/s390/include/asm/hiperdispatch.h
new file mode 100644
index 000000000000..27e23aa27a24
--- /dev/null
+++ b/arch/s390/include/asm/hiperdispatch.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2024
+ */
+
+#ifndef _ASM_HIPERDISPATCH_H
+#define _ASM_HIPERDISPATCH_H
+
+void hd_reset_state(void);
+void hd_add_core(int cpu);
+void hd_disable_hiperdispatch(void);
+int hd_enable_hiperdispatch(void);
+
+#endif /* _ASM_HIPERDISPATCH_H */
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index ce5f4fe8be4d..69131736daaa 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -9,42 +9,41 @@
#ifndef _ASM_S390_HUGETLB_H
#define _ASM_S390_HUGETLB_H
+#include <linux/cpufeature.h>
#include <linux/pgtable.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
#include <asm/page.h>
-#define hugetlb_free_pgd_range free_pgd_range
-#define hugepages_supported() (MACHINE_HAS_EDAT1)
+#define hugepages_supported() cpu_has_edat1()
+#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, unsigned long sz);
void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte);
-pte_t huge_ptep_get(pte_t *ptep);
-pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep);
+ pte_t *ptep, pte_t pte);
-/*
- * If the arch doesn't supply something else, assume that hugepage
- * size aligned regions are ok without further preparation.
- */
-static inline int prepare_hugepage_range(struct file *file,
- unsigned long addr, unsigned long len)
-{
- struct hstate *h = hstate_file(file);
+#define __HAVE_ARCH_HUGE_PTEP_GET
+pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
- if (len & ~huge_page_mask(h))
- return -EINVAL;
- if (addr & ~huge_page_mask(h))
- return -EINVAL;
- return 0;
+pte_t __huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep);
+
+#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ unsigned long sz)
+{
+ return __huge_ptep_get_and_clear(mm, addr, ptep);
}
static inline void arch_clear_hugetlb_flags(struct folio *folio)
{
- clear_bit(PG_arch_1, &folio->flags);
+ clear_bit(PG_arch_1, &folio->flags.f);
}
#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags
+#define __HAVE_ARCH_HUGE_PTE_CLEAR
static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long sz)
{
@@ -54,94 +53,54 @@ static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
set_pte(ptep, __pte(_SEGMENT_ENTRY_EMPTY));
}
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep)
{
- return huge_ptep_get_and_clear(vma->vm_mm, address, ptep);
+ return __huge_ptep_get_and_clear(vma->vm_mm, address, ptep);
}
+#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t pte, int dirty)
{
- int changed = !pte_same(huge_ptep_get(ptep), pte);
+ int changed = !pte_same(huge_ptep_get(vma->vm_mm, addr, ptep), pte);
+
if (changed) {
- huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+ __huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
__set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
}
return changed;
}
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
- pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep);
- __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte));
-}
+ pte_t pte = __huge_ptep_get_and_clear(mm, addr, ptep);
-static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
-{
- return mk_pte(page, pgprot);
-}
-
-static inline int huge_pte_none(pte_t pte)
-{
- return pte_none(pte);
-}
-
-static inline int huge_pte_none_mostly(pte_t pte)
-{
- return huge_pte_none(pte);
-}
-
-static inline int huge_pte_write(pte_t pte)
-{
- return pte_write(pte);
-}
-
-static inline int huge_pte_dirty(pte_t pte)
-{
- return pte_dirty(pte);
-}
-
-static inline pte_t huge_pte_mkwrite(pte_t pte)
-{
- return pte_mkwrite_novma(pte);
-}
-
-static inline pte_t huge_pte_mkdirty(pte_t pte)
-{
- return pte_mkdirty(pte);
-}
-
-static inline pte_t huge_pte_wrprotect(pte_t pte)
-{
- return pte_wrprotect(pte);
-}
-
-static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
-{
- return pte_modify(pte, newprot);
+ __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte));
}
+#define __HAVE_ARCH_HUGE_PTE_MKUFFD_WP
static inline pte_t huge_pte_mkuffd_wp(pte_t pte)
{
return pte;
}
+#define __HAVE_ARCH_HUGE_PTE_CLEAR_UFFD_WP
static inline pte_t huge_pte_clear_uffd_wp(pte_t pte)
{
return pte;
}
+#define __HAVE_ARCH_HUGE_PTE_UFFD_WP
static inline int huge_pte_uffd_wp(pte_t pte)
{
return 0;
}
-static inline bool gigantic_page_runtime_supported(void)
-{
- return true;
-}
+#include <asm-generic/hugetlb.h>
#endif /* _ASM_S390_HUGETLB_H */
diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h
index ac68c657b28c..e5000ee6cdc6 100644
--- a/arch/s390/include/asm/idals.h
+++ b/arch/s390/include/asm/idals.h
@@ -181,6 +181,82 @@ static inline void idal_buffer_free(struct idal_buffer *ib)
}
/*
+ * Allocate an array of IDAL buffers to cover a total data size of @size. The
+ * resulting array is null-terminated.
+ *
+ * The amount of individual IDAL buffers is determined based on @size.
+ * Each IDAL buffer can have a maximum size of @CCW_MAX_BYTE_COUNT.
+ */
+static inline struct idal_buffer **idal_buffer_array_alloc(size_t size, int page_order)
+{
+ struct idal_buffer **ibs;
+ size_t ib_size; /* Size of a single idal buffer */
+ int count; /* Amount of individual idal buffers */
+ int i;
+
+ count = (size + CCW_MAX_BYTE_COUNT - 1) / CCW_MAX_BYTE_COUNT;
+ ibs = kmalloc_array(count + 1, sizeof(*ibs), GFP_KERNEL);
+ for (i = 0; i < count; i++) {
+ /* Determine size for the current idal buffer */
+ ib_size = min(size, CCW_MAX_BYTE_COUNT);
+ size -= ib_size;
+ ibs[i] = idal_buffer_alloc(ib_size, page_order);
+ if (IS_ERR(ibs[i])) {
+ while (i--)
+ idal_buffer_free(ibs[i]);
+ kfree(ibs);
+ ibs = NULL;
+ return ERR_PTR(-ENOMEM);
+ }
+ }
+ ibs[i] = NULL;
+ return ibs;
+}
+
+/*
+ * Free array of IDAL buffers
+ */
+static inline void idal_buffer_array_free(struct idal_buffer ***ibs)
+{
+ struct idal_buffer **p;
+
+ if (!ibs || !*ibs)
+ return;
+ for (p = *ibs; *p; p++)
+ idal_buffer_free(*p);
+ kfree(*ibs);
+ *ibs = NULL;
+}
+
+/*
+ * Determine size of IDAL buffer array
+ */
+static inline int idal_buffer_array_size(struct idal_buffer **ibs)
+{
+ int size = 0;
+
+ while (ibs && *ibs) {
+ size++;
+ ibs++;
+ }
+ return size;
+}
+
+/*
+ * Determine total data size covered by IDAL buffer array
+ */
+static inline size_t idal_buffer_array_datasize(struct idal_buffer **ibs)
+{
+ size_t size = 0;
+
+ while (ibs && *ibs) {
+ size += (*ibs)->size;
+ ibs++;
+ }
+ return size;
+}
+
+/*
* Test if a idal list is really needed.
*/
static inline bool __idal_buffer_is_needed(struct idal_buffer *ib)
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
index 0fbc992d7a5e..faddb9aef3b8 100644
--- a/arch/s390/include/asm/io.h
+++ b/arch/s390/include/asm/io.h
@@ -16,8 +16,10 @@
#include <asm/pci_io.h>
#define xlate_dev_mem_ptr xlate_dev_mem_ptr
+#define kc_xlate_dev_mem_ptr xlate_dev_mem_ptr
void *xlate_dev_mem_ptr(phys_addr_t phys);
#define unxlate_dev_mem_ptr unxlate_dev_mem_ptr
+#define kc_unxlate_dev_mem_ptr unxlate_dev_mem_ptr
void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
#define IO_SPACE_LIMIT 0
@@ -31,9 +33,7 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
#define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL)
#define ioremap_wc(addr, size) \
- ioremap_prot((addr), (size), pgprot_val(pgprot_writecombine(PAGE_KERNEL)))
-#define ioremap_wt(addr, size) \
- ioremap_prot((addr), (size), pgprot_val(pgprot_writethrough(PAGE_KERNEL)))
+ ioremap_prot((addr), (size), pgprot_writecombine(PAGE_KERNEL))
static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
{
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index 54b42817f70a..697497e7d13e 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -25,7 +25,7 @@
#define EXT_IRQ_CP_SERVICE 0x2603
#define EXT_IRQ_IUCV 0x4000
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/hardirq.h>
#include <linux/percpu.h>
@@ -47,13 +47,13 @@ enum interruption_class {
IRQEXT_CMS,
IRQEXT_CMC,
IRQEXT_FTP,
+ IRQEXT_WTI,
IRQIO_CIO,
IRQIO_DAS,
IRQIO_C15,
IRQIO_C70,
IRQIO_TAP,
IRQIO_VMR,
- IRQIO_LCS,
IRQIO_CTC,
IRQIO_ADM,
IRQIO_CSC,
@@ -99,6 +99,7 @@ int unregister_external_irq(u16 code, ext_int_handler_t handler);
enum irq_subclass {
IRQ_SUBCLASS_MEASUREMENT_ALERT = 5,
IRQ_SUBCLASS_SERVICE_SIGNAL = 9,
+ IRQ_SUBCLASS_WARNING_TRACK = 33,
};
#define CR0_IRQ_SUBCLASS_MASK \
@@ -119,6 +120,6 @@ void irq_subclass_unregister(enum irq_subclass subclass);
#define irq_canonicalize(irq) (irq)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_IRQ_H */
diff --git a/arch/s390/include/asm/irqflags.h b/arch/s390/include/asm/irqflags.h
index 02427b205c11..bcab456dfb80 100644
--- a/arch/s390/include/asm/irqflags.h
+++ b/arch/s390/include/asm/irqflags.h
@@ -37,12 +37,18 @@ static __always_inline void __arch_local_irq_ssm(unsigned long flags)
asm volatile("ssm %0" : : "Q" (flags) : "memory");
}
-static __always_inline unsigned long arch_local_save_flags(void)
+#ifdef CONFIG_KMSAN
+#define arch_local_irq_attributes noinline notrace __no_sanitize_memory __maybe_unused
+#else
+#define arch_local_irq_attributes __always_inline
+#endif
+
+static arch_local_irq_attributes unsigned long arch_local_save_flags(void)
{
return __arch_local_irq_stnsm(0xff);
}
-static __always_inline unsigned long arch_local_irq_save(void)
+static arch_local_irq_attributes unsigned long arch_local_irq_save(void)
{
return __arch_local_irq_stnsm(0xfc);
}
@@ -52,7 +58,12 @@ static __always_inline void arch_local_irq_disable(void)
arch_local_irq_save();
}
-static __always_inline void arch_local_irq_enable(void)
+static arch_local_irq_attributes void arch_local_irq_enable_external(void)
+{
+ __arch_local_irq_stosm(0x01);
+}
+
+static arch_local_irq_attributes void arch_local_irq_enable(void)
{
__arch_local_irq_stosm(0x03);
}
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index bf78cf381dfc..d9cbc18f6b2e 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -4,7 +4,7 @@
#define HAVE_JUMP_LABEL_BATCH
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/stringify.h>
@@ -51,5 +51,5 @@ label:
return true;
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
index 1bd08eb56d5f..9084b750350d 100644
--- a/arch/s390/include/asm/kexec.h
+++ b/arch/s390/include/asm/kexec.h
@@ -94,6 +94,9 @@ void arch_kexec_protect_crashkres(void);
void arch_kexec_unprotect_crashkres(void);
#define arch_kexec_unprotect_crashkres arch_kexec_unprotect_crashkres
+
+bool is_kdump_kernel(void);
+#define is_kdump_kernel is_kdump_kernel
#endif
#ifdef CONFIG_KEXEC_FILE
diff --git a/arch/s390/include/asm/kfence.h b/arch/s390/include/asm/kfence.h
index e47fd8cbe701..e95e35eb8a3f 100644
--- a/arch/s390/include/asm/kfence.h
+++ b/arch/s390/include/asm/kfence.h
@@ -12,27 +12,16 @@ void __kernel_map_pages(struct page *page, int numpages, int enable);
static __always_inline bool arch_kfence_init_pool(void)
{
- return true;
-}
-
-#define arch_kfence_test_address(addr) ((addr) & PAGE_MASK)
-
-/*
- * Do not split kfence pool to 4k mapping with arch_kfence_init_pool(),
- * but earlier where page table allocations still happen with memblock.
- * Reason is that arch_kfence_init_pool() gets called when the system
- * is still in a limbo state - disabling and enabling bottom halves is
- * not yet allowed, but that is what our page_table_alloc() would do.
- */
-static __always_inline void kfence_split_mapping(void)
-{
#ifdef CONFIG_KFENCE
unsigned long pool_pages = KFENCE_POOL_SIZE >> PAGE_SHIFT;
set_memory_4k((unsigned long)__kfence_pool, pool_pages);
#endif
+ return true;
}
+#define arch_kfence_test_address(addr) ((addr) & PAGE_MASK)
+
static inline bool kfence_protect_page(unsigned long addr, bool protect)
{
__kernel_map_pages(virt_to_page((void *)addr), 1, !protect);
diff --git a/arch/s390/include/asm/kmsan.h b/arch/s390/include/asm/kmsan.h
new file mode 100644
index 000000000000..f73e181d09ae
--- /dev/null
+++ b/arch/s390/include/asm/kmsan.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_KMSAN_H
+#define _ASM_S390_KMSAN_H
+
+#include <asm/lowcore.h>
+#include <asm/page.h>
+#include <linux/kmsan.h>
+#include <linux/mmzone.h>
+#include <linux/stddef.h>
+
+#ifndef MODULE
+
+static inline bool is_lowcore_addr(void *addr)
+{
+ return addr >= (void *)get_lowcore() &&
+ addr < (void *)(get_lowcore() + 1);
+}
+
+static inline void *arch_kmsan_get_meta_or_null(void *addr, bool is_origin)
+{
+ if (is_lowcore_addr(addr)) {
+ /*
+ * Different lowcores accessed via S390_lowcore are described
+ * by the same struct page. Resolve the prefix manually in
+ * order to get a distinct struct page.
+ */
+ addr += (void *)lowcore_ptr[raw_smp_processor_id()] -
+ (void *)get_lowcore();
+ if (KMSAN_WARN_ON(is_lowcore_addr(addr)))
+ return NULL;
+ return kmsan_get_metadata(addr, is_origin);
+ }
+ return NULL;
+}
+
+static inline bool kmsan_virt_addr_valid(void *addr)
+{
+ bool ret;
+
+ /*
+ * pfn_valid() relies on RCU, and may call into the scheduler on exiting
+ * the critical section. However, this would result in recursion with
+ * KMSAN. Therefore, disable preemption here, and re-enable preemption
+ * below while suppressing reschedules to avoid recursion.
+ *
+ * Note, this sacrifices occasionally breaking scheduling guarantees.
+ * Although, a kernel compiled with KMSAN has already given up on any
+ * performance guarantees due to being heavily instrumented.
+ */
+ preempt_disable();
+ ret = virt_addr_valid(addr);
+ preempt_enable_no_resched();
+
+ return ret;
+}
+
+#endif /* !MODULE */
+
+#endif /* _ASM_S390_KMSAN_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 9281063636a7..ae1223264d3c 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -15,22 +15,22 @@
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/kvm_types.h>
-#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/seqlock.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/mmu_notifier.h>
+#include <asm/kvm_host_types.h>
#include <asm/debug.h>
#include <asm/cpu.h>
#include <asm/fpu.h>
#include <asm/isc.h>
#include <asm/guarded_storage.h>
-#define KVM_S390_BSCA_CPU_SLOTS 64
-#define KVM_S390_ESCA_CPU_SLOTS 248
#define KVM_MAX_VCPUS 255
+#define KVM_INTERNAL_MEM_SLOTS 1
+
/*
* These seem to be used for allocating ->chip in the routing table, which we
* don't use. 1 is as small as we can get to reduce the needed memory. If we
@@ -50,336 +50,6 @@
#define KVM_REQ_REFRESH_GUEST_PREFIX \
KVM_ARCH_REQ_FLAGS(6, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
-#define SIGP_CTRL_C 0x80
-#define SIGP_CTRL_SCN_MASK 0x3f
-
-union bsca_sigp_ctrl {
- __u8 value;
- struct {
- __u8 c : 1;
- __u8 r : 1;
- __u8 scn : 6;
- };
-};
-
-union esca_sigp_ctrl {
- __u16 value;
- struct {
- __u8 c : 1;
- __u8 reserved: 7;
- __u8 scn;
- };
-};
-
-struct esca_entry {
- union esca_sigp_ctrl sigp_ctrl;
- __u16 reserved1[3];
- __u64 sda;
- __u64 reserved2[6];
-};
-
-struct bsca_entry {
- __u8 reserved0;
- union bsca_sigp_ctrl sigp_ctrl;
- __u16 reserved[3];
- __u64 sda;
- __u64 reserved2[2];
-};
-
-union ipte_control {
- unsigned long val;
- struct {
- unsigned long k : 1;
- unsigned long kh : 31;
- unsigned long kg : 32;
- };
-};
-
-union sca_utility {
- __u16 val;
- struct {
- __u16 mtcr : 1;
- __u16 reserved : 15;
- };
-};
-
-struct bsca_block {
- union ipte_control ipte_control;
- __u64 reserved[5];
- __u64 mcn;
- union sca_utility utility;
- __u8 reserved2[6];
- struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
-};
-
-struct esca_block {
- union ipte_control ipte_control;
- __u64 reserved1[6];
- union sca_utility utility;
- __u8 reserved2[6];
- __u64 mcn[4];
- __u64 reserved3[20];
- struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
-};
-
-/*
- * This struct is used to store some machine check info from lowcore
- * for machine checks that happen while the guest is running.
- * This info in host's lowcore might be overwritten by a second machine
- * check from host when host is in the machine check's high-level handling.
- * The size is 24 bytes.
- */
-struct mcck_volatile_info {
- __u64 mcic;
- __u64 failing_storage_address;
- __u32 ext_damage_code;
- __u32 reserved;
-};
-
-#define CR0_INITIAL_MASK (CR0_UNUSED_56 | CR0_INTERRUPT_KEY_SUBMASK | \
- CR0_MEASUREMENT_ALERT_SUBMASK)
-#define CR14_INITIAL_MASK (CR14_UNUSED_32 | CR14_UNUSED_33 | \
- CR14_EXTERNAL_DAMAGE_SUBMASK)
-
-#define SIDAD_SIZE_MASK 0xff
-#define sida_addr(sie_block) phys_to_virt((sie_block)->sidad & PAGE_MASK)
-#define sida_size(sie_block) \
- ((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE)
-
-#define CPUSTAT_STOPPED 0x80000000
-#define CPUSTAT_WAIT 0x10000000
-#define CPUSTAT_ECALL_PEND 0x08000000
-#define CPUSTAT_STOP_INT 0x04000000
-#define CPUSTAT_IO_INT 0x02000000
-#define CPUSTAT_EXT_INT 0x01000000
-#define CPUSTAT_RUNNING 0x00800000
-#define CPUSTAT_RETAINED 0x00400000
-#define CPUSTAT_TIMING_SUB 0x00020000
-#define CPUSTAT_SIE_SUB 0x00010000
-#define CPUSTAT_RRF 0x00008000
-#define CPUSTAT_SLSV 0x00004000
-#define CPUSTAT_SLSR 0x00002000
-#define CPUSTAT_ZARCH 0x00000800
-#define CPUSTAT_MCDS 0x00000100
-#define CPUSTAT_KSS 0x00000200
-#define CPUSTAT_SM 0x00000080
-#define CPUSTAT_IBS 0x00000040
-#define CPUSTAT_GED2 0x00000010
-#define CPUSTAT_G 0x00000008
-#define CPUSTAT_GED 0x00000004
-#define CPUSTAT_J 0x00000002
-#define CPUSTAT_P 0x00000001
-
-struct kvm_s390_sie_block {
- atomic_t cpuflags; /* 0x0000 */
- __u32 : 1; /* 0x0004 */
- __u32 prefix : 18;
- __u32 : 1;
- __u32 ibc : 12;
- __u8 reserved08[4]; /* 0x0008 */
-#define PROG_IN_SIE (1<<0)
- __u32 prog0c; /* 0x000c */
- union {
- __u8 reserved10[16]; /* 0x0010 */
- struct {
- __u64 pv_handle_cpu;
- __u64 pv_handle_config;
- };
- };
-#define PROG_BLOCK_SIE (1<<0)
-#define PROG_REQUEST (1<<1)
- atomic_t prog20; /* 0x0020 */
- __u8 reserved24[4]; /* 0x0024 */
- __u64 cputm; /* 0x0028 */
- __u64 ckc; /* 0x0030 */
- __u64 epoch; /* 0x0038 */
- __u32 svcc; /* 0x0040 */
-#define LCTL_CR0 0x8000
-#define LCTL_CR6 0x0200
-#define LCTL_CR9 0x0040
-#define LCTL_CR10 0x0020
-#define LCTL_CR11 0x0010
-#define LCTL_CR14 0x0002
- __u16 lctl; /* 0x0044 */
- __s16 icpua; /* 0x0046 */
-#define ICTL_OPEREXC 0x80000000
-#define ICTL_PINT 0x20000000
-#define ICTL_LPSW 0x00400000
-#define ICTL_STCTL 0x00040000
-#define ICTL_ISKE 0x00004000
-#define ICTL_SSKE 0x00002000
-#define ICTL_RRBE 0x00001000
-#define ICTL_TPROT 0x00000200
- __u32 ictl; /* 0x0048 */
-#define ECA_CEI 0x80000000
-#define ECA_IB 0x40000000
-#define ECA_SIGPI 0x10000000
-#define ECA_MVPGI 0x01000000
-#define ECA_AIV 0x00200000
-#define ECA_VX 0x00020000
-#define ECA_PROTEXCI 0x00002000
-#define ECA_APIE 0x00000008
-#define ECA_SII 0x00000001
- __u32 eca; /* 0x004c */
-#define ICPT_INST 0x04
-#define ICPT_PROGI 0x08
-#define ICPT_INSTPROGI 0x0C
-#define ICPT_EXTREQ 0x10
-#define ICPT_EXTINT 0x14
-#define ICPT_IOREQ 0x18
-#define ICPT_WAIT 0x1c
-#define ICPT_VALIDITY 0x20
-#define ICPT_STOP 0x28
-#define ICPT_OPEREXC 0x2C
-#define ICPT_PARTEXEC 0x38
-#define ICPT_IOINST 0x40
-#define ICPT_KSS 0x5c
-#define ICPT_MCHKREQ 0x60
-#define ICPT_INT_ENABLE 0x64
-#define ICPT_PV_INSTR 0x68
-#define ICPT_PV_NOTIFY 0x6c
-#define ICPT_PV_PREF 0x70
- __u8 icptcode; /* 0x0050 */
- __u8 icptstatus; /* 0x0051 */
- __u16 ihcpu; /* 0x0052 */
- __u8 reserved54; /* 0x0054 */
-#define IICTL_CODE_NONE 0x00
-#define IICTL_CODE_MCHK 0x01
-#define IICTL_CODE_EXT 0x02
-#define IICTL_CODE_IO 0x03
-#define IICTL_CODE_RESTART 0x04
-#define IICTL_CODE_SPECIFICATION 0x10
-#define IICTL_CODE_OPERAND 0x11
- __u8 iictl; /* 0x0055 */
- __u16 ipa; /* 0x0056 */
- __u32 ipb; /* 0x0058 */
- __u32 scaoh; /* 0x005c */
-#define FPF_BPBC 0x20
- __u8 fpf; /* 0x0060 */
-#define ECB_GS 0x40
-#define ECB_TE 0x10
-#define ECB_SPECI 0x08
-#define ECB_SRSI 0x04
-#define ECB_HOSTPROTINT 0x02
-#define ECB_PTF 0x01
- __u8 ecb; /* 0x0061 */
-#define ECB2_CMMA 0x80
-#define ECB2_IEP 0x20
-#define ECB2_PFMFI 0x08
-#define ECB2_ESCA 0x04
-#define ECB2_ZPCI_LSI 0x02
- __u8 ecb2; /* 0x0062 */
-#define ECB3_AISI 0x20
-#define ECB3_AISII 0x10
-#define ECB3_DEA 0x08
-#define ECB3_AES 0x04
-#define ECB3_RI 0x01
- __u8 ecb3; /* 0x0063 */
-#define ESCA_SCAOL_MASK ~0x3fU
- __u32 scaol; /* 0x0064 */
- __u8 sdf; /* 0x0068 */
- __u8 epdx; /* 0x0069 */
- __u8 cpnc; /* 0x006a */
- __u8 reserved6b; /* 0x006b */
- __u32 todpr; /* 0x006c */
-#define GISA_FORMAT1 0x00000001
- __u32 gd; /* 0x0070 */
- __u8 reserved74[12]; /* 0x0074 */
- __u64 mso; /* 0x0080 */
- __u64 msl; /* 0x0088 */
- psw_t gpsw; /* 0x0090 */
- __u64 gg14; /* 0x00a0 */
- __u64 gg15; /* 0x00a8 */
- __u8 reservedb0[8]; /* 0x00b0 */
-#define HPID_KVM 0x4
-#define HPID_VSIE 0x5
- __u8 hpid; /* 0x00b8 */
- __u8 reservedb9[7]; /* 0x00b9 */
- union {
- struct {
- __u32 eiparams; /* 0x00c0 */
- __u16 extcpuaddr; /* 0x00c4 */
- __u16 eic; /* 0x00c6 */
- };
- __u64 mcic; /* 0x00c0 */
- } __packed;
- __u32 reservedc8; /* 0x00c8 */
- union {
- struct {
- __u16 pgmilc; /* 0x00cc */
- __u16 iprcc; /* 0x00ce */
- };
- __u32 edc; /* 0x00cc */
- } __packed;
- union {
- struct {
- __u32 dxc; /* 0x00d0 */
- __u16 mcn; /* 0x00d4 */
- __u8 perc; /* 0x00d6 */
- __u8 peratmid; /* 0x00d7 */
- };
- __u64 faddr; /* 0x00d0 */
- } __packed;
- __u64 peraddr; /* 0x00d8 */
- __u8 eai; /* 0x00e0 */
- __u8 peraid; /* 0x00e1 */
- __u8 oai; /* 0x00e2 */
- __u8 armid; /* 0x00e3 */
- __u8 reservede4[4]; /* 0x00e4 */
- union {
- __u64 tecmc; /* 0x00e8 */
- struct {
- __u16 subchannel_id; /* 0x00e8 */
- __u16 subchannel_nr; /* 0x00ea */
- __u32 io_int_parm; /* 0x00ec */
- __u32 io_int_word; /* 0x00f0 */
- };
- } __packed;
- __u8 reservedf4[8]; /* 0x00f4 */
-#define CRYCB_FORMAT_MASK 0x00000003
-#define CRYCB_FORMAT0 0x00000000
-#define CRYCB_FORMAT1 0x00000001
-#define CRYCB_FORMAT2 0x00000003
- __u32 crycbd; /* 0x00fc */
- __u64 gcr[16]; /* 0x0100 */
- union {
- __u64 gbea; /* 0x0180 */
- __u64 sidad;
- };
- __u8 reserved188[8]; /* 0x0188 */
- __u64 sdnxo; /* 0x0190 */
- __u8 reserved198[8]; /* 0x0198 */
- __u32 fac; /* 0x01a0 */
- __u8 reserved1a4[20]; /* 0x01a4 */
- __u64 cbrlo; /* 0x01b8 */
- __u8 reserved1c0[8]; /* 0x01c0 */
-#define ECD_HOSTREGMGMT 0x20000000
-#define ECD_MEF 0x08000000
-#define ECD_ETOKENF 0x02000000
-#define ECD_ECC 0x00200000
- __u32 ecd; /* 0x01c8 */
- __u8 reserved1cc[18]; /* 0x01cc */
- __u64 pp; /* 0x01de */
- __u8 reserved1e6[2]; /* 0x01e6 */
- __u64 itdba; /* 0x01e8 */
- __u64 riccbd; /* 0x01f0 */
- __u64 gvrd; /* 0x01f8 */
-} __packed __aligned(512);
-
-struct kvm_s390_itdb {
- __u8 data[256];
-};
-
-struct sie_page {
- struct kvm_s390_sie_block sie_block;
- struct mcck_volatile_info mcck_info; /* 0x0200 */
- __u8 reserved218[360]; /* 0x0218 */
- __u64 pv_grregs[16]; /* 0x0380 */
- __u8 reserved400[512]; /* 0x0400 */
- struct kvm_s390_itdb itdb; /* 0x0600 */
- __u8 reserved700[2304]; /* 0x0700 */
-};
-
struct kvm_vcpu_stat {
struct kvm_vcpu_stat_generic generic;
u64 exit_userspace;
@@ -476,6 +146,7 @@ struct kvm_vcpu_stat {
u64 instruction_diagnose_500;
u64 instruction_diagnose_other;
u64 pfault_sync;
+ u64 signal_exits;
};
#define PGM_OPERATION 0x01
@@ -528,6 +199,9 @@ struct kvm_vcpu_stat {
#define PGM_REGION_FIRST_TRANS 0x39
#define PGM_REGION_SECOND_TRANS 0x3a
#define PGM_REGION_THIRD_TRANS 0x3b
+#define PGM_SECURE_STORAGE_ACCESS 0x3d
+#define PGM_NON_SECURE_STORAGE_ACCESS 0x3e
+#define PGM_SECURE_STORAGE_VIOLATION 0x3f
#define PGM_MONITOR 0x40
#define PGM_PER 0x80
#define PGM_CRYPTO_OPERATION 0x119
@@ -683,7 +357,7 @@ struct kvm_s390_float_interrupt {
int counters[FIRQ_MAX_COUNT];
struct kvm_s390_mchk_info mchk;
struct kvm_s390_ext_info srv_signal;
- int next_rr_cpu;
+ int last_sleep_cpu;
struct mutex ais_lock;
u8 simm;
u8 nimm;
@@ -748,8 +422,6 @@ struct kvm_vcpu_arch {
struct hrtimer ckc_timer;
struct kvm_s390_pgm_info pgm;
struct gmap *gmap;
- /* backup location for the currently enabled gmap when scheduled out */
- struct gmap *enabled_gmap;
struct kvm_guestdbg_info_arch guestdbg;
unsigned long pfault_token;
unsigned long pfault_select;
@@ -925,12 +597,14 @@ struct sie_page2 {
u8 reserved928[0x1000 - 0x928]; /* 0x0928 */
};
+struct vsie_page;
+
struct kvm_s390_vsie {
struct mutex mutex;
struct radix_tree_root addr_to_page;
int page_count;
int next;
- struct page *pages[KVM_MAX_VCPUS];
+ struct vsie_page *pages[KVM_MAX_VCPUS];
};
struct kvm_s390_gisa_iam {
@@ -958,10 +632,8 @@ struct kvm_s390_pv {
struct mmu_notifier mmu_notifier;
};
-struct kvm_arch{
- void *sca;
- int use_esca;
- rwlock_t sca_lock;
+struct kvm_arch {
+ struct esca_block *sca;
debug_info_t *dbf;
struct kvm_s390_float_interrupt float_int;
struct kvm_device *flic;
@@ -977,6 +649,7 @@ struct kvm_arch{
int user_sigp;
int user_stsi;
int user_instr0;
+ int user_operexec;
struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
wait_queue_head_t ipte_wq;
int ipte_lock_count;
@@ -1030,11 +703,12 @@ void kvm_arch_crypto_clear_masks(struct kvm *kvm);
void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
unsigned long *aqm, unsigned long *adm);
-int __sie64a(phys_addr_t sie_block_phys, struct kvm_s390_sie_block *sie_block, u64 *rsa);
+int __sie64a(phys_addr_t sie_block_phys, struct kvm_s390_sie_block *sie_block, u64 *rsa,
+ unsigned long gasce);
-static inline int sie64a(struct kvm_s390_sie_block *sie_block, u64 *rsa)
+static inline int sie64a(struct kvm_s390_sie_block *sie_block, u64 *rsa, unsigned long gasce)
{
- return __sie64a(virt_to_phys(sie_block), sie_block, rsa);
+ return __sie64a(virt_to_phys(sie_block), sie_block, rsa, gasce);
}
extern char sie_exit;
@@ -1042,11 +716,14 @@ extern char sie_exit;
bool kvm_s390_pv_is_protected(struct kvm *kvm);
bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu);
+extern int kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb,
+ u64 *gprs, unsigned long gasce);
+
extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);
-static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+bool kvm_s390_is_gpa_in_memslot(struct kvm *kvm, gpa_t gpa);
+
static inline void kvm_arch_free_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot) {}
static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
diff --git a/arch/s390/include/asm/kvm_host_types.h b/arch/s390/include/asm/kvm_host_types.h
new file mode 100644
index 000000000000..1394d3fb648f
--- /dev/null
+++ b/arch/s390/include/asm/kvm_host_types.h
@@ -0,0 +1,348 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_KVM_HOST_TYPES_H
+#define _ASM_KVM_HOST_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/types.h>
+
+#define KVM_S390_BSCA_CPU_SLOTS 64
+#define KVM_S390_ESCA_CPU_SLOTS 248
+
+#define SIGP_CTRL_C 0x80
+#define SIGP_CTRL_SCN_MASK 0x3f
+
+union bsca_sigp_ctrl {
+ __u8 value;
+ struct {
+ __u8 c : 1;
+ __u8 r : 1;
+ __u8 scn : 6;
+ };
+};
+
+union esca_sigp_ctrl {
+ __u16 value;
+ struct {
+ __u8 c : 1;
+ __u8 reserved: 7;
+ __u8 scn;
+ };
+};
+
+struct esca_entry {
+ union esca_sigp_ctrl sigp_ctrl;
+ __u16 reserved1[3];
+ __u64 sda;
+ __u64 reserved2[6];
+};
+
+struct bsca_entry {
+ __u8 reserved0;
+ union bsca_sigp_ctrl sigp_ctrl;
+ __u16 reserved[3];
+ __u64 sda;
+ __u64 reserved2[2];
+};
+
+union ipte_control {
+ unsigned long val;
+ struct {
+ unsigned long k : 1;
+ unsigned long kh : 31;
+ unsigned long kg : 32;
+ };
+};
+
+/*
+ * Utility is defined as two bytes but having it four bytes wide
+ * generates more efficient code. Since the following bytes are
+ * reserved this makes no functional difference.
+ */
+union sca_utility {
+ __u32 val;
+ struct {
+ __u32 mtcr : 1;
+ __u32 : 31;
+ };
+};
+
+struct bsca_block {
+ union ipte_control ipte_control;
+ __u64 reserved[5];
+ __u64 mcn;
+ union sca_utility utility;
+ __u8 reserved2[4];
+ struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
+};
+
+struct esca_block {
+ union ipte_control ipte_control;
+ __u64 reserved1[6];
+ union sca_utility utility;
+ __u8 reserved2[4];
+ __u64 mcn[4];
+ __u64 reserved3[20];
+ struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
+};
+
+/*
+ * This struct is used to store some machine check info from lowcore
+ * for machine checks that happen while the guest is running.
+ * This info in host's lowcore might be overwritten by a second machine
+ * check from host when host is in the machine check's high-level handling.
+ * The size is 24 bytes.
+ */
+struct mcck_volatile_info {
+ __u64 mcic;
+ __u64 failing_storage_address;
+ __u32 ext_damage_code;
+ __u32 reserved;
+};
+
+#define CR0_INITIAL_MASK (CR0_UNUSED_56 | CR0_INTERRUPT_KEY_SUBMASK | \
+ CR0_MEASUREMENT_ALERT_SUBMASK)
+#define CR14_INITIAL_MASK (CR14_UNUSED_32 | CR14_UNUSED_33 | \
+ CR14_EXTERNAL_DAMAGE_SUBMASK)
+
+#define SIDAD_SIZE_MASK 0xff
+#define sida_addr(sie_block) phys_to_virt((sie_block)->sidad & PAGE_MASK)
+#define sida_size(sie_block) \
+ ((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE)
+
+#define CPUSTAT_STOPPED 0x80000000
+#define CPUSTAT_WAIT 0x10000000
+#define CPUSTAT_ECALL_PEND 0x08000000
+#define CPUSTAT_STOP_INT 0x04000000
+#define CPUSTAT_IO_INT 0x02000000
+#define CPUSTAT_EXT_INT 0x01000000
+#define CPUSTAT_RUNNING 0x00800000
+#define CPUSTAT_RETAINED 0x00400000
+#define CPUSTAT_TIMING_SUB 0x00020000
+#define CPUSTAT_SIE_SUB 0x00010000
+#define CPUSTAT_RRF 0x00008000
+#define CPUSTAT_SLSV 0x00004000
+#define CPUSTAT_SLSR 0x00002000
+#define CPUSTAT_ZARCH 0x00000800
+#define CPUSTAT_MCDS 0x00000100
+#define CPUSTAT_KSS 0x00000200
+#define CPUSTAT_SM 0x00000080
+#define CPUSTAT_IBS 0x00000040
+#define CPUSTAT_GED2 0x00000010
+#define CPUSTAT_G 0x00000008
+#define CPUSTAT_GED 0x00000004
+#define CPUSTAT_J 0x00000002
+#define CPUSTAT_P 0x00000001
+
+struct kvm_s390_sie_block {
+ atomic_t cpuflags; /* 0x0000 */
+ __u32 : 1; /* 0x0004 */
+ __u32 prefix : 18;
+ __u32 : 1;
+ __u32 ibc : 12;
+ __u8 reserved08[4]; /* 0x0008 */
+#define PROG_IN_SIE (1<<0)
+ __u32 prog0c; /* 0x000c */
+ union {
+ __u8 reserved10[16]; /* 0x0010 */
+ struct {
+ __u64 pv_handle_cpu;
+ __u64 pv_handle_config;
+ };
+ };
+#define PROG_BLOCK_SIE (1<<0)
+#define PROG_REQUEST (1<<1)
+ atomic_t prog20; /* 0x0020 */
+ __u8 reserved24[4]; /* 0x0024 */
+ __u64 cputm; /* 0x0028 */
+ __u64 ckc; /* 0x0030 */
+ __u64 epoch; /* 0x0038 */
+ __u32 svcc; /* 0x0040 */
+#define LCTL_CR0 0x8000
+#define LCTL_CR6 0x0200
+#define LCTL_CR9 0x0040
+#define LCTL_CR10 0x0020
+#define LCTL_CR11 0x0010
+#define LCTL_CR14 0x0002
+ __u16 lctl; /* 0x0044 */
+ __s16 icpua; /* 0x0046 */
+#define ICTL_OPEREXC 0x80000000
+#define ICTL_PINT 0x20000000
+#define ICTL_LPSW 0x00400000
+#define ICTL_STCTL 0x00040000
+#define ICTL_ISKE 0x00004000
+#define ICTL_SSKE 0x00002000
+#define ICTL_RRBE 0x00001000
+#define ICTL_TPROT 0x00000200
+ __u32 ictl; /* 0x0048 */
+#define ECA_CEI 0x80000000
+#define ECA_IB 0x40000000
+#define ECA_SIGPI 0x10000000
+#define ECA_MVPGI 0x01000000
+#define ECA_AIV 0x00200000
+#define ECA_VX 0x00020000
+#define ECA_PROTEXCI 0x00002000
+#define ECA_APIE 0x00000008
+#define ECA_SII 0x00000001
+ __u32 eca; /* 0x004c */
+#define ICPT_INST 0x04
+#define ICPT_PROGI 0x08
+#define ICPT_INSTPROGI 0x0C
+#define ICPT_EXTREQ 0x10
+#define ICPT_EXTINT 0x14
+#define ICPT_IOREQ 0x18
+#define ICPT_WAIT 0x1c
+#define ICPT_VALIDITY 0x20
+#define ICPT_STOP 0x28
+#define ICPT_OPEREXC 0x2C
+#define ICPT_PARTEXEC 0x38
+#define ICPT_IOINST 0x40
+#define ICPT_KSS 0x5c
+#define ICPT_MCHKREQ 0x60
+#define ICPT_INT_ENABLE 0x64
+#define ICPT_PV_INSTR 0x68
+#define ICPT_PV_NOTIFY 0x6c
+#define ICPT_PV_PREF 0x70
+ __u8 icptcode; /* 0x0050 */
+ __u8 icptstatus; /* 0x0051 */
+ __u16 ihcpu; /* 0x0052 */
+ __u8 reserved54; /* 0x0054 */
+#define IICTL_CODE_NONE 0x00
+#define IICTL_CODE_MCHK 0x01
+#define IICTL_CODE_EXT 0x02
+#define IICTL_CODE_IO 0x03
+#define IICTL_CODE_RESTART 0x04
+#define IICTL_CODE_SPECIFICATION 0x10
+#define IICTL_CODE_OPERAND 0x11
+ __u8 iictl; /* 0x0055 */
+ __u16 ipa; /* 0x0056 */
+ __u32 ipb; /* 0x0058 */
+ __u32 scaoh; /* 0x005c */
+#define FPF_BPBC 0x20
+ __u8 fpf; /* 0x0060 */
+#define ECB_GS 0x40
+#define ECB_TE 0x10
+#define ECB_SPECI 0x08
+#define ECB_SRSI 0x04
+#define ECB_HOSTPROTINT 0x02
+#define ECB_PTF 0x01
+ __u8 ecb; /* 0x0061 */
+#define ECB2_CMMA 0x80
+#define ECB2_IEP 0x20
+#define ECB2_PFMFI 0x08
+#define ECB2_ESCA 0x04
+#define ECB2_ZPCI_LSI 0x02
+ __u8 ecb2; /* 0x0062 */
+#define ECB3_AISI 0x20
+#define ECB3_AISII 0x10
+#define ECB3_DEA 0x08
+#define ECB3_AES 0x04
+#define ECB3_RI 0x01
+ __u8 ecb3; /* 0x0063 */
+#define ESCA_SCAOL_MASK ~0x3fU
+ __u32 scaol; /* 0x0064 */
+ __u8 sdf; /* 0x0068 */
+ __u8 epdx; /* 0x0069 */
+ __u8 cpnc; /* 0x006a */
+ __u8 reserved6b; /* 0x006b */
+ __u32 todpr; /* 0x006c */
+#define GISA_FORMAT1 0x00000001
+ __u32 gd; /* 0x0070 */
+ __u8 reserved74[12]; /* 0x0074 */
+ __u64 mso; /* 0x0080 */
+ __u64 msl; /* 0x0088 */
+ psw_t gpsw; /* 0x0090 */
+ __u64 gg14; /* 0x00a0 */
+ __u64 gg15; /* 0x00a8 */
+ __u8 reservedb0[8]; /* 0x00b0 */
+#define HPID_KVM 0x4
+#define HPID_VSIE 0x5
+ __u8 hpid; /* 0x00b8 */
+ __u8 reservedb9[7]; /* 0x00b9 */
+ union {
+ struct {
+ __u32 eiparams; /* 0x00c0 */
+ __u16 extcpuaddr; /* 0x00c4 */
+ __u16 eic; /* 0x00c6 */
+ };
+ __u64 mcic; /* 0x00c0 */
+ } __packed;
+ __u32 reservedc8; /* 0x00c8 */
+ union {
+ struct {
+ __u16 pgmilc; /* 0x00cc */
+ __u16 iprcc; /* 0x00ce */
+ };
+ __u32 edc; /* 0x00cc */
+ } __packed;
+ union {
+ struct {
+ __u32 dxc; /* 0x00d0 */
+ __u16 mcn; /* 0x00d4 */
+ __u8 perc; /* 0x00d6 */
+ __u8 peratmid; /* 0x00d7 */
+ };
+ __u64 faddr; /* 0x00d0 */
+ } __packed;
+ __u64 peraddr; /* 0x00d8 */
+ __u8 eai; /* 0x00e0 */
+ __u8 peraid; /* 0x00e1 */
+ __u8 oai; /* 0x00e2 */
+ __u8 armid; /* 0x00e3 */
+ __u8 reservede4[4]; /* 0x00e4 */
+ union {
+ __u64 tecmc; /* 0x00e8 */
+ struct {
+ __u16 subchannel_id; /* 0x00e8 */
+ __u16 subchannel_nr; /* 0x00ea */
+ __u32 io_int_parm; /* 0x00ec */
+ __u32 io_int_word; /* 0x00f0 */
+ };
+ } __packed;
+ __u8 reservedf4[8]; /* 0x00f4 */
+#define CRYCB_FORMAT_MASK 0x00000003
+#define CRYCB_FORMAT0 0x00000000
+#define CRYCB_FORMAT1 0x00000001
+#define CRYCB_FORMAT2 0x00000003
+ __u32 crycbd; /* 0x00fc */
+ __u64 gcr[16]; /* 0x0100 */
+ union {
+ __u64 gbea; /* 0x0180 */
+ __u64 sidad;
+ };
+ __u8 reserved188[8]; /* 0x0188 */
+ __u64 sdnxo; /* 0x0190 */
+ __u8 reserved198[8]; /* 0x0198 */
+ __u32 fac; /* 0x01a0 */
+ __u8 reserved1a4[20]; /* 0x01a4 */
+ __u64 cbrlo; /* 0x01b8 */
+ __u8 reserved1c0[8]; /* 0x01c0 */
+#define ECD_HOSTREGMGMT 0x20000000
+#define ECD_MEF 0x08000000
+#define ECD_ETOKENF 0x02000000
+#define ECD_ECC 0x00200000
+#define ECD_HMAC 0x00004000
+ __u32 ecd; /* 0x01c8 */
+ __u8 reserved1cc[18]; /* 0x01cc */
+ __u64 pp; /* 0x01de */
+ __u8 reserved1e6[2]; /* 0x01e6 */
+ __u64 itdba; /* 0x01e8 */
+ __u64 riccbd; /* 0x01f0 */
+ __u64 gvrd; /* 0x01f8 */
+} __packed __aligned(512);
+
+struct kvm_s390_itdb {
+ __u8 data[256];
+};
+
+struct sie_page {
+ struct kvm_s390_sie_block sie_block;
+ struct mcck_volatile_info mcck_info; /* 0x0200 */
+ __u8 reserved218[360]; /* 0x0218 */
+ __u64 pv_grregs[16]; /* 0x0380 */
+ __u8 reserved400[512]; /* 0x0400 */
+ struct kvm_s390_itdb itdb; /* 0x0600 */
+ __u8 reserved700[2304]; /* 0x0700 */
+};
+
+#endif /* _ASM_KVM_HOST_TYPES_H */
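The CPUSTAT_* constants map onto the atomic cpuflags word at offset 0x0000 of the SIE block and are manipulated with the normal atomic bit operations. A minimal sketch of that usage; the helper names below are made up for illustration and are not part of this patch:

static inline bool sie_is_stopped(struct kvm_s390_sie_block *scb)
{
	return atomic_read(&scb->cpuflags) & CPUSTAT_STOPPED;
}

static inline void sie_request_stop(struct kvm_s390_sie_block *scb)
{
	atomic_or(CPUSTAT_STOP_INT, &scb->cpuflags);
}
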
diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h
index df73a052760c..00cc8c916cfb 100644
--- a/arch/s390/include/asm/kvm_para.h
+++ b/arch/s390/include/asm/kvm_para.h
@@ -76,7 +76,7 @@ long __kvm_hypercall##args(unsigned long nr HYPERCALL_PARM_##args) \
HYPERCALL_REGS_##args; \
\
asm volatile ( \
- " diag 2,4,0x500\n" \
+ " diag 2,4,0x500" \
: "=d" (__rc) \
: "d" (__nr) HYPERCALL_FMT_##args \
: "memory", "cc"); \
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 8c5f16857539..50ffe75adeb4 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -10,14 +10,20 @@
#define _ASM_S390_LOWCORE_H
#include <linux/types.h>
+#include <asm/machine.h>
#include <asm/ptrace.h>
#include <asm/ctlreg.h>
#include <asm/cpu.h>
#include <asm/types.h>
+#include <asm/alternative.h>
#define LC_ORDER 1
#define LC_PAGES 2
+#define LOWCORE_ALT_ADDRESS _AC(0x70000, UL)
+
+#ifndef __ASSEMBLER__
+
struct pgm_tdb {
u64 data[32];
};
@@ -93,12 +99,12 @@ struct lowcore {
psw_t io_new_psw; /* 0x01f0 */
/* Save areas. */
- __u64 save_area_sync[8]; /* 0x0200 */
- __u64 save_area_async[8]; /* 0x0240 */
+ __u64 save_area[8]; /* 0x0200 */
+ __u64 stack_canary; /* 0x0240 */
+ __u8 pad_0x0248[0x0280-0x0248]; /* 0x0248 */
__u64 save_area_restart[1]; /* 0x0280 */
- /* CPU flags. */
- __u64 cpu_flags; /* 0x0288 */
+ __u64 pcpu; /* 0x0288 */
/* Return psws. */
psw_t return_psw; /* 0x0290 */
@@ -122,7 +128,7 @@ struct lowcore {
__u64 int_clock; /* 0x0318 */
__u8 pad_0x0320[0x0328-0x0320]; /* 0x0320 */
__u64 clock_comparator; /* 0x0328 */
- __u64 boot_clock[2]; /* 0x0330 */
+ __u8 pad_0x0330[0x0340-0x0330]; /* 0x0330 */
/* Current process. */
__u64 current_task; /* 0x0340 */
@@ -159,10 +165,7 @@ struct lowcore {
__u32 spinlock_index; /* 0x03b0 */
__u8 pad_0x03b4[0x03b8-0x03b4]; /* 0x03b4 */
__u64 percpu_offset; /* 0x03b8 */
- __u8 pad_0x03c0[0x03c8-0x03c0]; /* 0x03c0 */
- __u64 machine_flags; /* 0x03c8 */
- __u64 gmap; /* 0x03d0 */
- __u8 pad_0x03d8[0x0400-0x03d8]; /* 0x03d8 */
+ __u8 pad_0x03c0[0x0400-0x03c0]; /* 0x03c0 */
__u32 return_lpswe; /* 0x0400 */
__u32 return_mcck_lpswe; /* 0x0404 */
@@ -213,7 +216,20 @@ struct lowcore {
__u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */
} __packed __aligned(8192);
-#define S390_lowcore (*((struct lowcore *) 0))
+static __always_inline struct lowcore *get_lowcore(void)
+{
+ struct lowcore *lc;
+
+ if (__is_defined(__DECOMPRESSOR))
+ return NULL;
+ asm_inline(
+ ALTERNATIVE(" lghi %[lc],0",
+ " llilh %[lc],%[alt]",
+ ALT_FEATURE(MFEATURE_LOWCORE))
+ : [lc] "=d" (lc)
+ : [alt] "i" (LOWCORE_ALT_ADDRESS >> 16));
+ return lc;
+}
extern struct lowcore *lowcore_ptr[];
@@ -222,4 +238,19 @@ static inline void set_prefix(__u32 address)
asm volatile("spx %0" : : "Q" (address) : "memory");
}
+#else /* __ASSEMBLER__ */
+
+.macro GET_LC reg
+ ALTERNATIVE "lghi \reg,0", \
+ __stringify(llilh \reg, LOWCORE_ALT_ADDRESS >> 16), \
+ ALT_FEATURE(MFEATURE_LOWCORE)
+.endm
+
+.macro STMG_LC start, end, savearea
+ ALTERNATIVE "stmg \start, \end, \savearea", \
+ __stringify(stmg \start, \end, LOWCORE_ALT_ADDRESS + \savearea), \
+ ALT_FEATURE(MFEATURE_LOWCORE)
+.endm
+
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_S390_LOWCORE_H */
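With the absolute-zero S390_lowcore macro gone, every lowcore access goes through get_lowcore(), which ALTERNATIVE patches to either address 0 or the relocated LOWCORE_ALT_ADDRESS (0x70000). A hedged sketch of the call-site conversion; the wrapper function is purely illustrative, only percpu_offset is a real lowcore field:

static inline unsigned long example_my_cpu_offset(void)
{
	return get_lowcore()->percpu_offset;	/* was: S390_lowcore.percpu_offset */
}
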
diff --git a/arch/s390/include/asm/machine.h b/arch/s390/include/asm/machine.h
new file mode 100644
index 000000000000..9bd4a9dc7778
--- /dev/null
+++ b/arch/s390/include/asm/machine.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2024
+ */
+
+#ifndef __ASM_S390_MACHINE_H
+#define __ASM_S390_MACHINE_H
+
+#include <linux/const.h>
+
+#define MFEATURE_LOWCORE 0
+#define MFEATURE_PCI_MIO 1
+#define MFEATURE_SCC 2
+#define MFEATURE_TLB_GUEST 3
+#define MFEATURE_TX 4
+#define MFEATURE_ESOP 5
+#define MFEATURE_DIAG9C 6
+#define MFEATURE_VM 7
+#define MFEATURE_KVM 8
+#define MFEATURE_LPAR 9
+#define MFEATURE_DIAG288 10
+
+#ifndef __ASSEMBLER__
+
+#include <linux/bitops.h>
+#include <asm/alternative.h>
+
+extern unsigned long machine_features[1];
+
+#define MAX_MFEATURE_BIT (sizeof(machine_features) * BITS_PER_BYTE)
+
+static inline void __set_machine_feature(unsigned int nr, unsigned long *mfeatures)
+{
+ if (nr >= MAX_MFEATURE_BIT)
+ return;
+ __set_bit(nr, mfeatures);
+}
+
+static inline void set_machine_feature(unsigned int nr)
+{
+ __set_machine_feature(nr, machine_features);
+}
+
+static inline void __clear_machine_feature(unsigned int nr, unsigned long *mfeatures)
+{
+ if (nr >= MAX_MFEATURE_BIT)
+ return;
+ __clear_bit(nr, mfeatures);
+}
+
+static inline void clear_machine_feature(unsigned int nr)
+{
+ __clear_machine_feature(nr, machine_features);
+}
+
+static bool __test_machine_feature(unsigned int nr, unsigned long *mfeatures)
+{
+ if (nr >= MAX_MFEATURE_BIT)
+ return false;
+ return test_bit(nr, mfeatures);
+}
+
+static bool test_machine_feature(unsigned int nr)
+{
+ return __test_machine_feature(nr, machine_features);
+}
+
+static __always_inline bool __test_machine_feature_constant(unsigned int nr)
+{
+ asm goto(
+ ALTERNATIVE("brcl 15,%l[l_no]", "brcl 0,0", ALT_FEATURE(%[nr]))
+ :
+ : [nr] "i" (nr)
+ :
+ : l_no);
+ return true;
+l_no:
+ return false;
+}
+
+#define DEFINE_MACHINE_HAS_FEATURE(name, feature) \
+static __always_inline bool machine_has_##name(void) \
+{ \
+ if (!__is_defined(__DECOMPRESSOR) && __builtin_constant_p(feature)) \
+ return __test_machine_feature_constant(feature); \
+ return test_machine_feature(feature); \
+}
+
+DEFINE_MACHINE_HAS_FEATURE(relocated_lowcore, MFEATURE_LOWCORE)
+DEFINE_MACHINE_HAS_FEATURE(scc, MFEATURE_SCC)
+DEFINE_MACHINE_HAS_FEATURE(tlb_guest, MFEATURE_TLB_GUEST)
+DEFINE_MACHINE_HAS_FEATURE(tx, MFEATURE_TX)
+DEFINE_MACHINE_HAS_FEATURE(esop, MFEATURE_ESOP)
+DEFINE_MACHINE_HAS_FEATURE(diag9c, MFEATURE_DIAG9C)
+DEFINE_MACHINE_HAS_FEATURE(vm, MFEATURE_VM)
+DEFINE_MACHINE_HAS_FEATURE(kvm, MFEATURE_KVM)
+DEFINE_MACHINE_HAS_FEATURE(lpar, MFEATURE_LPAR)
+
+#define machine_is_vm machine_has_vm
+#define machine_is_kvm machine_has_kvm
+#define machine_is_lpar machine_has_lpar
+
+#endif /* __ASSEMBLER__ */
+#endif /* __ASM_S390_MACHINE_H */
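The MFEATURE_* bits are set once during early boot and queried through the generated machine_has_*() helpers; when the bit number is a compile-time constant the query compiles to an ALTERNATIVE-patched branch instead of a bit test in memory. A minimal usage sketch, with the facility number and both function names assumed for illustration:

#include <linux/init.h>
#include <asm/facility.h>
#include <asm/machine.h>

static int __init example_detect(void)
{
	if (test_facility(73))			/* assumed facility number */
		set_machine_feature(MFEATURE_TX);
	return 0;
}

static bool example_tx_usable(void)
{
	return machine_has_tx();		/* constant bit: patched branch */
}
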
diff --git a/arch/s390/include/asm/march.h b/arch/s390/include/asm/march.h
new file mode 100644
index 000000000000..11a71bd14954
--- /dev/null
+++ b/arch/s390/include/asm/march.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_S390_MARCH_H
+#define __ASM_S390_MARCH_H
+
+#include <linux/kconfig.h>
+
+#define MARCH_HAS_Z10_FEATURES 1
+
+#ifndef __DECOMPRESSOR
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+#define MARCH_HAS_Z196_FEATURES 1
+#endif
+
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+#define MARCH_HAS_ZEC12_FEATURES 1
+#endif
+
+#ifdef CONFIG_HAVE_MARCH_Z13_FEATURES
+#define MARCH_HAS_Z13_FEATURES 1
+#endif
+
+#ifdef CONFIG_HAVE_MARCH_Z14_FEATURES
+#define MARCH_HAS_Z14_FEATURES 1
+#endif
+
+#ifdef CONFIG_HAVE_MARCH_Z15_FEATURES
+#define MARCH_HAS_Z15_FEATURES 1
+#endif
+
+#ifdef CONFIG_HAVE_MARCH_Z16_FEATURES
+#define MARCH_HAS_Z16_FEATURES 1
+#endif
+
+#ifdef CONFIG_HAVE_MARCH_Z17_FEATURES
+#define MARCH_HAS_Z17_FEATURES 1
+#endif
+
+#endif /* __DECOMPRESSOR */
+
+#endif /* __ASM_S390_MARCH_H */
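Consumers are expected to test these MARCH_HAS_* defines instead of the raw CONFIG_HAVE_MARCH_* options, so code built for the decompressor (which defines __DECOMPRESSOR) automatically falls back to the base machine level; the percpu.h hunk later in this diff is converted exactly that way. A small illustrative pattern:

#include <asm/march.h>

#ifdef MARCH_HAS_Z196_FEATURES
#define EXAMPLE_HAVE_INTERLOCKED_OPS	1	/* laa/laog and friends available */
#else
#define EXAMPLE_HAVE_INTERLOCKED_OPS	0
#endif
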
diff --git a/arch/s390/include/asm/mem_encrypt.h b/arch/s390/include/asm/mem_encrypt.h
index b85e13505a0f..28c83ec1f243 100644
--- a/arch/s390/include/asm/mem_encrypt.h
+++ b/arch/s390/include/asm/mem_encrypt.h
@@ -2,11 +2,11 @@
#ifndef S390_MEM_ENCRYPT_H__
#define S390_MEM_ENCRYPT_H__
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
int set_memory_encrypted(unsigned long vaddr, int numpages);
int set_memory_decrypted(unsigned long vaddr, int numpages);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* S390_MEM_ENCRYPT_H__ */
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index 4c2dc7abc285..f07e49b419ab 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -22,10 +22,7 @@ typedef struct {
* The following bitfields need a down_write on the mm
* semaphore when they are written to. As they are only
* written once, they can be read without a lock.
- *
- * The mmu context allocates 4K page tables.
*/
- unsigned int alloc_pgste:1;
/* The mmu context uses extended page tables. */
unsigned int has_pgste:1;
/* The mmu context uses storage keys. */
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index a7789a9f6218..d9b8501bc93d 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -13,6 +13,7 @@
#include <linux/mm_types.h>
#include <asm/tlbflush.h>
#include <asm/ctlreg.h>
+#include <asm/asce.h>
#include <asm-generic/mm_hooks.h>
#define init_new_context init_new_context
@@ -29,9 +30,6 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.gmap_asce = 0;
mm->context.flush_mm = 0;
#ifdef CONFIG_PGSTE
- mm->context.alloc_pgste = page_table_allocate_pgste ||
- test_thread_flag(TIF_PGSTE) ||
- (current->mm && current->mm->context.alloc_pgste);
mm->context.has_pgste = 0;
mm->context.uses_skeys = 0;
mm->context.uses_cmm = 0;
@@ -76,11 +74,12 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *
int cpu = smp_processor_id();
if (next == &init_mm)
- S390_lowcore.user_asce = s390_invalid_asce;
+ get_lowcore()->user_asce = s390_invalid_asce;
else
- S390_lowcore.user_asce.val = next->context.asce;
+ get_lowcore()->user_asce.val = next->context.asce;
cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
- /* Clear previous user-ASCE from CR7 */
+ /* Clear previous user-ASCE from CR1 and CR7 */
+ local_ctl_load(1, &s390_invalid_asce);
local_ctl_load(7, &s390_invalid_asce);
if (prev != next)
cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
@@ -102,6 +101,7 @@ static inline void finish_arch_post_lock_switch(void)
{
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
+ unsigned long flags;
if (mm) {
preempt_disable();
@@ -111,16 +111,26 @@ static inline void finish_arch_post_lock_switch(void)
__tlb_flush_mm_lazy(mm);
preempt_enable();
}
- local_ctl_load(7, &S390_lowcore.user_asce);
+ local_irq_save(flags);
+ if (test_thread_flag(TIF_ASCE_PRIMARY))
+ local_ctl_load(1, &get_lowcore()->kernel_asce);
+ else
+ local_ctl_load(1, &get_lowcore()->user_asce);
+ local_ctl_load(7, &get_lowcore()->user_asce);
+ local_irq_restore(flags);
}
#define activate_mm activate_mm
static inline void activate_mm(struct mm_struct *prev,
struct mm_struct *next)
{
- switch_mm(prev, next, current);
+ switch_mm_irqs_off(prev, next, current);
cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
- local_ctl_load(7, &S390_lowcore.user_asce);
+ if (test_thread_flag(TIF_ASCE_PRIMARY))
+ local_ctl_load(1, &get_lowcore()->kernel_asce);
+ else
+ local_ctl_load(1, &get_lowcore()->user_asce);
+ local_ctl_load(7, &get_lowcore()->user_asce);
}
#include <asm-generic/mmu_context.h>
diff --git a/arch/s390/include/asm/mmzone.h b/arch/s390/include/asm/mmzone.h
deleted file mode 100644
index 73e3e7c6976c..000000000000
--- a/arch/s390/include/asm/mmzone.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * NUMA support for s390
- *
- * Copyright IBM Corp. 2015
- */
-
-#ifndef _ASM_S390_MMZONE_H
-#define _ASM_S390_MMZONE_H
-
-#ifdef CONFIG_NUMA
-
-extern struct pglist_data *node_data[];
-#define NODE_DATA(nid) (node_data[nid])
-
-#endif /* CONFIG_NUMA */
-#endif /* _ASM_S390_MMZONE_H */
diff --git a/arch/s390/include/asm/module.h b/arch/s390/include/asm/module.h
index 9f1eea15872c..916ab59e458a 100644
--- a/arch/s390/include/asm/module.h
+++ b/arch/s390/include/asm/module.h
@@ -38,4 +38,18 @@ struct mod_arch_specific {
#endif /* CONFIG_FUNCTION_TRACER */
};
+static inline const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ const char *name)
+{
+ const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+ const Elf_Shdr *s, *se;
+
+ for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
+ if (strcmp(name, secstrs + s->sh_name) == 0)
+ return s;
+ }
+ return NULL;
+}
+
#endif /* _ASM_S390_MODULE_H */
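find_section() is a simple linear scan over the section headers by name. A hypothetical caller during module finalization might look like the following; the section name and the action taken are assumptions for illustration only:

#include <linux/module.h>

static int example_module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
				   struct module *me)
{
	const Elf_Shdr *s = find_section(hdr, sechdrs, ".s390_example");

	if (!s)
		return 0;
	pr_info("%s: example section found, %llu bytes\n", me->name,
		(unsigned long long)s->sh_size);
	return 0;
}
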
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index 227466ce9e41..6454c1531854 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -33,7 +33,7 @@
#define MCCK_CODE_FC_VALID BIT(63 - 43)
#define MCCK_CODE_CPU_TIMER_VALID BIT(63 - 46)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
union mci {
unsigned long val;
@@ -104,5 +104,5 @@ void nmi_free_mcesa(u64 *mcesad);
void s390_handle_mcck(void);
void s390_do_machine_check(struct pt_regs *regs);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_S390_NMI_H */
diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h
index b9c1f3cae842..81c4813cff18 100644
--- a/arch/s390/include/asm/nospec-branch.h
+++ b/arch/s390/include/asm/nospec-branch.h
@@ -2,11 +2,20 @@
#ifndef _ASM_S390_EXPOLINE_H
#define _ASM_S390_EXPOLINE_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
+#include <asm/facility.h>
extern int nospec_disable;
+extern int nobp;
+
+static inline bool nobp_enabled(void)
+{
+ if (__is_defined(__DECOMPRESSOR))
+ return false;
+ return nobp && test_facility(82);
+}
void nospec_init_branches(void);
void nospec_auto_detect(void);
@@ -17,8 +26,6 @@ static inline bool nospec_uses_trampoline(void)
return __is_defined(CC_USING_EXPOLINE) && !nospec_disable;
}
-#ifdef CONFIG_EXPOLINE_EXTERN
-
void __s390_indirect_jump_r1(void);
void __s390_indirect_jump_r2(void);
void __s390_indirect_jump_r3(void);
@@ -35,8 +42,6 @@ void __s390_indirect_jump_r13(void);
void __s390_indirect_jump_r14(void);
void __s390_indirect_jump_r15(void);
-#endif
-
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_S390_EXPOLINE_H */
diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h
index cb15dd25bf21..46f92bb4c9e5 100644
--- a/arch/s390/include/asm/nospec-insn.h
+++ b/arch/s390/include/asm/nospec-insn.h
@@ -3,9 +3,10 @@
#define _ASM_S390_NOSPEC_ASM_H
#include <linux/linkage.h>
+#include <linux/export.h>
#include <asm/dwarf.h>
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#ifdef CC_USING_EXPOLINE
@@ -18,7 +19,7 @@
#ifdef CONFIG_EXPOLINE_EXTERN
SYM_CODE_START(\name)
#else
- .pushsection .text.\name,"axG",@progbits,\name,comdat
+ .pushsection .text..\name,"axG",@progbits,\name,comdat
.globl \name
.hidden \name
.type \name,@function
@@ -128,6 +129,6 @@
.endm
#endif /* CC_USING_EXPOLINE */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_S390_NOSPEC_ASM_H */
diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h
index 08fcbd628120..794fdb21500a 100644
--- a/arch/s390/include/asm/page-states.h
+++ b/arch/s390/include/asm/page-states.h
@@ -7,7 +7,6 @@
#ifndef PAGE_STATES_H
#define PAGE_STATES_H
-#include <asm/sections.h>
#include <asm/page.h>
#define ESSA_GET_STATE 0
@@ -21,7 +20,7 @@
#define ESSA_MAX ESSA_SET_STABLE_NODAT
-extern int __bootdata_preserved(cmma_flag);
+extern int cmma_flag;
static __always_inline unsigned long essa(unsigned long paddr, unsigned char cmd)
{
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 224ff9d433ea..9240a363c893 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -10,15 +10,10 @@
#include <linux/const.h>
#include <asm/types.h>
+#include <asm/asm.h>
-#define _PAGE_SHIFT CONFIG_PAGE_SHIFT
-#define _PAGE_SIZE (_AC(1, UL) << _PAGE_SHIFT)
-#define _PAGE_MASK (~(_PAGE_SIZE - 1))
+#include <vdso/page.h>
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT _PAGE_SHIFT
-#define PAGE_SIZE _PAGE_SIZE
-#define PAGE_MASK _PAGE_MASK
#define PAGE_DEFAULT_ACC _AC(0, UL)
/* storage-protection override */
#define PAGE_SPO_ACC 9
@@ -38,7 +33,7 @@
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#include <asm/setup.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
void __storage_key_init_range(unsigned long start, unsigned long end);
@@ -74,11 +69,13 @@ static inline void copy_page(void *to, void *from)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
#define vma_alloc_zeroed_movable_folio(vma, vaddr) \
- vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr, false)
+ vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr)
-/*
- * These are used to make use of C type-checking..
- */
+#ifdef CONFIG_STRICT_MM_TYPECHECKS
+#define STRICT_MM_TYPECHECKS
+#endif
+
+#ifdef STRICT_MM_TYPECHECKS
typedef struct { unsigned long pgprot; } pgprot_t;
typedef struct { unsigned long pgste; } pgste_t;
@@ -87,52 +84,65 @@ typedef struct { unsigned long pmd; } pmd_t;
typedef struct { unsigned long pud; } pud_t;
typedef struct { unsigned long p4d; } p4d_t;
typedef struct { unsigned long pgd; } pgd_t;
-typedef pte_t *pgtable_t;
-#define pgprot_val(x) ((x).pgprot)
-#define pgste_val(x) ((x).pgste)
-
-static inline unsigned long pte_val(pte_t pte)
-{
- return pte.pte;
+#define DEFINE_PGVAL_FUNC(name) \
+static __always_inline unsigned long name ## _val(name ## _t name) \
+{ \
+ return name.name; \
}
-static inline unsigned long pmd_val(pmd_t pmd)
-{
- return pmd.pmd;
-}
+#else /* STRICT_MM_TYPECHECKS */
-static inline unsigned long pud_val(pud_t pud)
-{
- return pud.pud;
-}
+typedef unsigned long pgprot_t;
+typedef unsigned long pgste_t;
+typedef unsigned long pte_t;
+typedef unsigned long pmd_t;
+typedef unsigned long pud_t;
+typedef unsigned long p4d_t;
+typedef unsigned long pgd_t;
-static inline unsigned long p4d_val(p4d_t p4d)
-{
- return p4d.p4d;
+#define DEFINE_PGVAL_FUNC(name) \
+static __always_inline unsigned long name ## _val(name ## _t name) \
+{ \
+ return name; \
}
-static inline unsigned long pgd_val(pgd_t pgd)
-{
- return pgd.pgd;
-}
+#endif /* STRICT_MM_TYPECHECKS */
+
+DEFINE_PGVAL_FUNC(pgprot)
+DEFINE_PGVAL_FUNC(pgste)
+DEFINE_PGVAL_FUNC(pte)
+DEFINE_PGVAL_FUNC(pmd)
+DEFINE_PGVAL_FUNC(pud)
+DEFINE_PGVAL_FUNC(p4d)
+DEFINE_PGVAL_FUNC(pgd)
+typedef pte_t *pgtable_t;
+
+#define __pgprot(x) ((pgprot_t) { (x) } )
#define __pgste(x) ((pgste_t) { (x) } )
#define __pte(x) ((pte_t) { (x) } )
#define __pmd(x) ((pmd_t) { (x) } )
#define __pud(x) ((pud_t) { (x) } )
#define __p4d(x) ((p4d_t) { (x) } )
#define __pgd(x) ((pgd_t) { (x) } )
-#define __pgprot(x) ((pgprot_t) { (x) } )
static inline void page_set_storage_key(unsigned long addr,
unsigned char skey, int mapped)
{
- if (!mapped)
- asm volatile(".insn rrf,0xb22b0000,%0,%1,8,0"
- : : "d" (skey), "a" (addr));
- else
- asm volatile("sske %0,%1" : : "d" (skey), "a" (addr));
+ if (!mapped) {
+ asm volatile(
+ " .insn rrf,0xb22b0000,%[skey],%[addr],8,0"
+ :
+ : [skey] "d" (skey), [addr] "a" (addr)
+ : "memory");
+ } else {
+ asm volatile(
+ " sske %[skey],%[addr]"
+ :
+ : [skey] "d" (skey), [addr] "a" (addr)
+ : "memory");
+ }
}
static inline unsigned char page_get_storage_key(unsigned long addr)
@@ -148,11 +158,12 @@ static inline int page_reset_referenced(unsigned long addr)
int cc;
asm volatile(
- " rrbe 0,%1\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (cc) : "a" (addr) : "cc");
- return cc;
+ " rrbe 0,%[addr]\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc)
+ : [addr] "a" (addr)
+ : CC_CLOBBER);
+ return CC_TRANSFORM(cc);
}
/* Bits int the storage key */
@@ -162,6 +173,7 @@ static inline int page_reset_referenced(unsigned long addr)
#define _PAGE_ACC_BITS 0xf0 /* HW access control bits */
struct page;
+struct folio;
void arch_free_page(struct page *page, int order);
void arch_alloc_page(struct page *page, int order);
@@ -173,10 +185,8 @@ static inline int devmem_is_allowed(unsigned long pfn)
#define HAVE_ARCH_FREE_PAGE
#define HAVE_ARCH_ALLOC_PAGE
-#if IS_ENABLED(CONFIG_PGSTE)
-int arch_make_page_accessible(struct page *page);
-#define HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
-#endif
+int arch_make_folio_accessible(struct folio *folio);
+#define HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE
struct vm_layout {
unsigned long kaslr_offset;
@@ -189,7 +199,11 @@ extern struct vm_layout vm_layout;
#define __kaslr_offset vm_layout.kaslr_offset
#define __kaslr_offset_phys vm_layout.kaslr_offset_phys
+#ifdef CONFIG_RANDOMIZE_IDENTITY_BASE
#define __identity_base vm_layout.identity_base
+#else
+#define __identity_base 0UL
+#endif
#define ident_map_size vm_layout.identity_size
static inline unsigned long kaslr_offset(void)
@@ -246,8 +260,8 @@ static inline unsigned long __phys_addr(unsigned long x, bool is_31bit)
#define phys_to_pfn(phys) ((phys) >> PAGE_SHIFT)
#define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT)
-#define phys_to_page(phys) pfn_to_page(phys_to_pfn(phys))
-#define page_to_phys(page) pfn_to_phys(page_to_pfn(page))
+#define phys_to_folio(phys) page_folio(phys_to_page(phys))
+#define folio_to_phys(page) pfn_to_phys(folio_pfn(folio))
static inline void *pfn_to_virt(unsigned long pfn)
{
@@ -268,7 +282,7 @@ static inline unsigned long virt_to_pfn(const void *kaddr)
#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
@@ -276,8 +290,9 @@ static inline unsigned long virt_to_pfn(const void *kaddr)
#define AMODE31_SIZE (3 * PAGE_SIZE)
#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
-#define __START_KERNEL 0x100000
#define __NO_KASLR_START_KERNEL CONFIG_KERNEL_IMAGE_BASE
#define __NO_KASLR_END_KERNEL (__NO_KASLR_START_KERNEL + KERNEL_IMAGE_SIZE)
+#define TEXT_OFFSET 0x100000
+
#endif /* _S390_PAGE_H */
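DEFINE_PGVAL_FUNC() replaces the hand-written *_val() accessors with one template covering both typedef variants. Expanding DEFINE_PGVAL_FUNC(pte) by hand with CONFIG_STRICT_MM_TYPECHECKS enabled gives the same function the old code spelled out explicitly:

static __always_inline unsigned long pte_val(pte_t pte)
{
	return pte.pte;
}

Without strict typechecks the typedefs collapse to plain unsigned long and the generated body simply returns its argument.
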
diff --git a/arch/s390/include/asm/pai.h b/arch/s390/include/asm/pai.h
index 3f609565734b..534d0320e2aa 100644
--- a/arch/s390/include/asm/pai.h
+++ b/arch/s390/include/asm/pai.h
@@ -11,6 +11,7 @@
#include <linux/jump_label.h>
#include <asm/lowcore.h>
#include <asm/ptrace.h>
+#include <asm/asm.h>
struct qpaci_info_block {
u64 header;
@@ -33,12 +34,11 @@ static inline int qpaci(struct qpaci_info_block *info)
" lgr 0,%[size]\n"
" .insn s,0xb28f0000,%[info]\n"
" lgr %[size],0\n"
- " ipm %[cc]\n"
- " srl %[cc],28\n"
- : [cc] "=d" (cc), [info] "=Q" (*info), [size] "+&d" (size)
+ CC_IPM(cc)
+ : CC_OUT(cc, cc), [info] "=Q" (*info), [size] "+&d" (size)
:
- : "0", "cc", "memory");
- return cc ? (size + 1) * sizeof(u64) : 0;
+ : CC_CLOBBER_LIST("0", "memory"));
+ return CC_TRANSFORM(cc) ? (size + 1) * sizeof(u64) : 0;
}
#define PAI_CRYPTO_BASE 0x1000 /* First event number */
@@ -55,11 +55,11 @@ static __always_inline void pai_kernel_enter(struct pt_regs *regs)
return;
if (!static_branch_unlikely(&pai_key))
return;
- if (!S390_lowcore.ccd)
+ if (!get_lowcore()->ccd)
return;
if (!user_mode(regs))
return;
- WRITE_ONCE(S390_lowcore.ccd, S390_lowcore.ccd | PAI_CRYPTO_KERNEL_OFFSET);
+ WRITE_ONCE(get_lowcore()->ccd, get_lowcore()->ccd | PAI_CRYPTO_KERNEL_OFFSET);
}
static __always_inline void pai_kernel_exit(struct pt_regs *regs)
@@ -68,18 +68,16 @@ static __always_inline void pai_kernel_exit(struct pt_regs *regs)
return;
if (!static_branch_unlikely(&pai_key))
return;
- if (!S390_lowcore.ccd)
+ if (!get_lowcore()->ccd)
return;
if (!user_mode(regs))
return;
- WRITE_ONCE(S390_lowcore.ccd, S390_lowcore.ccd & ~PAI_CRYPTO_KERNEL_OFFSET);
+ WRITE_ONCE(get_lowcore()->ccd, get_lowcore()->ccd & ~PAI_CRYPTO_KERNEL_OFFSET);
}
-enum paievt_mode {
- PAI_MODE_NONE,
- PAI_MODE_SAMPLING,
- PAI_MODE_COUNTING,
-};
-
#define PAI_SAVE_AREA(x) ((x)->hw.event_base)
+#define PAI_CPU_MASK(x) ((x)->hw.addr_filters)
+#define PAI_PMU_IDX(x) ((x)->hw.last_tag)
+#define PAI_SWLIST(x) (&(x)->hw.tp_list)
+
#endif
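The open-coded ipm/srl pairs are converted to the CC_IPM()/CC_OUT()/CC_TRANSFORM() helpers from asm/asm.h, which use a flag-output operand when the compiler supports it and fall back to ipm otherwise. A hedged sketch of the same pattern on an unrelated condition-code-setting instruction (tprot picked only as an example):

#include <asm/asm.h>

static inline int example_cc_pattern(unsigned long addr)
{
	int cc;

	asm volatile(
		"	tprot	0(%[addr]),0\n"
		CC_IPM(cc)
		: CC_OUT(cc, cc)
		: [addr] "a" (addr)
		: CC_CLOBBER);
	return CC_TRANSFORM(cc);
}
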
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 30820a649e6e..a32f465ecf73 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -11,6 +11,9 @@
#include <asm/pci_insn.h>
#include <asm/sclp.h>
+#define ARCH_GENERIC_PCI_MMAP_RESOURCE 1
+#define arch_can_pci_mmap_wc() 1
+
#define PCIBIOS_MIN_IO 0x1000
#define PCIBIOS_MIN_MEM 0x10000000
@@ -96,7 +99,6 @@ struct zpci_bar_struct {
u8 size; /* order 2 exponent */
};
-struct s390_domain;
struct kvm_zdev;
#define ZPCI_FUNCTIONS_PER_BUS 256
@@ -107,9 +109,10 @@ struct zpci_bus {
struct list_head resources;
struct list_head bus_next;
struct resource bus_resource;
- int pchid;
+ int topo; /* TID if topo_is_tid, PCHID otherwise */
int domain_nr;
- bool multifunction;
+ u8 multifunction : 1;
+ u8 topo_is_tid : 1;
enum pci_bus_speed max_bus_speed;
};
@@ -130,17 +133,20 @@ struct zpci_dev {
u16 vfn; /* virtual function number */
u16 pchid; /* physical channel ID */
u16 maxstbl; /* Maximum store block size */
+ u16 rid; /* RID as supplied by firmware */
+ u16 tid; /* Topology for which RID is valid */
u8 pfgid; /* function group ID */
u8 pft; /* pci function type */
u8 port;
+ u8 fidparm;
u8 dtsm; /* Supported DT mask */
u8 rid_available : 1;
u8 has_hp_slot : 1;
u8 has_resources : 1;
u8 is_physfn : 1;
u8 util_str_avail : 1;
- u8 irqs_registered : 1;
- u8 reserved : 2;
+ u8 tid_avail : 1;
+ u8 rtr_avail : 1; /* Relaxed translation allowed */
unsigned int devfn; /* DEVFN part of the RID*/
u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */
@@ -181,9 +187,10 @@ struct zpci_dev {
struct dentry *debugfs_dev;
/* IOMMU and passthrough */
- struct s390_domain *s390_domain; /* s390 IOMMU domain data */
+ struct iommu_domain *s390_domain; /* attached IOMMU domain */
struct kvm_zdev *kzdev;
struct mutex kzdev_lock;
+ spinlock_t dom_lock; /* protect s390_domain change */
};
static inline bool zdev_enabled(struct zpci_dev *zdev)
@@ -191,7 +198,14 @@ static inline bool zdev_enabled(struct zpci_dev *zdev)
return (zdev->fh & (1UL << 31)) ? true : false;
}
-extern const struct attribute_group *zpci_attr_groups[];
+extern const struct attribute_group zpci_attr_group;
+extern const struct attribute_group pfip_attr_group;
+extern const struct attribute_group zpci_ident_attr_group;
+
+#define ARCH_PCI_DEV_GROUPS &zpci_attr_group, \
+ &pfip_attr_group, \
+ &zpci_ident_attr_group,
+
extern unsigned int s390_pci_force_floating __initdata;
extern unsigned int s390_pci_no_rid;
@@ -203,12 +217,15 @@ extern struct airq_iv *zpci_aif_sbv;
----------------------------------------------------------------------------- */
/* Base stuff */
struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state);
+int zpci_add_device(struct zpci_dev *zdev);
int zpci_enable_device(struct zpci_dev *);
+int zpci_reenable_device(struct zpci_dev *zdev);
int zpci_disable_device(struct zpci_dev *);
int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh);
int zpci_deconfigure_device(struct zpci_dev *zdev);
void zpci_device_reserved(struct zpci_dev *zdev);
bool zpci_is_device_configured(struct zpci_dev *zdev);
+int zpci_scan_devices(void);
int zpci_hot_reset_device(struct zpci_dev *zdev);
int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64, u8 *);
@@ -218,7 +235,7 @@ void zpci_update_fh(struct zpci_dev *zdev, u32 fh);
/* CLP */
int clp_setup_writeback_mio(void);
-int clp_scan_pci_devices(void);
+int clp_scan_pci_devices(struct list_head *scan_list);
int clp_query_pci_fn(struct zpci_dev *zdev);
int clp_enable_fh(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as);
int clp_disable_fh(struct zpci_dev *zdev, u32 *fh);
@@ -228,9 +245,20 @@ int clp_refresh_fh(u32 fid, u32 *fh);
/* UID */
void update_uid_checking(bool new);
+/* Firmware Sysfs */
+int __init __zpci_fw_sysfs_init(void);
+
+static inline int __init zpci_fw_sysfs_init(void)
+{
+ if (IS_ENABLED(CONFIG_SYSFS))
+ return __zpci_fw_sysfs_init();
+ return 0;
+}
+
/* IOMMU Interface */
int zpci_init_iommu(struct zpci_dev *zdev);
void zpci_destroy_iommu(struct zpci_dev *zdev);
+int zpci_iommu_register_ioat(struct zpci_dev *zdev, u8 *status);
#ifdef CONFIG_PCI
static inline bool zpci_use_mio(struct zpci_dev *zdev)
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index f0c677ddd270..7ebff39c84b3 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -110,7 +110,8 @@ struct clp_req_query_pci {
struct clp_rsp_query_pci {
struct clp_rsp_hdr hdr;
u16 vfn; /* virtual fn number */
- u16 : 3;
+ u16 : 2;
+ u16 tid_avail : 1;
u16 rid_avail : 1;
u16 is_physfn : 1;
u16 reserved1 : 1;
@@ -122,16 +123,18 @@ struct clp_rsp_query_pci {
u16 pchid;
__le32 bar[PCI_STD_NUM_BARS];
u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */
- u16 : 12;
- u16 port : 4;
+ u8 fidparm;
+ u8 reserved3 : 4;
+ u8 port : 4;
u8 fmb_len;
u8 pft; /* pci function type */
u64 sdma; /* start dma as */
u64 edma; /* end dma as */
#define ZPCI_RID_MASK_DEVFN 0x00ff
u16 rid; /* BUS/DEVFN PCI address */
- u16 reserved0;
- u32 reserved[10];
+ u32 reserved0;
+ u16 tid;
+ u32 reserved[9];
u32 uid; /* user defined id */
u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */
u32 reserved2[16];
@@ -153,7 +156,9 @@ struct clp_rsp_query_pci_grp {
u16 : 4;
u16 noi : 12; /* number of interrupts */
u8 version;
- u8 : 6;
+ u8 : 2;
+ u8 rtr : 1; /* Relaxed translation requirement */
+ u8 : 3;
u8 frame : 1;
u8 refresh : 1; /* TLB refresh mode */
u16 : 3;
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 42d7cc4262ca..d12e17201661 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -25,6 +25,7 @@ enum zpci_ioat_dtype {
#define ZPCI_KEY (PAGE_DEFAULT_KEY << 5)
#define ZPCI_TABLE_SIZE_RT (1UL << 42)
+#define ZPCI_TABLE_SIZE_RS (1UL << 53)
#define ZPCI_IOTA_STO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST)
#define ZPCI_IOTA_RTTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT)
@@ -55,6 +56,8 @@ enum zpci_ioat_dtype {
#define ZPCI_PT_BITS 8
#define ZPCI_ST_SHIFT (ZPCI_PT_BITS + PAGE_SHIFT)
#define ZPCI_RT_SHIFT (ZPCI_ST_SHIFT + ZPCI_TABLE_BITS)
+#define ZPCI_RS_SHIFT (ZPCI_RT_SHIFT + ZPCI_TABLE_BITS)
+#define ZPCI_RF_SHIFT (ZPCI_RS_SHIFT + ZPCI_TABLE_BITS)
#define ZPCI_RTE_FLAG_MASK 0x3fffUL
#define ZPCI_RTE_ADDR_MASK (~ZPCI_RTE_FLAG_MASK)
diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h
index e5f57cfe1d45..025c6dcbf893 100644
--- a/arch/s390/include/asm/pci_insn.h
+++ b/arch/s390/include/asm/pci_insn.h
@@ -16,11 +16,11 @@
#define ZPCI_PCI_ST_FUNC_NOT_AVAIL 40
#define ZPCI_PCI_ST_ALREADY_IN_RQ_STATE 44
-/* Load/Store return codes */
-#define ZPCI_PCI_LS_OK 0
-#define ZPCI_PCI_LS_ERR 1
-#define ZPCI_PCI_LS_BUSY 2
-#define ZPCI_PCI_LS_INVAL_HANDLE 3
+/* PCI instruction condition codes */
+#define ZPCI_CC_OK 0
+#define ZPCI_CC_ERR 1
+#define ZPCI_CC_BUSY 2
+#define ZPCI_CC_INVAL_HANDLE 3
/* Load/Store address space identifiers */
#define ZPCI_PCIAS_MEMIO_0 0
diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
index 2686bee800e3..43a5ea4ee20f 100644
--- a/arch/s390/include/asm/pci_io.h
+++ b/arch/s390/include/asm/pci_io.h
@@ -143,7 +143,7 @@ static inline int zpci_get_max_io_size(u64 src, u64 dst, int len, int max)
static inline int zpci_memcpy_fromio(void *dst,
const volatile void __iomem *src,
- unsigned long n)
+ size_t n)
{
int size, rc = 0;
@@ -162,7 +162,7 @@ static inline int zpci_memcpy_fromio(void *dst,
}
static inline int zpci_memcpy_toio(volatile void __iomem *dst,
- const void *src, unsigned long n)
+ const void *src, size_t n)
{
int size, rc = 0;
@@ -187,7 +187,7 @@ static inline int zpci_memcpy_toio(volatile void __iomem *dst,
}
static inline int zpci_memset_io(volatile void __iomem *dst,
- unsigned char val, size_t count)
+ int val, size_t count)
{
u8 *src = kmalloc(count, GFP_KERNEL);
int rc;
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index 264095dd84bc..5899f57f17d1 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -4,21 +4,13 @@
#include <linux/preempt.h>
#include <asm/cmpxchg.h>
+#include <asm/march.h>
/*
* s390 uses its own implementation for per cpu data, the offset of
* the cpu local data area is cached in the cpu's lowcore memory.
*/
-#define __my_cpu_offset S390_lowcore.percpu_offset
-
-/*
- * For 64 bit module code, the module may be more than 4G above the
- * per cpu area, use weak definitions to force the compiler to
- * generate external references.
- */
-#if defined(MODULE)
-#define ARCH_NEEDS_WEAK_PER_CPU
-#endif
+#define __my_cpu_offset get_lowcore()->percpu_offset
/*
* We use a compare-and-swap loop since that uses less cpu cycles than
@@ -50,7 +42,7 @@
#define this_cpu_or_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |)
#define this_cpu_or_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |)
-#ifndef CONFIG_HAVE_MARCH_Z196_FEATURES
+#ifndef MARCH_HAS_Z196_FEATURES
#define this_cpu_add_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
#define this_cpu_add_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
@@ -61,7 +53,7 @@
#define this_cpu_or_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |)
#define this_cpu_or_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |)
-#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+#else /* MARCH_HAS_Z196_FEATURES */
#define arch_this_cpu_add(pcp, val, op1, op2, szcast) \
{ \
@@ -73,13 +65,13 @@
if (__builtin_constant_p(val__) && \
((szcast)val__ > -129) && ((szcast)val__ < 128)) { \
asm volatile( \
- op2 " %[ptr__],%[val__]\n" \
+ op2 " %[ptr__],%[val__]" \
: [ptr__] "+Q" (*ptr__) \
: [val__] "i" ((szcast)val__) \
: "cc"); \
} else { \
asm volatile( \
- op1 " %[old__],%[val__],%[ptr__]\n" \
+ op1 " %[old__],%[val__],%[ptr__]" \
: [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
: [val__] "d" (val__) \
: "cc"); \
@@ -98,7 +90,7 @@
preempt_disable_notrace(); \
ptr__ = raw_cpu_ptr(&(pcp)); \
asm volatile( \
- op " %[old__],%[val__],%[ptr__]\n" \
+ op " %[old__],%[val__],%[ptr__]" \
: [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
: [val__] "d" (val__) \
: "cc"); \
@@ -117,7 +109,7 @@
preempt_disable_notrace(); \
ptr__ = raw_cpu_ptr(&(pcp)); \
asm volatile( \
- op " %[old__],%[val__],%[ptr__]\n" \
+ op " %[old__],%[val__],%[ptr__]" \
: [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
: [val__] "d" (val__) \
: "cc"); \
@@ -129,7 +121,7 @@
#define this_cpu_or_4(pcp, val) arch_this_cpu_to_op(pcp, val, "lao")
#define this_cpu_or_8(pcp, val) arch_this_cpu_to_op(pcp, val, "laog")
-#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+#endif /* MARCH_HAS_Z196_FEATURES */
#define arch_this_cpu_cmpxchg(pcp, oval, nval) \
({ \
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 9917e2717b2b..e53894cedf08 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -37,9 +37,9 @@ extern ssize_t cpumf_events_sysfs_show(struct device *dev,
/* Perf callbacks */
struct pt_regs;
-extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
-extern unsigned long perf_misc_flags(struct pt_regs *regs);
-#define perf_misc_flags(regs) perf_misc_flags(regs)
+extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_arch_misc_flags(struct pt_regs *regs);
+#define perf_arch_misc_flags(regs) perf_arch_misc_flags(regs)
#define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs
/* Perf pt_regs extension for sample-data-entry indicators */
@@ -48,31 +48,8 @@ struct perf_sf_sde_regs {
unsigned long reserved:63; /* reserved */
};
-/* Perf PMU definitions for the counter facility */
-#define PERF_CPUM_CF_MAX_CTR 0xffffUL /* Max ctr for ECCTR */
-
-/* Perf PMU definitions for the sampling facility */
-#define PERF_CPUM_SF_MAX_CTR 2
-#define PERF_EVENT_CPUM_SF 0xB0000UL /* Event: Basic-sampling */
-#define PERF_EVENT_CPUM_SF_DIAG 0xBD000UL /* Event: Combined-sampling */
-#define PERF_EVENT_CPUM_CF_DIAG 0xBC000UL /* Event: Counter sets */
-#define PERF_CPUM_SF_BASIC_MODE 0x0001 /* Basic-sampling flag */
-#define PERF_CPUM_SF_DIAG_MODE 0x0002 /* Diagnostic-sampling flag */
-#define PERF_CPUM_SF_MODE_MASK (PERF_CPUM_SF_BASIC_MODE| \
- PERF_CPUM_SF_DIAG_MODE)
-#define PERF_CPUM_SF_FREQ_MODE 0x0008 /* Sampling with frequency */
-
-#define REG_NONE 0
-#define REG_OVERFLOW 1
-#define OVERFLOW_REG(hwc) ((hwc)->extra_reg.config)
-#define SFB_ALLOC_REG(hwc) ((hwc)->extra_reg.alloc)
-#define TEAR_REG(hwc) ((hwc)->last_tag)
-#define SAMPL_RATE(hwc) ((hwc)->event_base)
-#define SAMPL_FLAGS(hwc) ((hwc)->config_base)
-#define SAMPL_DIAG_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
-#define SAMPLE_FREQ_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FREQ_MODE)
-
#define perf_arch_fetch_caller_regs(regs, __ip) do { \
+ (regs)->psw.mask = 0; \
(regs)->psw.addr = (__ip); \
(regs)->gprs[15] = (unsigned long)__builtin_frame_address(0) - \
offsetof(struct stack_frame, back_chain); \
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 7b84ef6dc4b6..a16e65072371 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -19,14 +19,17 @@
#define CRST_ALLOC_ORDER 2
-unsigned long *crst_table_alloc(struct mm_struct *);
+unsigned long *crst_table_alloc_noprof(struct mm_struct *);
+#define crst_table_alloc(...) alloc_hooks(crst_table_alloc_noprof(__VA_ARGS__))
void crst_table_free(struct mm_struct *, unsigned long *);
-unsigned long *page_table_alloc(struct mm_struct *);
-struct ptdesc *page_table_alloc_pgste(struct mm_struct *mm);
+unsigned long *page_table_alloc_noprof(struct mm_struct *);
+#define page_table_alloc(...) alloc_hooks(page_table_alloc_noprof(__VA_ARGS__))
void page_table_free(struct mm_struct *, unsigned long *);
+
+struct ptdesc *page_table_alloc_pgste_noprof(struct mm_struct *mm);
+#define page_table_alloc_pgste(...) alloc_hooks(page_table_alloc_pgste_noprof(__VA_ARGS__))
void page_table_free_pgste(struct ptdesc *ptdesc);
-extern int page_table_allocate_pgste;
static inline void crst_table_init(unsigned long *crst, unsigned long entry)
{
@@ -49,54 +52,70 @@ static inline unsigned long check_asce_limit(struct mm_struct *mm, unsigned long
return addr;
}
-static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline p4d_t *p4d_alloc_one_noprof(struct mm_struct *mm, unsigned long address)
{
- unsigned long *table = crst_table_alloc(mm);
+ unsigned long *table = crst_table_alloc_noprof(mm);
+
+ if (!table)
+ return NULL;
+ crst_table_init(table, _REGION2_ENTRY_EMPTY);
+ pagetable_p4d_ctor(virt_to_ptdesc(table));
- if (table)
- crst_table_init(table, _REGION2_ENTRY_EMPTY);
return (p4d_t *) table;
}
+#define p4d_alloc_one(...) alloc_hooks(p4d_alloc_one_noprof(__VA_ARGS__))
static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
{
- if (!mm_p4d_folded(mm))
- crst_table_free(mm, (unsigned long *) p4d);
+ if (mm_p4d_folded(mm))
+ return;
+
+ pagetable_dtor(virt_to_ptdesc(p4d));
+ crst_table_free(mm, (unsigned long *) p4d);
}
-static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline pud_t *pud_alloc_one_noprof(struct mm_struct *mm, unsigned long address)
{
- unsigned long *table = crst_table_alloc(mm);
- if (table)
- crst_table_init(table, _REGION3_ENTRY_EMPTY);
+ unsigned long *table = crst_table_alloc_noprof(mm);
+
+ if (!table)
+ return NULL;
+ crst_table_init(table, _REGION3_ENTRY_EMPTY);
+ pagetable_pud_ctor(virt_to_ptdesc(table));
+
return (pud_t *) table;
}
+#define pud_alloc_one(...) alloc_hooks(pud_alloc_one_noprof(__VA_ARGS__))
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
{
- if (!mm_pud_folded(mm))
- crst_table_free(mm, (unsigned long *) pud);
+ if (mm_pud_folded(mm))
+ return;
+
+ pagetable_dtor(virt_to_ptdesc(pud));
+ crst_table_free(mm, (unsigned long *) pud);
}
-static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
+static inline pmd_t *pmd_alloc_one_noprof(struct mm_struct *mm, unsigned long vmaddr)
{
- unsigned long *table = crst_table_alloc(mm);
+ unsigned long *table = crst_table_alloc_noprof(mm);
if (!table)
return NULL;
crst_table_init(table, _SEGMENT_ENTRY_EMPTY);
- if (!pagetable_pmd_ctor(virt_to_ptdesc(table))) {
+ if (!pagetable_pmd_ctor(mm, virt_to_ptdesc(table))) {
crst_table_free(mm, table);
return NULL;
}
return (pmd_t *) table;
}
+#define pmd_alloc_one(...) alloc_hooks(pmd_alloc_one_noprof(__VA_ARGS__))
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
{
if (mm_pmd_folded(mm))
return;
- pagetable_pmd_dtor(virt_to_ptdesc(pmd));
+ pagetable_dtor(virt_to_ptdesc(pmd));
crst_table_free(mm, (unsigned long *) pmd);
}
@@ -115,13 +134,21 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
set_pud(pud, __pud(_REGION3_ENTRY | __pa(pmd)));
}
-static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+static inline pgd_t *pgd_alloc_noprof(struct mm_struct *mm)
{
- return (pgd_t *) crst_table_alloc(mm);
+ unsigned long *table = crst_table_alloc_noprof(mm);
+
+ if (!table)
+ return NULL;
+ pagetable_pgd_ctor(virt_to_ptdesc(table));
+
+ return (pgd_t *) table;
}
+#define pgd_alloc(...) alloc_hooks(pgd_alloc_noprof(__VA_ARGS__))
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
+ pagetable_dtor(virt_to_ptdesc(pgd));
crst_table_free(mm, (unsigned long *) pgd);
}
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 70b6ee557eb2..bca9b29778c3 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -14,10 +14,10 @@
#include <linux/sched.h>
#include <linux/mm_types.h>
+#include <linux/cpufeature.h>
#include <linux/page-flags.h>
#include <linux/radix-tree.h>
#include <linux/atomic.h>
-#include <asm/sections.h>
#include <asm/ctlreg.h>
#include <asm/bug.h>
#include <asm/page.h>
@@ -35,7 +35,7 @@ enum {
PG_DIRECT_MAP_MAX
};
-extern atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
+extern atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX];
static inline void update_page_count(int level, long count)
{
@@ -85,14 +85,14 @@ extern unsigned long zero_page_mask;
* happen without trampolines and in addition the placement within a
* 2GB frame is branch prediction unit friendly.
*/
-extern unsigned long __bootdata_preserved(VMALLOC_START);
-extern unsigned long __bootdata_preserved(VMALLOC_END);
+extern unsigned long VMALLOC_START;
+extern unsigned long VMALLOC_END;
#define VMALLOC_DEFAULT_SIZE ((512UL << 30) - MODULES_LEN)
-extern struct page *__bootdata_preserved(vmemmap);
-extern unsigned long __bootdata_preserved(vmemmap_size);
+extern struct page *vmemmap;
+extern unsigned long vmemmap_size;
-extern unsigned long __bootdata_preserved(MODULES_VADDR);
-extern unsigned long __bootdata_preserved(MODULES_END);
+extern unsigned long MODULES_VADDR;
+extern unsigned long MODULES_END;
#define MODULES_VADDR MODULES_VADDR
#define MODULES_END MODULES_END
#define MODULES_LEN (1UL << 31)
@@ -107,12 +107,26 @@ static inline int is_module_addr(void *addr)
return 1;
}
+#ifdef CONFIG_KMSAN
+#define KMSAN_VMALLOC_SIZE (VMALLOC_END - VMALLOC_START)
+#define KMSAN_VMALLOC_SHADOW_START VMALLOC_END
+#define KMSAN_VMALLOC_SHADOW_END (KMSAN_VMALLOC_SHADOW_START + KMSAN_VMALLOC_SIZE)
+#define KMSAN_VMALLOC_ORIGIN_START KMSAN_VMALLOC_SHADOW_END
+#define KMSAN_VMALLOC_ORIGIN_END (KMSAN_VMALLOC_ORIGIN_START + KMSAN_VMALLOC_SIZE)
+#define KMSAN_MODULES_SHADOW_START KMSAN_VMALLOC_ORIGIN_END
+#define KMSAN_MODULES_SHADOW_END (KMSAN_MODULES_SHADOW_START + MODULES_LEN)
+#define KMSAN_MODULES_ORIGIN_START KMSAN_MODULES_SHADOW_END
+#define KMSAN_MODULES_ORIGIN_END (KMSAN_MODULES_ORIGIN_START + MODULES_LEN)
+#endif
+
#ifdef CONFIG_RANDOMIZE_BASE
#define KASLR_LEN (1UL << 31)
#else
#define KASLR_LEN 0UL
#endif
+void setup_protection_map(void);
+
/*
* A 64 bit pagetable entry of S390 has following format:
* | PFRA |0IPC| OS |
@@ -265,7 +279,8 @@ static inline int is_module_addr(void *addr)
#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID)
#define _REGION2_ENTRY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH)
#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID)
-#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH)
+#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH | \
+ _REGION3_ENTRY_PRESENT)
#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID)
#define _REGION3_ENTRY_HARDWARE_BITS 0xfffffffffffff6ffUL
@@ -273,18 +288,27 @@ static inline int is_module_addr(void *addr)
#define _REGION3_ENTRY_ORIGIN_LARGE ~0x7fffffffUL /* large page address */
#define _REGION3_ENTRY_DIRTY 0x2000 /* SW region dirty bit */
#define _REGION3_ENTRY_YOUNG 0x1000 /* SW region young bit */
+#define _REGION3_ENTRY_COMM 0x0010 /* Common-Region, marks swap entry */
#define _REGION3_ENTRY_LARGE 0x0400 /* RTTE-format control, large page */
-#define _REGION3_ENTRY_WRITE 0x0002 /* SW region write bit */
-#define _REGION3_ENTRY_READ 0x0001 /* SW region read bit */
+#define _REGION3_ENTRY_WRITE 0x8000 /* SW region write bit */
+#define _REGION3_ENTRY_READ 0x4000 /* SW region read bit */
#ifdef CONFIG_MEM_SOFT_DIRTY
-#define _REGION3_ENTRY_SOFT_DIRTY 0x4000 /* SW region soft dirty bit */
+#define _REGION3_ENTRY_SOFT_DIRTY 0x0002 /* SW region soft dirty bit */
#else
#define _REGION3_ENTRY_SOFT_DIRTY 0x0000 /* SW region soft dirty bit */
#endif
#define _REGION_ENTRY_BITS 0xfffffffffffff22fUL
+/*
+ * SW region present bit. For non-leaf region-third-table entries, bits 62-63
+ * indicate the TABLE LENGTH and both must be set to 1. Such entries are
+ * therefore always considered present, so it is safe to use bit 63 as the
+ * PRESENT bit for PUDs.
+ */
+#define _REGION3_ENTRY_PRESENT 0x0001
+
/* Bits in the segment table entry */
#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe3fUL
#define _SEGMENT_ENTRY_HARDWARE_BITS 0xfffffffffffffe3cUL
@@ -296,21 +320,29 @@ static inline int is_module_addr(void *addr)
#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */
#define _SEGMENT_ENTRY_TYPE_MASK 0x0c /* segment table type mask */
-#define _SEGMENT_ENTRY (0)
+#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PRESENT)
#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
#define _SEGMENT_ENTRY_DIRTY 0x2000 /* SW segment dirty bit */
#define _SEGMENT_ENTRY_YOUNG 0x1000 /* SW segment young bit */
+
+#define _SEGMENT_ENTRY_COMM 0x0010 /* Common-Segment, marks swap entry */
#define _SEGMENT_ENTRY_LARGE 0x0400 /* STE-format control, large page */
-#define _SEGMENT_ENTRY_WRITE 0x0002 /* SW segment write bit */
-#define _SEGMENT_ENTRY_READ 0x0001 /* SW segment read bit */
+#define _SEGMENT_ENTRY_WRITE 0x8000 /* SW segment write bit */
+#define _SEGMENT_ENTRY_READ 0x4000 /* SW segment read bit */
#ifdef CONFIG_MEM_SOFT_DIRTY
-#define _SEGMENT_ENTRY_SOFT_DIRTY 0x4000 /* SW segment soft dirty bit */
+#define _SEGMENT_ENTRY_SOFT_DIRTY 0x0002 /* SW segment soft dirty bit */
#else
#define _SEGMENT_ENTRY_SOFT_DIRTY 0x0000 /* SW segment soft dirty bit */
#endif
+#define _SEGMENT_ENTRY_PRESENT 0x0001 /* SW segment present bit */
+
+/* Common bits in region and segment table entries, for swap entries */
+#define _RST_ENTRY_COMM 0x0010 /* Common-Region/Segment, marks swap entry */
+#define _RST_ENTRY_INVALID 0x0020 /* invalid region/segment table entry */
+
#define _CRST_ENTRIES 2048 /* number of region/segment table entries */
#define _PAGE_ENTRIES 256 /* number of page table entries */
@@ -326,7 +358,7 @@ static inline int is_module_addr(void *addr)
#define _REGION2_INDEX (0x7ffUL << _REGION2_SHIFT)
#define _REGION3_INDEX (0x7ffUL << _REGION3_SHIFT)
#define _SEGMENT_INDEX (0x7ffUL << _SEGMENT_SHIFT)
-#define _PAGE_INDEX (0xffUL << _PAGE_SHIFT)
+#define _PAGE_INDEX (0xffUL << PAGE_SHIFT)
#define _REGION1_SIZE (1UL << _REGION1_SHIFT)
#define _REGION2_SIZE (1UL << _REGION2_SHIFT)
@@ -389,9 +421,10 @@ static inline int is_module_addr(void *addr)
#define PGSTE_HC_BIT 0x0020000000000000UL
#define PGSTE_GR_BIT 0x0004000000000000UL
#define PGSTE_GC_BIT 0x0002000000000000UL
-#define PGSTE_UC_BIT 0x0000800000000000UL /* user dirty (migration) */
-#define PGSTE_IN_BIT 0x0000400000000000UL /* IPTE notify bit */
-#define PGSTE_VSIE_BIT 0x0000200000000000UL /* ref'd in a shadow table */
+#define PGSTE_ST2_MASK 0x0000ffff00000000UL
+#define PGSTE_UC_BIT 0x0000000000008000UL /* user dirty (migration) */
+#define PGSTE_IN_BIT 0x0000000000004000UL /* IPTE notify bit */
+#define PGSTE_VSIE_BIT 0x0000000000002000UL /* ref'd in a shadow table */
/* Guest Page State used for virtualization */
#define _PGSTE_GPS_ZERO 0x0000000080000000UL
@@ -413,90 +446,107 @@ static inline int is_module_addr(void *addr)
/*
* Page protection definitions.
*/
-#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID | _PAGE_PROTECT)
-#define PAGE_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | \
+#define __PAGE_NONE (_PAGE_PRESENT | _PAGE_INVALID | _PAGE_PROTECT)
+#define __PAGE_RO (_PAGE_PRESENT | _PAGE_READ | \
_PAGE_NOEXEC | _PAGE_INVALID | _PAGE_PROTECT)
-#define PAGE_RX __pgprot(_PAGE_PRESENT | _PAGE_READ | \
+#define __PAGE_RX (_PAGE_PRESENT | _PAGE_READ | \
_PAGE_INVALID | _PAGE_PROTECT)
-#define PAGE_RW __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+#define __PAGE_RW (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
_PAGE_NOEXEC | _PAGE_INVALID | _PAGE_PROTECT)
-#define PAGE_RWX __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+#define __PAGE_RWX (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
_PAGE_INVALID | _PAGE_PROTECT)
-
-#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+#define __PAGE_SHARED (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
_PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC)
-#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+#define __PAGE_KERNEL (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
_PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC)
-#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \
+#define __PAGE_KERNEL_RO (_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \
_PAGE_PROTECT | _PAGE_NOEXEC)
-#define PAGE_KERNEL_EXEC __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
- _PAGE_YOUNG | _PAGE_DIRTY)
-/*
- * On s390 the page table entry has an invalid bit and a read-only bit.
- * Read permission implies execute permission and write permission
- * implies read permission.
- */
- /*xwr*/
+extern unsigned long page_noexec_mask;
+
+#define __pgprot_page_mask(x) __pgprot((x) & page_noexec_mask)
+
+#define PAGE_NONE __pgprot_page_mask(__PAGE_NONE)
+#define PAGE_RO __pgprot_page_mask(__PAGE_RO)
+#define PAGE_RX __pgprot_page_mask(__PAGE_RX)
+#define PAGE_RW __pgprot_page_mask(__PAGE_RW)
+#define PAGE_RWX __pgprot_page_mask(__PAGE_RWX)
+#define PAGE_SHARED __pgprot_page_mask(__PAGE_SHARED)
+#define PAGE_KERNEL __pgprot_page_mask(__PAGE_KERNEL)
+#define PAGE_KERNEL_RO __pgprot_page_mask(__PAGE_KERNEL_RO)
/*
* Segment entry (large page) protection definitions.
*/
-#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \
+#define __SEGMENT_NONE (_SEGMENT_ENTRY_PRESENT | \
+ _SEGMENT_ENTRY_INVALID | \
_SEGMENT_ENTRY_PROTECT)
-#define SEGMENT_RO __pgprot(_SEGMENT_ENTRY_PROTECT | \
+#define __SEGMENT_RO (_SEGMENT_ENTRY_PRESENT | \
+ _SEGMENT_ENTRY_PROTECT | \
_SEGMENT_ENTRY_READ | \
_SEGMENT_ENTRY_NOEXEC)
-#define SEGMENT_RX __pgprot(_SEGMENT_ENTRY_PROTECT | \
+#define __SEGMENT_RX (_SEGMENT_ENTRY_PRESENT | \
+ _SEGMENT_ENTRY_PROTECT | \
_SEGMENT_ENTRY_READ)
-#define SEGMENT_RW __pgprot(_SEGMENT_ENTRY_READ | \
+#define __SEGMENT_RW (_SEGMENT_ENTRY_PRESENT | \
+ _SEGMENT_ENTRY_READ | \
_SEGMENT_ENTRY_WRITE | \
_SEGMENT_ENTRY_NOEXEC)
-#define SEGMENT_RWX __pgprot(_SEGMENT_ENTRY_READ | \
+#define __SEGMENT_RWX (_SEGMENT_ENTRY_PRESENT | \
+ _SEGMENT_ENTRY_READ | \
_SEGMENT_ENTRY_WRITE)
-#define SEGMENT_KERNEL __pgprot(_SEGMENT_ENTRY | \
+#define __SEGMENT_KERNEL (_SEGMENT_ENTRY | \
_SEGMENT_ENTRY_LARGE | \
_SEGMENT_ENTRY_READ | \
_SEGMENT_ENTRY_WRITE | \
_SEGMENT_ENTRY_YOUNG | \
_SEGMENT_ENTRY_DIRTY | \
_SEGMENT_ENTRY_NOEXEC)
-#define SEGMENT_KERNEL_RO __pgprot(_SEGMENT_ENTRY | \
+#define __SEGMENT_KERNEL_RO (_SEGMENT_ENTRY | \
_SEGMENT_ENTRY_LARGE | \
_SEGMENT_ENTRY_READ | \
_SEGMENT_ENTRY_YOUNG | \
_SEGMENT_ENTRY_PROTECT | \
_SEGMENT_ENTRY_NOEXEC)
-#define SEGMENT_KERNEL_EXEC __pgprot(_SEGMENT_ENTRY | \
- _SEGMENT_ENTRY_LARGE | \
- _SEGMENT_ENTRY_READ | \
- _SEGMENT_ENTRY_WRITE | \
- _SEGMENT_ENTRY_YOUNG | \
- _SEGMENT_ENTRY_DIRTY)
+
+extern unsigned long segment_noexec_mask;
+
+#define __pgprot_segment_mask(x) __pgprot((x) & segment_noexec_mask)
+
+#define SEGMENT_NONE __pgprot_segment_mask(__SEGMENT_NONE)
+#define SEGMENT_RO __pgprot_segment_mask(__SEGMENT_RO)
+#define SEGMENT_RX __pgprot_segment_mask(__SEGMENT_RX)
+#define SEGMENT_RW __pgprot_segment_mask(__SEGMENT_RW)
+#define SEGMENT_RWX __pgprot_segment_mask(__SEGMENT_RWX)
+#define SEGMENT_KERNEL __pgprot_segment_mask(__SEGMENT_KERNEL)
+#define SEGMENT_KERNEL_RO __pgprot_segment_mask(__SEGMENT_KERNEL_RO)
/*
* Region3 entry (large page) protection definitions.
*/
-#define REGION3_KERNEL __pgprot(_REGION_ENTRY_TYPE_R3 | \
- _REGION3_ENTRY_LARGE | \
- _REGION3_ENTRY_READ | \
- _REGION3_ENTRY_WRITE | \
- _REGION3_ENTRY_YOUNG | \
+#define __REGION3_KERNEL (_REGION_ENTRY_TYPE_R3 | \
+ _REGION3_ENTRY_PRESENT | \
+ _REGION3_ENTRY_LARGE | \
+ _REGION3_ENTRY_READ | \
+ _REGION3_ENTRY_WRITE | \
+ _REGION3_ENTRY_YOUNG | \
_REGION3_ENTRY_DIRTY | \
_REGION_ENTRY_NOEXEC)
-#define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \
- _REGION3_ENTRY_LARGE | \
- _REGION3_ENTRY_READ | \
- _REGION3_ENTRY_YOUNG | \
- _REGION_ENTRY_PROTECT | \
- _REGION_ENTRY_NOEXEC)
-#define REGION3_KERNEL_EXEC __pgprot(_REGION_ENTRY_TYPE_R3 | \
- _REGION3_ENTRY_LARGE | \
- _REGION3_ENTRY_READ | \
- _REGION3_ENTRY_WRITE | \
- _REGION3_ENTRY_YOUNG | \
- _REGION3_ENTRY_DIRTY)
+#define __REGION3_KERNEL_RO (_REGION_ENTRY_TYPE_R3 | \
+ _REGION3_ENTRY_PRESENT | \
+ _REGION3_ENTRY_LARGE | \
+ _REGION3_ENTRY_READ | \
+ _REGION3_ENTRY_YOUNG | \
+ _REGION_ENTRY_PROTECT | \
+ _REGION_ENTRY_NOEXEC)
+
+extern unsigned long region_noexec_mask;
+
+#define __pgprot_region_mask(x) __pgprot((x) & region_noexec_mask)
+
+#define REGION3_KERNEL __pgprot_region_mask(__REGION3_KERNEL)
+#define REGION3_KERNEL_RO __pgprot_region_mask(__REGION3_KERNEL_RO)
static inline bool mm_p4d_folded(struct mm_struct *mm)
{
@@ -534,13 +584,14 @@ static inline int mm_is_protected(struct mm_struct *mm)
return 0;
}
-static inline int mm_alloc_pgste(struct mm_struct *mm)
+static inline pgste_t clear_pgste_bit(pgste_t pgste, unsigned long mask)
{
-#ifdef CONFIG_PGSTE
- if (unlikely(mm->context.alloc_pgste))
- return 1;
-#endif
- return 0;
+ return __pgste(pgste_val(pgste) & ~mask);
+}
+
+static inline pgste_t set_pgste_bit(pgste_t pgste, unsigned long mask)
+{
+ return __pgste(pgste_val(pgste) | mask);
}
static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
@@ -597,19 +648,15 @@ static inline int mm_uses_skeys(struct mm_struct *mm)
return 0;
}
-static inline void csp(unsigned int *ptr, unsigned int old, unsigned int new)
-{
- union register_pair r1 = { .even = old, .odd = new, };
- unsigned long address = (unsigned long)ptr | 1;
-
- asm volatile(
- " csp %[r1],%[address]"
- : [r1] "+&d" (r1.pair), "+m" (*ptr)
- : [address] "d" (address)
- : "cc");
-}
-
-static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new)
+/**
+ * cspg() - Compare and Swap and Purge (CSPG)
+ * @ptr: Pointer to the value to be exchanged
+ * @old: The expected old value
+ * @new: The new value
+ *
+ * Return: True if compare and swap was successful, otherwise false.
+ */
+static inline bool cspg(unsigned long *ptr, unsigned long old, unsigned long new)
{
union register_pair r1 = { .even = old, .odd = new, };
unsigned long address = (unsigned long)ptr | 1;
@@ -619,6 +666,7 @@ static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new
: [r1] "+&d" (r1.pair), "+m" (*ptr)
: [address] "d" (address)
: "cc");
+ return old == r1.even;
}
#define CRDTE_DTT_PAGE 0x00UL
@@ -627,7 +675,18 @@ static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new
#define CRDTE_DTT_REGION2 0x18UL
#define CRDTE_DTT_REGION1 0x1cUL
-static inline void crdte(unsigned long old, unsigned long new,
+/**
+ * crdte() - Compare and Replace DAT Table Entry
+ * @old: The expected old value
+ * @new: The new value
+ * @table: Pointer to the value to be exchanged
+ * @dtt: Table type of the table to be exchanged
+ * @address: The address mapped by the entry to be replaced
+ * @asce: The ASCE of this entry
+ *
+ * Return: True if compare and replace was successful, otherwise false.
+ */
+static inline bool crdte(unsigned long old, unsigned long new,
unsigned long *table, unsigned long dtt,
unsigned long address, unsigned long asce)
{
@@ -638,6 +697,7 @@ static inline void crdte(unsigned long old, unsigned long new,
: [r1] "+&d" (r1.pair)
: [r2] "d" (r2.pair), [asce] "a" (asce)
: "memory", "cc");
+ return old == r1.even;
}
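/*
 * Illustrative sketch (not part of the patch): with crdte() now returning a
 * bool, a caller can retry until the entry is replaced atomically. The helper
 * name is hypothetical, and CRDTE_DTT_SEGMENT is assumed to be defined next
 * to the other CRDTE_DTT_* values above; only crdte() itself comes from this
 * hunk.
 */
static inline void demo_replace_segment_entry(unsigned long *ste, unsigned long new,
					      unsigned long addr, unsigned long asce)
{
	unsigned long old;

	do {
		old = READ_ONCE(*ste);		/* snapshot the current entry */
	} while (!crdte(old, new, ste, CRDTE_DTT_SEGMENT, addr, asce));
}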
/*
@@ -713,7 +773,7 @@ static inline int pud_present(pud_t pud)
{
if (pud_folded(pud))
return 1;
- return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL;
+ return (pud_val(pud) & _REGION3_ENTRY_PRESENT) != 0;
}
static inline int pud_none(pud_t pud)
@@ -728,13 +788,18 @@ static inline bool pud_leaf(pud_t pud)
{
if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3)
return 0;
- return !!(pud_val(pud) & _REGION3_ENTRY_LARGE);
+ return (pud_present(pud) && (pud_val(pud) & _REGION3_ENTRY_LARGE) != 0);
+}
+
+static inline int pmd_present(pmd_t pmd)
+{
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_PRESENT) != 0;
}
#define pmd_leaf pmd_leaf
static inline bool pmd_leaf(pmd_t pmd)
{
- return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
+ return (pmd_present(pmd) && (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0);
}
static inline int pmd_bad(pmd_t pmd)
@@ -766,11 +831,6 @@ static inline int p4d_bad(p4d_t p4d)
return (p4d_val(p4d) & ~_REGION_ENTRY_BITS) != 0;
}
-static inline int pmd_present(pmd_t pmd)
-{
- return pmd_val(pmd) != _SEGMENT_ENTRY_EMPTY;
-}
-
static inline int pmd_none(pmd_t pmd)
{
return pmd_val(pmd) == _SEGMENT_ENTRY_EMPTY;
@@ -843,7 +903,7 @@ static inline int pmd_protnone(pmd_t pmd)
}
#endif
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
@@ -891,6 +951,12 @@ static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
return clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_SOFT_DIRTY));
}
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+#define pmd_swp_soft_dirty(pmd) pmd_soft_dirty(pmd)
+#define pmd_swp_mksoft_dirty(pmd) pmd_mksoft_dirty(pmd)
+#define pmd_swp_clear_soft_dirty(pmd) pmd_clear_soft_dirty(pmd)
+#endif
+
/*
* query functions pte_write/pte_dirty/pte_young only work if
* pte_present() is true. Undefined behaviour if not..
@@ -922,6 +988,7 @@ static inline int pte_unused(pte_t pte)
* young/old accounting is not supported, i.e _PAGE_PROTECT and _PAGE_INVALID
* must not be set.
*/
+#define pte_pgprot pte_pgprot
static inline pgprot_t pte_pgprot(pte_t pte)
{
unsigned long pte_flags = pte_val(pte) & _PAGE_CHG_MASK;
@@ -1075,17 +1142,15 @@ static inline pte_t pte_mkhuge(pte_t pte)
#define IPTE_NODAT 0x400
#define IPTE_GUEST_ASCE 0x800
-static __always_inline void __ptep_rdp(unsigned long addr, pte_t *ptep,
- unsigned long opt, unsigned long asce,
- int local)
+static __always_inline void __ptep_rdp(unsigned long addr, pte_t *ptep, int local)
{
unsigned long pto;
pto = __pa(ptep) & ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
- asm volatile(".insn rrf,0xb98b0000,%[r1],%[r2],%[asce],%[m4]"
+ asm volatile(".insn rrf,0xb98b0000,%[r1],%[r2],%%r0,%[m4]"
: "+m" (*ptep)
- : [r1] "a" (pto), [r2] "a" ((addr & PAGE_MASK) | opt),
- [asce] "a" (asce), [m4] "i" (local));
+ : [r1] "a" (pto), [r2] "a" (addr & PAGE_MASK),
+ [m4] "i" (local));
}
static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep,
@@ -1167,7 +1232,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
/* At this point the reference through the mapping is still present */
if (mm_is_protected(mm) && pte_present(res))
- uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
+ uv_convert_from_secure_pte(res);
return res;
}
@@ -1185,7 +1250,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
res = ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
/* At this point the reference through the mapping is still present */
if (mm_is_protected(vma->vm_mm) && pte_present(res))
- uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
+ uv_convert_from_secure_pte(res);
return res;
}
@@ -1217,14 +1282,14 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
* The notifier should have destroyed all protected vCPUs at this
* point, so the destroy should be successful.
*/
- if (full && !uv_destroy_owned_page(pte_val(res) & PAGE_MASK))
+ if (full && !uv_destroy_pte(res))
return res;
/*
* If something went wrong and the page could not be destroyed, or
* if this is not a mm teardown, the slower export is used as
* fallback instead.
*/
- uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
+ uv_convert_from_secure_pte(res);
return res;
}
@@ -1268,8 +1333,8 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
* PTE does not have _PAGE_PROTECT set, to avoid unnecessary overhead.
* A local RDP can be used to do the flush.
*/
- if (MACHINE_HAS_RDP && !(pte_val(*ptep) & _PAGE_PROTECT))
- __ptep_rdp(address, ptep, 0, 0, 1);
+ if (cpu_has_rdp() && !(pte_val(*ptep) & _PAGE_PROTECT))
+ __ptep_rdp(address, ptep, 1);
}
#define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault
@@ -1283,7 +1348,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
{
if (pte_same(*ptep, entry))
return 0;
- if (MACHINE_HAS_RDP && !mm_has_pgste(vma->vm_mm) && pte_allow_rdp(*ptep, entry))
+ if (cpu_has_rdp() && !mm_has_pgste(vma->vm_mm) && pte_allow_rdp(*ptep, entry))
ptep_reset_dat_prot(vma->vm_mm, addr, ptep, entry);
else
ptep_xchg_direct(vma->vm_mm, addr, ptep, entry);
@@ -1323,7 +1388,6 @@ int set_pgste_bits(struct mm_struct *mm, unsigned long addr,
int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep);
int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
unsigned long *oldpte, unsigned long *oldpgste);
-void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr);
void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr);
void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr);
void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr);
@@ -1331,9 +1395,6 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr);
#define pgprot_writecombine pgprot_writecombine
pgprot_t pgprot_writecombine(pgprot_t prot);
-#define pgprot_writethrough pgprot_writethrough
-pgprot_t pgprot_writethrough(pgprot_t prot);
-
#define PFN_PTE_SHIFT PAGE_SHIFT
/*
@@ -1375,21 +1436,9 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
pte_t __pte;
__pte = __pte(physpage | pgprot_val(pgprot));
- if (!MACHINE_HAS_NX)
- __pte = clear_pte_bit(__pte, __pgprot(_PAGE_NOEXEC));
return pte_mkyoung(__pte);
}
-static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
-{
- unsigned long physpage = page_to_phys(page);
- pte_t __pte = mk_pte_phys(physpage, pgprot);
-
- if (pte_write(__pte) && PageDirty(page))
- __pte = pte_mkdirty(__pte);
- return __pte;
-}
-
#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
#define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
@@ -1628,10 +1677,10 @@ static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */
-static inline void __pmdp_csp(pmd_t *pmdp)
+static inline void __pmdp_cspg(pmd_t *pmdp)
{
- csp((unsigned int *)pmdp + 1, pmd_val(*pmdp),
- pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID);
+ cspg((unsigned long *)pmdp, pmd_val(*pmdp),
+ pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID);
}
#define IDTE_GLOBAL 0
@@ -1744,8 +1793,6 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t entry)
{
- if (!MACHINE_HAS_NX)
- entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC));
set_pmd(pmdp, entry);
}
@@ -1813,17 +1860,16 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
#define pmdp_collapse_flush pmdp_collapse_flush
#define pfn_pmd(pfn, pgprot) mk_pmd_phys(((pfn) << PAGE_SHIFT), (pgprot))
-#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
static inline int pmd_trans_huge(pmd_t pmd)
{
- return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE;
+ return pmd_leaf(pmd);
}
#define has_transparent_hugepage has_transparent_hugepage
static inline int has_transparent_hugepage(void)
{
- return MACHINE_HAS_EDAT1 ? 1 : 0;
+ return cpu_has_edat1() ? 1 : 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -1877,6 +1923,92 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+/*
+ * 64 bit swap entry format for REGION3 and SEGMENT table entries (RSTE)
+ * Bits 59 and 63 are used to indicate the swap entry. Bit 58 marks the rste
+ * as invalid.
+ * A swap entry is indicated by bit pattern (rste & 0x011) == 0x010
+ * | offset |Xtype |11TT|S0|
+ * |0000000000111111111122222222223333333333444444444455|555555|5566|66|
+ * |0123456789012345678901234567890123456789012345678901|234567|8901|23|
+ *
+ * Bits 0-51 store the offset.
+ * Bit 52 (X) is unused.
+ * Bits 53-57 store the type.
+ * Bits 60-61 (TT) indicate the table type: 0x01 for REGION3 and 0x00 for SEGMENT.
+ * Bit 62 (S) is used for softdirty tracking.
+ */
+
+#define __SWP_OFFSET_MASK_RSTE ((1UL << 52) - 1)
+#define __SWP_OFFSET_SHIFT_RSTE 12
+#define __SWP_TYPE_MASK_RSTE ((1UL << 5) - 1)
+#define __SWP_TYPE_SHIFT_RSTE 6
+
+/*
+ * TT bits set to 0x00 == SEGMENT. For REGION3 entries, caller must add R3
+ * bits 0x01. See also __set_huge_pte_at().
+ */
+static inline unsigned long mk_swap_rste(unsigned long type, unsigned long offset)
+{
+ unsigned long rste;
+
+ rste = _RST_ENTRY_INVALID | _RST_ENTRY_COMM;
+ rste |= (offset & __SWP_OFFSET_MASK_RSTE) << __SWP_OFFSET_SHIFT_RSTE;
+ rste |= (type & __SWP_TYPE_MASK_RSTE) << __SWP_TYPE_SHIFT_RSTE;
+ return rste;
+}
+
+static inline unsigned long __swp_type_rste(swp_entry_t entry)
+{
+ return (entry.val >> __SWP_TYPE_SHIFT_RSTE) & __SWP_TYPE_MASK_RSTE;
+}
+
+static inline unsigned long __swp_offset_rste(swp_entry_t entry)
+{
+ return (entry.val >> __SWP_OFFSET_SHIFT_RSTE) & __SWP_OFFSET_MASK_RSTE;
+}
+
+#define __rste_to_swp_entry(rste) ((swp_entry_t) { rste })
+
+/*
+ * s390 has different layout for PTE and region / segment table entries (RSTE).
+ * This is also true for swap entries, and their swap type and offset encoding.
+ * For hugetlbfs PTE_MARKER support, s390 has internal __swp_type_rste() and
+ * __swp_offset_rste() helpers to correctly handle RSTE swap entries.
+ *
+ * But common swap code does not know about this difference, and only uses
+ * __swp_type(), __swp_offset() and __swp_entry() helpers for conversion between
+ * arch-dependent and arch-independent representation of swp_entry_t for all
+ * pagetable levels. On s390, those helpers only work for PTE swap entries.
+ *
+ * Therefore, implement __pmd_to_swp_entry() to build a fake PTE swap entry
+ * and return the arch-dependent representation of that. Correspondingly,
+ * implement __swp_entry_to_pmd() to convert that into a proper PMD swap
+ * entry again. With this, the arch-dependent swp_entry_t representation will
+ * always look like a PTE swap entry in common code.
+ *
+ * This is somewhat similar to fake PTEs in hugetlbfs code for s390, but only
+ * requires conversion of the swap type and offset, and not all the possible
+ * PTE bits.
+ */
+static inline swp_entry_t __pmd_to_swp_entry(pmd_t pmd)
+{
+ swp_entry_t arch_entry;
+ pte_t pte;
+
+ arch_entry = __rste_to_swp_entry(pmd_val(pmd));
+ pte = mk_swap_pte(__swp_type_rste(arch_entry), __swp_offset_rste(arch_entry));
+ return __pte_to_swp_entry(pte);
+}
+
+static inline pmd_t __swp_entry_to_pmd(swp_entry_t arch_entry)
+{
+ pmd_t pmd;
+
+ pmd = __pmd(mk_swap_rste(__swp_type(arch_entry), __swp_offset(arch_entry)));
+ return pmd;
+}
+
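/*
 * Illustrative sketch (not part of the patch): a small user space program
 * that mirrors the RSTE swap entry encoding defined above, to show how type
 * and offset round-trip through mk_swap_rste()-style packing. The constants
 * are copied from the hunk above; the program is only a demonstration and
 * makes no claim about other kernel internals.
 */
#include <assert.h>
#include <stdio.h>

#define RST_ENTRY_INVALID	0x0020UL
#define RST_ENTRY_COMM		0x0010UL
#define SWP_OFFSET_MASK_RSTE	((1UL << 52) - 1)
#define SWP_OFFSET_SHIFT_RSTE	12
#define SWP_TYPE_MASK_RSTE	((1UL << 5) - 1)
#define SWP_TYPE_SHIFT_RSTE	6

static unsigned long demo_mk_swap_rste(unsigned long type, unsigned long offset)
{
	unsigned long rste = RST_ENTRY_INVALID | RST_ENTRY_COMM;

	rste |= (offset & SWP_OFFSET_MASK_RSTE) << SWP_OFFSET_SHIFT_RSTE;
	rste |= (type & SWP_TYPE_MASK_RSTE) << SWP_TYPE_SHIFT_RSTE;
	return rste;
}

int main(void)
{
	unsigned long rste = demo_mk_swap_rste(5, 0x12345);

	/* Swap entry check from the comment above: (rste & 0x11) == 0x10 */
	assert((rste & 0x11UL) == 0x10UL);
	/* Decode with the inverse shifts/masks used by __swp_type_rste() etc. */
	assert(((rste >> SWP_TYPE_SHIFT_RSTE) & SWP_TYPE_MASK_RSTE) == 5);
	assert(((rste >> SWP_OFFSET_SHIFT_RSTE) & SWP_OFFSET_MASK_RSTE) == 0x12345);
	printf("rste = %#lx\n", rste);
	return 0;
}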
extern int vmem_add_mapping(unsigned long start, unsigned long size);
extern void vmem_remove_mapping(unsigned long start, unsigned long size);
extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc);
@@ -1894,4 +2026,40 @@ extern void s390_reset_cmma(struct mm_struct *mm);
#define pmd_pgtable(pmd) \
((pgtable_t)__va(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE))
+static inline unsigned long gmap_pgste_get_pgt_addr(unsigned long *pgt)
+{
+ unsigned long *pgstes, res;
+
+ pgstes = pgt + _PAGE_ENTRIES;
+
+ res = (pgstes[0] & PGSTE_ST2_MASK) << 16;
+ res |= pgstes[1] & PGSTE_ST2_MASK;
+ res |= (pgstes[2] & PGSTE_ST2_MASK) >> 16;
+ res |= (pgstes[3] & PGSTE_ST2_MASK) >> 32;
+
+ return res;
+}
+
+static inline pgste_t pgste_get_lock(pte_t *ptep)
+{
+ unsigned long value = 0;
+#ifdef CONFIG_PGSTE
+ unsigned long *ptr = (unsigned long *)(ptep + PTRS_PER_PTE);
+
+ do {
+ value = __atomic64_or_barrier(PGSTE_PCL_BIT, ptr);
+ } while (value & PGSTE_PCL_BIT);
+ value |= PGSTE_PCL_BIT;
+#endif
+ return __pgste(value);
+}
+
+static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+ barrier();
+ WRITE_ONCE(*(unsigned long *)(ptep + PTRS_PER_PTE), pgste_val(pgste) & ~PGSTE_PCL_BIT);
+#endif
+}
+
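/*
 * Illustrative usage sketch (not part of the patch), assuming kernel context
 * with CONFIG_PGSTE: the PGSTE lock bit is taken with pgste_get_lock(), the
 * value is modified with the set_pgste_bit()/clear_pgste_bit() helpers added
 * earlier in this patch, and the updated value is stored back (with the lock
 * bit cleared) by pgste_set_unlock(). The function name is hypothetical.
 */
static inline void pgste_demo_set_in_bit(pte_t *ptep)
{
	pgste_t pgste;

	pgste = pgste_get_lock(ptep);		/* spin until the PCL bit is ours */
	pgste = set_pgste_bit(pgste, PGSTE_IN_BIT);
	pgste_set_unlock(ptep, pgste);		/* store back, drop the PCL bit */
}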
#endif /* _S390_PAGE_H */
diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h
index f45cfc8bc233..7ef3bbec98b0 100644
--- a/arch/s390/include/asm/physmem_info.h
+++ b/arch/s390/include/asm/physmem_info.h
@@ -9,6 +9,7 @@ enum physmem_info_source {
MEM_DETECT_NONE = 0,
MEM_DETECT_SCLP_STOR_INFO,
MEM_DETECT_DIAG260,
+ MEM_DETECT_DIAG500_STOR_LIMIT,
MEM_DETECT_SCLP_READ_INFO,
MEM_DETECT_BIN_SEARCH
};
@@ -25,7 +26,7 @@ enum reserved_range_type {
RR_AMODE31,
RR_IPLREPORT,
RR_CERT_COMP_LIST,
- RR_MEM_DETECT_EXTENDED,
+ RR_MEM_DETECT_EXT,
RR_VMEM,
RR_MAX
};
@@ -107,6 +108,8 @@ static inline const char *get_physmem_info_source(void)
return "sclp storage info";
case MEM_DETECT_DIAG260:
return "diag260";
+ case MEM_DETECT_DIAG500_STOR_LIMIT:
+ return "diag500 storage limit";
case MEM_DETECT_SCLP_READ_INFO:
return "sclp read info";
case MEM_DETECT_BIN_SEARCH:
@@ -125,7 +128,7 @@ static inline const char *get_rr_type_name(enum reserved_range_type t)
RR_TYPE_NAME(AMODE31);
RR_TYPE_NAME(IPLREPORT);
RR_TYPE_NAME(CERT_COMP_LIST);
- RR_TYPE_NAME(MEM_DETECT_EXTENDED);
+ RR_TYPE_NAME(MEM_DETECT_EXT);
RR_TYPE_NAME(VMEM);
default:
return "UNKNOWN";
diff --git a/arch/s390/include/asm/pkey.h b/arch/s390/include/asm/pkey.h
index 47d80a7451a6..b7b59faf16f4 100644
--- a/arch/s390/include/asm/pkey.h
+++ b/arch/s390/include/asm/pkey.h
@@ -20,9 +20,22 @@
* @param key pointer to a buffer containing the key blob
* @param keylen size of the key blob in bytes
* @param protkey pointer to buffer receiving the protected key
+ * @param xflags additional execution flags (see PKEY_XFLAG_* definitions below)
+ * As of now the only supported flag is PKEY_XFLAG_NOMEMALLOC.
* @return 0 on success, negative errno value on failure
*/
-int pkey_keyblob2pkey(const u8 *key, u32 keylen,
- u8 *protkey, u32 *protkeylen, u32 *protkeytype);
+int pkey_key2protkey(const u8 *key, u32 keylen,
+ u8 *protkey, u32 *protkeylen, u32 *protkeytype,
+ u32 xflags);
+
+/*
+ * If this flag is given in the xflags parameter, the pkey implementation
+ * is not allowed to allocate memory and instead should fall back to using
+ * preallocated memory or simply fail with -ENOMEM.
+ * This flag is meant for protected key derivation within a cipher or similar
+ * context which must not allocate memory, as that could cause I/O operations -
+ * see also the CRYPTO_ALG_ALLOCATES_MEMORY flag in crypto.h.
+ */
+#define PKEY_XFLAG_NOMEMALLOC 0x0001
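/*
 * Illustrative sketch (not part of the patch), kernel context assumed: a
 * caller on a no-allocation path (for example a cipher's protected key
 * conversion) passes PKEY_XFLAG_NOMEMALLOC so the pkey layer either uses
 * preallocated memory or fails with -ENOMEM. The wrapper name and buffer
 * names are hypothetical; only the prototype and flag come from this patch.
 */
static int demo_convert_key_atomic(const u8 *keyblob, u32 keybloblen,
				   u8 *protkey, u32 *protkeylen, u32 *protkeytype)
{
	return pkey_key2protkey(keyblob, keybloblen,
				protkey, protkeylen, protkeytype,
				PKEY_XFLAG_NOMEMALLOC);
}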
#endif /* _KAPI_PKEY_H */
diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
index 0e3da500e98c..6ccd033acfe5 100644
--- a/arch/s390/include/asm/preempt.h
+++ b/arch/s390/include/asm/preempt.h
@@ -5,43 +5,59 @@
#include <asm/current.h>
#include <linux/thread_info.h>
#include <asm/atomic_ops.h>
-
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+#include <asm/cmpxchg.h>
+#include <asm/march.h>
/* We use the MSB mostly because its available */
#define PREEMPT_NEED_RESCHED 0x80000000
+
+/*
+ * We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such
+ * that a decrement hitting 0 means we can and should reschedule.
+ */
#define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED)
+/*
+ * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
+ * that think a non-zero value indicates we cannot preempt.
+ */
static __always_inline int preempt_count(void)
{
- return READ_ONCE(S390_lowcore.preempt_count) & ~PREEMPT_NEED_RESCHED;
+ return READ_ONCE(get_lowcore()->preempt_count) & ~PREEMPT_NEED_RESCHED;
}
static __always_inline void preempt_count_set(int pc)
{
int old, new;
+ old = READ_ONCE(get_lowcore()->preempt_count);
do {
- old = READ_ONCE(S390_lowcore.preempt_count);
- new = (old & PREEMPT_NEED_RESCHED) |
- (pc & ~PREEMPT_NEED_RESCHED);
- } while (__atomic_cmpxchg(&S390_lowcore.preempt_count,
- old, new) != old);
+ new = (old & PREEMPT_NEED_RESCHED) | (pc & ~PREEMPT_NEED_RESCHED);
+ } while (!arch_try_cmpxchg(&get_lowcore()->preempt_count, &old, new));
}
+/*
+ * We fold the NEED_RESCHED bit into the preempt count such that
+ * preempt_enable() can decrement and test for needing to reschedule with a
+ * short instruction sequence.
+ *
+ * We invert the actual bit, so that when the decrement hits 0 we know we both
+ * need to resched (the bit is cleared) and can resched (no preempt count).
+ */
+
static __always_inline void set_preempt_need_resched(void)
{
- __atomic_and(~PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
+ __atomic_and(~PREEMPT_NEED_RESCHED, &get_lowcore()->preempt_count);
}
static __always_inline void clear_preempt_need_resched(void)
{
- __atomic_or(PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
+ __atomic_or(PREEMPT_NEED_RESCHED, &get_lowcore()->preempt_count);
}
static __always_inline bool test_preempt_need_resched(void)
{
- return !(READ_ONCE(S390_lowcore.preempt_count) & PREEMPT_NEED_RESCHED);
+ return !(READ_ONCE(get_lowcore()->preempt_count) & PREEMPT_NEED_RESCHED);
}
static __always_inline void __preempt_count_add(int val)
@@ -52,11 +68,11 @@ static __always_inline void __preempt_count_add(int val)
*/
if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES)) {
if (__builtin_constant_p(val) && (val >= -128) && (val <= 127)) {
- __atomic_add_const(val, &S390_lowcore.preempt_count);
+ __atomic_add_const(val, &get_lowcore()->preempt_count);
return;
}
}
- __atomic_add(val, &S390_lowcore.preempt_count);
+ __atomic_add(val, &get_lowcore()->preempt_count);
}
static __always_inline void __preempt_count_sub(int val)
@@ -64,76 +80,47 @@ static __always_inline void __preempt_count_sub(int val)
__preempt_count_add(-val);
}
+/*
+ * Because we keep PREEMPT_NEED_RESCHED set when we do _not_ need to reschedule,
+ * a decrement which hits zero means we have no preempt_count and should
+ * reschedule.
+ */
static __always_inline bool __preempt_count_dec_and_test(void)
{
- return __atomic_add(-1, &S390_lowcore.preempt_count) == 1;
+ return __atomic_add_const_and_test(-1, &get_lowcore()->preempt_count);
}
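/*
 * Illustrative sketch (not part of the patch): user space arithmetic showing
 * why folding an inverted NEED_RESCHED bit into the preempt count lets a
 * single "add -1 and test for zero" decide both "no preempt count left" and
 * "reschedule needed". PREEMPT_NEED_RESCHED is copied from above; everything
 * else is a plain demonstration, not the kernel implementation.
 */
#include <assert.h>

#define PREEMPT_NEED_RESCHED	0x80000000u

int main(void)
{
	/* preempt_disable() with no resched request pending: bit still set */
	unsigned int count = PREEMPT_NEED_RESCHED + 1;

	assert(count - 1 != 0);		/* decrement does not hit zero */

	/* set_preempt_need_resched() clears the (inverted) bit */
	count &= ~PREEMPT_NEED_RESCHED;
	assert(count - 1 == 0);		/* now the decrement hits zero: no
					   preempt count left and a resched is
					   needed -> call the scheduler */
	return 0;
}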
+/*
+ * Returns true when we need to resched and can (barring IRQ state).
+ */
static __always_inline bool should_resched(int preempt_offset)
{
- return unlikely(READ_ONCE(S390_lowcore.preempt_count) ==
- preempt_offset);
-}
-
-#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
-
-#define PREEMPT_ENABLED (0)
-
-static __always_inline int preempt_count(void)
-{
- return READ_ONCE(S390_lowcore.preempt_count);
-}
-
-static __always_inline void preempt_count_set(int pc)
-{
- S390_lowcore.preempt_count = pc;
-}
-
-static __always_inline void set_preempt_need_resched(void)
-{
+ return unlikely(READ_ONCE(get_lowcore()->preempt_count) == preempt_offset);
}
-static __always_inline void clear_preempt_need_resched(void)
-{
-}
+#define init_task_preempt_count(p) do { } while (0)
+/* Deferred to CPU bringup time */
+#define init_idle_preempt_count(p, cpu) do { } while (0)
-static __always_inline bool test_preempt_need_resched(void)
-{
- return false;
-}
+#ifdef CONFIG_PREEMPTION
-static __always_inline void __preempt_count_add(int val)
-{
- S390_lowcore.preempt_count += val;
-}
+void preempt_schedule(void);
+void preempt_schedule_notrace(void);
-static __always_inline void __preempt_count_sub(int val)
-{
- S390_lowcore.preempt_count -= val;
-}
+#ifdef CONFIG_PREEMPT_DYNAMIC
-static __always_inline bool __preempt_count_dec_and_test(void)
-{
- return !--S390_lowcore.preempt_count && tif_need_resched();
-}
+void dynamic_preempt_schedule(void);
+void dynamic_preempt_schedule_notrace(void);
+#define __preempt_schedule() dynamic_preempt_schedule()
+#define __preempt_schedule_notrace() dynamic_preempt_schedule_notrace()
-static __always_inline bool should_resched(int preempt_offset)
-{
- return unlikely(preempt_count() == preempt_offset &&
- tif_need_resched());
-}
+#else /* CONFIG_PREEMPT_DYNAMIC */
-#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+#define __preempt_schedule() preempt_schedule()
+#define __preempt_schedule_notrace() preempt_schedule_notrace()
-#define init_task_preempt_count(p) do { } while (0)
-/* Deferred to CPU bringup time */
-#define init_idle_preempt_count(p, cpu) do { } while (0)
+#endif /* CONFIG_PREEMPT_DYNAMIC */
-#ifdef CONFIG_PREEMPTION
-extern void preempt_schedule(void);
-#define __preempt_schedule() preempt_schedule()
-extern void preempt_schedule_notrace(void);
-#define __preempt_schedule_notrace() preempt_schedule_notrace()
#endif /* CONFIG_PREEMPTION */
#endif /* __ASM_PREEMPT_H */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 07ad5a1df878..3affba95845b 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -14,13 +14,11 @@
#include <linux/bits.h>
-#define CIF_SIE 0 /* CPU needs SIE exit cleanup */
#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */
#define CIF_ENABLED_WAIT 5 /* in enabled wait state */
#define CIF_MCCK_GUEST 6 /* machine check happening in guest */
#define CIF_DEDICATED_CPU 7 /* this CPU is dedicated */
-#define _CIF_SIE BIT(CIF_SIE)
#define _CIF_NOHZ_DELAY BIT(CIF_NOHZ_DELAY)
#define _CIF_ENABLED_WAIT BIT(CIF_ENABLED_WAIT)
#define _CIF_MCCK_GUEST BIT(CIF_MCCK_GUEST)
@@ -28,11 +26,12 @@
#define RESTART_FLAG_CTLREGS _AC(1 << 0, U)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/cpumask.h>
#include <linux/linkage.h>
#include <linux/irqflags.h>
+#include <linux/bitops.h>
#include <asm/fpu-types.h>
#include <asm/cpu.h>
#include <asm/page.h>
@@ -41,38 +40,50 @@
#include <asm/runtime_instr.h>
#include <asm/irqflags.h>
#include <asm/alternative.h>
+#include <asm/fault.h>
+
+struct pcpu {
+ unsigned long ec_mask; /* bit mask for ec_xxx functions */
+ unsigned long ec_clk; /* sigp timestamp for ec_xxx */
+ unsigned long flags; /* per CPU flags */
+ unsigned long capacity; /* cpu capacity for scheduler */
+ signed char state; /* physical cpu state */
+ signed char polarization; /* physical polarization */
+ u16 address; /* physical cpu address */
+};
+
+DECLARE_PER_CPU(struct pcpu, pcpu_devices);
typedef long (*sys_call_ptr_t)(struct pt_regs *regs);
+static __always_inline struct pcpu *this_pcpu(void)
+{
+ return (struct pcpu *)(get_lowcore()->pcpu);
+}
+
static __always_inline void set_cpu_flag(int flag)
{
- S390_lowcore.cpu_flags |= (1UL << flag);
+ set_bit(flag, &this_pcpu()->flags);
}
static __always_inline void clear_cpu_flag(int flag)
{
- S390_lowcore.cpu_flags &= ~(1UL << flag);
+ clear_bit(flag, &this_pcpu()->flags);
}
static __always_inline bool test_cpu_flag(int flag)
{
- return S390_lowcore.cpu_flags & (1UL << flag);
+ return test_bit(flag, &this_pcpu()->flags);
}
static __always_inline bool test_and_set_cpu_flag(int flag)
{
- if (test_cpu_flag(flag))
- return true;
- set_cpu_flag(flag);
- return false;
+ return test_and_set_bit(flag, &this_pcpu()->flags);
}
static __always_inline bool test_and_clear_cpu_flag(int flag)
{
- if (!test_cpu_flag(flag))
- return false;
- clear_cpu_flag(flag);
- return true;
+ return test_and_clear_bit(flag, &this_pcpu()->flags);
}
/*
@@ -81,9 +92,7 @@ static __always_inline bool test_and_clear_cpu_flag(int flag)
*/
static __always_inline bool test_cpu_flag_of(int flag, int cpu)
{
- struct lowcore *lc = lowcore_ptr[cpu];
-
- return lc->cpu_flags & (1UL << flag);
+ return test_bit(flag, &per_cpu(pcpu_devices, cpu).flags);
}
#define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY)
@@ -110,18 +119,12 @@ extern void execve_tail(void);
unsigned long vdso_text_size(void);
unsigned long vdso_size(void);
-/*
- * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit.
- */
-
-#define TASK_SIZE (test_thread_flag(TIF_31BIT) ? \
- _REGION3_SIZE : TASK_SIZE_MAX)
-#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \
- (_REGION3_SIZE >> 1) : (_REGION2_SIZE >> 1))
+#define TASK_SIZE (TASK_SIZE_MAX)
+#define TASK_UNMAPPED_BASE (_REGION2_SIZE >> 1)
#define TASK_SIZE_MAX (-PAGE_SIZE)
#define VDSO_BASE (STACK_TOP + PAGE_SIZE)
-#define VDSO_LIMIT (test_thread_flag(TIF_31BIT) ? _REGION3_SIZE : _REGION2_SIZE)
+#define VDSO_LIMIT (_REGION2_SIZE)
#define STACK_TOP (VDSO_LIMIT - vdso_size() - PAGE_SIZE)
#define STACK_TOP_MAX (_REGION2_SIZE - vdso_size() - PAGE_SIZE)
@@ -149,13 +152,12 @@ static __always_inline void __stackleak_poison(unsigned long erase_low,
" la %[addr],256(%[addr])\n"
" brctg %[tmp],0b\n"
"1: stg %[poison],0(%[addr])\n"
- " larl %[tmp],3f\n"
- " ex %[count],0(%[tmp])\n"
+ " exrl %[count],3f\n"
" j 4f\n"
"2: stg %[poison],0(%[addr])\n"
" j 4f\n"
"3: mvc 8(1,%[addr]),0(%[addr])\n"
- "4:\n"
+ "4:"
: [addr] "+&a" (erase_low), [count] "+&d" (count), [tmp] "=&a" (tmp)
: [poison] "d" (poison)
: "memory", "cc"
@@ -173,11 +175,8 @@ struct thread_struct {
unsigned long system_timer; /* task cputime in kernel space */
unsigned long hardirq_timer; /* task cputime in hardirq context */
unsigned long softirq_timer; /* task cputime in softirq context */
- const sys_call_ptr_t *sys_call_table; /* system call table address */
- unsigned long gmap_addr; /* address of last gmap fault. */
- unsigned int gmap_write_flag; /* gmap fault write indication */
+ union teid gmap_teid; /* address and flags of last gmap fault */
unsigned int gmap_int_code; /* int code of last gmap fault */
- unsigned int gmap_pfault; /* signal of a pending guest pfault */
int ufpu_flags; /* user fpu flags */
int kfpu_flags; /* kernel fpu flags */
@@ -269,7 +268,7 @@ static __always_inline unsigned long __current_stack_pointer(void)
static __always_inline bool on_thread_stack(void)
{
- unsigned long ksp = S390_lowcore.kernel_stack;
+ unsigned long ksp = get_lowcore()->kernel_stack;
return !((ksp ^ current_stack_pointer) & ~(THREAD_SIZE - 1));
}
@@ -373,14 +372,19 @@ static inline void local_mcck_enable(void)
/*
* Rewind PSW instruction address by specified number of bytes.
*/
-static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc)
+static inline unsigned long __rewind_psw(psw_t psw, long ilen)
{
unsigned long mask;
mask = (psw.mask & PSW_MASK_EA) ? -1UL :
(psw.mask & PSW_MASK_BA) ? (1UL << 31) - 1 :
(1UL << 24) - 1;
- return (psw.addr - ilc) & mask;
+ return (psw.addr - ilen) & mask;
+}
+
+static inline unsigned long __forward_psw(psw_t psw, long ilen)
+{
+ return __rewind_psw(psw, -ilen);
}
/*
@@ -405,9 +409,13 @@ static __always_inline bool regs_irqs_disabled(struct pt_regs *regs)
static __always_inline void bpon(void)
{
- asm volatile(ALTERNATIVE("nop", ".insn rrf,0xb2e80000,0,0,13,0", 82));
+ asm_inline volatile(
+ ALTERNATIVE(" nop\n",
+ " .insn rrf,0xb2e80000,0,0,13,0\n",
+ ALT_SPEC(82))
+ );
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_S390_PROCESSOR_H */
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 2ad9324f6338..962cf042c66d 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -8,17 +8,21 @@
#define _S390_PTRACE_H
#include <linux/bits.h>
+#include <linux/typecheck.h>
#include <uapi/asm/ptrace.h>
+#include <asm/thread_info.h>
#include <asm/tpi.h>
#define PIF_SYSCALL 0 /* inside a system call */
-#define PIF_EXECVE_PGSTE_RESTART 1 /* restart execve for PGSTE binaries */
+#define PIF_PSW_ADDR_ADJUSTED 1 /* psw address has been adjusted */
#define PIF_SYSCALL_RET_SET 2 /* return value was set via ptrace */
+#define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */
#define PIF_FTRACE_FULL_REGS 4 /* all register contents valid (ftrace) */
#define _PIF_SYSCALL BIT(PIF_SYSCALL)
-#define _PIF_EXECVE_PGSTE_RESTART BIT(PIF_EXECVE_PGSTE_RESTART)
+#define _PIF_ADDR_PSW_ADJUSTED BIT(PIF_PSW_ADDR_ADJUSTED)
#define _PIF_SYSCALL_RET_SET BIT(PIF_SYSCALL_RET_SET)
+#define _PIF_GUEST_FAULT BIT(PIF_GUEST_FAULT)
#define _PIF_FTRACE_FULL_REGS BIT(PIF_FTRACE_FULL_REGS)
#define PSW32_MASK_PER _AC(0x40000000, UL)
@@ -53,7 +57,7 @@
PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | \
PSW_MASK_PSTATE | PSW_ASC_PRIMARY)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct psw_bits {
unsigned long : 1;
@@ -98,7 +102,7 @@ enum {
typedef struct {
unsigned int mask;
unsigned int addr;
-} psw_t32 __aligned(8);
+} psw32_t __aligned(8);
#define PGM_INT_CODE_MASK 0x7f
#define PGM_INT_CODE_PER 0x80
@@ -126,7 +130,6 @@ struct pt_regs {
struct tpi_info tpi_info;
};
unsigned long flags;
- unsigned long cr1;
unsigned long last_break;
};
@@ -229,8 +232,44 @@ static inline void instruction_pointer_set(struct pt_regs *regs,
int regs_query_register_offset(const char *name);
const char *regs_query_register_name(unsigned int offset);
-unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset);
-unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n);
+
+static __always_inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
+{
+ return regs->gprs[15];
+}
+
+static __always_inline unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset)
+{
+ if (offset >= NUM_GPRS)
+ return 0;
+ return regs->gprs[offset];
+}
+
+static __always_inline int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
+{
+ unsigned long ksp = kernel_stack_pointer(regs);
+
+ return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1));
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs: pt_regs which contains the kernel stack pointer.
+ * @n: stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns the @n th entry of the kernel stack
+ * specified by @regs. If the @n th entry is NOT on the kernel stack,
+ * this returns 0.
+ */
+static __always_inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
+{
+ unsigned long addr;
+
+ addr = kernel_stack_pointer(regs) + n * sizeof(long);
+ if (!regs_within_kernel_stack(regs, addr))
+ return 0;
+ return READ_ONCE_NOCHECK(*(unsigned long *)addr);
+}
/**
* regs_get_kernel_argument() - get Nth function argument in kernel
@@ -251,15 +290,10 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs,
return regs_get_kernel_stack_nth(regs, argoffset + n);
}
-static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
-{
- return regs->gprs[15];
-}
-
static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
{
regs->gprs[2] = rc;
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _S390_PTRACE_H */
diff --git a/arch/s390/include/asm/purgatory.h b/arch/s390/include/asm/purgatory.h
index e297bcfc476f..4c7a43bc43a1 100644
--- a/arch/s390/include/asm/purgatory.h
+++ b/arch/s390/include/asm/purgatory.h
@@ -7,11 +7,11 @@
#ifndef _S390_PURGATORY_H_
#define _S390_PURGATORY_H_
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/purgatory.h>
int verify_sha256_digest(void);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _S390_PURGATORY_H_ */
diff --git a/arch/s390/include/asm/runtime-const.h b/arch/s390/include/asm/runtime-const.h
new file mode 100644
index 000000000000..17878b1d048c
--- /dev/null
+++ b/arch/s390/include/asm/runtime-const.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_RUNTIME_CONST_H
+#define _ASM_S390_RUNTIME_CONST_H
+
+#include <linux/uaccess.h>
+
+#define runtime_const_ptr(sym) \
+({ \
+ typeof(sym) __ret; \
+ \
+ asm_inline( \
+ "0: iihf %[__ret],%[c1]\n" \
+ " iilf %[__ret],%[c2]\n" \
+ ".pushsection runtime_ptr_" #sym ",\"a\"\n" \
+ ".long 0b - .\n" \
+ ".popsection" \
+ : [__ret] "=d" (__ret) \
+ : [c1] "i" (0x01234567UL), \
+ [c2] "i" (0x89abcdefUL)); \
+ __ret; \
+})
+
+#define runtime_const_shift_right_32(val, sym) \
+({ \
+ unsigned int __ret = (val); \
+ \
+ asm_inline( \
+ "0: srl %[__ret],12\n" \
+ ".pushsection runtime_shift_" #sym ",\"a\"\n" \
+ ".long 0b - .\n" \
+ ".popsection" \
+ : [__ret] "+d" (__ret)); \
+ __ret; \
+})
+
+#define runtime_const_init(type, sym) do { \
+ extern s32 __start_runtime_##type##_##sym[]; \
+ extern s32 __stop_runtime_##type##_##sym[]; \
+ \
+ runtime_const_fixup(__runtime_fixup_##type, \
+ (unsigned long)(sym), \
+ __start_runtime_##type##_##sym, \
+ __stop_runtime_##type##_##sym); \
+} while (0)
+
+/* 32-bit immediate for iihf and iilf in bits in I2 field */
+static inline void __runtime_fixup_32(u32 *p, unsigned int val)
+{
+ s390_kernel_write(p, &val, sizeof(val));
+}
+
+static inline void __runtime_fixup_ptr(void *where, unsigned long val)
+{
+ __runtime_fixup_32(where + 2, val >> 32);
+ __runtime_fixup_32(where + 8, val);
+}
+
+/* Immediate value is lower 12 bits of D2 field of srl */
+static inline void __runtime_fixup_shift(void *where, unsigned long val)
+{
+ u32 insn = *(u32 *)where;
+
+ insn &= 0xfffff000;
+ insn |= (val & 63);
+ s390_kernel_write(where, &insn, sizeof(insn));
+}
+
+static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
+ unsigned long val, s32 *start, s32 *end)
+{
+ while (start < end) {
+ fn(*start + (void *)start, val);
+ start++;
+ }
+}
+
+#endif /* _ASM_S390_RUNTIME_CONST_H */
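/*
 * Illustrative sketch (not part of the patch): the generic runtime-const
 * usage pattern that this header backs on s390. A hot path reads the value
 * through runtime_const_ptr()/runtime_const_shift_right_32(), which compile
 * to the iihf/iilf and srl instructions above with dummy immediates; once
 * the real value is known at boot, runtime_const_init() patches the
 * immediates in place via s390_kernel_write(). The names my_table and
 * my_shift are hypothetical, and the matching runtime_ptr_<sym> and
 * runtime_shift_<sym> sections are assumed to be collected by the generic
 * linker script support.
 */
extern unsigned long *my_table;		/* hypothetical boot-time table */
extern unsigned int my_shift;		/* hypothetical boot-time shift */

static inline unsigned long *demo_hash_bucket(unsigned int hash)
{
	/* Both reads compile to immediates that get fixed up at boot */
	return runtime_const_ptr(my_table) +
	       runtime_const_shift_right_32(hash, my_shift);
}

static void __init demo_table_init(void)
{
	/* ... allocate my_table and compute my_shift here ... */
	runtime_const_init(ptr, my_table);
	runtime_const_init(shift, my_shift);
}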
diff --git a/arch/s390/include/asm/rwonce.h b/arch/s390/include/asm/rwonce.h
index 91fc24520e82..402325ec20f0 100644
--- a/arch/s390/include/asm/rwonce.h
+++ b/arch/s390/include/asm/rwonce.h
@@ -19,7 +19,7 @@
\
BUILD_BUG_ON(sizeof(x) != 16); \
asm volatile( \
- " lpq %[val],%[_x]\n" \
+ " lpq %[val],%[_x]" \
: [val] "=d" (__u.val) \
: [_x] "QS" (x) \
: "memory"); \
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 5742d23bba13..0f184dbdbe5e 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -16,7 +16,12 @@
/* 24 + 16 * SCLP_MAX_CORES */
#define EXT_SCCB_READ_CPU (3 * PAGE_SIZE)
-#ifndef __ASSEMBLY__
+#define SCLP_ERRNOTIFY_AQ_RESET 0
+#define SCLP_ERRNOTIFY_AQ_REPAIR 1
+#define SCLP_ERRNOTIFY_AQ_INFO_LOG 2
+#define SCLP_ERRNOTIFY_AQ_OPTICS_DATA 3
+
+#ifndef __ASSEMBLER__
#include <linux/uio.h>
#include <asm/chpid.h>
#include <asm/cpu.h>
@@ -72,6 +77,7 @@ struct sclp_info {
unsigned char has_core_type : 1;
unsigned char has_sprp : 1;
unsigned char has_hvs : 1;
+ unsigned char has_wti : 1;
unsigned char has_esca : 1;
unsigned char has_sief2 : 1;
unsigned char has_64bscao : 1;
@@ -84,9 +90,12 @@ struct sclp_info {
unsigned char has_ibs : 1;
unsigned char has_skey : 1;
unsigned char has_kss : 1;
+ unsigned char has_diag204_bif : 1;
unsigned char has_gisaf : 1;
+ unsigned char has_diag310 : 1;
unsigned char has_diag318 : 1;
unsigned char has_diag320 : 1;
+ unsigned char has_diag324 : 1;
unsigned char has_sipl : 1;
unsigned char has_sipl_eckd : 1;
unsigned char has_dirq : 1;
@@ -109,6 +118,34 @@ struct sclp_info {
};
extern struct sclp_info sclp;
+struct sccb_header {
+ u16 length;
+ u8 function_code;
+ u8 control_mask[3];
+ u16 response_code;
+} __packed;
+
+struct evbuf_header {
+ u16 length;
+ u8 type;
+ u8 flags;
+ u16 _reserved;
+} __packed;
+
+struct err_notify_evbuf {
+ struct evbuf_header header;
+ u8 action;
+ u8 atype;
+ u32 fh;
+ u32 fid;
+ u8 data[];
+} __packed;
+
+struct err_notify_sccb {
+ struct sccb_header header;
+ struct err_notify_evbuf evbuf;
+} __packed;
+
struct zpci_report_error_header {
u8 version; /* Interface version byte */
u8 action; /* Action qualifier byte
@@ -131,10 +168,12 @@ int sclp_early_read_storage_info(void);
int sclp_early_get_core_info(struct sclp_core_info *info);
void sclp_early_get_ipl_info(struct sclp_ipl_info *info);
void sclp_early_detect(void);
+void sclp_early_detect_machine_features(void);
void sclp_early_printk(const char *s);
void __sclp_early_printk(const char *s, unsigned int len);
void sclp_emergency_printk(const char *s);
+int sclp_init(void);
int sclp_early_get_memsize(unsigned long *mem);
int sclp_early_get_hsa_size(unsigned long *hsa_size);
int _sclp_get_core_info(struct sclp_core_info *info);
@@ -160,5 +199,5 @@ static inline int sclp_get_core_info(struct sclp_core_info *info, int early)
return _sclp_get_core_info(info);
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_S390_SCLP_H */
diff --git a/arch/s390/include/asm/seccomp.h b/arch/s390/include/asm/seccomp.h
index 71d46f0ba97b..f904b674fee0 100644
--- a/arch/s390/include/asm/seccomp.h
+++ b/arch/s390/include/asm/seccomp.h
@@ -19,10 +19,5 @@
#define SECCOMP_ARCH_NATIVE AUDIT_ARCH_S390X
#define SECCOMP_ARCH_NATIVE_NR NR_syscalls
#define SECCOMP_ARCH_NATIVE_NAME "s390x"
-#ifdef CONFIG_COMPAT
-# define SECCOMP_ARCH_COMPAT AUDIT_ARCH_S390
-# define SECCOMP_ARCH_COMPAT_NR NR_syscalls
-# define SECCOMP_ARCH_COMPAT_NAME "s390"
-#endif
#endif /* _ASM_S390_SECCOMP_H */
diff --git a/arch/s390/include/asm/set_memory.h b/arch/s390/include/asm/set_memory.h
index 06fbabe2f66c..94092f4ae764 100644
--- a/arch/s390/include/asm/set_memory.h
+++ b/arch/s390/include/asm/set_memory.h
@@ -62,5 +62,7 @@ __SET_MEMORY_FUNC(set_memory_4k, SET_MEMORY_4K)
int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);
+int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid);
+bool kernel_page_present(struct page *page);
#endif
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 32f70873e2b7..7c57ac968bf6 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -13,27 +13,6 @@
#define PARMAREA 0x10400
#define COMMAND_LINE_SIZE CONFIG_COMMAND_LINE_SIZE
-/*
- * Machine features detected in early.c
- */
-
-#define MACHINE_FLAG_VM BIT(0)
-#define MACHINE_FLAG_KVM BIT(1)
-#define MACHINE_FLAG_LPAR BIT(2)
-#define MACHINE_FLAG_DIAG9C BIT(3)
-#define MACHINE_FLAG_ESOP BIT(4)
-#define MACHINE_FLAG_IDTE BIT(5)
-#define MACHINE_FLAG_EDAT1 BIT(7)
-#define MACHINE_FLAG_EDAT2 BIT(8)
-#define MACHINE_FLAG_TOPOLOGY BIT(10)
-#define MACHINE_FLAG_TE BIT(11)
-#define MACHINE_FLAG_TLB_LC BIT(12)
-#define MACHINE_FLAG_TLB_GUEST BIT(14)
-#define MACHINE_FLAG_NX BIT(15)
-#define MACHINE_FLAG_GS BIT(16)
-#define MACHINE_FLAG_SCC BIT(17)
-#define MACHINE_FLAG_PCI_MIO BIT(18)
-#define MACHINE_FLAG_RDP BIT(19)
#define LPP_MAGIC BIT(31)
#define LPP_PID_MASK _AC(0xffffffff, UL)
@@ -45,7 +24,7 @@
#define LEGACY_COMMAND_LINE_SIZE 896
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/lowcore.h>
#include <asm/types.h>
@@ -62,6 +41,8 @@ struct parmarea {
char command_line[COMMAND_LINE_SIZE]; /* 0x10480 */
};
+extern char arch_hw_string[128];
+
extern struct parmarea parmarea;
extern unsigned int zlib_dfltcc_support;
@@ -77,25 +58,6 @@ extern unsigned long max_mappable;
/* The Write Back bit position in the physaddr is given by the SLPC PCI */
extern unsigned long mio_wb_bit_mask;
-#define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM)
-#define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM)
-#define MACHINE_IS_LPAR (S390_lowcore.machine_flags & MACHINE_FLAG_LPAR)
-
-#define MACHINE_HAS_DIAG9C (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C)
-#define MACHINE_HAS_ESOP (S390_lowcore.machine_flags & MACHINE_FLAG_ESOP)
-#define MACHINE_HAS_IDTE (S390_lowcore.machine_flags & MACHINE_FLAG_IDTE)
-#define MACHINE_HAS_EDAT1 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1)
-#define MACHINE_HAS_EDAT2 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2)
-#define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
-#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE)
-#define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
-#define MACHINE_HAS_TLB_GUEST (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_GUEST)
-#define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX)
-#define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS)
-#define MACHINE_HAS_SCC (S390_lowcore.machine_flags & MACHINE_FLAG_SCC)
-#define MACHINE_HAS_PCI_MIO (S390_lowcore.machine_flags & MACHINE_FLAG_PCI_MIO)
-#define MACHINE_HAS_RDP (S390_lowcore.machine_flags & MACHINE_FLAG_RDP)
-
/*
* Console mode. Override with conmode=
*/
@@ -115,6 +77,8 @@ extern unsigned int console_irq;
#define SET_CONSOLE_VT220 do { console_mode = 4; } while (0)
#define SET_CONSOLE_HVC do { console_mode = 5; } while (0)
+void register_early_console(void);
+
#ifdef CONFIG_VMCP
void vmcp_cma_reserve(void);
#else
@@ -138,5 +102,5 @@ static __always_inline u32 gen_lpswe(unsigned long addr)
BUILD_BUG_ON(addr > 0xfff);
return 0xb2b20000 | addr;
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_S390_SETUP_H */
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index edee63da08e7..97d77868f83c 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -36,7 +36,9 @@
#define SIGP_STATUS_INCORRECT_STATE 0x00000200UL
#define SIGP_STATUS_NOT_RUNNING 0x00000400UL
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
+
+#include <asm/asm.h>
static inline int ____pcpu_sigp(u16 addr, u8 order, unsigned long parm,
u32 *status)
@@ -46,13 +48,12 @@ static inline int ____pcpu_sigp(u16 addr, u8 order, unsigned long parm,
asm volatile(
" sigp %[r1],%[addr],0(%[order])\n"
- " ipm %[cc]\n"
- " srl %[cc],28\n"
- : [cc] "=&d" (cc), [r1] "+&d" (r1.pair)
+ CC_IPM(cc)
+ : CC_OUT(cc, cc), [r1] "+d" (r1.pair)
: [addr] "d" (addr), [order] "a" (order)
- : "cc");
+ : CC_CLOBBER);
*status = r1.even;
- return cc;
+ return CC_TRANSFORM(cc);
}
static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm,
@@ -67,6 +68,6 @@ static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm,
return cc;
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __S390_ASM_SIGP_H */
diff --git a/arch/s390/include/asm/skey.h b/arch/s390/include/asm/skey.h
new file mode 100644
index 000000000000..84e7cf28b712
--- /dev/null
+++ b/arch/s390/include/asm/skey.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_SKEY_H
+#define __ASM_SKEY_H
+
+#include <asm/rwonce.h>
+
+struct skey_region {
+ unsigned long start;
+ unsigned long end;
+};
+
+#define SKEY_REGION(_start, _end) \
+ stringify_in_c(.section .skey_region,"a";) \
+ stringify_in_c(.balign 8;) \
+ stringify_in_c(.quad (_start);) \
+ stringify_in_c(.quad (_end);) \
+ stringify_in_c(.previous)
+
+extern int skey_regions_initialized;
+extern struct skey_region __skey_region_start[];
+extern struct skey_region __skey_region_end[];
+
+void __skey_regions_initialize(void);
+
+static inline void skey_regions_initialize(void)
+{
+ if (READ_ONCE(skey_regions_initialized))
+ return;
+ __skey_regions_initialize();
+}
+
+#endif /* __ASM_SKEY_H */
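A short sketch of what SKEY_REGION() produces: stringify_in_c() turns the
directives into a C string, so each use records an 8-byte aligned start/end
pair in the .skey_region section, which __skey_regions_initialize() can walk
between __skey_region_start[] and __skey_region_end[]. For hypothetical
symbols _region_start and _region_end the expansion, as seen by the
assembler, is roughly:

	.section .skey_region,"a"
	.balign 8
	.quad (_region_start)
	.quad (_region_end)
	.previous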
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 6e5b1b4b19a9..fb2bdbf35da5 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -7,11 +7,30 @@
#ifndef __ASM_SMP_H
#define __ASM_SMP_H
-#include <asm/sigp.h>
-#include <asm/lowcore.h>
#include <asm/processor.h>
+#include <asm/lowcore.h>
+#include <asm/machine.h>
+#include <asm/sigp.h>
+
+static __always_inline unsigned int raw_smp_processor_id(void)
+{
+ unsigned long lc_cpu_nr;
+ unsigned int cpu;
+
+ BUILD_BUG_ON(sizeof_field(struct lowcore, cpu_nr) != sizeof(cpu));
+ lc_cpu_nr = offsetof(struct lowcore, cpu_nr);
+ asm_inline(
+ ALTERNATIVE(" ly %[cpu],%[offzero](%%r0)\n",
+ " ly %[cpu],%[offalt](%%r0)\n",
+ ALT_FEATURE(MFEATURE_LOWCORE))
+ : [cpu] "=d" (cpu)
+ : [offzero] "i" (lc_cpu_nr),
+ [offalt] "i" (lc_cpu_nr + LOWCORE_ALT_ADDRESS),
+ "m" (((struct lowcore *)0)->cpu_nr));
+ return cpu;
+}
-#define raw_smp_processor_id() (S390_lowcore.cpu_nr)
+#define arch_scale_cpu_capacity smp_cpu_get_capacity
extern struct mutex smp_cpu_state_mutex;
extern unsigned int smp_cpu_mt_shift;
@@ -24,8 +43,7 @@ extern int __cpu_up(unsigned int cpu, struct task_struct *tidle);
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-extern void smp_call_online_cpu(void (*func)(void *), void *);
-extern void smp_call_ipl_cpu(void (*func)(void *), void *);
+extern void __noreturn smp_call_ipl_cpu(void (*func)(void *), void *data);
extern void smp_emergency_stop(void);
extern int smp_find_processor_id(u16 address);
@@ -35,6 +53,9 @@ extern void smp_save_dump_secondary_cpus(void);
extern void smp_yield_cpu(int cpu);
extern void smp_cpu_set_polarization(int cpu, int val);
extern int smp_cpu_get_polarization(int cpu);
+extern void smp_cpu_set_capacity(int cpu, unsigned long val);
+extern void smp_set_core_capacity(int cpu, unsigned long val);
+extern unsigned long smp_cpu_get_capacity(int cpu);
extern int smp_cpu_get_cpu_address(int cpu);
extern void smp_fill_possible_mask(void);
extern void smp_detect_cpus(void);
@@ -59,7 +80,7 @@ static inline void smp_cpus_done(unsigned int max_cpus)
{
}
-extern int smp_rescan_cpus(void);
+extern int smp_rescan_cpus(bool early);
extern void __noreturn cpu_die(void);
extern void __cpu_die(unsigned int cpu);
extern int __cpu_disable(void);
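A simplified sketch of what the new raw_smp_processor_id() computes; the
ALTERNATIVE in the inline assembly only selects between the default lowcore
base and the relocated one at LOWCORE_ALT_ADDRESS when MFEATURE_LOWCORE is
set, so ignoring that patching it is functionally:

	/* Sketch only, without the runtime-patched lowcore base: */
	static inline unsigned int raw_smp_processor_id_sketch(void)
	{
		return get_lowcore()->cpu_nr;
	}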
diff --git a/arch/s390/include/asm/softirq_stack.h b/arch/s390/include/asm/softirq_stack.h
index 1ac5115d3115..42d61296bbad 100644
--- a/arch/s390/include/asm/softirq_stack.h
+++ b/arch/s390/include/asm/softirq_stack.h
@@ -8,7 +8,7 @@
#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
static inline void do_softirq_own_stack(void)
{
- call_on_stack(0, S390_lowcore.async_stack, void, __do_softirq);
+ call_on_stack(0, get_lowcore()->async_stack, void, __do_softirq);
}
#endif
#endif /* __ASM_S390_SOFTIRQ_STACK_H */
diff --git a/arch/s390/include/asm/sparsemem.h b/arch/s390/include/asm/sparsemem.h
index c549893602ea..668dfc5de538 100644
--- a/arch/s390/include/asm/sparsemem.h
+++ b/arch/s390/include/asm/sparsemem.h
@@ -2,7 +2,23 @@
#ifndef _ASM_S390_SPARSEMEM_H
#define _ASM_S390_SPARSEMEM_H
-#define SECTION_SIZE_BITS 28
+#define SECTION_SIZE_BITS 27
#define MAX_PHYSMEM_BITS CONFIG_MAX_PHYSMEM_BITS
+#ifdef CONFIG_NUMA
+
+static inline int memory_add_physaddr_to_nid(u64 addr)
+{
+ return 0;
+}
+#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid
+
+static inline int phys_to_target_node(u64 start)
+{
+ return 0;
+}
+#define phys_to_target_node phys_to_target_node
+
+#endif /* CONFIG_NUMA */
+
#endif /* _ASM_S390_SPARSEMEM_H */
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 37127cd7749e..b06b183b7246 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -16,7 +16,23 @@
#include <asm/processor.h>
#include <asm/alternative.h>
-#define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval)
+static __always_inline unsigned int spinlock_lockval(void)
+{
+ unsigned long lc_lockval;
+ unsigned int lockval;
+
+ BUILD_BUG_ON(sizeof_field(struct lowcore, spinlock_lockval) != sizeof(lockval));
+ lc_lockval = offsetof(struct lowcore, spinlock_lockval);
+ asm_inline(
+ ALTERNATIVE(" ly %[lockval],%[offzero](%%r0)\n",
+ " ly %[lockval],%[offalt](%%r0)\n",
+ ALT_FEATURE(MFEATURE_LOWCORE))
+ : [lockval] "=d" (lockval)
+ : [offzero] "i" (lc_lockval),
+ [offalt] "i" (lc_lockval + LOWCORE_ALT_ADDRESS),
+ "m" (((struct lowcore *)0)->spinlock_lockval));
+ return lockval;
+}
extern int spin_retry;
@@ -57,8 +73,10 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lp)
static inline int arch_spin_trylock_once(arch_spinlock_t *lp)
{
+ int old = 0;
+
barrier();
- return likely(__atomic_cmpxchg_bool(&lp->lock, 0, SPINLOCK_LOCKVAL));
+ return likely(arch_try_cmpxchg(&lp->lock, &old, spinlock_lockval()));
}
static inline void arch_spin_lock(arch_spinlock_t *lp)
@@ -79,10 +97,11 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp)
typecheck(int, lp->lock);
kcsan_release();
asm_inline volatile(
- ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", 49) /* NIAI 7 */
- " sth %1,%0\n"
- : "=R" (((unsigned short *) &lp->lock)[1])
- : "d" (0) : "cc", "memory");
+ ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", ALT_FACILITY(49)) /* NIAI 7 */
+ " mvhhi %[lock],0"
+ : [lock] "=Q" (((unsigned short *)&lp->lock)[1])
+ :
+ : "memory");
}
/*
@@ -118,7 +137,9 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
static inline void arch_write_lock(arch_rwlock_t *rw)
{
- if (!__atomic_cmpxchg_bool(&rw->cnts, 0, 0x30000))
+ int old = 0;
+
+ if (!arch_try_cmpxchg(&rw->cnts, &old, 0x30000))
arch_write_lock_wait(rw);
}
@@ -133,8 +154,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
int old;
old = READ_ONCE(rw->cnts);
- return (!(old & 0xffff0000) &&
- __atomic_cmpxchg_bool(&rw->cnts, old, old + 1));
+ return (!(old & 0xffff0000) && arch_try_cmpxchg(&rw->cnts, &old, old + 1));
}
static inline int arch_write_trylock(arch_rwlock_t *rw)
@@ -142,7 +162,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
int old;
old = READ_ONCE(rw->cnts);
- return !old && __atomic_cmpxchg_bool(&rw->cnts, 0, 0x30000);
+ return !old && arch_try_cmpxchg(&rw->cnts, &old, 0x30000);
}
#endif /* __ASM_SPINLOCK_H */
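The trylock conversions above rely on the arch_try_cmpxchg() calling
convention: the expected value is passed by reference and updated with the
observed value on failure, which is why "old" became a local variable. A
rough open-coded sketch of those semantics:

	/* Sketch only: what try_cmpxchg(ptr, &old, new) boils down to. */
	static inline bool try_cmpxchg_sketch(int *ptr, int *old, int new)
	{
		int prev = cmpxchg(ptr, *old, new);	/* atomic compare-and-swap */

		if (prev == *old)
			return true;
		*old = prev;		/* report the value actually found */
		return false;
	}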
diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h
index b69695e39957..3653ff57d6d9 100644
--- a/arch/s390/include/asm/spinlock_types.h
+++ b/arch/s390/include/asm/spinlock_types.h
@@ -3,7 +3,7 @@
#define __ASM_SPINLOCK_TYPES_H
#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
-# error "please don't include this file directly"
+# error "Please do not include this file directly."
#endif
typedef struct {
diff --git a/arch/s390/include/asm/stackprotector.h b/arch/s390/include/asm/stackprotector.h
new file mode 100644
index 000000000000..0497850103dd
--- /dev/null
+++ b/arch/s390/include/asm/stackprotector.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_STACKPROTECTOR_H
+#define _ASM_S390_STACKPROTECTOR_H
+
+#include <linux/sched.h>
+#include <asm/current.h>
+#include <asm/lowcore.h>
+
+static __always_inline void boot_init_stack_canary(void)
+{
+ current->stack_canary = get_random_canary();
+ get_lowcore()->stack_canary = current->stack_canary;
+}
+
+#endif /* _ASM_S390_STACKPROTECTOR_H */
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index 85b6738b826a..c9ae680a28af 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -65,6 +65,8 @@ struct stack_frame {
unsigned long sie_reason;
unsigned long sie_flags;
unsigned long sie_control_block_phys;
+ unsigned long sie_guest_asce;
+ unsigned long sie_irq;
};
};
unsigned long gprs[10];
@@ -198,7 +200,7 @@ static __always_inline unsigned long get_stack_pointer(struct task_struct *task,
" lg 15,%[_stack]\n" \
" stg %[_frame],%[_bc](15)\n" \
" brasl 14,%[_fn]\n" \
- " lgr 15,%[_prev]\n" \
+ " lgr 15,%[_prev]" \
: [_prev] "=&d" (prev), CALL_FMT_##nr \
: [_stack] "R" (__stack), \
[_bc] "i" (offsetof(struct stack_frame, back_chain)), \
@@ -249,7 +251,7 @@ static __always_inline unsigned long get_stack_pointer(struct task_struct *task,
" lra 14,0(1)\n" \
" lpswe %[psw_enter]\n" \
"0: lpswe 0(7)\n" \
- "1:\n" \
+ "1:" \
: CALL_FMT_##nr, [psw_leave] "=Q" (psw_leave) \
: [psw_enter] "Q" (psw_enter) \
: "7", CALL_CLOBBER_##nr); \
diff --git a/arch/s390/include/asm/stp.h b/arch/s390/include/asm/stp.h
index 4d74d7e33340..827cb208de86 100644
--- a/arch/s390/include/asm/stp.h
+++ b/arch/s390/include/asm/stp.h
@@ -94,5 +94,6 @@ struct stp_stzi {
int stp_sync_check(void);
int stp_island_check(void);
void stp_queue_work(void);
+bool stp_enabled(void);
#endif /* __S390_STP_H */
diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h
index 351685de53d2..238e721e5a22 100644
--- a/arch/s390/include/asm/string.h
+++ b/arch/s390/include/asm/string.h
@@ -15,36 +15,33 @@
#define __HAVE_ARCH_MEMCPY /* gcc builtin & arch function */
#define __HAVE_ARCH_MEMMOVE /* gcc builtin & arch function */
#define __HAVE_ARCH_MEMSET /* gcc builtin & arch function */
-#define __HAVE_ARCH_MEMSET16 /* arch function */
-#define __HAVE_ARCH_MEMSET32 /* arch function */
-#define __HAVE_ARCH_MEMSET64 /* arch function */
void *memcpy(void *dest, const void *src, size_t n);
void *memset(void *s, int c, size_t n);
void *memmove(void *dest, const void *src, size_t n);
-#ifndef CONFIG_KASAN
+#if !defined(CONFIG_KASAN) && !defined(CONFIG_KMSAN)
#define __HAVE_ARCH_MEMCHR /* inline & arch function */
#define __HAVE_ARCH_MEMCMP /* arch function */
#define __HAVE_ARCH_MEMSCAN /* inline & arch function */
#define __HAVE_ARCH_STRCAT /* inline & arch function */
#define __HAVE_ARCH_STRCMP /* arch function */
-#define __HAVE_ARCH_STRCPY /* inline & arch function */
#define __HAVE_ARCH_STRLCAT /* arch function */
#define __HAVE_ARCH_STRLEN /* inline & arch function */
#define __HAVE_ARCH_STRNCAT /* arch function */
-#define __HAVE_ARCH_STRNCPY /* arch function */
#define __HAVE_ARCH_STRNLEN /* inline & arch function */
#define __HAVE_ARCH_STRSTR /* arch function */
+#define __HAVE_ARCH_MEMSET16 /* arch function */
+#define __HAVE_ARCH_MEMSET32 /* arch function */
+#define __HAVE_ARCH_MEMSET64 /* arch function */
/* Prototypes for non-inlined arch strings functions. */
int memcmp(const void *s1, const void *s2, size_t n);
int strcmp(const char *s1, const char *s2);
size_t strlcat(char *dest, const char *src, size_t n);
char *strncat(char *dest, const char *src, size_t n);
-char *strncpy(char *dest, const char *src, size_t n);
char *strstr(const char *s1, const char *s2);
-#endif /* !CONFIG_KASAN */
+#endif /* !defined(CONFIG_KASAN) && !defined(CONFIG_KMSAN) */
#undef __HAVE_ARCH_STRCHR
#undef __HAVE_ARCH_STRNCHR
@@ -74,20 +71,30 @@ void *__memset16(uint16_t *s, uint16_t v, size_t count);
void *__memset32(uint32_t *s, uint32_t v, size_t count);
void *__memset64(uint64_t *s, uint64_t v, size_t count);
+#ifdef __HAVE_ARCH_MEMSET16
static inline void *memset16(uint16_t *s, uint16_t v, size_t count)
{
return __memset16(s, v, count * sizeof(v));
}
+#endif
+#ifdef __HAVE_ARCH_MEMSET32
static inline void *memset32(uint32_t *s, uint32_t v, size_t count)
{
return __memset32(s, v, count * sizeof(v));
}
+#endif
+#ifdef __HAVE_ARCH_MEMSET64
+#ifdef IN_BOOT_STRING_C
+void *memset64(uint64_t *s, uint64_t v, size_t count);
+#else
static inline void *memset64(uint64_t *s, uint64_t v, size_t count)
{
return __memset64(s, v, count * sizeof(v));
}
+#endif
+#endif
#if !defined(IN_ARCH_STRING_C) && (!defined(CONFIG_FORTIFY_SOURCE) || defined(__NO_FORTIFY))
@@ -118,7 +125,7 @@ static inline void *memscan(void *s, int c, size_t n)
asm volatile(
" lgr 0,%[c]\n"
"0: srst %[ret],%[s]\n"
- " jo 0b\n"
+ " jo 0b"
: [ret] "+&a" (ret), [s] "+&a" (s)
: [c] "d" (c)
: "cc", "memory", "0");
@@ -145,22 +152,6 @@ static inline char *strcat(char *dst, const char *src)
}
#endif
-#ifdef __HAVE_ARCH_STRCPY
-static inline char *strcpy(char *dst, const char *src)
-{
- char *ret = dst;
-
- asm volatile(
- " lghi 0,0\n"
- "0: mvst %[dst],%[src]\n"
- " jo 0b"
- : [dst] "+&a" (dst), [src] "+&a" (src)
- :
- : "cc", "memory", "0");
- return ret;
-}
-#endif
-
#if defined(__HAVE_ARCH_STRLEN) || (defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__))
static inline size_t __no_sanitize_prefix_strfunc(strlen)(const char *s)
{
@@ -198,7 +189,6 @@ static inline size_t strnlen(const char * s, size_t n)
void *memchr(const void * s, int c, size_t n);
void *memscan(void *s, int c, size_t n);
char *strcat(char *dst, const char *src);
-char *strcpy(char *dst, const char *src);
size_t strlen(const char *s);
size_t strnlen(const char * s, size_t n);
#endif /* !IN_ARCH_STRING_C */
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index 27e3d804b311..4271e4169f45 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -15,7 +15,6 @@
#include <asm/ptrace.h>
extern const sys_call_ptr_t sys_call_table[];
-extern const sys_call_ptr_t sys_call_table_emu[];
static inline long syscall_get_nr(struct task_struct *task,
struct pt_regs *regs)
@@ -24,6 +23,18 @@ static inline long syscall_get_nr(struct task_struct *task,
(regs->int_code & 0xffff) : -1;
}
+static inline void syscall_set_nr(struct task_struct *task,
+ struct pt_regs *regs,
+ int nr)
+{
+ /*
+ * Unlike syscall_get_nr(), syscall_set_nr() can be called only when
+ * the target task is stopped for tracing on entering syscall, so
+ * there is no need to have the same check syscall_get_nr() has.
+ */
+ regs->int_code = (regs->int_code & ~0xffff) | (nr & 0xffff);
+}
+
static inline void syscall_rollback(struct task_struct *task,
struct pt_regs *regs)
{
@@ -34,15 +45,7 @@ static inline long syscall_get_error(struct task_struct *task,
struct pt_regs *regs)
{
unsigned long error = regs->gprs[2];
-#ifdef CONFIG_COMPAT
- if (test_tsk_thread_flag(task, TIF_31BIT)) {
- /*
- * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
- * and will match correctly in comparisons.
- */
- error = (long)(int)error;
- }
-#endif
+
return IS_ERR_VALUE(error) ? error : 0;
}
@@ -65,25 +68,24 @@ static inline void syscall_get_arguments(struct task_struct *task,
unsigned long *args)
{
unsigned long mask = -1UL;
- unsigned int n = 6;
-#ifdef CONFIG_COMPAT
- if (test_tsk_thread_flag(task, TIF_31BIT))
- mask = 0xffffffff;
-#endif
- while (n-- > 0)
- if (n > 0)
- args[n] = regs->gprs[2 + n] & mask;
+ for (int i = 1; i < 6; i++)
+ args[i] = regs->gprs[2 + i] & mask;
args[0] = regs->orig_gpr2 & mask;
}
+static inline void syscall_set_arguments(struct task_struct *task,
+ struct pt_regs *regs,
+ const unsigned long *args)
+{
+ regs->orig_gpr2 = args[0];
+ for (int n = 1; n < 6; n++)
+ regs->gprs[2 + n] = args[n];
+}
+
static inline int syscall_get_arch(struct task_struct *task)
{
-#ifdef CONFIG_COMPAT
- if (test_tsk_thread_flag(task, TIF_31BIT))
- return AUDIT_ARCH_S390;
-#endif
return AUDIT_ARCH_S390X;
}
@@ -136,7 +138,7 @@ long syscall##nr(unsigned long syscall SYSCALL_PARM_##nr) \
SYSCALL_REGS_##nr; \
\
asm volatile ( \
- " svc 0\n" \
+ " svc 0" \
: "=d" (rc) \
: "d" (r1) SYSCALL_FMT_##nr \
: "memory"); \
diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h
index 35c1d1b860d8..9eb58d5348d8 100644
--- a/arch/s390/include/asm/syscall_wrapper.h
+++ b/arch/s390/include/asm/syscall_wrapper.h
@@ -13,101 +13,12 @@
,, regs->orig_gpr2,, regs->gprs[3],, regs->gprs[4] \
,, regs->gprs[5],, regs->gprs[6],, regs->gprs[7])
-#ifdef CONFIG_COMPAT
-
-#define __SC_COMPAT_CAST(t, a) \
-({ \
- long __ReS = a; \
- \
- BUILD_BUG_ON((sizeof(t) > 4) && !__TYPE_IS_L(t) && \
- !__TYPE_IS_UL(t) && !__TYPE_IS_PTR(t) && \
- !__TYPE_IS_LL(t)); \
- if (__TYPE_IS_L(t)) \
- __ReS = (s32)a; \
- if (__TYPE_IS_UL(t)) \
- __ReS = (u32)a; \
- if (__TYPE_IS_PTR(t)) \
- __ReS = a & 0x7fffffff; \
- if (__TYPE_IS_LL(t)) \
- return -ENOSYS; \
- (t)__ReS; \
-})
-
-/*
- * To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias
- * named __s390x_sys_*()
- */
-#define COMPAT_SYSCALL_DEFINE0(sname) \
- long __s390_compat_sys_##sname(void); \
- ALLOW_ERROR_INJECTION(__s390_compat_sys_##sname, ERRNO); \
- long __s390_compat_sys_##sname(void)
-
-#define SYSCALL_DEFINE0(sname) \
- SYSCALL_METADATA(_##sname, 0); \
- long __s390_sys_##sname(void); \
- ALLOW_ERROR_INJECTION(__s390_sys_##sname, ERRNO); \
- long __s390x_sys_##sname(void); \
- ALLOW_ERROR_INJECTION(__s390x_sys_##sname, ERRNO); \
- static inline long __do_sys_##sname(void); \
- long __s390_sys_##sname(void) \
- { \
- return __do_sys_##sname(); \
- } \
- long __s390x_sys_##sname(void) \
- { \
- return __do_sys_##sname(); \
- } \
- static inline long __do_sys_##sname(void)
-
-#define COND_SYSCALL(name) \
- cond_syscall(__s390x_sys_##name); \
- cond_syscall(__s390_sys_##name)
-
-#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \
- long __s390_compat_sys##name(struct pt_regs *regs); \
- ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO); \
- static inline long __se_compat_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__)); \
- static inline long __do_compat_sys##name(__MAP(x, __SC_DECL, __VA_ARGS__)); \
- long __s390_compat_sys##name(struct pt_regs *regs) \
- { \
- return __se_compat_sys##name(SC_S390_REGS_TO_ARGS(x, __VA_ARGS__)); \
- } \
- static inline long __se_compat_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__)) \
- { \
- __MAP(x, __SC_TEST, __VA_ARGS__); \
- return __do_compat_sys##name(__MAP(x, __SC_DELOUSE, __VA_ARGS__)); \
- } \
- static inline long __do_compat_sys##name(__MAP(x, __SC_DECL, __VA_ARGS__))
-
-/*
- * As some compat syscalls may not be implemented, we need to expand
- * COND_SYSCALL_COMPAT in kernel/sys_ni.c to cover this case as well.
- */
-#define COND_SYSCALL_COMPAT(name) \
- cond_syscall(__s390_compat_sys_##name)
-
-#define __S390_SYS_STUBx(x, name, ...) \
- long __s390_sys##name(struct pt_regs *regs); \
- ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \
- static inline long ___se_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__)); \
- long __s390_sys##name(struct pt_regs *regs) \
- { \
- return ___se_sys##name(SC_S390_REGS_TO_ARGS(x, __VA_ARGS__)); \
- } \
- static inline long ___se_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__)) \
- { \
- __MAP(x, __SC_TEST, __VA_ARGS__); \
- return __do_sys##name(__MAP(x, __SC_COMPAT_CAST, __VA_ARGS__)); \
- }
-
-#else /* CONFIG_COMPAT */
-
#define SYSCALL_DEFINE0(sname) \
SYSCALL_METADATA(_##sname, 0); \
- long __s390x_sys_##sname(void); \
+ long __s390x_sys_##sname(struct pt_regs *__unused); \
ALLOW_ERROR_INJECTION(__s390x_sys_##sname, ERRNO); \
static inline long __do_sys_##sname(void); \
- long __s390x_sys_##sname(void) \
+ long __s390x_sys_##sname(struct pt_regs *__unused) \
{ \
return __do_sys_##sname(); \
} \
@@ -118,8 +29,6 @@
#define __S390_SYS_STUBx(x, fullname, name, ...)
-#endif /* CONFIG_COMPAT */
-
#define __SYSCALL_DEFINEx(x, name, ...) \
long __s390x_sys##name(struct pt_regs *regs); \
ALLOW_ERROR_INJECTION(__s390x_sys##name, ERRNO); \
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
index edca5a751df4..9088c5267f35 100644
--- a/arch/s390/include/asm/sysinfo.h
+++ b/arch/s390/include/asm/sysinfo.h
@@ -11,8 +11,34 @@
#ifndef __ASM_S390_SYSINFO_H
#define __ASM_S390_SYSINFO_H
-#include <asm/bitsperlong.h>
#include <linux/uuid.h>
+#include <asm/bitsperlong.h>
+#include <asm/asm.h>
+
+/*
+ * stsi - store system information
+ *
+ * Returns the current configuration level if function code 0 was specified.
+ * Otherwise returns 0 on success or a negative value on error.
+ */
+static inline int stsi(void *sysinfo, int fc, int sel1, int sel2)
+{
+ int r0 = (fc << 28) | sel1;
+ int cc;
+
+ asm volatile(
+ " lr %%r0,%[r0]\n"
+ " lr %%r1,%[r1]\n"
+ " stsi %[sysinfo]\n"
+ " lr %[r0],%%r0\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc), [r0] "+d" (r0), [sysinfo] "=Q" (*(char *)sysinfo)
+ : [r1] "d" (sel2)
+ : CC_CLOBBER_LIST("0", "1", "memory"));
+ if (cc == 3)
+ return -EOPNOTSUPP;
+ return fc ? 0 : (unsigned int)r0 >> 28;
+}
struct sysinfo_1_1_1 {
unsigned char p:1;
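A minimal usage sketch for the new inline stsi(): function code 0 reports
the current configuration level, any other function code stores the
requested SYSIB into the supplied buffer (error handling trimmed, buffer
assumed to be one zeroed page):

	/* Sketch: fetch basic machine information (SYSIB 1.1.1). */
	struct sysinfo_1_1_1 *info = (void *)get_zeroed_page(GFP_KERNEL);

	if (info && stsi(info, 1, 1, 1) == 0)
		pr_info("SYSIB 1.1.1 stored\n");
	free_page((unsigned long)info);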
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index a674c7d25da5..6a548a819400 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -9,14 +9,12 @@
#define _ASM_THREAD_INFO_H
#include <linux/bits.h>
-#ifndef ASM_OFFSETS_C
-#include <asm/asm-offsets.h>
-#endif
+#include <vdso/page.h>
/*
* General size of kernel stacks
*/
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN) || defined(CONFIG_KMSAN)
#define THREAD_SIZE_ORDER 4
#else
#define THREAD_SIZE_ORDER 2
@@ -26,9 +24,7 @@
#define STACK_INIT_OFFSET (THREAD_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
-#ifndef __ASSEMBLY__
-#include <asm/lowcore.h>
-#include <asm/page.h>
+#ifndef __ASSEMBLER__
/*
* low level task data that entry.S needs immediate access to
@@ -40,6 +36,7 @@ struct thread_info {
unsigned long flags; /* low level flags */
unsigned long syscall_work; /* SYSCALL_WORK_ flags */
unsigned int cpu; /* current CPU */
+ unsigned char sie; /* running in SIE context */
};
/*
@@ -59,48 +56,29 @@ void arch_setup_new_exec(void);
/*
* thread information flags bit numbers
+ *
+ * Tell the generic TIF infrastructure which special bits s390 supports
*/
-/* _TIF_WORK bits */
-#define TIF_NOTIFY_RESUME 0 /* callback before returning to user */
-#define TIF_SIGPENDING 1 /* signal pending */
-#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
-#define TIF_UPROBE 3 /* breakpointed or single-stepping */
-#define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */
-#define TIF_PATCH_PENDING 5 /* pending live patching update */
-#define TIF_PGSTE 6 /* New mm's will use 4K page tables */
-#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */
-#define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */
-#define TIF_PER_TRAP 10 /* Need to handle PER trap on exit to usermode */
+#define HAVE_TIF_NEED_RESCHED_LAZY
+#define HAVE_TIF_RESTORE_SIGMASK
-#define TIF_31BIT 16 /* 32bit process */
-#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
-#define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */
-#define TIF_SINGLE_STEP 19 /* This task is single stepped */
-#define TIF_BLOCK_STEP 20 /* This task is block stepped */
-#define TIF_UPROBE_SINGLESTEP 21 /* This task is uprobe single stepped */
+#include <asm-generic/thread_info_tif.h>
-/* _TIF_TRACE bits */
-#define TIF_SYSCALL_TRACE 24 /* syscall trace active */
-#define TIF_SYSCALL_AUDIT 25 /* syscall auditing active */
-#define TIF_SECCOMP 26 /* secure computing */
-#define TIF_SYSCALL_TRACEPOINT 27 /* syscall tracepoint instrumentation */
+/* Architecture specific bits */
+#define TIF_ASCE_PRIMARY 16 /* primary asce is kernel asce */
+#define TIF_GUARDED_STORAGE 17 /* load guarded storage control block */
+#define TIF_ISOLATE_BP_GUEST 18 /* Run KVM guests with isolated BP */
+#define TIF_PER_TRAP 19 /* Need to handle PER trap on exit to usermode */
+#define TIF_SINGLE_STEP 21 /* This task is single stepped */
+#define TIF_BLOCK_STEP 22 /* This task is block stepped */
+#define TIF_UPROBE_SINGLESTEP 23 /* This task is uprobe single stepped */
-#define _TIF_NOTIFY_RESUME BIT(TIF_NOTIFY_RESUME)
-#define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL)
-#define _TIF_SIGPENDING BIT(TIF_SIGPENDING)
-#define _TIF_NEED_RESCHED BIT(TIF_NEED_RESCHED)
-#define _TIF_UPROBE BIT(TIF_UPROBE)
+#define _TIF_ASCE_PRIMARY BIT(TIF_ASCE_PRIMARY)
#define _TIF_GUARDED_STORAGE BIT(TIF_GUARDED_STORAGE)
-#define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING)
#define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST)
#define _TIF_PER_TRAP BIT(TIF_PER_TRAP)
-
-#define _TIF_31BIT BIT(TIF_31BIT)
#define _TIF_SINGLE_STEP BIT(TIF_SINGLE_STEP)
-
-#define _TIF_SYSCALL_TRACE BIT(TIF_SYSCALL_TRACE)
-#define _TIF_SYSCALL_AUDIT BIT(TIF_SYSCALL_AUDIT)
-#define _TIF_SECCOMP BIT(TIF_SECCOMP)
-#define _TIF_SYSCALL_TRACEPOINT BIT(TIF_SYSCALL_TRACEPOINT)
+#define _TIF_BLOCK_STEP BIT(TIF_BLOCK_STEP)
+#define _TIF_UPROBE_SINGLESTEP BIT(TIF_UPROBE_SINGLESTEP)
#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 4d646659a5f5..49447b40f038 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -13,6 +13,8 @@
#include <linux/preempt.h>
#include <linux/time64.h>
#include <asm/lowcore.h>
+#include <asm/machine.h>
+#include <asm/asm.h>
/* The value of the TOD clock for 1.1.1970. */
#define TOD_UNIX_EPOCH 0x7d91048bca000000ULL
@@ -44,11 +46,12 @@ static inline int set_tod_clock(__u64 time)
int cc;
asm volatile(
- " sck %1\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (cc) : "Q" (time) : "cc");
- return cc;
+ " sck %[time]\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc)
+ : [time] "Q" (time)
+ : CC_CLOBBER);
+ return CC_TRANSFORM(cc);
}
static inline int store_tod_clock_ext_cc(union tod_clock *clk)
@@ -56,11 +59,12 @@ static inline int store_tod_clock_ext_cc(union tod_clock *clk)
int cc;
asm volatile(
- " stcke %1\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (cc), "=Q" (*clk) : : "cc");
- return cc;
+ " stcke %[clk]\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc), [clk] "=Q" (*clk)
+ :
+ : CC_CLOBBER);
+ return CC_TRANSFORM(cc);
}
static __always_inline void store_tod_clock_ext(union tod_clock *tod)
@@ -77,7 +81,7 @@ static inline void set_tod_programmable_field(u16 val)
{
asm volatile(
" lgr 0,%[val]\n"
- " sckpf\n"
+ " sckpf"
:
: [val] "d" ((unsigned long)val)
: "0");
@@ -93,6 +97,7 @@ extern unsigned char ptff_function_mask[16];
#define PTFF_QAF 0x00 /* query available functions */
#define PTFF_QTO 0x01 /* query tod offset */
#define PTFF_QSI 0x02 /* query steering information */
+#define PTFF_QPT 0x03 /* query physical clock */
#define PTFF_QUI 0x04 /* query UTC information */
#define PTFF_ATO 0x40 /* adjust tod offset */
#define PTFF_STO 0x41 /* set tod offset */
@@ -149,28 +154,27 @@ struct ptff_qui {
" lgr 0,%[reg0]\n" \
" lgr 1,%[reg1]\n" \
" ptff\n" \
- " ipm %[rc]\n" \
- " srl %[rc],28\n" \
- : [rc] "=&d" (rc), "+m" (*(struct addrtype *)reg1) \
+ CC_IPM(rc) \
+ : CC_OUT(rc, rc), "+m" (*(struct addrtype *)reg1) \
: [reg0] "d" (reg0), [reg1] "d" (reg1) \
- : "cc", "0", "1"); \
- rc; \
+ : CC_CLOBBER_LIST("0", "1")); \
+ CC_TRANSFORM(rc); \
})
static inline unsigned long local_tick_disable(void)
{
unsigned long old;
- old = S390_lowcore.clock_comparator;
- S390_lowcore.clock_comparator = clock_comparator_max;
- set_clock_comparator(S390_lowcore.clock_comparator);
+ old = get_lowcore()->clock_comparator;
+ get_lowcore()->clock_comparator = clock_comparator_max;
+ set_clock_comparator(get_lowcore()->clock_comparator);
return old;
}
static inline void local_tick_enable(unsigned long comp)
{
- S390_lowcore.clock_comparator = comp;
- set_clock_comparator(S390_lowcore.clock_comparator);
+ get_lowcore()->clock_comparator = comp;
+ set_clock_comparator(get_lowcore()->clock_comparator);
}
#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */
@@ -192,13 +196,6 @@ static inline unsigned long get_tod_clock_fast(void)
asm volatile("stckf %0" : "=Q" (clk) : : "cc");
return clk;
}
-
-static inline cycles_t get_cycles(void)
-{
- return (cycles_t) get_tod_clock() >> 2;
-}
-#define get_cycles get_cycles
-
int get_phys_clock(unsigned long *clock);
void init_cpu_timer(void);
@@ -226,6 +223,12 @@ static inline unsigned long get_tod_clock_monotonic(void)
return tod;
}
+static inline cycles_t get_cycles(void)
+{
+ return (cycles_t)get_tod_clock_monotonic() >> 2;
+}
+#define get_cycles get_cycles
+
/**
* tod_to_ns - convert a TOD format value to nanoseconds
* @todval: to be converted TOD format value
@@ -250,6 +253,11 @@ static __always_inline unsigned long tod_to_ns(unsigned long todval)
return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9);
}
+static __always_inline u128 eitod_to_ns(u128 todval)
+{
+ return (todval * 125) >> 9;
+}
+
/**
* tod_after - compare two 64 bit TOD values
* @a: first 64 bit TOD timestamp
@@ -259,7 +267,7 @@ static __always_inline unsigned long tod_to_ns(unsigned long todval)
*/
static inline int tod_after(unsigned long a, unsigned long b)
{
- if (MACHINE_HAS_SCC)
+ if (machine_has_scc())
return (long) a > (long) b;
return a > b;
}
@@ -273,7 +281,7 @@ static inline int tod_after(unsigned long a, unsigned long b)
*/
static inline int tod_after_eq(unsigned long a, unsigned long b)
{
- if (MACHINE_HAS_SCC)
+ if (machine_has_scc())
return (long) a >= (long) b;
return a >= b;
}
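For reference, both TOD conversions implement the same arithmetic: one TOD
clock unit is 2^-12 microseconds, i.e. 125/512 ns, so

	ns = todval * 125 / 512

tod_to_ns() splits this into ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9)
to avoid 64-bit overflow, while the new eitod_to_ns() can multiply first and
shift afterwards because it operates on a 128-bit value.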
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index e95b2c8081eb..1e50f6f1ad9d 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -22,7 +22,6 @@
* Pages used for the page tables is a different story. FIXME: more
*/
-void __tlb_remove_table(void *_table);
static inline void tlb_flush(struct mmu_gather *tlb);
static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
struct page *page, bool delay_rmap, int page_size);
@@ -37,11 +36,12 @@ static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb,
#include <asm/tlbflush.h>
#include <asm-generic/tlb.h>
+#include <asm/gmap.h>
/*
* Release the page cache reference for a pte removed by
* tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
- * has already been freed, so just do free_page_and_swap_cache.
+ * has already been freed, so just do free_folio_and_swap_cache.
*
* s390 doesn't delay rmap removal.
*/
@@ -50,7 +50,7 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
{
VM_WARN_ON_ONCE(delay_rmap);
- free_page_and_swap_cache(page);
+ free_folio_and_swap_cache(page_folio(page));
return false;
}
@@ -85,9 +85,9 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
tlb->mm->context.flush_mm = 1;
tlb->freed_tables = 1;
tlb->cleared_pmds = 1;
- if (mm_alloc_pgste(tlb->mm))
+ if (mm_has_pgste(tlb->mm))
gmap_unlink(tlb->mm, (unsigned long *)pte, address);
- tlb_remove_ptdesc(tlb, pte);
+ tlb_remove_ptdesc(tlb, virt_to_ptdesc(pte));
}
/*
@@ -102,12 +102,11 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
{
if (mm_pmd_folded(tlb->mm))
return;
- pagetable_pmd_dtor(virt_to_ptdesc(pmd));
__tlb_adjust_range(tlb, address, PAGE_SIZE);
tlb->mm->context.flush_mm = 1;
tlb->freed_tables = 1;
tlb->cleared_puds = 1;
- tlb_remove_ptdesc(tlb, pmd);
+ tlb_remove_ptdesc(tlb, virt_to_ptdesc(pmd));
}
/*
@@ -125,7 +124,7 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
__tlb_adjust_range(tlb, address, PAGE_SIZE);
tlb->mm->context.flush_mm = 1;
tlb->freed_tables = 1;
- tlb_remove_ptdesc(tlb, p4d);
+ tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d));
}
/*
@@ -140,11 +139,11 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
{
if (mm_pud_folded(tlb->mm))
return;
+ __tlb_adjust_range(tlb, address, PAGE_SIZE);
tlb->mm->context.flush_mm = 1;
tlb->freed_tables = 1;
tlb->cleared_p4ds = 1;
- tlb_remove_ptdesc(tlb, pud);
+ tlb_remove_ptdesc(tlb, virt_to_ptdesc(pud));
}
-
#endif /* _S390_TLB_H */
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index a6e2cd89b609..163ccbbe8c47 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -2,9 +2,11 @@
#ifndef _S390_TLBFLUSH_H
#define _S390_TLBFLUSH_H
+#include <linux/cpufeature.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <asm/processor.h>
+#include <asm/machine.h>
/*
* Flush all TLB entries on the local CPU.
@@ -22,7 +24,7 @@ static inline void __tlb_flush_idte(unsigned long asce)
unsigned long opt;
opt = IDTE_PTOA;
- if (MACHINE_HAS_TLB_GUEST)
+ if (machine_has_tlb_guest())
opt |= IDTE_GUEST_ASCE;
/* Global TLB flush for the mm */
asm volatile("idte 0,%1,%0" : : "a" (opt), "a" (asce) : "cc");
@@ -33,9 +35,9 @@ static inline void __tlb_flush_idte(unsigned long asce)
*/
static inline void __tlb_flush_global(void)
{
- unsigned int dummy = 0;
+ unsigned long dummy = 0;
- csp(&dummy, 0, 0);
+ cspg(&dummy, 0, 0);
}
/*
@@ -46,18 +48,13 @@ static inline void __tlb_flush_mm(struct mm_struct *mm)
{
unsigned long gmap_asce;
- /*
- * If the machine has IDTE we prefer to do a per mm flush
- * on all cpus instead of doing a local flush if the mm
- * only ran on the local cpu.
- */
preempt_disable();
atomic_inc(&mm->context.flush_count);
/* Reset TLB flush mask */
cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask);
barrier();
gmap_asce = READ_ONCE(mm->context.gmap_asce);
- if (MACHINE_HAS_IDTE && gmap_asce != -1UL) {
+ if (gmap_asce != -1UL) {
if (gmap_asce)
__tlb_flush_idte(gmap_asce);
__tlb_flush_idte(mm->context.asce);
@@ -71,10 +68,7 @@ static inline void __tlb_flush_mm(struct mm_struct *mm)
static inline void __tlb_flush_kernel(void)
{
- if (MACHINE_HAS_IDTE)
- __tlb_flush_idte(init_mm.context.asce);
- else
- __tlb_flush_global();
+ __tlb_flush_idte(init_mm.context.asce);
}
static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
@@ -89,7 +83,6 @@ static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
/*
* TLB flushing:
- * flush_tlb() - flushes the current mm struct TLBs
* flush_tlb_all() - flushes all processes TLBs
* flush_tlb_mm(mm) - flushes the specified mm context TLB's
* flush_tlb_page(vma, vmaddr) - flushes one page
@@ -105,7 +98,6 @@ static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
* only one user. At the end of the update the flush_tlb_mm and
* flush_tlb_range functions need to do the flush.
*/
-#define flush_tlb() do { } while (0)
#define flush_tlb_all() do { } while (0)
#define flush_tlb_page(vma, addr) do { } while (0)
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index 3a0ac0c7a9a3..44110847342a 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -61,12 +61,21 @@ static inline void topology_expect_change(void) { }
#endif /* CONFIG_SCHED_TOPOLOGY */
+static inline bool topology_is_primary_thread(unsigned int cpu)
+{
+ return smp_get_base_cpu(cpu) == cpu;
+}
+#define topology_is_primary_thread topology_is_primary_thread
+
#define POLARIZATION_UNKNOWN (-1)
#define POLARIZATION_HRZ (0)
#define POLARIZATION_VL (1)
#define POLARIZATION_VM (2)
#define POLARIZATION_VH (3)
+#define CPU_CAPACITY_HIGH SCHED_CAPACITY_SCALE
+#define CPU_CAPACITY_LOW (SCHED_CAPACITY_SCALE >> 3)
+
#define SD_BOOK_INIT SD_CPU_INIT
#ifdef CONFIG_NUMA
diff --git a/arch/s390/include/asm/tpi.h b/arch/s390/include/asm/tpi.h
index f76e5fdff23a..71c8b6f76cdd 100644
--- a/arch/s390/include/asm/tpi.h
+++ b/arch/s390/include/asm/tpi.h
@@ -5,7 +5,7 @@
#include <linux/types.h>
#include <uapi/asm/schid.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/* I/O-Interruption Code as stored by TEST PENDING INTERRUPTION (TPI). */
struct tpi_info {
@@ -32,6 +32,6 @@ struct tpi_adapter_info {
u32 :27;
} __packed __aligned(4);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_S390_TPI_H */
diff --git a/arch/s390/include/asm/trace/ap.h b/arch/s390/include/asm/trace/ap.h
new file mode 100644
index 000000000000..5c2e6c664b4d
--- /dev/null
+++ b/arch/s390/include/asm/trace/ap.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Tracepoint definitions for s390 ap bus related trace events
+ *
+ * There are two AP bus related tracepoint events defined here:
+ * There is a tracepoint s390_ap_nqap event immediately after a request
+ * has been pushed into the AP firmware queue with the NQAP AP command.
+ * The other tracepoint s390_ap_dqap event fires immediately after a
+ * reply has been pulled out of the AP firmware queue via DQAP AP command.
+ * The idea of these two trace events focuses on performance to measure
+ * the runtime of a crypto request/reply as close as possible at the
+ * firmware level. In combination with the two zcrypt tracepoints (see the
+ * zcrypt.h trace event definition file) this gives measurement data about
+ * the runtime of a request/reply within the zcrpyt and AP bus layer.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM s390
+
+#if !defined(_TRACE_S390_AP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_S390_AP_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(s390_ap_nqapdqap_template,
+ TP_PROTO(u16 card, u16 dom, u32 status, u64 psmid),
+ TP_ARGS(card, dom, status, psmid),
+ TP_STRUCT__entry(
+ __field(u16, card)
+ __field(u16, dom)
+ __field(u32, status)
+ __field(u64, psmid)),
+ TP_fast_assign(
+ __entry->card = card;
+ __entry->dom = dom;
+ __entry->status = status;
+ __entry->psmid = psmid;),
+ TP_printk("card=%u dom=%u status=0x%08x psmid=0x%016lx",
+ (unsigned short)__entry->card,
+ (unsigned short)__entry->dom,
+ (unsigned int)__entry->status,
+ (unsigned long)__entry->psmid)
+);
+
+/**
+ * trace_s390_ap_nqap - ap msg nqap tracepoint function
+ * @card: Crypto card number addressed.
+ * @dom: Domain within the crypto card addressed.
+ * @status: AP queue status (GR1 on return of nqap).
+ * @psmid: Unique id identifying this request/reply.
+ *
+ * Called immediately after a request has been enqueued into
+ * the AP firmware queue with the NQAP command.
+ */
+DEFINE_EVENT(s390_ap_nqapdqap_template,
+ s390_ap_nqap,
+ TP_PROTO(u16 card, u16 dom, u32 status, u64 psmid),
+ TP_ARGS(card, dom, status, psmid)
+);
+
+/**
+ * trace_s390_ap_dqap - ap msg dqap tracepoint function
+ * @card: Crypto card number addressed.
+ * @dom: Domain within the crypto card addressed.
+ * @status: AP queue status (GR1 on return of dqap).
+ * @psmid: Unique id identifying this request/reply.
+ *
+ * Called immediately after a reply has been dequeued from
+ * the AP firmware queue with the DQAP command.
+ */
+DEFINE_EVENT(s390_ap_nqapdqap_template,
+ s390_ap_dqap,
+ TP_PROTO(u16 card, u16 dom, u32 status, u64 psmid),
+ TP_ARGS(card, dom, status, psmid)
+);
+
+#endif /* _TRACE_S390_AP_H */
+
+/* This part must be outside protection */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH asm/trace
+#define TRACE_INCLUDE_FILE ap
+
+#include <trace/define_trace.h>
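A hypothetical call-site sketch for the two new events; the local variables
are assumptions and would in practice be filled from the NQAP/DQAP
invocation in the AP bus driver:

	/* Hypothetical sketch; card, dom, status and psmid are assumed locals. */
	trace_s390_ap_nqap(card, dom, status, psmid);	/* right after NQAP */
	/* ... wait for and dequeue the reply ... */
	trace_s390_ap_dqap(card, dom, status, psmid);	/* right after DQAP */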
diff --git a/arch/s390/include/asm/trace/hiperdispatch.h b/arch/s390/include/asm/trace/hiperdispatch.h
new file mode 100644
index 000000000000..46462ee645b0
--- /dev/null
+++ b/arch/s390/include/asm/trace/hiperdispatch.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Tracepoint header for hiperdispatch
+ *
+ * Copyright IBM Corp. 2024
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM s390
+
+#if !defined(_TRACE_S390_HIPERDISPATCH_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_S390_HIPERDISPATCH_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH asm/trace
+#define TRACE_INCLUDE_FILE hiperdispatch
+
+TRACE_EVENT(s390_hd_work_fn,
+ TP_PROTO(int steal_time_percentage,
+ int entitled_core_count,
+ int highcap_core_count),
+ TP_ARGS(steal_time_percentage,
+ entitled_core_count,
+ highcap_core_count),
+ TP_STRUCT__entry(__field(int, steal_time_percentage)
+ __field(int, entitled_core_count)
+ __field(int, highcap_core_count)),
+ TP_fast_assign(__entry->steal_time_percentage = steal_time_percentage;
+ __entry->entitled_core_count = entitled_core_count;
+ __entry->highcap_core_count = highcap_core_count;),
+ TP_printk("steal: %d entitled_core_count: %d highcap_core_count: %d",
+ __entry->steal_time_percentage,
+ __entry->entitled_core_count,
+ __entry->highcap_core_count)
+);
+
+TRACE_EVENT(s390_hd_rebuild_domains,
+ TP_PROTO(int current_highcap_core_count,
+ int new_highcap_core_count),
+ TP_ARGS(current_highcap_core_count,
+ new_highcap_core_count),
+ TP_STRUCT__entry(__field(int, current_highcap_core_count)
+ __field(int, new_highcap_core_count)),
+ TP_fast_assign(__entry->current_highcap_core_count = current_highcap_core_count;
+ __entry->new_highcap_core_count = new_highcap_core_count),
+ TP_printk("change highcap_core_count: %u -> %u",
+ __entry->current_highcap_core_count,
+ __entry->new_highcap_core_count)
+);
+
+#endif /* _TRACE_S390_HIPERDISPATCH_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/s390/include/asm/trace/zcrypt.h b/arch/s390/include/asm/trace/zcrypt.h
index 457ddaa99e19..bfb01e335f31 100644
--- a/arch/s390/include/asm/trace/zcrypt.h
+++ b/arch/s390/include/asm/trace/zcrypt.h
@@ -2,7 +2,7 @@
/*
* Tracepoint definitions for the s390 zcrypt device driver
*
- * Copyright IBM Corp. 2016
+ * Copyright IBM Corp. 2016,2025
* Author(s): Harald Freudenberger <freude@de.ibm.com>
*
* Currently there are two tracepoint events defined here.
@@ -73,14 +73,15 @@ TRACE_EVENT(s390_zcrypt_req,
/**
* trace_s390_zcrypt_rep - zcrypt reply tracepoint function
- * @ptr: Address of the local buffer where the request from userspace
- * is stored. Can be used as a unique id to match together
- * request and reply.
- * @fc: Function code.
- * @rc: The bare returncode as returned by the device driver ioctl
- * function.
- * @dev: The adapter nr where this request was actually processed.
- * @dom: Domain id of the device where this request was processed.
+ * @ptr: Address of the local buffer where the request from userspace
+ * is stored. Can be used as a unique id to match together
+ * request and reply.
+ * @fc: Function code.
+ * @rc: The bare returncode as returned by the device driver ioctl
+ * function.
+ * @card: The adapter nr where this request was actually processed.
+ * @dom: Domain id of the device where this request was processed.
+ * @psmid: Unique id identifying this request/reply.
*
* Called upon recognising the reply from the crypto adapter. This
* message may act as the exit timestamp for the request but also
@@ -88,26 +89,29 @@ TRACE_EVENT(s390_zcrypt_req,
* and the returncode from the device driver.
*/
TRACE_EVENT(s390_zcrypt_rep,
- TP_PROTO(void *ptr, u32 fc, u32 rc, u16 dev, u16 dom),
- TP_ARGS(ptr, fc, rc, dev, dom),
+ TP_PROTO(void *ptr, u32 fc, u32 rc, u16 card, u16 dom, u64 psmid),
+ TP_ARGS(ptr, fc, rc, card, dom, psmid),
TP_STRUCT__entry(
__field(void *, ptr)
__field(u32, fc)
__field(u32, rc)
- __field(u16, device)
- __field(u16, domain)),
+ __field(u16, card)
+ __field(u16, dom)
+ __field(u64, psmid)),
TP_fast_assign(
__entry->ptr = ptr;
__entry->fc = fc;
__entry->rc = rc;
- __entry->device = dev;
- __entry->domain = dom;),
- TP_printk("ptr=%p fc=0x%04x rc=%d dev=0x%02hx domain=0x%04hx",
+ __entry->card = card;
+ __entry->dom = dom;
+ __entry->psmid = psmid;),
+ TP_printk("ptr=%p fc=0x%04x rc=%d card=%u dom=%u psmid=0x%016lx",
__entry->ptr,
- (unsigned int) __entry->fc,
- (int) __entry->rc,
- (unsigned short) __entry->device,
- (unsigned short) __entry->domain)
+ (unsigned int)__entry->fc,
+ (int)__entry->rc,
+ (unsigned short)__entry->card,
+ (unsigned short)__entry->dom,
+ (unsigned long)__entry->psmid)
);
#endif /* _TRACE_S390_ZCRYPT_H */
diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h
index 0b5d550a0478..53695b2196f7 100644
--- a/arch/s390/include/asm/types.h
+++ b/arch/s390/include/asm/types.h
@@ -5,7 +5,7 @@
#include <uapi/asm/types.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
union register_pair {
unsigned __int128 pair;
@@ -15,5 +15,5 @@ union register_pair {
};
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_S390_TYPES_H */
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 81ae8a98e7ec..c5e02addcd67 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -13,24 +13,81 @@
/*
* User space memory access functions
*/
+#include <linux/pgtable.h>
#include <asm/asm-extable.h>
#include <asm/processor.h>
#include <asm/extable.h>
#include <asm/facility.h>
#include <asm-generic/access_ok.h>
+#include <asm/asce.h>
+#include <linux/instrumented.h>
void debug_user_asce(int exit);
-unsigned long __must_check
-raw_copy_from_user(void *to, const void __user *from, unsigned long n);
-
-unsigned long __must_check
-raw_copy_to_user(void __user *to, const void *from, unsigned long n);
+#ifdef CONFIG_KMSAN
+#define uaccess_kmsan_or_inline noinline __maybe_unused __no_sanitize_memory
+#else
+#define uaccess_kmsan_or_inline __always_inline
+#endif
-#ifndef CONFIG_KASAN
#define INLINE_COPY_FROM_USER
#define INLINE_COPY_TO_USER
-#endif
+
+static uaccess_kmsan_or_inline __must_check unsigned long
+raw_copy_from_user(void *to, const void __user *from, unsigned long size)
+{
+ unsigned long osize;
+ int cc;
+
+ while (1) {
+ osize = size;
+ asm_inline volatile(
+ " lhi %%r0,%[spec]\n"
+ "0: mvcos %[to],%[from],%[size]\n"
+ "1: nopr %%r7\n"
+ CC_IPM(cc)
+ EX_TABLE_UA_MVCOS_FROM(0b, 0b)
+ EX_TABLE_UA_MVCOS_FROM(1b, 0b)
+ : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char *)to)
+ : [spec] "I" (0x81), [from] "Q" (*(const char __user *)from)
+ : CC_CLOBBER_LIST("memory", "0"));
+ if (__builtin_constant_p(osize) && osize <= 4096)
+ return osize - size;
+ if (likely(CC_TRANSFORM(cc) == 0))
+ return osize - size;
+ size -= 4096;
+ to += 4096;
+ from += 4096;
+ }
+}
+
+static uaccess_kmsan_or_inline __must_check unsigned long
+raw_copy_to_user(void __user *to, const void *from, unsigned long size)
+{
+ unsigned long osize;
+ int cc;
+
+ while (1) {
+ osize = size;
+ asm_inline volatile(
+ " llilh %%r0,%[spec]\n"
+ "0: mvcos %[to],%[from],%[size]\n"
+ "1: nopr %%r7\n"
+ CC_IPM(cc)
+ EX_TABLE_UA_MVCOS_TO(0b, 0b)
+ EX_TABLE_UA_MVCOS_TO(1b, 0b)
+ : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char __user *)to)
+ : [spec] "I" (0x81), [from] "Q" (*(const char *)from)
+ : CC_CLOBBER_LIST("memory", "0"));
+ if (__builtin_constant_p(osize) && osize <= 4096)
+ return osize - size;
+ if (likely(CC_TRANSFORM(cc) == 0))
+ return osize - size;
+ size -= 4096;
+ to += 4096;
+ from += 4096;
+ }
+}
unsigned long __must_check
_copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key);
@@ -54,163 +111,113 @@ copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned lo
return n;
}
-union oac {
- unsigned int val;
- struct {
- struct {
- unsigned short key : 4;
- unsigned short : 4;
- unsigned short as : 2;
- unsigned short : 4;
- unsigned short k : 1;
- unsigned short a : 1;
- } oac1;
- struct {
- unsigned short key : 4;
- unsigned short : 4;
- unsigned short as : 2;
- unsigned short : 4;
- unsigned short k : 1;
- unsigned short a : 1;
- } oac2;
- };
-};
-
int __noreturn __put_user_bad(void);
-#define __put_user_asm(to, from, size) \
-({ \
- union oac __oac_spec = { \
- .oac1.as = PSW_BITS_AS_SECONDARY, \
- .oac1.a = 1, \
- }; \
- int __rc; \
- \
- asm volatile( \
- " lr 0,%[spec]\n" \
- "0: mvcos %[_to],%[_from],%[_size]\n" \
- "1: xr %[rc],%[rc]\n" \
- "2:\n" \
- EX_TABLE_UA_STORE(0b, 2b, %[rc]) \
- EX_TABLE_UA_STORE(1b, 2b, %[rc]) \
- : [rc] "=&d" (__rc), [_to] "+Q" (*(to)) \
- : [_size] "d" (size), [_from] "Q" (*(from)), \
- [spec] "d" (__oac_spec.val) \
- : "cc", "0"); \
- __rc; \
-})
-
-static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
-{
- int rc;
-
- switch (size) {
- case 1:
- rc = __put_user_asm((unsigned char __user *)ptr,
- (unsigned char *)x,
- size);
- break;
- case 2:
- rc = __put_user_asm((unsigned short __user *)ptr,
- (unsigned short *)x,
- size);
- break;
- case 4:
- rc = __put_user_asm((unsigned int __user *)ptr,
- (unsigned int *)x,
- size);
- break;
- case 8:
- rc = __put_user_asm((unsigned long __user *)ptr,
- (unsigned long *)x,
- size);
- break;
- default:
- __put_user_bad();
- break;
- }
- return rc;
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
+#define DEFINE_PUT_USER_NOINSTR(type) \
+static uaccess_kmsan_or_inline int \
+__put_user_##type##_noinstr(unsigned type __user *to, \
+ unsigned type *from, \
+ unsigned long size) \
+{ \
+ asm goto( \
+ " llilh %%r0,%[spec]\n" \
+ "0: mvcos %[to],%[from],%[size]\n" \
+ "1: nopr %%r7\n" \
+ EX_TABLE(0b, %l[Efault]) \
+ EX_TABLE(1b, %l[Efault]) \
+ : [to] "+Q" (*to) \
+ : [size] "d" (size), [from] "Q" (*from), \
+ [spec] "I" (0x81) \
+ : "cc", "0" \
+ : Efault \
+ ); \
+ return 0; \
+Efault: \
+ return -EFAULT; \
}
-int __noreturn __get_user_bad(void);
+#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
-#define __get_user_asm(to, from, size) \
-({ \
- union oac __oac_spec = { \
- .oac2.as = PSW_BITS_AS_SECONDARY, \
- .oac2.a = 1, \
- }; \
- int __rc; \
+#define DEFINE_PUT_USER_NOINSTR(type) \
+static uaccess_kmsan_or_inline int \
+__put_user_##type##_noinstr(unsigned type __user *to, \
+ unsigned type *from, \
+ unsigned long size) \
+{ \
+ int rc; \
\
- asm volatile( \
- " lr 0,%[spec]\n" \
- "0: mvcos 0(%[_to]),%[_from],%[_size]\n" \
- "1: xr %[rc],%[rc]\n" \
+ asm_inline volatile( \
+ " llilh %%r0,%[spec]\n" \
+ "0: mvcos %[to],%[from],%[size]\n" \
+ "1: lhi %[rc],0\n" \
"2:\n" \
- EX_TABLE_UA_LOAD_MEM(0b, 2b, %[rc], %[_to], %[_ksize]) \
- EX_TABLE_UA_LOAD_MEM(1b, 2b, %[rc], %[_to], %[_ksize]) \
- : [rc] "=&d" (__rc), "=Q" (*(to)) \
- : [_size] "d" (size), [_from] "Q" (*(from)), \
- [spec] "d" (__oac_spec.val), [_to] "a" (to), \
- [_ksize] "K" (size) \
+ EX_TABLE_UA_FAULT(0b, 2b, %[rc]) \
+ EX_TABLE_UA_FAULT(1b, 2b, %[rc]) \
+ : [rc] "=d" (rc), [to] "+Q" (*to) \
+ : [size] "d" (size), [from] "Q" (*from), \
+ [spec] "I" (0x81) \
: "cc", "0"); \
- __rc; \
-})
+ return rc; \
+}
-static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
-{
- int rc;
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
- switch (size) {
- case 1:
- rc = __get_user_asm((unsigned char *)x,
- (unsigned char __user *)ptr,
- size);
- break;
- case 2:
- rc = __get_user_asm((unsigned short *)x,
- (unsigned short __user *)ptr,
- size);
- break;
- case 4:
- rc = __get_user_asm((unsigned int *)x,
- (unsigned int __user *)ptr,
- size);
- break;
- case 8:
- rc = __get_user_asm((unsigned long *)x,
- (unsigned long __user *)ptr,
- size);
- break;
- default:
- __get_user_bad();
- break;
- }
- return rc;
+DEFINE_PUT_USER_NOINSTR(char);
+DEFINE_PUT_USER_NOINSTR(short);
+DEFINE_PUT_USER_NOINSTR(int);
+DEFINE_PUT_USER_NOINSTR(long);
+
+#define DEFINE_PUT_USER(type) \
+static __always_inline int \
+__put_user_##type(unsigned type __user *to, unsigned type *from, \
+ unsigned long size) \
+{ \
+ int rc; \
+ \
+ rc = __put_user_##type##_noinstr(to, from, size); \
+ instrument_put_user(*from, to, size); \
+ return rc; \
}
-/*
- * These are the main single-value transfer routines. They automatically
- * use the right size if we just have the right pointer type.
- */
+DEFINE_PUT_USER(char);
+DEFINE_PUT_USER(short);
+DEFINE_PUT_USER(int);
+DEFINE_PUT_USER(long);
+
#define __put_user(x, ptr) \
({ \
__typeof__(*(ptr)) __x = (x); \
- int __pu_err = -EFAULT; \
+ int __prc; \
\
__chk_user_ptr(ptr); \
switch (sizeof(*(ptr))) { \
case 1: \
+ __prc = __put_user_char((unsigned char __user *)(ptr), \
+ (unsigned char *)&__x, \
+ sizeof(*(ptr))); \
+ break; \
case 2: \
+ __prc = __put_user_short((unsigned short __user *)(ptr),\
+ (unsigned short *)&__x, \
+ sizeof(*(ptr))); \
+ break; \
case 4: \
+ __prc = __put_user_int((unsigned int __user *)(ptr), \
+ (unsigned int *)&__x, \
+ sizeof(*(ptr))); \
+ break; \
case 8: \
- __pu_err = __put_user_fn(&__x, ptr, sizeof(*(ptr))); \
+ __prc = __put_user_long((unsigned long __user *)(ptr), \
+ (unsigned long *)&__x, \
+ sizeof(*(ptr))); \
break; \
default: \
- __put_user_bad(); \
+ __prc = __put_user_bad(); \
break; \
} \
- __builtin_expect(__pu_err, 0); \
+ __builtin_expect(__prc, 0); \
})
#define put_user(x, ptr) \
@@ -219,45 +226,129 @@ static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsign
__put_user(x, ptr); \
})
+int __noreturn __get_user_bad(void);
+
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
+#define DEFINE_GET_USER_NOINSTR(type) \
+static uaccess_kmsan_or_inline int \
+__get_user_##type##_noinstr(unsigned type *to, \
+ const unsigned type __user *from, \
+ unsigned long size) \
+{ \
+ asm goto( \
+ " lhi %%r0,%[spec]\n" \
+ "0: mvcos %[to],%[from],%[size]\n" \
+ "1: nopr %%r7\n" \
+ EX_TABLE(0b, %l[Efault]) \
+ EX_TABLE(1b, %l[Efault]) \
+ : [to] "=Q" (*to) \
+ : [size] "d" (size), [from] "Q" (*from), \
+ [spec] "I" (0x81) \
+ : "cc", "0" \
+ : Efault \
+ ); \
+ return 0; \
+Efault: \
+ *to = 0; \
+ return -EFAULT; \
+}
+
+#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
+#define DEFINE_GET_USER_NOINSTR(type) \
+static uaccess_kmsan_or_inline int \
+__get_user_##type##_noinstr(unsigned type *to, \
+ const unsigned type __user *from, \
+ unsigned long size) \
+{ \
+ int rc; \
+ \
+ asm_inline volatile( \
+ " lhi %%r0,%[spec]\n" \
+ "0: mvcos %[to],%[from],%[size]\n" \
+ "1: lhi %[rc],0\n" \
+ "2:\n" \
+ EX_TABLE_UA_FAULT(0b, 2b, %[rc]) \
+ EX_TABLE_UA_FAULT(1b, 2b, %[rc]) \
+ : [rc] "=d" (rc), [to] "=Q" (*to) \
+ : [size] "d" (size), [from] "Q" (*from), \
+ [spec] "I" (0x81) \
+ : "cc", "0"); \
+ if (likely(!rc)) \
+ return 0; \
+ *to = 0; \
+ return rc; \
+}
+
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
+DEFINE_GET_USER_NOINSTR(char);
+DEFINE_GET_USER_NOINSTR(short);
+DEFINE_GET_USER_NOINSTR(int);
+DEFINE_GET_USER_NOINSTR(long);
+
+#define DEFINE_GET_USER(type) \
+static __always_inline int \
+__get_user_##type(unsigned type *to, const unsigned type __user *from, \
+ unsigned long size) \
+{ \
+ int rc; \
+ \
+ rc = __get_user_##type##_noinstr(to, from, size); \
+ instrument_get_user(*to); \
+ return rc; \
+}
+
+DEFINE_GET_USER(char);
+DEFINE_GET_USER(short);
+DEFINE_GET_USER(int);
+DEFINE_GET_USER(long);
+
#define __get_user(x, ptr) \
({ \
- int __gu_err = -EFAULT; \
+ const __user void *____guptr = (ptr); \
+ int __grc; \
\
__chk_user_ptr(ptr); \
switch (sizeof(*(ptr))) { \
case 1: { \
+ const unsigned char __user *__guptr = ____guptr; \
unsigned char __x; \
\
- __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \
+ __grc = __get_user_char(&__x, __guptr, sizeof(*(ptr))); \
(x) = *(__force __typeof__(*(ptr)) *)&__x; \
break; \
}; \
case 2: { \
+ const unsigned short __user *__guptr = ____guptr; \
unsigned short __x; \
\
- __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \
+ __grc = __get_user_short(&__x, __guptr, sizeof(*(ptr)));\
(x) = *(__force __typeof__(*(ptr)) *)&__x; \
break; \
}; \
case 4: { \
+ const unsigned int __user *__guptr = ____guptr; \
unsigned int __x; \
\
- __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \
+ __grc = __get_user_int(&__x, __guptr, sizeof(*(ptr))); \
(x) = *(__force __typeof__(*(ptr)) *)&__x; \
break; \
}; \
case 8: { \
+ const unsigned long __user *__guptr = ____guptr; \
unsigned long __x; \
\
- __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \
+ __grc = __get_user_long(&__x, __guptr, sizeof(*(ptr))); \
(x) = *(__force __typeof__(*(ptr)) *)&__x; \
break; \
}; \
default: \
- __get_user_bad(); \
+ __grc = __get_user_bad(); \
break; \
} \
- __builtin_expect(__gu_err, 0); \
+ __builtin_expect(__grc, 0); \
})
#define get_user(x, ptr) \
@@ -273,295 +364,139 @@ long __must_check strncpy_from_user(char *dst, const char __user *src, long coun
long __must_check strnlen_user(const char __user *src, long count);
-/*
- * Zero Userspace
- */
-unsigned long __must_check __clear_user(void __user *to, unsigned long size);
+static uaccess_kmsan_or_inline __must_check unsigned long
+__clear_user(void __user *to, unsigned long size)
+{
+ unsigned long osize;
+ int cc;
+
+ while (1) {
+ osize = size;
+ asm_inline volatile(
+ " llilh %%r0,%[spec]\n"
+ "0: mvcos %[to],%[from],%[size]\n"
+ "1: nopr %%r7\n"
+ CC_IPM(cc)
+ EX_TABLE_UA_MVCOS_TO(0b, 0b)
+ EX_TABLE_UA_MVCOS_TO(1b, 0b)
+ : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char __user *)to)
+ : [spec] "I" (0x81), [from] "Q" (*(const char *)empty_zero_page)
+ : CC_CLOBBER_LIST("memory", "0"));
+ if (__builtin_constant_p(osize) && osize <= 4096)
+ return osize - size;
+ if (CC_TRANSFORM(cc) == 0)
+ return osize - size;
+ size -= 4096;
+ to += 4096;
+ }
+}
-static inline unsigned long __must_check clear_user(void __user *to, unsigned long n)
+static __always_inline unsigned long __must_check clear_user(void __user *to, unsigned long n)
{
might_fault();
return __clear_user(to, n);
}
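
For reference, a minimal sketch of how the unchanged clear_user() interface is consumed; the return value is still the number of bytes that could not be cleared (names illustrative).

/* Sketch: zero a user buffer and map a partial fault to -EFAULT. */
static int zero_user_reply(void __user *ubuf, unsigned long len)
{
	if (clear_user(ubuf, len))
		return -EFAULT;
	return 0;
}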
-void *s390_kernel_write(void *dst, const void *src, size_t size);
+void *__s390_kernel_write(void *dst, const void *src, size_t size);
-int __noreturn __put_kernel_bad(void);
+static inline void *s390_kernel_write(void *dst, const void *src, size_t size)
+{
+ if (__is_defined(__DECOMPRESSOR))
+ return memcpy(dst, src, size);
+ return __s390_kernel_write(dst, src, size);
+}
-#define __put_kernel_asm(val, to, insn) \
-({ \
- int __rc; \
- \
- asm volatile( \
- "0: " insn " %[_val],%[_to]\n" \
- "1: xr %[rc],%[rc]\n" \
- "2:\n" \
- EX_TABLE_UA_STORE(0b, 2b, %[rc]) \
- EX_TABLE_UA_STORE(1b, 2b, %[rc]) \
- : [rc] "=d" (__rc), [_to] "+Q" (*(to)) \
- : [_val] "d" (val) \
- : "cc"); \
- __rc; \
-})
+void __noreturn __mvc_kernel_nofault_bad(void);
+
+#if defined(CONFIG_CC_HAS_ASM_GOTO_OUTPUT) && defined(CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS)
-#define __put_kernel_nofault(dst, src, type, err_label) \
+#define __mvc_kernel_nofault(dst, src, type, err_label) \
do { \
- unsigned long __x = (unsigned long)(*((type *)(src))); \
- int __pk_err; \
- \
switch (sizeof(type)) { \
case 1: \
- __pk_err = __put_kernel_asm(__x, (type *)(dst), "stc"); \
- break; \
case 2: \
- __pk_err = __put_kernel_asm(__x, (type *)(dst), "sth"); \
- break; \
case 4: \
- __pk_err = __put_kernel_asm(__x, (type *)(dst), "st"); \
- break; \
case 8: \
- __pk_err = __put_kernel_asm(__x, (type *)(dst), "stg"); \
+ asm goto( \
+ "0: mvc %O[_dst](%[_len],%R[_dst]),%[_src]\n" \
+ "1: nopr %%r7\n" \
+ EX_TABLE(0b, %l[err_label]) \
+ EX_TABLE(1b, %l[err_label]) \
+ : [_dst] "=Q" (*(type *)dst) \
+ : [_src] "Q" (*(type *)(src)), \
+ [_len] "I" (sizeof(type)) \
+ : \
+ : err_label); \
break; \
default: \
- __pk_err = __put_kernel_bad(); \
+ __mvc_kernel_nofault_bad(); \
break; \
} \
- if (unlikely(__pk_err)) \
- goto err_label; \
} while (0)
-int __noreturn __get_kernel_bad(void);
-
-#define __get_kernel_asm(val, from, insn) \
-({ \
- int __rc; \
- \
- asm volatile( \
- "0: " insn " %[_val],%[_from]\n" \
- "1: xr %[rc],%[rc]\n" \
- "2:\n" \
- EX_TABLE_UA_LOAD_REG(0b, 2b, %[rc], %[_val]) \
- EX_TABLE_UA_LOAD_REG(1b, 2b, %[rc], %[_val]) \
- : [rc] "=d" (__rc), [_val] "=d" (val) \
- : [_from] "Q" (*(from)) \
- : "cc"); \
- __rc; \
-})
+#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT) && CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
-#define __get_kernel_nofault(dst, src, type, err_label) \
+#define __mvc_kernel_nofault(dst, src, type, err_label) \
do { \
- int __gk_err; \
+ type *(__dst) = (type *)(dst); \
+ int __rc; \
\
switch (sizeof(type)) { \
- case 1: { \
- unsigned char __x; \
- \
- __gk_err = __get_kernel_asm(__x, (type *)(src), "ic"); \
- *((type *)(dst)) = (type)__x; \
- break; \
- }; \
- case 2: { \
- unsigned short __x; \
- \
- __gk_err = __get_kernel_asm(__x, (type *)(src), "lh"); \
- *((type *)(dst)) = (type)__x; \
- break; \
- }; \
- case 4: { \
- unsigned int __x; \
- \
- __gk_err = __get_kernel_asm(__x, (type *)(src), "l"); \
- *((type *)(dst)) = (type)__x; \
- break; \
- }; \
- case 8: { \
- unsigned long __x; \
- \
- __gk_err = __get_kernel_asm(__x, (type *)(src), "lg"); \
- *((type *)(dst)) = (type)__x; \
+ case 1: \
+ case 2: \
+ case 4: \
+ case 8: \
+ asm_inline volatile( \
+ "0: mvc 0(%[_len],%[_dst]),%[_src]\n" \
+ "1: lhi %[_rc],0\n" \
+ "2:\n" \
+ EX_TABLE_UA_FAULT(0b, 2b, %[_rc]) \
+ EX_TABLE_UA_FAULT(1b, 2b, %[_rc]) \
+ : [_rc] "=d" (__rc), \
+ "=m" (*__dst) \
+ : [_src] "Q" (*(type *)(src)), \
+ [_dst] "a" (__dst), \
+ [_len] "I" (sizeof(type))); \
+ if (__rc) \
+ goto err_label; \
break; \
- }; \
default: \
- __gk_err = __get_kernel_bad(); \
+ __mvc_kernel_nofault_bad(); \
break; \
} \
- if (unlikely(__gk_err)) \
- goto err_label; \
} while (0)
-void __cmpxchg_user_key_called_with_bad_pointer(void);
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT && CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
-#define CMPXCHG_USER_KEY_MAX_LOOPS 128
+#define arch_get_kernel_nofault __mvc_kernel_nofault
+#define arch_put_kernel_nofault __mvc_kernel_nofault
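
With arch_get_kernel_nofault and arch_put_kernel_nofault both routed through __mvc_kernel_nofault, the generic nofault helpers behave as before. A hedged sketch of a consumer using the generic get_kernel_nofault() wrapper (not part of this patch):

/* Sketch: probe a possibly invalid kernel pointer without taking a fault. */
static int peek_kernel_long(const unsigned long *ptr, unsigned long *val)
{
	unsigned long tmp;

	if (get_kernel_nofault(tmp, ptr))	/* MVC faulted, fixup taken */
		return -EFAULT;
	*val = tmp;
	return 0;
}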
-static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
- __uint128_t old, __uint128_t new,
- unsigned long key, int size)
-{
- int rc = 0;
+void __cmpxchg_user_key_called_with_bad_pointer(void);
+int __cmpxchg_user_key1(unsigned long address, unsigned char *uval,
+ unsigned char old, unsigned char new, unsigned long key);
+int __cmpxchg_user_key2(unsigned long address, unsigned short *uval,
+ unsigned short old, unsigned short new, unsigned long key);
+int __cmpxchg_user_key4(unsigned long address, unsigned int *uval,
+ unsigned int old, unsigned int new, unsigned long key);
+int __cmpxchg_user_key8(unsigned long address, unsigned long *uval,
+ unsigned long old, unsigned long new, unsigned long key);
+int __cmpxchg_user_key16(unsigned long address, __uint128_t *uval,
+ __uint128_t old, __uint128_t new, unsigned long key);
+
+static __always_inline int _cmpxchg_user_key(unsigned long address, void *uval,
+ __uint128_t old, __uint128_t new,
+ unsigned long key, int size)
+{
switch (size) {
- case 1: {
- unsigned int prev, shift, mask, _old, _new;
- unsigned long count;
-
- shift = (3 ^ (address & 3)) << 3;
- address ^= address & 3;
- _old = ((unsigned int)old & 0xff) << shift;
- _new = ((unsigned int)new & 0xff) << shift;
- mask = ~(0xff << shift);
- asm volatile(
- " spka 0(%[key])\n"
- " sacf 256\n"
- " llill %[count],%[max_loops]\n"
- "0: l %[prev],%[address]\n"
- "1: nr %[prev],%[mask]\n"
- " xilf %[mask],0xffffffff\n"
- " or %[new],%[prev]\n"
- " or %[prev],%[tmp]\n"
- "2: lr %[tmp],%[prev]\n"
- "3: cs %[prev],%[new],%[address]\n"
- "4: jnl 5f\n"
- " xr %[tmp],%[prev]\n"
- " xr %[new],%[tmp]\n"
- " nr %[tmp],%[mask]\n"
- " jnz 5f\n"
- " brct %[count],2b\n"
- "5: sacf 768\n"
- " spka %[default_key]\n"
- EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev])
- EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev])
- EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev])
- EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev])
- : [rc] "+&d" (rc),
- [prev] "=&d" (prev),
- [address] "+Q" (*(int *)address),
- [tmp] "+&d" (_old),
- [new] "+&d" (_new),
- [mask] "+&d" (mask),
- [count] "=a" (count)
- : [key] "%[count]" (key << 4),
- [default_key] "J" (PAGE_DEFAULT_KEY),
- [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
- : "memory", "cc");
- *(unsigned char *)uval = prev >> shift;
- if (!count)
- rc = -EAGAIN;
- return rc;
- }
- case 2: {
- unsigned int prev, shift, mask, _old, _new;
- unsigned long count;
-
- shift = (2 ^ (address & 2)) << 3;
- address ^= address & 2;
- _old = ((unsigned int)old & 0xffff) << shift;
- _new = ((unsigned int)new & 0xffff) << shift;
- mask = ~(0xffff << shift);
- asm volatile(
- " spka 0(%[key])\n"
- " sacf 256\n"
- " llill %[count],%[max_loops]\n"
- "0: l %[prev],%[address]\n"
- "1: nr %[prev],%[mask]\n"
- " xilf %[mask],0xffffffff\n"
- " or %[new],%[prev]\n"
- " or %[prev],%[tmp]\n"
- "2: lr %[tmp],%[prev]\n"
- "3: cs %[prev],%[new],%[address]\n"
- "4: jnl 5f\n"
- " xr %[tmp],%[prev]\n"
- " xr %[new],%[tmp]\n"
- " nr %[tmp],%[mask]\n"
- " jnz 5f\n"
- " brct %[count],2b\n"
- "5: sacf 768\n"
- " spka %[default_key]\n"
- EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev])
- EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev])
- EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev])
- EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev])
- : [rc] "+&d" (rc),
- [prev] "=&d" (prev),
- [address] "+Q" (*(int *)address),
- [tmp] "+&d" (_old),
- [new] "+&d" (_new),
- [mask] "+&d" (mask),
- [count] "=a" (count)
- : [key] "%[count]" (key << 4),
- [default_key] "J" (PAGE_DEFAULT_KEY),
- [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
- : "memory", "cc");
- *(unsigned short *)uval = prev >> shift;
- if (!count)
- rc = -EAGAIN;
- return rc;
- }
- case 4: {
- unsigned int prev = old;
-
- asm volatile(
- " spka 0(%[key])\n"
- " sacf 256\n"
- "0: cs %[prev],%[new],%[address]\n"
- "1: sacf 768\n"
- " spka %[default_key]\n"
- EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
- EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
- : [rc] "+&d" (rc),
- [prev] "+&d" (prev),
- [address] "+Q" (*(int *)address)
- : [new] "d" ((unsigned int)new),
- [key] "a" (key << 4),
- [default_key] "J" (PAGE_DEFAULT_KEY)
- : "memory", "cc");
- *(unsigned int *)uval = prev;
- return rc;
- }
- case 8: {
- unsigned long prev = old;
-
- asm volatile(
- " spka 0(%[key])\n"
- " sacf 256\n"
- "0: csg %[prev],%[new],%[address]\n"
- "1: sacf 768\n"
- " spka %[default_key]\n"
- EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
- EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
- : [rc] "+&d" (rc),
- [prev] "+&d" (prev),
- [address] "+QS" (*(long *)address)
- : [new] "d" ((unsigned long)new),
- [key] "a" (key << 4),
- [default_key] "J" (PAGE_DEFAULT_KEY)
- : "memory", "cc");
- *(unsigned long *)uval = prev;
- return rc;
- }
- case 16: {
- __uint128_t prev = old;
-
- asm volatile(
- " spka 0(%[key])\n"
- " sacf 256\n"
- "0: cdsg %[prev],%[new],%[address]\n"
- "1: sacf 768\n"
- " spka %[default_key]\n"
- EX_TABLE_UA_LOAD_REGPAIR(0b, 1b, %[rc], %[prev])
- EX_TABLE_UA_LOAD_REGPAIR(1b, 1b, %[rc], %[prev])
- : [rc] "+&d" (rc),
- [prev] "+&d" (prev),
- [address] "+QS" (*(__int128_t *)address)
- : [new] "d" (new),
- [key] "a" (key << 4),
- [default_key] "J" (PAGE_DEFAULT_KEY)
- : "memory", "cc");
- *(__uint128_t *)uval = prev;
- return rc;
- }
+ case 1: return __cmpxchg_user_key1(address, uval, old, new, key);
+ case 2: return __cmpxchg_user_key2(address, uval, old, new, key);
+ case 4: return __cmpxchg_user_key4(address, uval, old, new, key);
+ case 8: return __cmpxchg_user_key8(address, uval, old, new, key);
+ case 16: return __cmpxchg_user_key16(address, uval, old, new, key);
+ default: __cmpxchg_user_key_called_with_bad_pointer();
}
- __cmpxchg_user_key_called_with_bad_pointer();
- return rc;
+ return 0;
}
/**
@@ -593,8 +528,8 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
BUILD_BUG_ON(sizeof(*(__ptr)) != sizeof(*(__uval))); \
might_fault(); \
__chk_user_ptr(__ptr); \
- __cmpxchg_user_key((unsigned long)(__ptr), (void *)(__uval), \
- (old), (new), (key), sizeof(*(__ptr))); \
+ _cmpxchg_user_key((unsigned long)(__ptr), (void *)(__uval), \
+ (old), (new), (key), sizeof(*(__ptr))); \
})
#endif /* __S390_UACCESS_H */
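
The key-checked compare-and-swap is now dispatched to out-of-line __cmpxchg_user_keyN() helpers. A sketch of the call pattern, assuming the cmpxchg_user_key() wrapper whose tail is visible in the hunk above; caller and retry policy are illustrative.

/* Sketch: CAS a 4-byte user/guest word under access key 'key'. */
static int cas_guest_word(unsigned int __user *ptr, unsigned int old,
			  unsigned int new, unsigned long key)
{
	unsigned int prev;
	int rc;

	rc = cmpxchg_user_key(ptr, &prev, old, new, key);
	if (rc)
		return rc;			/* typically -EFAULT on a fault */
	return prev == old ? 0 : -EAGAIN;	/* illustrative retry policy */
}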
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 4260bc5ce7f8..921c3fb3586b 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -8,7 +8,8 @@
#define _ASM_S390_UNISTD_H_
#include <uapi/asm/unistd.h>
-#include <asm/unistd_nr.h>
+
+#define NR_syscalls (__NR_syscalls)
#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_OLD_READDIR
@@ -27,14 +28,8 @@
#define __ARCH_WANT_SYS_OLDUMOUNT
#define __ARCH_WANT_SYS_SIGPENDING
#define __ARCH_WANT_SYS_SIGPROCMASK
-# ifdef CONFIG_COMPAT
-# define __ARCH_WANT_COMPAT_STAT
-# define __ARCH_WANT_SYS_TIME32
-# define __ARCH_WANT_SYS_UTIME32
-# endif
#define __ARCH_WANT_SYS_FORK
#define __ARCH_WANT_SYS_VFORK
#define __ARCH_WANT_SYS_CLONE
-#define __ARCH_WANT_SYS_CLONE3
#endif /* _ASM_S390_UNISTD_H_ */
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index 0e7bd3873907..8018549a1ad2 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -2,7 +2,7 @@
/*
* Ultravisor Interfaces
*
- * Copyright IBM Corp. 2019, 2022
+ * Copyright IBM Corp. 2019, 2024
*
* Author(s):
* Vasily Gorbik <gor@linux.ibm.com>
@@ -16,7 +16,7 @@
#include <linux/bug.h>
#include <linux/sched.h>
#include <asm/page.h>
-#include <asm/gmap.h>
+#include <asm/asm.h>
#define UVC_CC_OK 0
#define UVC_CC_ERROR 1
@@ -28,9 +28,11 @@
#define UVC_RC_INV_STATE 0x0003
#define UVC_RC_INV_LEN 0x0005
#define UVC_RC_NO_RESUME 0x0007
+#define UVC_RC_MORE_DATA 0x0100
#define UVC_RC_NEED_DESTROY 0x8000
#define UVC_CMD_QUI 0x0001
+#define UVC_CMD_QUERY_KEYS 0x0002
#define UVC_CMD_INIT_UV 0x000f
#define UVC_CMD_CREATE_SEC_CONF 0x0100
#define UVC_CMD_DESTROY_SEC_CONF 0x0101
@@ -61,6 +63,7 @@
#define UVC_CMD_ADD_SECRET 0x1031
#define UVC_CMD_LIST_SECRETS 0x1033
#define UVC_CMD_LOCK_SECRETS 0x1034
+#define UVC_CMD_RETR_SECRET 0x1035
/* Bits in installed uv calls */
enum uv_cmds_inst {
@@ -94,6 +97,8 @@ enum uv_cmds_inst {
BIT_UVC_CMD_ADD_SECRET = 29,
BIT_UVC_CMD_LIST_SECRETS = 30,
BIT_UVC_CMD_LOCK_SECRETS = 31,
+ BIT_UVC_CMD_RETR_SECRET = 33,
+ BIT_UVC_CMD_QUERY_KEYS = 34,
};
enum uv_feat_ind {
@@ -140,11 +145,27 @@ struct uv_cb_qui {
u64 reservedf0; /* 0x00f0 */
u64 supp_add_secret_req_ver; /* 0x00f8 */
u64 supp_add_secret_pcf; /* 0x0100 */
- u64 supp_secret_types; /* 0x0180 */
- u16 max_secrets; /* 0x0110 */
- u8 reserved112[0x120 - 0x112]; /* 0x0112 */
+ u64 supp_secret_types; /* 0x0108 */
+ u16 max_assoc_secrets; /* 0x0110 */
+ u16 max_retr_secrets; /* 0x0112 */
+ u8 reserved114[0x120 - 0x114]; /* 0x0114 */
} __packed __aligned(8);
+struct uv_key_hash {
+ u64 dword[4];
+} __packed __aligned(8);
+
+#define UVC_QUERY_KEYS_IDX_HK 0
+#define UVC_QUERY_KEYS_IDX_BACK_HK 1
+
+/* Query Ultravisor Keys */
+struct uv_cb_query_keys {
+ struct uv_cb_header header; /* 0x0000 */
+ u64 reserved08[3]; /* 0x0008 */
+ struct uv_key_hash key_hashes[15]; /* 0x0020 */
+} __packed __aligned(8);
+static_assert(sizeof(struct uv_cb_query_keys) == 0x200);
+
/* Initialize Ultravisor */
struct uv_cb_init {
struct uv_cb_header header;
@@ -317,7 +338,6 @@ struct uv_cb_dump_complete {
* A common UV call struct for pv guests that contains a single address
* Examples:
* Add Secret
- * List Secrets
*/
struct uv_cb_guest_addr {
struct uv_cb_header header;
@@ -326,18 +346,102 @@ struct uv_cb_guest_addr {
u64 reserved28[4];
} __packed __aligned(8);
+#define UVC_RC_RETR_SECR_BUF_SMALL 0x0109
+#define UVC_RC_RETR_SECR_STORE_EMPTY 0x010f
+#define UVC_RC_RETR_SECR_INV_IDX 0x0110
+#define UVC_RC_RETR_SECR_INV_SECRET 0x0111
+
+struct uv_cb_retr_secr {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u16 secret_idx;
+ u16 reserved1a;
+ u32 buf_size;
+ u64 buf_addr;
+ u64 reserved28[4];
+} __packed __aligned(8);
+
+struct uv_cb_list_secrets {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u8 reserved18[6];
+ u16 start_idx;
+ u64 list_addr;
+ u64 reserved28[4];
+} __packed __aligned(8);
+
+enum uv_secret_types {
+ UV_SECRET_INVAL = 0x0,
+ UV_SECRET_NULL = 0x1,
+ UV_SECRET_ASSOCIATION = 0x2,
+ UV_SECRET_PLAIN = 0x3,
+ UV_SECRET_AES_128 = 0x4,
+ UV_SECRET_AES_192 = 0x5,
+ UV_SECRET_AES_256 = 0x6,
+ UV_SECRET_AES_XTS_128 = 0x7,
+ UV_SECRET_AES_XTS_256 = 0x8,
+ UV_SECRET_HMAC_SHA_256 = 0x9,
+ UV_SECRET_HMAC_SHA_512 = 0xa,
+ /* 0x0b - 0x10 reserved */
+ UV_SECRET_ECDSA_P256 = 0x11,
+ UV_SECRET_ECDSA_P384 = 0x12,
+ UV_SECRET_ECDSA_P521 = 0x13,
+ UV_SECRET_ECDSA_ED25519 = 0x14,
+ UV_SECRET_ECDSA_ED448 = 0x15,
+};
+
+/**
+ * uv_secret_list_item_hdr - UV secret metadata.
+ * @index: Index of the secret in the secret list.
+ * @type: Type of the secret. See `enum uv_secret_types`.
+ * @length: Length of the stored secret.
+ */
+struct uv_secret_list_item_hdr {
+ u16 index;
+ u16 type;
+ u32 length;
+} __packed __aligned(8);
+
+#define UV_SECRET_ID_LEN 32
+/**
+ * uv_secret_list_item - UV secret entry.
+ * @hdr: The metadata of this secret.
+ * @id: The ID of this secret, not the secret itself.
+ */
+struct uv_secret_list_item {
+ struct uv_secret_list_item_hdr hdr;
+ u64 reserverd08;
+ u8 id[UV_SECRET_ID_LEN];
+} __packed __aligned(8);
+
+/**
+ * uv_secret_list - UV secret-metadata list.
+ * @num_secr_stored: Number of secrets stored in this list.
+ * @total_num_secrets: Number of secrets stored in the UV for this guest.
+ * @next_secret_idx: Positive index if more secrets are available, otherwise zero.
+ * @secrets: Up to 85 UV-secret metadata entries.
+ */
+struct uv_secret_list {
+ u16 num_secr_stored;
+ u16 total_num_secrets;
+ u16 next_secret_idx;
+ u16 reserved_06;
+ u64 reserved_08;
+ struct uv_secret_list_item secrets[85];
+} __packed __aligned(8);
+static_assert(sizeof(struct uv_secret_list) == PAGE_SIZE);
+
static inline int __uv_call(unsigned long r1, unsigned long r2)
{
int cc;
asm volatile(
- " .insn rrf,0xB9A40000,%[r1],%[r2],0,0\n"
- " ipm %[cc]\n"
- " srl %[cc],28\n"
- : [cc] "=d" (cc)
+ " .insn rrf,0xb9a40000,%[r1],%[r2],0,0\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc)
: [r1] "a" (r1), [r2] "a" (r2)
- : "memory", "cc");
- return cc;
+ : CC_CLOBBER_LIST("memory"));
+ return CC_TRANSFORM(cc);
}
static inline int uv_call(unsigned long r1, unsigned long r2)
@@ -382,6 +486,48 @@ static inline int uv_cmd_nodata(u64 handle, u16 cmd, u16 *rc, u16 *rrc)
return cc ? -EINVAL : 0;
}
+/**
+ * uv_list_secrets() - Do a List Secrets UVC.
+ *
+ * @buf: Buffer to write list into; size of one page.
+ * @start_idx: The smallest index that should be included in the list.
+ * For the first invocation use 0.
+ * @rc: Pointer to store the return code or NULL.
+ * @rrc: Pointer to store the return reason code or NULL.
+ *
+ * This function calls the List Secrets UVC. The result is written into `buf`,
+ * which must be at least one page of writable memory.
+ * `buf` consists of:
+ * * %struct uv_secret_list_hdr
+ * * %struct uv_secret_list_item (multiple)
+ *
+ * For `start_idx` use _0_ for the first call. If more secrets are available
+ * than fit into the page, `rc` is `UVC_RC_MORE_DATA`.
+ * In this case use `uv_secret_list_hdr.next_secret_idx` for `start_idx`.
+ *
+ * Context: might sleep.
+ *
+ * Return: The UVC condition code.
+ */
+static inline int uv_list_secrets(struct uv_secret_list *buf, u16 start_idx,
+ u16 *rc, u16 *rrc)
+{
+ struct uv_cb_list_secrets uvcb = {
+ .header.len = sizeof(uvcb),
+ .header.cmd = UVC_CMD_LIST_SECRETS,
+ .start_idx = start_idx,
+ .list_addr = (u64)buf,
+ };
+ int cc = uv_call_sched(0, (u64)&uvcb);
+
+ if (rc)
+ *rc = uvcb.header.rc;
+ if (rrc)
+ *rrc = uvcb.header.rrc;
+
+ return cc;
+}
+
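A sketch of the iteration pattern the kerneldoc above describes: start at index 0 and continue from next_secret_idx while rc is UVC_RC_MORE_DATA (the callback is illustrative).

/* Sketch: walk all UV secret metadata entries one page at a time. */
static int walk_uv_secrets(struct uv_secret_list *page,
			   void (*handle)(const struct uv_secret_list_item *))
{
	u16 rc, rrc, idx = 0;
	int i, cc;

	do {
		cc = uv_list_secrets(page, idx, &rc, &rrc);
		if (cc)
			return -EIO;
		for (i = 0; i < page->num_secr_stored; i++)
			handle(&page->secrets[i]);
		idx = page->next_secret_idx;
	} while (rc == UVC_RC_MORE_DATA);
	return 0;
}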
struct uv_info {
unsigned long inst_calls_list[4];
unsigned long uv_base_stor_len;
@@ -402,7 +548,8 @@ struct uv_info {
unsigned long supp_add_secret_req_ver;
unsigned long supp_add_secret_pcf;
unsigned long supp_secret_types;
- unsigned short max_secrets;
+ unsigned short max_assoc_secrets;
+ unsigned short max_retr_secrets;
};
extern struct uv_info uv_info;
@@ -414,7 +561,6 @@ static inline bool uv_has_feature(u8 feature_bit)
return test_bit_inv(feature_bit, &uv_info.uv_feature_indications);
}
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
extern int prot_virt_guest;
static inline int is_prot_virt_guest(void)
@@ -442,7 +588,10 @@ static inline int share(unsigned long addr, u16 cmd)
if (!uv_call(0, (u64)&uvcb))
return 0;
- return -EINVAL;
+ pr_err("%s UVC failed (rc: 0x%x, rrc: 0x%x), possible hypervisor bug.\n",
+ uvcb.header.cmd == UVC_CMD_SET_SHARED_ACCESS ? "Share" : "Unshare",
+ uvcb.header.rc, uvcb.header.rrc);
+ panic("System security cannot be guaranteed unless the system panics now.\n");
}
/*
@@ -466,13 +615,11 @@ static inline int uv_remove_shared(unsigned long addr)
return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS);
}
-#else
-#define is_prot_virt_guest() 0
-static inline int uv_set_shared(unsigned long addr) { return 0; }
-static inline int uv_remove_shared(unsigned long addr) { return 0; }
-#endif
+int uv_find_secret(const u8 secret_id[UV_SECRET_ID_LEN],
+ struct uv_secret_list *list,
+ struct uv_secret_list_item_hdr *secret);
+int uv_retrieve_secret(u16 secret_idx, u8 *buf, size_t buf_size);
-#if IS_ENABLED(CONFIG_KVM)
extern int prot_virt_host;
static inline int is_prot_virt_host(void)
@@ -481,37 +628,13 @@ static inline int is_prot_virt_host(void)
}
int uv_pin_shared(unsigned long paddr);
-int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
-int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
-int uv_destroy_owned_page(unsigned long paddr);
+int uv_destroy_folio(struct folio *folio);
+int uv_destroy_pte(pte_t pte);
+int uv_convert_from_secure_pte(pte_t pte);
+int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb);
int uv_convert_from_secure(unsigned long paddr);
-int uv_convert_owned_from_secure(unsigned long paddr);
-int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
+int uv_convert_from_secure_folio(struct folio *folio);
void setup_uv(void);
-#else
-#define is_prot_virt_host() 0
-static inline void setup_uv(void) {}
-
-static inline int uv_pin_shared(unsigned long paddr)
-{
- return 0;
-}
-
-static inline int uv_destroy_owned_page(unsigned long paddr)
-{
- return 0;
-}
-
-static inline int uv_convert_from_secure(unsigned long paddr)
-{
- return 0;
-}
-
-static inline int uv_convert_owned_from_secure(unsigned long paddr)
-{
- return 0;
-}
-#endif
#endif /* _ASM_S390_UV_H */
diff --git a/arch/s390/include/asm/vdso-symbols.h b/arch/s390/include/asm/vdso-symbols.h
new file mode 100644
index 000000000000..e3561e67c4e3
--- /dev/null
+++ b/arch/s390/include/asm/vdso-symbols.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __S390_VDSO_SYMBOLS_H__
+#define __S390_VDSO_SYMBOLS_H__
+
+#include <generated/vdso-offsets.h>
+
+#define VDSO_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso_offset_##name))
+
+#endif /* __S390_VDSO_SYMBOLS_H__ */
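
A hedged sketch of how the consolidated VDSO_SYMBOL() macro is meant to be used; the "sigreturn" symbol name is an assumption standing in for whatever <generated/vdso-offsets.h> actually provides.

/* Sketch: resolve a vDSO entry point in the mapping of a given task. */
static unsigned long vdso_trampoline(struct task_struct *tsk)
{
	return VDSO_SYMBOL(tsk, sigreturn);	/* illustrative symbol name */
}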
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index 53165aa7813a..8e2fffa0ca68 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -4,30 +4,13 @@
#include <vdso/datapage.h>
-#ifndef __ASSEMBLY__
-
-#include <generated/vdso64-offsets.h>
-#ifdef CONFIG_COMPAT
-#include <generated/vdso32-offsets.h>
-#endif
-
-#define VDSO64_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso64_offset_##name))
-#ifdef CONFIG_COMPAT
-#define VDSO32_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso32_offset_##name))
-#else
-#define VDSO32_SYMBOL(tsk, name) (-1UL)
-#endif
-
-extern struct vdso_data *vdso_data;
+#ifndef __ASSEMBLER__
int vdso_getcpu_init(void);
-#endif /* __ASSEMBLY__ */
-
-/* Default link address for the vDSO */
-#define VDSO_LBASE 0
+#endif /* __ASSEMBLER__ */
-#define __VVAR_PAGES 2
+#define __VDSO_PAGES 4
#define VDSO_VERSION_STRING LINUX_2.6.29
diff --git a/arch/s390/include/asm/vdso/data.h b/arch/s390/include/asm/vdso/data.h
deleted file mode 100644
index 0e2b40ef69b0..000000000000
--- a/arch/s390/include/asm/vdso/data.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __S390_ASM_VDSO_DATA_H
-#define __S390_ASM_VDSO_DATA_H
-
-#include <linux/types.h>
-
-struct arch_vdso_data {
- __s64 tod_steering_delta;
- __u64 tod_steering_end;
-};
-
-#endif /* __S390_ASM_VDSO_DATA_H */
diff --git a/arch/s390/include/asm/vdso/getrandom.h b/arch/s390/include/asm/vdso/getrandom.h
new file mode 100644
index 000000000000..6741a27199f8
--- /dev/null
+++ b/arch/s390/include/asm/vdso/getrandom.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_VDSO_GETRANDOM_H
+#define __ASM_VDSO_GETRANDOM_H
+
+#ifndef __ASSEMBLER__
+
+#include <vdso/datapage.h>
+#include <asm/vdso/vsyscall.h>
+#include <asm/syscall.h>
+#include <asm/unistd.h>
+#include <asm/page.h>
+
+/**
+ * getrandom_syscall - Invoke the getrandom() syscall.
+ * @buffer: Destination buffer to fill with random bytes.
+ * @len: Size of @buffer in bytes.
+ * @flags: Zero or more GRND_* flags.
+ * Returns: The number of random bytes written to @buffer, or a negative value indicating an error.
+ */
+static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
+{
+ return syscall3(__NR_getrandom, (long)buffer, (long)len, (long)flags);
+}
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* __ASM_VDSO_GETRANDOM_H */
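
The fallback contract matches getrandom(2): a non-negative return is the number of bytes written, a negative return is an error. A small sketch of a retry loop built on it (illustrative, not part of this patch):

/* Sketch: keep calling the fallback until the whole buffer is filled. */
static ssize_t getrandom_fill(void *buffer, size_t len, unsigned int flags)
{
	size_t done = 0;
	ssize_t ret;

	while (done < len) {
		ret = getrandom_syscall((char *)buffer + done, len - done, flags);
		if (ret < 0)
			return ret;
		done += ret;
	}
	return done;
}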
diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h
index 7937765ccfa5..c31ac5f61c83 100644
--- a/arch/s390/include/asm/vdso/gettimeofday.h
+++ b/arch/s390/include/asm/vdso/gettimeofday.h
@@ -14,20 +14,9 @@
#include <linux/compiler.h>
-static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
+static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_time_data *vd)
{
- return _vdso_data;
-}
-
-static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *vd)
-{
- u64 adj, now;
-
- now = get_tod_clock();
- adj = vd->arch_data.tod_steering_end - now;
- if (unlikely((s64) adj > 0))
- now += (vd->arch_data.tod_steering_delta < 0) ? (adj >> 15) : -(adj >> 15);
- return now;
+ return get_tod_clock() - vd->arch_data.tod_delta;
}
static __always_inline
@@ -49,12 +38,4 @@ long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts)
return syscall2(__NR_clock_getres, (long)clkid, (long)ts);
}
-#ifdef CONFIG_TIME_NS
-static __always_inline
-const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
-{
- return _timens_data;
-}
-#endif
-
#endif
diff --git a/arch/s390/include/asm/vdso/time_data.h b/arch/s390/include/asm/vdso/time_data.h
new file mode 100644
index 000000000000..25c4e0d9f596
--- /dev/null
+++ b/arch/s390/include/asm/vdso/time_data.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __S390_ASM_VDSO_TIME_DATA_H
+#define __S390_ASM_VDSO_TIME_DATA_H
+
+#include <linux/types.h>
+
+struct arch_vdso_time_data {
+ __s64 tod_delta;
+};
+
+#endif /* __S390_ASM_VDSO_TIME_DATA_H */
diff --git a/arch/s390/include/asm/vdso/vsyscall.h b/arch/s390/include/asm/vdso/vsyscall.h
index 6c67c08cefdd..b00acec8ddbc 100644
--- a/arch/s390/include/asm/vdso/vsyscall.h
+++ b/arch/s390/include/asm/vdso/vsyscall.h
@@ -2,25 +2,15 @@
#ifndef __ASM_VDSO_VSYSCALL_H
#define __ASM_VDSO_VSYSCALL_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/hrtimer.h>
-#include <linux/timekeeper_internal.h>
#include <vdso/datapage.h>
#include <asm/vdso.h>
-/*
- * Update the vDSO data page to keep in sync with kernel timekeeping.
- */
-
-static __always_inline struct vdso_data *__s390_get_k_vdso_data(void)
-{
- return vdso_data;
-}
-#define __arch_get_k_vdso_data __s390_get_k_vdso_data
/* The asm-generic header needs to be included after the definitions above */
#include <asm-generic/vdso/vsyscall.h>
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h
index 561c91c1a87c..9d25fb35a042 100644
--- a/arch/s390/include/asm/vtime.h
+++ b/arch/s390/include/asm/vtime.h
@@ -4,16 +4,20 @@
static inline void update_timer_sys(void)
{
- S390_lowcore.system_timer += S390_lowcore.last_update_timer - S390_lowcore.exit_timer;
- S390_lowcore.user_timer += S390_lowcore.exit_timer - S390_lowcore.sys_enter_timer;
- S390_lowcore.last_update_timer = S390_lowcore.sys_enter_timer;
+ struct lowcore *lc = get_lowcore();
+
+ lc->system_timer += lc->last_update_timer - lc->exit_timer;
+ lc->user_timer += lc->exit_timer - lc->sys_enter_timer;
+ lc->last_update_timer = lc->sys_enter_timer;
}
static inline void update_timer_mcck(void)
{
- S390_lowcore.system_timer += S390_lowcore.last_update_timer - S390_lowcore.exit_timer;
- S390_lowcore.user_timer += S390_lowcore.exit_timer - S390_lowcore.mcck_enter_timer;
- S390_lowcore.last_update_timer = S390_lowcore.mcck_enter_timer;
+ struct lowcore *lc = get_lowcore();
+
+ lc->system_timer += lc->last_update_timer - lc->exit_timer;
+ lc->user_timer += lc->exit_timer - lc->mcck_enter_timer;
+ lc->last_update_timer = lc->mcck_enter_timer;
}
#endif /* _S390_VTIME_H */
diff --git a/arch/s390/include/asm/word-at-a-time.h b/arch/s390/include/asm/word-at-a-time.h
index 203acd6e431b..eaa19dee7699 100644
--- a/arch/s390/include/asm/word-at-a-time.h
+++ b/arch/s390/include/asm/word-at-a-time.h
@@ -52,7 +52,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr)
{
unsigned long data;
- asm volatile(
+ asm_inline volatile(
"0: lg %[data],0(%[addr])\n"
"1: nopr %%r7\n"
EX_TABLE_ZEROPAD(0b, 1b, %[data], %[addr])
diff --git a/arch/s390/include/uapi/asm/bitsperlong.h b/arch/s390/include/uapi/asm/bitsperlong.h
index cceaf47b021a..7af27a985f25 100644
--- a/arch/s390/include/uapi/asm/bitsperlong.h
+++ b/arch/s390/include/uapi/asm/bitsperlong.h
@@ -2,11 +2,7 @@
#ifndef __ASM_S390_BITSPERLONG_H
#define __ASM_S390_BITSPERLONG_H
-#ifndef __s390x__
-#define __BITS_PER_LONG 32
-#else
#define __BITS_PER_LONG 64
-#endif
#include <asm-generic/bitsperlong.h>
diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h
index b11d98800458..7c364b33c84d 100644
--- a/arch/s390/include/uapi/asm/dasd.h
+++ b/arch/s390/include/uapi/asm/dasd.h
@@ -294,7 +294,7 @@ struct dasd_snid_ioctl_data {
/********************************************************************************
* SECTION: Definition of IOCTLs
*
- * Here ist how the ioctl-nr should be used:
+ * Here is how the ioctl-nr should be used:
* 0 - 31 DASD driver itself
* 32 - 239 still open
* 240 - 255 reserved for EMC
diff --git a/arch/s390/include/uapi/asm/diag.h b/arch/s390/include/uapi/asm/diag.h
new file mode 100644
index 000000000000..b7e6ccb4ff6e
--- /dev/null
+++ b/arch/s390/include/uapi/asm/diag.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Diag ioctls and their associated structure definitions.
+ *
+ * Copyright IBM Corp. 2024
+ */
+
+#ifndef __S390_UAPI_ASM_DIAG_H
+#define __S390_UAPI_ASM_DIAG_H
+
+#include <linux/types.h>
+
+#define DIAG_MAGIC_STR 'D'
+
+struct diag324_pib {
+ __u64 address;
+ __u64 sequence;
+};
+
+struct diag310_memtop {
+ __u64 address;
+ __u64 nesting_lvl;
+};
+
+/* Diag ioctl definitions */
+#define DIAG324_GET_PIBBUF _IOWR(DIAG_MAGIC_STR, 0x77, struct diag324_pib)
+#define DIAG324_GET_PIBLEN _IOR(DIAG_MAGIC_STR, 0x78, size_t)
+#define DIAG310_GET_STRIDE _IOR(DIAG_MAGIC_STR, 0x79, size_t)
+#define DIAG310_GET_MEMTOPLEN _IOWR(DIAG_MAGIC_STR, 0x7a, size_t)
+#define DIAG310_GET_MEMTOPBUF _IOWR(DIAG_MAGIC_STR, 0x7b, struct diag310_memtop)
+
+#endif /* __S390_UAPI_ASM_DIAG_H */
diff --git a/arch/s390/include/uapi/asm/ipcbuf.h b/arch/s390/include/uapi/asm/ipcbuf.h
index 1030cd186899..9277e76d6d72 100644
--- a/arch/s390/include/uapi/asm/ipcbuf.h
+++ b/arch/s390/include/uapi/asm/ipcbuf.h
@@ -24,9 +24,6 @@ struct ipc64_perm
__kernel_mode_t mode;
unsigned short __pad1;
unsigned short seq;
-#ifndef __s390x__
- unsigned short __pad2;
-#endif /* ! __s390x__ */
unsigned long __unused1;
unsigned long __unused2;
};
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 05eaf6db3ad4..60345dd2cba2 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -469,7 +469,8 @@ struct kvm_s390_vm_cpu_subfunc {
__u8 kdsa[16]; /* with MSA9 */
__u8 sortl[32]; /* with STFLE.150 */
__u8 dfltcc[32]; /* with STFLE.151 */
- __u8 reserved[1728];
+ __u8 pfcr[16]; /* with STFLE.201 */
+ __u8 reserved[1712];
};
#define KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST 6
diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h
index 5ad76471e73f..ca42e941675d 100644
--- a/arch/s390/include/uapi/asm/pkey.h
+++ b/arch/s390/include/uapi/asm/pkey.h
@@ -41,23 +41,29 @@
#define PKEY_KEYTYPE_ECC_P521 7
#define PKEY_KEYTYPE_ECC_ED25519 8
#define PKEY_KEYTYPE_ECC_ED448 9
+#define PKEY_KEYTYPE_AES_XTS_128 10
+#define PKEY_KEYTYPE_AES_XTS_256 11
+#define PKEY_KEYTYPE_HMAC_512 12
+#define PKEY_KEYTYPE_HMAC_1024 13
/* the newer ioctls use a pkey_key_type enum for type information */
enum pkey_key_type {
- PKEY_TYPE_CCA_DATA = (__u32) 1,
- PKEY_TYPE_CCA_CIPHER = (__u32) 2,
- PKEY_TYPE_EP11 = (__u32) 3,
- PKEY_TYPE_CCA_ECC = (__u32) 0x1f,
- PKEY_TYPE_EP11_AES = (__u32) 6,
- PKEY_TYPE_EP11_ECC = (__u32) 7,
+ PKEY_TYPE_CCA_DATA = (__u32)1,
+ PKEY_TYPE_CCA_CIPHER = (__u32)2,
+ PKEY_TYPE_EP11 = (__u32)3,
+ PKEY_TYPE_CCA_ECC = (__u32)0x1f,
+ PKEY_TYPE_EP11_AES = (__u32)6,
+ PKEY_TYPE_EP11_ECC = (__u32)7,
+ PKEY_TYPE_PROTKEY = (__u32)8,
+ PKEY_TYPE_UVSECRET = (__u32)9,
};
/* the newer ioctls use a pkey_key_size enum for key size information */
enum pkey_key_size {
- PKEY_SIZE_AES_128 = (__u32) 128,
- PKEY_SIZE_AES_192 = (__u32) 192,
- PKEY_SIZE_AES_256 = (__u32) 256,
- PKEY_SIZE_UNKNOWN = (__u32) 0xFFFFFFFF,
+ PKEY_SIZE_AES_128 = (__u32)128,
+ PKEY_SIZE_AES_192 = (__u32)192,
+ PKEY_SIZE_AES_256 = (__u32)256,
+ PKEY_SIZE_UNKNOWN = (__u32)0xFFFFFFFF,
};
/* some of the newer ioctls use these flags */
@@ -120,6 +126,7 @@ struct pkey_genseck {
__u32 keytype; /* in: key type to generate */
struct pkey_seckey seckey; /* out: the secure key blob */
};
+
#define PKEY_GENSECK _IOWR(PKEY_IOCTL_MAGIC, 0x01, struct pkey_genseck)
/*
@@ -132,6 +139,7 @@ struct pkey_clr2seck {
struct pkey_clrkey clrkey; /* in: the clear key value */
struct pkey_seckey seckey; /* out: the secure key blob */
};
+
#define PKEY_CLR2SECK _IOWR(PKEY_IOCTL_MAGIC, 0x02, struct pkey_clr2seck)
/*
@@ -143,6 +151,7 @@ struct pkey_sec2protk {
struct pkey_seckey seckey; /* in: the secure key blob */
struct pkey_protkey protkey; /* out: the protected key */
};
+
#define PKEY_SEC2PROTK _IOWR(PKEY_IOCTL_MAGIC, 0x03, struct pkey_sec2protk)
/*
@@ -153,6 +162,7 @@ struct pkey_clr2protk {
struct pkey_clrkey clrkey; /* in: the clear key value */
struct pkey_protkey protkey; /* out: the protected key */
};
+
#define PKEY_CLR2PROTK _IOWR(PKEY_IOCTL_MAGIC, 0x04, struct pkey_clr2protk)
/*
@@ -164,6 +174,7 @@ struct pkey_findcard {
__u16 cardnr; /* out: card number */
__u16 domain; /* out: domain number */
};
+
#define PKEY_FINDCARD _IOWR(PKEY_IOCTL_MAGIC, 0x05, struct pkey_findcard)
/*
@@ -173,6 +184,7 @@ struct pkey_skey2pkey {
struct pkey_seckey seckey; /* in: the secure key blob */
struct pkey_protkey protkey; /* out: the protected key */
};
+
#define PKEY_SKEY2PKEY _IOWR(PKEY_IOCTL_MAGIC, 0x06, struct pkey_skey2pkey)
/*
@@ -190,6 +202,7 @@ struct pkey_verifykey {
__u16 keysize; /* out: key size in bits */
__u32 attributes; /* out: attribute bits */
};
+
#define PKEY_VERIFYKEY _IOWR(PKEY_IOCTL_MAGIC, 0x07, struct pkey_verifykey)
#define PKEY_VERIFY_ATTR_AES 0x00000001 /* key is an AES key */
#define PKEY_VERIFY_ATTR_OLD_MKVP 0x00000100 /* key has old MKVP value */
@@ -221,6 +234,7 @@ struct pkey_kblob2pkey {
__u32 keylen; /* in: the key blob length */
struct pkey_protkey protkey; /* out: the protected key */
};
+
#define PKEY_KBLOB2PROTK _IOWR(PKEY_IOCTL_MAGIC, 0x0A, struct pkey_kblob2pkey)
/*
@@ -253,6 +267,7 @@ struct pkey_genseck2 {
__u32 keylen; /* in: available key blob buffer size */
/* out: actual key blob size */
};
+
#define PKEY_GENSECK2 _IOWR(PKEY_IOCTL_MAGIC, 0x11, struct pkey_genseck2)
/*
@@ -287,6 +302,7 @@ struct pkey_clr2seck2 {
__u32 keylen; /* in: available key blob buffer size */
/* out: actual key blob size */
};
+
#define PKEY_CLR2SECK2 _IOWR(PKEY_IOCTL_MAGIC, 0x12, struct pkey_clr2seck2)
/*
@@ -324,6 +340,7 @@ struct pkey_verifykey2 {
enum pkey_key_size size; /* out: the key size */
__u32 flags; /* out: additional key info flags */
};
+
#define PKEY_VERIFYKEY2 _IOWR(PKEY_IOCTL_MAGIC, 0x17, struct pkey_verifykey2)
/*
@@ -346,6 +363,7 @@ struct pkey_kblob2pkey2 {
__u32 apqn_entries; /* in: # of apqn target list entries */
struct pkey_protkey protkey; /* out: the protected key */
};
+
#define PKEY_KBLOB2PROTK2 _IOWR(PKEY_IOCTL_MAGIC, 0x1A, struct pkey_kblob2pkey2)
/*
@@ -382,6 +400,7 @@ struct pkey_apqns4key {
__u32 apqn_entries; /* in: max # of apqn entries in the list */
/* out: # apqns stored into the list */
};
+
#define PKEY_APQNS4K _IOWR(PKEY_IOCTL_MAGIC, 0x1B, struct pkey_apqns4key)
/*
@@ -421,6 +440,7 @@ struct pkey_apqns4keytype {
__u32 apqn_entries; /* in: max # of apqn entries in the list */
/* out: # apqns stored into the list */
};
+
#define PKEY_APQNS4KT _IOWR(PKEY_IOCTL_MAGIC, 0x1C, struct pkey_apqns4keytype)
/*
@@ -447,6 +467,7 @@ struct pkey_kblob2pkey3 {
__u32 pkeylen; /* in/out: size of pkey buffer/actual len of pkey */
__u8 __user *pkey; /* in: pkey blob buffer space ptr */
};
+
#define PKEY_KBLOB2PROTK3 _IOWR(PKEY_IOCTL_MAGIC, 0x1D, struct pkey_kblob2pkey3)
#endif /* _UAPI_PKEY_H */
diff --git a/arch/s390/include/uapi/asm/posix_types.h b/arch/s390/include/uapi/asm/posix_types.h
index 1913613e71b6..ad5ab940d192 100644
--- a/arch/s390/include/uapi/asm/posix_types.h
+++ b/arch/s390/include/uapi/asm/posix_types.h
@@ -26,17 +26,6 @@ typedef unsigned short __kernel_old_gid_t;
#define __kernel_old_uid_t __kernel_old_uid_t
#endif
-#ifndef __s390x__
-
-typedef unsigned long __kernel_ino_t;
-typedef unsigned short __kernel_mode_t;
-typedef unsigned short __kernel_ipc_pid_t;
-typedef unsigned short __kernel_uid_t;
-typedef unsigned short __kernel_gid_t;
-typedef int __kernel_ptrdiff_t;
-
-#else /* __s390x__ */
-
typedef unsigned int __kernel_ino_t;
typedef unsigned int __kernel_mode_t;
typedef int __kernel_ipc_pid_t;
@@ -45,8 +34,6 @@ typedef unsigned int __kernel_gid_t;
typedef long __kernel_ptrdiff_t;
typedef unsigned long __kernel_sigset_t; /* at least 32 bits */
-#endif /* __s390x__ */
-
#define __kernel_ino_t __kernel_ino_t
#define __kernel_mode_t __kernel_mode_t
#define __kernel_ipc_pid_t __kernel_ipc_pid_t
diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h
index bb0826024bb9..ea29ba470e5a 100644
--- a/arch/s390/include/uapi/asm/ptrace.h
+++ b/arch/s390/include/uapi/asm/ptrace.h
@@ -14,94 +14,6 @@
* Offsets in the user_regs_struct. They are used for the ptrace
* system call and in entry.S
*/
-#ifndef __s390x__
-
-#define PT_PSWMASK 0x00
-#define PT_PSWADDR 0x04
-#define PT_GPR0 0x08
-#define PT_GPR1 0x0C
-#define PT_GPR2 0x10
-#define PT_GPR3 0x14
-#define PT_GPR4 0x18
-#define PT_GPR5 0x1C
-#define PT_GPR6 0x20
-#define PT_GPR7 0x24
-#define PT_GPR8 0x28
-#define PT_GPR9 0x2C
-#define PT_GPR10 0x30
-#define PT_GPR11 0x34
-#define PT_GPR12 0x38
-#define PT_GPR13 0x3C
-#define PT_GPR14 0x40
-#define PT_GPR15 0x44
-#define PT_ACR0 0x48
-#define PT_ACR1 0x4C
-#define PT_ACR2 0x50
-#define PT_ACR3 0x54
-#define PT_ACR4 0x58
-#define PT_ACR5 0x5C
-#define PT_ACR6 0x60
-#define PT_ACR7 0x64
-#define PT_ACR8 0x68
-#define PT_ACR9 0x6C
-#define PT_ACR10 0x70
-#define PT_ACR11 0x74
-#define PT_ACR12 0x78
-#define PT_ACR13 0x7C
-#define PT_ACR14 0x80
-#define PT_ACR15 0x84
-#define PT_ORIGGPR2 0x88
-#define PT_FPC 0x90
-/*
- * A nasty fact of life that the ptrace api
- * only supports passing of longs.
- */
-#define PT_FPR0_HI 0x98
-#define PT_FPR0_LO 0x9C
-#define PT_FPR1_HI 0xA0
-#define PT_FPR1_LO 0xA4
-#define PT_FPR2_HI 0xA8
-#define PT_FPR2_LO 0xAC
-#define PT_FPR3_HI 0xB0
-#define PT_FPR3_LO 0xB4
-#define PT_FPR4_HI 0xB8
-#define PT_FPR4_LO 0xBC
-#define PT_FPR5_HI 0xC0
-#define PT_FPR5_LO 0xC4
-#define PT_FPR6_HI 0xC8
-#define PT_FPR6_LO 0xCC
-#define PT_FPR7_HI 0xD0
-#define PT_FPR7_LO 0xD4
-#define PT_FPR8_HI 0xD8
-#define PT_FPR8_LO 0XDC
-#define PT_FPR9_HI 0xE0
-#define PT_FPR9_LO 0xE4
-#define PT_FPR10_HI 0xE8
-#define PT_FPR10_LO 0xEC
-#define PT_FPR11_HI 0xF0
-#define PT_FPR11_LO 0xF4
-#define PT_FPR12_HI 0xF8
-#define PT_FPR12_LO 0xFC
-#define PT_FPR13_HI 0x100
-#define PT_FPR13_LO 0x104
-#define PT_FPR14_HI 0x108
-#define PT_FPR14_LO 0x10C
-#define PT_FPR15_HI 0x110
-#define PT_FPR15_LO 0x114
-#define PT_CR_9 0x118
-#define PT_CR_10 0x11C
-#define PT_CR_11 0x120
-#define PT_IEEE_IP 0x13C
-#define PT_LASTOFF PT_IEEE_IP
-#define PT_ENDREGS 0x140-1
-
-#define GPR_SIZE 4
-#define CR_SIZE 4
-
-#define STACK_FRAME_OVERHEAD 96 /* size of minimum stack frame */
-
-#else /* __s390x__ */
-
#define PT_PSWMASK 0x00
#define PT_PSWADDR 0x08
#define PT_GPR0 0x10
@@ -166,38 +78,6 @@
#define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */
-#endif /* __s390x__ */
-
-#ifndef __s390x__
-
-#define PSW_MASK_PER _AC(0x40000000, UL)
-#define PSW_MASK_DAT _AC(0x04000000, UL)
-#define PSW_MASK_IO _AC(0x02000000, UL)
-#define PSW_MASK_EXT _AC(0x01000000, UL)
-#define PSW_MASK_KEY _AC(0x00F00000, UL)
-#define PSW_MASK_BASE _AC(0x00080000, UL) /* always one */
-#define PSW_MASK_MCHECK _AC(0x00040000, UL)
-#define PSW_MASK_WAIT _AC(0x00020000, UL)
-#define PSW_MASK_PSTATE _AC(0x00010000, UL)
-#define PSW_MASK_ASC _AC(0x0000C000, UL)
-#define PSW_MASK_CC _AC(0x00003000, UL)
-#define PSW_MASK_PM _AC(0x00000F00, UL)
-#define PSW_MASK_RI _AC(0x00000000, UL)
-#define PSW_MASK_EA _AC(0x00000000, UL)
-#define PSW_MASK_BA _AC(0x00000000, UL)
-
-#define PSW_MASK_USER _AC(0x0000FF00, UL)
-
-#define PSW_ADDR_AMODE _AC(0x80000000, UL)
-#define PSW_ADDR_INSN _AC(0x7FFFFFFF, UL)
-
-#define PSW_ASC_PRIMARY _AC(0x00000000, UL)
-#define PSW_ASC_ACCREG _AC(0x00004000, UL)
-#define PSW_ASC_SECONDARY _AC(0x00008000, UL)
-#define PSW_ASC_HOME _AC(0x0000C000, UL)
-
-#else /* __s390x__ */
-
#define PSW_MASK_PER _AC(0x4000000000000000, UL)
#define PSW_MASK_DAT _AC(0x0400000000000000, UL)
#define PSW_MASK_IO _AC(0x0200000000000000, UL)
@@ -224,8 +104,6 @@
#define PSW_ASC_SECONDARY _AC(0x0000800000000000, UL)
#define PSW_ASC_HOME _AC(0x0000C00000000000, UL)
-#endif /* __s390x__ */
-
#define NUM_GPRS 16
#define NUM_FPRS 16
#define NUM_CRS 16
@@ -242,7 +120,8 @@
#define PTRACE_OLDSETOPTIONS 21
#define PTRACE_SYSEMU 31
#define PTRACE_SYSEMU_SINGLESTEP 32
-#ifndef __ASSEMBLY__
+
+#ifndef __ASSEMBLER__
#include <linux/stddef.h>
#include <linux/types.h>
@@ -307,9 +186,7 @@ typedef struct {
#define PER_EM_MASK 0xE8000000UL
typedef struct {
-#ifdef __s390x__
unsigned : 32;
-#endif /* __s390x__ */
unsigned em_branching : 1;
unsigned em_instruction_fetch : 1;
/*
@@ -450,6 +327,6 @@ struct user_regs_struct {
unsigned long ieee_instruction_pointer; /* obsolete, always 0 */
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_S390_PTRACE_H */
diff --git a/arch/s390/include/uapi/asm/schid.h b/arch/s390/include/uapi/asm/schid.h
index a3e1cf168553..d804d1a5b1b3 100644
--- a/arch/s390/include/uapi/asm/schid.h
+++ b/arch/s390/include/uapi/asm/schid.h
@@ -4,7 +4,7 @@
#include <linux/types.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct subchannel_id {
__u32 cssid : 8;
@@ -15,6 +15,6 @@ struct subchannel_id {
__u32 sch_no : 16;
} __attribute__ ((packed, aligned(4)));
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _UAPIASM_SCHID_H */
diff --git a/arch/s390/include/uapi/asm/sigcontext.h b/arch/s390/include/uapi/asm/sigcontext.h
index 8b35033334c4..7c90b30c50fd 100644
--- a/arch/s390/include/uapi/asm/sigcontext.h
+++ b/arch/s390/include/uapi/asm/sigcontext.h
@@ -17,24 +17,12 @@
#define __NUM_VXRS_LOW 16
#define __NUM_VXRS_HIGH 16
-#ifndef __s390x__
-
-/* Has to be at least _NSIG_WORDS from asm/signal.h */
-#define _SIGCONTEXT_NSIG 64
-#define _SIGCONTEXT_NSIG_BPW 32
-/* Size of stack frame allocated when calling signal handler. */
-#define __SIGNAL_FRAMESIZE 96
-
-#else /* __s390x__ */
-
/* Has to be at least _NSIG_WORDS from asm/signal.h */
#define _SIGCONTEXT_NSIG 64
#define _SIGCONTEXT_NSIG_BPW 64
/* Size of stack frame allocated when calling signal handler. */
#define __SIGNAL_FRAMESIZE 160
-#endif /* __s390x__ */
-
#define _SIGCONTEXT_NSIG_WORDS (_SIGCONTEXT_NSIG / _SIGCONTEXT_NSIG_BPW)
#define _SIGMASK_COPY_SIZE (sizeof(unsigned long)*_SIGCONTEXT_NSIG_WORDS)
@@ -66,9 +54,6 @@ typedef struct
typedef struct
{
-#ifndef __s390x__
- unsigned long gprs_high[__NUM_GPRS];
-#endif
unsigned long long vxrs_low[__NUM_VXRS_LOW];
__vector128 vxrs_high[__NUM_VXRS_HIGH];
unsigned char __reserved[128];
diff --git a/arch/s390/include/uapi/asm/stat.h b/arch/s390/include/uapi/asm/stat.h
index ac253d23606b..21599298c2f5 100644
--- a/arch/s390/include/uapi/asm/stat.h
+++ b/arch/s390/include/uapi/asm/stat.h
@@ -8,74 +8,6 @@
#ifndef _S390_STAT_H
#define _S390_STAT_H
-#ifndef __s390x__
-struct __old_kernel_stat {
- unsigned short st_dev;
- unsigned short st_ino;
- unsigned short st_mode;
- unsigned short st_nlink;
- unsigned short st_uid;
- unsigned short st_gid;
- unsigned short st_rdev;
- unsigned long st_size;
- unsigned long st_atime;
- unsigned long st_mtime;
- unsigned long st_ctime;
-};
-
-struct stat {
- unsigned short st_dev;
- unsigned short __pad1;
- unsigned long st_ino;
- unsigned short st_mode;
- unsigned short st_nlink;
- unsigned short st_uid;
- unsigned short st_gid;
- unsigned short st_rdev;
- unsigned short __pad2;
- unsigned long st_size;
- unsigned long st_blksize;
- unsigned long st_blocks;
- unsigned long st_atime;
- unsigned long st_atime_nsec;
- unsigned long st_mtime;
- unsigned long st_mtime_nsec;
- unsigned long st_ctime;
- unsigned long st_ctime_nsec;
- unsigned long __unused4;
- unsigned long __unused5;
-};
-
-/* This matches struct stat64 in glibc2.1, hence the absolutely
- * insane amounts of padding around dev_t's.
- */
-struct stat64 {
- unsigned long long st_dev;
- unsigned int __pad1;
-#define STAT64_HAS_BROKEN_ST_INO 1
- unsigned long __st_ino;
- unsigned int st_mode;
- unsigned int st_nlink;
- unsigned long st_uid;
- unsigned long st_gid;
- unsigned long long st_rdev;
- unsigned int __pad3;
- long long st_size;
- unsigned long st_blksize;
- unsigned char __pad4[4];
- unsigned long __pad5; /* future possible st_blocks high bits */
- unsigned long st_blocks; /* Number 512-byte blocks allocated. */
- unsigned long st_atime;
- unsigned long st_atime_nsec;
- unsigned long st_mtime;
- unsigned long st_mtime_nsec;
- unsigned long st_ctime;
- unsigned long st_ctime_nsec; /* will be high 32 bits of ctime someday */
- unsigned long long st_ino;
-};
-
-#else /* __s390x__ */
-
struct stat {
unsigned long st_dev;
unsigned long st_ino;
@@ -97,8 +29,6 @@ struct stat {
unsigned long __unused[3];
};
-#endif /* __s390x__ */
-
#define STAT_HAVE_NSEC 1
#endif
diff --git a/arch/s390/include/uapi/asm/types.h b/arch/s390/include/uapi/asm/types.h
index 84457dbb26b4..4ab468c5032e 100644
--- a/arch/s390/include/uapi/asm/types.h
+++ b/arch/s390/include/uapi/asm/types.h
@@ -10,7 +10,7 @@
#include <asm-generic/int-ll64.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
typedef unsigned long addr_t;
typedef __signed__ long saddr_t;
@@ -25,6 +25,6 @@ typedef struct {
};
} __attribute__((packed, aligned(4))) __vector128;
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_S390_TYPES_H */
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 01b5fe8b9db6..b0c5afe19db2 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -8,10 +8,6 @@
#ifndef _UAPI_ASM_S390_UNISTD_H_
#define _UAPI_ASM_S390_UNISTD_H_
-#ifdef __s390x__
#include <asm/unistd_64.h>
-#else
-#include <asm/unistd_32.h>
-#endif
#endif /* _UAPI_ASM_S390_UNISTD_H_ */
diff --git a/arch/s390/include/uapi/asm/uvdevice.h b/arch/s390/include/uapi/asm/uvdevice.h
index b9c2f14a6af3..4947f26ad9fb 100644
--- a/arch/s390/include/uapi/asm/uvdevice.h
+++ b/arch/s390/include/uapi/asm/uvdevice.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
- * Copyright IBM Corp. 2022
+ * Copyright IBM Corp. 2022, 2024
* Author(s): Steffen Eiden <seiden@linux.ibm.com>
*/
#ifndef __S390_ASM_UVDEVICE_H
@@ -52,7 +52,7 @@ struct uvio_uvdev_info {
__u64 supp_uvio_cmds;
/*
* If bit `n` is set, the Ultravisor(UV) supports the UV-call
- * corresponding to the IOCTL with nr `n` in the calling contextx (host
+ * corresponding to the IOCTL with nr `n` in the calling context (host
* or guest). The value is only valid if the corresponding bit in
* @supp_uvio_cmds is set as well.
*/
@@ -71,6 +71,7 @@ struct uvio_uvdev_info {
#define UVIO_ATT_ADDITIONAL_MAX_LEN 0x8000
#define UVIO_ADD_SECRET_MAX_LEN 0x100000
#define UVIO_LIST_SECRETS_LEN 0x1000
+#define UVIO_RETR_SECRET_MAX_LEN 0x2000
#define UVIO_DEVICE_NAME "uv"
#define UVIO_TYPE_UVC 'u'
@@ -81,22 +82,25 @@ enum UVIO_IOCTL_NR {
UVIO_IOCTL_ADD_SECRET_NR,
UVIO_IOCTL_LIST_SECRETS_NR,
UVIO_IOCTL_LOCK_SECRETS_NR,
+ UVIO_IOCTL_RETR_SECRET_NR,
/* must be the last entry */
UVIO_IOCTL_NUM_IOCTLS
};
-#define UVIO_IOCTL(nr) _IOWR(UVIO_TYPE_UVC, nr, struct uvio_ioctl_cb)
-#define UVIO_IOCTL_UVDEV_INFO UVIO_IOCTL(UVIO_IOCTL_UVDEV_INFO_NR)
-#define UVIO_IOCTL_ATT UVIO_IOCTL(UVIO_IOCTL_ATT_NR)
-#define UVIO_IOCTL_ADD_SECRET UVIO_IOCTL(UVIO_IOCTL_ADD_SECRET_NR)
-#define UVIO_IOCTL_LIST_SECRETS UVIO_IOCTL(UVIO_IOCTL_LIST_SECRETS_NR)
-#define UVIO_IOCTL_LOCK_SECRETS UVIO_IOCTL(UVIO_IOCTL_LOCK_SECRETS_NR)
+#define UVIO_IOCTL(nr) _IOWR(UVIO_TYPE_UVC, nr, struct uvio_ioctl_cb)
+#define UVIO_IOCTL_UVDEV_INFO UVIO_IOCTL(UVIO_IOCTL_UVDEV_INFO_NR)
+#define UVIO_IOCTL_ATT UVIO_IOCTL(UVIO_IOCTL_ATT_NR)
+#define UVIO_IOCTL_ADD_SECRET UVIO_IOCTL(UVIO_IOCTL_ADD_SECRET_NR)
+#define UVIO_IOCTL_LIST_SECRETS UVIO_IOCTL(UVIO_IOCTL_LIST_SECRETS_NR)
+#define UVIO_IOCTL_LOCK_SECRETS UVIO_IOCTL(UVIO_IOCTL_LOCK_SECRETS_NR)
+#define UVIO_IOCTL_RETR_SECRET UVIO_IOCTL(UVIO_IOCTL_RETR_SECRET_NR)
-#define UVIO_SUPP_CALL(nr) (1ULL << (nr))
-#define UVIO_SUPP_UDEV_INFO UVIO_SUPP_CALL(UVIO_IOCTL_UDEV_INFO_NR)
-#define UVIO_SUPP_ATT UVIO_SUPP_CALL(UVIO_IOCTL_ATT_NR)
-#define UVIO_SUPP_ADD_SECRET UVIO_SUPP_CALL(UVIO_IOCTL_ADD_SECRET_NR)
-#define UVIO_SUPP_LIST_SECRETS UVIO_SUPP_CALL(UVIO_IOCTL_LIST_SECRETS_NR)
-#define UVIO_SUPP_LOCK_SECRETS UVIO_SUPP_CALL(UVIO_IOCTL_LOCK_SECRETS_NR)
+#define UVIO_SUPP_CALL(nr) (1ULL << (nr))
+#define UVIO_SUPP_UDEV_INFO UVIO_SUPP_CALL(UVIO_IOCTL_UDEV_INFO_NR)
+#define UVIO_SUPP_ATT UVIO_SUPP_CALL(UVIO_IOCTL_ATT_NR)
+#define UVIO_SUPP_ADD_SECRET UVIO_SUPP_CALL(UVIO_IOCTL_ADD_SECRET_NR)
+#define UVIO_SUPP_LIST_SECRETS UVIO_SUPP_CALL(UVIO_IOCTL_LIST_SECRETS_NR)
+#define UVIO_SUPP_LOCK_SECRETS UVIO_SUPP_CALL(UVIO_IOCTL_LOCK_SECRETS_NR)
+#define UVIO_SUPP_RETR_SECRET UVIO_SUPP_CALL(UVIO_IOCTL_RETR_SECRET_NR)
#endif /* __S390_ASM_UVDEVICE_H */
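
A userspace-style sketch of checking the new UVIO_SUPP_RETR_SECRET bit before issuing the retrieve-secret ioctl, given a struct uvio_uvdev_info obtained via UVIO_IOCTL_UVDEV_INFO (error handling elided; assumption-level example).

/* Sketch: does the uvdevice accept UVIO_IOCTL_RETR_SECRET? */
static int uvdev_can_retr_secret(const struct uvio_uvdev_info *info)
{
	return !!(info->supp_uvio_cmds & UVIO_SUPP_RETR_SECRET);
}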
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 7241fa194709..42c83d60d6fa 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -36,27 +36,26 @@ CFLAGS_stacktrace.o += -fno-optimize-sibling-calls
CFLAGS_dumpstack.o += -fno-optimize-sibling-calls
CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls
-obj-y := head64.o traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o
+obj-y := head.o traps.o time.o process.o early.o setup.o idle.o vtime.o
obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
-obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o
+obj-y += debug.o irq.o ipl.o dis.o vdso.o cpufeature.o
obj-y += sysinfo.o lgr.o os_info.o ctlreg.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
-obj-y += entry.o reipl.o kdebugfs.o alternative.o
+obj-y += entry.o reipl.o kdebugfs.o alternative.o skey.o
obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
-obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o
+obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o wti.o
+obj-y += diag/
-extra-y += vmlinux.lds
+always-$(KBUILD_BUILTIN) += vmlinux.lds
obj-$(CONFIG_SYSFS) += nospec-sysfs.o
CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE)
+obj-$(CONFIG_SYSFS) += cpacf.o
obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o
+obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o hiperdispatch.o
obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_AUDIT) += audit.o
-compat-obj-$(CONFIG_AUDIT) += compat_audit.o
-obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o
-obj-$(CONFIG_COMPAT) += $(compat-obj-y)
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_KPROBES) += mcount.o
@@ -68,7 +67,7 @@ obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o
obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o
obj-$(CONFIG_UPROBES) += uprobes.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
-
+obj-$(CONFIG_STACKPROTECTOR) += stackprotector.o
obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o
obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o
obj-$(CONFIG_CERT_STORE) += cert_store.o
@@ -77,11 +76,9 @@ obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o
-obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o perf_pai_ext.o
+obj-$(CONFIG_PERF_EVENTS) += perf_pai.o
obj-$(CONFIG_TRACEPOINTS) += trace.o
-obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
# vdso
-obj-y += vdso64/
-obj-$(CONFIG_COMPAT) += vdso32/
+obj-y += vdso/
diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c
index f9efc54ec4b7..6252b7d115dd 100644
--- a/arch/s390/kernel/abs_lowcore.c
+++ b/arch/s390/kernel/abs_lowcore.c
@@ -2,6 +2,7 @@
#include <linux/pgtable.h>
#include <asm/abs_lowcore.h>
+#include <asm/sections.h>
unsigned long __bootdata_preserved(__abs_lowcore);
diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c
index 1ac5f707dd70..90c0e6408992 100644
--- a/arch/s390/kernel/alternative.c
+++ b/arch/s390/kernel/alternative.c
@@ -1,68 +1,90 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/module.h>
-#include <linux/cpu.h>
-#include <linux/smp.h>
-#include <asm/text-patching.h>
+
+#ifndef pr_fmt
+#define pr_fmt(fmt) "alt: " fmt
+#endif
+
+#include <linux/uaccess.h>
+#include <linux/printk.h>
+#include <asm/nospec-branch.h>
+#include <asm/abs_lowcore.h>
#include <asm/alternative.h>
#include <asm/facility.h>
-#include <asm/nospec-branch.h>
+#include <asm/sections.h>
+#include <asm/machine.h>
+
+#ifndef a_debug
+#define a_debug pr_debug
+#endif
+
+#ifndef __kernel_va
+#define __kernel_va(x) (void *)(x)
+#endif
+
+unsigned long __bootdata_preserved(machine_features[1]);
-static int __initdata_or_module alt_instr_disabled;
+struct alt_debug {
+ unsigned long facilities[MAX_FACILITY_BIT / BITS_PER_LONG];
+ unsigned long mfeatures[MAX_MFEATURE_BIT / BITS_PER_LONG];
+ int spec;
+};
-static int __init disable_alternative_instructions(char *str)
+static struct alt_debug __bootdata_preserved(alt_debug);
+
+static void alternative_dump(u8 *old, u8 *new, unsigned int len, unsigned int type, unsigned int data)
{
- alt_instr_disabled = 1;
- return 0;
-}
+ char oinsn[33], ninsn[33];
+ unsigned long kptr;
+ unsigned int pos;
-early_param("noaltinstr", disable_alternative_instructions);
+ for (pos = 0; pos < len && 2 * pos < sizeof(oinsn) - 3; pos++)
+ hex_byte_pack(&oinsn[2 * pos], old[pos]);
+ oinsn[2 * pos] = 0;
+ for (pos = 0; pos < len && 2 * pos < sizeof(ninsn) - 3; pos++)
+ hex_byte_pack(&ninsn[2 * pos], new[pos]);
+ ninsn[2 * pos] = 0;
+ kptr = (unsigned long)__kernel_va(old);
+ a_debug("[%d/%3d] %016lx: %s -> %s\n", type, data, kptr, oinsn, ninsn);
+}
-static void __init_or_module __apply_alternatives(struct alt_instr *start,
- struct alt_instr *end)
+void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx)
{
+ struct alt_debug *d;
struct alt_instr *a;
- u8 *instr, *replacement;
+ bool debug, replace;
+ u8 *old, *new;
/*
* The scan order should be from start to end. A later scanned
* alternative code can overwrite previously scanned alternative code.
*/
+ d = &alt_debug;
for (a = start; a < end; a++) {
- instr = (u8 *)&a->instr_offset + a->instr_offset;
- replacement = (u8 *)&a->repl_offset + a->repl_offset;
-
- if (!__test_facility(a->facility, alt_stfle_fac_list))
+ if (!(a->ctx & ctx))
continue;
- s390_kernel_write(instr, replacement, a->instrlen);
+ switch (a->type) {
+ case ALT_TYPE_FACILITY:
+ replace = test_facility(a->data);
+ debug = __test_facility(a->data, d->facilities);
+ break;
+ case ALT_TYPE_FEATURE:
+ replace = test_machine_feature(a->data);
+ debug = __test_machine_feature(a->data, d->mfeatures);
+ break;
+ case ALT_TYPE_SPEC:
+ replace = nobp_enabled();
+ debug = d->spec;
+ break;
+ default:
+ replace = false;
+ debug = false;
+ }
+ if (!replace)
+ continue;
+ old = (u8 *)&a->instr_offset + a->instr_offset;
+ new = (u8 *)&a->repl_offset + a->repl_offset;
+ if (debug)
+ alternative_dump(old, new, a->instrlen, a->type, a->data);
+ s390_kernel_write(old, new, a->instrlen);
}
}
-
-void __init_or_module apply_alternatives(struct alt_instr *start,
- struct alt_instr *end)
-{
- if (!alt_instr_disabled)
- __apply_alternatives(start, end);
-}
-
-extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-void __init apply_alternative_instructions(void)
-{
- apply_alternatives(__alt_instructions, __alt_instructions_end);
-}
-
-static void do_sync_core(void *info)
-{
- sync_core();
-}
-
-void text_poke_sync(void)
-{
- on_each_cpu(do_sync_core, NULL, 1);
-}
-
-void text_poke_sync_lock(void)
-{
- cpus_read_lock();
- text_poke_sync();
- cpus_read_unlock();
-}
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index f55979f64d49..e1a5b5b54e4f 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -4,17 +4,16 @@
* This code generates raw asm output which is post-processed to extract
* and format the required data.
*/
-
-#define ASM_OFFSETS_C
+#define COMPILE_OFFSETS
#include <linux/kbuild.h>
-#include <linux/kvm_host.h>
#include <linux/sched.h>
#include <linux/purgatory.h>
#include <linux/pgtable.h>
-#include <linux/ftrace.h>
-#include <asm/gmap.h>
+#include <linux/ftrace_regs.h>
+#include <asm/kvm_host_types.h>
#include <asm/stacktrace.h>
+#include <asm/ptrace.h>
int main(void)
{
@@ -22,12 +21,16 @@ int main(void)
OFFSET(__TASK_stack, task_struct, stack);
OFFSET(__TASK_thread, task_struct, thread);
OFFSET(__TASK_pid, task_struct, pid);
+#ifdef CONFIG_STACKPROTECTOR
+ OFFSET(__TASK_stack_canary, task_struct, stack_canary);
+#endif
BLANK();
/* thread struct offsets */
OFFSET(__THREAD_ksp, thread_struct, ksp);
BLANK();
/* thread info offsets */
OFFSET(__TI_flags, task_struct, thread_info.flags);
+ OFFSET(__TI_sie, task_struct, thread_info.sie);
BLANK();
/* pt_regs offsets */
OFFSET(__PT_PSW, pt_regs, psw);
@@ -49,8 +52,8 @@ int main(void)
OFFSET(__PT_R14, pt_regs, gprs[14]);
OFFSET(__PT_R15, pt_regs, gprs[15]);
OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2);
+ OFFSET(__PT_INT_CODE, pt_regs, int_code);
OFFSET(__PT_FLAGS, pt_regs, flags);
- OFFSET(__PT_CR1, pt_regs, cr1);
OFFSET(__PT_LAST_BREAK, pt_regs, last_break);
DEFINE(__PT_SIZE, sizeof(struct pt_regs));
BLANK();
@@ -63,6 +66,8 @@ int main(void)
OFFSET(__SF_SIE_REASON, stack_frame, sie_reason);
OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags);
OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys);
+ OFFSET(__SF_SIE_GUEST_ASCE, stack_frame, sie_guest_asce);
+ OFFSET(__SF_SIE_IRQ, stack_frame, sie_irq);
DEFINE(STACK_FRAME_OVERHEAD, sizeof(struct stack_frame));
BLANK();
OFFSET(__SFUSER_BACKCHAIN, stack_frame_user, back_chain);
@@ -75,7 +80,8 @@ int main(void)
OFFSET(__LC_EXT_CPU_ADDR, lowcore, ext_cpu_addr);
OFFSET(__LC_EXT_INT_CODE, lowcore, ext_int_code);
OFFSET(__LC_PGM_ILC, lowcore, pgm_ilc);
- OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_code);
+ OFFSET(__LC_PGM_CODE, lowcore, pgm_code);
+ OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_int_code);
OFFSET(__LC_DATA_EXC_CODE, lowcore, data_exc_code);
OFFSET(__LC_MON_CLASS_NR, lowcore, mon_class_num);
OFFSET(__LC_PER_CODE, lowcore, per_code);
@@ -110,10 +116,9 @@ int main(void)
OFFSET(__LC_MCK_NEW_PSW, lowcore, mcck_new_psw);
OFFSET(__LC_IO_NEW_PSW, lowcore, io_new_psw);
/* software defined lowcore locations 0x200 - 0xdff*/
- OFFSET(__LC_SAVE_AREA_SYNC, lowcore, save_area_sync);
- OFFSET(__LC_SAVE_AREA_ASYNC, lowcore, save_area_async);
+ OFFSET(__LC_SAVE_AREA, lowcore, save_area);
OFFSET(__LC_SAVE_AREA_RESTART, lowcore, save_area_restart);
- OFFSET(__LC_CPU_FLAGS, lowcore, cpu_flags);
+ OFFSET(__LC_PCPU, lowcore, pcpu);
OFFSET(__LC_RETURN_PSW, lowcore, return_psw);
OFFSET(__LC_RETURN_MCCK_PSW, lowcore, return_mcck_psw);
OFFSET(__LC_SYS_ENTER_TIMER, lowcore, sys_enter_timer);
@@ -122,7 +127,6 @@ int main(void)
OFFSET(__LC_LAST_UPDATE_TIMER, lowcore, last_update_timer);
OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock);
OFFSET(__LC_INT_CLOCK, lowcore, int_clock);
- OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock);
OFFSET(__LC_CURRENT, lowcore, current_task);
OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack);
OFFSET(__LC_ASYNC_STACK, lowcore, async_stack);
@@ -137,9 +141,9 @@ int main(void)
OFFSET(__LC_USER_ASCE, lowcore, user_asce);
OFFSET(__LC_LPP, lowcore, lpp);
OFFSET(__LC_CURRENT_PID, lowcore, current_pid);
- OFFSET(__LC_GMAP, lowcore, gmap);
OFFSET(__LC_LAST_BREAK, lowcore, last_break);
/* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
+ OFFSET(__LC_STACK_CANARY, lowcore, stack_canary);
OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
OFFSET(__LC_VMCORE_INFO, lowcore, vmcore_info);
OFFSET(__LC_OS_INFO, lowcore, os_info);
@@ -160,7 +164,6 @@ int main(void)
OFFSET(__LC_PGM_TDB, lowcore, pgm_tdb);
BLANK();
/* gmap/sie offsets */
- OFFSET(__GMAP_ASCE, gmap, asce);
OFFSET(__SIE_PROG0C, kvm_s390_sie_block, prog0c);
OFFSET(__SIE_PROG20, kvm_s390_sie_block, prog20);
/* kexec_sha_region */
@@ -177,13 +180,9 @@ int main(void)
DEFINE(OLDMEM_SIZE, PARMAREA + offsetof(struct parmarea, oldmem_size));
DEFINE(COMMAND_LINE, PARMAREA + offsetof(struct parmarea, command_line));
DEFINE(MAX_COMMAND_LINE_SIZE, PARMAREA + offsetof(struct parmarea, max_command_line_size));
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- /* function graph return value tracing */
- OFFSET(__FGRAPH_RET_GPR2, fgraph_ret_regs, gpr2);
- OFFSET(__FGRAPH_RET_FP, fgraph_ret_regs, fp);
- DEFINE(__FGRAPH_RET_SIZE, sizeof(struct fgraph_ret_regs));
-#endif
- OFFSET(__FTRACE_REGS_PT_REGS, ftrace_regs, regs);
- DEFINE(__FTRACE_REGS_SIZE, sizeof(struct ftrace_regs));
+ OFFSET(__FTRACE_REGS_PT_REGS, __arch_ftrace_regs, regs);
+ DEFINE(__FTRACE_REGS_SIZE, sizeof(struct __arch_ftrace_regs));
+
+ OFFSET(__PCPU_FLAGS, pcpu, flags);
return 0;
}
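
The new __TASK_stack_canary and __LC_STACK_CANARY entries follow the usual asm-offsets pattern: each OFFSET() line ultimately emits an offsetof() constant that assembler code can reference by name. A tiny stand-alone illustration, using a stand-in structure rather than the kernel's task_struct:

#include <stddef.h>
#include <stdio.h>

/* Stand-in for task_struct; only the layout idea matters here. */
struct task_like {
	void *stack;
	unsigned long stack_canary;
	int pid;
};

int main(void)
{
	/* Roughly what OFFSET(__TASK_stack_canary, task_struct, stack_canary) resolves to. */
	printf("#define __TASK_stack_canary %zu\n",
	       offsetof(struct task_like, stack_canary));
	return 0;
}
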
diff --git a/arch/s390/kernel/audit.c b/arch/s390/kernel/audit.c
index 02051a596b87..7897d9411e13 100644
--- a/arch/s390/kernel/audit.c
+++ b/arch/s390/kernel/audit.c
@@ -3,7 +3,6 @@
#include <linux/types.h>
#include <linux/audit.h>
#include <asm/unistd.h>
-#include "audit.h"
static unsigned dir_class[] = {
#include <asm-generic/audit_dir_write.h>
@@ -32,19 +31,11 @@ static unsigned signal_class[] = {
int audit_classify_arch(int arch)
{
-#ifdef CONFIG_COMPAT
- if (arch == AUDIT_ARCH_S390)
- return 1;
-#endif
return 0;
}
int audit_classify_syscall(int abi, unsigned syscall)
{
-#ifdef CONFIG_COMPAT
- if (abi == AUDIT_ARCH_S390)
- return s390_classify_syscall(syscall);
-#endif
switch(syscall) {
case __NR_open:
return AUDITSC_OPEN;
@@ -63,13 +54,6 @@ int audit_classify_syscall(int abi, unsigned syscall)
static int __init audit_classes_init(void)
{
-#ifdef CONFIG_COMPAT
- audit_register_class(AUDIT_CLASS_WRITE_32, s390_write_class);
- audit_register_class(AUDIT_CLASS_READ_32, s390_read_class);
- audit_register_class(AUDIT_CLASS_DIR_WRITE_32, s390_dir_class);
- audit_register_class(AUDIT_CLASS_CHATTR_32, s390_chattr_class);
- audit_register_class(AUDIT_CLASS_SIGNAL_32, s390_signal_class);
-#endif
audit_register_class(AUDIT_CLASS_WRITE, write_class);
audit_register_class(AUDIT_CLASS_READ, read_class);
audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class);
diff --git a/arch/s390/kernel/audit.h b/arch/s390/kernel/audit.h
deleted file mode 100644
index 4d4b596412ec..000000000000
--- a/arch/s390/kernel/audit.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ARCH_S390_KERNEL_AUDIT_H
-#define __ARCH_S390_KERNEL_AUDIT_H
-
-#include <linux/types.h>
-
-#ifdef CONFIG_COMPAT
-extern int s390_classify_syscall(unsigned);
-extern __u32 s390_dir_class[];
-extern __u32 s390_write_class[];
-extern __u32 s390_read_class[];
-extern __u32 s390_chattr_class[];
-extern __u32 s390_signal_class[];
-#endif /* CONFIG_COMPAT */
-
-#endif /* __ARCH_S390_KERNEL_AUDIT_H */
diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c
index bf983513dd33..c217a5e64094 100644
--- a/arch/s390/kernel/cert_store.c
+++ b/arch/s390/kernel/cert_store.c
@@ -138,7 +138,7 @@ static void cert_store_key_describe(const struct key *key, struct seq_file *m)
* First 64 bytes of the key description is key name in EBCDIC CP 500.
* Convert it to ASCII for displaying in /proc/keys.
*/
- strscpy(ascii, key->description, sizeof(ascii));
+ strscpy(ascii, key->description);
EBCASC_500(ascii, VC_NAME_LEN_BYTES);
seq_puts(m, ascii);
@@ -235,7 +235,7 @@ static int __diag320(unsigned long subcode, void *addr)
{
union register_pair rp = { .even = (unsigned long)addr, };
- asm volatile(
+ asm_inline volatile(
" diag %[rp],%[subcode],0x320\n"
"0: nopr %%r7\n"
EX_TABLE(0b, 0b)
diff --git a/arch/s390/kernel/compat_audit.c b/arch/s390/kernel/compat_audit.c
deleted file mode 100644
index a7c46e8310f0..000000000000
--- a/arch/s390/kernel/compat_audit.c
+++ /dev/null
@@ -1,48 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#undef __s390x__
-#include <linux/audit_arch.h>
-#include <asm/unistd.h>
-#include "audit.h"
-
-unsigned s390_dir_class[] = {
-#include <asm-generic/audit_dir_write.h>
-~0U
-};
-
-unsigned s390_chattr_class[] = {
-#include <asm-generic/audit_change_attr.h>
-~0U
-};
-
-unsigned s390_write_class[] = {
-#include <asm-generic/audit_write.h>
-~0U
-};
-
-unsigned s390_read_class[] = {
-#include <asm-generic/audit_read.h>
-~0U
-};
-
-unsigned s390_signal_class[] = {
-#include <asm-generic/audit_signal.h>
-~0U
-};
-
-int s390_classify_syscall(unsigned syscall)
-{
- switch(syscall) {
- case __NR_open:
- return AUDITSC_OPEN;
- case __NR_openat:
- return AUDITSC_OPENAT;
- case __NR_socketcall:
- return AUDITSC_SOCKETCALL;
- case __NR_execve:
- return AUDITSC_EXECVE;
- case __NR_openat2:
- return AUDITSC_OPENAT2;
- default:
- return AUDITSC_COMPAT;
- }
-}
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
deleted file mode 100644
index f9d418d1b619..000000000000
--- a/arch/s390/kernel/compat_linux.c
+++ /dev/null
@@ -1,289 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * S390 version
- * Copyright IBM Corp. 2000
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- * Gerhard Tonn (ton@de.ibm.com)
- * Thomas Spatzier (tspat@de.ibm.com)
- *
- * Conversion between 31bit and 64bit native syscalls.
- *
- * Heavily inspired by the 32-bit Sparc compat code which is
- * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
- * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
- *
- */
-
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
-#include <linux/file.h>
-#include <linux/signal.h>
-#include <linux/resource.h>
-#include <linux/times.h>
-#include <linux/smp.h>
-#include <linux/sem.h>
-#include <linux/msg.h>
-#include <linux/shm.h>
-#include <linux/uio.h>
-#include <linux/quota.h>
-#include <linux/poll.h>
-#include <linux/personality.h>
-#include <linux/stat.h>
-#include <linux/filter.h>
-#include <linux/highmem.h>
-#include <linux/mman.h>
-#include <linux/ipv6.h>
-#include <linux/in.h>
-#include <linux/icmpv6.h>
-#include <linux/syscalls.h>
-#include <linux/sysctl.h>
-#include <linux/binfmts.h>
-#include <linux/capability.h>
-#include <linux/compat.h>
-#include <linux/vfs.h>
-#include <linux/ptrace.h>
-#include <linux/fadvise.h>
-#include <linux/ipc.h>
-#include <linux/slab.h>
-
-#include <asm/types.h>
-#include <linux/uaccess.h>
-
-#include <net/scm.h>
-#include <net/sock.h>
-
-#include "compat_linux.h"
-
-#ifdef CONFIG_SYSVIPC
-COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, compat_ulong_t, second,
- compat_ulong_t, third, compat_uptr_t, ptr)
-{
- if (call >> 16) /* hack for backward compatibility */
- return -EINVAL;
- return compat_ksys_ipc(call, first, second, third, ptr, third);
-}
-#endif
-
-COMPAT_SYSCALL_DEFINE3(s390_truncate64, const char __user *, path, u32, high, u32, low)
-{
- return ksys_truncate(path, (unsigned long)high << 32 | low);
-}
-
-COMPAT_SYSCALL_DEFINE3(s390_ftruncate64, unsigned int, fd, u32, high, u32, low)
-{
- return ksys_ftruncate(fd, (unsigned long)high << 32 | low);
-}
-
-COMPAT_SYSCALL_DEFINE5(s390_pread64, unsigned int, fd, char __user *, ubuf,
- compat_size_t, count, u32, high, u32, low)
-{
- if ((compat_ssize_t) count < 0)
- return -EINVAL;
- return ksys_pread64(fd, ubuf, count, (unsigned long)high << 32 | low);
-}
-
-COMPAT_SYSCALL_DEFINE5(s390_pwrite64, unsigned int, fd, const char __user *, ubuf,
- compat_size_t, count, u32, high, u32, low)
-{
- if ((compat_ssize_t) count < 0)
- return -EINVAL;
- return ksys_pwrite64(fd, ubuf, count, (unsigned long)high << 32 | low);
-}
-
-COMPAT_SYSCALL_DEFINE4(s390_readahead, int, fd, u32, high, u32, low, s32, count)
-{
- return ksys_readahead(fd, (unsigned long)high << 32 | low, count);
-}
-
-struct stat64_emu31 {
- unsigned long long st_dev;
- unsigned int __pad1;
-#define STAT64_HAS_BROKEN_ST_INO 1
- u32 __st_ino;
- unsigned int st_mode;
- unsigned int st_nlink;
- u32 st_uid;
- u32 st_gid;
- unsigned long long st_rdev;
- unsigned int __pad3;
- long st_size;
- u32 st_blksize;
- unsigned char __pad4[4];
- u32 __pad5; /* future possible st_blocks high bits */
- u32 st_blocks; /* Number 512-byte blocks allocated. */
- u32 st_atime;
- u32 __pad6;
- u32 st_mtime;
- u32 __pad7;
- u32 st_ctime;
- u32 __pad8; /* will be high 32 bits of ctime someday */
- unsigned long st_ino;
-};
-
-static int cp_stat64(struct stat64_emu31 __user *ubuf, struct kstat *stat)
-{
- struct stat64_emu31 tmp;
-
- memset(&tmp, 0, sizeof(tmp));
-
- tmp.st_dev = huge_encode_dev(stat->dev);
- tmp.st_ino = stat->ino;
- tmp.__st_ino = (u32)stat->ino;
- tmp.st_mode = stat->mode;
- tmp.st_nlink = (unsigned int)stat->nlink;
- tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid);
- tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid);
- tmp.st_rdev = huge_encode_dev(stat->rdev);
- tmp.st_size = stat->size;
- tmp.st_blksize = (u32)stat->blksize;
- tmp.st_blocks = (u32)stat->blocks;
- tmp.st_atime = (u32)stat->atime.tv_sec;
- tmp.st_mtime = (u32)stat->mtime.tv_sec;
- tmp.st_ctime = (u32)stat->ctime.tv_sec;
-
- return copy_to_user(ubuf,&tmp,sizeof(tmp)) ? -EFAULT : 0;
-}
-
-COMPAT_SYSCALL_DEFINE2(s390_stat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf)
-{
- struct kstat stat;
- int ret = vfs_stat(filename, &stat);
- if (!ret)
- ret = cp_stat64(statbuf, &stat);
- return ret;
-}
-
-COMPAT_SYSCALL_DEFINE2(s390_lstat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf)
-{
- struct kstat stat;
- int ret = vfs_lstat(filename, &stat);
- if (!ret)
- ret = cp_stat64(statbuf, &stat);
- return ret;
-}
-
-COMPAT_SYSCALL_DEFINE2(s390_fstat64, unsigned int, fd, struct stat64_emu31 __user *, statbuf)
-{
- struct kstat stat;
- int ret = vfs_fstat(fd, &stat);
- if (!ret)
- ret = cp_stat64(statbuf, &stat);
- return ret;
-}
-
-COMPAT_SYSCALL_DEFINE4(s390_fstatat64, unsigned int, dfd, const char __user *, filename,
- struct stat64_emu31 __user *, statbuf, int, flag)
-{
- struct kstat stat;
- int error;
-
- error = vfs_fstatat(dfd, filename, &stat, flag);
- if (error)
- return error;
- return cp_stat64(statbuf, &stat);
-}
-
-/*
- * Linux/i386 didn't use to be able to handle more than
- * 4 system call parameters, so these system calls used a memory
- * block for parameter passing..
- */
-
-struct mmap_arg_struct_emu31 {
- compat_ulong_t addr;
- compat_ulong_t len;
- compat_ulong_t prot;
- compat_ulong_t flags;
- compat_ulong_t fd;
- compat_ulong_t offset;
-};
-
-COMPAT_SYSCALL_DEFINE1(s390_old_mmap, struct mmap_arg_struct_emu31 __user *, arg)
-{
- struct mmap_arg_struct_emu31 a;
-
- if (copy_from_user(&a, arg, sizeof(a)))
- return -EFAULT;
- if (a.offset & ~PAGE_MASK)
- return -EINVAL;
- return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
- a.offset >> PAGE_SHIFT);
-}
-
-COMPAT_SYSCALL_DEFINE1(s390_mmap2, struct mmap_arg_struct_emu31 __user *, arg)
-{
- struct mmap_arg_struct_emu31 a;
-
- if (copy_from_user(&a, arg, sizeof(a)))
- return -EFAULT;
- return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
-}
-
-COMPAT_SYSCALL_DEFINE3(s390_read, unsigned int, fd, char __user *, buf, compat_size_t, count)
-{
- if ((compat_ssize_t) count < 0)
- return -EINVAL;
-
- return ksys_read(fd, buf, count);
-}
-
-COMPAT_SYSCALL_DEFINE3(s390_write, unsigned int, fd, const char __user *, buf, compat_size_t, count)
-{
- if ((compat_ssize_t) count < 0)
- return -EINVAL;
-
- return ksys_write(fd, buf, count);
-}
-
-/*
- * 31 bit emulation wrapper functions for sys_fadvise64/fadvise64_64.
- * These need to rewrite the advise values for POSIX_FADV_{DONTNEED,NOREUSE}
- * because the 31 bit values differ from the 64 bit values.
- */
-
-COMPAT_SYSCALL_DEFINE5(s390_fadvise64, int, fd, u32, high, u32, low, compat_size_t, len, int, advise)
-{
- if (advise == 4)
- advise = POSIX_FADV_DONTNEED;
- else if (advise == 5)
- advise = POSIX_FADV_NOREUSE;
- return ksys_fadvise64_64(fd, (unsigned long)high << 32 | low, len,
- advise);
-}
-
-struct fadvise64_64_args {
- int fd;
- long long offset;
- long long len;
- int advice;
-};
-
-COMPAT_SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args)
-{
- struct fadvise64_64_args a;
-
- if ( copy_from_user(&a, args, sizeof(a)) )
- return -EFAULT;
- if (a.advice == 4)
- a.advice = POSIX_FADV_DONTNEED;
- else if (a.advice == 5)
- a.advice = POSIX_FADV_NOREUSE;
- return ksys_fadvise64_64(a.fd, a.offset, a.len, a.advice);
-}
-
-COMPAT_SYSCALL_DEFINE6(s390_sync_file_range, int, fd, u32, offhigh, u32, offlow,
- u32, nhigh, u32, nlow, unsigned int, flags)
-{
- return ksys_sync_file_range(fd, ((loff_t)offhigh << 32) + offlow,
- ((u64)nhigh << 32) + nlow, flags);
-}
-
-COMPAT_SYSCALL_DEFINE6(s390_fallocate, int, fd, int, mode, u32, offhigh, u32, offlow,
- u32, lenhigh, u32, lenlow)
-{
- return ksys_fallocate(fd, mode, ((loff_t)offhigh << 32) + offlow,
- ((u64)lenhigh << 32) + lenlow);
-}
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
deleted file mode 100644
index ef23739b277c..000000000000
--- a/arch/s390/kernel/compat_linux.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_S390X_S390_H
-#define _ASM_S390X_S390_H
-
-#include <linux/compat.h>
-#include <linux/socket.h>
-#include <linux/syscalls.h>
-#include <asm/ptrace.h>
-
-/*
- * Macro that masks the high order bit of a 32 bit pointer and
- * converts it to a 64 bit pointer.
- */
-#define A(__x) ((unsigned long)((__x) & 0x7FFFFFFFUL))
-#define AA(__x) ((unsigned long)(__x))
-
-/* Now 32bit compatibility types */
-struct ipc_kludge_32 {
- __u32 msgp; /* pointer */
- __s32 msgtyp;
-};
-
-/* asm/sigcontext.h */
-typedef union {
- __u64 d;
- __u32 f;
-} freg_t32;
-
-typedef struct {
- unsigned int fpc;
- unsigned int pad;
- freg_t32 fprs[__NUM_FPRS];
-} _s390_fp_regs32;
-
-typedef struct {
- psw_t32 psw;
- __u32 gprs[__NUM_GPRS];
- __u32 acrs[__NUM_ACRS];
-} _s390_regs_common32;
-
-typedef struct {
- _s390_regs_common32 regs;
- _s390_fp_regs32 fpregs;
-} _sigregs32;
-
-typedef struct {
- __u32 gprs_high[__NUM_GPRS];
- __u64 vxrs_low[__NUM_VXRS_LOW];
- __vector128 vxrs_high[__NUM_VXRS_HIGH];
- __u8 __reserved[128];
-} _sigregs_ext32;
-
-#define _SIGCONTEXT_NSIG32 64
-#define _SIGCONTEXT_NSIG_BPW32 32
-#define __SIGNAL_FRAMESIZE32 96
-#define _SIGMASK_COPY_SIZE32 (sizeof(u32) * 2)
-
-struct sigcontext32 {
- __u32 oldmask[_COMPAT_NSIG_WORDS];
- __u32 sregs; /* pointer */
-};
-
-/* asm/signal.h */
-
-/* asm/ucontext.h */
-struct ucontext32 {
- __u32 uc_flags;
- __u32 uc_link; /* pointer */
- compat_stack_t uc_stack;
- _sigregs32 uc_mcontext;
- compat_sigset_t uc_sigmask;
- /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */
- unsigned char __unused[128 - sizeof(compat_sigset_t)];
- _sigregs_ext32 uc_mcontext_ext;
-};
-
-struct stat64_emu31;
-struct mmap_arg_struct_emu31;
-struct fadvise64_64_args;
-
-long compat_sys_s390_truncate64(const char __user *path, u32 high, u32 low);
-long compat_sys_s390_ftruncate64(unsigned int fd, u32 high, u32 low);
-long compat_sys_s390_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, u32 high, u32 low);
-long compat_sys_s390_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, u32 high, u32 low);
-long compat_sys_s390_readahead(int fd, u32 high, u32 low, s32 count);
-long compat_sys_s390_stat64(const char __user *filename, struct stat64_emu31 __user *statbuf);
-long compat_sys_s390_lstat64(const char __user *filename, struct stat64_emu31 __user *statbuf);
-long compat_sys_s390_fstat64(unsigned int fd, struct stat64_emu31 __user *statbuf);
-long compat_sys_s390_fstatat64(unsigned int dfd, const char __user *filename, struct stat64_emu31 __user *statbuf, int flag);
-long compat_sys_s390_old_mmap(struct mmap_arg_struct_emu31 __user *arg);
-long compat_sys_s390_mmap2(struct mmap_arg_struct_emu31 __user *arg);
-long compat_sys_s390_read(unsigned int fd, char __user *buf, compat_size_t count);
-long compat_sys_s390_write(unsigned int fd, const char __user *buf, compat_size_t count);
-long compat_sys_s390_fadvise64(int fd, u32 high, u32 low, compat_size_t len, int advise);
-long compat_sys_s390_fadvise64_64(struct fadvise64_64_args __user *args);
-long compat_sys_s390_sync_file_range(int fd, u32 offhigh, u32 offlow, u32 nhigh, u32 nlow, unsigned int flags);
-long compat_sys_s390_fallocate(int fd, int mode, u32 offhigh, u32 offlow, u32 lenhigh, u32 lenlow);
-long compat_sys_sigreturn(void);
-long compat_sys_rt_sigreturn(void);
-
-#endif /* _ASM_S390X_S390_H */
diff --git a/arch/s390/kernel/compat_ptrace.h b/arch/s390/kernel/compat_ptrace.h
deleted file mode 100644
index 3c400fc7e987..000000000000
--- a/arch/s390/kernel/compat_ptrace.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _PTRACE32_H
-#define _PTRACE32_H
-
-#include <asm/ptrace.h> /* needed for NUM_CR_WORDS */
-#include "compat_linux.h" /* needed for psw_compat_t */
-
-struct compat_per_struct_kernel {
- __u32 cr9; /* PER control bits */
- __u32 cr10; /* PER starting address */
- __u32 cr11; /* PER ending address */
- __u32 bits; /* Obsolete software bits */
- __u32 starting_addr; /* User specified start address */
- __u32 ending_addr; /* User specified end address */
- __u16 perc_atmid; /* PER trap ATMID */
- __u32 address; /* PER trap instruction address */
- __u8 access_id; /* PER trap access identification */
-};
-
-struct compat_user_regs_struct
-{
- psw_compat_t psw;
- u32 gprs[NUM_GPRS];
- u32 acrs[NUM_ACRS];
- u32 orig_gpr2;
- /* nb: there's a 4-byte hole here */
- s390_fp_regs fp_regs;
- /*
- * These per registers are in here so that gdb can modify them
- * itself as there is no "official" ptrace interface for hardware
- * watchpoints. This is the way intel does it.
- */
- struct compat_per_struct_kernel per_info;
- u32 ieee_instruction_pointer; /* obsolete, always 0 */
-};
-
-struct compat_user {
- /* We start with the registers, to mimic the way that "memory"
- is returned from the ptrace(3,...) function. */
- struct compat_user_regs_struct regs;
- /* The rest of this junk is to help gdb figure out what goes where */
- u32 u_tsize; /* Text segment size (pages). */
- u32 u_dsize; /* Data segment size (pages). */
- u32 u_ssize; /* Stack segment size (pages). */
- u32 start_code; /* Starting virtual address of text. */
- u32 start_stack; /* Starting virtual address of stack area.
- This is actually the bottom of the stack,
- the top of the stack is always found in the
- esp register. */
- s32 signal; /* Signal that caused the core dump. */
- u32 u_ar0; /* Used by gdb to help find the values for */
- /* the registers. */
- u32 magic; /* To uniquely identify a core file */
- char u_comm[32]; /* User command that was responsible */
-};
-
-typedef struct
-{
- __u32 len;
- __u32 kernel_addr;
- __u32 process_addr;
-} compat_ptrace_area;
-
-#endif /* _PTRACE32_H */
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
deleted file mode 100644
index 1942e2a9f8db..000000000000
--- a/arch/s390/kernel/compat_signal.c
+++ /dev/null
@@ -1,420 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright IBM Corp. 2000, 2006
- * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
- * Gerhard Tonn (ton@de.ibm.com)
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- *
- * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
- */
-
-#include <linux/compat.h>
-#include <linux/sched.h>
-#include <linux/sched/task_stack.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/kernel.h>
-#include <linux/signal.h>
-#include <linux/errno.h>
-#include <linux/wait.h>
-#include <linux/ptrace.h>
-#include <linux/unistd.h>
-#include <linux/stddef.h>
-#include <linux/tty.h>
-#include <linux/personality.h>
-#include <linux/binfmts.h>
-#include <asm/access-regs.h>
-#include <asm/ucontext.h>
-#include <linux/uaccess.h>
-#include <asm/lowcore.h>
-#include <asm/vdso.h>
-#include <asm/fpu.h>
-#include "compat_linux.h"
-#include "compat_ptrace.h"
-#include "entry.h"
-
-typedef struct
-{
- __u8 callee_used_stack[__SIGNAL_FRAMESIZE32];
- struct sigcontext32 sc;
- _sigregs32 sregs;
- int signo;
- _sigregs_ext32 sregs_ext;
- __u16 svc_insn; /* Offset of svc_insn is NOT fixed! */
-} sigframe32;
-
-typedef struct
-{
- __u8 callee_used_stack[__SIGNAL_FRAMESIZE32];
- __u16 svc_insn;
- compat_siginfo_t info;
- struct ucontext32 uc;
-} rt_sigframe32;
-
-/* Store registers needed to create the signal frame */
-static void store_sigregs(void)
-{
- save_access_regs(current->thread.acrs);
- save_user_fpu_regs();
-}
-
-/* Load registers after signal return */
-static void load_sigregs(void)
-{
- restore_access_regs(current->thread.acrs);
-}
-
-static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
-{
- _sigregs32 user_sregs;
- int i;
-
- user_sregs.regs.psw.mask = (__u32)(regs->psw.mask >> 32);
- user_sregs.regs.psw.mask &= PSW32_MASK_USER | PSW32_MASK_RI;
- user_sregs.regs.psw.mask |= PSW32_USER_BITS;
- user_sregs.regs.psw.addr = (__u32) regs->psw.addr |
- (__u32)(regs->psw.mask & PSW_MASK_BA);
- for (i = 0; i < NUM_GPRS; i++)
- user_sregs.regs.gprs[i] = (__u32) regs->gprs[i];
- memcpy(&user_sregs.regs.acrs, current->thread.acrs,
- sizeof(user_sregs.regs.acrs));
- fpregs_store((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.ufpu);
- if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32)))
- return -EFAULT;
- return 0;
-}
-
-static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
-{
- _sigregs32 user_sregs;
- int i;
-
- /* Always make any pending restarted system call return -EINTR */
- current->restart_block.fn = do_no_restart_syscall;
-
- if (__copy_from_user(&user_sregs, &sregs->regs, sizeof(user_sregs)))
- return -EFAULT;
-
- if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW32_MASK_RI))
- return -EINVAL;
-
- /* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
- regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) |
- (__u64)(user_sregs.regs.psw.mask & PSW32_MASK_USER) << 32 |
- (__u64)(user_sregs.regs.psw.mask & PSW32_MASK_RI) << 32 |
- (__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_AMODE);
- /* Check for invalid user address space control. */
- if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME)
- regs->psw.mask = PSW_ASC_PRIMARY |
- (regs->psw.mask & ~PSW_MASK_ASC);
- regs->psw.addr = (__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_INSN);
- for (i = 0; i < NUM_GPRS; i++)
- regs->gprs[i] = (__u64) user_sregs.regs.gprs[i];
- memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
- sizeof(current->thread.acrs));
- fpregs_load((_s390_fp_regs *)&user_sregs.fpregs, &current->thread.ufpu);
-
- clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
- return 0;
-}
-
-static int save_sigregs_ext32(struct pt_regs *regs,
- _sigregs_ext32 __user *sregs_ext)
-{
- __u32 gprs_high[NUM_GPRS];
- __u64 vxrs[__NUM_VXRS_LOW];
- int i;
-
- /* Save high gprs to signal stack */
- for (i = 0; i < NUM_GPRS; i++)
- gprs_high[i] = regs->gprs[i] >> 32;
- if (__copy_to_user(&sregs_ext->gprs_high, &gprs_high,
- sizeof(sregs_ext->gprs_high)))
- return -EFAULT;
-
- /* Save vector registers to signal stack */
- if (cpu_has_vx()) {
- for (i = 0; i < __NUM_VXRS_LOW; i++)
- vxrs[i] = current->thread.ufpu.vxrs[i].low;
- if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
- sizeof(sregs_ext->vxrs_low)) ||
- __copy_to_user(&sregs_ext->vxrs_high,
- current->thread.ufpu.vxrs + __NUM_VXRS_LOW,
- sizeof(sregs_ext->vxrs_high)))
- return -EFAULT;
- }
- return 0;
-}
-
-static int restore_sigregs_ext32(struct pt_regs *regs,
- _sigregs_ext32 __user *sregs_ext)
-{
- __u32 gprs_high[NUM_GPRS];
- __u64 vxrs[__NUM_VXRS_LOW];
- int i;
-
- /* Restore high gprs from signal stack */
- if (__copy_from_user(&gprs_high, &sregs_ext->gprs_high,
- sizeof(sregs_ext->gprs_high)))
- return -EFAULT;
- for (i = 0; i < NUM_GPRS; i++)
- *(__u32 *)&regs->gprs[i] = gprs_high[i];
-
- /* Restore vector registers from signal stack */
- if (cpu_has_vx()) {
- if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
- sizeof(sregs_ext->vxrs_low)) ||
- __copy_from_user(current->thread.ufpu.vxrs + __NUM_VXRS_LOW,
- &sregs_ext->vxrs_high,
- sizeof(sregs_ext->vxrs_high)))
- return -EFAULT;
- for (i = 0; i < __NUM_VXRS_LOW; i++)
- current->thread.ufpu.vxrs[i].low = vxrs[i];
- }
- return 0;
-}
-
-COMPAT_SYSCALL_DEFINE0(sigreturn)
-{
- struct pt_regs *regs = task_pt_regs(current);
- sigframe32 __user *frame = (sigframe32 __user *)regs->gprs[15];
- sigset_t set;
-
- if (get_compat_sigset(&set, (compat_sigset_t __user *)frame->sc.oldmask))
- goto badframe;
- set_current_blocked(&set);
- save_user_fpu_regs();
- if (restore_sigregs32(regs, &frame->sregs))
- goto badframe;
- if (restore_sigregs_ext32(regs, &frame->sregs_ext))
- goto badframe;
- load_sigregs();
- return regs->gprs[2];
-badframe:
- force_sig(SIGSEGV);
- return 0;
-}
-
-COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
-{
- struct pt_regs *regs = task_pt_regs(current);
- rt_sigframe32 __user *frame = (rt_sigframe32 __user *)regs->gprs[15];
- sigset_t set;
-
- if (get_compat_sigset(&set, &frame->uc.uc_sigmask))
- goto badframe;
- set_current_blocked(&set);
- if (compat_restore_altstack(&frame->uc.uc_stack))
- goto badframe;
- save_user_fpu_regs();
- if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
- goto badframe;
- if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
- goto badframe;
- load_sigregs();
- return regs->gprs[2];
-badframe:
- force_sig(SIGSEGV);
- return 0;
-}
-
-/*
- * Set up a signal frame.
- */
-
-
-/*
- * Determine which stack to use..
- */
-static inline void __user *
-get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
-{
- unsigned long sp;
-
- /* Default to using normal stack */
- sp = (unsigned long) A(regs->gprs[15]);
-
- /* Overflow on alternate signal stack gives SIGSEGV. */
- if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL))
- return (void __user *) -1UL;
-
- /* This is the X/Open sanctioned signal stack switching. */
- if (ka->sa.sa_flags & SA_ONSTACK) {
- if (! sas_ss_flags(sp))
- sp = current->sas_ss_sp + current->sas_ss_size;
- }
-
- return (void __user *)((sp - frame_size) & -8ul);
-}
-
-static int setup_frame32(struct ksignal *ksig, sigset_t *set,
- struct pt_regs *regs)
-{
- int sig = ksig->sig;
- sigframe32 __user *frame;
- unsigned long restorer;
- size_t frame_size;
-
- /*
- * gprs_high are always present for 31-bit compat tasks.
- * The space for vector registers is only allocated if
- * the machine supports it
- */
- frame_size = sizeof(*frame) - sizeof(frame->sregs_ext.__reserved);
- if (!cpu_has_vx())
- frame_size -= sizeof(frame->sregs_ext.vxrs_low) +
- sizeof(frame->sregs_ext.vxrs_high);
- frame = get_sigframe(&ksig->ka, regs, frame_size);
- if (frame == (void __user *) -1UL)
- return -EFAULT;
-
- /* Set up backchain. */
- if (__put_user(regs->gprs[15], (unsigned int __user *) frame))
- return -EFAULT;
-
- /* Create struct sigcontext32 on the signal stack */
- if (put_compat_sigset((compat_sigset_t __user *)frame->sc.oldmask,
- set, sizeof(compat_sigset_t)))
- return -EFAULT;
- if (__put_user(ptr_to_compat(&frame->sregs), &frame->sc.sregs))
- return -EFAULT;
-
- /* Store registers needed to create the signal frame */
- store_sigregs();
-
- /* Create _sigregs32 on the signal stack */
- if (save_sigregs32(regs, &frame->sregs))
- return -EFAULT;
-
- /* Place signal number on stack to allow backtrace from handler. */
- if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo))
- return -EFAULT;
-
- /* Create _sigregs_ext32 on the signal stack */
- if (save_sigregs_ext32(regs, &frame->sregs_ext))
- return -EFAULT;
-
- /* Set up to return from userspace. If provided, use a stub
- already in userspace. */
- if (ksig->ka.sa.sa_flags & SA_RESTORER) {
- restorer = (unsigned long __force)
- ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE;
- } else {
- restorer = VDSO32_SYMBOL(current, sigreturn);
- }
-
- /* Set up registers for signal handler */
- regs->gprs[14] = restorer;
- regs->gprs[15] = (__force __u64) frame;
- /* Force 31 bit amode and default user address space control. */
- regs->psw.mask = PSW_MASK_BA |
- (PSW_USER_BITS & PSW_MASK_ASC) |
- (regs->psw.mask & ~PSW_MASK_ASC);
- regs->psw.addr = (__force __u64) ksig->ka.sa.sa_handler;
-
- regs->gprs[2] = sig;
- regs->gprs[3] = (__force __u64) &frame->sc;
-
- /* We forgot to include these in the sigcontext.
- To avoid breaking binary compatibility, they are passed as args. */
- if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL ||
- sig == SIGTRAP || sig == SIGFPE) {
- /* set extra registers only for synchronous signals */
- regs->gprs[4] = regs->int_code & 127;
- regs->gprs[5] = regs->int_parm_long;
- regs->gprs[6] = current->thread.last_break;
- }
-
- return 0;
-}
-
-static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set,
- struct pt_regs *regs)
-{
- rt_sigframe32 __user *frame;
- unsigned long restorer;
- size_t frame_size;
- u32 uc_flags;
-
- frame_size = sizeof(*frame) -
- sizeof(frame->uc.uc_mcontext_ext.__reserved);
- /*
- * gprs_high are always present for 31-bit compat tasks.
- * The space for vector registers is only allocated if
- * the machine supports it
- */
- uc_flags = UC_GPRS_HIGH;
- if (cpu_has_vx()) {
- uc_flags |= UC_VXRS;
- } else {
- frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) +
- sizeof(frame->uc.uc_mcontext_ext.vxrs_high);
- }
- frame = get_sigframe(&ksig->ka, regs, frame_size);
- if (frame == (void __user *) -1UL)
- return -EFAULT;
-
- /* Set up backchain. */
- if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame))
- return -EFAULT;
-
- /* Set up to return from userspace. If provided, use a stub
- already in userspace. */
- if (ksig->ka.sa.sa_flags & SA_RESTORER) {
- restorer = (unsigned long __force)
- ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE;
- } else {
- restorer = VDSO32_SYMBOL(current, rt_sigreturn);
- }
-
- /* Create siginfo on the signal stack */
- if (copy_siginfo_to_user32(&frame->info, &ksig->info))
- return -EFAULT;
-
- /* Store registers needed to create the signal frame */
- store_sigregs();
-
- /* Create ucontext on the signal stack. */
- if (__put_user(uc_flags, &frame->uc.uc_flags) ||
- __put_user(0, &frame->uc.uc_link) ||
- __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]) ||
- save_sigregs32(regs, &frame->uc.uc_mcontext) ||
- put_compat_sigset(&frame->uc.uc_sigmask, set, sizeof(compat_sigset_t)) ||
- save_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
- return -EFAULT;
-
- /* Set up registers for signal handler */
- regs->gprs[14] = restorer;
- regs->gprs[15] = (__force __u64) frame;
- /* Force 31 bit amode and default user address space control. */
- regs->psw.mask = PSW_MASK_BA |
- (PSW_USER_BITS & PSW_MASK_ASC) |
- (regs->psw.mask & ~PSW_MASK_ASC);
- regs->psw.addr = (__u64 __force) ksig->ka.sa.sa_handler;
-
- regs->gprs[2] = ksig->sig;
- regs->gprs[3] = (__force __u64) &frame->info;
- regs->gprs[4] = (__force __u64) &frame->uc;
- regs->gprs[5] = current->thread.last_break;
- return 0;
-}
-
-/*
- * OK, we're invoking a handler
- */
-
-void handle_signal32(struct ksignal *ksig, sigset_t *oldset,
- struct pt_regs *regs)
-{
- int ret;
-
- /* Set up the stack frame */
- if (ksig->ka.sa.sa_flags & SA_SIGINFO)
- ret = setup_rt_frame32(ksig, oldset, regs);
- else
- ret = setup_frame32(ksig, oldset, regs);
-
- signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLE_STEP));
-}
-
diff --git a/arch/s390/kernel/cpacf.c b/arch/s390/kernel/cpacf.c
new file mode 100644
index 000000000000..9d85b4bc7036
--- /dev/null
+++ b/arch/s390/kernel/cpacf.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2024
+ */
+
+#define pr_fmt(fmt) "cpacf: " fmt
+
+#include <linux/cpu.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <asm/cpacf.h>
+
+#define CPACF_QUERY(name, instruction) \
+static ssize_t name##_query_raw_read(struct file *fp, \
+ struct kobject *kobj, \
+ const struct bin_attribute *attr, \
+ char *buf, loff_t offs, \
+ size_t count) \
+{ \
+ cpacf_mask_t mask; \
+ \
+ if (!cpacf_query(CPACF_##instruction, &mask)) \
+ return -EOPNOTSUPP; \
+ return memory_read_from_buffer(buf, count, &offs, &mask, sizeof(mask)); \
+} \
+static const BIN_ATTR_RO(name##_query_raw, sizeof(cpacf_mask_t))
+
+CPACF_QUERY(km, KM);
+CPACF_QUERY(kmc, KMC);
+CPACF_QUERY(kimd, KIMD);
+CPACF_QUERY(klmd, KLMD);
+CPACF_QUERY(kmac, KMAC);
+CPACF_QUERY(pckmo, PCKMO);
+CPACF_QUERY(kmf, KMF);
+CPACF_QUERY(kmctr, KMCTR);
+CPACF_QUERY(kmo, KMO);
+CPACF_QUERY(pcc, PCC);
+CPACF_QUERY(prno, PRNO);
+CPACF_QUERY(kma, KMA);
+CPACF_QUERY(kdsa, KDSA);
+
+#define CPACF_QAI(name, instruction) \
+static ssize_t name##_query_auth_info_raw_read( \
+ struct file *fp, struct kobject *kobj, \
+ const struct bin_attribute *attr, char *buf, loff_t offs, \
+ size_t count) \
+{ \
+ cpacf_qai_t qai; \
+ \
+ if (!cpacf_qai(CPACF_##instruction, &qai)) \
+ return -EOPNOTSUPP; \
+ return memory_read_from_buffer(buf, count, &offs, &qai, \
+ sizeof(qai)); \
+} \
+static const BIN_ATTR_RO(name##_query_auth_info_raw, sizeof(cpacf_qai_t))
+
+CPACF_QAI(km, KM);
+CPACF_QAI(kmc, KMC);
+CPACF_QAI(kimd, KIMD);
+CPACF_QAI(klmd, KLMD);
+CPACF_QAI(kmac, KMAC);
+CPACF_QAI(pckmo, PCKMO);
+CPACF_QAI(kmf, KMF);
+CPACF_QAI(kmctr, KMCTR);
+CPACF_QAI(kmo, KMO);
+CPACF_QAI(pcc, PCC);
+CPACF_QAI(prno, PRNO);
+CPACF_QAI(kma, KMA);
+CPACF_QAI(kdsa, KDSA);
+
+static const struct bin_attribute *const cpacf_attrs[] = {
+ &bin_attr_km_query_raw,
+ &bin_attr_kmc_query_raw,
+ &bin_attr_kimd_query_raw,
+ &bin_attr_klmd_query_raw,
+ &bin_attr_kmac_query_raw,
+ &bin_attr_pckmo_query_raw,
+ &bin_attr_kmf_query_raw,
+ &bin_attr_kmctr_query_raw,
+ &bin_attr_kmo_query_raw,
+ &bin_attr_pcc_query_raw,
+ &bin_attr_prno_query_raw,
+ &bin_attr_kma_query_raw,
+ &bin_attr_kdsa_query_raw,
+ &bin_attr_km_query_auth_info_raw,
+ &bin_attr_kmc_query_auth_info_raw,
+ &bin_attr_kimd_query_auth_info_raw,
+ &bin_attr_klmd_query_auth_info_raw,
+ &bin_attr_kmac_query_auth_info_raw,
+ &bin_attr_pckmo_query_auth_info_raw,
+ &bin_attr_kmf_query_auth_info_raw,
+ &bin_attr_kmctr_query_auth_info_raw,
+ &bin_attr_kmo_query_auth_info_raw,
+ &bin_attr_pcc_query_auth_info_raw,
+ &bin_attr_prno_query_auth_info_raw,
+ &bin_attr_kma_query_auth_info_raw,
+ &bin_attr_kdsa_query_auth_info_raw,
+ NULL,
+};
+
+static const struct attribute_group cpacf_attr_grp = {
+ .name = "cpacf",
+ .bin_attrs = cpacf_attrs,
+};
+
+static int __init cpacf_init(void)
+{
+ struct device *cpu_root;
+ int rc = 0;
+
+ cpu_root = bus_get_dev_root(&cpu_subsys);
+ if (cpu_root) {
+ rc = sysfs_create_group(&cpu_root->kobj, &cpacf_attr_grp);
+ put_device(cpu_root);
+ }
+ return rc;
+}
+device_initcall(cpacf_init);
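
The new cpacf.c exports the raw query masks as binary sysfs attributes in a "cpacf" group under the cpu subsystem root, so a small reader like the sketch below can dump one of them. The path and the 16-byte mask size are inferred from the code above and asm/cpacf.h; treat both as assumptions if your tree or sysfs layout differs.

#include <stdio.h>

int main(void)
{
	const char *path = "/sys/devices/system/cpu/cpacf/km_query_raw";
	unsigned char mask[16];		/* cpacf_mask_t: 16-byte query bit mask */
	FILE *f = fopen(path, "rb");
	size_t n, i;

	if (!f) {
		perror(path);
		return 1;
	}
	n = fread(mask, 1, sizeof(mask), f);
	fclose(f);
	for (i = 0; i < n; i++)
		printf("%02x", mask[i]);
	putchar('\n');
	return 0;
}
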
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c
index b210a29d3ee9..ab611764642a 100644
--- a/arch/s390/kernel/cpcmd.c
+++ b/arch/s390/kernel/cpcmd.c
@@ -6,8 +6,7 @@
* Christian Borntraeger (cborntra@de.ibm.com),
*/
-#define KMSG_COMPONENT "cpcmd"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "cpcmd: " fmt
#include <linux/kernel.h>
#include <linux/export.h>
@@ -20,6 +19,7 @@
#include <asm/diag.h>
#include <asm/ebcdic.h>
#include <asm/cpcmd.h>
+#include <asm/asm.h>
static DEFINE_SPINLOCK(cpcmd_lock);
static char cpcmd_buf[241];
@@ -45,12 +45,11 @@ static int diag8_response(int cmdlen, char *response, int *rlen)
ry.odd = *rlen;
asm volatile(
" diag %[rx],%[ry],0x8\n"
- " ipm %[cc]\n"
- " srl %[cc],28\n"
- : [cc] "=&d" (cc), [ry] "+&d" (ry.pair)
+ CC_IPM(cc)
+ : CC_OUT(cc, cc), [ry] "+d" (ry.pair)
: [rx] "d" (rx.pair)
- : "cc");
- if (cc)
+ : CC_CLOBBER);
+ if (CC_TRANSFORM(cc))
*rlen += ry.odd;
else
*rlen = ry.odd;
diff --git a/arch/s390/kernel/cpufeature.c b/arch/s390/kernel/cpufeature.c
index 1b2ae42a0c15..c9eef9ed876b 100644
--- a/arch/s390/kernel/cpufeature.c
+++ b/arch/s390/kernel/cpufeature.c
@@ -4,12 +4,15 @@
*/
#include <linux/cpufeature.h>
+#include <linux/export.h>
#include <linux/bug.h>
+#include <asm/machine.h>
#include <asm/elf.h>
enum {
TYPE_HWCAP,
TYPE_FACILITY,
+ TYPE_MACHINE,
};
struct s390_cpu_feature {
@@ -21,6 +24,7 @@ static struct s390_cpu_feature s390_cpu_features[MAX_CPU_FEATURES] = {
[S390_CPU_FEATURE_MSA] = {.type = TYPE_HWCAP, .num = HWCAP_NR_MSA},
[S390_CPU_FEATURE_VXRS] = {.type = TYPE_HWCAP, .num = HWCAP_NR_VXRS},
[S390_CPU_FEATURE_UV] = {.type = TYPE_FACILITY, .num = 158},
+ [S390_CPU_FEATURE_D288] = {.type = TYPE_MACHINE, .num = MFEATURE_DIAG288},
};
/*
@@ -38,6 +42,8 @@ int cpu_have_feature(unsigned int num)
return !!(elf_hwcap & BIT(feature->num));
case TYPE_FACILITY:
return test_facility(feature->num);
+ case TYPE_MACHINE:
+ return test_machine_feature(feature->num);
default:
WARN_ON_ONCE(1);
return 0;
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index edae13416196..d4839de8ce9d 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -7,6 +7,7 @@
*/
#include <linux/crash_dump.h>
+#include <linux/export.h>
#include <asm/lowcore.h>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -63,9 +64,7 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu)
{
struct save_area *sa;
- sa = memblock_alloc(sizeof(*sa), 8);
- if (!sa)
- return NULL;
+ sa = memblock_alloc_or_panic(sizeof(*sa), 8);
if (is_boot_cpu)
list_add(&sa->list, &dump_save_areas);
@@ -237,14 +236,16 @@ int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from,
prot);
}
-static const char *nt_name(Elf64_Word type)
+/*
+ * Return true only when in a kdump or stand-alone kdump environment.
+ * Note that /proc/vmcore might also be available in "standard zfcp/nvme dump"
+ * environments, where this function returns false; see dump_available().
+ */
+bool is_kdump_kernel(void)
{
- const char *name = "LINUX";
-
- if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG)
- name = KEXEC_CORE_NOTE_NAME;
- return name;
+ return oldmem_data.start;
}
+EXPORT_SYMBOL_GPL(is_kdump_kernel);
/*
* Initialize ELF note
@@ -270,10 +271,8 @@ static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len,
return PTR_ADD(buf, len);
}
-static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len)
-{
- return nt_init_name(buf, type, desc, d_len, nt_name(type));
-}
+#define nt_init(buf, type, desc) \
+ nt_init_name(buf, NT_ ## type, &(desc), sizeof(desc), NN_ ## type)
/*
* Calculate the size of ELF note
@@ -289,10 +288,7 @@ static size_t nt_size_name(int d_len, const char *name)
return size;
}
-static inline size_t nt_size(Elf64_Word type, int d_len)
-{
- return nt_size_name(d_len, nt_name(type));
-}
+#define nt_size(type, desc) nt_size_name(sizeof(desc), NN_ ## type)
/*
* Fill ELF notes for one CPU with save area registers
@@ -313,18 +309,16 @@ static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa)
memcpy(&nt_fpregset.fpc, &sa->fpc, sizeof(sa->fpc));
memcpy(&nt_fpregset.fprs, &sa->fprs, sizeof(sa->fprs));
/* Create ELF notes for the CPU */
- ptr = nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus));
- ptr = nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset));
- ptr = nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer));
- ptr = nt_init(ptr, NT_S390_TODCMP, &sa->todcmp, sizeof(sa->todcmp));
- ptr = nt_init(ptr, NT_S390_TODPREG, &sa->todpreg, sizeof(sa->todpreg));
- ptr = nt_init(ptr, NT_S390_CTRS, &sa->ctrs, sizeof(sa->ctrs));
- ptr = nt_init(ptr, NT_S390_PREFIX, &sa->prefix, sizeof(sa->prefix));
+ ptr = nt_init(ptr, PRSTATUS, nt_prstatus);
+ ptr = nt_init(ptr, PRFPREG, nt_fpregset);
+ ptr = nt_init(ptr, S390_TIMER, sa->timer);
+ ptr = nt_init(ptr, S390_TODCMP, sa->todcmp);
+ ptr = nt_init(ptr, S390_TODPREG, sa->todpreg);
+ ptr = nt_init(ptr, S390_CTRS, sa->ctrs);
+ ptr = nt_init(ptr, S390_PREFIX, sa->prefix);
if (cpu_has_vx()) {
- ptr = nt_init(ptr, NT_S390_VXRS_HIGH,
- &sa->vxrs_high, sizeof(sa->vxrs_high));
- ptr = nt_init(ptr, NT_S390_VXRS_LOW,
- &sa->vxrs_low, sizeof(sa->vxrs_low));
+ ptr = nt_init(ptr, S390_VXRS_HIGH, sa->vxrs_high);
+ ptr = nt_init(ptr, S390_VXRS_LOW, sa->vxrs_low);
}
return ptr;
}
@@ -337,16 +331,16 @@ static size_t get_cpu_elf_notes_size(void)
struct save_area *sa = NULL;
size_t size;
- size = nt_size(NT_PRSTATUS, sizeof(struct elf_prstatus));
- size += nt_size(NT_PRFPREG, sizeof(elf_fpregset_t));
- size += nt_size(NT_S390_TIMER, sizeof(sa->timer));
- size += nt_size(NT_S390_TODCMP, sizeof(sa->todcmp));
- size += nt_size(NT_S390_TODPREG, sizeof(sa->todpreg));
- size += nt_size(NT_S390_CTRS, sizeof(sa->ctrs));
- size += nt_size(NT_S390_PREFIX, sizeof(sa->prefix));
+ size = nt_size(PRSTATUS, struct elf_prstatus);
+ size += nt_size(PRFPREG, elf_fpregset_t);
+ size += nt_size(S390_TIMER, sa->timer);
+ size += nt_size(S390_TODCMP, sa->todcmp);
+ size += nt_size(S390_TODPREG, sa->todpreg);
+ size += nt_size(S390_CTRS, sa->ctrs);
+ size += nt_size(S390_PREFIX, sa->prefix);
if (cpu_has_vx()) {
- size += nt_size(NT_S390_VXRS_HIGH, sizeof(sa->vxrs_high));
- size += nt_size(NT_S390_VXRS_LOW, sizeof(sa->vxrs_low));
+ size += nt_size(S390_VXRS_HIGH, sa->vxrs_high);
+ size += nt_size(S390_VXRS_LOW, sa->vxrs_low);
}
return size;
@@ -361,8 +355,8 @@ static void *nt_prpsinfo(void *ptr)
memset(&prpsinfo, 0, sizeof(prpsinfo));
prpsinfo.pr_sname = 'R';
- strcpy(prpsinfo.pr_fname, "vmlinux");
- return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo));
+ strscpy(prpsinfo.pr_fname, "vmlinux");
+ return nt_init(ptr, PRPSINFO, prpsinfo);
}
/*
@@ -497,6 +491,19 @@ static int get_mem_chunk_cnt(void)
return cnt;
}
+static void fill_ptload(Elf64_Phdr *phdr, unsigned long paddr,
+ unsigned long vaddr, unsigned long size)
+{
+ phdr->p_type = PT_LOAD;
+ phdr->p_vaddr = vaddr;
+ phdr->p_offset = paddr;
+ phdr->p_paddr = paddr;
+ phdr->p_filesz = size;
+ phdr->p_memsz = size;
+ phdr->p_flags = PF_R | PF_W | PF_X;
+ phdr->p_align = PAGE_SIZE;
+}
+
/*
* Initialize ELF loads (new kernel)
*/
@@ -509,14 +516,8 @@ static void loads_init(Elf64_Phdr *phdr, bool os_info_has_vm)
if (os_info_has_vm)
old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
for_each_physmem_range(idx, &oldmem_type, &start, &end) {
- phdr->p_type = PT_LOAD;
- phdr->p_vaddr = old_identity_base + start;
- phdr->p_offset = start;
- phdr->p_paddr = start;
- phdr->p_filesz = end - start;
- phdr->p_memsz = end - start;
- phdr->p_flags = PF_R | PF_W | PF_X;
- phdr->p_align = PAGE_SIZE;
+ fill_ptload(phdr, start, old_identity_base + start,
+ end - start);
phdr++;
}
}
@@ -526,6 +527,22 @@ static bool os_info_has_vm(void)
return os_info_old_value(OS_INFO_KASLR_OFFSET);
}
+#ifdef CONFIG_PROC_VMCORE_DEVICE_RAM
+/*
+ * Fill PT_LOAD for a physical memory range owned by a device and detected by
+ * its device driver.
+ */
+void elfcorehdr_fill_device_ram_ptload_elf64(Elf64_Phdr *phdr,
+ unsigned long long paddr, unsigned long long size)
+{
+ unsigned long old_identity_base = 0;
+
+ if (os_info_has_vm())
+ old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
+ fill_ptload(phdr, paddr, old_identity_base + paddr, size);
+}
+#endif
+
/*
* Prepare PT_LOAD type program header for kernel image region
*/
@@ -578,7 +595,7 @@ static size_t get_elfcorehdr_size(int phdr_count)
/* PT_NOTES */
size += sizeof(Elf64_Phdr);
/* nt_prpsinfo */
- size += nt_size(NT_PRPSINFO, sizeof(struct elf_prpsinfo));
+ size += nt_size(PRPSINFO, struct elf_prpsinfo);
/* regsets */
size += get_cpu_cnt() * get_cpu_elf_notes_size();
/* nt_vmcoreinfo */
diff --git a/arch/s390/kernel/ctlreg.c b/arch/s390/kernel/ctlreg.c
index 8cc26cf2c64a..a0501f4c7e7a 100644
--- a/arch/s390/kernel/ctlreg.c
+++ b/arch/s390/kernel/ctlreg.c
@@ -5,6 +5,7 @@
#include <linux/irqflags.h>
#include <linux/spinlock.h>
+#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/smp.h>
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 85328a0ef3b6..71cdb6845dd7 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -10,8 +10,7 @@
* Bugreports to: <Linux390@de.ibm.com>
*/
-#define KMSG_COMPONENT "s390dbf"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "s390dbf: " fmt
#include <linux/stddef.h>
#include <linux/kernel.h>
@@ -24,6 +23,7 @@
#include <linux/export.h>
#include <linux/init.h>
#include <linux/fs.h>
+#include <linux/math.h>
#include <linux/minmax.h>
#include <linux/debugfs.h>
@@ -38,13 +38,13 @@
typedef struct file_private_info {
loff_t offset; /* offset of last read in file */
- int act_area; /* number of last formated area */
+ int act_area; /* number of last formatted area */
int act_page; /* act page in given area */
- int act_entry; /* last formated entry (offset */
+ int act_entry; /* last formatted entry (offset */
/* relative to beginning of last */
- /* formated page) */
+ /* formatted page) */
size_t act_entry_offset; /* up to this offset we copied */
- /* in last read the last formated */
+ /* in last read the last formatted */
/* entry to userland */
char temp_buf[2048]; /* buffer for output */
debug_info_t *debug_info_org; /* original debug information */
@@ -63,7 +63,7 @@ typedef struct {
long args[];
} debug_sprintf_entry_t;
-/* internal function prototyes */
+/* internal function prototypes */
static int debug_init(void);
static ssize_t debug_output(struct file *file, char __user *user_buf,
@@ -77,12 +77,14 @@ static debug_info_t *debug_info_create(const char *name, int pages_per_area,
static void debug_info_get(debug_info_t *);
static void debug_info_put(debug_info_t *);
static int debug_prolog_level_fn(debug_info_t *id,
- struct debug_view *view, char *out_buf);
+ struct debug_view *view, char *out_buf,
+ size_t out_buf_size);
static int debug_input_level_fn(debug_info_t *id, struct debug_view *view,
struct file *file, const char __user *user_buf,
size_t user_buf_size, loff_t *offset);
static int debug_prolog_pages_fn(debug_info_t *id,
- struct debug_view *view, char *out_buf);
+ struct debug_view *view, char *out_buf,
+ size_t out_buf_size);
static int debug_input_pages_fn(debug_info_t *id, struct debug_view *view,
struct file *file, const char __user *user_buf,
size_t user_buf_size, loff_t *offset);
@@ -90,9 +92,8 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view,
struct file *file, const char __user *user_buf,
size_t user_buf_size, loff_t *offset);
static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
- char *out_buf, const char *in_buf);
-static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
- char *out_buf, const char *inbuf);
+ char *out_buf, size_t out_buf_size,
+ const char *in_buf);
static void debug_areas_swap(debug_info_t *a, debug_info_t *b);
static void debug_events_append(debug_info_t *dest, debug_info_t *src);
@@ -163,7 +164,6 @@ static const struct file_operations debug_file_ops = {
.write = debug_input,
.open = debug_open,
.release = debug_close,
- .llseek = no_llseek,
};
static struct dentry *debug_debugfs_root_entry;
@@ -250,7 +250,7 @@ static debug_info_t *debug_info_alloc(const char *name, int pages_per_area,
rc->level = level;
rc->buf_size = buf_size;
rc->entry_size = sizeof(debug_entry_t) + buf_size;
- strscpy(rc->name, name, sizeof(rc->name));
+ strscpy(rc->name, name);
memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *));
memset(rc->debugfs_entries, 0, DEBUG_MAX_VIEWS * sizeof(struct dentry *));
refcount_set(&(rc->ref_count), 0);
@@ -351,7 +351,10 @@ static debug_info_t *debug_info_copy(debug_info_t *in, int mode)
for (i = 0; i < in->nr_areas; i++) {
for (j = 0; j < in->pages_per_area; j++)
memcpy(rc->areas[i][j], in->areas[i][j], PAGE_SIZE);
+ rc->active_pages[i] = in->active_pages[i];
+ rc->active_entries[i] = in->active_entries[i];
}
+ rc->active_area = in->active_area;
out:
spin_unlock_irqrestore(&in->lock, flags);
return rc;
@@ -381,7 +384,7 @@ static void debug_info_put(debug_info_t *db_info)
/*
* debug_format_entry:
- * - format one debug entry and return size of formated data
+ * - format one debug entry and return size of formatted data
*/
static int debug_format_entry(file_private_info_t *p_info)
{
@@ -392,8 +395,10 @@ static int debug_format_entry(file_private_info_t *p_info)
if (p_info->act_entry == DEBUG_PROLOG_ENTRY) {
/* print prolog */
- if (view->prolog_proc)
- len += view->prolog_proc(id_snap, view, p_info->temp_buf);
+ if (view->prolog_proc) {
+ len += view->prolog_proc(id_snap, view, p_info->temp_buf,
+ sizeof(p_info->temp_buf));
+ }
goto out;
}
if (!id_snap->areas) /* this is true, if we have a prolog only view */
@@ -403,21 +408,31 @@ static int debug_format_entry(file_private_info_t *p_info)
if (act_entry->clock == 0LL)
goto out; /* empty entry */
- if (view->header_proc)
+ if (view->header_proc) {
len += view->header_proc(id_snap, view, p_info->act_area,
- act_entry, p_info->temp_buf + len);
- if (view->format_proc)
+ act_entry, p_info->temp_buf + len,
+ sizeof(p_info->temp_buf) - len);
+ }
+ if (view->format_proc) {
len += view->format_proc(id_snap, view, p_info->temp_buf + len,
+ sizeof(p_info->temp_buf) - len,
DEBUG_DATA(act_entry));
+ }
out:
return len;
}
-/*
- * debug_next_entry:
- * - goto next entry in p_info
+/**
+ * debug_next_entry - Go to the next entry
+ * @p_info: Private info that is manipulated
+ *
+ * Sets the current position in @p_info to the next entry. If no further entry
+ * exists, the current position is set to one after the end and the return
+ * value indicates that no further entries exist.
+ *
+ * Return: True if there are more following entries, false otherwise
*/
-static inline int debug_next_entry(file_private_info_t *p_info)
+static inline bool debug_next_entry(file_private_info_t *p_info)
{
debug_info_t *id;
@@ -425,10 +440,10 @@ static inline int debug_next_entry(file_private_info_t *p_info)
if (p_info->act_entry == DEBUG_PROLOG_ENTRY) {
p_info->act_entry = 0;
p_info->act_page = 0;
- goto out;
+ return true;
}
if (!id->areas)
- return 1;
+ return false;
p_info->act_entry += id->entry_size;
/* switch to next page, if we reached the end of the page */
if (p_info->act_entry > (PAGE_SIZE - id->entry_size)) {
@@ -441,16 +456,93 @@ static inline int debug_next_entry(file_private_info_t *p_info)
p_info->act_page = 0;
}
if (p_info->act_area >= id->nr_areas)
- return 1;
+ return false;
}
-out:
- return 0;
+ return true;
+}
+
+/**
+ * debug_to_act_entry - Go to the currently active entry
+ * @p_info: Private info that is manipulated
+ *
+ * Sets the current position in @p_info to the currently active
+ * entry of @p_info->debug_info_snap
+ */
+static void debug_to_act_entry(file_private_info_t *p_info)
+{
+ debug_info_t *snap_id;
+
+ snap_id = p_info->debug_info_snap;
+ p_info->act_area = snap_id->active_area;
+ p_info->act_page = snap_id->active_pages[snap_id->active_area];
+ p_info->act_entry = snap_id->active_entries[snap_id->active_area];
+}
+
+/**
+ * debug_prev_entry - Go to the previous entry
+ * @p_info: Private info that is manipulated
+ *
+ * Sets the current position in @p_info to the previous entry. If no previous
+ * entry exists, the current position is left at DEBUG_PROLOG_ENTRY and the
+ * return value indicates that no previous entries exist.
+ *
+ * Return: True if there are more previous entries, false otherwise
+ */
+
+static inline bool debug_prev_entry(file_private_info_t *p_info)
+{
+ debug_info_t *id;
+
+ id = p_info->debug_info_snap;
+ if (p_info->act_entry == DEBUG_PROLOG_ENTRY)
+ debug_to_act_entry(p_info);
+ if (!id->areas)
+ return false;
+ p_info->act_entry -= id->entry_size;
+ /* switch to prev page, if we reached the beginning of the page */
+ if (p_info->act_entry < 0) {
+ /* end of previous page */
+ p_info->act_entry = rounddown(PAGE_SIZE, id->entry_size) - id->entry_size;
+ p_info->act_page--;
+ if (p_info->act_page < 0) {
+ /* previous area */
+ p_info->act_area--;
+ p_info->act_page = id->pages_per_area - 1;
+ }
+ if (p_info->act_area < 0)
+ p_info->act_area = (id->nr_areas - 1) % id->nr_areas;
+ }
+ /* check full circle */
+ if (id->active_area == p_info->act_area &&
+ id->active_pages[id->active_area] == p_info->act_page &&
+ id->active_entries[id->active_area] == p_info->act_entry)
+ return false;
+ return true;
+}
+
+/**
+ * debug_move_entry - Go to next entry in either the forward or backward direction
+ * @p_info: Private info that is manipulated
+ * @reverse: If true go to the next entry in reverse i.e. previous
+ *
+ * Sets the current position in @p_info to the next (@reverse == false) or
+ * previous (@reverse == true) entry.
+ *
+ * Return: True if there are further entries in that direction,
+ * false otherwise.
+ */
+static bool debug_move_entry(file_private_info_t *p_info, bool reverse)
+{
+ if (reverse)
+ return debug_prev_entry(p_info);
+ else
+ return debug_next_entry(p_info);
}
/*
* debug_output:
* - called for user read()
- * - copies formated debug entries to the user buffer
+ * - copies formatted debug entries to the user buffer
*/
static ssize_t debug_output(struct file *file, /* file descriptor */
char __user *user_buf, /* user buffer */
@@ -486,7 +578,7 @@ static ssize_t debug_output(struct file *file, /* file descriptor */
}
if (copy_size == formatted_line_residue) {
entry_offset = 0;
- if (debug_next_entry(p_info))
+ if (!debug_next_entry(p_info))
goto out;
}
}
@@ -521,15 +613,51 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf,
return rc; /* number of input characters */
}
+static file_private_info_t *debug_file_private_alloc(debug_info_t *debug_info,
+ struct debug_view *view)
+{
+ debug_info_t *debug_info_snapshot;
+ file_private_info_t *p_info;
+
+ /*
+	 * Make a snapshot of the current debug areas to get a consistent state.
+	 * Copying all the areas is only needed if we have a view which
+	 * formats the debug areas.
+ */
+ if (!view->format_proc && !view->header_proc)
+ debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS);
+ else
+ debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS);
+
+ if (!debug_info_snapshot)
+ return NULL;
+ p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL);
+ if (!p_info) {
+ debug_info_free(debug_info_snapshot);
+ return NULL;
+ }
+ p_info->offset = 0;
+ p_info->debug_info_snap = debug_info_snapshot;
+ p_info->debug_info_org = debug_info;
+ p_info->view = view;
+ p_info->act_area = 0;
+ p_info->act_page = 0;
+ p_info->act_entry = DEBUG_PROLOG_ENTRY;
+ p_info->act_entry_offset = 0;
+ debug_info_get(debug_info);
+
+ return p_info;
+}
+
/*
* debug_open:
* - called for user open()
- * - copies formated output to private_data area of the file
+ * - copies formatted output to private_data area of the file
* handle
*/
static int debug_open(struct inode *inode, struct file *file)
{
- debug_info_t *debug_info, *debug_info_snapshot;
+ debug_info_t *debug_info;
file_private_info_t *p_info;
int i, rc = 0;
@@ -547,42 +675,26 @@ static int debug_open(struct inode *inode, struct file *file)
goto out;
found:
-
- /* Make snapshot of current debug areas to get it consistent. */
- /* To copy all the areas is only needed, if we have a view which */
- /* formats the debug areas. */
-
- if (!debug_info->views[i]->format_proc && !debug_info->views[i]->header_proc)
- debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS);
- else
- debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS);
-
- if (!debug_info_snapshot) {
- rc = -ENOMEM;
- goto out;
- }
- p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL);
+ p_info = debug_file_private_alloc(debug_info, debug_info->views[i]);
if (!p_info) {
- debug_info_free(debug_info_snapshot);
rc = -ENOMEM;
goto out;
}
- p_info->offset = 0;
- p_info->debug_info_snap = debug_info_snapshot;
- p_info->debug_info_org = debug_info;
- p_info->view = debug_info->views[i];
- p_info->act_area = 0;
- p_info->act_page = 0;
- p_info->act_entry = DEBUG_PROLOG_ENTRY;
- p_info->act_entry_offset = 0;
file->private_data = p_info;
- debug_info_get(debug_info);
nonseekable_open(inode, file);
out:
mutex_unlock(&debug_mutex);
return rc;
}
+static void debug_file_private_free(file_private_info_t *p_info)
+{
+ if (p_info->debug_info_snap)
+ debug_info_free(p_info->debug_info_snap);
+ debug_info_put(p_info->debug_info_org);
+ kfree(p_info);
+}
+
/*
* debug_close:
* - called for user close()
@@ -593,13 +705,59 @@ static int debug_close(struct inode *inode, struct file *file)
file_private_info_t *p_info;
p_info = (file_private_info_t *) file->private_data;
- if (p_info->debug_info_snap)
- debug_info_free(p_info->debug_info_snap);
- debug_info_put(p_info->debug_info_org);
- kfree(file->private_data);
+ debug_file_private_free(p_info);
+ file->private_data = NULL;
return 0; /* success */
}
+/**
+ * debug_dump - Get a textual representation of debug info, or as much as fits
+ * @id: Debug information to use
+ * @view: View with which to dump the debug information
+ * @buf: Buffer the textual debug data representation is written to
+ * @buf_size: Size of the buffer, including the trailing '\0' byte
+ * @reverse: Go backwards from the last written entry
+ *
+ * This function may be used whenever a textual representation of the debug
+ * information is required without using an s390dbf file.
+ *
+ * Note: It is the caller's responsibility to supply a view that is compatible
+ * with the debug information data.
+ *
+ * Return: On success returns the number of bytes written to the buffer, not
+ * including the trailing '\0' byte. If @buf_size == 0 the function returns 0.
+ * On failure an error code less than 0 is returned.
+ */
+ssize_t debug_dump(debug_info_t *id, struct debug_view *view,
+ char *buf, size_t buf_size, bool reverse)
+{
+ file_private_info_t *p_info;
+ size_t size, offset = 0;
+
+ /* Need space for '\0' byte */
+ if (buf_size < 1)
+ return 0;
+ buf_size--;
+
+ p_info = debug_file_private_alloc(id, view);
+ if (!p_info)
+ return -ENOMEM;
+
+ /* There is always at least the DEBUG_PROLOG_ENTRY */
+ do {
+ size = debug_format_entry(p_info);
+ size = min(size, buf_size - offset);
+ memcpy(buf + offset, p_info->temp_buf, size);
+ offset += size;
+ if (offset >= buf_size)
+ break;
+ } while (debug_move_entry(p_info, reverse));
+ debug_file_private_free(p_info);
+ buf[offset] = '\0';
+
+ return offset;
+}
+
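As a usage illustration for the new interface, here is a minimal kernel-side sketch (not part of this patch; the function name and buffer size are invented) that dumps the newest entries of an existing debug area through the exported sprintf view:

/*
 * Sketch only: assumes <asm/debug.h> and a debug area populated via
 * debug_sprintf_event(), so that debug_sprintf_view is a compatible view.
 */
static void example_dump_recent(debug_info_t *dbf)
{
	char buf[1024];
	ssize_t len;

	/* reverse == true: start at the newest entry and walk backwards */
	len = debug_dump(dbf, &debug_sprintf_view, buf, sizeof(buf), true);
	if (len < 0)
		return;
	pr_info("s390dbf dump (%zd bytes):\n%s", len, buf);
}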
/* Create debugfs entries and add to internal list. */
static void _debug_register(debug_info_t *id)
{
@@ -954,7 +1112,7 @@ static int debug_active = 1;
* always allow read, allow write only if debug_stoppable is set or
* if debug_active is already off
*/
-static int s390dbf_procactive(struct ctl_table *table, int write,
+static int s390dbf_procactive(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
if (!write || debug_stoppable || !debug_active)
@@ -963,7 +1121,7 @@ static int s390dbf_procactive(struct ctl_table *table, int write,
return 0;
}
-static struct ctl_table s390dbf_table[] = {
+static const struct ctl_table s390dbf_table[] = {
{
.procname = "debug_stoppable",
.data = &debug_stoppable,
@@ -1257,18 +1415,12 @@ static inline char *debug_get_user_string(const char __user *user_buf,
{
char *buffer;
- buffer = kmalloc(user_len + 1, GFP_KERNEL);
- if (!buffer)
- return ERR_PTR(-ENOMEM);
- if (copy_from_user(buffer, user_buf, user_len) != 0) {
- kfree(buffer);
- return ERR_PTR(-EFAULT);
- }
+ buffer = memdup_user_nul(user_buf, user_len);
+ if (IS_ERR(buffer))
+ return buffer;
/* got the string, now strip linefeed. */
if (buffer[user_len - 1] == '\n')
buffer[user_len - 1] = 0;
- else
- buffer[user_len] = 0;
return buffer;
}
@@ -1293,9 +1445,9 @@ static inline int debug_get_uint(char *buf)
*/
static int debug_prolog_pages_fn(debug_info_t *id, struct debug_view *view,
- char *out_buf)
+ char *out_buf, size_t out_buf_size)
{
- return sprintf(out_buf, "%i\n", id->pages_per_area);
+ return scnprintf(out_buf, out_buf_size, "%i\n", id->pages_per_area);
}
/*
@@ -1342,14 +1494,14 @@ out:
* prints out actual debug level
*/
static int debug_prolog_level_fn(debug_info_t *id, struct debug_view *view,
- char *out_buf)
+ char *out_buf, size_t out_buf_size)
{
int rc = 0;
if (id->level == DEBUG_OFF_LEVEL)
- rc = sprintf(out_buf, "-\n");
+ rc = scnprintf(out_buf, out_buf_size, "-\n");
else
- rc = sprintf(out_buf, "%i\n", id->level);
+ rc = scnprintf(out_buf, out_buf_size, "%i\n", id->level);
return rc;
}
@@ -1466,22 +1618,24 @@ out:
* prints debug data in hex/ascii format
*/
static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
- char *out_buf, const char *in_buf)
+ char *out_buf, size_t out_buf_size, const char *in_buf)
{
int i, rc = 0;
- for (i = 0; i < id->buf_size; i++)
- rc += sprintf(out_buf + rc, "%02x ", ((unsigned char *) in_buf)[i]);
- rc += sprintf(out_buf + rc, "| ");
+ for (i = 0; i < id->buf_size; i++) {
+ rc += scnprintf(out_buf + rc, out_buf_size - rc,
+ "%02x ", ((unsigned char *)in_buf)[i]);
+ }
+ rc += scnprintf(out_buf + rc, out_buf_size - rc, "| ");
for (i = 0; i < id->buf_size; i++) {
unsigned char c = in_buf[i];
if (isascii(c) && isprint(c))
- rc += sprintf(out_buf + rc, "%c", c);
+ rc += scnprintf(out_buf + rc, out_buf_size - rc, "%c", c);
else
- rc += sprintf(out_buf + rc, ".");
+ rc += scnprintf(out_buf + rc, out_buf_size - rc, ".");
}
- rc += sprintf(out_buf + rc, "\n");
+ rc += scnprintf(out_buf + rc, out_buf_size - rc, "\n");
return rc;
}
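To illustrate the new size-aware callback prototype, a hypothetical view definition (sketch only; my_format_fn, my_ascii_view and the output format are invented, while the struct debug_view fields and debug_dflt_header_fn are the ones declared in asm/debug.h):

static int my_format_fn(debug_info_t *id, struct debug_view *view,
			char *out_buf, size_t out_buf_size, const char *in_buf)
{
	/* print at most id->buf_size bytes of the raw entry data as text */
	return scnprintf(out_buf, out_buf_size, "%.*s\n",
			 (int)id->buf_size, in_buf);
}

static struct debug_view my_ascii_view = {
	.name		= "myascii",
	.header_proc	= &debug_dflt_header_fn,
	.format_proc	= &my_format_fn,
};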
@@ -1489,7 +1643,8 @@ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
* prints header for debug entry
*/
int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
- int area, debug_entry_t *entry, char *out_buf)
+ int area, debug_entry_t *entry, char *out_buf,
+ size_t out_buf_size)
{
unsigned long sec, usec;
unsigned long caller;
@@ -1506,22 +1661,22 @@ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
else
except_str = "-";
caller = (unsigned long) entry->caller;
- rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %04u %px ",
- area, sec, usec, level, except_str,
- entry->cpu, (void *)caller);
+ rc += scnprintf(out_buf, out_buf_size, "%02i %011ld:%06lu %1u %1s %04u %px ",
+ area, sec, usec, level, except_str,
+ entry->cpu, (void *)caller);
return rc;
}
EXPORT_SYMBOL(debug_dflt_header_fn);
/*
- * prints debug data sprintf-formated:
- * debug_sprinf_event/exception calls must be used together with this view
+ * prints debug data sprintf-formatted:
+ * debug_sprintf_event/exception calls must be used together with this view
*/
#define DEBUG_SPRINTF_MAX_ARGS 10
-static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
- char *out_buf, const char *inbuf)
+int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
+ char *out_buf, size_t out_buf_size, const char *inbuf)
{
debug_sprintf_entry_t *curr_event = (debug_sprintf_entry_t *)inbuf;
int num_longs, num_used_args = 0, i, rc = 0;
@@ -1534,8 +1689,9 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
goto out; /* bufsize of entry too small */
if (num_longs == 1) {
/* no args, we use only the string */
- strcpy(out_buf, curr_event->string);
- rc = strlen(curr_event->string);
+ rc = strscpy(out_buf, curr_event->string, out_buf_size);
+ if (rc == -E2BIG)
+ rc = out_buf_size;
goto out;
}
@@ -1547,15 +1703,17 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
for (i = 0; i < num_used_args; i++)
index[i] = i;
- rc = sprintf(out_buf, curr_event->string, curr_event->args[index[0]],
- curr_event->args[index[1]], curr_event->args[index[2]],
- curr_event->args[index[3]], curr_event->args[index[4]],
- curr_event->args[index[5]], curr_event->args[index[6]],
- curr_event->args[index[7]], curr_event->args[index[8]],
- curr_event->args[index[9]]);
+ rc = scnprintf(out_buf, out_buf_size,
+ curr_event->string, curr_event->args[index[0]],
+ curr_event->args[index[1]], curr_event->args[index[2]],
+ curr_event->args[index[3]], curr_event->args[index[4]],
+ curr_event->args[index[5]], curr_event->args[index[6]],
+ curr_event->args[index[7]], curr_event->args[index[8]],
+ curr_event->args[index[9]]);
out:
return rc;
}
+EXPORT_SYMBOL(debug_sprintf_format_fn);
/*
* debug_init:
diff --git a/arch/s390/kernel/diag/Makefile b/arch/s390/kernel/diag/Makefile
new file mode 100644
index 000000000000..956aee6c4090
--- /dev/null
+++ b/arch/s390/kernel/diag/Makefile
@@ -0,0 +1 @@
+obj-y := diag_misc.o diag324.o diag.o diag310.o
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag/diag.c
index 8dee9aa0ec95..56b862ba9be8 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag/diag.c
@@ -16,7 +16,8 @@
#include <asm/diag.h>
#include <asm/trace/diag.h>
#include <asm/sections.h>
-#include "entry.h"
+#include <asm/asm.h>
+#include "../entry.h"
struct diag_stat {
unsigned int counter[NR_DIAG_STAT];
@@ -50,8 +51,11 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = {
[DIAG_STAT_X2FC] = { .code = 0x2fc, .name = "Guest Performance Data" },
[DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" },
[DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" },
+ [DIAG_STAT_X310] = { .code = 0x310, .name = "Memory Topology Information" },
[DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" },
[DIAG_STAT_X320] = { .code = 0x320, .name = "Certificate Store" },
+ [DIAG_STAT_X324] = { .code = 0x324, .name = "Power Information Block" },
+ [DIAG_STAT_X49C] = { .code = 0x49c, .name = "Warning-Track Interruption" },
[DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" },
};
@@ -185,11 +189,13 @@ int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
}
EXPORT_SYMBOL(diag14);
+#define DIAG204_BUSY_RC 8
+
static inline int __diag204(unsigned long *subcode, unsigned long size, void *addr)
{
union register_pair rp = { .even = *subcode, .odd = size };
- asm volatile(
+ asm_inline volatile(
" diag %[addr],%[rp],0x204\n"
"0: nopr %%r7\n"
EX_TABLE(0b,0b)
@@ -215,16 +221,18 @@ int diag204(unsigned long subcode, unsigned long size, void *addr)
{
if (addr) {
if (WARN_ON_ONCE(!is_vmalloc_addr(addr)))
- return -1;
+ return -EINVAL;
if (WARN_ON_ONCE(!IS_ALIGNED((unsigned long)addr, PAGE_SIZE)))
- return -1;
+ return -EINVAL;
}
if ((subcode & DIAG204_SUBCODE_MASK) == DIAG204_SUBC_STIB4)
addr = (void *)pfn_to_phys(vmalloc_to_pfn(addr));
diag_stat_inc(DIAG_STAT_X204);
size = __diag204(&subcode, size, addr);
- if (subcode)
- return -1;
+ if (subcode == DIAG204_BUSY_RC)
+ return -EBUSY;
+ else if (subcode)
+ return -EOPNOTSUPP;
return size;
}
EXPORT_SYMBOL(diag204);
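A hedged caller sketch for the reworked return values (not part of this patch; the function name is invented): diag204() still expects a page-aligned vmalloc buffer, but callers can now distinguish a busy hypervisor from a real failure:

static int example_diag204_query(unsigned long subcode, unsigned long pages, void *buf)
{
	int rc;

	/* buf must be NULL or a page-aligned vmalloc area large enough for "pages" pages */
	rc = diag204(subcode, pages, buf);
	if (rc == -EBUSY)
		return rc;	/* hypervisor busy, caller may retry later */
	if (rc < 0)
		return rc;	/* -EINVAL or -EOPNOTSUPP, retrying will not help */
	return 0;		/* positive rc is the size reported by diag 0x204 */
}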
@@ -278,12 +286,14 @@ int diag224(void *ptr)
int rc = -EOPNOTSUPP;
diag_stat_inc(DIAG_STAT_X224);
- asm volatile(
- " diag %1,%2,0x224\n"
- "0: lhi %0,0x0\n"
+ asm_inline volatile("\n"
+ " diag %[type],%[addr],0x224\n"
+ "0: lhi %[rc],0\n"
"1:\n"
EX_TABLE(0b,1b)
- : "+d" (rc) :"d" (0), "d" (addr) : "memory");
+ : [rc] "+d" (rc)
+ , "=m" (*(struct { char buf[PAGE_SIZE]; } *)ptr)
+ : [type] "d" (0), [addr] "d" (addr));
return rc;
}
EXPORT_SYMBOL(diag224);
@@ -297,3 +307,18 @@ int diag26c(void *req, void *resp, enum diag26c_sc subcode)
return diag_amode31_ops.diag26c(virt_to_phys(req), virt_to_phys(resp), subcode);
}
EXPORT_SYMBOL(diag26c);
+
+int diag49c(unsigned long subcode)
+{
+ int cc;
+
+ diag_stat_inc(DIAG_STAT_X49C);
+ asm volatile(
+ " diag %[subcode],0,0x49c\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc)
+ : [subcode] "d" (subcode)
+ : CC_CLOBBER);
+ return CC_TRANSFORM(cc);
+}
+EXPORT_SYMBOL(diag49c);
diff --git a/arch/s390/kernel/diag/diag310.c b/arch/s390/kernel/diag/diag310.c
new file mode 100644
index 000000000000..f411562aa7f6
--- /dev/null
+++ b/arch/s390/kernel/diag/diag310.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Request memory topology information via diag0x310.
+ *
+ * Copyright IBM Corp. 2025
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <asm/diag.h>
+#include <asm/sclp.h>
+#include <uapi/asm/diag.h>
+#include "diag_ioctl.h"
+
+#define DIAG310_LEVELMIN 1
+#define DIAG310_LEVELMAX 6
+
+enum diag310_sc {
+ DIAG310_SUBC_0 = 0,
+ DIAG310_SUBC_1 = 1,
+ DIAG310_SUBC_4 = 4,
+ DIAG310_SUBC_5 = 5
+};
+
+enum diag310_retcode {
+ DIAG310_RET_SUCCESS = 0x0001,
+ DIAG310_RET_BUSY = 0x0101,
+ DIAG310_RET_OPNOTSUPP = 0x0102,
+ DIAG310_RET_SC4_INVAL = 0x0401,
+ DIAG310_RET_SC4_NODATA = 0x0402,
+ DIAG310_RET_SC5_INVAL = 0x0501,
+ DIAG310_RET_SC5_NODATA = 0x0502,
+ DIAG310_RET_SC5_ESIZE = 0x0503
+};
+
+union diag310_response {
+ u64 response;
+ struct {
+ u64 result : 32;
+ u64 : 16;
+ u64 rc : 16;
+ };
+};
+
+union diag310_req_subcode {
+ u64 subcode;
+ struct {
+ u64 : 48;
+ u64 st : 8;
+ u64 sc : 8;
+ };
+};
+
+union diag310_req_size {
+ u64 size;
+ struct {
+ u64 page_count : 32;
+ u64 : 32;
+ };
+};
+
+static inline unsigned long diag310(unsigned long subcode, unsigned long size, void *addr)
+{
+ union register_pair rp = { .even = (unsigned long)addr, .odd = size };
+
+ diag_stat_inc(DIAG_STAT_X310);
+ asm volatile("diag %[rp],%[subcode],0x310"
+ : [rp] "+d" (rp.pair)
+ : [subcode] "d" (subcode)
+ : "memory");
+ return rp.odd;
+}
+
+static int diag310_result_to_errno(unsigned int result)
+{
+ switch (result) {
+ case DIAG310_RET_BUSY:
+ return -EBUSY;
+ case DIAG310_RET_OPNOTSUPP:
+ return -EOPNOTSUPP;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int diag310_get_subcode_mask(unsigned long *mask)
+{
+ union diag310_response res;
+
+ res.response = diag310(DIAG310_SUBC_0, 0, NULL);
+ if (res.rc != DIAG310_RET_SUCCESS)
+ return diag310_result_to_errno(res.rc);
+ *mask = res.response;
+ return 0;
+}
+
+static int diag310_get_memtop_stride(unsigned long *stride)
+{
+ union diag310_response res;
+
+ res.response = diag310(DIAG310_SUBC_1, 0, NULL);
+ if (res.rc != DIAG310_RET_SUCCESS)
+ return diag310_result_to_errno(res.rc);
+ *stride = res.result;
+ return 0;
+}
+
+static int diag310_get_memtop_size(unsigned long *pages, unsigned long level)
+{
+ union diag310_req_subcode req = { .sc = DIAG310_SUBC_4, .st = level };
+ union diag310_response res;
+
+ res.response = diag310(req.subcode, 0, NULL);
+ switch (res.rc) {
+ case DIAG310_RET_SUCCESS:
+ *pages = res.result;
+ return 0;
+ case DIAG310_RET_SC4_NODATA:
+ return -ENODATA;
+ case DIAG310_RET_SC4_INVAL:
+ return -EINVAL;
+ default:
+ return diag310_result_to_errno(res.rc);
+ }
+}
+
+static int diag310_store_topology_map(void *buf, unsigned long pages, unsigned long level)
+{
+ union diag310_req_subcode req_sc = { .sc = DIAG310_SUBC_5, .st = level };
+ union diag310_req_size req_size = { .page_count = pages };
+ union diag310_response res;
+
+ res.response = diag310(req_sc.subcode, req_size.size, buf);
+ switch (res.rc) {
+ case DIAG310_RET_SUCCESS:
+ return 0;
+ case DIAG310_RET_SC5_NODATA:
+ return -ENODATA;
+ case DIAG310_RET_SC5_ESIZE:
+ return -EOVERFLOW;
+ case DIAG310_RET_SC5_INVAL:
+ return -EINVAL;
+ default:
+ return diag310_result_to_errno(res.rc);
+ }
+}
+
+static int diag310_check_features(void)
+{
+ static int features_available;
+ unsigned long mask;
+ int rc;
+
+ if (READ_ONCE(features_available))
+ return 0;
+ if (!sclp.has_diag310)
+ return -EOPNOTSUPP;
+ rc = diag310_get_subcode_mask(&mask);
+ if (rc)
+ return rc;
+ if (!test_bit_inv(DIAG310_SUBC_1, &mask))
+ return -EOPNOTSUPP;
+ if (!test_bit_inv(DIAG310_SUBC_4, &mask))
+ return -EOPNOTSUPP;
+ if (!test_bit_inv(DIAG310_SUBC_5, &mask))
+ return -EOPNOTSUPP;
+ WRITE_ONCE(features_available, 1);
+ return 0;
+}
+
+static int memtop_get_stride_len(unsigned long *res)
+{
+ static unsigned long memtop_stride;
+ unsigned long stride;
+ int rc;
+
+ stride = READ_ONCE(memtop_stride);
+ if (!stride) {
+ rc = diag310_get_memtop_stride(&stride);
+ if (rc)
+ return rc;
+ WRITE_ONCE(memtop_stride, stride);
+ }
+ *res = stride;
+ return 0;
+}
+
+static int memtop_get_page_count(unsigned long *res, unsigned long level)
+{
+ static unsigned long memtop_pages[DIAG310_LEVELMAX];
+ unsigned long pages;
+ int rc;
+
+ if (level > DIAG310_LEVELMAX || level < DIAG310_LEVELMIN)
+ return -EINVAL;
+ pages = READ_ONCE(memtop_pages[level - 1]);
+ if (!pages) {
+ rc = diag310_get_memtop_size(&pages, level);
+ if (rc)
+ return rc;
+ WRITE_ONCE(memtop_pages[level - 1], pages);
+ }
+ *res = pages;
+ return 0;
+}
+
+long diag310_memtop_stride(unsigned long arg)
+{
+ size_t __user *argp = (void __user *)arg;
+ unsigned long stride;
+ int rc;
+
+ rc = diag310_check_features();
+ if (rc)
+ return rc;
+ rc = memtop_get_stride_len(&stride);
+ if (rc)
+ return rc;
+ if (put_user(stride, argp))
+ return -EFAULT;
+ return 0;
+}
+
+long diag310_memtop_len(unsigned long arg)
+{
+ size_t __user *argp = (void __user *)arg;
+ unsigned long pages, level;
+ int rc;
+
+ rc = diag310_check_features();
+ if (rc)
+ return rc;
+ if (get_user(level, argp))
+ return -EFAULT;
+ rc = memtop_get_page_count(&pages, level);
+ if (rc)
+ return rc;
+ if (put_user(pages * PAGE_SIZE, argp))
+ return -EFAULT;
+ return 0;
+}
+
+long diag310_memtop_buf(unsigned long arg)
+{
+ struct diag310_memtop __user *udata = (struct diag310_memtop __user *)arg;
+ unsigned long level, pages, data_size;
+ u64 address;
+ void *buf;
+ int rc;
+
+ rc = diag310_check_features();
+ if (rc)
+ return rc;
+ if (get_user(level, &udata->nesting_lvl))
+ return -EFAULT;
+ if (get_user(address, &udata->address))
+ return -EFAULT;
+ rc = memtop_get_page_count(&pages, level);
+ if (rc)
+ return rc;
+ data_size = pages * PAGE_SIZE;
+ buf = __vmalloc_node(data_size, PAGE_SIZE, GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT,
+ NUMA_NO_NODE, __builtin_return_address(0));
+ if (!buf)
+ return -ENOMEM;
+ rc = diag310_store_topology_map(buf, pages, level);
+ if (rc)
+ goto out;
+ if (copy_to_user((void __user *)address, buf, data_size))
+ rc = -EFAULT;
+out:
+ vfree(buf);
+ return rc;
+}
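A hypothetical user-space sketch for the two memory-topology ioctls served by this file (exposed through /dev/diag, see diag_misc.c below). The DIAG310_* ioctl numbers and the struct diag310_memtop fields are taken from the uapi header this patch relies on; the function name and error handling are invented:

#include <fcntl.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <asm/diag.h>		/* DIAG310_* ioctls, struct diag310_memtop */

static void *fetch_memtop(int fd, unsigned long level, size_t *lenp)
{
	struct diag310_memtop req = { .nesting_lvl = level };
	size_t len = level;	/* in: nesting level, out: length in bytes */
	void *buf;

	if (ioctl(fd, DIAG310_GET_MEMTOPLEN, &len))
		return NULL;
	buf = malloc(len);
	if (!buf)
		return NULL;
	req.address = (uint64_t)(uintptr_t)buf;
	if (ioctl(fd, DIAG310_GET_MEMTOPBUF, &req)) {
		free(buf);
		return NULL;
	}
	*lenp = len;
	return buf;
}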
diff --git a/arch/s390/kernel/diag/diag324.c b/arch/s390/kernel/diag/diag324.c
new file mode 100644
index 000000000000..fe325c2a2d0d
--- /dev/null
+++ b/arch/s390/kernel/diag/diag324.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Request power readings for resources in a computing environment via
+ * diag 0x324. diag 0x324 stores the power readings in the power information
+ * block (pib).
+ *
+ * Copyright IBM Corp. 2024
+ */
+
+#define pr_fmt(fmt) "diag324: " fmt
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/ioctl.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/ktime.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
+#include <asm/diag.h>
+#include <asm/sclp.h>
+#include <asm/timex.h>
+#include <uapi/asm/diag.h>
+#include "diag_ioctl.h"
+
+enum subcode {
+ DIAG324_SUBC_0 = 0,
+ DIAG324_SUBC_1 = 1,
+ DIAG324_SUBC_2 = 2,
+};
+
+enum retcode {
+ DIAG324_RET_SUCCESS = 0x0001,
+ DIAG324_RET_SUBC_NOTAVAIL = 0x0103,
+ DIAG324_RET_INSUFFICIENT_SIZE = 0x0104,
+ DIAG324_RET_READING_UNAVAILABLE = 0x0105,
+};
+
+union diag324_response {
+ u64 response;
+ struct {
+ u64 installed : 32;
+ u64 : 16;
+ u64 rc : 16;
+ } sc0;
+ struct {
+ u64 format : 16;
+ u64 : 16;
+ u64 pib_len : 16;
+ u64 rc : 16;
+ } sc1;
+ struct {
+ u64 : 48;
+ u64 rc : 16;
+ } sc2;
+};
+
+union diag324_request {
+ u64 request;
+ struct {
+ u64 : 32;
+ u64 allocated : 16;
+ u64 : 12;
+ u64 sc : 4;
+ } sc2;
+};
+
+struct pib {
+ u32 : 8;
+ u32 num : 8;
+ u32 len : 16;
+ u32 : 24;
+ u32 hlen : 8;
+ u64 : 64;
+ u64 intv;
+ u8 r[];
+} __packed;
+
+struct pibdata {
+ struct pib *pib;
+ ktime_t expire;
+ u64 sequence;
+ size_t len;
+ int rc;
+};
+
+static DEFINE_MUTEX(pibmutex);
+static struct pibdata pibdata;
+
+#define PIBWORK_DELAY (5 * NSEC_PER_SEC)
+
+static void pibwork_handler(struct work_struct *work);
+static DECLARE_DELAYED_WORK(pibwork, pibwork_handler);
+
+static unsigned long diag324(unsigned long subcode, void *addr)
+{
+ union register_pair rp = { .even = (unsigned long)addr };
+
+ diag_stat_inc(DIAG_STAT_X324);
+ asm volatile("diag %[rp],%[subcode],0x324"
+ : [rp] "+d" (rp.pair)
+ : [subcode] "d" (subcode)
+ : "memory");
+ return rp.odd;
+}
+
+static void pibwork_handler(struct work_struct *work)
+{
+ struct pibdata *data = &pibdata;
+ ktime_t timedout;
+
+ mutex_lock(&pibmutex);
+ timedout = ktime_add_ns(data->expire, PIBWORK_DELAY);
+ if (ktime_before(ktime_get(), timedout)) {
+ mod_delayed_work(system_percpu_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY));
+ goto out;
+ }
+ vfree(data->pib);
+ data->pib = NULL;
+out:
+ mutex_unlock(&pibmutex);
+}
+
+static void pib_update(struct pibdata *data)
+{
+ union diag324_request req = { .sc2.sc = DIAG324_SUBC_2, .sc2.allocated = data->len };
+ union diag324_response res;
+ int rc;
+
+ memset(data->pib, 0, data->len);
+ res.response = diag324(req.request, data->pib);
+ switch (res.sc2.rc) {
+ case DIAG324_RET_SUCCESS:
+ rc = 0;
+ break;
+ case DIAG324_RET_SUBC_NOTAVAIL:
+ rc = -ENOENT;
+ break;
+ case DIAG324_RET_INSUFFICIENT_SIZE:
+ rc = -EMSGSIZE;
+ break;
+ case DIAG324_RET_READING_UNAVAILABLE:
+ rc = -EBUSY;
+ break;
+ default:
+ rc = -EINVAL;
+ }
+ data->rc = rc;
+}
+
+long diag324_pibbuf(unsigned long arg)
+{
+ struct diag324_pib __user *udata = (struct diag324_pib __user *)arg;
+ struct pibdata *data = &pibdata;
+ static bool first = true;
+ u64 address;
+ int rc;
+
+ if (!data->len)
+ return -EOPNOTSUPP;
+ if (get_user(address, &udata->address))
+ return -EFAULT;
+ mutex_lock(&pibmutex);
+ rc = -ENOMEM;
+ if (!data->pib)
+ data->pib = vmalloc(data->len);
+ if (!data->pib)
+ goto out;
+ if (first || ktime_after(ktime_get(), data->expire)) {
+ pib_update(data);
+ data->sequence++;
+ data->expire = ktime_add_ns(ktime_get(), tod_to_ns(data->pib->intv));
+ mod_delayed_work(system_percpu_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY));
+ first = false;
+ }
+ rc = data->rc;
+ if (rc != 0 && rc != -EBUSY)
+ goto out;
+ rc = copy_to_user((void __user *)address, data->pib, data->pib->len);
+ rc |= put_user(data->sequence, &udata->sequence);
+ if (rc)
+ rc = -EFAULT;
+out:
+ mutex_unlock(&pibmutex);
+ return rc;
+}
+
+long diag324_piblen(unsigned long arg)
+{
+ struct pibdata *data = &pibdata;
+
+ if (!data->len)
+ return -EOPNOTSUPP;
+ if (put_user(data->len, (size_t __user *)arg))
+ return -EFAULT;
+ return 0;
+}
+
+static int __init diag324_init(void)
+{
+ union diag324_response res;
+ unsigned long installed;
+
+ if (!sclp.has_diag324)
+ return -EOPNOTSUPP;
+ res.response = diag324(DIAG324_SUBC_0, NULL);
+ if (res.sc0.rc != DIAG324_RET_SUCCESS)
+ return -EOPNOTSUPP;
+ installed = res.response;
+ if (!test_bit_inv(DIAG324_SUBC_1, &installed))
+ return -EOPNOTSUPP;
+ if (!test_bit_inv(DIAG324_SUBC_2, &installed))
+ return -EOPNOTSUPP;
+ res.response = diag324(DIAG324_SUBC_1, NULL);
+ if (res.sc1.rc != DIAG324_RET_SUCCESS)
+ return -EOPNOTSUPP;
+ pibdata.len = res.sc1.pib_len;
+ return 0;
+}
+device_initcall(diag324_init);
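Analogous to the memory-topology sketch above, a hypothetical user-space reader for the power information block (an illustration only; the DIAG324_* ioctls and struct diag324_pib come from the assumed uapi header, everything else is invented):

#include <fcntl.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <asm/diag.h>		/* DIAG324_* ioctls, struct diag324_pib */

static int read_pib(int fd, uint64_t *sequence)
{
	struct diag324_pib req = { 0 };
	size_t len;
	void *buf;
	int rc;

	if (ioctl(fd, DIAG324_GET_PIBLEN, &len))
		return -1;
	buf = malloc(len);
	if (!buf)
		return -1;
	req.address = (uint64_t)(uintptr_t)buf;
	rc = ioctl(fd, DIAG324_GET_PIBBUF, &req);
	if (!rc)
		*sequence = req.sequence;	/* bumped each time the pib is refreshed */
	free(buf);
	return rc;
}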
diff --git a/arch/s390/kernel/diag/diag_ioctl.h b/arch/s390/kernel/diag/diag_ioctl.h
new file mode 100644
index 000000000000..7080be946785
--- /dev/null
+++ b/arch/s390/kernel/diag/diag_ioctl.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _DIAG_IOCTL_H
+#define _DIAG_IOCTL_H
+
+#include <linux/types.h>
+
+long diag324_pibbuf(unsigned long arg);
+long diag324_piblen(unsigned long arg);
+
+long diag310_memtop_stride(unsigned long arg);
+long diag310_memtop_len(unsigned long arg);
+long diag310_memtop_buf(unsigned long arg);
+
+#endif /* _DIAG_IOCTL_H */
diff --git a/arch/s390/kernel/diag/diag_misc.c b/arch/s390/kernel/diag/diag_misc.c
new file mode 100644
index 000000000000..efffe02ea02e
--- /dev/null
+++ b/arch/s390/kernel/diag/diag_misc.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Provide diagnose information via misc device /dev/diag.
+ *
+ * Copyright IBM Corp. 2024
+ */
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/ioctl.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/types.h>
+
+#include <uapi/asm/diag.h>
+#include "diag_ioctl.h"
+
+static long diag_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ long rc;
+
+ switch (cmd) {
+ case DIAG324_GET_PIBLEN:
+ rc = diag324_piblen(arg);
+ break;
+ case DIAG324_GET_PIBBUF:
+ rc = diag324_pibbuf(arg);
+ break;
+ case DIAG310_GET_STRIDE:
+ rc = diag310_memtop_stride(arg);
+ break;
+ case DIAG310_GET_MEMTOPLEN:
+ rc = diag310_memtop_len(arg);
+ break;
+ case DIAG310_GET_MEMTOPBUF:
+ rc = diag310_memtop_buf(arg);
+ break;
+ default:
+ rc = -ENOIOCTLCMD;
+ break;
+ }
+ return rc;
+}
+
+static const struct file_operations fops = {
+ .owner = THIS_MODULE,
+ .open = nonseekable_open,
+ .unlocked_ioctl = diag_ioctl,
+};
+
+static struct miscdevice diagdev = {
+ .name = "diag",
+ .minor = MISC_DYNAMIC_MINOR,
+ .fops = &fops,
+ .mode = 0444,
+};
+
+static int diag_init(void)
+{
+ return misc_register(&diagdev);
+}
+
+device_initcall(diag_init);
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 89dc826a8d2e..1cec93895b3a 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -17,7 +17,6 @@
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
-#include <linux/export.h>
#include <linux/kallsyms.h>
#include <linux/reboot.h>
#include <linux/kprobes.h>
@@ -122,6 +121,7 @@ enum {
U8_32, /* 8 bit unsigned value starting at 32 */
U12_16, /* 12 bit unsigned value starting at 16 */
U16_16, /* 16 bit unsigned value starting at 16 */
+ U16_20, /* 16 bit unsigned value starting at 20 */
U16_32, /* 16 bit unsigned value starting at 32 */
U32_16, /* 32 bit unsigned value starting at 16 */
VX_12, /* Vector index register starting at position 12 */
@@ -184,6 +184,7 @@ static const struct s390_operand operands[] = {
[U8_32] = { 8, 32, 0 },
[U12_16] = { 12, 16, 0 },
[U16_16] = { 16, 16, 0 },
+ [U16_20] = { 16, 20, 0 },
[U16_32] = { 16, 32, 0 },
[U32_16] = { 32, 16, 0 },
[VX_12] = { 4, 12, OPERAND_INDEX | OPERAND_VR },
@@ -257,7 +258,6 @@ static const unsigned char formats[][6] = {
[INSTR_RSL_R0RD] = { D_20, L4_8, B_16, 0, 0, 0 },
[INSTR_RSY_AARD] = { A_8, A_12, D20_20, B_16, 0, 0 },
[INSTR_RSY_CCRD] = { C_8, C_12, D20_20, B_16, 0, 0 },
- [INSTR_RSY_RDRU] = { R_8, D20_20, B_16, U4_12, 0, 0 },
[INSTR_RSY_RRRD] = { R_8, R_12, D20_20, B_16, 0, 0 },
[INSTR_RSY_RURD] = { R_8, U4_12, D20_20, B_16, 0, 0 },
[INSTR_RSY_RURD2] = { R_8, D20_20, B_16, U4_12, 0, 0 },
@@ -300,14 +300,17 @@ static const unsigned char formats[][6] = {
[INSTR_VRI_V0UU2] = { V_8, U16_16, U4_32, 0, 0, 0 },
[INSTR_VRI_V0UUU] = { V_8, U8_16, U8_24, U4_32, 0, 0 },
[INSTR_VRI_VR0UU] = { V_8, R_12, U8_28, U4_24, 0, 0 },
+ [INSTR_VRI_VV0UU] = { V_8, V_12, U8_28, U4_24, 0, 0 },
[INSTR_VRI_VVUU] = { V_8, V_12, U16_16, U4_32, 0, 0 },
[INSTR_VRI_VVUUU] = { V_8, V_12, U12_16, U4_32, U4_28, 0 },
[INSTR_VRI_VVUUU2] = { V_8, V_12, U8_28, U8_16, U4_24, 0 },
[INSTR_VRI_VVV0U] = { V_8, V_12, V_16, U8_24, 0, 0 },
[INSTR_VRI_VVV0UU] = { V_8, V_12, V_16, U8_24, U4_32, 0 },
[INSTR_VRI_VVV0UU2] = { V_8, V_12, V_16, U8_28, U4_24, 0 },
- [INSTR_VRR_0V] = { V_12, 0, 0, 0, 0, 0 },
+ [INSTR_VRI_VVV0UV] = { V_8, V_12, V_16, V_32, U8_24, 0 },
+ [INSTR_VRR_0V0U] = { V_12, U16_20, 0, 0, 0, 0 },
[INSTR_VRR_0VV0U] = { V_12, V_16, U4_24, 0, 0, 0 },
+ [INSTR_VRR_0VVU] = { V_12, V_16, U16_20, 0, 0, 0 },
[INSTR_VRR_RV0UU] = { R_8, V_12, U4_24, U4_28, 0, 0 },
[INSTR_VRR_VRR] = { V_8, R_12, R_16, 0, 0, 0 },
[INSTR_VRR_VV] = { V_8, V_12, 0, 0, 0, 0 },
@@ -455,21 +458,21 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
if (separator)
ptr += sprintf(ptr, "%c", separator);
if (operand->flags & OPERAND_GPR)
- ptr += sprintf(ptr, "%%r%i", value);
+ ptr += sprintf(ptr, "%%r%u", value);
else if (operand->flags & OPERAND_FPR)
- ptr += sprintf(ptr, "%%f%i", value);
+ ptr += sprintf(ptr, "%%f%u", value);
else if (operand->flags & OPERAND_AR)
- ptr += sprintf(ptr, "%%a%i", value);
+ ptr += sprintf(ptr, "%%a%u", value);
else if (operand->flags & OPERAND_CR)
- ptr += sprintf(ptr, "%%c%i", value);
+ ptr += sprintf(ptr, "%%c%u", value);
else if (operand->flags & OPERAND_VR)
- ptr += sprintf(ptr, "%%v%i", value);
+ ptr += sprintf(ptr, "%%v%u", value);
else if (operand->flags & OPERAND_PCREL) {
void *pcrel = (void *)((int)value + addr);
ptr += sprintf(ptr, "%px", pcrel);
} else if (operand->flags & OPERAND_SIGNED)
- ptr += sprintf(ptr, "%i", value);
+ ptr += sprintf(ptr, "%i", (int)value);
else
ptr += sprintf(ptr, "%u", value);
if (operand->flags & OPERAND_DISP)
@@ -500,24 +503,27 @@ static int copy_from_regs(struct pt_regs *regs, void *dst, void *src, int len)
void show_code(struct pt_regs *regs)
{
char *mode = user_mode(regs) ? "User" : "Krnl";
+ unsigned long addr, pswaddr;
unsigned char code[64];
char buffer[128], *ptr;
- unsigned long addr;
int start, end, opsize, hops, i;
+ pswaddr = regs->psw.addr;
+ if (test_pt_regs_flag(regs, PIF_PSW_ADDR_ADJUSTED))
+ pswaddr = __forward_psw(regs->psw, regs->int_code >> 16);
/* Get a snapshot of the 64 bytes surrounding the fault address. */
- for (start = 32; start && regs->psw.addr >= 34 - start; start -= 2) {
- addr = regs->psw.addr - 34 + start;
+ for (start = 32; start && pswaddr >= 34 - start; start -= 2) {
+ addr = pswaddr - 34 + start;
if (copy_from_regs(regs, code + start - 2, (void *)addr, 2))
break;
}
for (end = 32; end < 64; end += 2) {
- addr = regs->psw.addr + end - 32;
+ addr = pswaddr + end - 32;
if (copy_from_regs(regs, code + end, (void *)addr, 2))
break;
}
/* Code snapshot usable ? */
- if ((regs->psw.addr & 1) || start >= end) {
+ if ((pswaddr & 1) || start >= end) {
printk("%s Code: Bad PSW.\n", mode);
return;
}
@@ -540,12 +546,12 @@ void show_code(struct pt_regs *regs)
while (start < end && hops < 8) {
opsize = insn_length(code[start]);
if (start + opsize == 32)
- *ptr++ = '#';
+ *ptr++ = '*';
else if (start == 32)
*ptr++ = '>';
else
*ptr++ = ' ';
- addr = regs->psw.addr + start - 32;
+ addr = pswaddr + start - 32;
ptr += sprintf(ptr, "%px: ", (void *)addr);
if (start + opsize >= end)
break;
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index d2012635b093..f9d52e05e01e 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -17,6 +17,7 @@
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
+#include <asm/asm-offsets.h>
#include <asm/processor.h>
#include <asm/debug.h>
#include <asm/dis.h>
@@ -61,28 +62,28 @@ static bool in_task_stack(unsigned long sp, struct task_struct *task,
static bool in_irq_stack(unsigned long sp, struct stack_info *info)
{
- unsigned long stack = S390_lowcore.async_stack - STACK_INIT_OFFSET;
+ unsigned long stack = get_lowcore()->async_stack - STACK_INIT_OFFSET;
return in_stack(sp, info, STACK_TYPE_IRQ, stack);
}
static bool in_nodat_stack(unsigned long sp, struct stack_info *info)
{
- unsigned long stack = S390_lowcore.nodat_stack - STACK_INIT_OFFSET;
+ unsigned long stack = get_lowcore()->nodat_stack - STACK_INIT_OFFSET;
return in_stack(sp, info, STACK_TYPE_NODAT, stack);
}
static bool in_mcck_stack(unsigned long sp, struct stack_info *info)
{
- unsigned long stack = S390_lowcore.mcck_stack - STACK_INIT_OFFSET;
+ unsigned long stack = get_lowcore()->mcck_stack - STACK_INIT_OFFSET;
return in_stack(sp, info, STACK_TYPE_MCCK, stack);
}
static bool in_restart_stack(unsigned long sp, struct stack_info *info)
{
- unsigned long stack = S390_lowcore.restart_stack - STACK_INIT_OFFSET;
+ unsigned long stack = get_lowcore()->restart_stack - STACK_INIT_OFFSET;
return in_stack(sp, info, STACK_TYPE_RESTART, stack);
}
@@ -154,12 +155,16 @@ static void show_last_breaking_event(struct pt_regs *regs)
void show_registers(struct pt_regs *regs)
{
struct psw_bits *psw = &psw_bits(regs->psw);
+ unsigned long pswaddr;
char *mode;
+ pswaddr = regs->psw.addr;
+ if (test_pt_regs_flag(regs, PIF_PSW_ADDR_ADJUSTED))
+ pswaddr = __forward_psw(regs->psw, regs->int_code >> 16);
mode = user_mode(regs) ? "User" : "Krnl";
- printk("%s PSW : %px %px", mode, (void *)regs->psw.mask, (void *)regs->psw.addr);
+ printk("%s PSW : %px %px", mode, (void *)regs->psw.mask, (void *)pswaddr);
if (!user_mode(regs))
- pr_cont(" (%pSR)", (void *)regs->psw.addr);
+ pr_cont(" (%pSR)", (void *)pswaddr);
pr_cont("\n");
printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x "
"P:%x AS:%x CC:%x PM:%x", psw->per, psw->dat, psw->io, psw->ext,
@@ -198,13 +203,8 @@ void __noreturn die(struct pt_regs *regs, const char *str)
console_verbose();
spin_lock_irq(&die_lock);
bust_spinlocks(1);
- printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff,
+ printk("%s: %04x ilc:%d [#%d]", str, regs->int_code & 0xffff,
regs->int_code >> 17, ++die_counter);
-#ifdef CONFIG_PREEMPT
- pr_cont("PREEMPT ");
-#elif defined(CONFIG_PREEMPT_RT)
- pr_cont("PREEMPT_RT ");
-#endif
pr_cont("SMP ");
if (debug_pagealloc_enabled())
pr_cont("DEBUG_PAGEALLOC");
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index c666271433fb..b27239c03d79 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -4,9 +4,10 @@
* Author(s): Hongjie Yang <hongjie@us.ibm.com>,
*/
-#define KMSG_COMPONENT "setup"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "setup: " fmt
+#include <linux/sched/debug.h>
+#include <linux/cpufeature.h>
#include <linux/compiler.h>
#include <linux/init.h>
#include <linux/errno.h>
@@ -19,7 +20,10 @@
#include <linux/kernel.h>
#include <asm/asm-extable.h>
#include <linux/memblock.h>
+#include <linux/kasan.h>
#include <asm/access-regs.h>
+#include <asm/asm-offsets.h>
+#include <asm/machine.h>
#include <asm/diag.h>
#include <asm/ebcdic.h>
#include <asm/fpu.h>
@@ -35,12 +39,14 @@
#include <asm/boot_data.h>
#include "entry.h"
-#define decompressor_handled_param(param) \
-static int __init ignore_decompressor_param_##param(char *s) \
+#define __decompressor_handled_param(func, param) \
+static int __init ignore_decompressor_param_##func(char *s) \
{ \
return 0; \
} \
-early_param(#param, ignore_decompressor_param_##param)
+early_param(#param, ignore_decompressor_param_##func)
+
+#define decompressor_handled_param(param) __decompressor_handled_param(param, param)
decompressor_handled_param(mem);
decompressor_handled_param(vmalloc);
@@ -48,6 +54,9 @@ decompressor_handled_param(dfltcc);
decompressor_handled_param(facilities);
decompressor_handled_param(nokaslr);
decompressor_handled_param(cmma);
+decompressor_handled_param(relocate_lowcore);
+decompressor_handled_param(bootdebug);
+__decompressor_handled_param(debug_alternative, debug-alternative);
#if IS_ENABLED(CONFIG_KVM)
decompressor_handled_param(prot_virt);
#endif
@@ -56,25 +65,10 @@ static void __init kasan_early_init(void)
{
#ifdef CONFIG_KASAN
init_task.kasan_depth = 0;
- sclp_early_printk("KernelAddressSanitizer initialized\n");
+ kasan_init_generic();
#endif
}
-static void __init reset_tod_clock(void)
-{
- union tod_clock clk;
-
- if (store_tod_clock_ext_cc(&clk) == 0)
- return;
- /* TOD clock not running. Set the clock to Unix Epoch. */
- if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock_ext_cc(&clk))
- disabled_wait();
-
- memset(&tod_clock_base, 0, sizeof(tod_clock_base));
- tod_clock_base.tod = TOD_UNIX_EPOCH;
- S390_lowcore.last_update_clock = TOD_UNIX_EPOCH;
-}
-
/*
* Initialize storage key for kernel pages
*/
@@ -93,26 +87,6 @@ static noinline __init void init_kernel_storage_key(void)
static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE);
-static noinline __init void detect_machine_type(void)
-{
- struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page;
-
- /* Check current-configuration-level */
- if (stsi(NULL, 0, 0, 0) <= 2) {
- S390_lowcore.machine_flags |= MACHINE_FLAG_LPAR;
- return;
- }
- /* Get virtual-machine cpu information. */
- if (stsi(vmms, 3, 2, 2) || !vmms->count)
- return;
-
- /* Detect known hypervisors */
- if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3))
- S390_lowcore.machine_flags |= MACHINE_FLAG_KVM;
- else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4))
- S390_lowcore.machine_flags |= MACHINE_FLAG_VM;
-}
-
/* Remove leading, trailing and double whitespace. */
static inline void strim_all(char *str)
{
@@ -131,6 +105,8 @@ static inline void strim_all(char *str)
}
}
+char arch_hw_string[128];
+
static noinline __init void setup_arch_string(void)
{
struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page;
@@ -143,20 +119,21 @@ static noinline __init void setup_arch_string(void)
EBCASC(mach->type, sizeof(mach->type));
EBCASC(mach->model, sizeof(mach->model));
EBCASC(mach->model_capacity, sizeof(mach->model_capacity));
- sprintf(mstr, "%-16.16s %-4.4s %-16.16s %-16.16s",
- mach->manufacturer, mach->type,
- mach->model, mach->model_capacity);
+ scnprintf(mstr, sizeof(mstr), "%-16.16s %-4.4s %-16.16s %-16.16s",
+ mach->manufacturer, mach->type,
+ mach->model, mach->model_capacity);
strim_all(mstr);
if (stsi(vm, 3, 2, 2) == 0 && vm->count) {
EBCASC(vm->vm[0].cpi, sizeof(vm->vm[0].cpi));
- sprintf(hvstr, "%-16.16s", vm->vm[0].cpi);
+ scnprintf(hvstr, sizeof(hvstr), "%-16.16s", vm->vm[0].cpi);
strim_all(hvstr);
} else {
- sprintf(hvstr, "%s",
- MACHINE_IS_LPAR ? "LPAR" :
- MACHINE_IS_VM ? "z/VM" :
- MACHINE_IS_KVM ? "KVM" : "unknown");
+ scnprintf(hvstr, sizeof(hvstr), "%s",
+ machine_is_lpar() ? "LPAR" :
+ machine_is_vm() ? "z/VM" :
+ machine_is_kvm() ? "KVM" : "unknown");
}
+ scnprintf(arch_hw_string, sizeof(arch_hw_string), "HW: %s (%s)", mstr, hvstr);
dump_stack_set_arch_desc("%s (%s)", mstr, hvstr);
}
@@ -164,9 +141,8 @@ static __init void setup_topology(void)
{
int max_mnest;
- if (!test_facility(11))
+ if (!cpu_has_topology())
return;
- S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY;
for (max_mnest = 6; max_mnest > 1; max_mnest--) {
if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0)
break;
@@ -174,86 +150,52 @@ static __init void setup_topology(void)
topology_max_mnest = max_mnest;
}
-void __do_early_pgm_check(struct pt_regs *regs)
+void __init __do_early_pgm_check(struct pt_regs *regs)
{
- if (!fixup_exception(regs))
- disabled_wait();
+ struct lowcore *lc = get_lowcore();
+ unsigned long ip;
+
+ regs->int_code = lc->pgm_int_code;
+ regs->int_parm_long = lc->trans_exc_code;
+ regs->last_break = lc->pgm_last_break;
+ ip = __rewind_psw(regs->psw, regs->int_code >> 16);
+
+ /* Monitor Event? Might be a warning */
+ if ((regs->int_code & PGM_INT_CODE_MASK) == 0x40) {
+ if (report_bug(ip, regs) == BUG_TRAP_TYPE_WARN)
+ return;
+ }
+ if (fixup_exception(regs))
+ return;
+ /*
+ * Unhandled exception - system cannot continue but try to get some
+ * helpful messages to the console. Use early_printk() to print
+ * some basic information in case it is too early for printk().
+ */
+ register_early_console();
+ early_printk("PANIC: early exception %04x PSW: %016lx %016lx\n",
+ regs->int_code & 0xffff, regs->psw.mask, regs->psw.addr);
+ show_regs(regs);
+ disabled_wait();
}
static noinline __init void setup_lowcore_early(void)
{
+ struct lowcore *lc = get_lowcore();
psw_t psw;
psw.addr = (unsigned long)early_pgm_check_handler;
psw.mask = PSW_KERNEL_BITS;
- S390_lowcore.program_new_psw = psw;
- S390_lowcore.preempt_count = INIT_PREEMPT_COUNT;
-}
-
-static noinline __init void setup_facility_list(void)
-{
- memcpy(alt_stfle_fac_list, stfle_fac_list, sizeof(alt_stfle_fac_list));
- if (!IS_ENABLED(CONFIG_KERNEL_NOBP))
- __clear_facility(82, alt_stfle_fac_list);
-}
-
-static __init void detect_diag9c(void)
-{
- unsigned int cpu_address;
- int rc;
-
- cpu_address = stap();
- diag_stat_inc(DIAG_STAT_X09C);
- asm volatile(
- " diag %2,0,0x9c\n"
- "0: la %0,0\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc");
- if (!rc)
- S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C;
-}
-
-static __init void detect_machine_facilities(void)
-{
- if (test_facility(8)) {
- S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT1;
- system_ctl_set_bit(0, CR0_EDAT_BIT);
- }
- if (test_facility(78))
- S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2;
- if (test_facility(3))
- S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE;
- if (test_facility(50) && test_facility(73)) {
- S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
- system_ctl_set_bit(0, CR0_TRANSACTIONAL_EXECUTION_BIT);
- }
- if (test_facility(51))
- S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
- if (test_facility(129))
- system_ctl_set_bit(0, CR0_VECTOR_BIT);
- if (test_facility(130))
- S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
- if (test_facility(133))
- S390_lowcore.machine_flags |= MACHINE_FLAG_GS;
- if (test_facility(139) && (tod_clock_base.tod >> 63)) {
- /* Enabled signed clock comparator comparisons */
- S390_lowcore.machine_flags |= MACHINE_FLAG_SCC;
- clock_comparator_max = -1ULL >> 1;
- system_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SIGN_BIT);
- }
- if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) {
- S390_lowcore.machine_flags |= MACHINE_FLAG_PCI_MIO;
- /* the control bit is set during PCI initialization */
- }
- if (test_facility(194))
- S390_lowcore.machine_flags |= MACHINE_FLAG_RDP;
+ lc->program_new_psw = psw;
+ lc->preempt_count = INIT_PREEMPT_COUNT;
+ lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
+ lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
}
static inline void save_vector_registers(void)
{
#ifdef CONFIG_CRASH_DUMP
- if (test_facility(129))
+ if (cpu_has_vx())
save_vx_regs(boot_cpu_vector_save_area);
#endif
}
@@ -285,18 +227,13 @@ static void __init sort_amode31_extable(void)
void __init startup_init(void)
{
kasan_early_init();
- reset_tod_clock();
time_early_init();
init_kernel_storage_key();
lockdep_off();
sort_amode31_extable();
setup_lowcore_early();
- setup_facility_list();
- detect_machine_type();
setup_arch_string();
setup_boot_command_line();
- detect_diag9c();
- detect_machine_facilities();
save_vector_registers();
setup_topology();
sclp_early_detect();
diff --git a/arch/s390/kernel/early_printk.c b/arch/s390/kernel/early_printk.c
index d9d53f44008a..cefe020a3be3 100644
--- a/arch/s390/kernel/early_printk.c
+++ b/arch/s390/kernel/early_printk.c
@@ -6,6 +6,7 @@
#include <linux/console.h>
#include <linux/kernel.h>
#include <linux/init.h>
+#include <asm/setup.h>
#include <asm/sclp.h>
static void sclp_early_write(struct console *con, const char *s, unsigned int len)
@@ -20,6 +21,16 @@ static struct console sclp_early_console = {
.index = -1,
};
+void __init register_early_console(void)
+{
+ if (early_console)
+ return;
+ if (!sclp.has_linemode && !sclp.has_vt220)
+ return;
+ early_console = &sclp_early_console;
+ register_console(early_console);
+}
+
static int __init setup_early_printk(char *buf)
{
if (early_console)
@@ -27,10 +38,7 @@ static int __init setup_early_printk(char *buf)
/* Accept only "earlyprintk" and "earlyprintk=sclp" */
if (buf && !str_has_prefix(buf, "sclp"))
return 0;
- if (!sclp.has_linemode && !sclp.has_vt220)
- return 0;
- early_console = &sclp_early_console;
- register_console(early_console);
+ register_early_console();
return 0;
}
early_param("earlyprintk", setup_early_printk);
diff --git a/arch/s390/kernel/earlypgm.S b/arch/s390/kernel/earlypgm.S
deleted file mode 100644
index c634871f0d90..000000000000
--- a/arch/s390/kernel/earlypgm.S
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright IBM Corp. 2006, 2007
- * Author(s): Michael Holzheu <holzheu@de.ibm.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-
-SYM_CODE_START(early_pgm_check_handler)
- stmg %r8,%r15,__LC_SAVE_AREA_SYNC
- aghi %r15,-(STACK_FRAME_OVERHEAD+__PT_SIZE)
- la %r11,STACK_FRAME_OVERHEAD(%r15)
- xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
- stmg %r0,%r7,__PT_R0(%r11)
- mvc __PT_PSW(16,%r11),__LC_PGM_OLD_PSW
- mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
- lgr %r2,%r11
- brasl %r14,__do_early_pgm_check
- mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
- lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
- lpswe __LC_RETURN_PSW
-SYM_CODE_END(early_pgm_check_handler)
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 60cf917a7122..b7f1553d9ee5 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -12,7 +12,7 @@
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/asm-extable.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/dwarf.h>
@@ -28,53 +28,47 @@
#include <asm/setup.h>
#include <asm/nmi.h>
#include <asm/nospec-insn.h>
+#include <asm/lowcore.h>
+#include <asm/machine.h>
_LPP_OFFSET = __LC_LPP
.macro STBEAR address
- ALTERNATIVE "nop", ".insn s,0xb2010000,\address", 193
+ ALTERNATIVE "nop", ".insn s,0xb2010000,\address", ALT_FACILITY(193)
.endm
.macro LBEAR address
- ALTERNATIVE "nop", ".insn s,0xb2000000,\address", 193
+ ALTERNATIVE "nop", ".insn s,0xb2000000,\address", ALT_FACILITY(193)
.endm
- .macro LPSWEY address,lpswe
- ALTERNATIVE "b \lpswe; nopr", ".insn siy,0xeb0000000071,\address,0", 193
+ .macro LPSWEY address, lpswe
+ ALTERNATIVE_2 "b \lpswe;nopr", \
+ ".insn siy,0xeb0000000071,\address,0", ALT_FACILITY(193), \
+ __stringify(.insn siy,0xeb0000000071,LOWCORE_ALT_ADDRESS+\address,0), \
+ ALT_FEATURE(MFEATURE_LOWCORE)
.endm
- .macro MBEAR reg
- ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), 193
+ .macro MBEAR reg, lowcore
+ ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK(\lowcore)),\
+ ALT_FACILITY(193)
.endm
- .macro CHECK_STACK savearea
-#ifdef CONFIG_CHECK_STACK
- tml %r15,THREAD_SIZE - CONFIG_STACK_GUARD
- lghi %r14,\savearea
- jz stack_overflow
-#endif
- .endm
-
- .macro CHECK_VMAP_STACK savearea,oklabel
-#ifdef CONFIG_VMAP_STACK
+ .macro CHECK_VMAP_STACK savearea, lowcore, oklabel
lgr %r14,%r15
nill %r14,0x10000 - THREAD_SIZE
oill %r14,STACK_INIT_OFFSET
- clg %r14,__LC_KERNEL_STACK
+ clg %r14,__LC_KERNEL_STACK(\lowcore)
je \oklabel
- clg %r14,__LC_ASYNC_STACK
+ clg %r14,__LC_ASYNC_STACK(\lowcore)
je \oklabel
- clg %r14,__LC_MCCK_STACK
+ clg %r14,__LC_MCCK_STACK(\lowcore)
je \oklabel
- clg %r14,__LC_NODAT_STACK
+ clg %r14,__LC_NODAT_STACK(\lowcore)
je \oklabel
- clg %r14,__LC_RESTART_STACK
+ clg %r14,__LC_RESTART_STACK(\lowcore)
je \oklabel
- lghi %r14,\savearea
- j stack_overflow
-#else
- j \oklabel
-#endif
+ la %r14,\savearea(\lowcore)
+ j stack_invalid
.endm
/*
@@ -100,36 +94,37 @@ _LPP_OFFSET = __LC_LPP
.endm
.macro BPOFF
- ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", 82
+ ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", ALT_SPEC(82)
.endm
.macro BPON
- ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", 82
+ ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82)
.endm
.macro BPENTER tif_ptr,tif_mask
ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .insn rrf,0xb2e80000,0,0,13,0", \
- "j .+12; nop; nop", 82
+ "j .+12; nop; nop", ALT_SPEC(82)
.endm
.macro BPEXIT tif_ptr,tif_mask
TSTMSK \tif_ptr,\tif_mask
ALTERNATIVE "jz .+8; .insn rrf,0xb2e80000,0,0,12,0", \
- "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", 82
+ "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82)
.endm
#if IS_ENABLED(CONFIG_KVM)
- .macro SIEEXIT sie_control
- lg %r9,\sie_control # get control block pointer
- ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
- lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce
- ni __LC_CPU_FLAGS+7,255-_CIF_SIE
+ .macro SIEEXIT sie_control,lowcore
+ lg %r9,\sie_control # get control block pointer
+ ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
+ lctlg %c1,%c1,__LC_USER_ASCE(\lowcore) # load primary asce
+ lg %r9,__LC_CURRENT(\lowcore)
+ mvi __TI_sie(%r9),0
larl %r9,sie_exit # skip forward to sie_exit
.endm
#endif
.macro STACKLEAK_ERASE
-#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+#ifdef CONFIG_KSTACK_ERASE
brasl %r14,stackleak_erase_on_task_stack
#endif
.endm
@@ -163,13 +158,18 @@ SYM_FUNC_START(__switch_to_asm)
stg %r15,__THREAD_ksp(%r1,%r2) # store kernel stack of prev
lg %r15,0(%r4,%r3) # start of kernel stack of next
agr %r15,%r5 # end of kernel stack of next
- stg %r3,__LC_CURRENT # store task struct of next
- stg %r15,__LC_KERNEL_STACK # store end of kernel stack
+ GET_LC %r13
+ stg %r3,__LC_CURRENT(%r13) # store task struct of next
+ stg %r15,__LC_KERNEL_STACK(%r13) # store end of kernel stack
lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next
- aghi %r3,__TASK_pid
- mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next
+ lay %r4,__TASK_pid(%r3)
+ mvc __LC_CURRENT_PID(4,%r13),0(%r4) # store pid of next
+ ALTERNATIVE "nop", "lpp _LPP_OFFSET(%r13)", ALT_FACILITY(40)
+#ifdef CONFIG_STACKPROTECTOR
+ lg %r3,__TASK_stack_canary(%r3)
+ stg %r3,__LC_STACK_CANARY(%r13)
+#endif
lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
- ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40
BR_EX %r14
SYM_FUNC_END(__switch_to_asm)
@@ -179,22 +179,22 @@ SYM_FUNC_END(__switch_to_asm)
* %r2 pointer to sie control block phys
* %r3 pointer to sie control block virt
* %r4 guest register save area
+ * %r5 guest asce
*/
SYM_FUNC_START(__sie64a)
stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers
- lg %r12,__LC_CURRENT
+ GET_LC %r13
+ lg %r14,__LC_CURRENT(%r13)
stg %r2,__SF_SIE_CONTROL_PHYS(%r15) # save sie block physical..
stg %r3,__SF_SIE_CONTROL(%r15) # ...and virtual addresses
stg %r4,__SF_SIE_SAVEAREA(%r15) # save guest register save area
+ stg %r5,__SF_SIE_GUEST_ASCE(%r15) # save guest asce
xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0
- mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags
+ mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r14) # copy thread flags
lmg %r0,%r13,0(%r4) # load guest gprs 0-13
- lg %r14,__LC_GMAP # get gmap pointer
- ltgr %r14,%r14
- jz .Lsie_gmap
- oi __LC_CPU_FLAGS+7,_CIF_SIE
- lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce
-.Lsie_gmap:
+ mvi __TI_sie(%r14),1
+ stosm __SF_SIE_IRQ(%r15),0x03 # enable interrupts
+ lctlg %c1,%c1,__SF_SIE_GUEST_ASCE(%r15) # load primary asce
lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
tm __SIE_PROG20+3(%r14),3 # last exit...
@@ -212,20 +212,12 @@ SYM_FUNC_START(__sie64a)
.Lsie_skip:
lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
- lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce
- ni __LC_CPU_FLAGS+7,255-_CIF_SIE
-# some program checks are suppressing. C code (e.g. do_protection_exception)
-# will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There
-# are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable.
-# Other instructions between __sie64a and .Lsie_done should not cause program
-# interrupts. So lets use 3 nops as a landing pad for all possible rewinds.
-.Lrewind_pad6:
- nopr 7
-.Lrewind_pad4:
- nopr 7
-.Lrewind_pad2:
- nopr 7
+ GET_LC %r14
+ lctlg %c1,%c1,__LC_USER_ASCE(%r14) # load primary asce
+ lg %r14,__LC_CURRENT(%r14)
+ mvi __TI_sie(%r14),0
SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL)
+ stnsm __SF_SIE_IRQ(%r15),0xfc # disable interrupts
lg %r14,__SF_SIE_SAVEAREA(%r15) # load guest register save area
stmg %r0,%r13,0(%r14) # save guest gprs 0-13
xgr %r0,%r0 # clear guest registers to
@@ -236,15 +228,6 @@ SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL)
lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
lg %r2,__SF_SIE_REASON(%r15) # return exit reason code
BR_EX %r14
-.Lsie_fault:
- lghi %r14,-EFAULT
- stg %r14,__SF_SIE_REASON(%r15) # set exit reason code
- j sie_exit
-
- EX_TABLE(.Lrewind_pad6,.Lsie_fault)
- EX_TABLE(.Lrewind_pad4,.Lsie_fault)
- EX_TABLE(.Lrewind_pad2,.Lsie_fault)
- EX_TABLE(sie_exit,.Lsie_fault)
SYM_FUNC_END(__sie64a)
EXPORT_SYMBOL(__sie64a)
EXPORT_SYMBOL(sie_exit)
@@ -256,14 +239,14 @@ EXPORT_SYMBOL(sie_exit)
*/
SYM_CODE_START(system_call)
- stpt __LC_SYS_ENTER_TIMER
- stmg %r8,%r15,__LC_SAVE_AREA_SYNC
+ STMG_LC %r8,%r15,__LC_SAVE_AREA
+ GET_LC %r13
+ stpt __LC_SYS_ENTER_TIMER(%r13)
BPOFF
lghi %r14,0
.Lsysc_per:
- STBEAR __LC_LAST_BREAK
- lctlg %c1,%c1,__LC_KERNEL_ASCE
- lg %r15,__LC_KERNEL_STACK
+ STBEAR __LC_LAST_BREAK(%r13)
+ lg %r15,__LC_KERNEL_STACK(%r13)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
# clear user controlled register to prevent speculative use
@@ -278,17 +261,16 @@ SYM_CODE_START(system_call)
xgr %r10,%r10
xgr %r11,%r11
la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
- mvc __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC
- MBEAR %r2
+ mvc __PT_R8(64,%r2),__LC_SAVE_AREA(%r13)
+ MBEAR %r2,%r13
lgr %r3,%r14
brasl %r14,__do_syscall
STACKLEAK_ERASE
- lctlg %c1,%c1,__LC_USER_ASCE
- mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
BPON
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
+ stpt __LC_EXIT_TIMER(%r13)
lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
- stpt __LC_EXIT_TIMER
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
SYM_CODE_END(system_call)
@@ -299,12 +281,12 @@ SYM_CODE_START(ret_from_fork)
lgr %r3,%r11
brasl %r14,__ret_from_fork
STACKLEAK_ERASE
- lctlg %c1,%c1,__LC_USER_ASCE
- mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ GET_LC %r13
+ mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
BPON
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
+ stpt __LC_EXIT_TIMER(%r13)
lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
- stpt __LC_EXIT_TIMER
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
SYM_CODE_END(ret_from_fork)
@@ -313,41 +295,37 @@ SYM_CODE_END(ret_from_fork)
*/
SYM_CODE_START(pgm_check_handler)
- stpt __LC_SYS_ENTER_TIMER
+ STMG_LC %r8,%r15,__LC_SAVE_AREA
+ GET_LC %r13
+ stpt __LC_SYS_ENTER_TIMER(%r13)
BPOFF
- stmg %r8,%r15,__LC_SAVE_AREA_SYNC
- lgr %r10,%r15
- lmg %r8,%r9,__LC_PGM_OLD_PSW
+ lmg %r8,%r9,__LC_PGM_OLD_PSW(%r13)
+ xgr %r10,%r10
tmhh %r8,0x0001 # coming from user space?
- jno .Lpgm_skip_asce
- lctlg %c1,%c1,__LC_KERNEL_ASCE
- j 3f # -> fault in user space
-.Lpgm_skip_asce:
+ jo 3f # -> fault in user space
+#if IS_ENABLED(CONFIG_KVM)
+ lg %r11,__LC_CURRENT(%r13)
+ tm __TI_sie(%r11),0xff
+ jz 1f
+ BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
+ SIEEXIT __SF_SIE_CONTROL(%r15),%r13
+ lghi %r10,_PIF_GUEST_FAULT
+#endif
1: tmhh %r8,0x4000 # PER bit set in old PSW ?
jnz 2f # -> enabled, can't be a double fault
- tm __LC_PGM_ILC+3,0x80 # check for per exception
+ tm __LC_PGM_ILC+3(%r13),0x80 # check for per exception
jnz .Lpgm_svcper # -> single stepped svc
-2: CHECK_STACK __LC_SAVE_AREA_SYNC
- aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- # CHECK_VMAP_STACK branches to stack_overflow or 4f
- CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f
-3: lg %r15,__LC_KERNEL_STACK
+2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+ # CHECK_VMAP_STACK branches to stack_invalid or 4f
+ CHECK_VMAP_STACK __LC_SAVE_AREA,%r13,4f
+3: lg %r15,__LC_KERNEL_STACK(%r13)
4: la %r11,STACK_FRAME_OVERHEAD(%r15)
- xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+ stg %r10,__PT_FLAGS(%r11)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
stmg %r0,%r7,__PT_R0(%r11)
- mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
- mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK
- stctg %c1,%c1,__PT_CR1(%r11)
-#if IS_ENABLED(CONFIG_KVM)
- ltg %r12,__LC_GMAP
- jz 5f
- clc __GMAP_ASCE(8,%r12), __PT_CR1(%r11)
- jne 5f
- BPENTER __SF_SIE_FLAGS(%r10),_TIF_ISOLATE_BP_GUEST
- SIEEXIT __SF_SIE_CONTROL(%r10)
-#endif
-5: stmg %r8,%r9,__PT_PSW(%r11)
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
+ mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK(%r13)
+ stmg %r8,%r9,__PT_PSW(%r11)
# clear user controlled registers to prevent speculative use
xgr %r0,%r0
xgr %r1,%r1
@@ -356,16 +334,16 @@ SYM_CODE_START(pgm_check_handler)
xgr %r5,%r5
xgr %r6,%r6
xgr %r7,%r7
+ xgr %r12,%r12
lgr %r2,%r11
brasl %r14,__do_pgm_check
tmhh %r8,0x0001 # returning to user space?
jno .Lpgm_exit_kernel
STACKLEAK_ERASE
- lctlg %c1,%c1,__LC_USER_ASCE
BPON
- stpt __LC_EXIT_TIMER
+ stpt __LC_EXIT_TIMER(%r13)
.Lpgm_exit_kernel:
- mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
@@ -374,11 +352,11 @@ SYM_CODE_START(pgm_check_handler)
# single stepped system call
#
.Lpgm_svcper:
- mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW
+ mvc __LC_RETURN_PSW(8,%r13),__LC_SVC_NEW_PSW(%r13)
larl %r14,.Lsysc_per
- stg %r14,__LC_RETURN_PSW+8
+ stg %r14,__LC_RETURN_PSW+8(%r13)
lghi %r14,1
- LBEAR __LC_PGM_LAST_BREAK
+ LBEAR __LC_PGM_LAST_BREAK(%r13)
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE # branch to .Lsysc_per
SYM_CODE_END(pgm_check_handler)
@@ -387,25 +365,25 @@ SYM_CODE_END(pgm_check_handler)
*/
.macro INT_HANDLER name,lc_old_psw,handler
SYM_CODE_START(\name)
- stckf __LC_INT_CLOCK
- stpt __LC_SYS_ENTER_TIMER
- STBEAR __LC_LAST_BREAK
+ STMG_LC %r8,%r15,__LC_SAVE_AREA
+ GET_LC %r13
+ stckf __LC_INT_CLOCK(%r13)
+ stpt __LC_SYS_ENTER_TIMER(%r13)
+ STBEAR __LC_LAST_BREAK(%r13)
BPOFF
- stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
- lmg %r8,%r9,\lc_old_psw
+ lmg %r8,%r9,\lc_old_psw(%r13)
tmhh %r8,0x0001 # interrupting from user ?
jnz 1f
#if IS_ENABLED(CONFIG_KVM)
- TSTMSK __LC_CPU_FLAGS,_CIF_SIE
+ lg %r10,__LC_CURRENT(%r13)
+ tm __TI_sie(%r10),0xff
jz 0f
BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
- SIEEXIT __SF_SIE_CONTROL(%r15)
+ SIEEXIT __SF_SIE_CONTROL(%r15),%r13
#endif
-0: CHECK_STACK __LC_SAVE_AREA_ASYNC
- aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j 2f
-1: lctlg %c1,%c1,__LC_KERNEL_ASCE
- lg %r15,__LC_KERNEL_STACK
+1: lg %r15,__LC_KERNEL_STACK(%r13)
2: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
@@ -419,61 +397,66 @@ SYM_CODE_START(\name)
xgr %r7,%r7
xgr %r10,%r10
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
- mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
- MBEAR %r11
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
+ MBEAR %r11,%r13
stmg %r8,%r9,__PT_PSW(%r11)
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,\handler
- mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
+ mvc __LC_RETURN_PSW(16,%r13),__PT_PSW(%r11)
tmhh %r8,0x0001 # returning to user ?
jno 2f
STACKLEAK_ERASE
- lctlg %c1,%c1,__LC_USER_ASCE
BPON
- stpt __LC_EXIT_TIMER
+ stpt __LC_EXIT_TIMER(%r13)
2: LBEAR __PT_LAST_BREAK(%r11)
lmg %r0,%r15,__PT_R0(%r11)
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
SYM_CODE_END(\name)
.endm
+ .section .irqentry.text, "ax"
+
INT_HANDLER ext_int_handler,__LC_EXT_OLD_PSW,do_ext_irq
INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq
+ .section .kprobes.text, "ax"
+
/*
* Machine check handler routines
*/
SYM_CODE_START(mcck_int_handler)
BPOFF
- lmg %r8,%r9,__LC_MCK_OLD_PSW
- TSTMSK __LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE
+ GET_LC %r13
+ lmg %r8,%r9,__LC_MCK_OLD_PSW(%r13)
+ TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_SYSTEM_DAMAGE
jo .Lmcck_panic # yes -> rest of mcck code invalid
- TSTMSK __LC_MCCK_CODE,MCCK_CODE_CR_VALID
+ TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_CR_VALID
jno .Lmcck_panic # control registers invalid -> panic
ptlb
- lghi %r14,__LC_CPU_TIMER_SAVE_AREA
- mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
- TSTMSK __LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID
+ lay %r14,__LC_CPU_TIMER_SAVE_AREA(%r13)
+ mvc __LC_MCCK_ENTER_TIMER(8,%r13),0(%r14)
+ TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_CPU_TIMER_VALID
jo 3f
- la %r14,__LC_SYS_ENTER_TIMER
- clc 0(8,%r14),__LC_EXIT_TIMER
+ la %r14,__LC_SYS_ENTER_TIMER(%r13)
+ clc 0(8,%r14),__LC_EXIT_TIMER(%r13)
jl 1f
- la %r14,__LC_EXIT_TIMER
-1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER
+ la %r14,__LC_EXIT_TIMER(%r13)
+1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER(%r13)
jl 2f
- la %r14,__LC_LAST_UPDATE_TIMER
+ la %r14,__LC_LAST_UPDATE_TIMER(%r13)
2: spt 0(%r14)
- mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
-3: TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_MWP_VALID
+ mvc __LC_MCCK_ENTER_TIMER(8,%r13),0(%r14)
+3: TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_PSW_MWP_VALID
jno .Lmcck_panic
tmhh %r8,0x0001 # interrupting from user ?
jnz .Lmcck_user
- TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID
+ TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_PSW_IA_VALID
jno .Lmcck_panic
#if IS_ENABLED(CONFIG_KVM)
- TSTMSK __LC_CPU_FLAGS,_CIF_SIE
+ lg %r10,__LC_CURRENT(%r13)
+ tm __TI_sie(%r10),0xff
jz .Lmcck_user
- # Need to compare the address instead of a CIF_SIE* flag.
+	# Need to compare the address instead of the __TI_sie flag.
# Otherwise there would be a race between setting the flag
# and entering SIE (or leaving and clearing the flag). This
# would cause machine checks targeted at the guest to be
@@ -482,18 +465,17 @@ SYM_CODE_START(mcck_int_handler)
clgrjl %r9,%r14, 4f
larl %r14,.Lsie_leave
clgrjhe %r9,%r14, 4f
- oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
+ lg %r10,__LC_PCPU(%r13)
+ oi __PCPU_FLAGS+7(%r10), _CIF_MCCK_GUEST
4: BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
- SIEEXIT __SF_SIE_CONTROL(%r15)
+ SIEEXIT __SF_SIE_CONTROL(%r15),%r13
#endif
.Lmcck_user:
- lg %r15,__LC_MCCK_STACK
+ lg %r15,__LC_MCCK_STACK(%r13)
la %r11,STACK_FRAME_OVERHEAD(%r15)
- stctg %c1,%c1,__PT_CR1(%r11)
- lctlg %c1,%c1,__LC_KERNEL_ASCE
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
- lghi %r14,__LC_GPREGS_SAVE_AREA+64
- stmg %r0,%r7,__PT_R0(%r11)
+ lay %r14,__LC_GPREGS_SAVE_AREA(%r13)
+ mvc __PT_R0(128,%r11),0(%r14)
# clear user controlled registers to prevent speculative use
xgr %r0,%r0
xgr %r1,%r1
@@ -503,20 +485,19 @@ SYM_CODE_START(mcck_int_handler)
xgr %r6,%r6
xgr %r7,%r7
xgr %r10,%r10
- mvc __PT_R8(64,%r11),0(%r14)
stmg %r8,%r9,__PT_PSW(%r11)
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,s390_do_machine_check
- lctlg %c1,%c1,__PT_CR1(%r11)
lmg %r0,%r10,__PT_R0(%r11)
- mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
- tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
+ mvc __LC_RETURN_MCCK_PSW(16,%r13),__PT_PSW(%r11) # move return PSW
+ tm __LC_RETURN_MCCK_PSW+1(%r13),0x01 # returning to user ?
jno 0f
BPON
- stpt __LC_EXIT_TIMER
-0: ALTERNATIVE "nop", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA),193
+ stpt __LC_EXIT_TIMER(%r13)
+0: ALTERNATIVE "brcl 0,0", __stringify(lay %r12,__LC_LAST_BREAK_SAVE_AREA(%r13)),\
+ ALT_FACILITY(193)
LBEAR 0(%r12)
lmg %r11,%r15,__PT_R11(%r11)
LPSWEY __LC_RETURN_MCCK_PSW,__LC_RETURN_MCCK_LPSWE
@@ -552,7 +533,7 @@ SYM_CODE_START(mcck_int_handler)
SYM_CODE_END(mcck_int_handler)
SYM_CODE_START(restart_int_handler)
- ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40
+ ALTERNATIVE "nop", "lpp _LPP_OFFSET", ALT_FACILITY(40)
stg %r15,__LC_SAVE_AREA_RESTART
TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4
jz 0f
@@ -560,15 +541,17 @@ SYM_CODE_START(restart_int_handler)
0: larl %r15,daton_psw
lpswe 0(%r15) # turn dat on, keep irqs off
.Ldaton:
- lg %r15,__LC_RESTART_STACK
+ GET_LC %r15
+ lg %r15,__LC_RESTART_STACK(%r15)
xc STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
- mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART
- mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW
+ GET_LC %r13
+ mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART(%r13)
+ mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW(%r13)
xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
- lg %r1,__LC_RESTART_FN # load fn, parm & source cpu
- lg %r2,__LC_RESTART_DATA
- lgf %r3,__LC_RESTART_SOURCE
+ lg %r1,__LC_RESTART_FN(%r13) # load fn, parm & source cpu
+ lg %r2,__LC_RESTART_DATA(%r13)
+ lgf %r3,__LC_RESTART_SOURCE(%r13)
ltgr %r3,%r3 # test source cpu address
jm 1f # negative -> skip source stop
0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu
@@ -581,26 +564,44 @@ SYM_CODE_START(restart_int_handler)
3: j 3b
SYM_CODE_END(restart_int_handler)
+ __INIT
+SYM_CODE_START(early_pgm_check_handler)
+ STMG_LC %r8,%r15,__LC_SAVE_AREA
+ GET_LC %r13
+ aghi %r15,-(STACK_FRAME_OVERHEAD+__PT_SIZE)
+ la %r11,STACK_FRAME_OVERHEAD(%r15)
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ stmg %r0,%r7,__PT_R0(%r11)
+ mvc __PT_PSW(16,%r11),__LC_PGM_OLD_PSW(%r13)
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
+ lgr %r2,%r11
+ brasl %r14,__do_early_pgm_check
+ mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+ LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
+SYM_CODE_END(early_pgm_check_handler)
+ __FINIT
+
.section .kprobes.text, "ax"
-#if defined(CONFIG_CHECK_STACK) || defined(CONFIG_VMAP_STACK)
/*
- * The synchronous or the asynchronous stack overflowed. We are dead.
+ * The synchronous or the asynchronous stack pointer is invalid. We are dead.
* No need to properly save the registers, we are going to panic anyway.
* Setup a pt_regs so that show_trace can provide a good call trace.
*/
-SYM_CODE_START(stack_overflow)
- lg %r15,__LC_NODAT_STACK # change to panic stack
+SYM_CODE_START(stack_invalid)
+ GET_LC %r15
+ lg %r15,__LC_NODAT_STACK(%r15) # change to panic stack
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
stmg %r8,%r9,__PT_PSW(%r11)
mvc __PT_R8(64,%r11),0(%r14)
- stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2
+ GET_LC %r2
+ mvc __PT_ORIG_GPR2(8,%r11),__LC_PGM_LAST_BREAK(%r2)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lgr %r2,%r11 # pass pointer to pt_regs
- jg kernel_stack_overflow
-SYM_CODE_END(stack_overflow)
-#endif
+ jg kernel_stack_invalid
+SYM_CODE_END(stack_invalid)
.section .data, "aw"
.balign 4
@@ -611,20 +612,3 @@ SYM_DATA_START_LOCAL(daton_psw)
.quad PSW_KERNEL_BITS
.quad .Ldaton
SYM_DATA_END(daton_psw)
-
- .section .rodata, "a"
- .balign 8
-#define SYSCALL(esame,emu) .quad __s390x_ ## esame
-SYM_DATA_START(sys_call_table)
-#include "asm/syscall_table.h"
-SYM_DATA_END(sys_call_table)
-#undef SYSCALL
-
-#ifdef CONFIG_COMPAT
-
-#define SYSCALL(esame,emu) .quad __s390_ ## emu
-SYM_DATA_START(sys_call_table_emu)
-#include "asm/syscall_table.h"
-SYM_DATA_END(sys_call_table_emu)
-#undef SYSCALL
-#endif
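
Note on the entry.S rework above: the implicit prefix-zero lowcore accesses (__LC_KERNEL_ASCE, __LC_SAVE_AREA_SYNC, ...) are replaced by explicit base-register addressing, where GET_LC loads the lowcore address into a register (usually %r13) and every __LC_* offset is applied relative to it. The C-side counterpart is the get_lowcore() accessor that the idle.c hunk further down switches to. A minimal sketch of the idea, not part of the patch, using only fields that appear in that hunk:

	#include <asm/lowcore.h>

	static void lowcore_sketch(void)
	{
		struct lowcore *lc = get_lowcore();	/* like GET_LC %r13 in entry.S */

		/* field access becomes base + offset, e.g. __LC_INT_CLOCK(%r13) */
		lc->last_update_clock = lc->int_clock;
	}
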
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 21969520f947..dd55cc6bbc28 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -31,7 +31,7 @@ void do_secure_storage_access(struct pt_regs *regs);
void do_non_secure_storage_access(struct pt_regs *regs);
void do_secure_storage_violation(struct pt_regs *regs);
void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str);
-void kernel_stack_overflow(struct pt_regs * regs);
+void kernel_stack_invalid(struct pt_regs *regs);
void handle_signal32(struct ksignal *ksig, sigset_t *oldset,
struct pt_regs *regs);
@@ -41,7 +41,6 @@ void do_restart(void *arg);
void __init startup_init(void);
void die(struct pt_regs *regs, const char *str);
int setup_profiling_timer(unsigned int multiplier);
-unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip);
struct s390_mmap_arg_struct;
struct fadvise64_64_args;
diff --git a/arch/s390/kernel/facility.c b/arch/s390/kernel/facility.c
index f02127219a27..d028b0be5c1d 100644
--- a/arch/s390/kernel/facility.c
+++ b/arch/s390/kernel/facility.c
@@ -3,6 +3,7 @@
* Copyright IBM Corp. 2023
*/
+#include <linux/export.h>
#include <asm/facility.h>
unsigned int stfle_size(void)
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
index fa90bbdc5ef9..03a8973aec3c 100644
--- a/arch/s390/kernel/fpu.c
+++ b/arch/s390/kernel/fpu.c
@@ -5,6 +5,8 @@
* Copyright IBM Corp. 2015
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
*/
+
+#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/cpu.h>
#include <linux/sched.h>
@@ -113,7 +115,7 @@ void load_fpu_state(struct fpu *state, int flags)
int mask;
if (flags & KERNEL_FPC)
- fpu_lfpc(&state->fpc);
+ fpu_lfpc_safe(&state->fpc);
if (!cpu_has_vx()) {
if (flags & KERNEL_VXR_V0V7)
load_fp_regs_vx(state->vxrs);
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index ddf2ee47cb87..e94bb98f5231 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -12,6 +12,8 @@
#include <linux/ftrace.h>
#include <linux/kernel.h>
#include <linux/types.h>
+#include <linux/kmsan-checks.h>
+#include <linux/cpufeature.h>
#include <linux/kprobes.h>
#include <linux/execmem.h>
#include <trace/syscall.h>
@@ -49,10 +51,6 @@ struct ftrace_insn {
s32 disp;
} __packed;
-#ifdef CONFIG_MODULES
-static char *ftrace_plt;
-#endif /* CONFIG_MODULES */
-
static const char *ftrace_shared_hotpatch_trampoline(const char **end)
{
const char *tstart, *tend;
@@ -72,19 +70,20 @@ static const char *ftrace_shared_hotpatch_trampoline(const char **end)
bool ftrace_need_init_nop(void)
{
- return true;
+ return !cpu_has_seq_insn();
}
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
{
static struct ftrace_hotpatch_trampoline *next_vmlinux_trampoline =
__ftrace_hotpatch_trampolines_start;
- static const char orig[6] = { 0xc0, 0x04, 0x00, 0x00, 0x00, 0x00 };
+ static const struct ftrace_insn orig = { .opc = 0xc004, .disp = 0 };
static struct ftrace_hotpatch_trampoline *trampoline;
struct ftrace_hotpatch_trampoline **next_trampoline;
struct ftrace_hotpatch_trampoline *trampolines_end;
struct ftrace_hotpatch_trampoline tmp;
struct ftrace_insn *insn;
+ struct ftrace_insn old;
const char *shared;
s32 disp;
@@ -98,7 +97,6 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
if (mod) {
next_trampoline = &mod->arch.next_trampoline;
trampolines_end = mod->arch.trampolines_end;
- shared = ftrace_plt;
}
#endif
@@ -106,8 +104,10 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
return -ENOMEM;
trampoline = (*next_trampoline)++;
+ if (copy_from_kernel_nofault(&old, (void *)rec->ip, sizeof(old)))
+ return -EFAULT;
/* Check for the compiler-generated fentry nop (brcl 0, .). */
- if (WARN_ON_ONCE(memcmp((const void *)rec->ip, &orig, sizeof(orig))))
+ if (WARN_ON_ONCE(memcmp(&orig, &old, sizeof(old))))
return -EINVAL;
/* Generate the trampoline. */
@@ -143,8 +143,35 @@ static struct ftrace_hotpatch_trampoline *ftrace_get_trampoline(struct dyn_ftrac
return trampoline;
}
-int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
- unsigned long addr)
+static inline struct ftrace_insn
+ftrace_generate_branch_insn(unsigned long ip, unsigned long target)
+{
+ /* brasl r0,target or brcl 0,0 */
+ return (struct ftrace_insn){ .opc = target ? 0xc005 : 0xc004,
+ .disp = target ? (target - ip) / 2 : 0 };
+}
+
+static int ftrace_patch_branch_insn(unsigned long ip, unsigned long old_target,
+ unsigned long target)
+{
+ struct ftrace_insn orig = ftrace_generate_branch_insn(ip, old_target);
+ struct ftrace_insn new = ftrace_generate_branch_insn(ip, target);
+ struct ftrace_insn old;
+
+ if (!IS_ALIGNED(ip, 8))
+ return -EINVAL;
+ if (copy_from_kernel_nofault(&old, (void *)ip, sizeof(old)))
+ return -EFAULT;
+ /* Verify that the to be replaced code matches what we expect. */
+ if (memcmp(&orig, &old, sizeof(old)))
+ return -EINVAL;
+ s390_kernel_write((void *)ip, &new, sizeof(new));
+ return 0;
+}
+
+static int ftrace_modify_trampoline_call(struct dyn_ftrace *rec,
+ unsigned long old_addr,
+ unsigned long addr)
{
struct ftrace_hotpatch_trampoline *trampoline;
u64 old;
@@ -160,6 +187,15 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
return 0;
}
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ unsigned long addr)
+{
+ if (cpu_has_seq_insn())
+ return ftrace_patch_branch_insn(rec->ip, old_addr, addr);
+ else
+ return ftrace_modify_trampoline_call(rec, old_addr, addr);
+}
+
static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable)
{
u16 old;
@@ -178,11 +214,14 @@ static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable)
int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
unsigned long addr)
{
- /* Expect brcl 0xf,... */
- return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false);
+ /* Expect brcl 0xf,... for the !cpu_has_seq_insn() case */
+ if (cpu_has_seq_insn())
+ return ftrace_patch_branch_insn(rec->ip, addr, 0);
+ else
+ return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false);
}
-int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+static int ftrace_make_trampoline_call(struct dyn_ftrace *rec, unsigned long addr)
{
struct ftrace_hotpatch_trampoline *trampoline;
@@ -194,6 +233,14 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
return ftrace_patch_branch_mask((void *)rec->ip, 0xc004, true);
}
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ if (cpu_has_seq_insn())
+ return ftrace_patch_branch_insn(rec->ip, 0, addr);
+ else
+ return ftrace_make_trampoline_call(rec, addr);
+}
+
int ftrace_update_ftrace_func(ftrace_func_t func)
{
ftrace_func = func;
@@ -214,75 +261,20 @@ void ftrace_arch_code_modify_post_process(void)
text_poke_sync_lock();
}
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static int __init ftrace_plt_init(void)
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
{
- const char *start, *end;
+ unsigned long *parent = &arch_ftrace_regs(fregs)->regs.gprs[14];
+ unsigned long sp = arch_ftrace_regs(fregs)->regs.gprs[15];
- ftrace_plt = execmem_alloc(EXECMEM_FTRACE, PAGE_SIZE);
- if (!ftrace_plt)
- panic("cannot allocate ftrace plt\n");
-
- start = ftrace_shared_hotpatch_trampoline(&end);
- memcpy(ftrace_plt, start, end - start);
- set_memory_rox((unsigned long)ftrace_plt, 1);
- return 0;
-}
-device_initcall(ftrace_plt_init);
-
-#endif /* CONFIG_MODULES */
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-/*
- * Hook the return address and push it in the stack of return addresses
- * in current thread info.
- */
-unsigned long prepare_ftrace_return(unsigned long ra, unsigned long sp,
- unsigned long ip)
-{
if (unlikely(ftrace_graph_is_dead()))
- goto out;
+ return;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
- goto out;
- ip -= MCOUNT_INSN_SIZE;
- if (!function_graph_enter(ra, ip, 0, (void *) sp))
- ra = (unsigned long) return_to_handler;
-out:
- return ra;
-}
-NOKPROBE_SYMBOL(prepare_ftrace_return);
-
-/*
- * Patch the kernel code at ftrace_graph_caller location. The instruction
- * there is branch relative on condition. To enable the ftrace graph code
- * block, we simply patch the mask field of the instruction to zero and
- * turn the instruction into a nop.
- * To disable the ftrace graph code the mask field will be patched to
- * all ones, which turns the instruction into an unconditional branch.
- */
-int ftrace_enable_ftrace_graph_caller(void)
-{
- int rc;
-
- /* Expect brc 0xf,... */
- rc = ftrace_patch_branch_mask(ftrace_graph_caller, 0xa7f4, false);
- if (rc)
- return rc;
- text_poke_sync_lock();
- return 0;
-}
-
-int ftrace_disable_ftrace_graph_caller(void)
-{
- int rc;
-
- /* Expect brc 0x0,... */
- rc = ftrace_patch_branch_mask(ftrace_graph_caller, 0xa704, true);
- if (rc)
- return rc;
- text_poke_sync_lock();
- return 0;
+ return;
+ if (!function_graph_enter_regs(*parent, ip, 0, (unsigned long *)sp, fregs))
+ *parent = (unsigned long)&return_to_handler;
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
@@ -303,6 +295,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
if (bit < 0)
return;
+ kmsan_unpoison_memory(fregs, ftrace_regs_size());
regs = ftrace_get_regs(fregs);
p = get_kprobe((kprobe_opcode_t *)ip);
if (!regs || unlikely(!p) || kprobe_disabled(p))
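
On machines with sequential instruction fetching (cpu_has_seq_insn()), the ftrace code above patches the 6-byte branch at the call site directly instead of redirecting through a trampoline. The displacement of the brasl/brcl RIL-format instruction is a signed offset counted in halfwords, which is why ftrace_generate_branch_insn() divides by 2. A standalone sketch of that encoding, not part of the patch:

	#include <stdint.h>
	#include <stdio.h>

	struct insn { uint16_t opc; int32_t disp; };

	static struct insn branch(unsigned long ip, unsigned long target)
	{
		struct insn i;

		i.opc  = target ? 0xc005 : 0xc004;       /* brasl %r0,... or brcl 0,0 */
		i.disp = target ? (target - ip) / 2 : 0; /* halfword offset */
		return i;
	}

	int main(void)
	{
		struct insn i = branch(0x100000, 0x100800);

		printf("opc=0x%04x disp=0x%x\n", i.opc, i.disp); /* opc=0xc005 disp=0x400 */
		return 0;
	}
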
diff --git a/arch/s390/kernel/ftrace.h b/arch/s390/kernel/ftrace.h
index 7f75a9616406..23337065f402 100644
--- a/arch/s390/kernel/ftrace.h
+++ b/arch/s390/kernel/ftrace.h
@@ -18,7 +18,5 @@ extern const char ftrace_shared_hotpatch_trampoline_br[];
extern const char ftrace_shared_hotpatch_trampoline_br_end[];
extern const char ftrace_shared_hotpatch_trampoline_exrl[];
extern const char ftrace_shared_hotpatch_trampoline_exrl_end[];
-extern const char ftrace_plt_template[];
-extern const char ftrace_plt_template_end[];
#endif /* _FTRACE_H */
diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c
index 0b68168d9566..cf26d7a37425 100644
--- a/arch/s390/kernel/guarded_storage.c
+++ b/arch/s390/kernel/guarded_storage.c
@@ -4,6 +4,7 @@
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
*/
+#include <linux/cpufeature.h>
#include <linux/kernel.h>
#include <linux/syscalls.h>
#include <linux/signal.h>
@@ -109,7 +110,7 @@ static int gs_broadcast(void)
SYSCALL_DEFINE2(s390_guarded_storage, int, command,
struct gs_cb __user *, gs_cb)
{
- if (!MACHINE_HAS_GS)
+ if (!cpu_has_gs())
return -EOPNOTSUPP;
switch (command) {
case GS_ENABLE:
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head.S
index 45413b04efc5..7edb9ded199c 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head.S
@@ -10,6 +10,7 @@
#include <linux/init.h>
#include <linux/linkage.h>
+#include <asm/lowcore.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/page.h>
@@ -17,15 +18,14 @@
__HEAD
SYM_CODE_START(startup_continue)
- larl %r1,tod_clock_base
- mvc 0(16,%r1),__LC_BOOT_CLOCK
#
# Setup stack
#
+ GET_LC %r2
larl %r14,init_task
- stg %r14,__LC_CURRENT
+ stg %r14,__LC_CURRENT(%r2)
larl %r15,init_thread_union+STACK_INIT_OFFSET
- stg %r15,__LC_KERNEL_STACK
+ stg %r15,__LC_KERNEL_STACK(%r2)
brasl %r14,sclp_early_adjust_va # allow sclp_early_printk
brasl %r14,startup_init # s390 specific early init
brasl %r14,start_kernel # common init code
diff --git a/arch/s390/kernel/hiperdispatch.c b/arch/s390/kernel/hiperdispatch.c
new file mode 100644
index 000000000000..217206522266
--- /dev/null
+++ b/arch/s390/kernel/hiperdispatch.c
@@ -0,0 +1,430 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2024
+ */
+
+#define pr_fmt(fmt) "hd: " fmt
+
+/*
+ * Hiperdispatch:
+ * Dynamically calculates the optimum number of high capacity COREs
+ * by considering the state the system is in. When hiperdispatch decides
+ * that a capacity update is necessary, it schedules a topology update.
+ * During topology updates the CPU capacities are always re-adjusted.
+ *
+ * There is two places where CPU capacities are being accessed within
+ * hiperdispatch.
+ * -> hiperdispatch's reoccuring work function reads CPU capacities to
+ * determine high capacity CPU count.
+ * -> during a topology update hiperdispatch's adjustment function
+ * updates CPU capacities.
+ * These two can run on different CPUs in parallel which can cause
+ * hiperdispatch to make wrong decisions. This can potentially cause
+ * some overhead by leading to extra rebuild_sched_domains() calls
+ * for correction. Access to capacities within hiperdispatch has to be
+ * serialized to prevent the overhead.
+ *
+ * Hiperdispatch decision making revolves around steal time.
+ * HD_STEAL_THRESHOLD value is taken as reference. Whenever steal time
+ * crosses the threshold value hiperdispatch falls back to giving high
+ * capacities to entitled CPUs. When steal time drops below the
+ * threshold boundary, hiperdispatch utilizes all CPUs by giving all
+ * of them high capacity.
+ *
+ * The theory behind HD_STEAL_THRESHOLD is related to SMP thread
+ * performance. Comparing the throughput of:
+ * - a single CORE, with N threads, running N tasks
+ * - N separate COREs running N tasks,
+ * using individual COREs for individual tasks yields better
+ * performance. This performance difference is roughly 30% (it can
+ * change between machine generations).
+ *
+ * Hiperdispatch tries to hint the scheduler to use individual COREs for
+ * each task, as long as steal time on those COREs is less than 30%,
+ * therefore delaying the throughput loss caused by using SMP threads.
+ */
+
+#include <linux/cpufeature.h>
+#include <linux/cpumask.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/kernel_stat.h>
+#include <linux/kstrtox.h>
+#include <linux/ktime.h>
+#include <linux/sysctl.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <asm/hiperdispatch.h>
+#include <asm/setup.h>
+#include <asm/smp.h>
+#include <asm/topology.h>
+
+#define CREATE_TRACE_POINTS
+#include <asm/trace/hiperdispatch.h>
+
+#define HD_DELAY_FACTOR (4)
+#define HD_DELAY_INTERVAL (HZ / 4)
+#define HD_STEAL_THRESHOLD 10
+#define HD_STEAL_AVG_WEIGHT 16
+
+static cpumask_t hd_vl_coremask; /* Mask containing all vertical low COREs */
+static cpumask_t hd_vmvl_cpumask; /* Mask containing vertical medium and low CPUs */
+static int hd_high_capacity_cores; /* Current CORE count with high capacity */
+static int hd_entitled_cores; /* Total vertical high and medium CORE count */
+static int hd_online_cores; /* Current online CORE count */
+
+static unsigned long hd_previous_steal; /* Previous iteration's CPU steal timer total */
+static unsigned long hd_high_time; /* Total time spent while all cpus have high capacity */
+static unsigned long hd_low_time; /* Total time spent while vl cpus have low capacity */
+static atomic64_t hd_adjustments; /* Total occurrence count of hiperdispatch adjustments */
+
+static unsigned int hd_steal_threshold = HD_STEAL_THRESHOLD;
+static unsigned int hd_delay_factor = HD_DELAY_FACTOR;
+static int hd_enabled;
+
+static void hd_capacity_work_fn(struct work_struct *work);
+static DECLARE_DELAYED_WORK(hd_capacity_work, hd_capacity_work_fn);
+
+static int hd_set_hiperdispatch_mode(int enable)
+{
+ if (!cpu_has_topology())
+ enable = 0;
+ if (hd_enabled == enable)
+ return 0;
+ hd_enabled = enable;
+ return 1;
+}
+
+void hd_reset_state(void)
+{
+ cpumask_clear(&hd_vl_coremask);
+ cpumask_clear(&hd_vmvl_cpumask);
+ hd_entitled_cores = 0;
+ hd_online_cores = 0;
+}
+
+void hd_add_core(int cpu)
+{
+ const struct cpumask *siblings;
+ int polarization;
+
+ hd_online_cores++;
+ polarization = smp_cpu_get_polarization(cpu);
+ siblings = topology_sibling_cpumask(cpu);
+ switch (polarization) {
+ case POLARIZATION_VH:
+ hd_entitled_cores++;
+ break;
+ case POLARIZATION_VM:
+ hd_entitled_cores++;
+ cpumask_or(&hd_vmvl_cpumask, &hd_vmvl_cpumask, siblings);
+ break;
+ case POLARIZATION_VL:
+ cpumask_set_cpu(cpu, &hd_vl_coremask);
+ cpumask_or(&hd_vmvl_cpumask, &hd_vmvl_cpumask, siblings);
+ break;
+ }
+}
+
+/* Serialize update and read operations of debug counters. */
+static DEFINE_MUTEX(hd_counter_mutex);
+
+static void hd_update_times(void)
+{
+ static ktime_t prev;
+ ktime_t now;
+
+ /*
+ * Check if hiperdispatch is active; if not, set prev to 0.
+ * This way it is possible to differentiate the first update iteration after
+ * enabling hiperdispatch.
+ */
+ if (hd_entitled_cores == 0 || hd_enabled == 0) {
+ prev = ktime_set(0, 0);
+ return;
+ }
+ now = ktime_get();
+ if (ktime_after(prev, 0)) {
+ if (hd_high_capacity_cores == hd_online_cores)
+ hd_high_time += ktime_ms_delta(now, prev);
+ else
+ hd_low_time += ktime_ms_delta(now, prev);
+ }
+ prev = now;
+}
+
+static void hd_update_capacities(void)
+{
+ int cpu, upscaling_cores;
+ unsigned long capacity;
+
+ upscaling_cores = hd_high_capacity_cores - hd_entitled_cores;
+ capacity = upscaling_cores > 0 ? CPU_CAPACITY_HIGH : CPU_CAPACITY_LOW;
+ hd_high_capacity_cores = hd_entitled_cores;
+ for_each_cpu(cpu, &hd_vl_coremask) {
+ smp_set_core_capacity(cpu, capacity);
+ if (capacity != CPU_CAPACITY_HIGH)
+ continue;
+ hd_high_capacity_cores++;
+ upscaling_cores--;
+ if (upscaling_cores == 0)
+ capacity = CPU_CAPACITY_LOW;
+ }
+}
+
+void hd_disable_hiperdispatch(void)
+{
+ cancel_delayed_work_sync(&hd_capacity_work);
+ hd_high_capacity_cores = hd_online_cores;
+ hd_previous_steal = 0;
+}
+
+int hd_enable_hiperdispatch(void)
+{
+ mutex_lock(&hd_counter_mutex);
+ hd_update_times();
+ mutex_unlock(&hd_counter_mutex);
+ if (hd_enabled == 0)
+ return 0;
+ if (hd_entitled_cores == 0)
+ return 0;
+ if (hd_online_cores <= hd_entitled_cores)
+ return 0;
+ mod_delayed_work(system_dfl_wq, &hd_capacity_work, HD_DELAY_INTERVAL * hd_delay_factor);
+ hd_update_capacities();
+ return 1;
+}
+
+static unsigned long hd_steal_avg(unsigned long new)
+{
+ static unsigned long steal;
+
+ steal = (steal * (HD_STEAL_AVG_WEIGHT - 1) + new) / HD_STEAL_AVG_WEIGHT;
+ return steal;
+}
+
+static unsigned long hd_calculate_steal_percentage(void)
+{
+ unsigned long time_delta, steal_delta, steal, percentage;
+ static ktime_t prev;
+ int cpus, cpu;
+ ktime_t now;
+
+ cpus = 0;
+ steal = 0;
+ percentage = 0;
+ for_each_cpu(cpu, &hd_vmvl_cpumask) {
+ steal += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
+ cpus++;
+ }
+ /*
+ * If there are no vertical medium or low CPUs, steal time is 0,
+ * as vertical high CPUs shouldn't experience steal time.
+ */
+ if (cpus == 0)
+ return percentage;
+ now = ktime_get();
+ time_delta = ktime_to_ns(ktime_sub(now, prev));
+ if (steal > hd_previous_steal && hd_previous_steal != 0) {
+ steal_delta = (steal - hd_previous_steal) * 100 / time_delta;
+ percentage = steal_delta / cpus;
+ }
+ hd_previous_steal = steal;
+ prev = now;
+ return percentage;
+}
+
+static void hd_capacity_work_fn(struct work_struct *work)
+{
+ unsigned long steal_percentage, new_cores;
+
+ mutex_lock(&smp_cpu_state_mutex);
+ /*
+ * If online cores are less than or equal to entitled cores,
+ * hiperdispatch does not need to make any adjustments; call a
+ * topology update to disable hiperdispatch.
+ * Normally this check is handled on topology update, but during CPU
+ * hot unplug, topology and cpu mask updates are done in reverse
+ * order, causing hd_enable_hiperdispatch() to get stale data.
+ */
+ if (hd_online_cores <= hd_entitled_cores) {
+ topology_schedule_update();
+ mutex_unlock(&smp_cpu_state_mutex);
+ return;
+ }
+ steal_percentage = hd_steal_avg(hd_calculate_steal_percentage());
+ if (steal_percentage < hd_steal_threshold)
+ new_cores = hd_online_cores;
+ else
+ new_cores = hd_entitled_cores;
+ if (hd_high_capacity_cores != new_cores) {
+ trace_s390_hd_rebuild_domains(hd_high_capacity_cores, new_cores);
+ hd_high_capacity_cores = new_cores;
+ atomic64_inc(&hd_adjustments);
+ topology_schedule_update();
+ }
+ trace_s390_hd_work_fn(steal_percentage, hd_entitled_cores, hd_high_capacity_cores);
+ mutex_unlock(&smp_cpu_state_mutex);
+ schedule_delayed_work(&hd_capacity_work, HD_DELAY_INTERVAL);
+}
+
+static int hiperdispatch_ctl_handler(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int hiperdispatch;
+ int rc;
+ struct ctl_table ctl_entry = {
+ .procname = ctl->procname,
+ .data = &hiperdispatch,
+ .maxlen = sizeof(int),
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ };
+
+ hiperdispatch = hd_enabled;
+ rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write)
+ return rc;
+ mutex_lock(&smp_cpu_state_mutex);
+ if (hd_set_hiperdispatch_mode(hiperdispatch))
+ topology_schedule_update();
+ mutex_unlock(&smp_cpu_state_mutex);
+ return 0;
+}
+
+static const struct ctl_table hiperdispatch_ctl_table[] = {
+ {
+ .procname = "hiperdispatch",
+ .mode = 0644,
+ .proc_handler = hiperdispatch_ctl_handler,
+ },
+};
+
+static ssize_t hd_steal_threshold_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%u\n", hd_steal_threshold);
+}
+
+static ssize_t hd_steal_threshold_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ unsigned int val;
+ int rc;
+
+ rc = kstrtouint(buf, 0, &val);
+ if (rc)
+ return rc;
+ if (val > 100)
+ return -ERANGE;
+ hd_steal_threshold = val;
+ return count;
+}
+
+static DEVICE_ATTR_RW(hd_steal_threshold);
+
+static ssize_t hd_delay_factor_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%u\n", hd_delay_factor);
+}
+
+static ssize_t hd_delay_factor_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ unsigned int val;
+ int rc;
+
+ rc = kstrtouint(buf, 0, &val);
+ if (rc)
+ return rc;
+ if (!val)
+ return -ERANGE;
+ hd_delay_factor = val;
+ return count;
+}
+
+static DEVICE_ATTR_RW(hd_delay_factor);
+
+static struct attribute *hd_attrs[] = {
+ &dev_attr_hd_steal_threshold.attr,
+ &dev_attr_hd_delay_factor.attr,
+ NULL,
+};
+
+static const struct attribute_group hd_attr_group = {
+ .name = "hiperdispatch",
+ .attrs = hd_attrs,
+};
+
+static int hd_greedy_time_get(void *unused, u64 *val)
+{
+ mutex_lock(&hd_counter_mutex);
+ hd_update_times();
+ *val = hd_high_time;
+ mutex_unlock(&hd_counter_mutex);
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hd_greedy_time_fops, hd_greedy_time_get, NULL, "%llu\n");
+
+static int hd_conservative_time_get(void *unused, u64 *val)
+{
+ mutex_lock(&hd_counter_mutex);
+ hd_update_times();
+ *val = hd_low_time;
+ mutex_unlock(&hd_counter_mutex);
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hd_conservative_time_fops, hd_conservative_time_get, NULL, "%llu\n");
+
+static int hd_adjustment_count_get(void *unused, u64 *val)
+{
+ *val = atomic64_read(&hd_adjustments);
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hd_adjustments_fops, hd_adjustment_count_get, NULL, "%llu\n");
+
+static void __init hd_create_debugfs_counters(void)
+{
+ struct dentry *dir;
+
+ dir = debugfs_create_dir("hiperdispatch", arch_debugfs_dir);
+ debugfs_create_file("conservative_time_ms", 0400, dir, NULL, &hd_conservative_time_fops);
+ debugfs_create_file("greedy_time_ms", 0400, dir, NULL, &hd_greedy_time_fops);
+ debugfs_create_file("adjustment_count", 0400, dir, NULL, &hd_adjustments_fops);
+}
+
+static void __init hd_create_attributes(void)
+{
+ struct device *dev;
+
+ dev = bus_get_dev_root(&cpu_subsys);
+ if (!dev)
+ return;
+ if (sysfs_create_group(&dev->kobj, &hd_attr_group))
+ pr_warn("Unable to create hiperdispatch attribute group\n");
+ put_device(dev);
+}
+
+static int __init hd_init(void)
+{
+ if (IS_ENABLED(CONFIG_HIPERDISPATCH_ON)) {
+ hd_set_hiperdispatch_mode(1);
+ topology_schedule_update();
+ }
+ if (!register_sysctl("s390", hiperdispatch_ctl_table))
+ pr_warn("Failed to register s390.hiperdispatch sysctl attribute\n");
+ hd_create_debugfs_counters();
+ hd_create_attributes();
+ return 0;
+}
+late_initcall(hd_init);
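
Two usage notes on the new hiperdispatch code, both derived from the file above: the on/off switch is registered in the "s390" sysctl table, so it should appear as /proc/sys/s390/hiperdispatch, and the tunables are attached to the cpu subsystem root, typically /sys/devices/system/cpu/hiperdispatch/hd_steal_threshold and .../hd_delay_factor. The smoothing in hd_steal_avg() is a simple /16 weighted moving average, so a sudden steal spike only crosses HD_STEAL_THRESHOLD after several work-queue iterations. A standalone sketch, not part of the patch:

	/* With a constant raw steal of 40%, the average climbs 2, 4, 6, 8,
	 * 10, 11, ... and reaches the default threshold of 10 only on the
	 * fifth sample. */
	#include <stdio.h>

	#define HD_STEAL_AVG_WEIGHT	16
	#define HD_STEAL_THRESHOLD	10

	int main(void)
	{
		unsigned long steal = 0;
		int i;

		for (i = 1; i <= 6; i++) {
			steal = (steal * (HD_STEAL_AVG_WEIGHT - 1) + 40) / HD_STEAL_AVG_WEIGHT;
			printf("sample %d: avg=%lu%s\n", i, steal,
			       steal >= HD_STEAL_THRESHOLD ? " (above threshold)" : "");
		}
		return 0;
	}
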
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index af9c97c0ad73..39cb8d0ae348 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -24,6 +24,7 @@ static DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
void account_idle_time_irq(void)
{
struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
+ struct lowcore *lc = get_lowcore();
unsigned long idle_time;
u64 cycles_new[8];
int i;
@@ -34,13 +35,13 @@ void account_idle_time_irq(void)
this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]);
}
- idle_time = S390_lowcore.int_clock - idle->clock_idle_enter;
+ idle_time = lc->int_clock - idle->clock_idle_enter;
- S390_lowcore.steal_timer += idle->clock_idle_enter - S390_lowcore.last_update_clock;
- S390_lowcore.last_update_clock = S390_lowcore.int_clock;
+ lc->steal_timer += idle->clock_idle_enter - lc->last_update_clock;
+ lc->last_update_clock = lc->int_clock;
- S390_lowcore.system_timer += S390_lowcore.last_update_timer - idle->timer_idle_enter;
- S390_lowcore.last_update_timer = S390_lowcore.sys_enter_timer;
+ lc->system_timer += lc->last_update_timer - idle->timer_idle_enter;
+ lc->last_update_timer = lc->sys_enter_timer;
/* Account time spent with enabled wait psw loaded as idle time. */
WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 3a7d6e172211..961a3d60a4dd 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -22,6 +22,7 @@
#include <linux/debug_locks.h>
#include <linux/vmalloc.h>
#include <asm/asm-extable.h>
+#include <asm/machine.h>
#include <asm/diag.h>
#include <asm/ipl.h>
#include <asm/smp.h>
@@ -185,7 +186,7 @@ static inline int __diag308(unsigned long subcode, unsigned long addr)
r1.even = addr;
r1.odd = 0;
- asm volatile(
+ asm_inline volatile(
" diag %[r1],%[subcode],0x308\n"
"0: nopr %%r7\n"
EX_TABLE(0b,0b)
@@ -209,7 +210,7 @@ static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \
struct kobj_attribute *attr, \
char *page) \
{ \
- return scnprintf(page, PAGE_SIZE, _format, ##args); \
+ return sysfs_emit(page, _format, ##args); \
}
#define IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk) \
@@ -269,8 +270,8 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
{ \
if (len >= sizeof(_value)) \
return -E2BIG; \
- len = strscpy(_value, buf, sizeof(_value)); \
- if (len < 0) \
+ len = strscpy(_value, buf); \
+ if ((ssize_t)len < 0) \
return len; \
strim(_value); \
return len; \
@@ -280,58 +281,58 @@ static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
sys_##_prefix##_##_name##_show, \
sys_##_prefix##_##_name##_store)
-#define IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \
-static ssize_t sys_##_prefix##_scp_data_show(struct file *filp, \
- struct kobject *kobj, \
- struct bin_attribute *attr, \
- char *buf, loff_t off, \
- size_t count) \
-{ \
- size_t size = _ipl_block.scp_data_len; \
- void *scp_data = _ipl_block.scp_data; \
- \
- return memory_read_from_buffer(buf, count, &off, \
- scp_data, size); \
+#define IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \
+static ssize_t sys_##_prefix##_scp_data_show(struct file *filp, \
+ struct kobject *kobj, \
+ const struct bin_attribute *attr, \
+ char *buf, loff_t off, \
+ size_t count) \
+{ \
+ size_t size = _ipl_block.scp_data_len; \
+ void *scp_data = _ipl_block.scp_data; \
+ \
+ return memory_read_from_buffer(buf, count, &off, \
+ scp_data, size); \
}
#define IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\
-static ssize_t sys_##_prefix##_scp_data_store(struct file *filp, \
- struct kobject *kobj, \
- struct bin_attribute *attr, \
- char *buf, loff_t off, \
- size_t count) \
-{ \
- size_t scpdata_len = count; \
- size_t padding; \
- \
- if (off) \
- return -EINVAL; \
- \
- memcpy(_ipl_block.scp_data, buf, count); \
- if (scpdata_len % 8) { \
- padding = 8 - (scpdata_len % 8); \
- memset(_ipl_block.scp_data + scpdata_len, \
- 0, padding); \
- scpdata_len += padding; \
- } \
- \
- _ipl_block_hdr.len = _ipl_bp_len + scpdata_len; \
- _ipl_block.len = _ipl_bp0_len + scpdata_len; \
- _ipl_block.scp_data_len = scpdata_len; \
- \
- return count; \
+static ssize_t sys_##_prefix##_scp_data_store(struct file *filp, \
+ struct kobject *kobj, \
+ const struct bin_attribute *attr, \
+ char *buf, loff_t off, \
+ size_t count) \
+{ \
+ size_t scpdata_len = count; \
+ size_t padding; \
+ \
+ if (off) \
+ return -EINVAL; \
+ \
+ memcpy(_ipl_block.scp_data, buf, count); \
+ if (scpdata_len % 8) { \
+ padding = 8 - (scpdata_len % 8); \
+ memset(_ipl_block.scp_data + scpdata_len, \
+ 0, padding); \
+ scpdata_len += padding; \
+ } \
+ \
+ _ipl_block_hdr.len = _ipl_bp_len + scpdata_len; \
+ _ipl_block.len = _ipl_bp0_len + scpdata_len; \
+ _ipl_block.scp_data_len = scpdata_len; \
+ \
+ return count; \
}
#define DEFINE_IPL_ATTR_SCP_DATA_RO(_prefix, _ipl_block, _size) \
IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \
-static struct bin_attribute sys_##_prefix##_scp_data_attr = \
+static const struct bin_attribute sys_##_prefix##_scp_data_attr = \
__BIN_ATTR(scp_data, 0444, sys_##_prefix##_scp_data_show, \
NULL, _size)
#define DEFINE_IPL_ATTR_SCP_DATA_RW(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len, _size)\
IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \
IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\
-static struct bin_attribute sys_##_prefix##_scp_data_attr = \
+static const struct bin_attribute sys_##_prefix##_scp_data_attr = \
__BIN_ATTR(scp_data, 0644, sys_##_prefix##_scp_data_show, \
sys_##_prefix##_scp_data_store, _size)
@@ -372,7 +373,7 @@ EXPORT_SYMBOL_GPL(ipl_info);
static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr,
char *page)
{
- return sprintf(page, "%s\n", ipl_type_str(ipl_info.type));
+ return sysfs_emit(page, "%s\n", ipl_type_str(ipl_info.type));
}
static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type);
@@ -380,7 +381,7 @@ static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type);
static ssize_t ipl_secure_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%i\n", !!ipl_secure_flag);
+ return sysfs_emit(page, "%i\n", !!ipl_secure_flag);
}
static struct kobj_attribute sys_ipl_secure_attr =
@@ -389,7 +390,7 @@ static struct kobj_attribute sys_ipl_secure_attr =
static ssize_t ipl_has_secure_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%i\n", !!sclp.has_sipl);
+ return sysfs_emit(page, "%i\n", !!sclp.has_sipl);
}
static struct kobj_attribute sys_ipl_has_secure_attr =
@@ -402,7 +403,7 @@ static ssize_t ipl_vm_parm_show(struct kobject *kobj,
if (ipl_block_valid && (ipl_block.pb0_hdr.pbt == IPL_PBT_CCW))
ipl_block_get_ascii_vmparm(parm, sizeof(parm), &ipl_block);
- return sprintf(page, "%s\n", parm);
+ return sysfs_emit(page, "%s\n", parm);
}
static struct kobj_attribute sys_ipl_vm_parm_attr =
@@ -413,18 +414,18 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj,
{
switch (ipl_info.type) {
case IPL_TYPE_CCW:
- return sprintf(page, "0.%x.%04x\n", ipl_block.ccw.ssid,
- ipl_block.ccw.devno);
+ return sysfs_emit(page, "0.%x.%04x\n", ipl_block.ccw.ssid,
+ ipl_block.ccw.devno);
case IPL_TYPE_ECKD:
case IPL_TYPE_ECKD_DUMP:
- return sprintf(page, "0.%x.%04x\n", ipl_block.eckd.ssid,
- ipl_block.eckd.devno);
+ return sysfs_emit(page, "0.%x.%04x\n", ipl_block.eckd.ssid,
+ ipl_block.eckd.devno);
case IPL_TYPE_FCP:
case IPL_TYPE_FCP_DUMP:
- return sprintf(page, "0.0.%04x\n", ipl_block.fcp.devno);
+ return sysfs_emit(page, "0.0.%04x\n", ipl_block.fcp.devno);
case IPL_TYPE_NVME:
case IPL_TYPE_NVME_DUMP:
- return sprintf(page, "%08ux\n", ipl_block.nvme.fid);
+ return sysfs_emit(page, "%08ux\n", ipl_block.nvme.fid);
default:
return 0;
}
@@ -434,19 +435,19 @@ static struct kobj_attribute sys_ipl_device_attr =
__ATTR(device, 0444, sys_ipl_device_show, NULL);
static ssize_t sys_ipl_parameter_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr, char *buf,
+ const struct bin_attribute *attr, char *buf,
loff_t off, size_t count)
{
return memory_read_from_buffer(buf, count, &off, &ipl_block,
ipl_block.hdr.len);
}
-static struct bin_attribute sys_ipl_parameter_attr =
+static const struct bin_attribute sys_ipl_parameter_attr =
__BIN_ATTR(binary_parameter, 0444, sys_ipl_parameter_read, NULL,
PAGE_SIZE);
DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_fcp, ipl_block.fcp, PAGE_SIZE);
-static struct bin_attribute *ipl_fcp_bin_attrs[] = {
+static const struct bin_attribute *const ipl_fcp_bin_attrs[] = {
&sys_ipl_parameter_attr,
&sys_ipl_fcp_scp_data_attr,
NULL,
@@ -454,7 +455,7 @@ static struct bin_attribute *ipl_fcp_bin_attrs[] = {
DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_nvme, ipl_block.nvme, PAGE_SIZE);
-static struct bin_attribute *ipl_nvme_bin_attrs[] = {
+static const struct bin_attribute *const ipl_nvme_bin_attrs[] = {
&sys_ipl_parameter_attr,
&sys_ipl_nvme_scp_data_attr,
NULL,
@@ -462,7 +463,7 @@ static struct bin_attribute *ipl_nvme_bin_attrs[] = {
DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_eckd, ipl_block.eckd, PAGE_SIZE);
-static struct bin_attribute *ipl_eckd_bin_attrs[] = {
+static const struct bin_attribute *const ipl_eckd_bin_attrs[] = {
&sys_ipl_parameter_attr,
&sys_ipl_eckd_scp_data_attr,
NULL,
@@ -503,12 +504,12 @@ static ssize_t eckd_##_name##_br_chr_show(struct kobject *kobj, \
if (!ipb->br_chr.cyl && \
!ipb->br_chr.head && \
!ipb->br_chr.record) \
- return sprintf(buf, "auto\n"); \
+ return sysfs_emit(buf, "auto\n"); \
\
- return sprintf(buf, "0x%x,0x%x,0x%x\n", \
- ipb->br_chr.cyl, \
- ipb->br_chr.head, \
- ipb->br_chr.record); \
+ return sysfs_emit(buf, "0x%x,0x%x,0x%x\n", \
+ ipb->br_chr.cyl, \
+ ipb->br_chr.head, \
+ ipb->br_chr.record); \
}
#define IPL_ATTR_BR_CHR_STORE_FN(_name, _ipb) \
@@ -573,11 +574,11 @@ static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj,
char loadparm[LOADPARM_LEN + 1] = {};
if (!sclp_ipl_info.is_valid)
- return sprintf(page, "#unknown#\n");
+ return sysfs_emit(page, "#unknown#\n");
memcpy(loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN);
EBCASC(loadparm, LOADPARM_LEN);
strim(loadparm);
- return sprintf(page, "%s\n", loadparm);
+ return sysfs_emit(page, "%s\n", loadparm);
}
static struct kobj_attribute sys_ipl_ccw_loadparm_attr =
@@ -593,7 +594,7 @@ static struct attribute *ipl_fcp_attrs[] = {
NULL,
};
-static struct attribute_group ipl_fcp_attr_group = {
+static const struct attribute_group ipl_fcp_attr_group = {
.attrs = ipl_fcp_attrs,
.bin_attrs = ipl_fcp_bin_attrs,
};
@@ -607,7 +608,7 @@ static struct attribute *ipl_nvme_attrs[] = {
NULL,
};
-static struct attribute_group ipl_nvme_attr_group = {
+static const struct attribute_group ipl_nvme_attr_group = {
.attrs = ipl_nvme_attrs,
.bin_attrs = ipl_nvme_bin_attrs,
};
@@ -620,7 +621,7 @@ static struct attribute *ipl_eckd_attrs[] = {
NULL,
};
-static struct attribute_group ipl_eckd_attr_group = {
+static const struct attribute_group ipl_eckd_attr_group = {
.attrs = ipl_eckd_attrs,
.bin_attrs = ipl_eckd_bin_attrs,
};
@@ -640,11 +641,11 @@ static struct attribute *ipl_ccw_attrs_lpar[] = {
NULL,
};
-static struct attribute_group ipl_ccw_attr_group_vm = {
+static const struct attribute_group ipl_ccw_attr_group_vm = {
.attrs = ipl_ccw_attrs_vm,
};
-static struct attribute_group ipl_ccw_attr_group_lpar = {
+static const struct attribute_group ipl_ccw_attr_group_lpar = {
.attrs = ipl_ccw_attrs_lpar
};
@@ -655,7 +656,7 @@ static struct attribute *ipl_common_attrs[] = {
NULL,
};
-static struct attribute_group ipl_common_attr_group = {
+static const struct attribute_group ipl_common_attr_group = {
.attrs = ipl_common_attrs,
};
@@ -685,7 +686,7 @@ static int __init ipl_init(void)
goto out;
switch (ipl_info.type) {
case IPL_TYPE_CCW:
- if (MACHINE_IS_VM)
+ if (machine_is_vm())
rc = sysfs_create_group(&ipl_kset->kobj,
&ipl_ccw_attr_group_vm);
else
@@ -731,7 +732,7 @@ static ssize_t reipl_generic_vmparm_show(struct ipl_parameter_block *ipb,
char vmparm[DIAG308_VMPARM_SIZE + 1] = {};
ipl_block_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb);
- return sprintf(page, "%s\n", vmparm);
+ return sysfs_emit(page, "%s\n", vmparm);
}
static ssize_t reipl_generic_vmparm_store(struct ipl_parameter_block *ipb,
@@ -808,7 +809,7 @@ DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_fcp, reipl_block_fcp->hdr,
IPL_BP_FCP_LEN, IPL_BP0_FCP_LEN,
DIAG308_SCPDATA_SIZE);
-static struct bin_attribute *reipl_fcp_bin_attrs[] = {
+static const struct bin_attribute *const reipl_fcp_bin_attrs[] = {
&sys_reipl_fcp_scp_data_attr,
NULL,
};
@@ -839,7 +840,7 @@ static ssize_t reipl_generic_loadparm_show(struct ipl_parameter_block *ipb,
char buf[LOADPARM_LEN + 1];
reipl_get_ascii_loadparm(buf, ipb);
- return sprintf(page, "%s\n", buf);
+ return sysfs_emit(page, "%s\n", buf);
}
static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb,
@@ -895,7 +896,7 @@ DEFINE_GENERIC_LOADPARM(eckd);
static ssize_t reipl_fcp_clear_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%u\n", reipl_fcp_clear);
+ return sysfs_emit(page, "%u\n", reipl_fcp_clear);
}
static ssize_t reipl_fcp_clear_store(struct kobject *kobj,
@@ -917,7 +918,7 @@ static struct attribute *reipl_fcp_attrs[] = {
NULL,
};
-static struct attribute_group reipl_fcp_attr_group = {
+static const struct attribute_group reipl_fcp_attr_group = {
.attrs = reipl_fcp_attrs,
.bin_attrs = reipl_fcp_bin_attrs,
};
@@ -932,7 +933,7 @@ DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_nvme, reipl_block_nvme->hdr,
IPL_BP_NVME_LEN, IPL_BP0_NVME_LEN,
DIAG308_SCPDATA_SIZE);
-static struct bin_attribute *reipl_nvme_bin_attrs[] = {
+static const struct bin_attribute *const reipl_nvme_bin_attrs[] = {
&sys_reipl_nvme_scp_data_attr,
NULL,
};
@@ -955,7 +956,7 @@ static struct attribute *reipl_nvme_attrs[] = {
NULL,
};
-static struct attribute_group reipl_nvme_attr_group = {
+static const struct attribute_group reipl_nvme_attr_group = {
.attrs = reipl_nvme_attrs,
.bin_attrs = reipl_nvme_bin_attrs
};
@@ -963,7 +964,7 @@ static struct attribute_group reipl_nvme_attr_group = {
static ssize_t reipl_nvme_clear_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%u\n", reipl_nvme_clear);
+ return sysfs_emit(page, "%u\n", reipl_nvme_clear);
}
static ssize_t reipl_nvme_clear_store(struct kobject *kobj,
@@ -984,7 +985,7 @@ DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw);
static ssize_t reipl_ccw_clear_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%u\n", reipl_ccw_clear);
+ return sysfs_emit(page, "%u\n", reipl_ccw_clear);
}
static ssize_t reipl_ccw_clear_store(struct kobject *kobj,
@@ -1031,7 +1032,7 @@ DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_eckd, reipl_block_eckd->hdr,
IPL_BP_ECKD_LEN, IPL_BP0_ECKD_LEN,
DIAG308_SCPDATA_SIZE);
-static struct bin_attribute *reipl_eckd_bin_attrs[] = {
+static const struct bin_attribute *const reipl_eckd_bin_attrs[] = {
&sys_reipl_eckd_scp_data_attr,
NULL,
};
@@ -1048,7 +1049,7 @@ static struct attribute *reipl_eckd_attrs[] = {
NULL,
};
-static struct attribute_group reipl_eckd_attr_group = {
+static const struct attribute_group reipl_eckd_attr_group = {
.attrs = reipl_eckd_attrs,
.bin_attrs = reipl_eckd_bin_attrs
};
@@ -1056,7 +1057,7 @@ static struct attribute_group reipl_eckd_attr_group = {
static ssize_t reipl_eckd_clear_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%u\n", reipl_eckd_clear);
+ return sysfs_emit(page, "%u\n", reipl_eckd_clear);
}
static ssize_t reipl_eckd_clear_store(struct kobject *kobj,
@@ -1086,7 +1087,7 @@ static ssize_t reipl_nss_name_show(struct kobject *kobj,
char nss_name[NSS_NAME_SIZE + 1] = {};
reipl_get_ascii_nss_name(nss_name, reipl_block_nss);
- return sprintf(page, "%s\n", nss_name);
+ return sysfs_emit(page, "%s\n", nss_name);
}
static ssize_t reipl_nss_name_store(struct kobject *kobj,
@@ -1171,7 +1172,7 @@ static int reipl_set_type(enum ipl_type type)
static ssize_t reipl_type_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", ipl_type_str(reipl_type));
+ return sysfs_emit(page, "%s\n", ipl_type_str(reipl_type));
}
static ssize_t reipl_type_store(struct kobject *kobj,
@@ -1272,7 +1273,7 @@ static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb)
ipb->ccw.flags = IPL_PB0_FLAG_LOADPARM;
/* VM PARM */
- if (MACHINE_IS_VM && ipl_block_valid &&
+ if (machine_is_vm() && ipl_block_valid &&
(ipl_block.ccw.vm_flags & IPL_PB0_CCW_VM_FLAG_VP)) {
ipb->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_VP;
@@ -1286,7 +1287,7 @@ static int __init reipl_nss_init(void)
{
int rc;
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return 0;
reipl_block_nss = (void *) get_zeroed_page(GFP_KERNEL);
@@ -1311,8 +1312,8 @@ static int __init reipl_ccw_init(void)
return -ENOMEM;
rc = sysfs_create_group(&reipl_kset->kobj,
- MACHINE_IS_VM ? &reipl_ccw_attr_group_vm
- : &reipl_ccw_attr_group_lpar);
+ machine_is_vm() ? &reipl_ccw_attr_group_vm
+ : &reipl_ccw_attr_group_lpar);
if (rc)
return rc;
@@ -1587,12 +1588,12 @@ static struct attribute *dump_fcp_attrs[] = {
NULL,
};
-static struct bin_attribute *dump_fcp_bin_attrs[] = {
+static const struct bin_attribute *const dump_fcp_bin_attrs[] = {
&sys_dump_fcp_scp_data_attr,
NULL,
};
-static struct attribute_group dump_fcp_attr_group = {
+static const struct attribute_group dump_fcp_attr_group = {
.name = IPL_FCP_STR,
.attrs = dump_fcp_attrs,
.bin_attrs = dump_fcp_bin_attrs,
@@ -1621,12 +1622,12 @@ static struct attribute *dump_nvme_attrs[] = {
NULL,
};
-static struct bin_attribute *dump_nvme_bin_attrs[] = {
+static const struct bin_attribute *const dump_nvme_bin_attrs[] = {
&sys_dump_nvme_scp_data_attr,
NULL,
};
-static struct attribute_group dump_nvme_attr_group = {
+static const struct attribute_group dump_nvme_attr_group = {
.name = IPL_NVME_STR,
.attrs = dump_nvme_attrs,
.bin_attrs = dump_nvme_bin_attrs,
@@ -1655,12 +1656,12 @@ static struct attribute *dump_eckd_attrs[] = {
NULL,
};
-static struct bin_attribute *dump_eckd_bin_attrs[] = {
+static const struct bin_attribute *const dump_eckd_bin_attrs[] = {
&sys_dump_eckd_scp_data_attr,
NULL,
};
-static struct attribute_group dump_eckd_attr_group = {
+static const struct attribute_group dump_eckd_attr_group = {
.name = IPL_ECKD_STR,
.attrs = dump_eckd_attrs,
.bin_attrs = dump_eckd_bin_attrs,
@@ -1692,7 +1693,7 @@ static int dump_set_type(enum dump_type type)
static ssize_t dump_type_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", dump_type_str(dump_type));
+ return sysfs_emit(page, "%s\n", dump_type_str(dump_type));
}
static ssize_t dump_type_store(struct kobject *kobj,
@@ -1717,6 +1718,24 @@ static ssize_t dump_type_store(struct kobject *kobj,
static struct kobj_attribute dump_type_attr =
__ATTR(dump_type, 0644, dump_type_show, dump_type_store);
+static ssize_t dump_area_size_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sysfs_emit(page, "%lu\n", sclp.hsa_size);
+}
+
+static struct kobj_attribute dump_area_size_attr = __ATTR_RO(dump_area_size);
+
+static struct attribute *dump_attrs[] = {
+ &dump_type_attr.attr,
+ &dump_area_size_attr.attr,
+ NULL,
+};
+
+static struct attribute_group dump_attr_group = {
+ .attrs = dump_attrs,
+};
+
static struct kset *dump_kset;
static void diag308_dump(void *dump_block)
@@ -1853,7 +1872,7 @@ static int __init dump_init(void)
dump_kset = kset_create_and_add("dump", NULL, firmware_kobj);
if (!dump_kset)
return -ENOMEM;
- rc = sysfs_create_file(&dump_kset->kobj, &dump_type_attr.attr);
+ rc = sysfs_create_group(&dump_kset->kobj, &dump_attr_group);
if (rc) {
kset_unregister(dump_kset);
return rc;
@@ -1969,7 +1988,7 @@ static void vmcmd_run(struct shutdown_trigger *trigger)
static int vmcmd_init(void)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return -EOPNOTSUPP;
vmcmd_kset = kset_create_and_add("vmcmd", NULL, firmware_kobj);
if (!vmcmd_kset)
@@ -2034,7 +2053,7 @@ static struct shutdown_trigger on_reboot_trigger = {ON_REIPL_STR,
static ssize_t on_reboot_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", on_reboot_trigger.action->name);
+ return sysfs_emit(page, "%s\n", on_reboot_trigger.action->name);
}
static ssize_t on_reboot_store(struct kobject *kobj,
@@ -2060,7 +2079,7 @@ static struct shutdown_trigger on_panic_trigger = {ON_PANIC_STR, &stop_action};
static ssize_t on_panic_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", on_panic_trigger.action->name);
+ return sysfs_emit(page, "%s\n", on_panic_trigger.action->name);
}
static ssize_t on_panic_store(struct kobject *kobj,
@@ -2086,7 +2105,7 @@ static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR,
static ssize_t on_restart_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", on_restart_trigger.action->name);
+ return sysfs_emit(page, "%s\n", on_restart_trigger.action->name);
}
static ssize_t on_restart_store(struct kobject *kobj,
@@ -2112,7 +2131,7 @@ void do_restart(void *arg)
tracing_off();
debug_locks_off();
lgr_info_log();
- smp_call_online_cpu(__do_restart, arg);
+ smp_call_ipl_cpu(__do_restart, arg);
}
/* on halt */
@@ -2122,7 +2141,7 @@ static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action};
static ssize_t on_halt_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", on_halt_trigger.action->name);
+ return sysfs_emit(page, "%s\n", on_halt_trigger.action->name);
}
static ssize_t on_halt_store(struct kobject *kobj,
@@ -2148,7 +2167,7 @@ static struct shutdown_trigger on_poff_trigger = {ON_POFF_STR, &stop_action};
static ssize_t on_poff_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", on_poff_trigger.action->name);
+ return sysfs_emit(page, "%s\n", on_poff_trigger.action->name);
}
static ssize_t on_poff_store(struct kobject *kobj,
@@ -2230,26 +2249,28 @@ static int __init s390_ipl_init(void)
__initcall(s390_ipl_init);
-static void __init strncpy_skip_quote(char *dst, char *src, int n)
+static void __init strscpy_skip_quote(char *dst, char *src, int n)
{
int sx, dx;
- dx = 0;
- for (sx = 0; src[sx] != 0; sx++) {
+ if (!n)
+ return;
+ for (sx = 0, dx = 0; src[sx]; sx++) {
if (src[sx] == '"')
continue;
- dst[dx++] = src[sx];
- if (dx >= n)
+ dst[dx] = src[sx];
+ if (dx + 1 == n)
break;
+ dx++;
}
+ dst[dx] = '\0';
}
static int __init vmcmd_on_reboot_setup(char *str)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return 1;
- strncpy_skip_quote(vmcmd_on_reboot, str, VMCMD_MAX_SIZE);
- vmcmd_on_reboot[VMCMD_MAX_SIZE] = 0;
+ strscpy_skip_quote(vmcmd_on_reboot, str, sizeof(vmcmd_on_reboot));
on_reboot_trigger.action = &vmcmd_action;
return 1;
}
@@ -2257,10 +2278,9 @@ __setup("vmreboot=", vmcmd_on_reboot_setup);
static int __init vmcmd_on_panic_setup(char *str)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return 1;
- strncpy_skip_quote(vmcmd_on_panic, str, VMCMD_MAX_SIZE);
- vmcmd_on_panic[VMCMD_MAX_SIZE] = 0;
+ strscpy_skip_quote(vmcmd_on_panic, str, sizeof(vmcmd_on_panic));
on_panic_trigger.action = &vmcmd_action;
return 1;
}
@@ -2268,10 +2288,9 @@ __setup("vmpanic=", vmcmd_on_panic_setup);
static int __init vmcmd_on_halt_setup(char *str)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return 1;
- strncpy_skip_quote(vmcmd_on_halt, str, VMCMD_MAX_SIZE);
- vmcmd_on_halt[VMCMD_MAX_SIZE] = 0;
+ strscpy_skip_quote(vmcmd_on_halt, str, sizeof(vmcmd_on_halt));
on_halt_trigger.action = &vmcmd_action;
return 1;
}
@@ -2279,10 +2298,9 @@ __setup("vmhalt=", vmcmd_on_halt_setup);
static int __init vmcmd_on_poff_setup(char *str)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return 1;
- strncpy_skip_quote(vmcmd_on_poff, str, VMCMD_MAX_SIZE);
- vmcmd_on_poff[VMCMD_MAX_SIZE] = 0;
+ strscpy_skip_quote(vmcmd_on_poff, str, sizeof(vmcmd_on_poff));
on_poff_trigger.action = &vmcmd_action;
return 1;
}
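
The strncpy_skip_quote() -> strscpy_skip_quote() rename above reflects a changed truncation contract: the destination is now always NUL-terminated within n bytes (and n == 0 is rejected), whereas the old helper could fill the buffer completely and relied on the caller to append the terminator afterwards. A stand-alone sketch of the new copy semantics (user-space; the sample string is only illustrative):

#include <stdio.h>
#include <string.h>

/* Copy src to dst, dropping '"' characters, never writing more than
 * n bytes and always NUL-terminating -- mirrors strscpy_skip_quote()
 * from the hunk above. */
static void copy_skip_quote(char *dst, const char *src, size_t n)
{
	size_t sx, dx;

	if (!n)
		return;
	for (sx = 0, dx = 0; src[sx]; sx++) {
		if (src[sx] == '"')
			continue;
		dst[dx] = src[sx];
		if (dx + 1 == n)	/* keep room for the terminator */
			break;
		dx++;
	}
	dst[dx] = '\0';
}

int main(void)
{
	char buf[8];

	copy_skip_quote(buf, "\"ipl clear\"", sizeof(buf));
	printf("%s\n", buf);	/* prints "ipl cle": truncated but terminated */
	return 0;
}
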
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 9acc6630abd3..bdf9c7cb5685 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -9,6 +9,7 @@
*/
#include <linux/kernel_stat.h>
+#include <linux/cpufeature.h>
#include <linux/interrupt.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
@@ -25,11 +26,13 @@
#include <asm/irq_regs.h>
#include <asm/cputime.h>
#include <asm/lowcore.h>
+#include <asm/machine.h>
#include <asm/irq.h>
#include <asm/hw_irq.h>
#include <asm/stacktrace.h>
#include <asm/softirq_stack.h>
#include <asm/vtime.h>
+#include <asm/asm.h>
#include "entry.h"
DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
@@ -76,13 +79,13 @@ static const struct irq_class irqclass_sub_desc[] = {
{.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"},
{.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"},
{.irq = IRQEXT_FTP, .name = "FTP", .desc = "[EXT] HMC FTP Service"},
+ {.irq = IRQEXT_WTI, .name = "WTI", .desc = "[EXT] Warning Track"},
{.irq = IRQIO_CIO, .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"},
{.irq = IRQIO_DAS, .name = "DAS", .desc = "[I/O] DASD"},
{.irq = IRQIO_C15, .name = "C15", .desc = "[I/O] 3215"},
{.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"},
{.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"},
{.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"},
- {.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"},
{.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"},
{.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"},
{.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"},
@@ -100,8 +103,8 @@ static const struct irq_class irqclass_sub_desc[] = {
static void do_IRQ(struct pt_regs *regs, int irq)
{
- if (tod_after_eq(S390_lowcore.int_clock,
- S390_lowcore.clock_comparator))
+ if (tod_after_eq(get_lowcore()->int_clock,
+ get_lowcore()->clock_comparator))
/* Serve timer interrupts first. */
clock_comparator_work();
generic_handle_irq(irq);
@@ -111,7 +114,7 @@ static int on_async_stack(void)
{
unsigned long frame = current_frame_address();
- return ((S390_lowcore.async_stack ^ frame) & ~(THREAD_SIZE - 1)) == 0;
+ return ((get_lowcore()->async_stack ^ frame) & ~(THREAD_SIZE - 1)) == 0;
}
static void do_irq_async(struct pt_regs *regs, int irq)
@@ -119,7 +122,7 @@ static void do_irq_async(struct pt_regs *regs, int irq)
if (on_async_stack()) {
do_IRQ(regs, irq);
} else {
- call_on_stack(2, S390_lowcore.async_stack, void, do_IRQ,
+ call_on_stack(2, get_lowcore()->async_stack, void, do_IRQ,
struct pt_regs *, regs, int, irq);
}
}
@@ -128,9 +131,13 @@ static int irq_pending(struct pt_regs *regs)
{
int cc;
- asm volatile("tpi 0\n"
- "ipm %0" : "=d" (cc) : : "cc");
- return cc >> 28;
+ asm volatile(
+ " tpi 0\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc)
+ :
+ : CC_CLOBBER);
+ return CC_TRANSFORM(cc);
}
void noinstr do_io_irq(struct pt_regs *regs)
@@ -143,7 +150,7 @@ void noinstr do_io_irq(struct pt_regs *regs)
if (user_mode(regs)) {
update_timer_sys();
- if (static_branch_likely(&cpu_has_bear))
+ if (cpu_has_bear())
current->thread.last_break = regs->last_break;
}
@@ -153,12 +160,12 @@ void noinstr do_io_irq(struct pt_regs *regs)
set_cpu_flag(CIF_NOHZ_DELAY);
do {
- regs->tpi_info = S390_lowcore.tpi_info;
- if (S390_lowcore.tpi_info.adapter_IO)
+ regs->tpi_info = get_lowcore()->tpi_info;
+ if (get_lowcore()->tpi_info.adapter_IO)
do_irq_async(regs, THIN_INTERRUPT);
else
do_irq_async(regs, IO_INTERRUPT);
- } while (MACHINE_IS_LPAR && irq_pending(regs));
+ } while (machine_is_lpar() && irq_pending(regs));
irq_exit_rcu();
@@ -179,13 +186,13 @@ void noinstr do_ext_irq(struct pt_regs *regs)
if (user_mode(regs)) {
update_timer_sys();
- if (static_branch_likely(&cpu_has_bear))
+ if (cpu_has_bear())
current->thread.last_break = regs->last_break;
}
- regs->int_code = S390_lowcore.ext_int_code_addr;
- regs->int_parm = S390_lowcore.ext_params;
- regs->int_parm_long = S390_lowcore.ext_params2;
+ regs->int_code = get_lowcore()->ext_int_code_addr;
+ regs->int_parm = get_lowcore()->ext_params;
+ regs->int_parm_long = get_lowcore()->ext_params2;
from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
if (from_idle)
@@ -252,7 +259,7 @@ int show_interrupts(struct seq_file *p, void *v)
seq_putc(p, '\n');
goto out;
}
- if (index < nr_irqs) {
+ if (index < irq_get_nr_irqs()) {
show_msi_interrupt(p, index);
goto out;
}
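
The irq_pending() rewrite drops the open-coded ipm/shift sequence in favour of the CC_IPM/CC_OUT/CC_TRANSFORM helpers from <asm/asm.h> (newly included above). As I understand them, these helpers let compilers that support condition-code (flag) output operands receive the cc directly and fall back to the classic sequence otherwise; that fallback is exactly what the removed lines did:

/* What the removed code did, and what the CC_* helpers reduce to when
 * the compiler cannot return the condition code as a flag output
 * (sketch based on the old hunk above): */
static int irq_pending_classic(void)
{
	int cc;

	asm volatile(
		"	tpi	0\n"
		"	ipm	%0\n"	/* insert program mask: cc lands in bits 28-29 */
		: "=d" (cc) : : "cc");
	return cc >> 28;	/* 0 = no pending I/O interrupt, nonzero = interrupt stored */
}
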
diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c
index 4d364de43799..143e34a4eca5 100644
--- a/arch/s390/kernel/kexec_elf.c
+++ b/arch/s390/kernel/kexec_elf.c
@@ -16,7 +16,7 @@
static int kexec_file_add_kernel_elf(struct kimage *image,
struct s390_load_data *data)
{
- struct kexec_buf buf;
+ struct kexec_buf buf = {};
const Elf_Ehdr *ehdr;
const Elf_Phdr *phdr;
Elf_Addr entry;
diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c
index a32ce8bea745..9a439175723c 100644
--- a/arch/s390/kernel/kexec_image.c
+++ b/arch/s390/kernel/kexec_image.c
@@ -16,7 +16,7 @@
static int kexec_file_add_kernel_image(struct kimage *image,
struct s390_load_data *data)
{
- struct kexec_buf buf;
+ struct kexec_buf buf = {};
buf.image = image;
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 05c83505e979..c450120b4474 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -13,6 +13,7 @@
#include <linux/ptrace.h>
#include <linux/preempt.h>
#include <linux/stop_machine.h>
+#include <linux/cpufeature.h>
#include <linux/kdebug.h>
#include <linux/uaccess.h>
#include <linux/extable.h>
@@ -21,6 +22,7 @@
#include <linux/hardirq.h>
#include <linux/ftrace.h>
#include <linux/execmem.h>
+#include <asm/text-patching.h>
#include <asm/set_memory.h>
#include <asm/sections.h>
#include <asm/dis.h>
@@ -152,7 +154,12 @@ void arch_arm_kprobe(struct kprobe *p)
{
struct swap_insn_args args = {.p = p, .arm_kprobe = 1};
- stop_machine_cpuslocked(swap_instruction, &args, NULL);
+ if (cpu_has_seq_insn()) {
+ swap_instruction(&args);
+ text_poke_sync();
+ } else {
+ stop_machine_cpuslocked(swap_instruction, &args, NULL);
+ }
}
NOKPROBE_SYMBOL(arch_arm_kprobe);
@@ -160,7 +167,12 @@ void arch_disarm_kprobe(struct kprobe *p)
{
struct swap_insn_args args = {.p = p, .arm_kprobe = 0};
- stop_machine_cpuslocked(swap_instruction, &args, NULL);
+ if (cpu_has_seq_insn()) {
+ swap_instruction(&args);
+ text_poke_sync();
+ } else {
+ stop_machine_cpuslocked(swap_instruction, &args, NULL);
+ }
}
NOKPROBE_SYMBOL(arch_disarm_kprobe);
@@ -478,6 +490,12 @@ int __init arch_init_kprobes(void)
return 0;
}
+int __init arch_populate_kprobe_blacklist(void)
+{
+ return kprobe_add_area_blacklist((unsigned long)__irqentry_text_start,
+ (unsigned long)__irqentry_text_end);
+}
+
int arch_trampoline_kprobe(struct kprobe *p)
{
return 0;
diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c
index 6652e54cf3db..6d1ffca5f798 100644
--- a/arch/s390/kernel/lgr.c
+++ b/arch/s390/kernel/lgr.c
@@ -166,7 +166,7 @@ static struct timer_list lgr_timer;
*/
static void lgr_timer_set(void)
{
- mod_timer(&lgr_timer, jiffies + msecs_to_jiffies(LGR_TIMER_INTERVAL_SECS * MSEC_PER_SEC));
+ mod_timer(&lgr_timer, jiffies + secs_to_jiffies(LGR_TIMER_INTERVAL_SECS));
}
/*
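
The lgr_timer_set() change is purely arithmetic: secs_to_jiffies() computes the same expiry as the old msecs_to_jiffies(secs * MSEC_PER_SEC) without the millisecond detour. Sketch of the equivalence, using the macro from the hunk above:

unsigned long old_expires = jiffies + msecs_to_jiffies(LGR_TIMER_INTERVAL_SECS * MSEC_PER_SEC);
unsigned long new_expires = jiffies + secs_to_jiffies(LGR_TIMER_INTERVAL_SECS);
/* old_expires == new_expires for any interval that fits in jiffies */
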
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 3aee98efc374..baeb3dcfc1c8 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -13,7 +13,9 @@
#include <linux/reboot.h>
#include <linux/ftrace.h>
#include <linux/debug_locks.h>
+#include <linux/cpufeature.h>
#include <asm/guarded_storage.h>
+#include <asm/machine.h>
#include <asm/pfault.h>
#include <asm/cio.h>
#include <asm/fpu.h>
@@ -52,7 +54,7 @@ static void __do_machine_kdump(void *data)
purgatory = (purgatory_t)image->start;
/* store_status() saved the prefix register to lowcore */
- prefix = (unsigned long) S390_lowcore.prefixreg_save_area;
+ prefix = (unsigned long)get_lowcore()->prefixreg_save_area;
/* Now do the reset */
s390_reset_system();
@@ -62,7 +64,7 @@ static void __do_machine_kdump(void *data)
* This need to be done *after* s390_reset_system set the
* prefix register of this CPU to zero
*/
- memcpy(absolute_pointer(__LC_FPREGS_SAVE_AREA),
+ memcpy(absolute_pointer(get_lowcore()->floating_pt_save_area),
phys_to_virt(prefix + __LC_FPREGS_SAVE_AREA), 512);
call_nodat(1, int, purgatory, int, 1);
@@ -91,10 +93,10 @@ static noinline void __machine_kdump(void *image)
continue;
}
/* Store status of the boot CPU */
- mcesa = __va(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
+ mcesa = __va(get_lowcore()->mcesad & MCESA_ORIGIN_MASK);
if (cpu_has_vx())
save_vx_regs((__vector128 *) mcesa->vector_save_area);
- if (MACHINE_HAS_GS) {
+ if (cpu_has_gs()) {
local_ctl_store(2, &cr2_old.reg);
cr2_new = cr2_old;
cr2_new.gse = 1;
@@ -178,7 +180,7 @@ void arch_kexec_unprotect_crashkres(void)
static int machine_kexec_prepare_kdump(void)
{
#ifdef CONFIG_CRASH_DUMP
- if (MACHINE_IS_VM)
+ if (machine_is_vm())
diag10_range(PFN_DOWN(crashk_res.start),
PFN_DOWN(crashk_res.end - crashk_res.start + 1));
return 0;
diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
index c2bac14dd668..a36d7311c668 100644
--- a/arch/s390/kernel/machine_kexec_file.c
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -129,7 +129,7 @@ static int kexec_file_update_purgatory(struct kimage *image,
static int kexec_file_add_purgatory(struct kimage *image,
struct s390_load_data *data)
{
- struct kexec_buf buf;
+ struct kexec_buf buf = {};
int ret;
buf.image = image;
@@ -152,7 +152,7 @@ static int kexec_file_add_purgatory(struct kimage *image,
static int kexec_file_add_initrd(struct kimage *image,
struct s390_load_data *data)
{
- struct kexec_buf buf;
+ struct kexec_buf buf = {};
int ret;
buf.image = image;
@@ -184,7 +184,7 @@ static int kexec_file_add_ipl_report(struct kimage *image,
{
__u32 *lc_ipl_parmblock_ptr;
unsigned int len, ncerts;
- struct kexec_buf buf;
+ struct kexec_buf buf = {};
unsigned long addr;
void *ptr, *end;
int ret;
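
These hunks, like the earlier ones in kexec_elf.c and kexec_image.c, give struct kexec_buf an empty initializer so that members the function never assigns are zeroed rather than left as stack garbage when kexec_add_buffer() later reads them. Minimal sketch (only buf.image is taken from the hunks above):

struct kexec_buf buf = {};	/* every member starts out as 0/NULL */

buf.image = image;		/* the fields that matter are then set explicitly */
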
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index ae4d4fd9afcd..1fec370fecf4 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -9,6 +9,7 @@
#include <asm/ftrace.h>
#include <asm/nospec-insn.h>
#include <asm/ptrace.h>
+#include <asm/march.h>
#define STACK_FRAME_SIZE_PTREGS (STACK_FRAME_OVERHEAD + __PT_SIZE)
#define STACK_PTREGS (STACK_FRAME_OVERHEAD)
@@ -88,7 +89,7 @@ SYM_CODE_START(ftrace_caller)
SYM_CODE_END(ftrace_caller)
SYM_CODE_START(ftrace_common)
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+#ifdef MARCH_HAS_Z196_FEATURES
aghik %r2,%r0,-MCOUNT_INSN_SIZE
lgrl %r4,function_trace_op
lgrl %r1,ftrace_func
@@ -103,19 +104,8 @@ SYM_CODE_START(ftrace_common)
lgr %r3,%r14
la %r5,STACK_FREGS(%r15)
BASR_EX %r14,%r1
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-# The j instruction gets runtime patched to a nop instruction.
-# See ftrace_enable_ftrace_graph_caller.
-SYM_INNER_LABEL(ftrace_graph_caller, SYM_L_GLOBAL)
- j .Lftrace_graph_caller_end
- lmg %r2,%r3,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15)
- lg %r4,(STACK_FREGS_PTREGS_PSW+8)(%r15)
- brasl %r14,prepare_ftrace_return
- stg %r2,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15)
-.Lftrace_graph_caller_end:
-#endif
lg %r0,(STACK_FREGS_PTREGS_PSW+8)(%r15)
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+#ifdef MARCH_HAS_Z196_FEATURES
ltg %r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15)
locgrz %r1,%r0
#else
@@ -133,14 +123,14 @@ SYM_CODE_END(ftrace_common)
SYM_FUNC_START(return_to_handler)
stmg %r2,%r5,32(%r15)
lgr %r1,%r15
- aghi %r15,-(STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE)
+ # allocate ftrace_regs and stack frame for ftrace_return_to_handler
+ aghi %r15,-STACK_FRAME_SIZE_FREGS
stg %r1,__SF_BACKCHAIN(%r15)
- la %r3,STACK_FRAME_OVERHEAD(%r15)
- stg %r1,__FGRAPH_RET_FP(%r3)
- stg %r2,__FGRAPH_RET_GPR2(%r3)
- lgr %r2,%r3
+ stg %r2,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15)
+ stg %r1,(STACK_FREGS_PTREGS_GPRS+15*8)(%r15)
+ la %r2,STACK_FRAME_OVERHEAD(%r15)
brasl %r14,ftrace_return_to_handler
- aghi %r15,STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE
+ aghi %r15,STACK_FRAME_SIZE_FREGS
lgr %r14,%r2
lmg %r2,%r5,32(%r15)
BR_EX %r14
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 91e207b50394..9d1f8a50f5a4 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -22,12 +22,14 @@
#include <linux/bug.h>
#include <linux/memory.h>
#include <linux/execmem.h>
+#include <asm/arch-stackprotector.h>
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/facility.h>
#include <asm/ftrace.lds.h>
#include <asm/set_memory.h>
#include <asm/setup.h>
+#include <asm/asm-offsets.h>
#if 0
#define DEBUGP printk
@@ -495,9 +497,7 @@ int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *s;
char *secstrings, *secname;
void *aseg;
-#ifdef CONFIG_FUNCTION_TRACER
- int ret;
-#endif
+ int rc = 0;
if (IS_ENABLED(CONFIG_EXPOLINE) &&
!nospec_disable && me->arch.plt_size) {
@@ -527,14 +527,21 @@ int module_finalize(const Elf_Ehdr *hdr,
(str_has_prefix(secname, ".s390_return")))
nospec_revert(aseg, aseg + s->sh_size);
+ if (IS_ENABLED(CONFIG_STACKPROTECTOR) &&
+ (str_has_prefix(secname, "__stack_protector_loc"))) {
+ rc = stack_protector_apply(aseg, aseg + s->sh_size);
+ if (rc)
+ break;
+ }
+
#ifdef CONFIG_FUNCTION_TRACER
if (!strcmp(FTRACE_CALLSITE_SECTION, secname)) {
- ret = module_alloc_ftrace_hotpatch_trampolines(me, s);
- if (ret < 0)
- return ret;
+ rc = module_alloc_ftrace_hotpatch_trampolines(me, s);
+ if (rc)
+ break;
}
#endif /* CONFIG_FUNCTION_TRACER */
}
- return 0;
+ return rc;
}
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 230d010bac9b..a55abbf65333 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -9,6 +9,8 @@
*/
#include <linux/kernel_stat.h>
+#include <linux/utsname.h>
+#include <linux/cpufeature.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/entry-common.h>
@@ -20,7 +22,6 @@
#include <linux/module.h>
#include <linux/sched/signal.h>
#include <linux/kvm_host.h>
-#include <linux/export.h>
#include <asm/lowcore.h>
#include <asm/ctlreg.h>
#include <asm/fpu.h>
@@ -45,7 +46,7 @@ static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
static inline int nmi_needs_mcesa(void)
{
- return cpu_has_vx() || MACHINE_HAS_GS;
+ return cpu_has_vx() || cpu_has_gs();
}
/*
@@ -61,7 +62,7 @@ void __init nmi_alloc_mcesa_early(u64 *mcesad)
if (!nmi_needs_mcesa())
return;
*mcesad = __pa(&boot_mcesa);
- if (MACHINE_HAS_GS)
+ if (cpu_has_gs())
*mcesad |= ilog2(MCESA_MAX_SIZE);
}
@@ -73,14 +74,14 @@ int nmi_alloc_mcesa(u64 *mcesad)
*mcesad = 0;
if (!nmi_needs_mcesa())
return 0;
- size = MACHINE_HAS_GS ? MCESA_MAX_SIZE : MCESA_MIN_SIZE;
+ size = cpu_has_gs() ? MCESA_MAX_SIZE : MCESA_MIN_SIZE;
origin = kmalloc(size, GFP_KERNEL);
if (!origin)
return -ENOMEM;
/* The pointer is stored with mcesa_bits ORed in */
kmemleak_not_leak(origin);
*mcesad = __pa(origin);
- if (MACHINE_HAS_GS)
+ if (cpu_has_gs())
*mcesad |= ilog2(MCESA_MAX_SIZE);
return 0;
}
@@ -115,17 +116,82 @@ static __always_inline char *u64_to_hex(char *dest, u64 val)
return dest;
}
-static notrace void s390_handle_damage(void)
+static notrace void nmi_print_info(void)
{
- union ctlreg0 cr0, cr0_new;
+ struct lowcore *lc = get_lowcore();
char message[100];
- psw_t psw_save;
char *ptr;
+ int i;
+
+ ptr = nmi_puts(message, "Unrecoverable machine check, code: ");
+ ptr = u64_to_hex(ptr, lc->mcck_interruption_code);
+ ptr = nmi_puts(ptr, "\n");
+ sclp_emergency_printk(message);
+
+ ptr = nmi_puts(message, init_utsname()->release);
+ ptr = nmi_puts(ptr, "\n");
+ sclp_emergency_printk(message);
+
+ ptr = nmi_puts(message, arch_hw_string);
+ ptr = nmi_puts(ptr, "\n");
+ sclp_emergency_printk(message);
+
+ ptr = nmi_puts(message, "PSW: ");
+ ptr = u64_to_hex(ptr, lc->mcck_old_psw.mask);
+ ptr = nmi_puts(ptr, " ");
+ ptr = u64_to_hex(ptr, lc->mcck_old_psw.addr);
+ ptr = nmi_puts(ptr, " PFX: ");
+ ptr = u64_to_hex(ptr, (u64)get_lowcore());
+ ptr = nmi_puts(ptr, "\n");
+ sclp_emergency_printk(message);
+
+ ptr = nmi_puts(message, "LBA: ");
+ ptr = u64_to_hex(ptr, lc->last_break_save_area);
+ ptr = nmi_puts(ptr, " EDC: ");
+ ptr = u64_to_hex(ptr, lc->external_damage_code);
+ ptr = nmi_puts(ptr, " FSA: ");
+ ptr = u64_to_hex(ptr, lc->failing_storage_address);
+ ptr = nmi_puts(ptr, "\n");
+ sclp_emergency_printk(message);
+
+ ptr = nmi_puts(message, "CRS:\n");
+ sclp_emergency_printk(message);
+ ptr = message;
+ for (i = 0; i < 16; i++) {
+ ptr = u64_to_hex(ptr, lc->cregs_save_area[i].val);
+ ptr = nmi_puts(ptr, " ");
+ if ((i + 1) % 4 == 0) {
+ ptr = nmi_puts(ptr, "\n");
+ sclp_emergency_printk(message);
+ ptr = message;
+ }
+ }
+
+ ptr = nmi_puts(message, "GPRS:\n");
+ sclp_emergency_printk(message);
+ ptr = message;
+ for (i = 0; i < 16; i++) {
+ ptr = u64_to_hex(ptr, lc->gpregs_save_area[i]);
+ ptr = nmi_puts(ptr, " ");
+ if ((i + 1) % 4 == 0) {
+ ptr = nmi_puts(ptr, "\n");
+ sclp_emergency_printk(message);
+ ptr = message;
+ }
+ }
+
+ ptr = nmi_puts(message, "System stopped\n");
+ sclp_emergency_printk(message);
+}
+
+static notrace void __noreturn s390_handle_damage(void)
+{
+ struct lowcore *lc = get_lowcore();
+ union ctlreg0 cr0, cr0_new;
+ psw_t psw_save;
smp_emergency_stop();
diag_amode31_ops.diag308_reset();
- ptr = nmi_puts(message, "System stopped due to unrecoverable machine check, code: 0x");
- u64_to_hex(ptr, S390_lowcore.mcck_interruption_code);
/*
* Disable low address protection and make machine check new PSW a
@@ -135,20 +201,19 @@ static notrace void s390_handle_damage(void)
cr0_new = cr0;
cr0_new.lap = 0;
local_ctl_load(0, &cr0_new.reg);
- psw_save = S390_lowcore.mcck_new_psw;
- psw_bits(S390_lowcore.mcck_new_psw).io = 0;
- psw_bits(S390_lowcore.mcck_new_psw).ext = 0;
- psw_bits(S390_lowcore.mcck_new_psw).wait = 1;
- sclp_emergency_printk(message);
+ psw_save = lc->mcck_new_psw;
+ psw_bits(lc->mcck_new_psw).io = 0;
+ psw_bits(lc->mcck_new_psw).ext = 0;
+ psw_bits(lc->mcck_new_psw).wait = 1;
+ nmi_print_info();
/*
* Restore machine check new PSW and control register 0 to original
* values. This makes possible system dump analysis easier.
*/
- S390_lowcore.mcck_new_psw = psw_save;
+ lc->mcck_new_psw = psw_save;
local_ctl_load(0, &cr0.reg);
disabled_wait();
- while (1);
}
NOKPROBE_SYMBOL(s390_handle_damage);
@@ -226,7 +291,7 @@ static bool notrace nmi_registers_valid(union mci mci)
/*
* Set the clock comparator register to the next expected value.
*/
- set_clock_comparator(S390_lowcore.clock_comparator);
+ set_clock_comparator(get_lowcore()->clock_comparator);
if (!mci.gr || !mci.fp || !mci.fc)
return false;
/*
@@ -252,7 +317,7 @@ static bool notrace nmi_registers_valid(union mci mci)
* check handling must take care of this. The host values are saved by
* KVM and are not affected.
*/
- cr2.reg = S390_lowcore.cregs_save_area[2];
+ cr2.reg = get_lowcore()->cregs_save_area[2];
if (cr2.gse && !mci.gs && !test_cpu_flag(CIF_MCCK_GUEST))
return false;
if (!mci.ms || !mci.pm || !mci.ia)
@@ -278,11 +343,10 @@ static void notrace s390_backup_mcck_info(struct pt_regs *regs)
sie_page = container_of(sie_block, struct sie_page, sie_block);
mcck_backup = &sie_page->mcck_info;
- mcck_backup->mcic = S390_lowcore.mcck_interruption_code &
+ mcck_backup->mcic = get_lowcore()->mcck_interruption_code &
~(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE);
- mcck_backup->ext_damage_code = S390_lowcore.external_damage_code;
- mcck_backup->failing_storage_address
- = S390_lowcore.failing_storage_address;
+ mcck_backup->ext_damage_code = get_lowcore()->external_damage_code;
+ mcck_backup->failing_storage_address = get_lowcore()->failing_storage_address;
}
NOKPROBE_SYMBOL(s390_backup_mcck_info);
@@ -302,6 +366,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
static int ipd_count;
static DEFINE_SPINLOCK(ipd_lock);
static unsigned long long last_ipd;
+ struct lowcore *lc = get_lowcore();
struct mcck_struct *mcck;
unsigned long long tmp;
irqentry_state_t irq_state;
@@ -314,7 +379,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
if (user_mode(regs))
update_timer_mcck();
inc_irq_stat(NMI_NMI);
- mci.val = S390_lowcore.mcck_interruption_code;
+ mci.val = lc->mcck_interruption_code;
mcck = this_cpu_ptr(&cpu_mcck);
/*
@@ -382,9 +447,9 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
}
if (mci.ed && mci.ec) {
/* External damage */
- if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
+ if (lc->external_damage_code & (1U << ED_STP_SYNC))
mcck->stp_queue |= stp_sync_check();
- if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
+ if (lc->external_damage_code & (1U << ED_STP_ISLAND))
mcck->stp_queue |= stp_island_check();
mcck_pending = 1;
}
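
The new nmi_print_info() avoids printk entirely in this unrecoverable machine-check path: it formats into a small fixed message buffer with nmi_puts()/u64_to_hex() and pushes each completed line out through sclp_emergency_printk(), flushing after every four registers so no line overruns the buffer. A user-space sketch of that chunked-formatting pattern (emit() stands in for sclp_emergency_printk(), snprintf() for the nmi_puts()/u64_to_hex() helpers):

#include <stdio.h>

static void emit(const char *msg)
{
	fputs(msg, stdout);
}

/* Dump n register values, four per line, into a 100-byte line buffer
 * that is flushed whenever a line is complete. */
static void dump_regs(const unsigned long long *regs, int n)
{
	char line[100];
	int i, len = 0;

	for (i = 0; i < n; i++) {
		len += snprintf(line + len, sizeof(line) - len, "%016llx ", regs[i]);
		if ((i + 1) % 4 == 0) {
			snprintf(line + len, sizeof(line) - len, "\n");
			emit(line);
			len = 0;
		}
	}
}

int main(void)
{
	unsigned long long gprs[16] = { 0x1, 0x2, 0x3, 0x4 };

	dump_regs(gprs, 16);
	return 0;
}
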
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
index 9b8c24ebb008..e11ec15960a1 100644
--- a/arch/s390/kernel/nospec-branch.c
+++ b/arch/s390/kernel/nospec-branch.c
@@ -4,6 +4,8 @@
#include <linux/cpu.h>
#include <asm/nospec-branch.h>
+int nobp = IS_ENABLED(CONFIG_KERNEL_NOBP);
+
static int __init nobp_setup_early(char *str)
{
bool enabled;
@@ -17,11 +19,11 @@ static int __init nobp_setup_early(char *str)
* The user explicitly requested nobp=1, enable it and
* disable the expoline support.
*/
- __set_facility(82, alt_stfle_fac_list);
+ nobp = 1;
if (IS_ENABLED(CONFIG_EXPOLINE))
nospec_disable = 1;
} else {
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
}
return 0;
}
@@ -29,7 +31,7 @@ early_param("nobp", nobp_setup_early);
static int __init nospec_setup_early(char *str)
{
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
return 0;
}
early_param("nospec", nospec_setup_early);
@@ -40,7 +42,7 @@ static int __init nospec_report(void)
pr_info("Spectre V2 mitigation: etokens\n");
if (nospec_uses_trampoline())
pr_info("Spectre V2 mitigation: execute trampolines\n");
- if (__test_facility(82, alt_stfle_fac_list))
+ if (nobp_enabled())
pr_info("Spectre V2 mitigation: limited branch prediction\n");
return 0;
}
@@ -66,14 +68,14 @@ void __init nospec_auto_detect(void)
*/
if (__is_defined(CC_USING_EXPOLINE))
nospec_disable = 1;
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
} else if (__is_defined(CC_USING_EXPOLINE)) {
/*
* The kernel has been compiled with expolines.
* Keep expolines enabled and disable nobp.
*/
nospec_disable = 0;
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
}
/*
* If the kernel has not been compiled with expolines the
@@ -86,7 +88,7 @@ static int __init spectre_v2_setup_early(char *str)
{
if (str && !strncmp(str, "on", 2)) {
nospec_disable = 0;
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
}
if (str && !strncmp(str, "off", 3))
nospec_disable = 1;
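
The facility-bit juggling on alt_stfle_fac_list is replaced here by a plain int nobp, and the mitigation report goes through nobp_enabled(). That helper is not part of this hunk; presumably it combines the nobp flag with a check that facility 82 (branch prediction blocking) is actually installed, roughly like the assumed sketch below (illustration only, not taken from the diff):

/* Hypothetical shape of nobp_enabled(); the real helper is declared
 * in asm/nospec-branch.h. */
static inline bool nobp_enabled(void)
{
	return nobp && test_facility(82);
}
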
diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c
index 52d4353188ad..5970dd3ee7c5 100644
--- a/arch/s390/kernel/nospec-sysfs.c
+++ b/arch/s390/kernel/nospec-sysfs.c
@@ -7,17 +7,17 @@
ssize_t cpu_show_spectre_v1(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+ return sysfs_emit(buf, "Mitigation: __user pointer sanitization\n");
}
ssize_t cpu_show_spectre_v2(struct device *dev,
struct device_attribute *attr, char *buf)
{
if (test_facility(156))
- return sprintf(buf, "Mitigation: etokens\n");
+ return sysfs_emit(buf, "Mitigation: etokens\n");
if (nospec_uses_trampoline())
- return sprintf(buf, "Mitigation: execute trampolines\n");
- if (__test_facility(82, alt_stfle_fac_list))
- return sprintf(buf, "Mitigation: limited branch prediction\n");
- return sprintf(buf, "Vulnerable\n");
+ return sysfs_emit(buf, "Mitigation: execute trampolines\n");
+ if (nobp_enabled())
+ return sysfs_emit(buf, "Mitigation: limited branch prediction\n");
+ return sysfs_emit(buf, "Vulnerable\n");
}
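
These show() conversions, like the earlier ones in ipl.c, swap sprintf() for sysfs_emit(), which knows it is writing into a sysfs page buffer and therefore never writes past PAGE_SIZE (warning if buf is not page aligned) while still returning the byte count. Sketch of the resulting callback shape:

/* Typical converted show() routine (illustrative only): */
static ssize_t example_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%s\n", "Vulnerable");	/* bounded to PAGE_SIZE */
}
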
diff --git a/arch/s390/kernel/numa.c b/arch/s390/kernel/numa.c
index 23ab9f02f278..2fc40f97c0ad 100644
--- a/arch/s390/kernel/numa.c
+++ b/arch/s390/kernel/numa.c
@@ -14,9 +14,6 @@
#include <linux/node.h>
#include <asm/numa.h>
-struct pglist_data *node_data[MAX_NUMNODES];
-EXPORT_SYMBOL(node_data);
-
void __init numa_setup(void)
{
int nid;
@@ -24,12 +21,8 @@ void __init numa_setup(void)
nodes_clear(node_possible_map);
node_set(0, node_possible_map);
node_set_online(0);
- for (nid = 0; nid < MAX_NUMNODES; nid++) {
- NODE_DATA(nid) = memblock_alloc(sizeof(pg_data_t), 8);
- if (!NODE_DATA(nid))
- panic("%s: Failed to allocate %zu bytes align=0x%x\n",
- __func__, sizeof(pg_data_t), 8);
- }
+ for (nid = 0; nid < MAX_NUMNODES; nid++)
+ NODE_DATA(nid) = memblock_alloc_or_panic(sizeof(pg_data_t), 8);
NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT;
NODE_DATA(0)->node_id = 0;
}
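
memblock_alloc_or_panic() folds in exactly the allocate-and-panic pattern the removed loop open-coded; for each node the call is equivalent to the following sketch, reconstructed from the removed lines (not the generic implementation in mm/memblock.c):

void *ptr = memblock_alloc(sizeof(pg_data_t), 8);
if (!ptr)
	panic("%s: Failed to allocate %zu bytes align=0x%x\n",
	      __func__, sizeof(pg_data_t), 8);
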
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
index b695f980bbde..94fa44776d0c 100644
--- a/arch/s390/kernel/os_info.c
+++ b/arch/s390/kernel/os_info.c
@@ -6,8 +6,7 @@
* Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
*/
-#define KMSG_COMPONENT "os_info"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "os_info: " fmt
#include <linux/crash_dump.h>
#include <linux/kernel.h>
@@ -18,6 +17,7 @@
#include <asm/physmem_info.h>
#include <asm/maccess.h>
#include <asm/asm-offsets.h>
+#include <asm/sections.h>
#include <asm/ipl.h>
/*
@@ -180,7 +180,7 @@ fail:
}
/*
- * Return pointer to os infor entry and its size
+ * Return pointer to os info entry and its size
*/
void *os_info_old_entry(int nr, unsigned long *size)
{
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 1434642e9cba..408ab93112bf 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -6,15 +6,13 @@
* Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
* Thomas Richter <tmricht@linux.ibm.com>
*/
-#define KMSG_COMPONENT "cpum_cf"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "cpum_cf: " fmt
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/init.h>
-#include <linux/export.h>
#include <linux/miscdevice.h>
#include <linux/perf_event.h>
@@ -22,6 +20,10 @@
#include <asm/hwctrset.h>
#include <asm/debug.h>
+/* Perf PMU definitions for the counter facility */
+#define PERF_CPUM_CF_MAX_CTR 0xffffUL /* Max ctr for ECCTR */
+#define PERF_EVENT_CPUM_CF_DIAG 0xBC000UL /* Event: Counter sets */
+
enum cpumf_ctr_set {
CPUMF_CTR_SET_BASIC = 0, /* Basic Counter Set */
CPUMF_CTR_SET_USER = 1, /* Problem-State Counter Set */
@@ -438,7 +440,7 @@ static void cpum_cf_make_setsize(enum cpumf_ctr_set ctrset)
ctrset_size = 48;
else if (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5)
ctrset_size = 128;
- else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7)
+ else if (cpumf_ctr_info.csvn >= 6 && cpumf_ctr_info.csvn <= 8)
ctrset_size = 160;
break;
case CPUMF_CTR_SET_MT_DIAG:
@@ -556,25 +558,31 @@ static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth)
struct cf_trailer_entry *trailer_start, *trailer_stop;
struct cf_ctrset_entry *ctrstart, *ctrstop;
size_t offset = 0;
+ int i;
- auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1;
- do {
+ for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset);
ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset);
+ /* Counter set not authorized */
+ if (!(auth & cpumf_ctr_ctl[i]))
+ continue;
+ /* Counter set size zero was not saved */
+ if (!cpum_cf_read_setsize(i))
+ continue;
+
if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) {
pr_err_once("cpum_cf_diag counter set compare error "
"in set %i\n", ctrstart->set);
return 0;
}
- auth &= ~cpumf_ctr_ctl[ctrstart->set];
if (ctrstart->def == CF_DIAG_CTRSET_DEF) {
cfdiag_diffctrset((u64 *)(ctrstart + 1),
(u64 *)(ctrstop + 1), ctrstart->ctr);
offset += ctrstart->ctr * sizeof(u64) +
sizeof(*ctrstart);
}
- } while (ctrstart->def && auth);
+ }
/* Save time_stamp from start of event in stop's trailer */
trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset);
@@ -751,8 +759,6 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
break;
case PERF_TYPE_HARDWARE:
- if (is_sampling_event(event)) /* No sampling support */
- return -ENOENT;
ev = attr->config;
if (!attr->exclude_user && attr->exclude_kernel) {
/*
@@ -825,7 +831,7 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
return validate_ctr_version(hwc->config, set);
}
-/* Events CPU_CYLCES and INSTRUCTIONS can be submitted with two different
+/* Events CPU_CYCLES and INSTRUCTIONS can be submitted with two different
* attribute::type values:
* - PERF_TYPE_HARDWARE:
* - pmu->type:
@@ -848,18 +854,15 @@ static int cpumf_pmu_event_type(struct perf_event *event)
static int cpumf_pmu_event_init(struct perf_event *event)
{
unsigned int type = event->attr.type;
- int err;
+ int err = -ENOENT;
+ if (is_sampling_event(event)) /* No sampling support */
+ return err;
if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW)
err = __hw_perf_event_init(event, type);
else if (event->pmu->type == type)
/* Registered as unknown PMU */
err = __hw_perf_event_init(event, cpumf_pmu_event_type(event));
- else
- return -ENOENT;
-
- if (unlikely(err) && event->destroy)
- event->destroy(event);
return err;
}
@@ -869,8 +872,8 @@ static int hw_perf_event_reset(struct perf_event *event)
u64 prev, new;
int err;
+ prev = local64_read(&event->hw.prev_count);
do {
- prev = local64_read(&event->hw.prev_count);
err = ecctr(event->hw.config, &new);
if (err) {
if (err != 3)
@@ -882,7 +885,7 @@ static int hw_perf_event_reset(struct perf_event *event)
*/
new = 0;
}
- } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+ } while (!local64_try_cmpxchg(&event->hw.prev_count, &prev, new));
return err;
}
@@ -892,12 +895,12 @@ static void hw_perf_event_update(struct perf_event *event)
u64 prev, new, delta;
int err;
+ prev = local64_read(&event->hw.prev_count);
do {
- prev = local64_read(&event->hw.prev_count);
err = ecctr(event->hw.config, &new);
if (err)
return;
- } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+ } while (!local64_try_cmpxchg(&event->hw.prev_count, &prev, new));
delta = (prev <= new) ? new - prev
: (-1ULL - prev) + new + 1; /* overflow */
@@ -971,12 +974,10 @@ static int cfdiag_push_sample(struct perf_event *event,
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.frag.size = cpuhw->usedss;
raw.frag.data = cpuhw->stop;
- perf_sample_save_raw_data(&data, &raw);
+ perf_sample_save_raw_data(&data, event, &raw);
}
overflow = perf_event_overflow(event, &data, &regs);
- if (overflow)
- event->pmu->stop(event, 0);
perf_event_update_userpage(event);
return overflow;
@@ -1044,7 +1045,7 @@ static void cpumf_pmu_del(struct perf_event *event, int flags)
*
* When a new perf event has been added but not yet started, this can
* clear enable control and resets all counters in a set. Therefore,
- * cpumf_pmu_start() always has to reenable a counter set.
+ * cpumf_pmu_start() always has to re-enable a counter set.
*/
for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i)
if (!atomic_read(&cpuhw->ctr_set[i]))
@@ -1204,7 +1205,7 @@ static int __init cpumf_pmu_init(void)
}
/* Setup s390dbf facility */
- cf_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128);
+ cf_dbg = debug_register("cpum_cf", 2, 1, 128);
if (!cf_dbg) {
pr_err("Registration of s390dbf(cpum_cf) failed\n");
rc = -ENOMEM;
@@ -1687,8 +1688,6 @@ static const struct file_operations cfset_fops = {
.open = cfset_open,
.release = cfset_release,
.unlocked_ioctl = cfset_ioctl,
- .compat_ioctl = cfset_ioctl,
- .llseek = no_llseek
};
static struct miscdevice cfset_dev = {
@@ -1810,8 +1809,6 @@ static int cfdiag_event_init(struct perf_event *event)
event->destroy = hw_perf_event_destroy;
err = cfdiag_event_init2(event);
- if (unlikely(err))
- event->destroy(event);
out:
return err;
}
@@ -1854,7 +1851,7 @@ static const struct attribute_group *cfdiag_attr_groups[] = {
/* Performance monitoring unit for event CF_DIAG. Since this event
* is also started and stopped via the perf_event_open() system call, use
* the same event enable/disable call back functions. They do not
- * have a pointer to the perf_event strcture as first parameter.
+ * have a pointer to the perf_event structure as first parameter.
*
* The functions XXX_add, XXX_del, XXX_start and XXX_stop are also common.
* Reuse them and distinguish the event (always first parameter) via
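
The counter read-back loops in hw_perf_event_reset() and hw_perf_event_update() switch to local64_try_cmpxchg(), which refreshes prev in place when the compare fails, so the value only needs to be read once before the loop. The two idioms side by side, taken from the hunks above:

/* Old idiom: re-read prev on every iteration and compare against the
 * value returned by cmpxchg: */
do {
	prev = local64_read(&event->hw.prev_count);
	err = ecctr(event->hw.config, &new);
	if (err)
		return;
} while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);

/* New idiom: try_cmpxchg() updates 'prev' when it loses the race, so a
 * single read before the loop suffices: */
prev = local64_read(&event->hw.prev_count);
do {
	err = ecctr(event->hw.config, &new);
	if (err)
		return;
} while (!local64_try_cmpxchg(&event->hw.prev_count, &prev, new));
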
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
index e4a6bfc91080..7ace1f9e4ccf 100644
--- a/arch/s390/kernel/perf_cpum_cf_events.c
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -237,7 +237,6 @@ CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_NO_SPECIAL, 0x00f4);
CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_SPECIAL, 0x00f5);
CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
-
CPUMF_EVENT_ATTR(cf_z15, L1D_RO_EXCL_WRITES, 0x0080);
CPUMF_EVENT_ATTR(cf_z15, DTLB2_WRITES, 0x0081);
CPUMF_EVENT_ATTR(cf_z15, DTLB2_MISSES, 0x0082);
@@ -291,8 +290,8 @@ CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_NO_SPECIAL, 0x00f4);
CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_SPECIAL, 0x00f5);
CPUMF_EVENT_ATTR(cf_z15, DFLT_ACCESS, 0x00f7);
CPUMF_EVENT_ATTR(cf_z15, DFLT_CYCLES, 0x00fc);
-CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108);
-CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x00109);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x0108);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x0109);
CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
CPUMF_EVENT_ATTR(cf_z16, L1D_RO_EXCL_WRITES, 0x0080);
@@ -365,6 +364,83 @@ CPUMF_EVENT_ATTR(cf_z16, NNPA_WAIT_LOCK, 0x010d);
CPUMF_EVENT_ATTR(cf_z16, NNPA_HOLD_LOCK, 0x010e);
CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
+CPUMF_EVENT_ATTR(cf_z17, L1D_RO_EXCL_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z17, DTLB2_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z17, DTLB2_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z17, CRSTE_1MB_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z17, DTLB2_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z17, ITLB2_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z17, ITLB2_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z17, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z17, TLB2_CRSTE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z17, TLB2_ENGINES_BUSY, 0x008b);
+CPUMF_EVENT_ATTR(cf_z17, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z17, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z17, L1C_TLB2_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z17, DCW_REQ, 0x0091);
+CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_IV, 0x0092);
+CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_CHIP_HIT, 0x0093);
+CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_DRAWER_HIT, 0x0094);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP, 0x0095);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_CHIP_HIT, 0x0097);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT, 0x0098);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE, 0x0099);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER, 0x009a);
+CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER, 0x009b);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_MEMORY, 0x009c);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE_MEMORY, 0x009d);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER_MEMORY, 0x009e);
+CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER_MEMORY, 0x009f);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_IV, 0x00a0);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT, 0x00a1);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_IV, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_IV, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z17, ICW_REQ, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_IV, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_CHIP_HIT, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_DRAWER_HIT, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_IV, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_CHIP_HIT, 0x00af);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT, 0x00b0);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_MODULE, 0x00b1);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_DRAWER, 0x00b2);
+CPUMF_EVENT_ATTR(cf_z17, ICW_OFF_DRAWER, 0x00b3);
+CPUMF_EVENT_ATTR(cf_z17, CYCLES_SAMETHRD, 0x00ca);
+CPUMF_EVENT_ATTR(cf_z17, CYCLES_DIFFTHRD, 0x00cb);
+CPUMF_EVENT_ATTR(cf_z17, INST_SAMETHRD, 0x00cc);
+CPUMF_EVENT_ATTR(cf_z17, INST_DIFFTHRD, 0x00cd);
+CPUMF_EVENT_ATTR(cf_z17, WRONG_BRANCH_PREDICTION, 0x00ce);
+CPUMF_EVENT_ATTR(cf_z17, VX_BCD_EXECUTION_SLOTS, 0x00e1);
+CPUMF_EVENT_ATTR(cf_z17, DECIMAL_INSTRUCTIONS, 0x00e2);
+CPUMF_EVENT_ATTR(cf_z17, LAST_HOST_TRANSLATIONS, 0x00e8);
+CPUMF_EVENT_ATTR(cf_z17, TX_NC_TABORT, 0x00f4);
+CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_NO_SPECIAL, 0x00f5);
+CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_SPECIAL, 0x00f6);
+CPUMF_EVENT_ATTR(cf_z17, DFLT_ACCESS, 0x00f8);
+CPUMF_EVENT_ATTR(cf_z17, DFLT_CYCLES, 0x00fd);
+CPUMF_EVENT_ATTR(cf_z17, SORTL, 0x0100);
+CPUMF_EVENT_ATTR(cf_z17, DFLT_CC, 0x0109);
+CPUMF_EVENT_ATTR(cf_z17, DFLT_CCFINISH, 0x010a);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_INVOCATIONS, 0x010b);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPLETIONS, 0x010c);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_WAIT_LOCK, 0x010d);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_HOLD_LOCK, 0x010e);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_ONCHIP, 0x0110);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_OFFCHIP, 0x0111);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_DIFF, 0x0112);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_4K_PREFETCH, 0x0114);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPL_LOCK, 0x0115);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK, 0x0116);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO, 0x0117);
+CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = {
CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES),
@@ -414,7 +490,7 @@ static struct attribute *cpumcf_svn_12345_pmu_event_attr[] __initdata = {
NULL,
};
-static struct attribute *cpumcf_svn_67_pmu_event_attr[] __initdata = {
+static struct attribute *cpumcf_svn_678_pmu_event_attr[] __initdata = {
CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS),
CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES),
CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS),
@@ -779,6 +855,87 @@ static struct attribute *cpumcf_z16_pmu_event_attr[] __initdata = {
NULL,
};
+static struct attribute *cpumcf_z17_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z17, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, DTLB2_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, DTLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z17, CRSTE_1MB_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, DTLB2_GPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, ITLB2_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, ITLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z17, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, TLB2_ENGINES_BUSY),
+ CPUMF_EVENT_PTR(cf_z17, TX_C_TEND),
+ CPUMF_EVENT_PTR(cf_z17, TX_NC_TEND),
+ CPUMF_EVENT_PTR(cf_z17, L1C_TLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z17, DCW_REQ),
+ CPUMF_EVENT_PTR(cf_z17, DCW_REQ_IV),
+ CPUMF_EVENT_PTR(cf_z17, DCW_REQ_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, DCW_REQ_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_IV),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER),
+ CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_MEMORY),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE_MEMORY),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER_MEMORY),
+ CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER_MEMORY),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_IV),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_IV),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_IV),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, ICW_REQ),
+ CPUMF_EVENT_PTR(cf_z17, ICW_REQ_IV),
+ CPUMF_EVENT_PTR(cf_z17, ICW_REQ_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, ICW_REQ_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP),
+ CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_IV),
+ CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, ICW_ON_MODULE),
+ CPUMF_EVENT_PTR(cf_z17, ICW_ON_DRAWER),
+ CPUMF_EVENT_PTR(cf_z17, ICW_OFF_DRAWER),
+ CPUMF_EVENT_PTR(cf_z17, CYCLES_SAMETHRD),
+ CPUMF_EVENT_PTR(cf_z17, CYCLES_DIFFTHRD),
+ CPUMF_EVENT_PTR(cf_z17, INST_SAMETHRD),
+ CPUMF_EVENT_PTR(cf_z17, INST_DIFFTHRD),
+ CPUMF_EVENT_PTR(cf_z17, WRONG_BRANCH_PREDICTION),
+ CPUMF_EVENT_PTR(cf_z17, VX_BCD_EXECUTION_SLOTS),
+ CPUMF_EVENT_PTR(cf_z17, DECIMAL_INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf_z17, LAST_HOST_TRANSLATIONS),
+ CPUMF_EVENT_PTR(cf_z17, TX_NC_TABORT),
+ CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_NO_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z17, DFLT_ACCESS),
+ CPUMF_EVENT_PTR(cf_z17, DFLT_CYCLES),
+ CPUMF_EVENT_PTR(cf_z17, SORTL),
+ CPUMF_EVENT_PTR(cf_z17, DFLT_CC),
+ CPUMF_EVENT_PTR(cf_z17, DFLT_CCFINISH),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_INVOCATIONS),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_COMPLETIONS),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_WAIT_LOCK),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_HOLD_LOCK),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_INST_ONCHIP),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_INST_OFFCHIP),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_INST_DIFF),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_4K_PREFETCH),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_COMPL_LOCK),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO),
+ CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+ CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+ NULL,
+};
+
/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
static struct attribute_group cpumcf_pmu_events_group = {
@@ -859,7 +1016,7 @@ __init const struct attribute_group **cpumf_cf_event_group(void)
if (ci.csvn >= 1 && ci.csvn <= 5)
csvn = cpumcf_svn_12345_pmu_event_attr;
else if (ci.csvn >= 6)
- csvn = cpumcf_svn_67_pmu_event_attr;
+ csvn = cpumcf_svn_678_pmu_event_attr;
/* Determine model-specific counter set(s) */
get_cpu_id(&cpu_id);
@@ -892,6 +1049,10 @@ __init const struct attribute_group **cpumf_cf_event_group(void)
case 0x3932:
model = cpumcf_z16_pmu_event_attr;
break;
+ case 0x9175:
+ case 0x9176:
+ model = cpumcf_z17_pmu_event_attr;
+ break;
default:
model = none;
break;
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 06efad5b4f93..459af23a47a5 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -5,8 +5,7 @@
* Copyright IBM Corp. 2013, 2018
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
*/
-#define KMSG_COMPONENT "cpum_sf"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "cpum_sf: " fmt
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
@@ -14,7 +13,6 @@
#include <linux/percpu.h>
#include <linux/pid.h>
#include <linux/notifier.h>
-#include <linux/export.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/moduleparam.h>
@@ -24,6 +22,22 @@
#include <asm/timex.h>
#include <linux/io.h>
+/* Perf PMU definitions for the sampling facility */
+#define PERF_CPUM_SF_MAX_CTR 2
+#define PERF_EVENT_CPUM_SF 0xB0000UL /* Event: Basic-sampling */
+#define PERF_EVENT_CPUM_SF_DIAG 0xBD000UL /* Event: Combined-sampling */
+#define PERF_CPUM_SF_BASIC_MODE 0x0001 /* Basic-sampling flag */
+#define PERF_CPUM_SF_DIAG_MODE 0x0002 /* Diagnostic-sampling flag */
+#define PERF_CPUM_SF_FREQ_MODE 0x0008 /* Sampling with frequency */
+
+#define OVERFLOW_REG(hwc) ((hwc)->extra_reg.config)
+#define SFB_ALLOC_REG(hwc) ((hwc)->extra_reg.alloc)
+#define TEAR_REG(hwc) ((hwc)->last_tag)
+#define SAMPL_RATE(hwc) ((hwc)->event_base)
+#define SAMPL_FLAGS(hwc) ((hwc)->config_base)
+#define SAMPL_DIAG_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
+#define SAMPL_FREQ_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FREQ_MODE)
+
/* Minimum number of sample-data-block-tables:
* At least one table is required for the sampling buffer structure.
* A single table contains up to 511 pointers to sample-data-blocks.
@@ -113,17 +127,6 @@ static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
return USEC_PER_SEC * qsi->cpu_speed / rate;
}
-/* Return TOD timestamp contained in an trailer entry */
-static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
-{
- /* TOD in STCKE format */
- if (te->header.t)
- return *((unsigned long long *)&te->timestamp[1]);
-
- /* TOD in STCK format */
- return *((unsigned long long *)&te->timestamp[0]);
-}
-
/* Return pointer to trailer entry of an sample data block */
static inline struct hws_trailer_entry *trailer_entry_ptr(unsigned long v)
{
@@ -154,12 +157,12 @@ static inline unsigned long *get_next_sdbt(unsigned long *s)
/*
* sf_disable() - Switch off sampling facility
*/
-static int sf_disable(void)
+static void sf_disable(void)
{
struct hws_lsctl_request_block sreq;
memset(&sreq, 0, sizeof(sreq));
- return lsctl(&sreq);
+ lsctl(&sreq);
}
/*
@@ -175,41 +178,27 @@ static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
*/
static void free_sampling_buffer(struct sf_buffer *sfb)
{
- unsigned long *sdbt, *curr;
-
- if (!sfb->sdbt)
- return;
+ unsigned long *sdbt, *curr, *head;
sdbt = sfb->sdbt;
- curr = sdbt;
-
+ if (!sdbt)
+ return;
+ sfb->sdbt = NULL;
/* Free the SDBT after all SDBs are processed... */
- while (1) {
- if (!*curr || !sdbt)
- break;
-
- /* Process table-link entries */
+ head = sdbt;
+ curr = sdbt;
+ do {
if (is_link_entry(curr)) {
+ /* Process table-link entries */
curr = get_next_sdbt(curr);
- if (sdbt)
- free_page((unsigned long)sdbt);
-
- /* If the origin is reached, sampling buffer is freed */
- if (curr == sfb->sdbt)
- break;
- else
- sdbt = curr;
+ free_page((unsigned long)sdbt);
+ sdbt = curr;
} else {
/* Process SDB pointer */
- if (*curr) {
- free_page((unsigned long)phys_to_virt(*curr));
- curr++;
- }
+ free_page((unsigned long)phys_to_virt(*curr));
+ curr++;
}
- }
-
- debug_sprintf_event(sfdbg, 5, "%s: freed sdbt %#lx\n", __func__,
- (unsigned long)sfb->sdbt);
+ } while (curr != head);
memset(sfb, 0, sizeof(*sfb));
}
@@ -265,10 +254,8 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
* the sampling buffer origin.
*/
if (sfb->sdbt != get_next_sdbt(tail)) {
- debug_sprintf_event(sfdbg, 3, "%s: "
- "sampling buffer is not linked: origin %#lx"
- " tail %#lx\n", __func__,
- (unsigned long)sfb->sdbt,
+ debug_sprintf_event(sfdbg, 3, "%s buffer not linked origin %#lx tail %#lx\n",
+ __func__, (unsigned long)sfb->sdbt,
(unsigned long)tail);
return -EINVAL;
}
@@ -318,9 +305,6 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
*tail = virt_to_phys(sfb->sdbt) + 1;
sfb->tail = tail;
- debug_sprintf_event(sfdbg, 4, "%s: new buffer"
- " settings: sdbt %lu sdb %lu\n", __func__,
- sfb->num_sdbt, sfb->num_sdb);
return rc;
}
@@ -357,15 +341,8 @@ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
/* Allocate requested number of sample-data-blocks */
rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
- if (rc) {
+ if (rc)
free_sampling_buffer(sfb);
- debug_sprintf_event(sfdbg, 4, "%s: "
- "realloc_sampling_buffer failed with rc %i\n",
- __func__, rc);
- } else
- debug_sprintf_event(sfdbg, 4,
- "%s: tear %#lx dear %#lx\n", __func__,
- (unsigned long)sfb->sdbt, (unsigned long)*sfb->sdbt);
return rc;
}
@@ -377,8 +354,8 @@ static void sfb_set_limits(unsigned long min, unsigned long max)
CPUM_SF_MAX_SDB = max;
memset(&si, 0, sizeof(si));
- if (!qsi(&si))
- CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
+ qsi(&si);
+ CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
}
static unsigned long sfb_max_limit(struct hw_perf_event *hwc)
@@ -397,12 +374,6 @@ static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
return 0;
}
-static int sfb_has_pending_allocs(struct sf_buffer *sfb,
- struct hw_perf_event *hwc)
-{
- return sfb_pending_allocs(sfb, hwc) > 0;
-}
-
static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
{
/* Limit the number of SDBs to not exceed the maximum */
@@ -419,14 +390,13 @@ static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
{
- if (cpuhw->sfb.sdbt)
+ if (sf_buffer_available(cpuhw))
free_sampling_buffer(&cpuhw->sfb);
}
static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
{
unsigned long n_sdb, freq;
- size_t sample_size;
/* Calculate sampling buffers using 4K pages
*
@@ -457,7 +427,6 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
* ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up
* to 511 SDBs).
*/
- sample_size = sizeof(struct hws_basic_entry);
freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
n_sdb = CPUM_SF_MIN_SDB + DIV_ROUND_UP(freq, 10000);
@@ -473,12 +442,6 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
if (sf_buffer_available(cpuhw))
return 0;
- debug_sprintf_event(sfdbg, 3,
- "%s: rate %lu f %lu sdb %lu/%lu"
- " sample_size %lu cpuhw %p\n", __func__,
- SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc),
- sample_size, cpuhw);
-
return alloc_sampling_buffer(&cpuhw->sfb,
sfb_pending_allocs(&cpuhw->sfb, hwc));
}
@@ -535,8 +498,6 @@ static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
if (num)
sfb_account_allocs(num, hwc);
- debug_sprintf_event(sfdbg, 5, "%s: overflow %llu ratio %lu num %lu\n",
- __func__, OVERFLOW_REG(hwc), ratio, num);
OVERFLOW_REG(hwc) = 0;
}
@@ -554,13 +515,11 @@ static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
static void extend_sampling_buffer(struct sf_buffer *sfb,
struct hw_perf_event *hwc)
{
- unsigned long num, num_old;
- int rc;
+ unsigned long num;
num = sfb_pending_allocs(sfb, hwc);
if (!num)
return;
- num_old = sfb->num_sdb;
/* Disable the sampling facility to reset any states and also
* clear pending measurement alerts.
@@ -572,51 +531,32 @@ static void extend_sampling_buffer(struct sf_buffer *sfb,
* called by perf. Because this is a reallocation, it is fine if the
* new SDB-request cannot be satisfied immediately.
*/
- rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
- if (rc)
- debug_sprintf_event(sfdbg, 5, "%s: realloc failed with rc %i\n",
- __func__, rc);
-
- if (sfb_has_pending_allocs(sfb, hwc))
- debug_sprintf_event(sfdbg, 5, "%s: "
- "req %lu alloc %lu remaining %lu\n",
- __func__, num, sfb->num_sdb - num_old,
- sfb_pending_allocs(sfb, hwc));
+ realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
}
/* Number of perf events counting hardware events */
-static atomic_t num_events;
+static refcount_t num_events;
/* Used to avoid races in calling reserve/release_cpumf_hardware */
static DEFINE_MUTEX(pmc_reserve_mutex);
#define PMC_INIT 0
#define PMC_RELEASE 1
-#define PMC_FAILURE 2
static void setup_pmc_cpu(void *flags)
{
- struct cpu_hw_sf *cpusf = this_cpu_ptr(&cpu_hw_sf);
- int err = 0;
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+ sf_disable();
switch (*((int *)flags)) {
case PMC_INIT:
- memset(cpusf, 0, sizeof(*cpusf));
- err = qsi(&cpusf->qsi);
- if (err)
- break;
- cpusf->flags |= PMU_F_RESERVED;
- err = sf_disable();
+ memset(cpuhw, 0, sizeof(*cpuhw));
+ qsi(&cpuhw->qsi);
+ cpuhw->flags |= PMU_F_RESERVED;
break;
case PMC_RELEASE:
- cpusf->flags &= ~PMU_F_RESERVED;
- err = sf_disable();
- if (!err)
- deallocate_buffers(cpusf);
+ cpuhw->flags &= ~PMU_F_RESERVED;
+ deallocate_buffers(cpuhw);
break;
}
- if (err) {
- *((int *)flags) |= PMC_FAILURE;
- pr_err("Switching off the sampling facility failed with rc %i\n", err);
- }
}
static void release_pmc_hardware(void)
@@ -627,27 +567,19 @@ static void release_pmc_hardware(void)
on_each_cpu(setup_pmc_cpu, &flags, 1);
}
-static int reserve_pmc_hardware(void)
+static void reserve_pmc_hardware(void)
{
int flags = PMC_INIT;
on_each_cpu(setup_pmc_cpu, &flags, 1);
- if (flags & PMC_FAILURE) {
- release_pmc_hardware();
- return -ENODEV;
- }
irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-
- return 0;
}
static void hw_perf_event_destroy(struct perf_event *event)
{
/* Release PMC if this is the last perf event */
- if (!atomic_add_unless(&num_events, -1, 1)) {
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_dec_return(&num_events) == 0)
- release_pmc_hardware();
+ if (refcount_dec_and_mutex_lock(&num_events, &pmc_reserve_mutex)) {
+ release_pmc_hardware();
mutex_unlock(&pmc_reserve_mutex);
}
}
@@ -751,9 +683,6 @@ static unsigned long getrate(bool freq, unsigned long sample,
*/
if (sample_rate_to_freq(si, rate) >
sysctl_perf_event_sample_rate) {
- debug_sprintf_event(sfdbg, 1, "%s: "
- "Sampling rate exceeds maximum "
- "perf sample rate\n", __func__);
rate = 0;
}
}
@@ -798,9 +727,6 @@ static int __hw_perf_event_init_rate(struct perf_event *event,
attr->sample_period = rate;
SAMPL_RATE(hwc) = rate;
hw_init_period(hwc, SAMPL_RATE(hwc));
- debug_sprintf_event(sfdbg, 4, "%s: cpu %d period %#llx freq %d,%#lx\n",
- __func__, event->cpu, event->attr.sample_period,
- event->attr.freq, SAMPLE_FREQ_MODE(hwc));
return 0;
}
@@ -810,23 +736,16 @@ static int __hw_perf_event_init(struct perf_event *event)
struct hws_qsi_info_block si;
struct perf_event_attr *attr = &event->attr;
struct hw_perf_event *hwc = &event->hw;
- int cpu, err;
+ int cpu, err = 0;
/* Reserve CPU-measurement sampling facility */
- err = 0;
- if (!atomic_inc_not_zero(&num_events)) {
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
- err = -EBUSY;
- else
- atomic_inc(&num_events);
- mutex_unlock(&pmc_reserve_mutex);
+ mutex_lock(&pmc_reserve_mutex);
+ if (!refcount_inc_not_zero(&num_events)) {
+ reserve_pmc_hardware();
+ refcount_set(&num_events, 1);
}
event->destroy = hw_perf_event_destroy;
- if (err)
- goto out;
-
/* Access per-CPU sampling information (query sampling info) */
/*
* The event->cpu value can be -1 to count on every CPU, for example,
@@ -838,9 +757,9 @@ static int __hw_perf_event_init(struct perf_event *event)
*/
memset(&si, 0, sizeof(si));
cpuhw = NULL;
- if (event->cpu == -1)
+ if (event->cpu == -1) {
qsi(&si);
- else {
+ } else {
/* Event is pinned to a particular CPU, retrieve the per-CPU
* sampling structure for accessing the CPU-specific QSI.
*/
@@ -881,13 +800,9 @@ static int __hw_perf_event_init(struct perf_event *event)
if (err)
goto out;
- /* Initialize sample data overflow accounting */
- hwc->extra_reg.reg = REG_OVERFLOW;
- OVERFLOW_REG(hwc) = 0;
-
/* Use AUX buffer. No need to allocate it by ourself */
if (attr->config == PERF_EVENT_CPUM_SF_DIAG)
- return 0;
+ goto out;
/* Allocate the per-CPU sampling buffer using the CPU information
* from the event. If the event is not pinned to a particular
@@ -917,6 +832,7 @@ static int __hw_perf_event_init(struct perf_event *event)
if (is_default_overflow_handler(event))
event->overflow_handler = cpumsf_output_event_pid;
out:
+ mutex_unlock(&pmc_reserve_mutex);
return err;
}
@@ -967,9 +883,6 @@ static int cpumsf_pmu_event_init(struct perf_event *event)
event->attr.exclude_idle = 0;
err = __hw_perf_event_init(event);
- if (unlikely(err))
- if (event->destroy)
- event->destroy(event);
return err;
}
@@ -979,10 +892,14 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
struct hw_perf_event *hwc;
int err;
- if (cpuhw->flags & PMU_F_ENABLED)
- return;
-
- if (cpuhw->flags & PMU_F_ERR_MASK)
+ /*
+ * Event must be
+ * - added/started on this CPU (PMU_F_IN_USE set)
+ * - and CPU must be available (PMU_F_RESERVED set)
+ * - and not already enabled (PMU_F_ENABLED not set)
+ * - and not in error condition (PMU_F_ERR_MASK not set)
+ */
+ if (cpuhw->flags != (PMU_F_IN_USE | PMU_F_RESERVED))
return;
 /* Check whether to extend the sampling buffer.
@@ -996,39 +913,27 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
* facility, but it can be fully re-enabled using sampling controls that
* have been saved in cpumsf_pmu_disable().
*/
- if (cpuhw->event) {
- hwc = &cpuhw->event->hw;
- if (!(SAMPL_DIAG_MODE(hwc))) {
- /*
- * Account number of overflow-designated
- * buffer extents
- */
- sfb_account_overflows(cpuhw, hwc);
- extend_sampling_buffer(&cpuhw->sfb, hwc);
- }
- /* Rate may be adjusted with ioctl() */
- cpuhw->lsctl.interval = SAMPL_RATE(&cpuhw->event->hw);
+ hwc = &cpuhw->event->hw;
+ if (!(SAMPL_DIAG_MODE(hwc))) {
+ /*
+ * Account number of overflow-designated buffer extents
+ */
+ sfb_account_overflows(cpuhw, hwc);
+ extend_sampling_buffer(&cpuhw->sfb, hwc);
}
+ /* Rate may be adjusted with ioctl() */
+ cpuhw->lsctl.interval = SAMPL_RATE(hwc);
/* (Re)enable the PMU and sampling facility */
- cpuhw->flags |= PMU_F_ENABLED;
- barrier();
-
err = lsctl(&cpuhw->lsctl);
if (err) {
- cpuhw->flags &= ~PMU_F_ENABLED;
pr_err("Loading sampling controls failed: op 1 err %i\n", err);
return;
}
/* Load current program parameter */
- lpp(&S390_lowcore.lpp);
-
- debug_sprintf_event(sfdbg, 6, "%s: es %i cs %i ed %i cd %i "
- "interval %#lx tear %#lx dear %#lx\n", __func__,
- cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed,
- cpuhw->lsctl.cd, cpuhw->lsctl.interval,
- cpuhw->lsctl.tear, cpuhw->lsctl.dear);
+ lpp(&get_lowcore()->lpp);
+ cpuhw->flags |= PMU_F_ENABLED;
}
static void cpumsf_pmu_disable(struct pmu *pmu)
@@ -1055,26 +960,23 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
return;
}
- /* Save state of TEAR and DEAR register contents */
- err = qsi(&si);
- if (!err) {
- /* TEAR/DEAR values are valid only if the sampling facility is
- * enabled. Note that cpumsf_pmu_disable() might be called even
- * for a disabled sampling facility because cpumsf_pmu_enable()
- * controls the enable/disable state.
- */
- if (si.es) {
- cpuhw->lsctl.tear = si.tear;
- cpuhw->lsctl.dear = si.dear;
- }
- } else
- debug_sprintf_event(sfdbg, 3, "%s: qsi() failed with err %i\n",
- __func__, err);
+ /*
+ * Save state of TEAR and DEAR register contents.
+ * TEAR/DEAR values are valid only if the sampling facility is
+ * enabled. Note that cpumsf_pmu_disable() might be called even
+ * for a disabled sampling facility because cpumsf_pmu_enable()
+ * controls the enable/disable state.
+ */
+ qsi(&si);
+ if (si.es) {
+ cpuhw->lsctl.tear = si.tear;
+ cpuhw->lsctl.dear = si.dear;
+ }
cpuhw->flags &= ~PMU_F_ENABLED;
}
-/* perf_exclude_event() - Filter event
+/* perf_event_exclude() - Filter event
* @event: The perf event
* @regs: pt_regs structure
* @sde_regs: Sample-data-entry (sde) regs structure
@@ -1083,7 +985,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
*
* Return non-zero if the event shall be excluded.
*/
-static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
+static int perf_event_exclude(struct perf_event *event, struct pt_regs *regs,
struct perf_sf_sde_regs *sde_regs)
{
if (event->attr.exclude_user && user_mode(regs))
@@ -1166,12 +1068,9 @@ static int perf_push_sample(struct perf_event *event,
data.tid_entry.pid = basic->hpp & LPP_PID_MASK;
overflow = 0;
- if (perf_exclude_event(event, &regs, sde_regs))
+ if (perf_event_exclude(event, &regs, sde_regs))
goto out;
- if (perf_event_overflow(event, &data, &regs)) {
- overflow = 1;
- event->pmu->stop(event, 0);
- }
+ overflow = perf_event_overflow(event, &data, &regs);
perf_event_update_userpage(event);
out:
return overflow;
@@ -1193,7 +1092,7 @@ static void perf_event_count_update(struct perf_event *event, u64 count)
* combined-sampling data entry consists of a basic- and a diagnostic-sampling
* data entry. The sampling function is determined by the flags in the perf
* event hardware structure. The function always works with a combined-sampling
- * data entry but ignores the the diagnostic portion if it is not available.
+ * data entry but ignores the diagnostic portion if it is not available.
*
* Note that the implementation focuses on basic-sampling data entries and, if
* such an entry is not valid, the entire combined-sampling data entry is
@@ -1235,11 +1134,6 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
/* Count discarded samples */
*overflow += 1;
} else {
- debug_sprintf_event(sfdbg, 4,
- "%s: Found unknown"
- " sampling data entry: te->f %i"
- " basic.def %#4x (%p)\n", __func__,
- te->header.f, sample->def, sample);
/* Sample slot is not yet written or other record.
*
* This condition can occur if the buffer was reused
@@ -1274,8 +1168,8 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
static void hw_perf_event_update(struct perf_event *event, int flush_all)
{
unsigned long long event_overflow, sampl_overflow, num_sdb;
- union hws_trailer_header old, prev, new;
struct hw_perf_event *hwc = &event->hw;
+ union hws_trailer_header prev, new;
struct hws_trailer_entry *te;
unsigned long *sdbt, sdb;
int done;
@@ -1284,7 +1178,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
* AUX buffer is used when in diagnostic sampling mode.
* No perf events/samples are created.
*/
- if (SAMPL_DIAG_MODE(&event->hw))
+ if (SAMPL_DIAG_MODE(hwc))
return;
sdbt = (unsigned long *)TEAR_REG(hwc);
@@ -1309,13 +1203,6 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
*/
sampl_overflow += te->header.overflow;
- /* Timestamps are valid for full sample-data-blocks only */
- debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx/%#lx "
- "overflow %llu timestamp %#llx\n",
- __func__, sdb, (unsigned long)sdbt,
- te->header.overflow,
- (te->header.f) ? trailer_timestamp(te) : 0ULL);
-
/* Collect all samples from a single sample-data-block and
* flag if an (perf) event overflow happened. If so, the PMU
* is stopped and remaining samples will be discarded.
@@ -1326,13 +1213,11 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
/* Reset trailer (using compare-double-and-swap) */
prev.val = READ_ONCE_ALIGNED_128(te->header.val);
do {
- old.val = prev.val;
new.val = prev.val;
new.f = 0;
new.a = 1;
new.overflow = 0;
- prev.val = cmpxchg128(&te->header.val, old.val, new.val);
- } while (prev.val != old.val);
+ } while (!try_cmpxchg128(&te->header.val, &prev.val, new.val));
/* Advance to next sample-data-block */
sdbt++;
@@ -1340,7 +1225,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
sdbt = get_next_sdbt(sdbt);
/* Update event hardware registers */
- TEAR_REG(hwc) = (unsigned long) sdbt;
+ TEAR_REG(hwc) = (unsigned long)sdbt;
/* Stop processing sample-data if all samples of the current
* sample-data-block were flushed even if it was not full.
@@ -1362,19 +1247,8 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
* are dropped.
* Slightly increase the interval to avoid hitting this limit.
*/
- if (event_overflow) {
+ if (event_overflow)
SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10);
- debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n",
- __func__,
- DIV_ROUND_UP(SAMPL_RATE(hwc), 10));
- }
-
- if (sampl_overflow || event_overflow)
- debug_sprintf_event(sfdbg, 4, "%s: "
- "overflows: sample %llu event %llu"
- " total %llu num_sdb %llu\n",
- __func__, sampl_overflow, event_overflow,
- OVERFLOW_REG(hwc), num_sdb);
}
static inline unsigned long aux_sdb_index(struct aux_buffer *aux,
@@ -1442,9 +1316,6 @@ static void aux_output_end(struct perf_output_handle *handle)
/* Remove alert indicators in the buffer */
te = aux_sdb_trailer(aux, aux->alert_mark);
te->header.a = 0;
-
- debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n",
- __func__, i, range_scan, aux->head);
}
/*
@@ -1463,7 +1334,7 @@ static int aux_output_begin(struct perf_output_handle *handle,
unsigned long range, i, range_scan, idx, head, base, offset;
struct hws_trailer_entry *te;
- if (WARN_ON_ONCE(handle->head & ~PAGE_MASK))
+ if (handle->head & ~PAGE_MASK)
return -EINVAL;
aux->head = handle->head >> PAGE_SHIFT;
@@ -1475,10 +1346,6 @@ static int aux_output_begin(struct perf_output_handle *handle,
* SDBs between aux->head and aux->empty_mark are already ready
* for new data. range_scan is num of SDBs not within them.
*/
- debug_sprintf_event(sfdbg, 6,
- "%s: range %ld head %ld alert %ld empty %ld\n",
- __func__, range, aux->head, aux->alert_mark,
- aux->empty_mark);
if (range > aux_sdb_num_empty(aux)) {
range_scan = range - aux_sdb_num_empty(aux);
idx = aux->empty_mark + 1;
@@ -1504,12 +1371,6 @@ static int aux_output_begin(struct perf_output_handle *handle,
cpuhw->lsctl.tear = virt_to_phys((void *)base) + offset * sizeof(unsigned long);
cpuhw->lsctl.dear = virt_to_phys((void *)aux->sdb_index[head]);
- debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld empty %ld "
- "index %ld tear %#lx dear %#lx\n", __func__,
- aux->head, aux->alert_mark, aux->empty_mark,
- head / CPUM_SF_SDB_PER_TABLE,
- cpuhw->lsctl.tear, cpuhw->lsctl.dear);
-
return 0;
}
@@ -1522,16 +1383,15 @@ static int aux_output_begin(struct perf_output_handle *handle,
static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
unsigned long long *overflow)
{
- union hws_trailer_header old, prev, new;
+ union hws_trailer_header prev, new;
struct hws_trailer_entry *te;
te = aux_sdb_trailer(aux, alert_index);
prev.val = READ_ONCE_ALIGNED_128(te->header.val);
do {
- old.val = prev.val;
new.val = prev.val;
- *overflow = old.overflow;
- if (old.f) {
+ *overflow = prev.overflow;
+ if (prev.f) {
/*
* SDB is already set by hardware.
* Abort and try to set somewhere
@@ -1541,8 +1401,7 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
}
new.a = 1;
new.overflow = 0;
- prev.val = cmpxchg128(&te->header.val, old.val, new.val);
- } while (prev.val != old.val);
+ } while (!try_cmpxchg128(&te->header.val, &prev.val, new.val));
return true;
}
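All cmpxchg128() loops in this patch are converted to the same try_cmpxchg128() shape: on failure the expected value is reloaded into the local copy, so the separate "old" variable disappears. A generic sketch of that shape, shown here with the 64-bit try_cmpxchg() and illustrative names:

static inline void or_in_bits(unsigned long *word, unsigned long bits)
{
	unsigned long old = READ_ONCE(*word), new;

	do {
		new = old | bits;
		/* On failure, try_cmpxchg() refreshes old from *word */
	} while (!try_cmpxchg(word, &old, new));
}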
@@ -1571,14 +1430,11 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
unsigned long long *overflow)
{
- unsigned long i, range_scan, idx, idx_old;
- union hws_trailer_header old, prev, new;
+ union hws_trailer_header prev, new;
+ unsigned long i, range_scan, idx;
unsigned long long orig_overflow;
struct hws_trailer_entry *te;
- debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld "
- "empty %ld\n", __func__, range, aux->head,
- aux->alert_mark, aux->empty_mark);
if (range <= aux_sdb_num_empty(aux))
/*
* No need to scan. All SDBs in range are marked as empty.
@@ -1601,31 +1457,26 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
* indicator fall into this range, set it.
*/
range_scan = range - aux_sdb_num_empty(aux);
- idx_old = idx = aux->empty_mark + 1;
+ idx = aux->empty_mark + 1;
for (i = 0; i < range_scan; i++, idx++) {
te = aux_sdb_trailer(aux, idx);
prev.val = READ_ONCE_ALIGNED_128(te->header.val);
do {
- old.val = prev.val;
new.val = prev.val;
- orig_overflow = old.overflow;
+ orig_overflow = prev.overflow;
new.f = 0;
new.overflow = 0;
if (idx == aux->alert_mark)
new.a = 1;
else
new.a = 0;
- prev.val = cmpxchg128(&te->header.val, old.val, new.val);
- } while (prev.val != old.val);
+ } while (!try_cmpxchg128(&te->header.val, &prev.val, new.val));
*overflow += orig_overflow;
}
/* Update empty_mark to new position */
aux->empty_mark = aux->head + range - 1;
- debug_sprintf_event(sfdbg, 6, "%s: range_scan %ld idx %ld..%ld "
- "empty %ld\n", __func__, range_scan, idx_old,
- idx - 1, aux->empty_mark);
return true;
}
@@ -1642,12 +1493,12 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
unsigned long num_sdb;
aux = perf_get_aux(handle);
- if (WARN_ON_ONCE(!aux))
+ if (!aux)
return;
/* Inform user space new data arrived */
size = aux_sdb_num_alert(aux) << PAGE_SHIFT;
- debug_sprintf_event(sfdbg, 6, "%s: #alert %ld\n", __func__,
+ debug_sprintf_event(sfdbg, 6, "%s #alert %ld\n", __func__,
size >> PAGE_SHIFT);
perf_aux_output_end(handle, size);
@@ -1661,7 +1512,7 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
num_sdb);
break;
}
- if (WARN_ON_ONCE(!aux))
+ if (!aux)
return;
/* Update head and alert_mark to new position */
@@ -1681,23 +1532,11 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
perf_aux_output_end(&cpuhw->handle, size);
pr_err("Sample data caused the AUX buffer with %lu "
"pages to overflow\n", aux->sfb.num_sdb);
- debug_sprintf_event(sfdbg, 1, "%s: head %ld range %ld "
- "overflow %lld\n", __func__,
- aux->head, range, overflow);
} else {
size = aux_sdb_num_alert(aux) << PAGE_SHIFT;
perf_aux_output_end(&cpuhw->handle, size);
- debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
- "already full, try another\n",
- __func__,
- aux->head, aux->alert_mark);
}
}
-
- if (done)
- debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
- "empty %ld\n", __func__, aux->head,
- aux->alert_mark, aux->empty_mark);
}
/*
@@ -1719,8 +1558,6 @@ static void aux_buffer_free(void *data)
kfree(aux->sdbt_index);
kfree(aux->sdb_index);
kfree(aux);
-
- debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu\n", __func__, num_sdbt);
}
static void aux_sdb_init(unsigned long sdb)
@@ -1828,9 +1665,6 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages,
*/
aux->empty_mark = sfb->num_sdb - 1;
- debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu SDBs %lu\n", __func__,
- sfb->num_sdbt, sfb->num_sdb);
-
return aux;
no_sdbt:
@@ -1863,8 +1697,7 @@ static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
memset(&si, 0, sizeof(si));
if (event->cpu == -1) {
- if (qsi(&si))
- return -ENODEV;
+ qsi(&si);
} else {
/* Event is pinned to a particular CPU, retrieve the per-CPU
* sampling structure for accessing the CPU-specific QSI.
@@ -1874,7 +1707,7 @@ static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
si = cpuhw->qsi;
}
- do_freq = !!SAMPLE_FREQ_MODE(&event->hw);
+ do_freq = !!SAMPL_FREQ_MODE(&event->hw);
rate = getrate(do_freq, value, &si);
if (!rate)
return -EINVAL;
@@ -1882,10 +1715,6 @@ static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
event->attr.sample_period = rate;
SAMPL_RATE(&event->hw) = rate;
hw_init_period(&event->hw, SAMPL_RATE(&event->hw));
- debug_sprintf_event(sfdbg, 4, "%s:"
- " cpu %d value %#llx period %#llx freq %d\n",
- __func__, event->cpu, value,
- event->attr.sample_period, do_freq);
return 0;
}
@@ -1896,12 +1725,8 @@ static void cpumsf_pmu_start(struct perf_event *event, int flags)
{
struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
- if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+ if (!(event->hw.state & PERF_HES_STOPPED))
return;
-
- if (flags & PERF_EF_RELOAD)
- WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
-
perf_pmu_disable(event->pmu);
event->hw.state = 0;
cpuhw->lsctl.cs = 1;
@@ -1926,7 +1751,9 @@ static void cpumsf_pmu_stop(struct perf_event *event, int flags)
event->hw.state |= PERF_HES_STOPPED;
if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
- hw_perf_event_update(event, 1);
+ /* CPU hotplug off removes SDBs. No samples to extract. */
+ if (cpuhw->flags & PMU_F_RESERVED)
+ hw_perf_event_update(event, 1);
event->hw.state |= PERF_HES_UPTODATE;
}
perf_pmu_enable(event->pmu);
@@ -1936,15 +1763,14 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
{
struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
struct aux_buffer *aux;
- int err;
+ int err = 0;
if (cpuhw->flags & PMU_F_IN_USE)
return -EAGAIN;
- if (!SAMPL_DIAG_MODE(&event->hw) && !cpuhw->sfb.sdbt)
+ if (!SAMPL_DIAG_MODE(&event->hw) && !sf_buffer_available(cpuhw))
return -EINVAL;
- err = 0;
perf_pmu_disable(event->pmu);
event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
@@ -2104,18 +1930,17 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
/* Program alert request */
if (alert & CPU_MF_INT_SF_PRA) {
- if (cpuhw->flags & PMU_F_IN_USE)
+ if (cpuhw->flags & PMU_F_IN_USE) {
if (SAMPL_DIAG_MODE(&cpuhw->event->hw))
hw_collect_aux(cpuhw);
else
hw_perf_event_update(cpuhw->event, 0);
- else
- WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE));
+ }
}
/* Report measurement alerts only for non-PRA codes */
if (alert != CPU_MF_INT_SF_PRA)
- debug_sprintf_event(sfdbg, 6, "%s: alert %#x\n", __func__,
+ debug_sprintf_event(sfdbg, 6, "%s alert %#x\n", __func__,
alert);
/* Sampling authorization change request */
@@ -2131,7 +1956,7 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
/* Invalid sampling buffer entry */
if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) {
- pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n",
+ pr_err("A sampling buffer entry is incorrect (alert=%#x)\n",
alert);
cpuhw->flags |= PMU_F_ERR_IBE;
sf_disable();
@@ -2143,7 +1968,7 @@ static int cpusf_pmu_setup(unsigned int cpu, int flags)
/* Ignore the notification if no events are scheduled on the PMU.
* This might be racy...
*/
- if (!atomic_read(&num_events))
+ if (!refcount_read(&num_events))
return 0;
local_irq_disable();
@@ -2205,10 +2030,12 @@ static const struct kernel_param_ops param_ops_sfb_size = {
.get = param_get_sfb_size,
};
-#define RS_INIT_FAILURE_QSI 0x0001
-#define RS_INIT_FAILURE_BSDES 0x0002
-#define RS_INIT_FAILURE_ALRT 0x0003
-#define RS_INIT_FAILURE_PERF 0x0004
+enum {
+ RS_INIT_FAILURE_BSDES = 2, /* Bad basic sampling size */
+ RS_INIT_FAILURE_ALRT = 3, /* IRQ registration failure */
+ RS_INIT_FAILURE_PERF = 4 /* PMU registration failure */
+};
+
static void __init pr_cpumsf_err(unsigned int reason)
{
pr_err("Sampling facility support for perf is not available: "
@@ -2224,11 +2051,7 @@ static int __init init_cpum_sampling_pmu(void)
return -ENODEV;
memset(&si, 0, sizeof(si));
- if (qsi(&si)) {
- pr_cpumsf_err(RS_INIT_FAILURE_QSI);
- return -ENODEV;
- }
-
+ qsi(&si);
if (!si.as && !si.ad)
return -ENODEV;
@@ -2246,7 +2069,7 @@ static int __init init_cpum_sampling_pmu(void)
CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG);
}
- sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
+ sfdbg = debug_register("cpum_sf", 2, 1, 80);
if (!sfdbg) {
pr_err("Registering for s390dbf failed\n");
return -ENOMEM;
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 5fff629b1a89..606750bae508 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -5,18 +5,15 @@
* Copyright IBM Corp. 2012, 2013
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
*/
-#define KMSG_COMPONENT "perf"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "perf: " fmt
#include <linux/kernel.h>
#include <linux/perf_event.h>
#include <linux/kvm_host.h>
#include <linux/percpu.h>
-#include <linux/export.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
-#include <linux/compat.h>
#include <linux/sysfs.h>
#include <asm/stacktrace.h>
#include <asm/irq.h>
@@ -57,7 +54,7 @@ static unsigned long instruction_pointer_guest(struct pt_regs *regs)
return sie_block(regs)->gpsw.addr;
}
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
+unsigned long perf_arch_instruction_pointer(struct pt_regs *regs)
{
return is_in_guest(regs) ? instruction_pointer_guest(regs)
: instruction_pointer(regs);
@@ -84,7 +81,7 @@ static unsigned long perf_misc_flags_sf(struct pt_regs *regs)
return flags;
}
-unsigned long perf_misc_flags(struct pt_regs *regs)
+unsigned long perf_arch_misc_flags(struct pt_regs *regs)
{
/* Check if the cpum_sf PMU has created the pt_regs structure.
* In this case, perf misc flags can be easily extracted. Otherwise,
@@ -228,5 +225,5 @@ ssize_t cpumf_events_sysfs_show(struct device *dev,
struct perf_pmu_events_attr *pmu_attr;
pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
- return sprintf(page, "event=0x%04llx\n", pmu_attr->id);
+ return sysfs_emit(page, "event=0x%04llx\n", pmu_attr->id);
}
diff --git a/arch/s390/kernel/perf_pai.c b/arch/s390/kernel/perf_pai.c
new file mode 100644
index 000000000000..810f5b6c5e01
--- /dev/null
+++ b/arch/s390/kernel/perf_pai.c
@@ -0,0 +1,1230 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Performance event support - Processor Activity Instrumentation Facility
+ *
+ * Copyright IBM Corp. 2026
+ * Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ */
+#define pr_fmt(fmt) "pai: " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/perf_event.h>
+#include <asm/ctlreg.h>
+#include <asm/pai.h>
+#include <asm/debug.h>
+
+static debug_info_t *paidbg;
+
+DEFINE_STATIC_KEY_FALSE(pai_key);
+
+enum {
+ PAI_PMU_CRYPTO, /* Index of PMU pai_crypto */
+ PAI_PMU_EXT, /* Index of PMU pai_ext */
+ PAI_PMU_MAX /* # of PAI PMUs */
+};
+
+enum {
+ PAIE1_CB_SZ = 0x200, /* Size of PAIE1 control block */
+ PAIE1_CTRBLOCK_SZ = 0x400 /* Size of PAIE1 counter blocks */
+};
+
+struct pai_userdata {
+ u16 num;
+ u64 value;
+} __packed;
+
+/* Create the PAI extension 1 control block area.
+ * The PAI extension control block 1 is pointed to by lowcore
+ * address 0x1508 for each CPU. This control block is 512 bytes in size
+ * and requires a 512 byte boundary alignment.
+ */
+struct paiext_cb { /* PAI extension 1 control block */
+ u64 header; /* Not used */
+ u64 reserved1;
+ u64 acc; /* Addr to analytics counter control block */
+ u8 reserved2[PAIE1_CTRBLOCK_SZ - 3 * sizeof(u64)];
+} __packed;
+
+struct pai_map {
+ unsigned long *area; /* Area for CPU to store counters */
+ struct pai_userdata *save; /* Page to store non-zero counters */
+ unsigned int active_events; /* # of PAI crypto users */
+ refcount_t refcnt; /* Reference count mapped buffers */
+ struct perf_event *event; /* Perf event for sampling */
+ struct list_head syswide_list; /* List system-wide sampling events */
+ struct paiext_cb *paiext_cb; /* PAI extension control block area */
+ bool fullpage; /* True: counter area is a full page */
+};
+
+struct pai_mapptr {
+ struct pai_map *mapptr;
+};
+
+static struct pai_root { /* Anchor to per CPU data */
+ refcount_t refcnt; /* Overall active events */
+ struct pai_mapptr __percpu *mapptr;
+} pai_root[PAI_PMU_MAX];
+
+/* This table defines the different parameters of the PAI PMUs. During
+ * initialization the machine dependent values are extracted and saved.
+ * However most of the values are static and do not change.
+ * There is one table entry per PAI PMU.
+ */
+struct pai_pmu { /* Define PAI PMU characteristics */
+ const char *pmuname; /* Name of PMU */
+ const int facility_nr; /* Facility number to check for support */
+ unsigned int num_avail; /* # Counters defined by hardware */
+ unsigned int num_named; /* # Counters known by name */
+ unsigned long base; /* Counter set base number */
+ unsigned long kernel_offset; /* Offset to kernel part in counter page */
+ unsigned long area_size; /* Size of counter area */
+ const char * const *names; /* List of counter names */
+ struct pmu *pmu; /* Ptr to supporting PMU */
+ int (*init)(struct pai_pmu *p); /* PMU support init function */
+ void (*exit)(struct pai_pmu *p); /* PMU support exit function */
+ struct attribute_group *event_group; /* Ptr to attribute of events */
+};
+
+static struct pai_pmu pai_pmu[]; /* Forward declaration */
+
+/* Free per CPU data when the last event is removed. */
+static void pai_root_free(int idx)
+{
+ if (refcount_dec_and_test(&pai_root[idx].refcnt)) {
+ free_percpu(pai_root[idx].mapptr);
+ pai_root[idx].mapptr = NULL;
+ }
+ debug_sprintf_event(paidbg, 5, "%s root[%d].refcount %d\n", __func__,
+ idx, refcount_read(&pai_root[idx].refcnt));
+}
+
+/*
+ * On initialization of first event also allocate per CPU data dynamically.
+ * Start with an array of pointers, the array size is the maximum number of
+ * CPUs possible, which might be larger than the number of CPUs currently
+ * online.
+ */
+static int pai_root_alloc(int idx)
+{
+ if (!refcount_inc_not_zero(&pai_root[idx].refcnt)) {
+ /* The memory is already zeroed. */
+ pai_root[idx].mapptr = alloc_percpu(struct pai_mapptr);
+ if (!pai_root[idx].mapptr)
+ return -ENOMEM;
+ refcount_set(&pai_root[idx].refcnt, 1);
+ }
+ return 0;
+}
+
+/* Release the PMU if event is the last perf event */
+static DEFINE_MUTEX(pai_reserve_mutex);
+
+/* Free all memory allocated for event counting/sampling setup */
+static void pai_free(struct pai_mapptr *mp)
+{
+ if (mp->mapptr->fullpage)
+ free_page((unsigned long)mp->mapptr->area);
+ else
+ kfree(mp->mapptr->area);
+ kfree(mp->mapptr->paiext_cb);
+ kvfree(mp->mapptr->save);
+ kfree(mp->mapptr);
+ mp->mapptr = NULL;
+}
+
+/* Adjust usage counters and remove allocated memory when all users are
+ * gone.
+ */
+static void pai_event_destroy_cpu(struct perf_event *event, int cpu)
+{
+ int idx = PAI_PMU_IDX(event);
+ struct pai_mapptr *mp = per_cpu_ptr(pai_root[idx].mapptr, cpu);
+ struct pai_map *cpump = mp->mapptr;
+
+ mutex_lock(&pai_reserve_mutex);
+ debug_sprintf_event(paidbg, 5, "%s event %#llx idx %d cpu %d users %d "
+ "refcnt %u\n", __func__, event->attr.config, idx,
+ event->cpu, cpump->active_events,
+ refcount_read(&cpump->refcnt));
+ if (refcount_dec_and_test(&cpump->refcnt))
+ pai_free(mp);
+ pai_root_free(idx);
+ mutex_unlock(&pai_reserve_mutex);
+}
+
+static void pai_event_destroy(struct perf_event *event)
+{
+ int cpu;
+
+ free_page(PAI_SAVE_AREA(event));
+ if (event->cpu == -1) {
+ struct cpumask *mask = PAI_CPU_MASK(event);
+
+ for_each_cpu(cpu, mask)
+ pai_event_destroy_cpu(event, cpu);
+ kfree(mask);
+ } else {
+ pai_event_destroy_cpu(event, event->cpu);
+ }
+}
+
+static void paicrypt_event_destroy(struct perf_event *event)
+{
+ static_branch_dec(&pai_key);
+ pai_event_destroy(event);
+}
+
+static u64 pai_getctr(unsigned long *page, int nr, unsigned long offset)
+{
+ if (offset)
+ nr += offset / sizeof(*page);
+ return page[nr];
+}
+
+/* Read the counter values. Return the value stored at the counter's
+ * location in the mapped counter area. For the base event xxx_ALL sum
+ * up all counters.
+ */
+static u64 pai_getdata(struct perf_event *event, bool kernel)
+{
+ int idx = PAI_PMU_IDX(event);
+ struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr);
+ struct pai_pmu *pp = &pai_pmu[idx];
+ struct pai_map *cpump = mp->mapptr;
+ unsigned int i;
+ u64 sum = 0;
+
+ if (event->attr.config != pp->base) {
+ return pai_getctr(cpump->area,
+ event->attr.config - pp->base,
+ kernel ? pp->kernel_offset : 0);
+ }
+
+ for (i = 1; i <= pp->num_avail; i++) {
+ u64 val = pai_getctr(cpump->area, i,
+ kernel ? pp->kernel_offset : 0);
+
+ if (!val)
+ continue;
+ sum += val;
+ }
+ return sum;
+}
+
+static u64 paicrypt_getall(struct perf_event *event)
+{
+ u64 sum = 0;
+
+ if (!event->attr.exclude_kernel)
+ sum += pai_getdata(event, true);
+ if (!event->attr.exclude_user)
+ sum += pai_getdata(event, false);
+
+ return sum;
+}
+
+/* Check concurrent access of counting and sampling for crypto events.
+ * This function is called in process context and it is safe to block.
+ * When the event initialization function fails, no other callback will
+ * be invoked.
+ *
+ * Allocate the memory for the event.
+ */
+static int pai_alloc_cpu(struct perf_event *event, int cpu)
+{
+ int rc, idx = PAI_PMU_IDX(event);
+ struct pai_map *cpump = NULL;
+ bool need_paiext_cb = false;
+ struct pai_mapptr *mp;
+
+ mutex_lock(&pai_reserve_mutex);
+ /* Allocate root node */
+ rc = pai_root_alloc(idx);
+ if (rc)
+ goto unlock;
+
+ /* Allocate node for this event */
+ mp = per_cpu_ptr(pai_root[idx].mapptr, cpu);
+ cpump = mp->mapptr;
+ if (!cpump) { /* pai_map allocated? */
+ rc = -ENOMEM;
+ cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
+ if (!cpump)
+ goto undo;
+ /* Allocate memory for counter page and counter extraction.
+ * Only the first counting event has to allocate a page.
+ */
+ mp->mapptr = cpump;
+ if (idx == PAI_PMU_CRYPTO) {
+ cpump->area = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+ /* free_page() can handle 0x0 address */
+ cpump->fullpage = true;
+ } else { /* PAI_PMU_EXT */
+ /*
+ * Allocate memory for counter area and counter extraction.
+ * These are
+ * - a 512 byte block that requires 512 byte boundary
+ * alignment.
+ * - a 1 KB block that requires 1 KB boundary
+ * alignment.
+ * Only the first counting event has to allocate the area.
+ *
+ * Note: This works with commit 59bb47985c1d by default.
+ * Backporting this to kernels without this commit might
+ * need adjustment.
+ */
+ cpump->area = kzalloc(pai_pmu[idx].area_size, GFP_KERNEL);
+ cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL);
+ need_paiext_cb = true;
+ }
+ cpump->save = kvmalloc_array(pai_pmu[idx].num_avail + 1,
+ sizeof(struct pai_userdata),
+ GFP_KERNEL);
+ if (!cpump->area || !cpump->save ||
+ (need_paiext_cb && !cpump->paiext_cb)) {
+ pai_free(mp);
+ goto undo;
+ }
+ INIT_LIST_HEAD(&cpump->syswide_list);
+ refcount_set(&cpump->refcnt, 1);
+ rc = 0;
+ } else {
+ refcount_inc(&cpump->refcnt);
+ }
+
+undo:
+ if (rc) {
+ /* Error in allocation of event, decrement anchor. Since
+ * the event is not created, its destroy() function is never
+ * invoked. Adjust the reference counter for the anchor.
+ */
+ pai_root_free(idx);
+ }
+unlock:
+ mutex_unlock(&pai_reserve_mutex);
+ /* If rc is non-zero, no increment of counter/sampler was done. */
+ return rc;
+}
+
+static int pai_alloc(struct perf_event *event)
+{
+ struct cpumask *maskptr;
+ int cpu, rc = -ENOMEM;
+
+ maskptr = kzalloc(sizeof(*maskptr), GFP_KERNEL);
+ if (!maskptr)
+ goto out;
+
+ for_each_online_cpu(cpu) {
+ rc = pai_alloc_cpu(event, cpu);
+ if (rc) {
+ for_each_cpu(cpu, maskptr)
+ pai_event_destroy_cpu(event, cpu);
+ kfree(maskptr);
+ goto out;
+ }
+ cpumask_set_cpu(cpu, maskptr);
+ }
+
+ /*
+ * On error the cpumask has been freed and all per-CPU allocations
+ * have been released.
+ * Remember for which CPUs data structures have been allocated and
+ * release them in the pai_event_destroy() callback for this event.
+ */
+ PAI_CPU_MASK(event) = maskptr;
+ rc = 0;
+out:
+ return rc;
+}
+
+/* Validate event number and return error if event is not supported.
+ * On successful return, PAI_PMU_IDX(event) is set to the index of
+ * the supporting pai_pmu[] array element.
+ */
+static int pai_event_valid(struct perf_event *event, int idx)
+{
+ struct perf_event_attr *a = &event->attr;
+ struct pai_pmu *pp = &pai_pmu[idx];
+
+ /* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */
+ if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
+ return -ENOENT;
+ /* Allow only CRYPTO_ALL/NNPA_ALL for sampling */
+ if (a->sample_period && a->config != pp->base)
+ return -EINVAL;
+ /* PAI crypto event must be in valid range, try others if not */
+ if (a->config < pp->base || a->config > pp->base + pp->num_avail)
+ return -ENOENT;
+ if (idx == PAI_PMU_EXT && a->exclude_user)
+ return -EINVAL;
+ PAI_PMU_IDX(event) = idx;
+ return 0;
+}
+
+/* Might be called on a different CPU than the one the event is intended for. */
+static int pai_event_init(struct perf_event *event, int idx)
+{
+ struct perf_event_attr *a = &event->attr;
+ int rc;
+
+ /* PAI event must be valid and in supported range */
+ rc = pai_event_valid(event, idx);
+ if (rc)
+ goto out;
+ /* Get a page to store last counter values for sampling */
+ if (a->sample_period) {
+ PAI_SAVE_AREA(event) = get_zeroed_page(GFP_KERNEL);
+ if (!PAI_SAVE_AREA(event)) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ }
+
+ if (event->cpu >= 0)
+ rc = pai_alloc_cpu(event, event->cpu);
+ else
+ rc = pai_alloc(event);
+ if (rc) {
+ free_page(PAI_SAVE_AREA(event));
+ goto out;
+ }
+
+ if (a->sample_period) {
+ a->sample_period = 1;
+ a->freq = 0;
+ /* Register for paicrypt_sched_task() to be called */
+ event->attach_state |= PERF_ATTACH_SCHED_CB;
+ /* Add raw data which contain the memory mapped counters */
+ a->sample_type |= PERF_SAMPLE_RAW;
+ /* Turn off inheritance */
+ a->inherit = 0;
+ }
+out:
+ return rc;
+}
+
+static int paicrypt_event_init(struct perf_event *event)
+{
+ int rc = pai_event_init(event, PAI_PMU_CRYPTO);
+
+ if (!rc) {
+ event->destroy = paicrypt_event_destroy;
+ static_branch_inc(&pai_key);
+ }
+ return rc;
+}
+
+static void pai_read(struct perf_event *event,
+ u64 (*fct)(struct perf_event *event))
+{
+ u64 prev, new, delta;
+
+ prev = local64_read(&event->hw.prev_count);
+ new = fct(event);
+ local64_set(&event->hw.prev_count, new);
+ delta = (prev <= new) ? new - prev : (-1ULL - prev) + new + 1;
+ local64_add(delta, &event->count);
+}
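pai_read() computes a wrap-safe delta between two 64-bit readings. As a worked example with illustrative values: for prev = 0xfffffffffffffffe and new = 0x1, the second branch gives (-1ULL - prev) + new + 1 = 1 + 1 + 1 = 3, i.e. the counter advanced by three increments across the wrap.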
+
+static void paicrypt_read(struct perf_event *event)
+{
+ pai_read(event, paicrypt_getall);
+}
+
+static void pai_start(struct perf_event *event, int flags,
+ u64 (*fct)(struct perf_event *event))
+{
+ int idx = PAI_PMU_IDX(event);
+ struct pai_pmu *pp = &pai_pmu[idx];
+ struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr);
+ struct pai_map *cpump = mp->mapptr;
+ u64 sum;
+
+ if (!event->attr.sample_period) { /* Counting */
+ sum = fct(event); /* Get current value */
+ local64_set(&event->hw.prev_count, sum);
+ } else { /* Sampling */
+ memcpy((void *)PAI_SAVE_AREA(event), cpump->area, pp->area_size);
+ /* Enable context switch callback for system-wide sampling */
+ if (!(event->attach_state & PERF_ATTACH_TASK)) {
+ list_add_tail(PAI_SWLIST(event), &cpump->syswide_list);
+ perf_sched_cb_inc(event->pmu);
+ } else {
+ cpump->event = event;
+ }
+ }
+}
+
+static void paicrypt_start(struct perf_event *event, int flags)
+{
+ pai_start(event, flags, paicrypt_getall);
+}
+
+static int pai_add(struct perf_event *event, int flags)
+{
+ int idx = PAI_PMU_IDX(event);
+ struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr);
+ struct pai_map *cpump = mp->mapptr;
+ struct paiext_cb *pcb = cpump->paiext_cb;
+ unsigned long ccd;
+
+ if (++cpump->active_events == 1) {
+ if (!pcb) { /* PAI crypto */
+ ccd = virt_to_phys(cpump->area) | PAI_CRYPTO_KERNEL_OFFSET;
+ WRITE_ONCE(get_lowcore()->ccd, ccd);
+ local_ctl_set_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
+ } else { /* PAI extension 1 */
+ ccd = virt_to_phys(pcb);
+ WRITE_ONCE(get_lowcore()->aicd, ccd);
+ pcb->acc = virt_to_phys(cpump->area) | 0x1;
+ /* Enable CPU instruction lookup for PAIE1 control block */
+ local_ctl_set_bit(0, CR0_PAI_EXTENSION_BIT);
+ }
+ }
+ if (flags & PERF_EF_START)
+ pai_pmu[idx].pmu->start(event, PERF_EF_RELOAD);
+ event->hw.state = 0;
+ return 0;
+}
+
+static int paicrypt_add(struct perf_event *event, int flags)
+{
+ return pai_add(event, flags);
+}
+
+static void pai_have_sample(struct perf_event *, struct pai_map *);
+static void pai_stop(struct perf_event *event, int flags)
+{
+ int idx = PAI_PMU_IDX(event);
+ struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr);
+ struct pai_map *cpump = mp->mapptr;
+
+ if (!event->attr.sample_period) { /* Counting */
+ pai_pmu[idx].pmu->read(event);
+ } else { /* Sampling */
+ if (!(event->attach_state & PERF_ATTACH_TASK)) {
+ perf_sched_cb_dec(event->pmu);
+ list_del(PAI_SWLIST(event));
+ } else {
+ pai_have_sample(event, cpump);
+ cpump->event = NULL;
+ }
+ }
+ event->hw.state = PERF_HES_STOPPED;
+}
+
+static void paicrypt_stop(struct perf_event *event, int flags)
+{
+ pai_stop(event, flags);
+}
+
+static void pai_del(struct perf_event *event, int flags)
+{
+ int idx = PAI_PMU_IDX(event);
+ struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr);
+ struct pai_map *cpump = mp->mapptr;
+ struct paiext_cb *pcb = cpump->paiext_cb;
+
+ pai_pmu[idx].pmu->stop(event, PERF_EF_UPDATE);
+ if (--cpump->active_events == 0) {
+ if (!pcb) { /* PAI crypto */
+ local_ctl_clear_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
+ WRITE_ONCE(get_lowcore()->ccd, 0);
+ } else { /* PAI extension 1 */
+ /* Disable CPU instruction lookup for PAIE1 control block */
+ local_ctl_clear_bit(0, CR0_PAI_EXTENSION_BIT);
+ pcb->acc = 0;
+ WRITE_ONCE(get_lowcore()->aicd, 0);
+ }
+ }
+}
+
+static void paicrypt_del(struct perf_event *event, int flags)
+{
+ pai_del(event, flags);
+}
+
+/* Create raw data and save it in buffer. Calculate the delta for each
+ * counter between this invocation and the last invocation.
+ * Returns number of bytes copied.
+ * Saves only entries with a non-zero counter difference in the form
+ * 2 bytes: Counter number
+ * 8 bytes: Counter value delta
+ */
+static size_t pai_copy(struct pai_userdata *userdata, unsigned long *page,
+ struct pai_pmu *pp, unsigned long *page_old,
+ bool exclude_user, bool exclude_kernel)
+{
+ int i, outidx = 0;
+
+ for (i = 1; i <= pp->num_avail; i++) {
+ u64 val = 0, val_old = 0;
+
+ if (!exclude_kernel) {
+ val += pai_getctr(page, i, pp->kernel_offset);
+ val_old += pai_getctr(page_old, i, pp->kernel_offset);
+ }
+ if (!exclude_user) {
+ val += pai_getctr(page, i, 0);
+ val_old += pai_getctr(page_old, i, 0);
+ }
+ if (val >= val_old)
+ val -= val_old;
+ else
+ val = (~0ULL - val_old) + val + 1;
+ if (val) {
+ userdata[outidx].num = i;
+ userdata[outidx].value = val;
+ outidx++;
+ }
+ }
+ return outidx * sizeof(*userdata);
+}
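The raw sample payload assembled by pai_copy() is a packed array of struct pai_userdata records (2-byte counter number followed by an 8-byte delta). A hedged sketch of how a consumer of the PERF_SAMPLE_RAW data might walk that buffer; the helper name is illustrative:

static void pai_dump_raw(const void *buf, size_t len)
{
	const struct pai_userdata *rec = buf;
	size_t i, n = len / sizeof(*rec);

	for (i = 0; i < n; i++)
		pr_info("counter %u delta %llu\n", rec[i].num,
			(unsigned long long)rec[i].value);
}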
+
+/* Write sample when one or more counters values are nonzero.
+ *
+ * Note: The functions paicrypt_sched_task() and pai_push_sample() are not
+ * invoked after function paicrypt_del() has been called because of function
+ * perf_sched_cb_dec(). Both functions are only
+ * called when sampling is active. Function perf_sched_cb_inc()
+ * has been invoked to install function paicrypt_sched_task() as call back
+ * to run at context switch time.
+ *
+ * This causes function perf_event_context_sched_out() and
+ * perf_event_context_sched_in() to check whether the PMU has installed a
+ * sched_task() callback. That callback is not active after paicrypt_del()
+ * returns and has deleted the event on that CPU.
+ */
+static int pai_push_sample(size_t rawsize, struct pai_map *cpump,
+ struct perf_event *event)
+{
+ int idx = PAI_PMU_IDX(event);
+ struct pai_pmu *pp = &pai_pmu[idx];
+ struct perf_sample_data data;
+ struct perf_raw_record raw;
+ struct pt_regs regs;
+ int overflow;
+
+ /* Setup perf sample */
+ memset(&regs, 0, sizeof(regs));
+ memset(&raw, 0, sizeof(raw));
+ memset(&data, 0, sizeof(data));
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+ if (event->attr.sample_type & PERF_SAMPLE_TID) {
+ data.tid_entry.pid = task_tgid_nr(current);
+ data.tid_entry.tid = task_pid_nr(current);
+ }
+ if (event->attr.sample_type & PERF_SAMPLE_TIME)
+ data.time = event->clock();
+ if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
+ data.id = event->id;
+ if (event->attr.sample_type & PERF_SAMPLE_CPU) {
+ data.cpu_entry.cpu = smp_processor_id();
+ data.cpu_entry.reserved = 0;
+ }
+ if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+ raw.frag.size = rawsize;
+ raw.frag.data = cpump->save;
+ perf_sample_save_raw_data(&data, event, &raw);
+ }
+
+ overflow = perf_event_overflow(event, &data, &regs);
+ perf_event_update_userpage(event);
+ /* Save crypto counter lowcore page after reading event data. */
+ memcpy((void *)PAI_SAVE_AREA(event), cpump->area, pp->area_size);
+ return overflow;
+}
+
+/* Check if there is data to be saved on schedule out of a task. */
+static void pai_have_sample(struct perf_event *event, struct pai_map *cpump)
+{
+ struct pai_pmu *pp;
+ size_t rawsize;
+
+ if (!event) /* No event active */
+ return;
+ pp = &pai_pmu[PAI_PMU_IDX(event)];
+ rawsize = pai_copy(cpump->save, cpump->area, pp,
+ (unsigned long *)PAI_SAVE_AREA(event),
+ event->attr.exclude_user,
+ event->attr.exclude_kernel);
+ if (rawsize) /* Some counters were incremented */
+ pai_push_sample(rawsize, cpump, event);
+}
+
+/* Check if there is data to be saved on schedule out of a task. */
+static void pai_have_samples(int idx)
+{
+ struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr);
+ struct pai_map *cpump = mp->mapptr;
+ struct perf_event *event;
+
+ list_for_each_entry(event, &cpump->syswide_list, hw.tp_list)
+ pai_have_sample(event, cpump);
+}
+
+/* Called on schedule-in and schedule-out. No access to event structure,
+ * but for sampling only event CRYPTO_ALL is allowed.
+ */
+static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ struct task_struct *task, bool sched_in)
+{
+ /* We started with a clean page on event installation. So read out
+ * results on schedule_out and if page was dirty, save old values.
+ */
+ if (!sched_in)
+ pai_have_samples(PAI_PMU_CRYPTO);
+}
+
+/* ============================= paiext ====================================*/
+
+static void paiext_event_destroy(struct perf_event *event)
+{
+ pai_event_destroy(event);
+}
+
+/* Might be called on a different CPU than the one the event is intended for. */
+static int paiext_event_init(struct perf_event *event)
+{
+ int rc = pai_event_init(event, PAI_PMU_EXT);
+
+ if (!rc) {
+ event->attr.exclude_kernel = true; /* No kernel space part */
+ event->destroy = paiext_event_destroy;
+ /* Offset of NNPA in paiext_cb */
+ event->hw.config_base = offsetof(struct paiext_cb, acc);
+ }
+ return rc;
+}
+
+static u64 paiext_getall(struct perf_event *event)
+{
+ return pai_getdata(event, false);
+}
+
+static void paiext_read(struct perf_event *event)
+{
+ pai_read(event, paiext_getall);
+}
+
+static void paiext_start(struct perf_event *event, int flags)
+{
+ pai_start(event, flags, paiext_getall);
+}
+
+static int paiext_add(struct perf_event *event, int flags)
+{
+ return pai_add(event, flags);
+}
+
+static void paiext_stop(struct perf_event *event, int flags)
+{
+ pai_stop(event, flags);
+}
+
+static void paiext_del(struct perf_event *event, int flags)
+{
+ pai_del(event, flags);
+}
+
+/* Called on schedule-in and schedule-out. No access to event structure,
+ * but for sampling only event NNPA_ALL is allowed.
+ */
+static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ struct task_struct *task, bool sched_in)
+{
+ /* We started with a clean page on event installation. So read out
+ * results on schedule_out and if page was dirty, save old values.
+ */
+ if (!sched_in)
+ pai_have_samples(PAI_PMU_EXT);
+}
+
+/* Attribute definitions for paicrypt interface. As with other CPU
+ * Measurement Facilities, there is one attribute per mapped counter.
+ * The number of mapped counters may vary per machine generation. Use
+ * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction
+ * to determine the number of mapped counters. The instruction returns
+ * a positive number, which is the highest number of supported counters.
+ * All counters below this number are also supported; there are no
+ * holes. A returned number of zero means mapped counters are not supported.
+ *
+ * The identification of the counter is a unique number. The chosen range
+ * is 0x1000 + offset in mapped kernel page.
+ * All CPU Measurement Facility counter identifiers must be unique and
+ * the numbers from 0 to 496 are already used for the CPU Measurement
+ * Counter facility. Numbers 0xb0000, 0xbc000 and 0xbd000 are already
+ * used for the CPU Measurement Sampling facility.
+ */
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *paicrypt_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group paicrypt_events_group = {
+ .name = "events",
+ .attrs = NULL /* Filled in attr_event_init() */
+};
+
+static struct attribute_group paicrypt_format_group = {
+ .name = "format",
+ .attrs = paicrypt_format_attr,
+};
+
+static const struct attribute_group *paicrypt_attr_groups[] = {
+ &paicrypt_events_group,
+ &paicrypt_format_group,
+ NULL,
+};
+
+/* Performance monitoring unit for mapped counters */
+static struct pmu paicrypt = {
+ .task_ctx_nr = perf_hw_context,
+ .event_init = paicrypt_event_init,
+ .add = paicrypt_add,
+ .del = paicrypt_del,
+ .start = paicrypt_start,
+ .stop = paicrypt_stop,
+ .read = paicrypt_read,
+ .sched_task = paicrypt_sched_task,
+ .attr_groups = paicrypt_attr_groups
+};
+
+/* List of symbolic PAI counter names. */
+static const char * const paicrypt_ctrnames[] = {
+ [0] = "CRYPTO_ALL",
+ [1] = "KM_DEA",
+ [2] = "KM_TDEA_128",
+ [3] = "KM_TDEA_192",
+ [4] = "KM_ENCRYPTED_DEA",
+ [5] = "KM_ENCRYPTED_TDEA_128",
+ [6] = "KM_ENCRYPTED_TDEA_192",
+ [7] = "KM_AES_128",
+ [8] = "KM_AES_192",
+ [9] = "KM_AES_256",
+ [10] = "KM_ENCRYPTED_AES_128",
+ [11] = "KM_ENCRYPTED_AES_192",
+ [12] = "KM_ENCRYPTED_AES_256",
+ [13] = "KM_XTS_AES_128",
+ [14] = "KM_XTS_AES_256",
+ [15] = "KM_XTS_ENCRYPTED_AES_128",
+ [16] = "KM_XTS_ENCRYPTED_AES_256",
+ [17] = "KMC_DEA",
+ [18] = "KMC_TDEA_128",
+ [19] = "KMC_TDEA_192",
+ [20] = "KMC_ENCRYPTED_DEA",
+ [21] = "KMC_ENCRYPTED_TDEA_128",
+ [22] = "KMC_ENCRYPTED_TDEA_192",
+ [23] = "KMC_AES_128",
+ [24] = "KMC_AES_192",
+ [25] = "KMC_AES_256",
+ [26] = "KMC_ENCRYPTED_AES_128",
+ [27] = "KMC_ENCRYPTED_AES_192",
+ [28] = "KMC_ENCRYPTED_AES_256",
+ [29] = "KMC_PRNG",
+ [30] = "KMA_GCM_AES_128",
+ [31] = "KMA_GCM_AES_192",
+ [32] = "KMA_GCM_AES_256",
+ [33] = "KMA_GCM_ENCRYPTED_AES_128",
+ [34] = "KMA_GCM_ENCRYPTED_AES_192",
+ [35] = "KMA_GCM_ENCRYPTED_AES_256",
+ [36] = "KMF_DEA",
+ [37] = "KMF_TDEA_128",
+ [38] = "KMF_TDEA_192",
+ [39] = "KMF_ENCRYPTED_DEA",
+ [40] = "KMF_ENCRYPTED_TDEA_128",
+ [41] = "KMF_ENCRYPTED_TDEA_192",
+ [42] = "KMF_AES_128",
+ [43] = "KMF_AES_192",
+ [44] = "KMF_AES_256",
+ [45] = "KMF_ENCRYPTED_AES_128",
+ [46] = "KMF_ENCRYPTED_AES_192",
+ [47] = "KMF_ENCRYPTED_AES_256",
+ [48] = "KMCTR_DEA",
+ [49] = "KMCTR_TDEA_128",
+ [50] = "KMCTR_TDEA_192",
+ [51] = "KMCTR_ENCRYPTED_DEA",
+ [52] = "KMCTR_ENCRYPTED_TDEA_128",
+ [53] = "KMCTR_ENCRYPTED_TDEA_192",
+ [54] = "KMCTR_AES_128",
+ [55] = "KMCTR_AES_192",
+ [56] = "KMCTR_AES_256",
+ [57] = "KMCTR_ENCRYPTED_AES_128",
+ [58] = "KMCTR_ENCRYPTED_AES_192",
+ [59] = "KMCTR_ENCRYPTED_AES_256",
+ [60] = "KMO_DEA",
+ [61] = "KMO_TDEA_128",
+ [62] = "KMO_TDEA_192",
+ [63] = "KMO_ENCRYPTED_DEA",
+ [64] = "KMO_ENCRYPTED_TDEA_128",
+ [65] = "KMO_ENCRYPTED_TDEA_192",
+ [66] = "KMO_AES_128",
+ [67] = "KMO_AES_192",
+ [68] = "KMO_AES_256",
+ [69] = "KMO_ENCRYPTED_AES_128",
+ [70] = "KMO_ENCRYPTED_AES_192",
+ [71] = "KMO_ENCRYPTED_AES_256",
+ [72] = "KIMD_SHA_1",
+ [73] = "KIMD_SHA_256",
+ [74] = "KIMD_SHA_512",
+ [75] = "KIMD_SHA3_224",
+ [76] = "KIMD_SHA3_256",
+ [77] = "KIMD_SHA3_384",
+ [78] = "KIMD_SHA3_512",
+ [79] = "KIMD_SHAKE_128",
+ [80] = "KIMD_SHAKE_256",
+ [81] = "KIMD_GHASH",
+ [82] = "KLMD_SHA_1",
+ [83] = "KLMD_SHA_256",
+ [84] = "KLMD_SHA_512",
+ [85] = "KLMD_SHA3_224",
+ [86] = "KLMD_SHA3_256",
+ [87] = "KLMD_SHA3_384",
+ [88] = "KLMD_SHA3_512",
+ [89] = "KLMD_SHAKE_128",
+ [90] = "KLMD_SHAKE_256",
+ [91] = "KMAC_DEA",
+ [92] = "KMAC_TDEA_128",
+ [93] = "KMAC_TDEA_192",
+ [94] = "KMAC_ENCRYPTED_DEA",
+ [95] = "KMAC_ENCRYPTED_TDEA_128",
+ [96] = "KMAC_ENCRYPTED_TDEA_192",
+ [97] = "KMAC_AES_128",
+ [98] = "KMAC_AES_192",
+ [99] = "KMAC_AES_256",
+ [100] = "KMAC_ENCRYPTED_AES_128",
+ [101] = "KMAC_ENCRYPTED_AES_192",
+ [102] = "KMAC_ENCRYPTED_AES_256",
+ [103] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_DEA",
+ [104] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_128",
+ [105] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_192",
+ [106] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_DEA",
+ [107] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_128",
+ [108] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_192",
+ [109] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_128",
+ [110] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_192",
+ [111] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_256",
+ [112] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_128",
+ [113] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_192",
+ [114] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256",
+ [115] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_128",
+ [116] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_256",
+ [117] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_128",
+ [118] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_256",
+ [119] = "PCC_SCALAR_MULTIPLY_P256",
+ [120] = "PCC_SCALAR_MULTIPLY_P384",
+ [121] = "PCC_SCALAR_MULTIPLY_P521",
+ [122] = "PCC_SCALAR_MULTIPLY_ED25519",
+ [123] = "PCC_SCALAR_MULTIPLY_ED448",
+ [124] = "PCC_SCALAR_MULTIPLY_X25519",
+ [125] = "PCC_SCALAR_MULTIPLY_X448",
+ [126] = "PRNO_SHA_512_DRNG",
+ [127] = "PRNO_TRNG_QUERY_RAW_TO_CONDITIONED_RATIO",
+ [128] = "PRNO_TRNG",
+ [129] = "KDSA_ECDSA_VERIFY_P256",
+ [130] = "KDSA_ECDSA_VERIFY_P384",
+ [131] = "KDSA_ECDSA_VERIFY_P521",
+ [132] = "KDSA_ECDSA_SIGN_P256",
+ [133] = "KDSA_ECDSA_SIGN_P384",
+ [134] = "KDSA_ECDSA_SIGN_P521",
+ [135] = "KDSA_ENCRYPTED_ECDSA_SIGN_P256",
+ [136] = "KDSA_ENCRYPTED_ECDSA_SIGN_P384",
+ [137] = "KDSA_ENCRYPTED_ECDSA_SIGN_P521",
+ [138] = "KDSA_EDDSA_VERIFY_ED25519",
+ [139] = "KDSA_EDDSA_VERIFY_ED448",
+ [140] = "KDSA_EDDSA_SIGN_ED25519",
+ [141] = "KDSA_EDDSA_SIGN_ED448",
+ [142] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED25519",
+ [143] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED448",
+ [144] = "PCKMO_ENCRYPT_DEA_KEY",
+ [145] = "PCKMO_ENCRYPT_TDEA_128_KEY",
+ [146] = "PCKMO_ENCRYPT_TDEA_192_KEY",
+ [147] = "PCKMO_ENCRYPT_AES_128_KEY",
+ [148] = "PCKMO_ENCRYPT_AES_192_KEY",
+ [149] = "PCKMO_ENCRYPT_AES_256_KEY",
+ [150] = "PCKMO_ENCRYPT_ECC_P256_KEY",
+ [151] = "PCKMO_ENCRYPT_ECC_P384_KEY",
+ [152] = "PCKMO_ENCRYPT_ECC_P521_KEY",
+ [153] = "PCKMO_ENCRYPT_ECC_ED25519_KEY",
+ [154] = "PCKMO_ENCRYPT_ECC_ED448_KEY",
+ [155] = "IBM_RESERVED_155",
+ [156] = "IBM_RESERVED_156",
+ [157] = "KM_FULL_XTS_AES_128",
+ [158] = "KM_FULL_XTS_AES_256",
+ [159] = "KM_FULL_XTS_ENCRYPTED_AES_128",
+ [160] = "KM_FULL_XTS_ENCRYPTED_AES_256",
+ [161] = "KMAC_HMAC_SHA_224",
+ [162] = "KMAC_HMAC_SHA_256",
+ [163] = "KMAC_HMAC_SHA_384",
+ [164] = "KMAC_HMAC_SHA_512",
+ [165] = "KMAC_HMAC_ENCRYPTED_SHA_224",
+ [166] = "KMAC_HMAC_ENCRYPTED_SHA_256",
+ [167] = "KMAC_HMAC_ENCRYPTED_SHA_384",
+ [168] = "KMAC_HMAC_ENCRYPTED_SHA_512",
+ [169] = "PCKMO_ENCRYPT_HMAC_512_KEY",
+ [170] = "PCKMO_ENCRYPT_HMAC_1024_KEY",
+ [171] = "PCKMO_ENCRYPT_AES_XTS_128",
+ [172] = "PCKMO_ENCRYPT_AES_XTS_256",
+};
+
+static struct attribute *paiext_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group paiext_events_group = {
+ .name = "events",
+ .attrs = NULL, /* Filled in attr_event_init() */
+};
+
+static struct attribute_group paiext_format_group = {
+ .name = "format",
+ .attrs = paiext_format_attr,
+};
+
+static const struct attribute_group *paiext_attr_groups[] = {
+ &paiext_events_group,
+ &paiext_format_group,
+ NULL,
+};
+
+/* Performance monitoring unit for mapped counters */
+static struct pmu paiext = {
+ .task_ctx_nr = perf_hw_context,
+ .event_init = paiext_event_init,
+ .add = paiext_add,
+ .del = paiext_del,
+ .start = paiext_start,
+ .stop = paiext_stop,
+ .read = paiext_read,
+ .sched_task = paiext_sched_task,
+ .attr_groups = paiext_attr_groups,
+};
+
+/* List of symbolic PAI extension 1 NNPA counter names. */
+static const char * const paiext_ctrnames[] = {
+ [0] = "NNPA_ALL",
+ [1] = "NNPA_ADD",
+ [2] = "NNPA_SUB",
+ [3] = "NNPA_MUL",
+ [4] = "NNPA_DIV",
+ [5] = "NNPA_MIN",
+ [6] = "NNPA_MAX",
+ [7] = "NNPA_LOG",
+ [8] = "NNPA_EXP",
+ [9] = "NNPA_IBM_RESERVED_9",
+ [10] = "NNPA_RELU",
+ [11] = "NNPA_TANH",
+ [12] = "NNPA_SIGMOID",
+ [13] = "NNPA_SOFTMAX",
+ [14] = "NNPA_BATCHNORM",
+ [15] = "NNPA_MAXPOOL2D",
+ [16] = "NNPA_AVGPOOL2D",
+ [17] = "NNPA_LSTMACT",
+ [18] = "NNPA_GRUACT",
+ [19] = "NNPA_CONVOLUTION",
+ [20] = "NNPA_MATMUL_OP",
+ [21] = "NNPA_MATMUL_OP_BCAST23",
+ [22] = "NNPA_SMALLBATCH",
+ [23] = "NNPA_LARGEDIM",
+ [24] = "NNPA_SMALLTENSOR",
+ [25] = "NNPA_1MFRAME",
+ [26] = "NNPA_2GFRAME",
+ [27] = "NNPA_ACCESSEXCEPT",
+ [28] = "NNPA_TRANSFORM",
+ [29] = "NNPA_GELU",
+ [30] = "NNPA_MOMENTS",
+ [31] = "NNPA_LAYERNORM",
+ [32] = "NNPA_MATMUL_OP_BCAST1",
+ [33] = "NNPA_SQRT",
+ [34] = "NNPA_INVSQRT",
+ [35] = "NNPA_NORM",
+ [36] = "NNPA_REDUCE",
+};
+
+static void __init attr_event_free(struct attribute **attrs)
+{
+ struct perf_pmu_events_attr *pa;
+ unsigned int i;
+
+ for (i = 0; attrs[i]; i++) {
+ struct device_attribute *dap;
+
+ dap = container_of(attrs[i], struct device_attribute, attr);
+ pa = container_of(dap, struct perf_pmu_events_attr, attr);
+ kfree(pa);
+ }
+ kfree(attrs);
+}
+
+static struct attribute * __init attr_event_init_one(int num,
+ unsigned long base,
+ const char *name)
+{
+ struct perf_pmu_events_attr *pa;
+
+ pa = kzalloc(sizeof(*pa), GFP_KERNEL);
+ if (!pa)
+ return NULL;
+
+ sysfs_attr_init(&pa->attr.attr);
+ pa->id = base + num;
+ pa->attr.attr.name = name;
+ pa->attr.attr.mode = 0444;
+ pa->attr.show = cpumf_events_sysfs_show;
+ pa->attr.store = NULL;
+ return &pa->attr.attr;
+}
+
+static struct attribute ** __init attr_event_init(struct pai_pmu *p)
+{
+ unsigned int min_attr = min_t(unsigned int, p->num_named, p->num_avail);
+ struct attribute **attrs;
+ unsigned int i;
+
+ attrs = kmalloc_array(min_attr + 1, sizeof(*attrs), GFP_KERNEL | __GFP_ZERO);
+ if (!attrs)
+ goto out;
+ for (i = 0; i < min_attr; i++) {
+ attrs[i] = attr_event_init_one(i, p->base, p->names[i]);
+ if (!attrs[i]) {
+ attr_event_free(attrs);
+ attrs = NULL;
+ goto out;
+ }
+ }
+ attrs[i] = NULL;
+out:
+ return attrs;
+}
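attr_event_init() publishes one sysfs attribute per named counter, and cpumf_events_sysfs_show() renders the id as "event=0x%04llx". Assuming PAI_CRYPTO_BASE is 0x1000, as the attribute comment above states, reading /sys/bus/event_source/devices/pai_crypto/events/KM_AES_128 (counter number 7) would be expected to yield "event=0x1007".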
+
+static void __init pai_pmu_exit(struct pai_pmu *p)
+{
+ attr_event_free(p->event_group->attrs);
+ p->event_group->attrs = NULL;
+}
+
+/* Add a PMU. Install its events and register the PMU device driver
+ * call back functions.
+ */
+static int __init pai_pmu_init(struct pai_pmu *p)
+{
+ int rc = -ENOMEM;
+
+ /* Export known PAI events */
+ p->event_group->attrs = attr_event_init(p);
+ if (!p->event_group->attrs) {
+ pr_err("Creation of PMU %s /sysfs failed\n", p->pmuname);
+ goto out;
+ }
+
+ rc = perf_pmu_register(p->pmu, p->pmuname, -1);
+ if (rc) {
+ pai_pmu_exit(p);
+ pr_err("Registering PMU %s failed with rc=%i\n", p->pmuname,
+ rc);
+ }
+out:
+ return rc;
+}
+
+/* PAI PMU characteristics table */
+static struct pai_pmu pai_pmu[] __refdata = {
+ [PAI_PMU_CRYPTO] = {
+ .pmuname = "pai_crypto",
+ .facility_nr = 196,
+ .num_named = ARRAY_SIZE(paicrypt_ctrnames),
+ .names = paicrypt_ctrnames,
+ .base = PAI_CRYPTO_BASE,
+ .kernel_offset = PAI_CRYPTO_KERNEL_OFFSET,
+ .area_size = PAGE_SIZE,
+ .init = pai_pmu_init,
+ .exit = pai_pmu_exit,
+ .pmu = &paicrypt,
+ .event_group = &paicrypt_events_group
+ },
+ [PAI_PMU_EXT] = {
+ .pmuname = "pai_ext",
+ .facility_nr = 197,
+ .num_named = ARRAY_SIZE(paiext_ctrnames),
+ .names = paiext_ctrnames,
+ .base = PAI_NNPA_BASE,
+ .kernel_offset = 0,
+ .area_size = PAIE1_CTRBLOCK_SZ,
+ .init = pai_pmu_init,
+ .exit = pai_pmu_exit,
+ .pmu = &paiext,
+ .event_group = &paiext_events_group
+ }
+};
+
+/*
+ * Check if the PMU (via facility) is supported by the machine. Try all of the
+ * supported PAI PMUs.
+ * Return number of successfully installed PMUs.
+ */
+static int __init paipmu_setup(void)
+{
+ struct qpaci_info_block ib;
+ int install_ok = 0, rc;
+ struct pai_pmu *p;
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(pai_pmu); ++i) {
+ p = &pai_pmu[i];
+
+ if (!test_facility(p->facility_nr))
+ continue;
+
+ qpaci(&ib);
+ switch (i) {
+ case PAI_PMU_CRYPTO:
+ p->num_avail = ib.num_cc;
+ if (p->num_avail >= PAI_CRYPTO_MAXCTR) {
+ pr_err("Too many PMU %s counters %d\n",
+ p->pmuname, p->num_avail);
+ continue;
+ }
+ break;
+ case PAI_PMU_EXT:
+ p->num_avail = ib.num_nnpa;
+ break;
+ }
+ p->num_avail += 1; /* Add xxx_ALL event */
+ if (p->init) {
+ rc = p->init(p);
+ if (!rc)
+ ++install_ok;
+ }
+ }
+ return install_ok;
+}
+
+static int __init pai_init(void)
+{
+ /* Setup s390dbf facility */
+ paidbg = debug_register("pai", 32, 256, 128);
+ if (!paidbg) {
+ pr_err("Registration of s390dbf pai failed\n");
+ return -ENOMEM;
+ }
+ debug_register_view(paidbg, &debug_sprintf_view);
+
+ if (!paipmu_setup()) {
+ /* No PMU registration, no need for debug buffer */
+ debug_unregister_view(paidbg, &debug_sprintf_view);
+ debug_unregister(paidbg);
+ return -ENODEV;
+ }
+ return 0;
+}
+
+device_initcall(pai_init);
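The PMUs set up above are consumed through the regular perf interface. Below is a minimal user-space sketch (illustration only, not part of this patch): it assumes the dynamic PMU type value is read from /sys/bus/event_source/devices/pai_crypto/type (the 10 used here is only a placeholder) and that KM_AES_128 corresponds to config 0x1007, i.e. PAI_CRYPTO_BASE 0x1000 plus counter number 7, as described by the attribute comments later in this diff.

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = 10;		/* placeholder: read .../pai_crypto/type */
	attr.config = 0x1007;	/* assumed: PAI_CRYPTO_BASE + KM_AES_128 */

	/* PAI events are CPU wide only: pid = -1, cpu = 0. */
	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	sleep(1);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("KM_AES_128 on cpu 0: %lld\n", count);
	close(fd);
	return 0;
}

The same counting can be done without writing code through the sysfs event aliases, e.g. something like perf stat -e pai_crypto/KM_AES_128/ -a -- sleep 1.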
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
deleted file mode 100644
index 4ad472d130a3..000000000000
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ /dev/null
@@ -1,804 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Performance event support - Processor Activity Instrumentation Facility
- *
- * Copyright IBM Corp. 2022
- * Author(s): Thomas Richter <tmricht@linux.ibm.com>
- */
-#define KMSG_COMPONENT "pai_crypto"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/kernel_stat.h>
-#include <linux/percpu.h>
-#include <linux/notifier.h>
-#include <linux/init.h>
-#include <linux/export.h>
-#include <linux/io.h>
-#include <linux/perf_event.h>
-#include <asm/ctlreg.h>
-#include <asm/pai.h>
-#include <asm/debug.h>
-
-static debug_info_t *cfm_dbg;
-static unsigned int paicrypt_cnt; /* Size of the mapped counter sets */
- /* extracted with QPACI instruction */
-
-DEFINE_STATIC_KEY_FALSE(pai_key);
-
-struct pai_userdata {
- u16 num;
- u64 value;
-} __packed;
-
-struct paicrypt_map {
- unsigned long *page; /* Page for CPU to store counters */
- struct pai_userdata *save; /* Page to store no-zero counters */
- unsigned int active_events; /* # of PAI crypto users */
- refcount_t refcnt; /* Reference count mapped buffers */
- enum paievt_mode mode; /* Type of event */
- struct perf_event *event; /* Perf event for sampling */
-};
-
-struct paicrypt_mapptr {
- struct paicrypt_map *mapptr;
-};
-
-static struct paicrypt_root { /* Anchor to per CPU data */
- refcount_t refcnt; /* Overall active events */
- struct paicrypt_mapptr __percpu *mapptr;
-} paicrypt_root;
-
-/* Free per CPU data when the last event is removed. */
-static void paicrypt_root_free(void)
-{
- if (refcount_dec_and_test(&paicrypt_root.refcnt)) {
- free_percpu(paicrypt_root.mapptr);
- paicrypt_root.mapptr = NULL;
- }
- debug_sprintf_event(cfm_dbg, 5, "%s root.refcount %d\n", __func__,
- refcount_read(&paicrypt_root.refcnt));
-}
-
-/*
- * On initialization of first event also allocate per CPU data dynamically.
- * Start with an array of pointers, the array size is the maximum number of
- * CPUs possible, which might be larger than the number of CPUs currently
- * online.
- */
-static int paicrypt_root_alloc(void)
-{
- if (!refcount_inc_not_zero(&paicrypt_root.refcnt)) {
- /* The memory is already zeroed. */
- paicrypt_root.mapptr = alloc_percpu(struct paicrypt_mapptr);
- if (!paicrypt_root.mapptr)
- return -ENOMEM;
- refcount_set(&paicrypt_root.refcnt, 1);
- }
- return 0;
-}
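For illustration only, here is a user-space sketch of the lazy first-reference allocation pattern used by paicrypt_root_alloc() above (and by its paiext twin later in this diff). A plain counter under a pthread mutex stands in for refcount_t and the drivers' reserve mutex; none of these names exist in the patch itself.

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t root_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int root_refcnt;
static void *root_data;

/* Allocate shared data on the first reference, bump the count otherwise. */
static int root_get(size_t size)
{
	int rc = 0;

	pthread_mutex_lock(&root_lock);
	if (root_refcnt == 0) {
		root_data = calloc(1, size);
		if (root_data)
			root_refcnt = 1;
		else
			rc = -1;
	} else {
		root_refcnt++;
	}
	pthread_mutex_unlock(&root_lock);
	return rc;
}

/* Free the shared data when the last reference is dropped. */
static void root_put(void)
{
	pthread_mutex_lock(&root_lock);
	if (root_refcnt && --root_refcnt == 0) {
		free(root_data);
		root_data = NULL;
	}
	pthread_mutex_unlock(&root_lock);
}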
-
-/* Release the PMU if event is the last perf event */
-static DEFINE_MUTEX(pai_reserve_mutex);
-
-/* Adjust usage counters and remove allocated memory when all users are
- * gone.
- */
-static void paicrypt_event_destroy(struct perf_event *event)
-{
- struct paicrypt_mapptr *mp = per_cpu_ptr(paicrypt_root.mapptr,
- event->cpu);
- struct paicrypt_map *cpump = mp->mapptr;
-
- static_branch_dec(&pai_key);
- mutex_lock(&pai_reserve_mutex);
- debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d"
- " mode %d refcnt %u\n", __func__,
- event->attr.config, event->cpu,
- cpump->active_events, cpump->mode,
- refcount_read(&cpump->refcnt));
- free_page(PAI_SAVE_AREA(event));
- if (refcount_dec_and_test(&cpump->refcnt)) {
- debug_sprintf_event(cfm_dbg, 4, "%s page %#lx save %p\n",
- __func__, (unsigned long)cpump->page,
- cpump->save);
- free_page((unsigned long)cpump->page);
- kvfree(cpump->save);
- kfree(cpump);
- mp->mapptr = NULL;
- }
- paicrypt_root_free();
- mutex_unlock(&pai_reserve_mutex);
-}
-
-static u64 paicrypt_getctr(unsigned long *page, int nr, bool kernel)
-{
- if (kernel)
- nr += PAI_CRYPTO_MAXCTR;
- return page[nr];
-}
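As an aside (illustration only): paicrypt_getctr() above relies on the mapped page holding one 64-bit slot per counter for user-space execution followed by the same set of slots for kernel execution, offset by PAI_CRYPTO_MAXCTR entries. Assuming a value of 256 for that constant, both copies fill exactly the one 4 KiB page that paicrypt_busy() allocates per CPU, which a compile-time check outside the patch could express as:

#include <stdint.h>

#define EXAMPLE_PAI_CRYPTO_MAXCTR 256	/* assumed value, for illustration */

/* User and kernel counter sets together must fit the one per-CPU page. */
_Static_assert(2 * EXAMPLE_PAI_CRYPTO_MAXCTR * sizeof(uint64_t) == 4096,
	       "user + kernel counter sets fill exactly one page");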
-
-/* Read the counter values. Return value from location in CMP. For event
- * CRYPTO_ALL sum up all events.
- */
-static u64 paicrypt_getdata(struct perf_event *event, bool kernel)
-{
- struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
- struct paicrypt_map *cpump = mp->mapptr;
- u64 sum = 0;
- int i;
-
- if (event->attr.config != PAI_CRYPTO_BASE) {
- return paicrypt_getctr(cpump->page,
- event->attr.config - PAI_CRYPTO_BASE,
- kernel);
- }
-
- for (i = 1; i <= paicrypt_cnt; i++) {
- u64 val = paicrypt_getctr(cpump->page, i, kernel);
-
- if (!val)
- continue;
- sum += val;
- }
- return sum;
-}
-
-static u64 paicrypt_getall(struct perf_event *event)
-{
- u64 sum = 0;
-
- if (!event->attr.exclude_kernel)
- sum += paicrypt_getdata(event, true);
- if (!event->attr.exclude_user)
- sum += paicrypt_getdata(event, false);
-
- return sum;
-}
-
-/* Used to avoid races in checking concurrent access of counting and
- * sampling for crypto events
- *
- * Only one instance of event pai_crypto/CRYPTO_ALL/ for sampling is
- * allowed and when this event is running, no counting event is allowed.
- * Several counting events are allowed in parallel, but no sampling event
- * is allowed while one (or more) counting events are running.
- *
- * This function is called in process context and it is safe to block.
- * When the event initialization function fails, no other callback will
- * be invoked.
- *
- * Allocate the memory for the event.
- */
-static struct paicrypt_map *paicrypt_busy(struct perf_event *event)
-{
- struct perf_event_attr *a = &event->attr;
- struct paicrypt_map *cpump = NULL;
- struct paicrypt_mapptr *mp;
- int rc;
-
- mutex_lock(&pai_reserve_mutex);
-
- /* Allocate root node */
- rc = paicrypt_root_alloc();
- if (rc)
- goto unlock;
-
- /* Allocate node for this event */
- mp = per_cpu_ptr(paicrypt_root.mapptr, event->cpu);
- cpump = mp->mapptr;
- if (!cpump) { /* Paicrypt_map allocated? */
- cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
- if (!cpump) {
- rc = -ENOMEM;
- goto free_root;
- }
- }
-
- if (a->sample_period) { /* Sampling requested */
- if (cpump->mode != PAI_MODE_NONE)
- rc = -EBUSY; /* ... sampling/counting active */
- } else { /* Counting requested */
- if (cpump->mode == PAI_MODE_SAMPLING)
- rc = -EBUSY; /* ... and sampling active */
- }
- /*
- * This error case triggers when there is a conflict:
- * Either sampling requested and counting already active, or vice
- * versa. Therefore the struct paicrypt_map for this CPU is
- * needed or the error could not have occurred. Only adjust root
- * node refcount.
- */
- if (rc)
- goto free_root;
-
- /* Allocate memory for counter page and counter extraction.
- * Only the first counting event has to allocate a page.
- */
- if (cpump->page) {
- refcount_inc(&cpump->refcnt);
- goto unlock;
- }
-
- rc = -ENOMEM;
- cpump->page = (unsigned long *)get_zeroed_page(GFP_KERNEL);
- if (!cpump->page)
- goto free_paicrypt_map;
- cpump->save = kvmalloc_array(paicrypt_cnt + 1,
- sizeof(struct pai_userdata), GFP_KERNEL);
- if (!cpump->save) {
- free_page((unsigned long)cpump->page);
- cpump->page = NULL;
- goto free_paicrypt_map;
- }
-
- /* Set mode and reference count */
- rc = 0;
- refcount_set(&cpump->refcnt, 1);
- cpump->mode = a->sample_period ? PAI_MODE_SAMPLING : PAI_MODE_COUNTING;
- mp->mapptr = cpump;
- debug_sprintf_event(cfm_dbg, 5, "%s sample_period %#llx users %d"
- " mode %d refcnt %u page %#lx save %p rc %d\n",
- __func__, a->sample_period, cpump->active_events,
- cpump->mode, refcount_read(&cpump->refcnt),
- (unsigned long)cpump->page, cpump->save, rc);
- goto unlock;
-
-free_paicrypt_map:
- kfree(cpump);
- mp->mapptr = NULL;
-free_root:
- paicrypt_root_free();
-
-unlock:
- mutex_unlock(&pai_reserve_mutex);
- return rc ? ERR_PTR(rc) : cpump;
-}
-
-/* Might be called on different CPU than the one the event is intended for. */
-static int paicrypt_event_init(struct perf_event *event)
-{
- struct perf_event_attr *a = &event->attr;
- struct paicrypt_map *cpump;
- int rc = 0;
-
- /* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */
- if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
- return -ENOENT;
- /* PAI crypto event must be in valid range */
- if (a->config < PAI_CRYPTO_BASE ||
- a->config > PAI_CRYPTO_BASE + paicrypt_cnt)
- return -EINVAL;
- /* Allow only CPU wide operation, no process context for now. */
- if ((event->attach_state & PERF_ATTACH_TASK) || event->cpu == -1)
- return -ENOENT;
- /* Allow only CRYPTO_ALL for sampling. */
- if (a->sample_period && a->config != PAI_CRYPTO_BASE)
- return -EINVAL;
- /* Get a page to store last counter values for sampling */
- if (a->sample_period) {
- PAI_SAVE_AREA(event) = get_zeroed_page(GFP_KERNEL);
- if (!PAI_SAVE_AREA(event)) {
- rc = -ENOMEM;
- goto out;
- }
- }
-
- cpump = paicrypt_busy(event);
- if (IS_ERR(cpump)) {
- free_page(PAI_SAVE_AREA(event));
- rc = PTR_ERR(cpump);
- goto out;
- }
-
- event->destroy = paicrypt_event_destroy;
-
- if (a->sample_period) {
- a->sample_period = 1;
- a->freq = 0;
- /* Register for paicrypt_sched_task() to be called */
- event->attach_state |= PERF_ATTACH_SCHED_CB;
- /* Add raw data which contain the memory mapped counters */
- a->sample_type |= PERF_SAMPLE_RAW;
- /* Turn off inheritance */
- a->inherit = 0;
- }
-
- static_branch_inc(&pai_key);
-out:
- return rc;
-}
-
-static void paicrypt_read(struct perf_event *event)
-{
- u64 prev, new, delta;
-
- prev = local64_read(&event->hw.prev_count);
- new = paicrypt_getall(event);
- local64_set(&event->hw.prev_count, new);
- delta = (prev <= new) ? new - prev
- : (-1ULL - prev) + new + 1; /* overflow */
- local64_add(delta, &event->count);
-}
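A small standalone sketch (not part of the patch) of the wrap-around delta computed in paicrypt_read() above: when the new reading is smaller than the previous one, the 64-bit counter has wrapped, so the distance is taken modulo 2^64 rather than going negative.

#include <stdio.h>

static unsigned long long counter_delta(unsigned long long prev,
					unsigned long long new)
{
	if (prev <= new)
		return new - prev;
	return (-1ULL - prev) + new + 1;	/* 2^64 - prev + new */
}

int main(void)
{
	/* prev near the top of the range, new just past the wrap: delta 32 */
	printf("%llu\n", counter_delta(0xfffffffffffffff0ULL, 0x10ULL));
	return 0;
}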
-
-static void paicrypt_start(struct perf_event *event, int flags)
-{
- struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
- struct paicrypt_map *cpump = mp->mapptr;
- u64 sum;
-
- if (!event->attr.sample_period) { /* Counting */
- sum = paicrypt_getall(event); /* Get current value */
- local64_set(&event->hw.prev_count, sum);
- } else { /* Sampling */
- cpump->event = event;
- perf_sched_cb_inc(event->pmu);
- }
-}
-
-static int paicrypt_add(struct perf_event *event, int flags)
-{
- struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
- struct paicrypt_map *cpump = mp->mapptr;
- unsigned long ccd;
-
- if (++cpump->active_events == 1) {
- ccd = virt_to_phys(cpump->page) | PAI_CRYPTO_KERNEL_OFFSET;
- WRITE_ONCE(S390_lowcore.ccd, ccd);
- local_ctl_set_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
- }
- if (flags & PERF_EF_START)
- paicrypt_start(event, PERF_EF_RELOAD);
- event->hw.state = 0;
- return 0;
-}
-
-static void paicrypt_stop(struct perf_event *event, int flags)
-{
- struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
- struct paicrypt_map *cpump = mp->mapptr;
-
- if (!event->attr.sample_period) { /* Counting */
- paicrypt_read(event);
- } else { /* Sampling */
- perf_sched_cb_dec(event->pmu);
- cpump->event = NULL;
- }
- event->hw.state = PERF_HES_STOPPED;
-}
-
-static void paicrypt_del(struct perf_event *event, int flags)
-{
- struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
- struct paicrypt_map *cpump = mp->mapptr;
-
- paicrypt_stop(event, PERF_EF_UPDATE);
- if (--cpump->active_events == 0) {
- local_ctl_clear_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
- WRITE_ONCE(S390_lowcore.ccd, 0);
- }
-}
-
-/* Create raw data and save it in buffer. Calculate the delta for each
- * counter between this invocation and the last invocation.
- * Returns number of bytes copied.
- * Saves only entries with positive counter difference of the form
- * 2 bytes: Number of counter
- * 8 bytes: Value of counter
- */
-static size_t paicrypt_copy(struct pai_userdata *userdata, unsigned long *page,
- unsigned long *page_old, bool exclude_user,
- bool exclude_kernel)
-{
- int i, outidx = 0;
-
- for (i = 1; i <= paicrypt_cnt; i++) {
- u64 val = 0, val_old = 0;
-
- if (!exclude_kernel) {
- val += paicrypt_getctr(page, i, true);
- val_old += paicrypt_getctr(page_old, i, true);
- }
- if (!exclude_user) {
- val += paicrypt_getctr(page, i, false);
- val_old += paicrypt_getctr(page_old, i, false);
- }
- if (val >= val_old)
- val -= val_old;
- else
- val = (~0ULL - val_old) + val + 1;
- if (val) {
- userdata[outidx].num = i;
- userdata[outidx].value = val;
- outidx++;
- }
- }
- return outidx * sizeof(struct pai_userdata);
-}
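The raw sample payload built by paicrypt_copy() above (and by the analogous paiext_copy() later in this diff) is a packed stream of 10-byte entries: a 2-byte counter number followed by an 8-byte delta. An illustrative user-space decoder for such a PERF_SAMPLE_RAW fragment might look as follows; the names are made up for the sketch.

#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include <stdio.h>

struct pai_sample_entry {
	uint16_t num;		/* counter number */
	uint64_t value;		/* delta since previous context switch */
} __attribute__((packed));

static void decode_pai_raw(const void *buf, size_t size)
{
	struct pai_sample_entry entry;
	size_t off;

	for (off = 0; off + sizeof(entry) <= size; off += sizeof(entry)) {
		memcpy(&entry, (const char *)buf + off, sizeof(entry));
		printf("counter %u delta %llu\n", entry.num,
		       (unsigned long long)entry.value);
	}
}

int main(void)
{
	/* One fabricated entry: counter 7 (KM_AES_128) incremented by 3. */
	struct pai_sample_entry demo = { .num = 7, .value = 3 };

	decode_pai_raw(&demo, sizeof(demo));
	return 0;
}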
-
-static int paicrypt_push_sample(size_t rawsize, struct paicrypt_map *cpump,
- struct perf_event *event)
-{
- struct perf_sample_data data;
- struct perf_raw_record raw;
- struct pt_regs regs;
- int overflow;
-
- /* Setup perf sample */
- memset(&regs, 0, sizeof(regs));
- memset(&raw, 0, sizeof(raw));
- memset(&data, 0, sizeof(data));
- perf_sample_data_init(&data, 0, event->hw.last_period);
- if (event->attr.sample_type & PERF_SAMPLE_TID) {
- data.tid_entry.pid = task_tgid_nr(current);
- data.tid_entry.tid = task_pid_nr(current);
- }
- if (event->attr.sample_type & PERF_SAMPLE_TIME)
- data.time = event->clock();
- if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
- data.id = event->id;
- if (event->attr.sample_type & PERF_SAMPLE_CPU) {
- data.cpu_entry.cpu = smp_processor_id();
- data.cpu_entry.reserved = 0;
- }
- if (event->attr.sample_type & PERF_SAMPLE_RAW) {
- raw.frag.size = rawsize;
- raw.frag.data = cpump->save;
- perf_sample_save_raw_data(&data, &raw);
- }
-
- overflow = perf_event_overflow(event, &data, &regs);
- perf_event_update_userpage(event);
- /* Save crypto counter lowcore page after reading event data. */
- memcpy((void *)PAI_SAVE_AREA(event), cpump->page, PAGE_SIZE);
- return overflow;
-}
-
-/* Check if there is data to be saved on schedule out of a task. */
-static int paicrypt_have_sample(void)
-{
- struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
- struct paicrypt_map *cpump = mp->mapptr;
- struct perf_event *event = cpump->event;
- size_t rawsize;
- int rc = 0;
-
- if (!event) /* No event active */
- return 0;
- rawsize = paicrypt_copy(cpump->save, cpump->page,
- (unsigned long *)PAI_SAVE_AREA(event),
- cpump->event->attr.exclude_user,
- cpump->event->attr.exclude_kernel);
- if (rawsize) /* Incremented counters */
- rc = paicrypt_push_sample(rawsize, cpump, event);
- return rc;
-}
-
-/* Called on schedule-in and schedule-out. No access to event structure,
- * but for sampling only event CRYPTO_ALL is allowed.
- */
-static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
-{
- /* We started with a clean page on event installation. So read out
- * results on schedule_out and if page was dirty, clear values.
- */
- if (!sched_in)
- paicrypt_have_sample();
-}
-
-/* Attribute definitions for paicrypt interface. As with other CPU
- * Measurement Facilities, there is one attribute per mapped counter.
- * The number of mapped counters may vary per machine generation. Use
- * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction
- * to determine the number of mapped counters. The instruction returns
- * a positive number, which is the highest number of supported counters.
- * All counters less than this number are also supported, there are no
- * holes. A returned number of zero means no support for mapped counters.
- *
- * The identification of the counter is a unique number. The chosen range
- * is 0x1000 + offset in mapped kernel page.
- * All CPU Measurement Facility counters identifiers must be unique and
- * the numbers from 0 to 496 are already used for the CPU Measurement
- * Counter facility. Numbers 0xb0000, 0xbc000 and 0xbd000 are already
- * used for the CPU Measurement Sampling facility.
- */
-PMU_FORMAT_ATTR(event, "config:0-63");
-
-static struct attribute *paicrypt_format_attr[] = {
- &format_attr_event.attr,
- NULL,
-};
-
-static struct attribute_group paicrypt_events_group = {
- .name = "events",
- .attrs = NULL /* Filled in attr_event_init() */
-};
-
-static struct attribute_group paicrypt_format_group = {
- .name = "format",
- .attrs = paicrypt_format_attr,
-};
-
-static const struct attribute_group *paicrypt_attr_groups[] = {
- &paicrypt_events_group,
- &paicrypt_format_group,
- NULL,
-};
-
-/* Performance monitoring unit for mapped counters */
-static struct pmu paicrypt = {
- .task_ctx_nr = perf_invalid_context,
- .event_init = paicrypt_event_init,
- .add = paicrypt_add,
- .del = paicrypt_del,
- .start = paicrypt_start,
- .stop = paicrypt_stop,
- .read = paicrypt_read,
- .sched_task = paicrypt_sched_task,
- .attr_groups = paicrypt_attr_groups
-};
-
-/* List of symbolic PAI counter names. */
-static const char * const paicrypt_ctrnames[] = {
- [0] = "CRYPTO_ALL",
- [1] = "KM_DEA",
- [2] = "KM_TDEA_128",
- [3] = "KM_TDEA_192",
- [4] = "KM_ENCRYPTED_DEA",
- [5] = "KM_ENCRYPTED_TDEA_128",
- [6] = "KM_ENCRYPTED_TDEA_192",
- [7] = "KM_AES_128",
- [8] = "KM_AES_192",
- [9] = "KM_AES_256",
- [10] = "KM_ENCRYPTED_AES_128",
- [11] = "KM_ENCRYPTED_AES_192",
- [12] = "KM_ENCRYPTED_AES_256",
- [13] = "KM_XTS_AES_128",
- [14] = "KM_XTS_AES_256",
- [15] = "KM_XTS_ENCRYPTED_AES_128",
- [16] = "KM_XTS_ENCRYPTED_AES_256",
- [17] = "KMC_DEA",
- [18] = "KMC_TDEA_128",
- [19] = "KMC_TDEA_192",
- [20] = "KMC_ENCRYPTED_DEA",
- [21] = "KMC_ENCRYPTED_TDEA_128",
- [22] = "KMC_ENCRYPTED_TDEA_192",
- [23] = "KMC_AES_128",
- [24] = "KMC_AES_192",
- [25] = "KMC_AES_256",
- [26] = "KMC_ENCRYPTED_AES_128",
- [27] = "KMC_ENCRYPTED_AES_192",
- [28] = "KMC_ENCRYPTED_AES_256",
- [29] = "KMC_PRNG",
- [30] = "KMA_GCM_AES_128",
- [31] = "KMA_GCM_AES_192",
- [32] = "KMA_GCM_AES_256",
- [33] = "KMA_GCM_ENCRYPTED_AES_128",
- [34] = "KMA_GCM_ENCRYPTED_AES_192",
- [35] = "KMA_GCM_ENCRYPTED_AES_256",
- [36] = "KMF_DEA",
- [37] = "KMF_TDEA_128",
- [38] = "KMF_TDEA_192",
- [39] = "KMF_ENCRYPTED_DEA",
- [40] = "KMF_ENCRYPTED_TDEA_128",
- [41] = "KMF_ENCRYPTED_TDEA_192",
- [42] = "KMF_AES_128",
- [43] = "KMF_AES_192",
- [44] = "KMF_AES_256",
- [45] = "KMF_ENCRYPTED_AES_128",
- [46] = "KMF_ENCRYPTED_AES_192",
- [47] = "KMF_ENCRYPTED_AES_256",
- [48] = "KMCTR_DEA",
- [49] = "KMCTR_TDEA_128",
- [50] = "KMCTR_TDEA_192",
- [51] = "KMCTR_ENCRYPTED_DEA",
- [52] = "KMCTR_ENCRYPTED_TDEA_128",
- [53] = "KMCTR_ENCRYPTED_TDEA_192",
- [54] = "KMCTR_AES_128",
- [55] = "KMCTR_AES_192",
- [56] = "KMCTR_AES_256",
- [57] = "KMCTR_ENCRYPTED_AES_128",
- [58] = "KMCTR_ENCRYPTED_AES_192",
- [59] = "KMCTR_ENCRYPTED_AES_256",
- [60] = "KMO_DEA",
- [61] = "KMO_TDEA_128",
- [62] = "KMO_TDEA_192",
- [63] = "KMO_ENCRYPTED_DEA",
- [64] = "KMO_ENCRYPTED_TDEA_128",
- [65] = "KMO_ENCRYPTED_TDEA_192",
- [66] = "KMO_AES_128",
- [67] = "KMO_AES_192",
- [68] = "KMO_AES_256",
- [69] = "KMO_ENCRYPTED_AES_128",
- [70] = "KMO_ENCRYPTED_AES_192",
- [71] = "KMO_ENCRYPTED_AES_256",
- [72] = "KIMD_SHA_1",
- [73] = "KIMD_SHA_256",
- [74] = "KIMD_SHA_512",
- [75] = "KIMD_SHA3_224",
- [76] = "KIMD_SHA3_256",
- [77] = "KIMD_SHA3_384",
- [78] = "KIMD_SHA3_512",
- [79] = "KIMD_SHAKE_128",
- [80] = "KIMD_SHAKE_256",
- [81] = "KIMD_GHASH",
- [82] = "KLMD_SHA_1",
- [83] = "KLMD_SHA_256",
- [84] = "KLMD_SHA_512",
- [85] = "KLMD_SHA3_224",
- [86] = "KLMD_SHA3_256",
- [87] = "KLMD_SHA3_384",
- [88] = "KLMD_SHA3_512",
- [89] = "KLMD_SHAKE_128",
- [90] = "KLMD_SHAKE_256",
- [91] = "KMAC_DEA",
- [92] = "KMAC_TDEA_128",
- [93] = "KMAC_TDEA_192",
- [94] = "KMAC_ENCRYPTED_DEA",
- [95] = "KMAC_ENCRYPTED_TDEA_128",
- [96] = "KMAC_ENCRYPTED_TDEA_192",
- [97] = "KMAC_AES_128",
- [98] = "KMAC_AES_192",
- [99] = "KMAC_AES_256",
- [100] = "KMAC_ENCRYPTED_AES_128",
- [101] = "KMAC_ENCRYPTED_AES_192",
- [102] = "KMAC_ENCRYPTED_AES_256",
- [103] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_DEA",
- [104] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_128",
- [105] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_192",
- [106] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_DEA",
- [107] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_128",
- [108] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_192",
- [109] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_128",
- [110] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_192",
- [111] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_256",
- [112] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_128",
- [113] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_192",
- [114] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256A",
- [115] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_128",
- [116] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_256",
- [117] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_128",
- [118] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_256",
- [119] = "PCC_SCALAR_MULTIPLY_P256",
- [120] = "PCC_SCALAR_MULTIPLY_P384",
- [121] = "PCC_SCALAR_MULTIPLY_P521",
- [122] = "PCC_SCALAR_MULTIPLY_ED25519",
- [123] = "PCC_SCALAR_MULTIPLY_ED448",
- [124] = "PCC_SCALAR_MULTIPLY_X25519",
- [125] = "PCC_SCALAR_MULTIPLY_X448",
- [126] = "PRNO_SHA_512_DRNG",
- [127] = "PRNO_TRNG_QUERY_RAW_TO_CONDITIONED_RATIO",
- [128] = "PRNO_TRNG",
- [129] = "KDSA_ECDSA_VERIFY_P256",
- [130] = "KDSA_ECDSA_VERIFY_P384",
- [131] = "KDSA_ECDSA_VERIFY_P521",
- [132] = "KDSA_ECDSA_SIGN_P256",
- [133] = "KDSA_ECDSA_SIGN_P384",
- [134] = "KDSA_ECDSA_SIGN_P521",
- [135] = "KDSA_ENCRYPTED_ECDSA_SIGN_P256",
- [136] = "KDSA_ENCRYPTED_ECDSA_SIGN_P384",
- [137] = "KDSA_ENCRYPTED_ECDSA_SIGN_P521",
- [138] = "KDSA_EDDSA_VERIFY_ED25519",
- [139] = "KDSA_EDDSA_VERIFY_ED448",
- [140] = "KDSA_EDDSA_SIGN_ED25519",
- [141] = "KDSA_EDDSA_SIGN_ED448",
- [142] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED25519",
- [143] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED448",
- [144] = "PCKMO_ENCRYPT_DEA_KEY",
- [145] = "PCKMO_ENCRYPT_TDEA_128_KEY",
- [146] = "PCKMO_ENCRYPT_TDEA_192_KEY",
- [147] = "PCKMO_ENCRYPT_AES_128_KEY",
- [148] = "PCKMO_ENCRYPT_AES_192_KEY",
- [149] = "PCKMO_ENCRYPT_AES_256_KEY",
- [150] = "PCKMO_ENCRYPT_ECC_P256_KEY",
- [151] = "PCKMO_ENCRYPT_ECC_P384_KEY",
- [152] = "PCKMO_ENCRYPT_ECC_P521_KEY",
- [153] = "PCKMO_ENCRYPT_ECC_ED25519_KEY",
- [154] = "PCKMO_ENCRYPT_ECC_ED448_KEY",
- [155] = "IBM_RESERVED_155",
- [156] = "IBM_RESERVED_156",
-};
-
-static void __init attr_event_free(struct attribute **attrs, int num)
-{
- struct perf_pmu_events_attr *pa;
- int i;
-
- for (i = 0; i < num; i++) {
- struct device_attribute *dap;
-
- dap = container_of(attrs[i], struct device_attribute, attr);
- pa = container_of(dap, struct perf_pmu_events_attr, attr);
- kfree(pa);
- }
- kfree(attrs);
-}
-
-static int __init attr_event_init_one(struct attribute **attrs, int num)
-{
- struct perf_pmu_events_attr *pa;
-
- /* Index larger than array_size, no counter name available */
- if (num >= ARRAY_SIZE(paicrypt_ctrnames)) {
- attrs[num] = NULL;
- return 0;
- }
-
- pa = kzalloc(sizeof(*pa), GFP_KERNEL);
- if (!pa)
- return -ENOMEM;
-
- sysfs_attr_init(&pa->attr.attr);
- pa->id = PAI_CRYPTO_BASE + num;
- pa->attr.attr.name = paicrypt_ctrnames[num];
- pa->attr.attr.mode = 0444;
- pa->attr.show = cpumf_events_sysfs_show;
- pa->attr.store = NULL;
- attrs[num] = &pa->attr.attr;
- return 0;
-}
-
-/* Create PMU sysfs event attributes on the fly. */
-static int __init attr_event_init(void)
-{
- struct attribute **attrs;
- int ret, i;
-
- attrs = kmalloc_array(paicrypt_cnt + 2, sizeof(*attrs), GFP_KERNEL);
- if (!attrs)
- return -ENOMEM;
- for (i = 0; i <= paicrypt_cnt; i++) {
- ret = attr_event_init_one(attrs, i);
- if (ret) {
- attr_event_free(attrs, i);
- return ret;
- }
- }
- attrs[i] = NULL;
- paicrypt_events_group.attrs = attrs;
- return 0;
-}
-
-static int __init paicrypt_init(void)
-{
- struct qpaci_info_block ib;
- int rc;
-
- if (!test_facility(196))
- return 0;
-
- qpaci(&ib);
- paicrypt_cnt = ib.num_cc;
- if (paicrypt_cnt == 0)
- return 0;
- if (paicrypt_cnt >= PAI_CRYPTO_MAXCTR) {
- pr_err("Too many PMU pai_crypto counters %d\n", paicrypt_cnt);
- return -E2BIG;
- }
-
- rc = attr_event_init(); /* Export known PAI crypto events */
- if (rc) {
- pr_err("Creation of PMU pai_crypto /sysfs failed\n");
- return rc;
- }
-
- /* Setup s390dbf facility */
- cfm_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128);
- if (!cfm_dbg) {
- pr_err("Registration of s390dbf pai_crypto failed\n");
- return -ENOMEM;
- }
- debug_register_view(cfm_dbg, &debug_sprintf_view);
-
- rc = perf_pmu_register(&paicrypt, "pai_crypto", -1);
- if (rc) {
- pr_err("Registering the pai_crypto PMU failed with rc=%i\n",
- rc);
- debug_unregister_view(cfm_dbg, &debug_sprintf_view);
- debug_unregister(cfm_dbg);
- return rc;
- }
- return 0;
-}
-
-device_initcall(paicrypt_init);
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
deleted file mode 100644
index a6da7e0cc7a6..000000000000
--- a/arch/s390/kernel/perf_pai_ext.c
+++ /dev/null
@@ -1,695 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Performance event support - Processor Activity Instrumentation Extension
- * Facility
- *
- * Copyright IBM Corp. 2022
- * Author(s): Thomas Richter <tmricht@linux.ibm.com>
- */
-#define KMSG_COMPONENT "pai_ext"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/kernel_stat.h>
-#include <linux/percpu.h>
-#include <linux/notifier.h>
-#include <linux/init.h>
-#include <linux/export.h>
-#include <linux/io.h>
-#include <linux/perf_event.h>
-#include <asm/ctlreg.h>
-#include <asm/pai.h>
-#include <asm/debug.h>
-
-#define PAIE1_CB_SZ 0x200 /* Size of PAIE1 control block */
-#define PAIE1_CTRBLOCK_SZ 0x400 /* Size of PAIE1 counter blocks */
-
-static debug_info_t *paiext_dbg;
-static unsigned int paiext_cnt; /* Extracted with QPACI instruction */
-
-struct pai_userdata {
- u16 num;
- u64 value;
-} __packed;
-
-/* Create the PAI extension 1 control block area.
- * The PAI extension control block 1 is pointed to by lowcore
- * address 0x1508 for each CPU. This control block is 512 bytes in size
- * and requires a 512 byte boundary alignment.
- */
-struct paiext_cb { /* PAI extension 1 control block */
- u64 header; /* Not used */
- u64 reserved1;
- u64 acc; /* Addr to analytics counter control block */
- u8 reserved2[488];
-} __packed;
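The field sizes above add up to 8 + 8 + 8 + 488 = 512 bytes, matching the documented PAIE1 control block size. A compile-time check of that layout, sketched outside the patch with a mirrored struct, could read:

#include <stdint.h>

struct paiext_cb_layout {	/* mirrors struct paiext_cb above */
	uint64_t header;
	uint64_t reserved1;
	uint64_t acc;
	uint8_t reserved2[488];
} __attribute__((packed));

_Static_assert(sizeof(struct paiext_cb_layout) == 512,
	       "PAIE1 control block must be 512 bytes");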
-
-struct paiext_map {
- unsigned long *area; /* Area for CPU to store counters */
- struct pai_userdata *save; /* Area to store non-zero counters */
- enum paievt_mode mode; /* Type of event */
- unsigned int active_events; /* # of PAI Extension users */
- refcount_t refcnt;
- struct perf_event *event; /* Perf event for sampling */
- struct paiext_cb *paiext_cb; /* PAI extension control block area */
-};
-
-struct paiext_mapptr {
- struct paiext_map *mapptr;
-};
-
-static struct paiext_root { /* Anchor to per CPU data */
- refcount_t refcnt; /* Overall active events */
- struct paiext_mapptr __percpu *mapptr;
-} paiext_root;
-
-/* Free per CPU data when the last event is removed. */
-static void paiext_root_free(void)
-{
- if (refcount_dec_and_test(&paiext_root.refcnt)) {
- free_percpu(paiext_root.mapptr);
- paiext_root.mapptr = NULL;
- }
-}
-
-/* On initialization of first event also allocate per CPU data dynamically.
- * Start with an array of pointers, the array size is the maximum number of
- * CPUs possible, which might be larger than the number of CPUs currently
- * online.
- */
-static int paiext_root_alloc(void)
-{
- if (!refcount_inc_not_zero(&paiext_root.refcnt)) {
- /* The memory is already zeroed. */
- paiext_root.mapptr = alloc_percpu(struct paiext_mapptr);
- if (!paiext_root.mapptr) {
- /* Returning without refcnt adjustment is ok. The
- * error code is handled by paiext_alloc() which
- * decrements refcnt when an event can not be
- * created.
- */
- return -ENOMEM;
- }
- refcount_set(&paiext_root.refcnt, 1);
- }
- return 0;
-}
-
-/* Protects against concurrent increment of sampler and counter member
- * increments at the same time and prohibits concurrent execution of
- * counting and sampling events.
- * Ensures that analytics counter block is deallocated only when the
- * sampling and counting on that cpu is zero.
- * For details see paiext_alloc().
- */
-static DEFINE_MUTEX(paiext_reserve_mutex);
-
-/* Free all memory allocated for event counting/sampling setup */
-static void paiext_free(struct paiext_mapptr *mp)
-{
- kfree(mp->mapptr->area);
- kfree(mp->mapptr->paiext_cb);
- kvfree(mp->mapptr->save);
- kfree(mp->mapptr);
- mp->mapptr = NULL;
-}
-
-/* Release the PMU if event is the last perf event */
-static void paiext_event_destroy(struct perf_event *event)
-{
- struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, event->cpu);
- struct paiext_map *cpump = mp->mapptr;
-
- free_page(PAI_SAVE_AREA(event));
- mutex_lock(&paiext_reserve_mutex);
- if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */
- paiext_free(mp);
- paiext_root_free();
- mutex_unlock(&paiext_reserve_mutex);
- debug_sprintf_event(paiext_dbg, 4, "%s cpu %d mapptr %p\n", __func__,
- event->cpu, mp->mapptr);
-
-}
-
-/* Used to avoid races in checking concurrent access of counting and
- * sampling for pai_extension events.
- *
- * Only one instance of event pai_ext/NNPA_ALL/ for sampling is
- * allowed and when this event is running, no counting event is allowed.
- * Several counting events are allowed in parallel, but no sampling event
- * is allowed while one (or more) counting events are running.
- *
- * This function is called in process context and it is safe to block.
- * When the event initialization function fails, no other callback will
- * be invoked.
- *
- * Allocate the memory for the event.
- */
-static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
-{
- struct paiext_mapptr *mp;
- struct paiext_map *cpump;
- int rc;
-
- mutex_lock(&paiext_reserve_mutex);
-
- rc = paiext_root_alloc();
- if (rc)
- goto unlock;
-
- mp = per_cpu_ptr(paiext_root.mapptr, event->cpu);
- cpump = mp->mapptr;
- if (!cpump) { /* Paiext_map allocated? */
- rc = -ENOMEM;
- cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
- if (!cpump)
- goto undo;
-
- /* Allocate memory for counter area and counter extraction.
- * These are
- * - a 512 byte block and requires 512 byte boundary alignment.
- * - a 1KB byte block and requires 1KB boundary alignment.
- * Only the first counting event has to allocate the area.
- *
- * Note: This works with commit 59bb47985c1d by default.
- * Backporting this to kernels without this commit might
- * need adjustment.
- */
- mp->mapptr = cpump;
- cpump->area = kzalloc(PAIE1_CTRBLOCK_SZ, GFP_KERNEL);
- cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL);
- cpump->save = kvmalloc_array(paiext_cnt + 1,
- sizeof(struct pai_userdata),
- GFP_KERNEL);
- if (!cpump->save || !cpump->area || !cpump->paiext_cb) {
- paiext_free(mp);
- goto undo;
- }
- refcount_set(&cpump->refcnt, 1);
- cpump->mode = a->sample_period ? PAI_MODE_SAMPLING
- : PAI_MODE_COUNTING;
- } else {
- /* Multiple invocation, check what is active.
- * Supported are multiple counter events or only one sampling
- * event concurrently at any one time.
- */
- if (cpump->mode == PAI_MODE_SAMPLING ||
- (cpump->mode == PAI_MODE_COUNTING && a->sample_period)) {
- rc = -EBUSY;
- goto undo;
- }
- refcount_inc(&cpump->refcnt);
- }
-
- rc = 0;
-
-undo:
- if (rc) {
- /* Error in allocation of event, decrement anchor. Since
- * the event is not created, its destroy() function is never
- * invoked. Adjust the reference counter for the anchor.
- */
- paiext_root_free();
- }
-unlock:
- mutex_unlock(&paiext_reserve_mutex);
- /* If rc is non-zero, no increment of counter/sampler was done. */
- return rc;
-}
-
-/* The PAI extension 1 control block supports up to 128 entries. Return
- * the index within PAIE1_CB given the event number. Also validate event
- * number.
- */
-static int paiext_event_valid(struct perf_event *event)
-{
- u64 cfg = event->attr.config;
-
- if (cfg >= PAI_NNPA_BASE && cfg <= PAI_NNPA_BASE + paiext_cnt) {
- /* Offset NNPA in paiext_cb */
- event->hw.config_base = offsetof(struct paiext_cb, acc);
- return 0;
- }
- return -EINVAL;
-}
-
-/* Might be called on different CPU than the one the event is intended for. */
-static int paiext_event_init(struct perf_event *event)
-{
- struct perf_event_attr *a = &event->attr;
- int rc;
-
- /* PMU pai_ext registered as PERF_TYPE_RAW, check event type */
- if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
- return -ENOENT;
- /* PAI extension event must be valid and in supported range */
- rc = paiext_event_valid(event);
- if (rc)
- return rc;
- /* Allow only CPU wide operation, no process context for now. */
- if ((event->attach_state & PERF_ATTACH_TASK) || event->cpu == -1)
- return -ENOENT;
- /* Allow only event NNPA_ALL for sampling. */
- if (a->sample_period && a->config != PAI_NNPA_BASE)
- return -EINVAL;
- /* Prohibit exclude_user event selection */
- if (a->exclude_user)
- return -EINVAL;
- /* Get a page to store last counter values for sampling */
- if (a->sample_period) {
- PAI_SAVE_AREA(event) = get_zeroed_page(GFP_KERNEL);
- if (!PAI_SAVE_AREA(event))
- return -ENOMEM;
- }
-
- rc = paiext_alloc(a, event);
- if (rc) {
- free_page(PAI_SAVE_AREA(event));
- return rc;
- }
- event->destroy = paiext_event_destroy;
-
- if (a->sample_period) {
- a->sample_period = 1;
- a->freq = 0;
- /* Register for paicrypt_sched_task() to be called */
- event->attach_state |= PERF_ATTACH_SCHED_CB;
- /* Add raw data which are the memory mapped counters */
- a->sample_type |= PERF_SAMPLE_RAW;
- /* Turn off inheritance */
- a->inherit = 0;
- }
-
- return 0;
-}
-
-static u64 paiext_getctr(unsigned long *area, int nr)
-{
- return area[nr];
-}
-
-/* Read the counter values. Return value from location in buffer. For event
- * NNPA_ALL sum up all events.
- */
-static u64 paiext_getdata(struct perf_event *event)
-{
- struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
- struct paiext_map *cpump = mp->mapptr;
- u64 sum = 0;
- int i;
-
- if (event->attr.config != PAI_NNPA_BASE)
- return paiext_getctr(cpump->area,
- event->attr.config - PAI_NNPA_BASE);
-
- for (i = 1; i <= paiext_cnt; i++)
- sum += paiext_getctr(cpump->area, i);
-
- return sum;
-}
-
-static u64 paiext_getall(struct perf_event *event)
-{
- return paiext_getdata(event);
-}
-
-static void paiext_read(struct perf_event *event)
-{
- u64 prev, new, delta;
-
- prev = local64_read(&event->hw.prev_count);
- new = paiext_getall(event);
- local64_set(&event->hw.prev_count, new);
- delta = new - prev;
- local64_add(delta, &event->count);
-}
-
-static void paiext_start(struct perf_event *event, int flags)
-{
- struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
- struct paiext_map *cpump = mp->mapptr;
- u64 sum;
-
- if (!event->attr.sample_period) { /* Counting */
- sum = paiext_getall(event); /* Get current value */
- local64_set(&event->hw.prev_count, sum);
- } else { /* Sampling */
- cpump->event = event;
- perf_sched_cb_inc(event->pmu);
- }
-}
-
-static int paiext_add(struct perf_event *event, int flags)
-{
- struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
- struct paiext_map *cpump = mp->mapptr;
- struct paiext_cb *pcb = cpump->paiext_cb;
-
- if (++cpump->active_events == 1) {
- S390_lowcore.aicd = virt_to_phys(cpump->paiext_cb);
- pcb->acc = virt_to_phys(cpump->area) | 0x1;
- /* Enable CPU instruction lookup for PAIE1 control block */
- local_ctl_set_bit(0, CR0_PAI_EXTENSION_BIT);
- debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
- __func__, S390_lowcore.aicd, pcb->acc);
- }
- if (flags & PERF_EF_START)
- paiext_start(event, PERF_EF_RELOAD);
- event->hw.state = 0;
- return 0;
-}
-
-static void paiext_stop(struct perf_event *event, int flags)
-{
- struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
- struct paiext_map *cpump = mp->mapptr;
-
- if (!event->attr.sample_period) { /* Counting */
- paiext_read(event);
- } else { /* Sampling */
- perf_sched_cb_dec(event->pmu);
- cpump->event = NULL;
- }
- event->hw.state = PERF_HES_STOPPED;
-}
-
-static void paiext_del(struct perf_event *event, int flags)
-{
- struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
- struct paiext_map *cpump = mp->mapptr;
- struct paiext_cb *pcb = cpump->paiext_cb;
-
- paiext_stop(event, PERF_EF_UPDATE);
- if (--cpump->active_events == 0) {
- /* Disable CPU instruction lookup for PAIE1 control block */
- local_ctl_clear_bit(0, CR0_PAI_EXTENSION_BIT);
- pcb->acc = 0;
- S390_lowcore.aicd = 0;
- debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
- __func__, S390_lowcore.aicd, pcb->acc);
- }
-}
-
-/* Create raw data and save it in buffer. Returns number of bytes copied.
- * Saves only positive counter entries of the form
- * 2 bytes: Number of counter
- * 8 bytes: Value of counter
- */
-static size_t paiext_copy(struct pai_userdata *userdata, unsigned long *area,
- unsigned long *area_old)
-{
- int i, outidx = 0;
-
- for (i = 1; i <= paiext_cnt; i++) {
- u64 val = paiext_getctr(area, i);
- u64 val_old = paiext_getctr(area_old, i);
-
- if (val >= val_old)
- val -= val_old;
- else
- val = (~0ULL - val_old) + val + 1;
- if (val) {
- userdata[outidx].num = i;
- userdata[outidx].value = val;
- outidx++;
- }
- }
- return outidx * sizeof(*userdata);
-}
-
-/* Write sample when one or more counters values are nonzero.
- *
- * Note: The function paiext_sched_task() and paiext_push_sample() are not
- * invoked after function paiext_del() has been called because of function
- * perf_sched_cb_dec().
- * The function paiext_sched_task() and paiext_push_sample() are only
- * called when sampling is active. Function perf_sched_cb_inc()
- * has been invoked to install function paiext_sched_task() as call back
- * to run at context switch time (see paiext_add()).
- *
- * This causes function perf_event_context_sched_out() and
- * perf_event_context_sched_in() to check whether the PMU has installed an
- * sched_task() callback. That callback is not active after paiext_del()
- * returns and has deleted the event on that CPU.
- */
-static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump,
- struct perf_event *event)
-{
- struct perf_sample_data data;
- struct perf_raw_record raw;
- struct pt_regs regs;
- int overflow;
-
- /* Setup perf sample */
- memset(&regs, 0, sizeof(regs));
- memset(&raw, 0, sizeof(raw));
- memset(&data, 0, sizeof(data));
- perf_sample_data_init(&data, 0, event->hw.last_period);
- if (event->attr.sample_type & PERF_SAMPLE_TID) {
- data.tid_entry.pid = task_tgid_nr(current);
- data.tid_entry.tid = task_pid_nr(current);
- }
- if (event->attr.sample_type & PERF_SAMPLE_TIME)
- data.time = event->clock();
- if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
- data.id = event->id;
- if (event->attr.sample_type & PERF_SAMPLE_CPU)
- data.cpu_entry.cpu = smp_processor_id();
- if (event->attr.sample_type & PERF_SAMPLE_RAW) {
- raw.frag.size = rawsize;
- raw.frag.data = cpump->save;
- perf_sample_save_raw_data(&data, &raw);
- }
-
- overflow = perf_event_overflow(event, &data, &regs);
- perf_event_update_userpage(event);
- /* Save NNPA lowcore area after read in event */
- memcpy((void *)PAI_SAVE_AREA(event), cpump->area,
- PAIE1_CTRBLOCK_SZ);
- return overflow;
-}
-
-/* Check if there is data to be saved on schedule out of a task. */
-static int paiext_have_sample(void)
-{
- struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
- struct paiext_map *cpump = mp->mapptr;
- struct perf_event *event = cpump->event;
- size_t rawsize;
- int rc = 0;
-
- if (!event)
- return 0;
- rawsize = paiext_copy(cpump->save, cpump->area,
- (unsigned long *)PAI_SAVE_AREA(event));
- if (rawsize) /* Incremented counters */
- rc = paiext_push_sample(rawsize, cpump, event);
- return rc;
-}
-
-/* Called on schedule-in and schedule-out. No access to event structure,
- * but for sampling only event NNPA_ALL is allowed.
- */
-static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
-{
- /* We started with a clean page on event installation. So read out
- * results on schedule_out and if page was dirty, clear values.
- */
- if (!sched_in)
- paiext_have_sample();
-}
-
-/* Attribute definitions for pai extension1 interface. As with other CPU
- * Measurement Facilities, there is one attribute per mapped counter.
- * The number of mapped counters may vary per machine generation. Use
- * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction
- * to determine the number of mapped counters. The instruction returns
- * a positive number, which is the highest number of supported counters.
- * All counters less than this number are also supported, there are no
- * holes. A returned number of zero means no support for mapped counters.
- *
- * The identification of the counter is a unique number. The chosen range
- * is 0x1800 + offset in mapped kernel page.
- * All CPU Measurement Facility counters identifiers must be unique and
- * the numbers from 0 to 496 are already used for the CPU Measurement
- * Counter facility. Number 0x1000 to 0x103e are used for PAI cryptography
- * counters.
- * Numbers 0xb0000, 0xbc000 and 0xbd000 are already
- * used for the CPU Measurement Sampling facility.
- */
-PMU_FORMAT_ATTR(event, "config:0-63");
-
-static struct attribute *paiext_format_attr[] = {
- &format_attr_event.attr,
- NULL,
-};
-
-static struct attribute_group paiext_events_group = {
- .name = "events",
- .attrs = NULL, /* Filled in attr_event_init() */
-};
-
-static struct attribute_group paiext_format_group = {
- .name = "format",
- .attrs = paiext_format_attr,
-};
-
-static const struct attribute_group *paiext_attr_groups[] = {
- &paiext_events_group,
- &paiext_format_group,
- NULL,
-};
-
-/* Performance monitoring unit for mapped counters */
-static struct pmu paiext = {
- .task_ctx_nr = perf_invalid_context,
- .event_init = paiext_event_init,
- .add = paiext_add,
- .del = paiext_del,
- .start = paiext_start,
- .stop = paiext_stop,
- .read = paiext_read,
- .sched_task = paiext_sched_task,
- .attr_groups = paiext_attr_groups,
-};
-
-/* List of symbolic PAI extension 1 NNPA counter names. */
-static const char * const paiext_ctrnames[] = {
- [0] = "NNPA_ALL",
- [1] = "NNPA_ADD",
- [2] = "NNPA_SUB",
- [3] = "NNPA_MUL",
- [4] = "NNPA_DIV",
- [5] = "NNPA_MIN",
- [6] = "NNPA_MAX",
- [7] = "NNPA_LOG",
- [8] = "NNPA_EXP",
- [9] = "NNPA_IBM_RESERVED_9",
- [10] = "NNPA_RELU",
- [11] = "NNPA_TANH",
- [12] = "NNPA_SIGMOID",
- [13] = "NNPA_SOFTMAX",
- [14] = "NNPA_BATCHNORM",
- [15] = "NNPA_MAXPOOL2D",
- [16] = "NNPA_AVGPOOL2D",
- [17] = "NNPA_LSTMACT",
- [18] = "NNPA_GRUACT",
- [19] = "NNPA_CONVOLUTION",
- [20] = "NNPA_MATMUL_OP",
- [21] = "NNPA_MATMUL_OP_BCAST23",
- [22] = "NNPA_SMALLBATCH",
- [23] = "NNPA_LARGEDIM",
- [24] = "NNPA_SMALLTENSOR",
- [25] = "NNPA_1MFRAME",
- [26] = "NNPA_2GFRAME",
- [27] = "NNPA_ACCESSEXCEPT",
-};
-
-static void __init attr_event_free(struct attribute **attrs, int num)
-{
- struct perf_pmu_events_attr *pa;
- struct device_attribute *dap;
- int i;
-
- for (i = 0; i < num; i++) {
- dap = container_of(attrs[i], struct device_attribute, attr);
- pa = container_of(dap, struct perf_pmu_events_attr, attr);
- kfree(pa);
- }
- kfree(attrs);
-}
-
-static int __init attr_event_init_one(struct attribute **attrs, int num)
-{
- struct perf_pmu_events_attr *pa;
-
- /* Index larger than array_size, no counter name available */
- if (num >= ARRAY_SIZE(paiext_ctrnames)) {
- attrs[num] = NULL;
- return 0;
- }
-
- pa = kzalloc(sizeof(*pa), GFP_KERNEL);
- if (!pa)
- return -ENOMEM;
-
- sysfs_attr_init(&pa->attr.attr);
- pa->id = PAI_NNPA_BASE + num;
- pa->attr.attr.name = paiext_ctrnames[num];
- pa->attr.attr.mode = 0444;
- pa->attr.show = cpumf_events_sysfs_show;
- pa->attr.store = NULL;
- attrs[num] = &pa->attr.attr;
- return 0;
-}
-
-/* Create PMU sysfs event attributes on the fly. */
-static int __init attr_event_init(void)
-{
- struct attribute **attrs;
- int ret, i;
-
- attrs = kmalloc_array(paiext_cnt + 2, sizeof(*attrs), GFP_KERNEL);
- if (!attrs)
- return -ENOMEM;
- for (i = 0; i <= paiext_cnt; i++) {
- ret = attr_event_init_one(attrs, i);
- if (ret) {
- attr_event_free(attrs, i);
- return ret;
- }
- }
- attrs[i] = NULL;
- paiext_events_group.attrs = attrs;
- return 0;
-}
-
-static int __init paiext_init(void)
-{
- struct qpaci_info_block ib;
- int rc = -ENOMEM;
-
- if (!test_facility(197))
- return 0;
-
- qpaci(&ib);
- paiext_cnt = ib.num_nnpa;
- if (paiext_cnt >= PAI_NNPA_MAXCTR)
- paiext_cnt = PAI_NNPA_MAXCTR;
- if (!paiext_cnt)
- return 0;
-
- rc = attr_event_init();
- if (rc) {
- pr_err("Creation of PMU " KMSG_COMPONENT " /sysfs failed\n");
- return rc;
- }
-
- /* Setup s390dbf facility */
- paiext_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128);
- if (!paiext_dbg) {
- pr_err("Registration of s390dbf " KMSG_COMPONENT " failed\n");
- rc = -ENOMEM;
- goto out_init;
- }
- debug_register_view(paiext_dbg, &debug_sprintf_view);
-
- rc = perf_pmu_register(&paiext, KMSG_COMPONENT, -1);
- if (rc) {
- pr_err("Registration of " KMSG_COMPONENT " PMU failed with "
- "rc=%i\n", rc);
- goto out_pmu;
- }
-
- return 0;
-
-out_pmu:
- debug_unregister_view(paiext_dbg, &debug_sprintf_view);
- debug_unregister(paiext_dbg);
-out_init:
- attr_event_free(paiext_events_group.attrs,
- ARRAY_SIZE(paiext_ctrnames) + 1);
- return rc;
-}
-
-device_initcall(paiext_init);
diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c
index a6b058ee4a36..7b305f1456f8 100644
--- a/arch/s390/kernel/perf_regs.c
+++ b/arch/s390/kernel/perf_regs.c
@@ -44,9 +44,6 @@ int perf_reg_validate(u64 mask)
u64 perf_reg_abi(struct task_struct *task)
{
- if (test_tsk_thread_flag(task, TIF_31BIT))
- return PERF_SAMPLE_REGS_ABI_32;
-
return PERF_SAMPLE_REGS_ABI_64;
}
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index d8740631df4b..0df95dcb2101 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -24,10 +24,8 @@
#include <linux/tick.h>
#include <linux/personality.h>
#include <linux/syscalls.h>
-#include <linux/compat.h>
#include <linux/kprobes.h>
#include <linux/random.h>
-#include <linux/export.h>
#include <linux/init_task.h>
#include <linux/entry-common.h>
#include <linux/io.h>
@@ -71,10 +69,10 @@ void flush_thread(void)
void arch_setup_new_exec(void)
{
- if (S390_lowcore.current_pid != current->pid) {
- S390_lowcore.current_pid = current->pid;
+ if (get_lowcore()->current_pid != current->pid) {
+ get_lowcore()->current_pid = current->pid;
if (test_facility(40))
- lpp(&S390_lowcore.lpp);
+ lpp(&get_lowcore()->lpp);
}
}
@@ -107,7 +105,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
- unsigned long clone_flags = args->flags;
+ u64 clone_flags = args->flags;
unsigned long new_stackp = args->stack;
unsigned long tls = args->tls;
struct fake_frame
@@ -167,12 +165,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
/* Set a new TLS ? */
if (clone_flags & CLONE_SETTLS) {
- if (is_compat_task()) {
- p->thread.acrs[0] = (unsigned int)tls;
- } else {
- p->thread.acrs[0] = (unsigned int)(tls >> 32);
- p->thread.acrs[1] = (unsigned int)tls;
- }
+ p->thread.acrs[0] = (unsigned int)(tls >> 32);
+ p->thread.acrs[1] = (unsigned int)tls;
}
/*
* s390 stores the svc return address in arch_data when calling
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 65c1464eea4f..e33a3eccda56 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -4,10 +4,10 @@
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
*/
-#define KMSG_COMPONENT "cpu"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "cpu: " fmt
#include <linux/stop_machine.h>
+#include <linux/cpufeature.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/random.h>
@@ -17,7 +17,9 @@
#include <linux/mm_types.h>
#include <linux/delay.h>
#include <linux/cpu.h>
-
+#include <linux/smp.h>
+#include <asm/text-patching.h>
+#include <asm/machine.h>
#include <asm/diag.h>
#include <asm/facility.h>
#include <asm/elf.h>
@@ -71,7 +73,7 @@ void notrace stop_machine_yield(const struct cpumask *cpumask)
this_cpu = smp_processor_id();
if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) {
__this_cpu_write(cpu_relax_retry, 0);
- cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false);
+ cpu = cpumask_next_wrap(this_cpu, cpumask);
if (cpu >= nr_cpu_ids)
return;
if (arch_vcpu_is_preempted(cpu))
@@ -79,6 +81,23 @@ void notrace stop_machine_yield(const struct cpumask *cpumask)
}
}
+static void do_sync_core(void *info)
+{
+ sync_core();
+}
+
+void text_poke_sync(void)
+{
+ on_each_cpu(do_sync_core, NULL, 1);
+}
+
+void text_poke_sync_lock(void)
+{
+ cpus_read_lock();
+ text_poke_sync();
+ cpus_read_unlock();
+}
+
/*
* cpu_init - initializes state that is per-CPU.
*/
@@ -191,14 +210,14 @@ static int __init setup_hwcaps(void)
elf_hwcap |= HWCAP_DFP;
/* huge page support */
- if (MACHINE_HAS_EDAT1)
+ if (cpu_has_edat1())
elf_hwcap |= HWCAP_HPAGE;
/* 64-bit register support for 31-bit processes */
elf_hwcap |= HWCAP_HIGH_GPRS;
/* transactional execution */
- if (MACHINE_HAS_TE)
+ if (machine_has_tx())
elf_hwcap |= HWCAP_TE;
/* vector */
@@ -226,10 +245,10 @@ static int __init setup_hwcaps(void)
elf_hwcap |= HWCAP_NNPA;
/* guarded storage */
- if (MACHINE_HAS_GS)
+ if (cpu_has_gs())
elf_hwcap |= HWCAP_GS;
- if (MACHINE_HAS_PCI_MIO)
+ if (test_machine_feature(MFEATURE_PCI_MIO))
elf_hwcap |= HWCAP_PCI_MIO;
/* virtualization support */
@@ -248,31 +267,35 @@ static int __init setup_elf_platform(void)
add_device_randomness(&cpu_id, sizeof(cpu_id));
switch (cpu_id.machine) {
default: /* Use "z10" as default. */
- strcpy(elf_platform, "z10");
+ strscpy(elf_platform, "z10");
break;
case 0x2817:
case 0x2818:
- strcpy(elf_platform, "z196");
+ strscpy(elf_platform, "z196");
break;
case 0x2827:
case 0x2828:
- strcpy(elf_platform, "zEC12");
+ strscpy(elf_platform, "zEC12");
break;
case 0x2964:
case 0x2965:
- strcpy(elf_platform, "z13");
+ strscpy(elf_platform, "z13");
break;
case 0x3906:
case 0x3907:
- strcpy(elf_platform, "z14");
+ strscpy(elf_platform, "z14");
break;
case 0x8561:
case 0x8562:
- strcpy(elf_platform, "z15");
+ strscpy(elf_platform, "z15");
break;
case 0x3931:
case 0x3932:
- strcpy(elf_platform, "z16");
+ strscpy(elf_platform, "z16");
+ break;
+ case 0x9175:
+ case 0x9176:
+ strscpy(elf_platform, "z17");
break;
}
return 0;
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 1cfed8b710b8..ceaa1726e328 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -7,10 +7,10 @@
* Martin Schwidefsky (schwidefsky@de.ibm.com)
*/
-#include "asm/ptrace.h"
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
+#include <linux/cpufeature.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/errno.h>
@@ -22,7 +22,6 @@
#include <linux/elf.h>
#include <linux/regset.h>
#include <linux/seccomp.h>
-#include <linux/compat.h>
#include <trace/syscall.h>
#include <asm/guarded_storage.h>
#include <asm/access-regs.h>
@@ -31,14 +30,13 @@
#include <asm/unistd.h>
#include <asm/runtime_instr.h>
#include <asm/facility.h>
+#include <asm/machine.h>
+#include <asm/ptrace.h>
+#include <asm/rwonce.h>
#include <asm/fpu.h>
#include "entry.h"
-#ifdef CONFIG_COMPAT
-#include "compat_ptrace.h"
-#endif
-
void update_cr_regs(struct task_struct *task)
{
struct pt_regs *regs = task_pt_regs(task);
@@ -60,7 +58,7 @@ void update_cr_regs(struct task_struct *task)
cr0_new = cr0_old;
cr2_new = cr2_old;
/* Take care of the enable/disable of transactional execution. */
- if (MACHINE_HAS_TE) {
+ if (machine_has_tx()) {
/* Set or clear transaction execution TXC bit 8. */
cr0_new.tcx = 1;
if (task->thread.per_flags & PER_FLAG_NO_TE)
@@ -75,7 +73,7 @@ void update_cr_regs(struct task_struct *task)
}
}
/* Take care of enable/disable of guarded storage. */
- if (MACHINE_HAS_GS) {
+ if (cpu_has_gs()) {
cr2_new.gse = 0;
if (task->thread.gs_cb)
cr2_new.gse = 1;
@@ -470,18 +468,18 @@ long arch_ptrace(struct task_struct *child, long request,
case PTRACE_GET_LAST_BREAK:
return put_user(child->thread.last_break, (unsigned long __user *)data);
case PTRACE_ENABLE_TE:
- if (!MACHINE_HAS_TE)
+ if (!machine_has_tx())
return -EIO;
child->thread.per_flags &= ~PER_FLAG_NO_TE;
return 0;
case PTRACE_DISABLE_TE:
- if (!MACHINE_HAS_TE)
+ if (!machine_has_tx())
return -EIO;
child->thread.per_flags |= PER_FLAG_NO_TE;
child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
return 0;
case PTRACE_TE_ABORT_RAND:
- if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE))
+ if (!machine_has_tx() || (child->thread.per_flags & PER_FLAG_NO_TE))
return -EIO;
switch (data) {
case 0UL:
@@ -504,308 +502,6 @@ long arch_ptrace(struct task_struct *child, long request,
}
}
-#ifdef CONFIG_COMPAT
-/*
- * Now the fun part starts... a 31 bit program running in the
- * 31 bit emulation tracing another program. PTRACE_PEEKTEXT,
- * PTRACE_PEEKDATA, PTRACE_POKETEXT and PTRACE_POKEDATA are easy
- * to handle, the difference to the 64 bit versions of the requests
- * is that the access is done in multiples of 4 byte instead of
- * 8 bytes (sizeof(unsigned long) on 31/64 bit).
- * The ugly part are PTRACE_PEEKUSR, PTRACE_PEEKUSR_AREA,
- * PTRACE_POKEUSR and PTRACE_POKEUSR_AREA. If the traced program
- * is a 31 bit program too, the content of struct user can be
- * emulated. A 31 bit program peeking into the struct user of
- * a 64 bit program is a no-no.
- */
-
-/*
- * Same as peek_user_per but for a 31 bit program.
- */
-static inline __u32 __peek_user_per_compat(struct task_struct *child,
- addr_t addr)
-{
- if (addr == offsetof(struct compat_per_struct_kernel, cr9))
- /* Control bits of the active per set. */
- return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
- PER_EVENT_IFETCH : child->thread.per_user.control;
- else if (addr == offsetof(struct compat_per_struct_kernel, cr10))
- /* Start address of the active per set. */
- return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
- 0 : child->thread.per_user.start;
- else if (addr == offsetof(struct compat_per_struct_kernel, cr11))
- /* End address of the active per set. */
- return test_thread_flag(TIF_SINGLE_STEP) ?
- PSW32_ADDR_INSN : child->thread.per_user.end;
- else if (addr == offsetof(struct compat_per_struct_kernel, bits))
- /* Single-step bit. */
- return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
- 0x80000000 : 0;
- else if (addr == offsetof(struct compat_per_struct_kernel, starting_addr))
- /* Start address of the user specified per set. */
- return (__u32) child->thread.per_user.start;
- else if (addr == offsetof(struct compat_per_struct_kernel, ending_addr))
- /* End address of the user specified per set. */
- return (__u32) child->thread.per_user.end;
- else if (addr == offsetof(struct compat_per_struct_kernel, perc_atmid))
- /* PER code, ATMID and AI of the last PER trap */
- return (__u32) child->thread.per_event.cause << 16;
- else if (addr == offsetof(struct compat_per_struct_kernel, address))
- /* Address of the last PER trap */
- return (__u32) child->thread.per_event.address;
- else if (addr == offsetof(struct compat_per_struct_kernel, access_id))
- /* Access id of the last PER trap */
- return (__u32) child->thread.per_event.paid << 24;
- return 0;
-}
-
-/*
- * Same as peek_user but for a 31 bit program.
- */
-static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
-{
- addr_t offset;
- __u32 tmp;
-
- if (addr < offsetof(struct compat_user, regs.acrs)) {
- struct pt_regs *regs = task_pt_regs(child);
- /*
- * psw and gprs are stored on the stack
- */
- if (addr == offsetof(struct compat_user, regs.psw.mask)) {
- /* Fake a 31 bit psw mask. */
- tmp = (__u32)(regs->psw.mask >> 32);
- tmp &= PSW32_MASK_USER | PSW32_MASK_RI;
- tmp |= PSW32_USER_BITS;
- } else if (addr == offsetof(struct compat_user, regs.psw.addr)) {
- /* Fake a 31 bit psw address. */
- tmp = (__u32) regs->psw.addr |
- (__u32)(regs->psw.mask & PSW_MASK_BA);
- } else {
- /* gpr 0-15 */
- tmp = *(__u32 *)((addr_t) &regs->psw + addr*2 + 4);
- }
- } else if (addr < offsetof(struct compat_user, regs.orig_gpr2)) {
- /*
- * access registers are stored in the thread structure
- */
- offset = addr - offsetof(struct compat_user, regs.acrs);
- tmp = *(__u32*)((addr_t) &child->thread.acrs + offset);
-
- } else if (addr == offsetof(struct compat_user, regs.orig_gpr2)) {
- /*
- * orig_gpr2 is stored on the kernel stack
- */
- tmp = *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4);
-
- } else if (addr < offsetof(struct compat_user, regs.fp_regs)) {
- /*
- * prevent reads of padding hole between
- * orig_gpr2 and fp_regs on s390.
- */
- tmp = 0;
-
- } else if (addr == offsetof(struct compat_user, regs.fp_regs.fpc)) {
- /*
- * floating point control reg. is in the thread structure
- */
- tmp = child->thread.ufpu.fpc;
-
- } else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) {
- /*
- * floating point regs. are in the child->thread.ufpu.vxrs array
- */
- offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs);
- tmp = *(__u32 *)((addr_t)child->thread.ufpu.vxrs + 2 * offset);
- } else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) {
- /*
- * Handle access to the per_info structure.
- */
- addr -= offsetof(struct compat_user, regs.per_info);
- tmp = __peek_user_per_compat(child, addr);
-
- } else
- tmp = 0;
-
- return tmp;
-}
-
-static int peek_user_compat(struct task_struct *child,
- addr_t addr, addr_t data)
-{
- __u32 tmp;
-
- if (!is_compat_task() || (addr & 3) || addr > sizeof(struct user) - 3)
- return -EIO;
-
- tmp = __peek_user_compat(child, addr);
- return put_user(tmp, (__u32 __user *) data);
-}
-
-/*
- * Same as poke_user_per but for a 31 bit program.
- */
-static inline void __poke_user_per_compat(struct task_struct *child,
- addr_t addr, __u32 data)
-{
- if (addr == offsetof(struct compat_per_struct_kernel, cr9))
- /* PER event mask of the user specified per set. */
- child->thread.per_user.control =
- data & (PER_EVENT_MASK | PER_CONTROL_MASK);
- else if (addr == offsetof(struct compat_per_struct_kernel, starting_addr))
- /* Starting address of the user specified per set. */
- child->thread.per_user.start = data;
- else if (addr == offsetof(struct compat_per_struct_kernel, ending_addr))
- /* Ending address of the user specified per set. */
- child->thread.per_user.end = data;
-}
-
-/*
- * Same as poke_user but for a 31 bit program.
- */
-static int __poke_user_compat(struct task_struct *child,
- addr_t addr, addr_t data)
-{
- __u32 tmp = (__u32) data;
- addr_t offset;
-
- if (addr < offsetof(struct compat_user, regs.acrs)) {
- struct pt_regs *regs = task_pt_regs(child);
- /*
- * psw, gprs, acrs and orig_gpr2 are stored on the stack
- */
- if (addr == offsetof(struct compat_user, regs.psw.mask)) {
- __u32 mask = PSW32_MASK_USER;
-
- mask |= is_ri_task(child) ? PSW32_MASK_RI : 0;
- /* Build a 64 bit psw mask from 31 bit mask. */
- if ((tmp ^ PSW32_USER_BITS) & ~mask)
- /* Invalid psw mask. */
- return -EINVAL;
- if ((data & PSW32_MASK_ASC) == PSW32_ASC_HOME)
- /* Invalid address-space-control bits */
- return -EINVAL;
- regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
- (regs->psw.mask & PSW_MASK_BA) |
- (__u64)(tmp & mask) << 32;
- } else if (addr == offsetof(struct compat_user, regs.psw.addr)) {
- /* Build a 64 bit psw address from 31 bit address. */
- regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN;
- /* Transfer 31 bit amode bit to psw mask. */
- regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) |
- (__u64)(tmp & PSW32_ADDR_AMODE);
- } else {
- if (test_pt_regs_flag(regs, PIF_SYSCALL) &&
- addr == offsetof(struct compat_user, regs.gprs[2])) {
- struct pt_regs *regs = task_pt_regs(child);
-
- regs->int_code = 0x20000 | (data & 0xffff);
- }
- /* gpr 0-15 */
- *(__u32*)((addr_t) &regs->psw + addr*2 + 4) = tmp;
- }
- } else if (addr < offsetof(struct compat_user, regs.orig_gpr2)) {
- /*
- * access registers are stored in the thread structure
- */
- offset = addr - offsetof(struct compat_user, regs.acrs);
- *(__u32*)((addr_t) &child->thread.acrs + offset) = tmp;
-
- } else if (addr == offsetof(struct compat_user, regs.orig_gpr2)) {
- /*
- * orig_gpr2 is stored on the kernel stack
- */
- *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4) = tmp;
-
- } else if (addr < offsetof(struct compat_user, regs.fp_regs)) {
- /*
- * prevent writes of padding hole between
- * orig_gpr2 and fp_regs on s390.
- */
- return 0;
-
- } else if (addr == offsetof(struct compat_user, regs.fp_regs.fpc)) {
- /*
- * floating point control reg. is in the thread structure
- */
- child->thread.ufpu.fpc = data;
-
- } else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) {
- /*
- * floating point regs. are in the child->thread.ufpu.vxrs array
- */
- offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs);
- *(__u32 *)((addr_t)child->thread.ufpu.vxrs + 2 * offset) = tmp;
- } else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) {
- /*
- * Handle access to the per_info structure.
- */
- addr -= offsetof(struct compat_user, regs.per_info);
- __poke_user_per_compat(child, addr, data);
- }
-
- return 0;
-}
-
-static int poke_user_compat(struct task_struct *child,
- addr_t addr, addr_t data)
-{
- if (!is_compat_task() || (addr & 3) ||
- addr > sizeof(struct compat_user) - 3)
- return -EIO;
-
- return __poke_user_compat(child, addr, data);
-}
-
-long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
- compat_ulong_t caddr, compat_ulong_t cdata)
-{
- unsigned long addr = caddr;
- unsigned long data = cdata;
- compat_ptrace_area parea;
- int copied, ret;
-
- switch (request) {
- case PTRACE_PEEKUSR:
- /* read the word at location addr in the USER area. */
- return peek_user_compat(child, addr, data);
-
- case PTRACE_POKEUSR:
- /* write the word at location addr in the USER area */
- return poke_user_compat(child, addr, data);
-
- case PTRACE_PEEKUSR_AREA:
- case PTRACE_POKEUSR_AREA:
- if (copy_from_user(&parea, (void __force __user *) addr,
- sizeof(parea)))
- return -EFAULT;
- addr = parea.kernel_addr;
- data = parea.process_addr;
- copied = 0;
- while (copied < parea.len) {
- if (request == PTRACE_PEEKUSR_AREA)
- ret = peek_user_compat(child, addr, data);
- else {
- __u32 utmp;
- if (get_user(utmp,
- (__u32 __force __user *) data))
- return -EFAULT;
- ret = poke_user_compat(child, addr, utmp);
- }
- if (ret)
- return ret;
- addr += sizeof(unsigned int);
- data += sizeof(unsigned int);
- copied += sizeof(unsigned int);
- }
- return 0;
- case PTRACE_GET_LAST_BREAK:
- return put_user(child->thread.last_break, (unsigned int __user *)data);
- }
- return compat_ptrace_request(child, request, addr, data);
-}
-#endif
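For context on the interface being removed here: a 31-bit tracer drives the PTRACE_PEEKUSR_AREA loop above by passing a small descriptor in the addr argument, and the kernel then copies the USER area one 4-byte word at a time. Below is a minimal user-space sketch, built as a 31-bit binary; the local struct name, its field layout (inferred from the parea usage above), and the error handling are illustrative only, while PTRACE_PEEKUSR_AREA itself comes from the s390 uapi ptrace header.

#include <stdint.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <asm/ptrace.h>		/* s390: PTRACE_PEEKUSR_AREA */

/* Descriptor layout assumed from the compat_ptrace_area usage above. */
struct area_desc {
	uint32_t len;		/* bytes to transfer, multiple of 4 */
	uint32_t kernel_addr;	/* offset into the traced task's struct user */
	uint32_t process_addr;	/* buffer address in the tracer */
};

/* Read len bytes starting at USER-area offset off into buf. */
static long peek_user_area(pid_t pid, uint32_t off, void *buf, uint32_t len)
{
	struct area_desc area = {
		.len = len,
		.kernel_addr = off,
		.process_addr = (uint32_t)(uintptr_t)buf,
	};

	/* data argument is unused for this request */
	return ptrace(PTRACE_PEEKUSR_AREA, pid, &area, NULL);
}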
-
/*
* user_regset definitions.
*/
@@ -1033,7 +729,7 @@ static int s390_gs_cb_get(struct task_struct *target,
{
struct gs_cb *data = target->thread.gs_cb;
- if (!MACHINE_HAS_GS)
+ if (!cpu_has_gs())
return -ENODEV;
if (!data)
return -ENODATA;
@@ -1050,7 +746,7 @@ static int s390_gs_cb_set(struct task_struct *target,
struct gs_cb gs_cb = { }, *data = NULL;
int rc;
- if (!MACHINE_HAS_GS)
+ if (!cpu_has_gs())
return -ENODEV;
if (!target->thread.gs_cb) {
data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -1087,7 +783,7 @@ static int s390_gs_bc_get(struct task_struct *target,
{
struct gs_cb *data = target->thread.gs_bc_cb;
- if (!MACHINE_HAS_GS)
+ if (!cpu_has_gs())
return -ENODEV;
if (!data)
return -ENODATA;
@@ -1101,7 +797,7 @@ static int s390_gs_bc_set(struct task_struct *target,
{
struct gs_cb *data = target->thread.gs_bc_cb;
- if (!MACHINE_HAS_GS)
+ if (!cpu_has_gs())
return -ENODEV;
if (!data) {
data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -1206,7 +902,7 @@ static int s390_runtime_instr_set(struct task_struct *target,
static const struct user_regset s390_regsets[] = {
{
- .core_note_type = NT_PRSTATUS,
+ USER_REGSET_NOTE_TYPE(PRSTATUS),
.n = sizeof(s390_regs) / sizeof(long),
.size = sizeof(long),
.align = sizeof(long),
@@ -1214,7 +910,7 @@ static const struct user_regset s390_regsets[] = {
.set = s390_regs_set,
},
{
- .core_note_type = NT_PRFPREG,
+ USER_REGSET_NOTE_TYPE(PRFPREG),
.n = sizeof(s390_fp_regs) / sizeof(long),
.size = sizeof(long),
.align = sizeof(long),
@@ -1222,7 +918,7 @@ static const struct user_regset s390_regsets[] = {
.set = s390_fpregs_set,
},
{
- .core_note_type = NT_S390_SYSTEM_CALL,
+ USER_REGSET_NOTE_TYPE(S390_SYSTEM_CALL),
.n = 1,
.size = sizeof(unsigned int),
.align = sizeof(unsigned int),
@@ -1230,7 +926,7 @@ static const struct user_regset s390_regsets[] = {
.set = s390_system_call_set,
},
{
- .core_note_type = NT_S390_LAST_BREAK,
+ USER_REGSET_NOTE_TYPE(S390_LAST_BREAK),
.n = 1,
.size = sizeof(long),
.align = sizeof(long),
@@ -1238,7 +934,7 @@ static const struct user_regset s390_regsets[] = {
.set = s390_last_break_set,
},
{
- .core_note_type = NT_S390_TDB,
+ USER_REGSET_NOTE_TYPE(S390_TDB),
.n = 1,
.size = 256,
.align = 1,
@@ -1246,7 +942,7 @@ static const struct user_regset s390_regsets[] = {
.set = s390_tdb_set,
},
{
- .core_note_type = NT_S390_VXRS_LOW,
+ USER_REGSET_NOTE_TYPE(S390_VXRS_LOW),
.n = __NUM_VXRS_LOW,
.size = sizeof(__u64),
.align = sizeof(__u64),
@@ -1254,7 +950,7 @@ static const struct user_regset s390_regsets[] = {
.set = s390_vxrs_low_set,
},
{
- .core_note_type = NT_S390_VXRS_HIGH,
+ USER_REGSET_NOTE_TYPE(S390_VXRS_HIGH),
.n = __NUM_VXRS_HIGH,
.size = sizeof(__vector128),
.align = sizeof(__vector128),
@@ -1262,7 +958,7 @@ static const struct user_regset s390_regsets[] = {
.set = s390_vxrs_high_set,
},
{
- .core_note_type = NT_S390_GS_CB,
+ USER_REGSET_NOTE_TYPE(S390_GS_CB),
.n = sizeof(struct gs_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
@@ -1270,7 +966,7 @@ static const struct user_regset s390_regsets[] = {
.set = s390_gs_cb_set,
},
{
- .core_note_type = NT_S390_GS_BC,
+ USER_REGSET_NOTE_TYPE(S390_GS_BC),
.n = sizeof(struct gs_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
@@ -1278,7 +974,7 @@ static const struct user_regset s390_regsets[] = {
.set = s390_gs_bc_set,
},
{
- .core_note_type = NT_S390_RI_CB,
+ USER_REGSET_NOTE_TYPE(S390_RI_CB),
.n = sizeof(struct runtime_instr_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
@@ -1294,225 +990,8 @@ static const struct user_regset_view user_s390_view = {
.n = ARRAY_SIZE(s390_regsets)
};
-#ifdef CONFIG_COMPAT
-static int s390_compat_regs_get(struct task_struct *target,
- const struct user_regset *regset,
- struct membuf to)
-{
- unsigned n;
-
- if (target == current)
- save_access_regs(target->thread.acrs);
-
- for (n = 0; n < sizeof(s390_compat_regs); n += sizeof(compat_ulong_t))
- membuf_store(&to, __peek_user_compat(target, n));
- return 0;
-}
-
-static int s390_compat_regs_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int rc = 0;
-
- if (target == current)
- save_access_regs(target->thread.acrs);
-
- if (kbuf) {
- const compat_ulong_t *k = kbuf;
- while (count > 0 && !rc) {
- rc = __poke_user_compat(target, pos, *k++);
- count -= sizeof(*k);
- pos += sizeof(*k);
- }
- } else {
- const compat_ulong_t __user *u = ubuf;
- while (count > 0 && !rc) {
- compat_ulong_t word;
- rc = __get_user(word, u++);
- if (rc)
- break;
- rc = __poke_user_compat(target, pos, word);
- count -= sizeof(*u);
- pos += sizeof(*u);
- }
- }
-
- if (rc == 0 && target == current)
- restore_access_regs(target->thread.acrs);
-
- return rc;
-}
-
-static int s390_compat_regs_high_get(struct task_struct *target,
- const struct user_regset *regset,
- struct membuf to)
-{
- compat_ulong_t *gprs_high;
- int i;
-
- gprs_high = (compat_ulong_t *)task_pt_regs(target)->gprs;
- for (i = 0; i < NUM_GPRS; i++, gprs_high += 2)
- membuf_store(&to, *gprs_high);
- return 0;
-}
-
-static int s390_compat_regs_high_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- compat_ulong_t *gprs_high;
- int rc = 0;
-
- gprs_high = (compat_ulong_t *)
- &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)];
- if (kbuf) {
- const compat_ulong_t *k = kbuf;
- while (count > 0) {
- *gprs_high = *k++;
- gprs_high += 2;
- count -= sizeof(*k);
- }
- } else {
- const compat_ulong_t __user *u = ubuf;
- while (count > 0 && !rc) {
- unsigned long word;
- rc = __get_user(word, u++);
- if (rc)
- break;
- *gprs_high = word;
- gprs_high += 2;
- count -= sizeof(*u);
- }
- }
-
- return rc;
-}
-
-static int s390_compat_last_break_get(struct task_struct *target,
- const struct user_regset *regset,
- struct membuf to)
-{
- compat_ulong_t last_break = target->thread.last_break;
-
- return membuf_store(&to, (unsigned long)last_break);
-}
-
-static int s390_compat_last_break_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- return 0;
-}
-
-static const struct user_regset s390_compat_regsets[] = {
- {
- .core_note_type = NT_PRSTATUS,
- .n = sizeof(s390_compat_regs) / sizeof(compat_long_t),
- .size = sizeof(compat_long_t),
- .align = sizeof(compat_long_t),
- .regset_get = s390_compat_regs_get,
- .set = s390_compat_regs_set,
- },
- {
- .core_note_type = NT_PRFPREG,
- .n = sizeof(s390_fp_regs) / sizeof(compat_long_t),
- .size = sizeof(compat_long_t),
- .align = sizeof(compat_long_t),
- .regset_get = s390_fpregs_get,
- .set = s390_fpregs_set,
- },
- {
- .core_note_type = NT_S390_SYSTEM_CALL,
- .n = 1,
- .size = sizeof(compat_uint_t),
- .align = sizeof(compat_uint_t),
- .regset_get = s390_system_call_get,
- .set = s390_system_call_set,
- },
- {
- .core_note_type = NT_S390_LAST_BREAK,
- .n = 1,
- .size = sizeof(long),
- .align = sizeof(long),
- .regset_get = s390_compat_last_break_get,
- .set = s390_compat_last_break_set,
- },
- {
- .core_note_type = NT_S390_TDB,
- .n = 1,
- .size = 256,
- .align = 1,
- .regset_get = s390_tdb_get,
- .set = s390_tdb_set,
- },
- {
- .core_note_type = NT_S390_VXRS_LOW,
- .n = __NUM_VXRS_LOW,
- .size = sizeof(__u64),
- .align = sizeof(__u64),
- .regset_get = s390_vxrs_low_get,
- .set = s390_vxrs_low_set,
- },
- {
- .core_note_type = NT_S390_VXRS_HIGH,
- .n = __NUM_VXRS_HIGH,
- .size = sizeof(__vector128),
- .align = sizeof(__vector128),
- .regset_get = s390_vxrs_high_get,
- .set = s390_vxrs_high_set,
- },
- {
- .core_note_type = NT_S390_HIGH_GPRS,
- .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t),
- .size = sizeof(compat_long_t),
- .align = sizeof(compat_long_t),
- .regset_get = s390_compat_regs_high_get,
- .set = s390_compat_regs_high_set,
- },
- {
- .core_note_type = NT_S390_GS_CB,
- .n = sizeof(struct gs_cb) / sizeof(__u64),
- .size = sizeof(__u64),
- .align = sizeof(__u64),
- .regset_get = s390_gs_cb_get,
- .set = s390_gs_cb_set,
- },
- {
- .core_note_type = NT_S390_GS_BC,
- .n = sizeof(struct gs_cb) / sizeof(__u64),
- .size = sizeof(__u64),
- .align = sizeof(__u64),
- .regset_get = s390_gs_bc_get,
- .set = s390_gs_bc_set,
- },
- {
- .core_note_type = NT_S390_RI_CB,
- .n = sizeof(struct runtime_instr_cb) / sizeof(__u64),
- .size = sizeof(__u64),
- .align = sizeof(__u64),
- .regset_get = s390_runtime_instr_get,
- .set = s390_runtime_instr_set,
- },
-};
-
-static const struct user_regset_view user_s390_compat_view = {
- .name = "s390",
- .e_machine = EM_S390,
- .regsets = s390_compat_regsets,
- .n = ARRAY_SIZE(s390_compat_regsets)
-};
-#endif
-
const struct user_regset_view *task_user_regset_view(struct task_struct *task)
{
-#ifdef CONFIG_COMPAT
- if (test_tsk_thread_flag(task, TIF_31BIT))
- return &user_s390_compat_view;
-#endif
return &user_s390_view;
}
@@ -1521,13 +1000,6 @@ static const char *gpr_names[NUM_GPRS] = {
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
};
-unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset)
-{
- if (offset >= NUM_GPRS)
- return 0;
- return regs->gprs[offset];
-}
-
int regs_query_register_offset(const char *name)
{
unsigned long offset;
@@ -1547,29 +1019,3 @@ const char *regs_query_register_name(unsigned int offset)
return NULL;
return gpr_names[offset];
}
-
-static int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
-{
- unsigned long ksp = kernel_stack_pointer(regs);
-
- return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1));
-}
-
-/**
- * regs_get_kernel_stack_nth() - get Nth entry of the stack
- * @regs:pt_regs which contains kernel stack pointer.
- * @n:stack entry number.
- *
- * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
- * is specified by @regs. If the @n th entry is NOT in the kernel stack,
- * this returns 0.
- */
-unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
-{
- unsigned long addr;
-
- addr = kernel_stack_pointer(regs) + n * sizeof(long);
- if (!regs_within_kernel_stack(regs, addr))
- return 0;
- return *(unsigned long *)addr;
-}
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S
index 88087a32ebc6..69fcaf54d5ca 100644
--- a/arch/s390/kernel/reipl.S
+++ b/arch/s390/kernel/reipl.S
@@ -9,6 +9,7 @@
#include <asm/asm-offsets.h>
#include <asm/nospec-insn.h>
#include <asm/sigp.h>
+#include <asm/lowcore.h>
GEN_BR_THUNK %r9
@@ -20,20 +21,15 @@
# r3 = Parameter for function
#
SYM_CODE_START(store_status)
- /* Save register one and load save area base */
- stg %r1,__LC_SAVE_AREA_RESTART
+ STMG_LC %r0,%r15,__LC_GPREGS_SAVE_AREA
/* General purpose registers */
- lghi %r1,__LC_GPREGS_SAVE_AREA
- stmg %r0,%r15,0(%r1)
- mvc 8(8,%r1),__LC_SAVE_AREA_RESTART
+ GET_LC %r13
/* Control registers */
- lghi %r1,__LC_CREGS_SAVE_AREA
- stctg %c0,%c15,0(%r1)
+ stctg %c0,%c15,__LC_CREGS_SAVE_AREA(%r13)
/* Access registers */
- lghi %r1,__LC_AREGS_SAVE_AREA
- stam %a0,%a15,0(%r1)
+ stamy %a0,%a15,__LC_AREGS_SAVE_AREA(%r13)
/* Floating point registers */
- lghi %r1,__LC_FPREGS_SAVE_AREA
+ lay %r1,__LC_FPREGS_SAVE_AREA(%r13)
std %f0, 0x00(%r1)
std %f1, 0x08(%r1)
std %f2, 0x10(%r1)
@@ -51,21 +47,21 @@ SYM_CODE_START(store_status)
std %f14,0x70(%r1)
std %f15,0x78(%r1)
/* Floating point control register */
- lghi %r1,__LC_FP_CREG_SAVE_AREA
+ lay %r1,__LC_FP_CREG_SAVE_AREA(%r13)
stfpc 0(%r1)
/* CPU timer */
- lghi %r1,__LC_CPU_TIMER_SAVE_AREA
+ lay %r1,__LC_CPU_TIMER_SAVE_AREA(%r13)
stpt 0(%r1)
/* Store prefix register */
- lghi %r1,__LC_PREFIX_SAVE_AREA
+ lay %r1,__LC_PREFIX_SAVE_AREA(%r13)
stpx 0(%r1)
/* Clock comparator - seven bytes */
- lghi %r1,__LC_CLOCK_COMP_SAVE_AREA
larl %r4,clkcmp
stckc 0(%r4)
+ lay %r1,__LC_CLOCK_COMP_SAVE_AREA(%r13)
mvc 1(7,%r1),1(%r4)
/* Program status word */
- lghi %r1,__LC_PSW_SAVE_AREA
+ lay %r1,__LC_PSW_SAVE_AREA(%r13)
epsw %r4,%r5
st %r4,0(%r1)
st %r5,4(%r1)
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 90c2c786bb35..c1fe0b53c5ac 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -13,8 +13,7 @@
* This file handles the architecture-dependent parts of initialization
*/
-#define KMSG_COMPONENT "setup"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "setup: " fmt
#include <linux/errno.h>
#include <linux/export.h>
@@ -47,13 +46,13 @@
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/memory.h>
-#include <linux/compat.h>
#include <linux/start_kernel.h>
#include <linux/hugetlb.h>
#include <linux/kmemleak.h>
#include <asm/archrandom.h>
#include <asm/boot_data.h>
+#include <asm/machine.h>
#include <asm/ipl.h>
#include <asm/facility.h>
#include <asm/smp.h>
@@ -111,7 +110,7 @@ struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amod
* Because the AMODE31 sections are relocated below 2G at startup,
* the content of control registers CR2, CR5 and CR15 must be updated
* with new addresses after the relocation. The initial initialization of
- * control registers occurs in head64.S and then gets updated again after AMODE31
+ * control registers occurs in head.S and then gets updated again after AMODE31
* relocation. We must access the relevant AMODE31 tables indirectly via
* pointers placed in the .amode31.refs linker section. Those pointers get
* updated automatically during AMODE31 relocation and always contain a valid
@@ -149,34 +148,37 @@ unsigned long __bootdata_preserved(max_mappable);
struct physmem_info __bootdata(physmem_info);
struct vm_layout __bootdata_preserved(vm_layout);
-EXPORT_SYMBOL_GPL(vm_layout);
+EXPORT_SYMBOL(vm_layout);
int __bootdata_preserved(__kaslr_enabled);
unsigned int __bootdata_preserved(zlib_dfltcc_support);
EXPORT_SYMBOL(zlib_dfltcc_support);
u64 __bootdata_preserved(stfle_fac_list[16]);
EXPORT_SYMBOL(stfle_fac_list);
-u64 alt_stfle_fac_list[16];
struct oldmem_data __bootdata_preserved(oldmem_data);
-unsigned long VMALLOC_START;
+char __bootdata(boot_rb)[PAGE_SIZE * 2];
+bool __bootdata(boot_earlyprintk);
+size_t __bootdata(boot_rb_off);
+char __bootdata(bootdebug_filter)[128];
+bool __bootdata(bootdebug);
+
+unsigned long __bootdata_preserved(VMALLOC_START);
EXPORT_SYMBOL(VMALLOC_START);
-unsigned long VMALLOC_END;
+unsigned long __bootdata_preserved(VMALLOC_END);
EXPORT_SYMBOL(VMALLOC_END);
-struct page *vmemmap;
+struct page *__bootdata_preserved(vmemmap);
EXPORT_SYMBOL(vmemmap);
-unsigned long vmemmap_size;
+unsigned long __bootdata_preserved(vmemmap_size);
-unsigned long MODULES_VADDR;
-unsigned long MODULES_END;
+unsigned long __bootdata_preserved(MODULES_VADDR);
+unsigned long __bootdata_preserved(MODULES_END);
/* An array with a pointer to the lowcore of every CPU. */
struct lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);
-DEFINE_STATIC_KEY_FALSE(cpu_has_bear);
-
/*
* The Write Back bit position in the physaddr is given by the SLPC PCI.
* Leaving the mask zero always uses write through which is safe
@@ -246,7 +248,7 @@ static void __init conmode_default(void)
char query_buffer[1024];
char *ptr;
- if (MACHINE_IS_VM) {
+ if (machine_is_vm()) {
cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL);
console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
ptr = strstr(query_buffer, "SUBCHANNEL =");
@@ -284,7 +286,7 @@ static void __init conmode_default(void)
SET_CONSOLE_SCLP;
#endif
}
- } else if (MACHINE_IS_KVM) {
+ } else if (machine_is_kvm()) {
if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
SET_CONSOLE_VT220;
else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
@@ -360,36 +362,24 @@ void *restart_stack;
unsigned long stack_alloc(void)
{
-#ifdef CONFIG_VMAP_STACK
- void *ret;
+ void *stack;
- ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP,
- NUMA_NO_NODE, __builtin_return_address(0));
- kmemleak_not_leak(ret);
- return (unsigned long)ret;
-#else
- return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
-#endif
+ stack = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP,
+ NUMA_NO_NODE, __builtin_return_address(0));
+ kmemleak_not_leak(stack);
+ return (unsigned long)stack;
}
void stack_free(unsigned long stack)
{
-#ifdef CONFIG_VMAP_STACK
- vfree((void *) stack);
-#else
- free_pages(stack, THREAD_SIZE_ORDER);
-#endif
+ vfree((void *)stack);
}
static unsigned long __init stack_alloc_early(void)
{
unsigned long stack;
- stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
- if (!stack) {
- panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
- __func__, THREAD_SIZE, THREAD_SIZE);
- }
+ stack = (unsigned long)memblock_alloc_or_panic(THREAD_SIZE, THREAD_SIZE);
return stack;
}
@@ -406,6 +396,7 @@ static void __init setup_lowcore(void)
panic("%s: Failed to allocate %zu bytes align=%zx\n",
__func__, sizeof(*lc), sizeof(*lc));
+ lc->pcpu = (unsigned long)per_cpu_ptr(&pcpu_devices, 0);
lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT;
lc->restart_psw.addr = __pa(restart_int_handler);
lc->external_new_psw.mask = PSW_KERNEL_BITS;
@@ -421,16 +412,15 @@ static void __init setup_lowcore(void)
lc->clock_comparator = clock_comparator_max;
lc->current_task = (unsigned long)&init_task;
lc->lpp = LPP_MAGIC;
- lc->machine_flags = S390_lowcore.machine_flags;
- lc->preempt_count = S390_lowcore.preempt_count;
+ lc->preempt_count = get_lowcore()->preempt_count;
nmi_alloc_mcesa_early(&lc->mcesad);
- lc->sys_enter_timer = S390_lowcore.sys_enter_timer;
- lc->exit_timer = S390_lowcore.exit_timer;
- lc->user_timer = S390_lowcore.user_timer;
- lc->system_timer = S390_lowcore.system_timer;
- lc->steal_timer = S390_lowcore.steal_timer;
- lc->last_update_timer = S390_lowcore.last_update_timer;
- lc->last_update_clock = S390_lowcore.last_update_clock;
+ lc->sys_enter_timer = get_lowcore()->sys_enter_timer;
+ lc->exit_timer = get_lowcore()->exit_timer;
+ lc->user_timer = get_lowcore()->user_timer;
+ lc->system_timer = get_lowcore()->system_timer;
+ lc->steal_timer = get_lowcore()->steal_timer;
+ lc->last_update_timer = get_lowcore()->last_update_timer;
+ lc->last_update_clock = get_lowcore()->last_update_clock;
/*
* Allocate the global restart stack which is the same for
* all CPUs in case *one* of them does a PSW restart.
@@ -439,7 +429,7 @@ static void __init setup_lowcore(void)
lc->mcck_stack = stack_alloc_early() + STACK_INIT_OFFSET;
lc->async_stack = stack_alloc_early() + STACK_INIT_OFFSET;
lc->nodat_stack = stack_alloc_early() + STACK_INIT_OFFSET;
- lc->kernel_stack = S390_lowcore.kernel_stack;
+ lc->kernel_stack = get_lowcore()->kernel_stack;
/*
* Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
* restart data to the absolute zero lowcore. This is necessary if
@@ -455,8 +445,8 @@ static void __init setup_lowcore(void)
lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
lc->preempt_count = PREEMPT_DISABLED;
- lc->kernel_asce = S390_lowcore.kernel_asce;
- lc->user_asce = S390_lowcore.user_asce;
+ lc->kernel_asce = get_lowcore()->kernel_asce;
+ lc->user_asce = get_lowcore()->user_asce;
system_ctlreg_init_save_area(lc);
abs_lc = get_abs_lowcore();
@@ -512,10 +502,7 @@ static void __init setup_resources(void)
bss_resource.end = __pa_symbol(__bss_stop) - 1;
for_each_mem_range(i, &start, &end) {
- res = memblock_alloc(sizeof(*res), 8);
- if (!res)
- panic("%s: Failed to allocate %zu bytes align=0x%x\n",
- __func__, sizeof(*res), 8);
+ res = memblock_alloc_or_panic(sizeof(*res), 8);
res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
res->name = "System RAM";
@@ -534,10 +521,7 @@ static void __init setup_resources(void)
std_res->start > res->end)
continue;
if (std_res->end > res->end) {
- sub_res = memblock_alloc(sizeof(*sub_res), 8);
- if (!sub_res)
- panic("%s: Failed to allocate %zu bytes align=0x%x\n",
- __func__, sizeof(*sub_res), 8);
+ sub_res = memblock_alloc_or_panic(sizeof(*sub_res), 8);
*sub_res = *std_res;
sub_res->end = res->end;
std_res->start = res->end + 1;
@@ -619,7 +603,7 @@ static void __init reserve_crashkernel(void)
int rc;
rc = parse_crashkernel(boot_command_line, ident_map_size,
- &crash_size, &crash_base, NULL, NULL);
+ &crash_size, &crash_base, NULL, NULL, NULL);
crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
@@ -664,7 +648,7 @@ static void __init reserve_crashkernel(void)
return;
}
- if (!oldmem_data.start && MACHINE_IS_VM)
+ if (!oldmem_data.start && machine_is_vm())
diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
@@ -704,7 +688,7 @@ static void __init reserve_physmem_info(void)
{
unsigned long addr, size;
- if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size))
+ if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size))
memblock_reserve(addr, size);
}
@@ -712,7 +696,7 @@ static void __init free_physmem_info(void)
{
unsigned long addr, size;
- if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size))
+ if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size))
memblock_phys_free(addr, size);
}
@@ -733,8 +717,29 @@ static void __init memblock_add_physmem_info(void)
memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
}
+static void __init setup_high_memory(void)
+{
+ high_memory = __va(ident_map_size);
+}
+
+/*
+ * Reserve memory used for lowcore.
+ */
+static void __init reserve_lowcore(void)
+{
+ void *lowcore_start = get_lowcore();
+ void *lowcore_end = lowcore_start + sizeof(struct lowcore);
+ void *start, *end;
+
+ if (absolute_pointer(__identity_base) < lowcore_end) {
+ start = max(lowcore_start, (void *)__identity_base);
+ end = min(lowcore_end, (void *)(__identity_base + ident_map_size));
+ memblock_reserve(__pa(start), __pa(end));
+ }
+}
+
/*
- * Reserve memory used for lowcore/command line/kernel image.
+ * Reserve memory used for absolute lowcore/command line/kernel image.
*/
static void __init reserve_kernel(void)
{
@@ -808,9 +813,7 @@ static void __init setup_randomness(void)
{
struct sysinfo_3_2_2 *vmms;
- vmms = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- if (!vmms)
- panic("Failed to allocate memory for sysinfo structure\n");
+ vmms = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
memblock_free(vmms, PAGE_SIZE);
@@ -834,7 +837,7 @@ static void __init setup_control_program_code(void)
return;
diag_stat_inc(DIAG_STAT_X318);
- asm volatile("diag %0,0,0x318\n" : : "d" (diag318_info.val));
+ asm volatile("diag %0,0,0x318" : : "d" (diag318_info.val));
}
/*
@@ -870,6 +873,23 @@ static void __init log_component_list(void)
}
/*
+ * Print avoiding interpretation of % in buf and taking bootdebug option
+ * into consideration.
+ */
+static void __init print_rb_entry(const char *buf)
+{
+ char fmt[] = KERN_SOH "0boot: %s";
+ int level = printk_get_level(buf);
+
+ buf = skip_timestamp(printk_skip_level(buf));
+ if (level == KERN_DEBUG[1] && (!bootdebug || !bootdebug_filter_match(buf)))
+ return;
+
+ fmt[1] = level;
+ printk(fmt, buf);
+}
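As an aside on the fmt[1] trick used above: a printk record carries its level as a two-character prefix, KERN_SOH ('\001') followed by the level digit, so patching index 1 of the "\0010boot: %s" template forwards the original record's level while prepending the "boot:" prefix. A small standalone illustration of the same idea follows (ordinary C, not kernel code; the sample record text is made up):

#include <stdio.h>

#define SOH "\001"	/* plays the role of KERN_SOH */

/* Return the level digit of a "\001<level>..." record, or 0 if absent. */
static char rec_level(const char *s)
{
	return (s[0] == '\001' && s[1] != '\0') ? s[1] : 0;
}

/* Skip the two-byte level prefix, similar in spirit to printk_skip_level(). */
static const char *rec_text(const char *s)
{
	return rec_level(s) ? s + 2 : s;
}

int main(void)
{
	const char *rec = SOH "7vmem: sample decompressor message";	/* hypothetical record */
	char fmt[] = SOH "0boot: %s\n";
	char level = rec_level(rec);

	if (level)
		fmt[1] = level;			/* forward the original level */
	printf(fmt + 2, rec_text(rec));		/* "+ 2" drops the prefix for stdout */
	return 0;
}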
+
+/*
* Setup function called from init/main.c just after the banner
* was printed.
*/
@@ -879,15 +899,21 @@ void __init setup_arch(char **cmdline_p)
/*
* print what head.S has found out about the machine
*/
- if (MACHINE_IS_VM)
+ if (machine_is_vm())
pr_info("Linux is running as a z/VM "
"guest operating system in 64-bit mode\n");
- else if (MACHINE_IS_KVM)
+ else if (machine_is_kvm())
pr_info("Linux is running under KVM in 64-bit mode\n");
- else if (MACHINE_IS_LPAR)
+ else if (machine_is_lpar())
pr_info("Linux is running natively in 64-bit mode\n");
else
pr_info("Linux is running as a guest in 64-bit mode\n");
+ /* Print decompressor messages if not already printed */
+ if (!boot_earlyprintk)
+ boot_rb_foreach(print_rb_entry);
+
+ if (machine_has_relocated_lowcore())
+ pr_info("Lowcore relocated to 0x%px\n", get_lowcore());
log_component_list();
@@ -915,6 +941,7 @@ void __init setup_arch(char **cmdline_p)
/* Do some memory reservations *before* memory is added to memblock */
reserve_pgtables();
+ reserve_lowcore();
reserve_kernel();
reserve_initrd();
reserve_certificate_list();
@@ -927,6 +954,7 @@ void __init setup_arch(char **cmdline_p)
free_physmem_info();
setup_memory_end();
+ setup_high_memory();
memblock_dump_all();
setup_memory();
@@ -935,7 +963,7 @@ void __init setup_arch(char **cmdline_p)
setup_uv();
dma_contiguous_reserve(ident_map_size);
vmcp_cma_reserve();
- if (MACHINE_HAS_EDAT2)
+ if (cpu_has_edat2())
hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
reserve_crashkernel();
@@ -955,10 +983,7 @@ void __init setup_arch(char **cmdline_p)
numa_setup();
smp_detect_cpus();
topology_init_early();
-
- if (test_facility(193))
- static_branch_enable(&cpu_has_bear);
-
+ setup_protection_map();
/*
* Create kernel page tables.
*/
@@ -986,3 +1011,8 @@ void __init setup_arch(char **cmdline_p)
/* Add system specific data to the random pool */
setup_randomness();
}
+
+void __init arch_cpu_finalize_init(void)
+{
+ sclp_init();
+}
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 6c2cb345402f..4874de5edea0 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -27,12 +27,11 @@
#include <linux/personality.h>
#include <linux/binfmts.h>
#include <linux/syscalls.h>
-#include <linux/compat.h>
#include <asm/ucontext.h>
#include <linux/uaccess.h>
+#include <asm/vdso-symbols.h>
#include <asm/access-regs.h>
#include <asm/lowcore.h>
-#include <asm/vdso.h>
#include "entry.h"
/*
@@ -290,12 +289,6 @@ static int setup_frame(int sig, struct k_sigaction *ka,
unsigned long restorer;
size_t frame_size;
- /*
- * gprs_high are only present for a 31-bit task running on
- * a 64-bit kernel (see compat_signal.c) but the space for
- * gprs_high needs to be allocated if vector registers are
- * included in the signal frame on a 31-bit system.
- */
frame_size = sizeof(*frame) - sizeof(frame->sregs_ext);
if (cpu_has_vx())
frame_size += sizeof(frame->sregs_ext);
@@ -333,7 +326,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
if (ka->sa.sa_flags & SA_RESTORER)
restorer = (unsigned long) ka->sa.sa_restorer;
else
- restorer = VDSO64_SYMBOL(current, sigreturn);
+ restorer = VDSO_SYMBOL(current, sigreturn);
/* Set up registers for signal handler */
regs->gprs[14] = restorer;
@@ -367,12 +360,6 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
size_t frame_size;
frame_size = sizeof(struct rt_sigframe) - sizeof(_sigregs_ext);
- /*
- * gprs_high are only present for a 31-bit task running on
- * a 64-bit kernel (see compat_signal.c) but the space for
- * gprs_high needs to be allocated if vector registers are
- * included in the signal frame on a 31-bit system.
- */
uc_flags = 0;
if (cpu_has_vx()) {
frame_size += sizeof(_sigregs_ext);
@@ -391,7 +378,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
if (ksig->ka.sa.sa_flags & SA_RESTORER)
restorer = (unsigned long) ksig->ka.sa.sa_restorer;
else
- restorer = VDSO64_SYMBOL(current, rt_sigreturn);
+ restorer = VDSO_SYMBOL(current, rt_sigreturn);
/* Create siginfo on the signal stack */
if (copy_siginfo_to_user(&frame->info, &ksig->info))
@@ -490,10 +477,7 @@ void arch_do_signal_or_restart(struct pt_regs *regs)
clear_pt_regs_flag(regs, PIF_SYSCALL);
rseq_signal_deliver(&ksig, regs);
- if (is_compat_task())
- handle_signal32(&ksig, oldset, regs);
- else
- handle_signal(&ksig, oldset, regs);
+ handle_signal(&ksig, oldset, regs);
return;
}
@@ -506,10 +490,7 @@ void arch_do_signal_or_restart(struct pt_regs *regs)
/* Restart with sys_restart_syscall */
regs->gprs[2] = regs->orig_gpr2;
current->restart_block.arch_data = regs->psw.addr;
- if (is_compat_task())
- regs->psw.addr = VDSO32_SYMBOL(current, restart_syscall);
- else
- regs->psw.addr = VDSO64_SYMBOL(current, restart_syscall);
+ regs->psw.addr = VDSO_SYMBOL(current, restart_syscall);
if (test_thread_flag(TIF_SINGLE_STEP))
clear_thread_flag(TIF_PER_TRAP);
break;
diff --git a/arch/s390/kernel/skey.c b/arch/s390/kernel/skey.c
new file mode 100644
index 000000000000..cc869de6e3a5
--- /dev/null
+++ b/arch/s390/kernel/skey.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <asm/rwonce.h>
+#include <asm/page.h>
+#include <asm/skey.h>
+
+int skey_regions_initialized;
+
+static inline unsigned long load_real_address(unsigned long address)
+{
+ unsigned long real;
+
+ asm volatile(
+ " lra %[real],0(%[address])"
+ : [real] "=d" (real)
+ : [address] "a" (address)
+ : "cc");
+ return real;
+}
+
+/*
+ * Initialize storage keys of registered memory regions with the
+ * default key. This is useful for code which is executed with a
+ * non-default access key.
+ */
+void __skey_regions_initialize(void)
+{
+ unsigned long address, real;
+ struct skey_region *r, *end;
+
+ r = __skey_region_start;
+ end = __skey_region_end;
+ while (r < end) {
+ address = r->start & PAGE_MASK;
+ do {
+ real = load_real_address(address);
+ page_set_storage_key(real, PAGE_DEFAULT_KEY, 1);
+ address += PAGE_SIZE;
+ } while (address < r->end);
+ r++;
+ }
+ /*
+ * Make sure storage keys are initialized before
+ * skey_regions_initialized is changed.
+ */
+ barrier();
+ WRITE_ONCE(skey_regions_initialized, 1);
+}
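A natural way to consume the flag exported above is a small wrapper that triggers the one-time initialization lazily. A hedged sketch follows; the wrapper name skey_regions_initialize() and its placement in a header are assumptions for illustration, only __skey_regions_initialize() and skey_regions_initialized are actually defined by this patch.

/*
 * Hypothetical header-side helper pairing READ_ONCE() with the
 * WRITE_ONCE() in __skey_regions_initialize() above.
 */
static inline void skey_regions_initialize(void)
{
	if (READ_ONCE(skey_regions_initialized))
		return;
	__skey_regions_initialize();
}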
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 0324649aae0a..b7429f30afc1 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -15,9 +15,9 @@
* operates on physical cpu numbers needs to go into smp.c.
*/
-#define KMSG_COMPONENT "cpu"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "cpu: " fmt
+#include <linux/cpufeature.h>
#include <linux/workqueue.h>
#include <linux/memblock.h>
#include <linux/export.h>
@@ -38,6 +38,7 @@
#include <linux/kprobes.h>
#include <asm/access-regs.h>
#include <asm/asm-offsets.h>
+#include <asm/machine.h>
#include <asm/ctlreg.h>
#include <asm/pfault.h>
#include <asm/diag.h>
@@ -74,18 +75,15 @@ enum {
CPU_STATE_CONFIGURED,
};
-static DEFINE_PER_CPU(struct cpu *, cpu_device);
-
-struct pcpu {
- unsigned long ec_mask; /* bit mask for ec_xxx functions */
- unsigned long ec_clk; /* sigp timestamp for ec_xxx */
- signed char state; /* physical cpu state */
- signed char polarization; /* physical polarization */
- u16 address; /* physical cpu address */
-};
-
static u8 boot_core_type;
-static struct pcpu pcpu_devices[NR_CPUS];
+DEFINE_PER_CPU(struct pcpu, pcpu_devices);
+/*
+ * Pointer to the pcpu area of the boot CPU. This is required when a restart
+ * interrupt is triggered on an offline CPU. For that case accessing percpu
+ * data with the common primitives does not work, since the percpu offset is
+ * stored in a non-existent lowcore.
+ */
+static struct pcpu *ipl_pcpu;
unsigned int smp_cpu_mt_shift;
EXPORT_SYMBOL(smp_cpu_mt_shift);
@@ -100,13 +98,6 @@ __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS];
static unsigned int smp_max_threads __initdata = -1U;
cpumask_t cpu_setup_mask;
-static int __init early_nosmt(char *s)
-{
- smp_max_threads = 1;
- return 0;
-}
-early_param("nosmt", early_nosmt);
-
static int __init early_smt(char *s)
{
get_option(&s, &smp_max_threads);
@@ -176,20 +167,17 @@ static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address)
int cpu;
for_each_cpu(cpu, mask)
- if (pcpu_devices[cpu].address == address)
- return pcpu_devices + cpu;
+ if (per_cpu(pcpu_devices, cpu).address == address)
+ return &per_cpu(pcpu_devices, cpu);
return NULL;
}
static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
{
- int order;
-
if (test_and_set_bit(ec_bit, &pcpu->ec_mask))
return;
- order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL;
pcpu->ec_clk = get_tod_clock_fast();
- pcpu_sigp_retry(pcpu, order, 0);
+ pcpu_sigp_retry(pcpu, SIGP_EXTERNAL_CALL, 0);
}
static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
@@ -203,7 +191,7 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
mcck_stack = stack_alloc();
if (!lc || !nodat_stack || !async_stack || !mcck_stack)
goto out;
- memcpy(lc, &S390_lowcore, 512);
+ memcpy(lc, get_lowcore(), 512);
memset((char *) lc + 512, 0, sizeof(*lc) - 512);
lc->async_stack = async_stack + STACK_INIT_OFFSET;
lc->nodat_stack = nodat_stack + STACK_INIT_OFFSET;
@@ -232,13 +220,11 @@ out:
return -ENOMEM;
}
-static void pcpu_free_lowcore(struct pcpu *pcpu)
+static void pcpu_free_lowcore(struct pcpu *pcpu, int cpu)
{
unsigned long async_stack, nodat_stack, mcck_stack;
struct lowcore *lc;
- int cpu;
- cpu = pcpu - pcpu_devices;
lc = lowcore_ptr[cpu];
nodat_stack = lc->nodat_stack - STACK_INIT_OFFSET;
async_stack = lc->async_stack - STACK_INIT_OFFSET;
@@ -261,30 +247,28 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
lc->cpu_nr = cpu;
+ lc->pcpu = (unsigned long)pcpu;
lc->restart_flags = RESTART_FLAG_CTLREGS;
lc->spinlock_lockval = arch_spin_lockval(cpu);
lc->spinlock_index = 0;
lc->percpu_offset = __per_cpu_offset[cpu];
- lc->kernel_asce = S390_lowcore.kernel_asce;
+ lc->kernel_asce = get_lowcore()->kernel_asce;
lc->user_asce = s390_invalid_asce;
- lc->machine_flags = S390_lowcore.machine_flags;
lc->user_timer = lc->system_timer =
lc->steal_timer = lc->avg_steal_timer = 0;
abs_lc = get_abs_lowcore();
memcpy(lc->cregs_save_area, abs_lc->cregs_save_area, sizeof(lc->cregs_save_area));
put_abs_lowcore(abs_lc);
- lc->cregs_save_area[1] = lc->kernel_asce;
+ lc->cregs_save_area[1] = lc->user_asce;
lc->cregs_save_area[7] = lc->user_asce;
save_access_regs((unsigned int *) lc->access_regs_save_area);
arch_spin_lock_setup(cpu);
}
-static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
+static void pcpu_attach_task(int cpu, struct task_struct *tsk)
{
struct lowcore *lc;
- int cpu;
- cpu = pcpu - pcpu_devices;
lc = lowcore_ptr[cpu];
lc->kernel_stack = (unsigned long)task_stack_page(tsk) + STACK_INIT_OFFSET;
lc->current_task = (unsigned long)tsk;
@@ -296,20 +280,21 @@ static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
lc->hardirq_timer = tsk->thread.hardirq_timer;
lc->softirq_timer = tsk->thread.softirq_timer;
lc->steal_timer = 0;
+#ifdef CONFIG_STACKPROTECTOR
+ lc->stack_canary = tsk->stack_canary;
+#endif
}
-static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
+static void pcpu_start_fn(int cpu, void (*func)(void *), void *data)
{
struct lowcore *lc;
- int cpu;
- cpu = pcpu - pcpu_devices;
lc = lowcore_ptr[cpu];
lc->restart_stack = lc->kernel_stack;
lc->restart_fn = (unsigned long) func;
lc->restart_data = (unsigned long) data;
lc->restart_source = -1U;
- pcpu_sigp_retry(pcpu, SIGP_RESTART, 0);
+ pcpu_sigp_retry(per_cpu_ptr(&pcpu_devices, cpu), SIGP_RESTART, 0);
}
typedef void (pcpu_delegate_fn)(void *);
@@ -322,14 +307,14 @@ static void __pcpu_delegate(pcpu_delegate_fn *func, void *data)
func(data); /* should not return */
}
-static void pcpu_delegate(struct pcpu *pcpu,
- pcpu_delegate_fn *func,
- void *data, unsigned long stack)
+static void __noreturn pcpu_delegate(struct pcpu *pcpu, int cpu,
+ pcpu_delegate_fn *func,
+ void *data, unsigned long stack)
{
struct lowcore *lc, *abs_lc;
unsigned int source_cpu;
- lc = lowcore_ptr[pcpu - pcpu_devices];
+ lc = lowcore_ptr[cpu];
source_cpu = stap();
if (pcpu->address == source_cpu) {
@@ -357,7 +342,7 @@ static void pcpu_delegate(struct pcpu *pcpu,
"0: sigp 0,%0,%2 # sigp restart to target cpu\n"
" brc 2,0b # busy, try again\n"
"1: sigp 0,%1,%3 # sigp stop to current cpu\n"
- " brc 2,1b # busy, try again\n"
+ " brc 2,1b # busy, try again"
: : "d" (pcpu->address), "d" (source_cpu),
"K" (SIGP_RESTART), "K" (SIGP_STOP)
: "0", "1", "cc");
@@ -379,38 +364,22 @@ static int pcpu_set_smt(unsigned int mtid)
smp_cpu_mt_shift = 0;
while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift))
smp_cpu_mt_shift++;
- pcpu_devices[0].address = stap();
+ per_cpu(pcpu_devices, 0).address = stap();
}
return cc;
}
/*
- * Call function on an online CPU.
- */
-void smp_call_online_cpu(void (*func)(void *), void *data)
-{
- struct pcpu *pcpu;
-
- /* Use the current cpu if it is online. */
- pcpu = pcpu_find_address(cpu_online_mask, stap());
- if (!pcpu)
- /* Use the first online cpu. */
- pcpu = pcpu_devices + cpumask_first(cpu_online_mask);
- pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack);
-}
-
-/*
* Call function on the ipl CPU.
*/
-void smp_call_ipl_cpu(void (*func)(void *), void *data)
+void __noreturn smp_call_ipl_cpu(void (*func)(void *), void *data)
{
struct lowcore *lc = lowcore_ptr[0];
- if (pcpu_devices[0].address == stap())
- lc = &S390_lowcore;
+ if (ipl_pcpu->address == stap())
+ lc = get_lowcore();
- pcpu_delegate(&pcpu_devices[0], func, data,
- lc->nodat_stack);
+ pcpu_delegate(ipl_pcpu, 0, func, data, lc->nodat_stack);
}
int smp_find_processor_id(u16 address)
@@ -418,21 +387,21 @@ int smp_find_processor_id(u16 address)
int cpu;
for_each_present_cpu(cpu)
- if (pcpu_devices[cpu].address == address)
+ if (per_cpu(pcpu_devices, cpu).address == address)
return cpu;
return -1;
}
void schedule_mcck_handler(void)
{
- pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_mcck_pending);
+ pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_mcck_pending);
}
bool notrace arch_vcpu_is_preempted(int cpu)
{
if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu))
return false;
- if (pcpu_running(pcpu_devices + cpu))
+ if (pcpu_running(per_cpu_ptr(&pcpu_devices, cpu)))
return false;
return true;
}
@@ -440,11 +409,11 @@ EXPORT_SYMBOL(arch_vcpu_is_preempted);
void notrace smp_yield_cpu(int cpu)
{
- if (!MACHINE_HAS_DIAG9C)
+ if (!machine_has_diag9c())
return;
diag_stat_inc_norecursion(DIAG_STAT_X09C);
asm volatile("diag %0,0,0x9c"
- : : "d" (pcpu_devices[cpu].address));
+ : : "d" (per_cpu(pcpu_devices, cpu).address));
}
EXPORT_SYMBOL_GPL(smp_yield_cpu);
@@ -463,18 +432,18 @@ void notrace smp_emergency_stop(void)
cpumask_copy(&cpumask, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), &cpumask);
- end = get_tod_clock() + (1000000UL << 12);
+ end = get_tod_clock_monotonic() + (1000000UL << 12);
for_each_cpu(cpu, &cpumask) {
- struct pcpu *pcpu = pcpu_devices + cpu;
+ struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu);
set_bit(ec_stop_cpu, &pcpu->ec_mask);
while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL,
0, NULL) == SIGP_CC_BUSY &&
- get_tod_clock() < end)
+ get_tod_clock_monotonic() < end)
cpu_relax();
}
- while (get_tod_clock() < end) {
+ while (get_tod_clock_monotonic() < end) {
for_each_cpu(cpu, &cpumask)
- if (pcpu_stopped(pcpu_devices + cpu))
+ if (pcpu_stopped(per_cpu_ptr(&pcpu_devices, cpu)))
cpumask_clear_cpu(cpu, &cpumask);
if (cpumask_empty(&cpumask))
break;
@@ -489,6 +458,7 @@ NOKPROBE_SYMBOL(smp_emergency_stop);
*/
void smp_send_stop(void)
{
+ struct pcpu *pcpu;
int cpu;
/* Disable all interrupts/machine checks */
@@ -504,8 +474,9 @@ void smp_send_stop(void)
for_each_online_cpu(cpu) {
if (cpu == smp_processor_id())
continue;
- pcpu_sigp_retry(pcpu_devices + cpu, SIGP_STOP, 0);
- while (!pcpu_stopped(pcpu_devices + cpu))
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
+ pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
+ while (!pcpu_stopped(pcpu))
cpu_relax();
}
}
@@ -519,7 +490,7 @@ static void smp_handle_ext_call(void)
unsigned long bits;
/* handle bit signal external calls */
- bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0);
+ bits = this_cpu_xchg(pcpu_devices.ec_mask, 0);
if (test_bit(ec_stop_cpu, &bits))
smp_stop_cpu();
if (test_bit(ec_schedule, &bits))
@@ -544,12 +515,12 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
int cpu;
for_each_cpu(cpu, mask)
- pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
+ pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single);
}
void arch_send_call_function_single_ipi(int cpu)
{
- pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
+ pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single);
}
/*
@@ -559,13 +530,13 @@ void arch_send_call_function_single_ipi(int cpu)
*/
void arch_smp_send_reschedule(int cpu)
{
- pcpu_ec_call(pcpu_devices + cpu, ec_schedule);
+ pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_schedule);
}
#ifdef CONFIG_IRQ_WORK
void arch_irq_work_raise(void)
{
- pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_irq_work);
+ pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_irq_work);
}
#endif
@@ -577,16 +548,16 @@ int smp_store_status(int cpu)
struct pcpu *pcpu;
unsigned long pa;
- pcpu = pcpu_devices + cpu;
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
lc = lowcore_ptr[cpu];
pa = __pa(&lc->floating_pt_save_area);
if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS,
pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
return -EIO;
- if (!cpu_has_vx() && !MACHINE_HAS_GS)
+ if (!cpu_has_vx() && !cpu_has_gs())
return 0;
pa = lc->mcesad & MCESA_ORIGIN_MASK;
- if (MACHINE_HAS_GS)
+ if (cpu_has_gs())
pa |= lc->mcesad & MCESA_LC_MASK;
if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
@@ -596,7 +567,7 @@ int smp_store_status(int cpu)
/*
* Collect CPU state of the previous, crashed system.
- * There are four cases:
+ * There are three cases:
* 1) standard zfcp/nvme dump
* condition: OLDMEM_BASE == NULL && is_ipl_type_dump() == true
* The state for all CPUs except the boot CPU needs to be collected
@@ -609,16 +580,16 @@ int smp_store_status(int cpu)
* with sigp stop-and-store-status. The firmware or the boot-loader
* stored the registers of the boot CPU in the absolute lowcore in the
* memory of the old system.
- * 3) kdump and the old kernel did not store the CPU state,
- * or stand-alone kdump for DASD
- * condition: OLDMEM_BASE != NULL && !is_kdump_kernel()
+ * 3) kdump or stand-alone kdump for DASD
+ * condition: OLDMEM_BASE != NULL && is_ipl_type_dump() == false
* The state for all CPUs except the boot CPU needs to be collected
* with sigp stop-and-store-status. The kexec code or the boot-loader
* stored the registers of the boot CPU in the memory of the old system.
- * 4) kdump and the old kernel stored the CPU state
- * condition: OLDMEM_BASE != NULL && is_kdump_kernel()
- * This case does not exist for s390 anymore, setup_arch explicitly
- * deactivates the elfcorehdr= kernel parameter
+ *
+ * Note that the legacy kdump mode where the old kernel stored the CPU states
+ * does no longer exist: setup_arch() explicitly deactivates the elfcorehdr=
+ * kernel parameter. The is_kdump_kernel() implementation on s390 is independent
+ * of the elfcorehdr= parameter.
*/
static bool dump_available(void)
{
@@ -633,9 +604,7 @@ void __init smp_save_dump_ipl_cpu(void)
if (!dump_available())
return;
sa = save_area_alloc(true);
- regs = memblock_alloc(512, 8);
- if (!sa || !regs)
- panic("could not allocate memory for boot CPU save area\n");
+ regs = memblock_alloc_or_panic(512, 8);
copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512);
save_area_add_regs(sa, regs);
memblock_free(regs, 512);
@@ -668,8 +637,6 @@ void __init smp_save_dump_secondary_cpus(void)
SIGP_CC_NOT_OPERATIONAL)
continue;
sa = save_area_alloc(false);
- if (!sa)
- panic("could not allocate memory for save area\n");
__pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(page));
save_area_add_regs(sa, page);
if (cpu_has_vx()) {
@@ -685,17 +652,36 @@ void __init smp_save_dump_secondary_cpus(void)
void smp_cpu_set_polarization(int cpu, int val)
{
- pcpu_devices[cpu].polarization = val;
+ per_cpu(pcpu_devices, cpu).polarization = val;
}
int smp_cpu_get_polarization(int cpu)
{
- return pcpu_devices[cpu].polarization;
+ return per_cpu(pcpu_devices, cpu).polarization;
+}
+
+void smp_cpu_set_capacity(int cpu, unsigned long val)
+{
+ per_cpu(pcpu_devices, cpu).capacity = val;
+}
+
+unsigned long smp_cpu_get_capacity(int cpu)
+{
+ return per_cpu(pcpu_devices, cpu).capacity;
+}
+
+void smp_set_core_capacity(int cpu, unsigned long val)
+{
+ int i;
+
+ cpu = smp_get_base_cpu(cpu);
+ for (i = cpu; (i <= cpu + smp_cpu_mtid) && (i < nr_cpu_ids); i++)
+ smp_cpu_set_capacity(i, val);
}
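Usage note for the helper above: callers pass any logical CPU of a core and the capacity value is applied to every SMT sibling of that core. A hypothetical call site is sketched below; CPU_CAPACITY_LOW is assumed to exist alongside the CPU_CAPACITY_HIGH default used elsewhere in this patch.

	/* Hypothetical: downgrade the whole core that logical CPU 5 sits on. */
	smp_set_core_capacity(5, CPU_CAPACITY_LOW);
	/*
	 * With SMT-2 (smp_cpu_mtid == 1) this updates both sibling threads,
	 * since the loop walks from smp_get_base_cpu(5) through
	 * smp_get_base_cpu(5) + smp_cpu_mtid.
	 */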
int smp_cpu_get_cpu_address(int cpu)
{
- return pcpu_devices[cpu].address;
+ return per_cpu(pcpu_devices, cpu).address;
}
static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
@@ -713,14 +699,13 @@ static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
continue;
info->core[info->configured].core_id =
address >> smp_cpu_mt_shift;
+ info->core[info->configured].type = boot_core_type;
info->configured++;
}
info->combined = info->configured;
}
}
-static int smp_add_present_cpu(int cpu);
-
static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
bool configured, bool early)
{
@@ -736,15 +721,16 @@ static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) {
if (pcpu_find_address(cpu_present_mask, address + i))
continue;
- pcpu = pcpu_devices + cpu;
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
pcpu->address = address + i;
if (configured)
pcpu->state = CPU_STATE_CONFIGURED;
else
pcpu->state = CPU_STATE_STANDBY;
smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ smp_cpu_set_capacity(cpu, CPU_CAPACITY_HIGH);
set_cpu_present(cpu, true);
- if (!early && smp_add_present_cpu(cpu) != 0)
+ if (!early && arch_register_cpu(cpu))
set_cpu_present(cpu, false);
else
nr++;
@@ -771,7 +757,7 @@ static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
* that all SMT threads get subsequent logical CPU numbers.
*/
if (early) {
- core_id = pcpu_devices[0].address >> smp_cpu_mt_shift;
+ core_id = per_cpu(pcpu_devices, 0).address >> smp_cpu_mt_shift;
for (i = 0; i < info->configured; i++) {
core = &info->core[i];
if (core->core_id == core_id) {
@@ -796,10 +782,7 @@ void __init smp_detect_cpus(void)
u16 address;
/* Get CPU information */
- info = memblock_alloc(sizeof(*info), 8);
- if (!info)
- panic("%s: Failed to allocate %zu bytes align=0x%x\n",
- __func__, sizeof(*info), 8);
+ info = memblock_alloc_or_panic(sizeof(*info), 8);
smp_get_core_info(info, 1);
/* Find boot CPU type */
if (sclp.has_core_type) {
@@ -818,6 +801,7 @@ void __init smp_detect_cpus(void)
mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp;
mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1;
pcpu_set_smt(mtid);
+ cpu_smt_set_num_threads(smp_cpu_mtid + 1, smp_cpu_mtid + 1);
/* Print number of CPUs */
c_cpus = s_cpus = 0;
@@ -831,9 +815,6 @@ void __init smp_detect_cpus(void)
s_cpus += smp_cpu_mtid + 1;
}
pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);
-
- /* Add CPUs present at boot */
- __smp_rescan_cpus(info, true);
memblock_free(info, sizeof(*info));
}
@@ -842,15 +823,16 @@ void __init smp_detect_cpus(void)
*/
static void smp_start_secondary(void *cpuvoid)
{
+ struct lowcore *lc = get_lowcore();
int cpu = raw_smp_processor_id();
- S390_lowcore.last_update_clock = get_tod_clock();
- S390_lowcore.restart_stack = (unsigned long)restart_stack;
- S390_lowcore.restart_fn = (unsigned long)do_restart;
- S390_lowcore.restart_data = 0;
- S390_lowcore.restart_source = -1U;
- S390_lowcore.restart_flags = 0;
- restore_access_regs(S390_lowcore.access_regs_save_area);
+ lc->last_update_clock = get_tod_clock();
+ lc->restart_stack = (unsigned long)restart_stack;
+ lc->restart_fn = (unsigned long)do_restart;
+ lc->restart_data = 0;
+ lc->restart_source = -1U;
+ lc->restart_flags = 0;
+ restore_access_regs(lc->access_regs_save_area);
cpu_init();
rcutree_report_cpu_starting(cpu);
init_cpu_timer();
@@ -873,7 +855,7 @@ static void smp_start_secondary(void *cpuvoid)
/* Upping and downing of CPUs */
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
- struct pcpu *pcpu = pcpu_devices + cpu;
+ struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu);
int rc;
if (pcpu->state != CPU_STATE_CONFIGURED)
@@ -891,8 +873,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
*/
system_ctlreg_lock();
pcpu_prepare_secondary(pcpu, cpu);
- pcpu_attach_task(pcpu, tidle);
- pcpu_start_fn(pcpu, smp_start_secondary, NULL);
+ pcpu_attach_task(cpu, tidle);
+ pcpu_start_fn(cpu, smp_start_secondary, NULL);
/* Wait until cpu puts itself in the online & active maps */
while (!cpu_online(cpu))
cpu_relax();
@@ -937,18 +919,19 @@ void __cpu_die(unsigned int cpu)
struct pcpu *pcpu;
/* Wait until target cpu is down */
- pcpu = pcpu_devices + cpu;
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
while (!pcpu_stopped(pcpu))
cpu_relax();
- pcpu_free_lowcore(pcpu);
+ pcpu_free_lowcore(pcpu, cpu);
cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
+ pcpu->flags = 0;
}
void __noreturn cpu_die(void)
{
idle_task_exit();
- pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0);
+ pcpu_sigp_retry(this_cpu_ptr(&pcpu_devices), SIGP_STOP, 0);
for (;;) ;
}
@@ -973,24 +956,30 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
panic("Couldn't request external interrupt 0x1202");
system_ctl_set_bit(0, 13);
+ smp_rescan_cpus(true);
}
void __init smp_prepare_boot_cpu(void)
{
- struct pcpu *pcpu = pcpu_devices;
+ struct lowcore *lc = get_lowcore();
WARN_ON(!cpu_present(0) || !cpu_online(0));
- pcpu->state = CPU_STATE_CONFIGURED;
- S390_lowcore.percpu_offset = __per_cpu_offset[0];
+ lc->percpu_offset = __per_cpu_offset[0];
+ ipl_pcpu = per_cpu_ptr(&pcpu_devices, 0);
+ ipl_pcpu->state = CPU_STATE_CONFIGURED;
+ lc->pcpu = (unsigned long)ipl_pcpu;
smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
+ smp_cpu_set_capacity(0, CPU_CAPACITY_HIGH);
}
void __init smp_setup_processor_id(void)
{
- pcpu_devices[0].address = stap();
- S390_lowcore.cpu_nr = 0;
- S390_lowcore.spinlock_lockval = arch_spin_lockval(0);
- S390_lowcore.spinlock_index = 0;
+ struct lowcore *lc = get_lowcore();
+
+ lc->cpu_nr = 0;
+ per_cpu(pcpu_devices, 0).address = stap();
+ lc->spinlock_lockval = arch_spin_lockval(0);
+ lc->spinlock_index = 0;
}
/*
@@ -1010,7 +999,7 @@ static ssize_t cpu_configure_show(struct device *dev,
ssize_t count;
mutex_lock(&smp_cpu_state_mutex);
- count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state);
+ count = sysfs_emit(buf, "%d\n", per_cpu(pcpu_devices, dev->id).state);
mutex_unlock(&smp_cpu_state_mutex);
return count;
}
@@ -1036,7 +1025,7 @@ static ssize_t cpu_configure_store(struct device *dev,
for (i = 0; i <= smp_cpu_mtid; i++)
if (cpu_online(cpu + i))
goto out;
- pcpu = pcpu_devices + cpu;
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
rc = 0;
switch (val) {
case 0:
@@ -1048,7 +1037,7 @@ static ssize_t cpu_configure_store(struct device *dev,
for (i = 0; i <= smp_cpu_mtid; i++) {
if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
continue;
- pcpu[i].state = CPU_STATE_STANDBY;
+ per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_STANDBY;
smp_cpu_set_polarization(cpu + i,
POLARIZATION_UNKNOWN);
}
@@ -1063,7 +1052,7 @@ static ssize_t cpu_configure_store(struct device *dev,
for (i = 0; i <= smp_cpu_mtid; i++) {
if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
continue;
- pcpu[i].state = CPU_STATE_CONFIGURED;
+ per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_CONFIGURED;
smp_cpu_set_polarization(cpu + i,
POLARIZATION_UNKNOWN);
}
@@ -1082,7 +1071,7 @@ static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
static ssize_t show_cpu_address(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", pcpu_devices[dev->id].address);
+ return sysfs_emit(buf, "%d\n", per_cpu(pcpu_devices, dev->id).address);
}
static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);
@@ -1108,35 +1097,34 @@ static struct attribute_group cpu_online_attr_group = {
static int smp_cpu_online(unsigned int cpu)
{
- struct device *s = &per_cpu(cpu_device, cpu)->dev;
+ struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
- return sysfs_create_group(&s->kobj, &cpu_online_attr_group);
+ return sysfs_create_group(&c->dev.kobj, &cpu_online_attr_group);
}
static int smp_cpu_pre_down(unsigned int cpu)
{
- struct device *s = &per_cpu(cpu_device, cpu)->dev;
+ struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
- sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
+ sysfs_remove_group(&c->dev.kobj, &cpu_online_attr_group);
return 0;
}
-static int smp_add_present_cpu(int cpu)
+bool arch_cpu_is_hotpluggable(int cpu)
{
- struct device *s;
- struct cpu *c;
+ return !!cpu;
+}
+
+int arch_register_cpu(int cpu)
+{
+ struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
int rc;
- c = kzalloc(sizeof(*c), GFP_KERNEL);
- if (!c)
- return -ENOMEM;
- per_cpu(cpu_device, cpu) = c;
- s = &c->dev;
- c->hotpluggable = !!cpu;
+ c->hotpluggable = arch_cpu_is_hotpluggable(cpu);
rc = register_cpu(c, cpu);
if (rc)
goto out;
- rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group);
+ rc = sysfs_create_group(&c->dev.kobj, &cpu_common_attr_group);
if (rc)
goto out_cpu;
rc = topology_cpu_init(c);
@@ -1145,14 +1133,14 @@ static int smp_add_present_cpu(int cpu)
return 0;
out_topology:
- sysfs_remove_group(&s->kobj, &cpu_common_attr_group);
+ sysfs_remove_group(&c->dev.kobj, &cpu_common_attr_group);
out_cpu:
unregister_cpu(c);
out:
return rc;
}
-int __ref smp_rescan_cpus(void)
+int __ref smp_rescan_cpus(bool early)
{
struct sclp_core_info *info;
int nr;
@@ -1161,7 +1149,7 @@ int __ref smp_rescan_cpus(void)
if (!info)
return -ENOMEM;
smp_get_core_info(info, 0);
- nr = __smp_rescan_cpus(info, false);
+ nr = __smp_rescan_cpus(info, early);
kfree(info);
if (nr)
topology_schedule_update();
@@ -1178,7 +1166,7 @@ static ssize_t __ref rescan_store(struct device *dev,
rc = lock_device_hotplug_sysfs();
if (rc)
return rc;
- rc = smp_rescan_cpus();
+ rc = smp_rescan_cpus(false);
unlock_device_hotplug();
return rc ? rc : count;
}
@@ -1187,7 +1175,7 @@ static DEVICE_ATTR_WO(rescan);
static int __init s390_smp_init(void)
{
struct device *dev_root;
- int cpu, rc = 0;
+ int rc;
dev_root = bus_get_dev_root(&cpu_subsys);
if (dev_root) {
@@ -1196,17 +1184,9 @@ static int __init s390_smp_init(void)
if (rc)
return rc;
}
-
- for_each_present_cpu(cpu) {
- rc = smp_add_present_cpu(cpu);
- if (rc)
- goto out;
- }
-
rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "s390/smp:online",
smp_cpu_online, smp_cpu_pre_down);
rc = rc <= 0 ? rc : 0;
-out:
return rc;
}
subsys_initcall(s390_smp_init);
diff --git a/arch/s390/kernel/stackprotector.c b/arch/s390/kernel/stackprotector.c
new file mode 100644
index 000000000000..d4e40483f008
--- /dev/null
+++ b/arch/s390/kernel/stackprotector.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef pr_fmt
+#define pr_fmt(fmt) "stackprot: " fmt
+#endif
+
+#include <linux/export.h>
+#include <linux/uaccess.h>
+#include <linux/printk.h>
+#include <asm/abs_lowcore.h>
+#include <asm/sections.h>
+#include <asm/machine.h>
+#include <asm/asm-offsets.h>
+#include <asm/arch-stackprotector.h>
+
+#ifdef __DECOMPRESSOR
+
+#define DEBUGP boot_debug
+#define EMERGP boot_emerg
+#define PANIC boot_panic
+
+#else /* __DECOMPRESSOR */
+
+#define DEBUGP pr_debug
+#define EMERGP pr_emerg
+#define PANIC panic
+
+#endif /* __DECOMPRESSOR */
+
+int __bootdata_preserved(stack_protector_debug);
+
+unsigned long __stack_chk_guard;
+EXPORT_SYMBOL(__stack_chk_guard);
+
+struct insn_ril {
+ u8 opc1 : 8;
+ u8 r1 : 4;
+ u8 opc2 : 4;
+ u32 imm;
+} __packed;
+
+/*
+ * Convert a virtual instruction address to a real instruction address. The
+ * decompressor needs to patch instructions within the kernel image based on
+ * their virtual addresses, while dynamic address translation is still
+ * disabled. Therefore a translation from virtual kernel image addresses to
+ * the corresponding physical addresses is required.
+ *
+ * After dynamic address translation is enabled and the kernel needs to
+ * patch instructions, such a translation is not required since the
+ * addresses are identical.
+ */
+static struct insn_ril *vaddress_to_insn(unsigned long vaddress)
+{
+#ifdef __DECOMPRESSOR
+ return (struct insn_ril *)__kernel_pa(vaddress);
+#else
+ return (struct insn_ril *)vaddress;
+#endif
+}
+
+static unsigned long insn_to_vaddress(struct insn_ril *insn)
+{
+#ifdef __DECOMPRESSOR
+ return (unsigned long)__kernel_va(insn);
+#else
+ return (unsigned long)insn;
+#endif
+}
+
+#define INSN_RIL_STRING_SIZE (sizeof(struct insn_ril) * 2 + 1)
+
+static void insn_ril_to_string(char *str, struct insn_ril *insn)
+{
+ u8 *ptr = (u8 *)insn;
+ int i;
+
+ for (i = 0; i < sizeof(*insn); i++)
+ hex_byte_pack(&str[2 * i], ptr[i]);
+ str[2 * i] = 0;
+}
+
+static void stack_protector_dump(struct insn_ril *old, struct insn_ril *new)
+{
+ char ostr[INSN_RIL_STRING_SIZE];
+ char nstr[INSN_RIL_STRING_SIZE];
+
+ insn_ril_to_string(ostr, old);
+ insn_ril_to_string(nstr, new);
+ DEBUGP("%016lx: %s -> %s\n", insn_to_vaddress(old), ostr, nstr);
+}
+
+static int stack_protector_verify(struct insn_ril *insn, unsigned long kernel_start)
+{
+ char istr[INSN_RIL_STRING_SIZE];
+ unsigned long vaddress, offset;
+
+ /* larl */
+ if (insn->opc1 == 0xc0 && insn->opc2 == 0x0)
+ return 0;
+ /* lgrl */
+ if (insn->opc1 == 0xc4 && insn->opc2 == 0x8)
+ return 0;
+ insn_ril_to_string(istr, insn);
+ vaddress = insn_to_vaddress(insn);
+ if (__is_defined(__DECOMPRESSOR)) {
+ offset = (unsigned long)insn - kernel_start + TEXT_OFFSET;
+ EMERGP("Unexpected instruction at %016lx/%016lx: %s\n", vaddress, offset, istr);
+ PANIC("Stackprotector error\n");
+ } else {
+ EMERGP("Unexpected instruction at %016lx: %s\n", vaddress, istr);
+ }
+ return -EINVAL;
+}
+
+int __stack_protector_apply(unsigned long *start, unsigned long *end, unsigned long kernel_start)
+{
+ unsigned long canary, *loc;
+ struct insn_ril *insn, new;
+ int rc;
+
+ /*
+ * Convert LARL/LGRL instructions to LLILF so register R1 contains the
+ * address of the per-cpu / per-process stack canary:
+ *
+ * LARL/LGRL R1,__stack_chk_guard => LLILF R1,__lc_stack_canary
+ */
+ canary = __LC_STACK_CANARY;
+ if (machine_has_relocated_lowcore())
+ canary += LOWCORE_ALT_ADDRESS;
+ for (loc = start; loc < end; loc++) {
+ insn = vaddress_to_insn(*loc);
+ rc = stack_protector_verify(insn, kernel_start);
+ if (rc)
+ return rc;
+ new = *insn;
+ new.opc1 = 0xc0;
+ new.opc2 = 0xf;
+ new.imm = canary;
+ if (stack_protector_debug)
+ stack_protector_dump(insn, &new);
+ s390_kernel_write(insn, &new, sizeof(*insn));
+ }
+ return 0;
+}
+
+#ifdef __DECOMPRESSOR
+void __stack_protector_apply_early(unsigned long kernel_start)
+{
+ unsigned long *start, *end;
+
+ start = (unsigned long *)vmlinux.stack_prot_start;
+ end = (unsigned long *)vmlinux.stack_prot_end;
+ __stack_protector_apply(start, end, kernel_start);
+}
+#endif
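The new file rewrites each compiler-emitted LARL/LGRL that loads __stack_chk_guard into an LLILF that loads the lowcore canary offset, by patching the 6-byte RIL-format instruction in place. Below is a stand-alone sketch of that byte-level rewrite, assuming the RIL layout implied by struct insn_ril above (opcode byte, R1 nibble plus opcode-extension nibble, 32-bit big-endian immediate); ril_encode(), ril_print() and the 0x1c0 canary offset are invented for the example.

#include <stdio.h>
#include <stdint.h>

/* Build a 6-byte RIL-format instruction: byte 0 = opcode part 1,
 * byte 1 = R1 (high nibble) and opcode part 2 (low nibble),
 * bytes 2..5 = 32-bit immediate, big-endian. */
static void ril_encode(uint8_t insn[6], uint8_t opc1, uint8_t r1,
		       uint8_t opc2, uint32_t imm)
{
	insn[0] = opc1;
	insn[1] = (uint8_t)((r1 << 4) | (opc2 & 0x0f));
	insn[2] = imm >> 24;
	insn[3] = imm >> 16;
	insn[4] = imm >> 8;
	insn[5] = imm;
}

static void ril_print(const char *tag, const uint8_t insn[6])
{
	printf("%s: ", tag);
	for (int i = 0; i < 6; i++)
		printf("%02x", insn[i]);
	printf("\n");
}

int main(void)
{
	uint8_t insn[6];
	uint32_t canary_off = 0x1c0;	/* made-up lowcore offset */

	/* LARL r1,<pc-relative offset> as emitted by the compiler. */
	ril_encode(insn, 0xc0, 1, 0x0, 0x00001234);
	ril_print("before", insn);

	/* Patch to LLILF r1,<canary offset>, keeping the register field,
	 * so r1 ends up holding the lowcore canary address. */
	ril_encode(insn, 0xc0, 1, 0xf, canary_off);
	ril_print("after ", insn);
	return 0;
}

The 0xc0/0x0 (LARL), 0xc4/0x8 (LGRL) and 0xc0/0xf (LLILF) opcode pairs are the same values that stack_protector_verify() accepts and __stack_protector_apply() writes.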
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 640363b2a105..3aae7f70e6ab 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -8,7 +8,7 @@
#include <linux/perf_event.h>
#include <linux/stacktrace.h>
#include <linux/uaccess.h>
-#include <linux/compat.h>
+#include <asm/asm-offsets.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>
#include <asm/kprobes.h>
@@ -106,8 +106,6 @@ void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *coo
unsigned long ip, sp;
bool first = true;
- if (is_compat_task())
- return;
if (!current->mm)
return;
ip = instruction_pointer(regs);
@@ -151,7 +149,7 @@ void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *coo
break;
}
if (!store_ip(consume_entry, cookie, entry, perf, ip))
- return;
+ break;
first = false;
}
pagefault_enable();
@@ -162,22 +160,3 @@ void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
{
arch_stack_walk_user_common(consume_entry, cookie, NULL, regs, false);
}
-
-unsigned long return_address(unsigned int n)
-{
- struct unwind_state state;
- unsigned long addr;
-
- /* Increment to skip current stack entry */
- n++;
-
- unwind_for_each_frame(&state, NULL, NULL, 0) {
- addr = unwind_get_return_address(&state);
- if (!addr)
- break;
- if (!n--)
- return addr;
- }
- return 0;
-}
-EXPORT_SYMBOL_GPL(return_address);
diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c
index 30bb20461db4..5eae2e25997a 100644
--- a/arch/s390/kernel/sthyi.c
+++ b/arch/s390/kernel/sthyi.c
@@ -5,6 +5,8 @@
* Copyright IBM Corp. 2016
* Author(s): Janosch Frank <frankja@linux.vnet.ibm.com>
*/
+
+#include <linux/export.h>
#include <linux/errno.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
@@ -17,6 +19,7 @@
#include <asm/ebcdic.h>
#include <asm/facility.h>
#include <asm/sthyi.h>
+#include <asm/asm.h>
#include "entry.h"
#define DED_WEIGHT 0xffff
@@ -250,7 +253,7 @@ static void fill_diag_mac(struct sthyi_sctns *sctns,
sctns->mac.infmval1 |= MAC_CNT_VLD;
}
-/* Returns a pointer to the the next partition block. */
+/* Returns a pointer to the next partition block. */
static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf,
bool this_lpar,
void *diag224_buf,
@@ -300,33 +303,56 @@ static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf,
return (struct diag204_x_part_block *)&block->cpus[i];
}
-static void fill_diag(struct sthyi_sctns *sctns)
+static void *diag204_get_data(bool diag204_allow_busy)
{
- int i, r, pages;
- bool this_lpar;
+ unsigned long subcode;
void *diag204_buf;
- void *diag224_buf = NULL;
- struct diag204_x_info_blk_hdr *ti_hdr;
- struct diag204_x_part_block *part_block;
- struct diag204_x_phys_block *phys_block;
- struct lpar_cpu_inf lpar_inf = {};
-
- /* Errors are handled through the validity bits in the response. */
- pages = diag204((unsigned long)DIAG204_SUBC_RSI |
- (unsigned long)DIAG204_INFO_EXT, 0, NULL);
- if (pages <= 0)
- return;
-
+ int pages, rc;
+
+ subcode = DIAG204_SUBC_RSI;
+ subcode |= DIAG204_INFO_EXT;
+ pages = diag204(subcode, 0, NULL);
+ if (pages < 0)
+ return ERR_PTR(pages);
+ if (pages == 0)
+ return ERR_PTR(-ENODATA);
diag204_buf = __vmalloc_node(array_size(pages, PAGE_SIZE),
PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE,
__builtin_return_address(0));
if (!diag204_buf)
- return;
+ return ERR_PTR(-ENOMEM);
+ subcode = DIAG204_SUBC_STIB7;
+ subcode |= DIAG204_INFO_EXT;
+ if (diag204_has_bif() && diag204_allow_busy)
+ subcode |= DIAG204_BIF_BIT;
+ rc = diag204(subcode, pages, diag204_buf);
+ if (rc < 0) {
+ vfree(diag204_buf);
+ return ERR_PTR(rc);
+ }
+ return diag204_buf;
+}
- r = diag204((unsigned long)DIAG204_SUBC_STIB7 |
- (unsigned long)DIAG204_INFO_EXT, pages, diag204_buf);
- if (r < 0)
- goto out;
+static bool is_diag204_cached(struct sthyi_sctns *sctns)
+{
+ /*
+	 * Check whether the validity bits are already set, i.e. diag204
+	 * data was gathered on a previous call and can be reused.
+ */
+ if (sctns->par.infpval1)
+ return true;
+ return false;
+}
+
+static void fill_diag(struct sthyi_sctns *sctns, void *diag204_buf)
+{
+ int i;
+ bool this_lpar;
+ void *diag224_buf = NULL;
+ struct diag204_x_info_blk_hdr *ti_hdr;
+ struct diag204_x_part_block *part_block;
+ struct diag204_x_phys_block *phys_block;
+ struct lpar_cpu_inf lpar_inf = {};
diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
if (!diag224_buf || diag224(diag224_buf))
@@ -392,7 +418,6 @@ static void fill_diag(struct sthyi_sctns *sctns)
out:
free_page((unsigned long)diag224_buf);
- vfree(diag204_buf);
}
static int sthyi(u64 vaddr, u64 *rc)
@@ -403,30 +428,41 @@ static int sthyi(u64 vaddr, u64 *rc)
asm volatile(
".insn rre,0xB2560000,%[r1],%[r2]\n"
- "ipm %[cc]\n"
- "srl %[cc],28\n"
- : [cc] "=&d" (cc), [r2] "+&d" (r2.pair)
+ CC_IPM(cc)
+ : CC_OUT(cc, cc), [r2] "+&d" (r2.pair)
: [r1] "d" (r1.pair)
- : "memory", "cc");
+ : CC_CLOBBER_LIST("memory"));
*rc = r2.odd;
- return cc;
+ return CC_TRANSFORM(cc);
}
static int fill_dst(void *dst, u64 *rc)
{
+ void *diag204_buf;
+
struct sthyi_sctns *sctns = (struct sthyi_sctns *)dst;
/*
* If the facility is on, we don't want to emulate the instruction.
* We ask the hypervisor to provide the data.
*/
- if (test_facility(74))
+ if (test_facility(74)) {
+ memset(dst, 0, PAGE_SIZE);
return sthyi((u64)dst, rc);
-
+ }
+ /*
+	 * When emulating, if diag204 returns BUSY, do not reset the dst
+	 * buffer and keep using the cached data instead.
+ */
+ *rc = 0;
+ diag204_buf = diag204_get_data(is_diag204_cached(sctns));
+ if (IS_ERR(diag204_buf))
+ return PTR_ERR(diag204_buf);
+ memset(dst, 0, PAGE_SIZE);
fill_hdr(sctns);
fill_stsi(sctns);
- fill_diag(sctns);
- *rc = 0;
+ fill_diag(sctns, diag204_buf);
+ vfree(diag204_buf);
return 0;
}
@@ -445,11 +481,14 @@ static int sthyi_update_cache(u64 *rc)
{
int r;
- memset(sthyi_cache.info, 0, PAGE_SIZE);
r = fill_dst(sthyi_cache.info, rc);
- if (r)
- return r;
- sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES;
+ if (r == 0) {
+ sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES;
+ } else if (r == -EBUSY) {
+ /* mark as expired and return 0 to keep using cached data */
+ sthyi_cache.end = jiffies - 1;
+ r = 0;
+ }
return r;
}
diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c
index 50cbcbbaa03d..795b6cca74c9 100644
--- a/arch/s390/kernel/syscall.c
+++ b/arch/s390/kernel/syscall.c
@@ -12,6 +12,7 @@
* platform.
*/
+#include <linux/cpufeature.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/mm.h>
@@ -38,6 +39,16 @@
#include "entry.h"
+#define __SYSCALL(nr, sym) long __s390x_##sym(struct pt_regs *);
+#include <asm/syscall_table.h>
+#undef __SYSCALL
+
+#define __SYSCALL(nr, sym) [nr] = (__s390x_##sym),
+const sys_call_ptr_t sys_call_table[__NR_syscalls] = {
+#include <asm/syscall_table.h>
+};
+#undef __SYSCALL
+
#ifdef CONFIG_SYSVIPC
/*
* sys_ipc() is the de-multiplexer for the SysV IPC calls.
@@ -81,25 +92,35 @@ SYSCALL_DEFINE0(ni_syscall)
return -ENOSYS;
}
-static void do_syscall(struct pt_regs *regs)
+void noinstr __do_syscall(struct pt_regs *regs, int per_trap)
{
unsigned long nr;
+ add_random_kstack_offset();
+ enter_from_user_mode(regs);
+ regs->psw = get_lowcore()->svc_old_psw;
+ regs->int_code = get_lowcore()->svc_int_code;
+ update_timer_sys();
+ if (cpu_has_bear())
+ current->thread.last_break = regs->last_break;
+ local_irq_enable();
+ regs->orig_gpr2 = regs->gprs[2];
+ if (unlikely(per_trap))
+ set_thread_flag(TIF_PER_TRAP);
+ regs->flags = 0;
+ set_pt_regs_flag(regs, PIF_SYSCALL);
nr = regs->int_code & 0xffff;
- if (!nr) {
+ if (likely(!nr)) {
nr = regs->gprs[1] & 0xffff;
regs->int_code &= ~0xffffUL;
regs->int_code |= nr;
}
-
regs->gprs[2] = nr;
-
if (nr == __NR_restart_syscall && !(current->restart_block.arch_data & 1)) {
regs->psw.addr = current->restart_block.arch_data;
current->restart_block.arch_data = 1;
}
nr = syscall_enter_from_user_mode_work(regs, nr);
-
/*
* In the s390 ptrace ABI, both the syscall number and the return value
* use gpr2. However, userspace puts the syscall number either in the
* svc instruction itself, or uses gpr1. To make at least skipping syscalls
@@ -107,37 +128,11 @@ static void do_syscall(struct pt_regs *regs)
* work, the ptrace code sets PIF_SYSCALL_RET_SET, which is checked here
* and if set, the syscall will be skipped.
*/
-
if (unlikely(test_and_clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET)))
goto out;
regs->gprs[2] = -ENOSYS;
- if (likely(nr >= NR_syscalls))
- goto out;
- do {
- regs->gprs[2] = current->thread.sys_call_table[nr](regs);
- } while (test_and_clear_pt_regs_flag(regs, PIF_EXECVE_PGSTE_RESTART));
+ if (likely(nr < NR_syscalls))
+ regs->gprs[2] = sys_call_table[nr](regs);
out:
- syscall_exit_to_user_mode_work(regs);
-}
-
-void noinstr __do_syscall(struct pt_regs *regs, int per_trap)
-{
- add_random_kstack_offset();
- enter_from_user_mode(regs);
- regs->psw = S390_lowcore.svc_old_psw;
- regs->int_code = S390_lowcore.svc_int_code;
- update_timer_sys();
- if (static_branch_likely(&cpu_has_bear))
- current->thread.last_break = regs->last_break;
-
- local_irq_enable();
- regs->orig_gpr2 = regs->gprs[2];
-
- if (per_trap)
- set_thread_flag(TIF_PER_TRAP);
-
- regs->flags = 0;
- set_pt_regs_flag(regs, PIF_SYSCALL);
- do_syscall(regs);
- exit_to_user_mode();
+ syscall_exit_to_user_mode(regs);
}
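The rewritten syscall.c builds sys_call_table by including the generated <asm/syscall_table.h> twice with different definitions of __SYSCALL(): first to declare every entry point, then to emit designated initializers. Below is a self-contained sketch of the same X-macro pattern, with a hand-written DEMO_TABLE standing in for the generated header and DEMO_SYSCALL/demo_* as invented names (the kernel macro is __SYSCALL).

#include <stdio.h>

/* Stand-in for the generated header: one DEMO_SYSCALL() line per
 * system call number. */
#define DEMO_TABLE \
	DEMO_SYSCALL(1, demo_exit) \
	DEMO_SYSCALL(3, demo_read)

typedef long (*demo_call_t)(long arg);

/* First expansion: forward declarations of the entry points. */
#define DEMO_SYSCALL(nr, sym) static long sym(long arg);
DEMO_TABLE
#undef DEMO_SYSCALL

static long demo_exit(long arg) { return 100 + arg; }
static long demo_read(long arg) { return 300 + arg; }

/* Second expansion: designated initializers keyed by syscall number;
 * numbers without an entry stay NULL in this demo. */
#define DEMO_SYSCALL(nr, sym) [nr] = sym,
static const demo_call_t demo_call_table[8] = {
	DEMO_TABLE
};
#undef DEMO_SYSCALL

int main(void)
{
	printf("%ld\n", demo_call_table[3](7));	/* prints 307 */
	return 0;
}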
diff --git a/arch/s390/kernel/syscalls/Makefile b/arch/s390/kernel/syscalls/Makefile
index 1bb78b9468e8..d5fca0ca0890 100644
--- a/arch/s390/kernel/syscalls/Makefile
+++ b/arch/s390/kernel/syscalls/Makefile
@@ -1,45 +1,32 @@
# SPDX-License-Identifier: GPL-2.0
+kapi := arch/$(SRCARCH)/include/generated/asm
+uapi := arch/$(SRCARCH)/include/generated/uapi/asm
-gen := arch/$(ARCH)/include/generated
-kapi := $(gen)/asm
-uapi := $(gen)/uapi/asm
-
-syscall := $(src)/syscall.tbl
-systbl := $(src)/syscalltbl
-
-gen-y := $(kapi)/syscall_table.h
-kapi-hdrs-y := $(kapi)/unistd_nr.h
-uapi-hdrs-y := $(uapi)/unistd_32.h
-uapi-hdrs-y += $(uapi)/unistd_64.h
-
-targets += $(addprefix ../../../,$(gen-y) $(kapi-hdrs-y) $(uapi-hdrs-y))
-
-PHONY += kapi uapi
-
-kapi: $(gen-y) $(kapi-hdrs-y)
-uapi: $(uapi-hdrs-y)
-
-
-# Create output directory if not already present
$(shell mkdir -p $(uapi) $(kapi))
-filechk_syshdr = $(CONFIG_SHELL) '$(systbl)' -H -a $(syshdr_abi_$(basetarget)) -f "$2" < $<
+syscall := $(src)/syscall.tbl
+syshdr := $(srctree)/scripts/syscallhdr.sh
+systbl := $(srctree)/scripts/syscalltbl.sh
+
+quiet_cmd_syshdr = SYSHDR $@
+ cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --emit-nr --abis common,$* $< $@
-filechk_sysnr = $(CONFIG_SHELL) '$(systbl)' -N -a $(sysnr_abi_$(basetarget)) < $<
+quiet_cmd_systbl = SYSTBL $@
+ cmd_systbl = $(CONFIG_SHELL) $(systbl) --abis common,$* $< $@
-filechk_syscalls = $(CONFIG_SHELL) '$(systbl)' -S < $<
+$(uapi)/unistd_%.h: $(syscall) $(syshdr) FORCE
+ $(call if_changed,syshdr)
-syshdr_abi_unistd_32 := common,32
-$(uapi)/unistd_32.h: $(syscall) FORCE
- $(call filechk,syshdr,$@)
+$(kapi)/syscall_table.h: $(syscall) $(systbl) FORCE
+ $(call if_changed,systbl)
-syshdr_abi_unistd_64 := common,64
-$(uapi)/unistd_64.h: $(syscall) FORCE
- $(call filechk,syshdr,$@)
+uapisyshdr-y += unistd_64.h
+kapisyshdr-y += syscall_table.h
-$(kapi)/syscall_table.h: $(syscall) FORCE
- $(call filechk,syscalls)
+uapisyshdr-y := $(addprefix $(uapi)/, $(uapisyshdr-y))
+kapisyshdr-y := $(addprefix $(kapi)/, $(kapisyshdr-y))
+targets += $(addprefix ../../../../, $(uapisyshdr-y) $(kapisyshdr-y))
-sysnr_abi_unistd_nr := common,32,64
-$(kapi)/unistd_nr.h: $(syscall) FORCE
- $(call filechk,sysnr)
+PHONY += all
+all: $(uapisyshdr-y) $(kapisyshdr-y)
+ @:
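Header generation is now delegated to the generic scripts/syscallhdr.sh and scripts/syscalltbl.sh helpers instead of the removed arch-specific syscalltbl script. Roughly, and only as an illustration (the lines below are not copied from the real generated files), unistd_64.h ends up with one __NR_* define per table entry, plus a syscall count emitted because of --emit-nr, and syscall_table.h with one __SYSCALL() line per entry, which is what the new sys_call_table construction in syscall.c consumes:

/* arch/s390/include/generated/uapi/asm/unistd_64.h (sketch) */
#define __NR_exit 1
#define __NR_fork 2
#define __NR_read 3

/* arch/s390/include/generated/asm/syscall_table.h (sketch) */
__SYSCALL(1, sys_exit)
__SYSCALL(2, sys_fork)
__SYSCALL(3, sys_read)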
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 01071182763e..417ed16b3c63 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -3,465 +3,397 @@
# System call table for s390
#
# Format:
+# <nr> <abi> <syscall> <entry>
#
-# <nr> <abi> <syscall> <entry-64bit> <compat-entry>
-#
-# where <abi> can be common, 64, or 32
+# <abi> is always common.
-1 common exit sys_exit sys_exit
-2 common fork sys_fork sys_fork
-3 common read sys_read compat_sys_s390_read
-4 common write sys_write compat_sys_s390_write
-5 common open sys_open compat_sys_open
-6 common close sys_close sys_close
-7 common restart_syscall sys_restart_syscall sys_restart_syscall
-8 common creat sys_creat sys_creat
-9 common link sys_link sys_link
-10 common unlink sys_unlink sys_unlink
-11 common execve sys_execve compat_sys_execve
-12 common chdir sys_chdir sys_chdir
-13 32 time - sys_time32
-14 common mknod sys_mknod sys_mknod
-15 common chmod sys_chmod sys_chmod
-16 32 lchown - sys_lchown16
-19 common lseek sys_lseek compat_sys_lseek
-20 common getpid sys_getpid sys_getpid
-21 common mount sys_mount sys_mount
-22 common umount sys_oldumount sys_oldumount
-23 32 setuid - sys_setuid16
-24 32 getuid - sys_getuid16
-25 32 stime - sys_stime32
-26 common ptrace sys_ptrace compat_sys_ptrace
-27 common alarm sys_alarm sys_alarm
-29 common pause sys_pause sys_pause
-30 common utime sys_utime sys_utime32
-33 common access sys_access sys_access
-34 common nice sys_nice sys_nice
-36 common sync sys_sync sys_sync
-37 common kill sys_kill sys_kill
-38 common rename sys_rename sys_rename
-39 common mkdir sys_mkdir sys_mkdir
-40 common rmdir sys_rmdir sys_rmdir
-41 common dup sys_dup sys_dup
-42 common pipe sys_pipe sys_pipe
-43 common times sys_times compat_sys_times
-45 common brk sys_brk sys_brk
-46 32 setgid - sys_setgid16
-47 32 getgid - sys_getgid16
-48 common signal sys_signal sys_signal
-49 32 geteuid - sys_geteuid16
-50 32 getegid - sys_getegid16
-51 common acct sys_acct sys_acct
-52 common umount2 sys_umount sys_umount
-54 common ioctl sys_ioctl compat_sys_ioctl
-55 common fcntl sys_fcntl compat_sys_fcntl
-57 common setpgid sys_setpgid sys_setpgid
-60 common umask sys_umask sys_umask
-61 common chroot sys_chroot sys_chroot
-62 common ustat sys_ustat compat_sys_ustat
-63 common dup2 sys_dup2 sys_dup2
-64 common getppid sys_getppid sys_getppid
-65 common getpgrp sys_getpgrp sys_getpgrp
-66 common setsid sys_setsid sys_setsid
-67 common sigaction sys_sigaction compat_sys_sigaction
-70 32 setreuid - sys_setreuid16
-71 32 setregid - sys_setregid16
-72 common sigsuspend sys_sigsuspend sys_sigsuspend
-73 common sigpending sys_sigpending compat_sys_sigpending
-74 common sethostname sys_sethostname sys_sethostname
-75 common setrlimit sys_setrlimit compat_sys_setrlimit
-76 32 getrlimit - compat_sys_old_getrlimit
-77 common getrusage sys_getrusage compat_sys_getrusage
-78 common gettimeofday sys_gettimeofday compat_sys_gettimeofday
-79 common settimeofday sys_settimeofday compat_sys_settimeofday
-80 32 getgroups - sys_getgroups16
-81 32 setgroups - sys_setgroups16
-83 common symlink sys_symlink sys_symlink
-85 common readlink sys_readlink sys_readlink
-86 common uselib sys_uselib sys_uselib
-87 common swapon sys_swapon sys_swapon
-88 common reboot sys_reboot sys_reboot
-89 common readdir - compat_sys_old_readdir
-90 common mmap sys_old_mmap compat_sys_s390_old_mmap
-91 common munmap sys_munmap sys_munmap
-92 common truncate sys_truncate compat_sys_truncate
-93 common ftruncate sys_ftruncate compat_sys_ftruncate
-94 common fchmod sys_fchmod sys_fchmod
-95 32 fchown - sys_fchown16
-96 common getpriority sys_getpriority sys_getpriority
-97 common setpriority sys_setpriority sys_setpriority
-99 common statfs sys_statfs compat_sys_statfs
-100 common fstatfs sys_fstatfs compat_sys_fstatfs
-101 32 ioperm - -
-102 common socketcall sys_socketcall compat_sys_socketcall
-103 common syslog sys_syslog sys_syslog
-104 common setitimer sys_setitimer compat_sys_setitimer
-105 common getitimer sys_getitimer compat_sys_getitimer
-106 common stat sys_newstat compat_sys_newstat
-107 common lstat sys_newlstat compat_sys_newlstat
-108 common fstat sys_newfstat compat_sys_newfstat
-110 common lookup_dcookie - -
-111 common vhangup sys_vhangup sys_vhangup
-112 common idle - -
-114 common wait4 sys_wait4 compat_sys_wait4
-115 common swapoff sys_swapoff sys_swapoff
-116 common sysinfo sys_sysinfo compat_sys_sysinfo
-117 common ipc sys_s390_ipc compat_sys_s390_ipc
-118 common fsync sys_fsync sys_fsync
-119 common sigreturn sys_sigreturn compat_sys_sigreturn
-120 common clone sys_clone sys_clone
-121 common setdomainname sys_setdomainname sys_setdomainname
-122 common uname sys_newuname sys_newuname
-124 common adjtimex sys_adjtimex sys_adjtimex_time32
-125 common mprotect sys_mprotect sys_mprotect
-126 common sigprocmask sys_sigprocmask compat_sys_sigprocmask
-127 common create_module - -
-128 common init_module sys_init_module sys_init_module
-129 common delete_module sys_delete_module sys_delete_module
-130 common get_kernel_syms - -
-131 common quotactl sys_quotactl sys_quotactl
-132 common getpgid sys_getpgid sys_getpgid
-133 common fchdir sys_fchdir sys_fchdir
-134 common bdflush sys_ni_syscall sys_ni_syscall
-135 common sysfs sys_sysfs sys_sysfs
-136 common personality sys_s390_personality sys_s390_personality
-137 common afs_syscall - -
-138 32 setfsuid - sys_setfsuid16
-139 32 setfsgid - sys_setfsgid16
-140 32 _llseek - sys_llseek
-141 common getdents sys_getdents compat_sys_getdents
-142 32 _newselect - compat_sys_select
-142 64 select sys_select -
-143 common flock sys_flock sys_flock
-144 common msync sys_msync sys_msync
-145 common readv sys_readv sys_readv
-146 common writev sys_writev sys_writev
-147 common getsid sys_getsid sys_getsid
-148 common fdatasync sys_fdatasync sys_fdatasync
-149 common _sysctl - -
-150 common mlock sys_mlock sys_mlock
-151 common munlock sys_munlock sys_munlock
-152 common mlockall sys_mlockall sys_mlockall
-153 common munlockall sys_munlockall sys_munlockall
-154 common sched_setparam sys_sched_setparam sys_sched_setparam
-155 common sched_getparam sys_sched_getparam sys_sched_getparam
-156 common sched_setscheduler sys_sched_setscheduler sys_sched_setscheduler
-157 common sched_getscheduler sys_sched_getscheduler sys_sched_getscheduler
-158 common sched_yield sys_sched_yield sys_sched_yield
-159 common sched_get_priority_max sys_sched_get_priority_max sys_sched_get_priority_max
-160 common sched_get_priority_min sys_sched_get_priority_min sys_sched_get_priority_min
-161 common sched_rr_get_interval sys_sched_rr_get_interval sys_sched_rr_get_interval_time32
-162 common nanosleep sys_nanosleep sys_nanosleep_time32
-163 common mremap sys_mremap sys_mremap
-164 32 setresuid - sys_setresuid16
-165 32 getresuid - sys_getresuid16
-167 common query_module - -
-168 common poll sys_poll sys_poll
-169 common nfsservctl - -
-170 32 setresgid - sys_setresgid16
-171 32 getresgid - sys_getresgid16
-172 common prctl sys_prctl sys_prctl
-173 common rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn
-174 common rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction
-175 common rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask
-176 common rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending
-177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time32
-178 common rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
-179 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend
-180 common pread64 sys_pread64 compat_sys_s390_pread64
-181 common pwrite64 sys_pwrite64 compat_sys_s390_pwrite64
-182 32 chown - sys_chown16
-183 common getcwd sys_getcwd sys_getcwd
-184 common capget sys_capget sys_capget
-185 common capset sys_capset sys_capset
-186 common sigaltstack sys_sigaltstack compat_sys_sigaltstack
-187 common sendfile sys_sendfile64 compat_sys_sendfile
-188 common getpmsg - -
-189 common putpmsg - -
-190 common vfork sys_vfork sys_vfork
-191 32 ugetrlimit - compat_sys_getrlimit
-191 64 getrlimit sys_getrlimit -
-192 32 mmap2 - compat_sys_s390_mmap2
-193 32 truncate64 - compat_sys_s390_truncate64
-194 32 ftruncate64 - compat_sys_s390_ftruncate64
-195 32 stat64 - compat_sys_s390_stat64
-196 32 lstat64 - compat_sys_s390_lstat64
-197 32 fstat64 - compat_sys_s390_fstat64
-198 32 lchown32 - sys_lchown
-198 64 lchown sys_lchown -
-199 32 getuid32 - sys_getuid
-199 64 getuid sys_getuid -
-200 32 getgid32 - sys_getgid
-200 64 getgid sys_getgid -
-201 32 geteuid32 - sys_geteuid
-201 64 geteuid sys_geteuid -
-202 32 getegid32 - sys_getegid
-202 64 getegid sys_getegid -
-203 32 setreuid32 - sys_setreuid
-203 64 setreuid sys_setreuid -
-204 32 setregid32 - sys_setregid
-204 64 setregid sys_setregid -
-205 32 getgroups32 - sys_getgroups
-205 64 getgroups sys_getgroups -
-206 32 setgroups32 - sys_setgroups
-206 64 setgroups sys_setgroups -
-207 32 fchown32 - sys_fchown
-207 64 fchown sys_fchown -
-208 32 setresuid32 - sys_setresuid
-208 64 setresuid sys_setresuid -
-209 32 getresuid32 - sys_getresuid
-209 64 getresuid sys_getresuid -
-210 32 setresgid32 - sys_setresgid
-210 64 setresgid sys_setresgid -
-211 32 getresgid32 - sys_getresgid
-211 64 getresgid sys_getresgid -
-212 32 chown32 - sys_chown
-212 64 chown sys_chown -
-213 32 setuid32 - sys_setuid
-213 64 setuid sys_setuid -
-214 32 setgid32 - sys_setgid
-214 64 setgid sys_setgid -
-215 32 setfsuid32 - sys_setfsuid
-215 64 setfsuid sys_setfsuid -
-216 32 setfsgid32 - sys_setfsgid
-216 64 setfsgid sys_setfsgid -
-217 common pivot_root sys_pivot_root sys_pivot_root
-218 common mincore sys_mincore sys_mincore
-219 common madvise sys_madvise sys_madvise
-220 common getdents64 sys_getdents64 sys_getdents64
-221 32 fcntl64 - compat_sys_fcntl64
-222 common readahead sys_readahead compat_sys_s390_readahead
-223 32 sendfile64 - compat_sys_sendfile64
-224 common setxattr sys_setxattr sys_setxattr
-225 common lsetxattr sys_lsetxattr sys_lsetxattr
-226 common fsetxattr sys_fsetxattr sys_fsetxattr
-227 common getxattr sys_getxattr sys_getxattr
-228 common lgetxattr sys_lgetxattr sys_lgetxattr
-229 common fgetxattr sys_fgetxattr sys_fgetxattr
-230 common listxattr sys_listxattr sys_listxattr
-231 common llistxattr sys_llistxattr sys_llistxattr
-232 common flistxattr sys_flistxattr sys_flistxattr
-233 common removexattr sys_removexattr sys_removexattr
-234 common lremovexattr sys_lremovexattr sys_lremovexattr
-235 common fremovexattr sys_fremovexattr sys_fremovexattr
-236 common gettid sys_gettid sys_gettid
-237 common tkill sys_tkill sys_tkill
-238 common futex sys_futex sys_futex_time32
-239 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity
-240 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity
-241 common tgkill sys_tgkill sys_tgkill
-243 common io_setup sys_io_setup compat_sys_io_setup
-244 common io_destroy sys_io_destroy sys_io_destroy
-245 common io_getevents sys_io_getevents sys_io_getevents_time32
-246 common io_submit sys_io_submit compat_sys_io_submit
-247 common io_cancel sys_io_cancel sys_io_cancel
-248 common exit_group sys_exit_group sys_exit_group
-249 common epoll_create sys_epoll_create sys_epoll_create
-250 common epoll_ctl sys_epoll_ctl sys_epoll_ctl
-251 common epoll_wait sys_epoll_wait sys_epoll_wait
-252 common set_tid_address sys_set_tid_address sys_set_tid_address
-253 common fadvise64 sys_fadvise64_64 compat_sys_s390_fadvise64
-254 common timer_create sys_timer_create compat_sys_timer_create
-255 common timer_settime sys_timer_settime sys_timer_settime32
-256 common timer_gettime sys_timer_gettime sys_timer_gettime32
-257 common timer_getoverrun sys_timer_getoverrun sys_timer_getoverrun
-258 common timer_delete sys_timer_delete sys_timer_delete
-259 common clock_settime sys_clock_settime sys_clock_settime32
-260 common clock_gettime sys_clock_gettime sys_clock_gettime32
-261 common clock_getres sys_clock_getres sys_clock_getres_time32
-262 common clock_nanosleep sys_clock_nanosleep sys_clock_nanosleep_time32
-264 32 fadvise64_64 - compat_sys_s390_fadvise64_64
-265 common statfs64 sys_statfs64 compat_sys_statfs64
-266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
-267 common remap_file_pages sys_remap_file_pages sys_remap_file_pages
-268 common mbind sys_mbind sys_mbind
-269 common get_mempolicy sys_get_mempolicy sys_get_mempolicy
-270 common set_mempolicy sys_set_mempolicy sys_set_mempolicy
-271 common mq_open sys_mq_open compat_sys_mq_open
-272 common mq_unlink sys_mq_unlink sys_mq_unlink
-273 common mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32
-274 common mq_timedreceive sys_mq_timedreceive sys_mq_timedreceive_time32
-275 common mq_notify sys_mq_notify compat_sys_mq_notify
-276 common mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr
-277 common kexec_load sys_kexec_load compat_sys_kexec_load
-278 common add_key sys_add_key sys_add_key
-279 common request_key sys_request_key sys_request_key
-280 common keyctl sys_keyctl compat_sys_keyctl
-281 common waitid sys_waitid compat_sys_waitid
-282 common ioprio_set sys_ioprio_set sys_ioprio_set
-283 common ioprio_get sys_ioprio_get sys_ioprio_get
-284 common inotify_init sys_inotify_init sys_inotify_init
-285 common inotify_add_watch sys_inotify_add_watch sys_inotify_add_watch
-286 common inotify_rm_watch sys_inotify_rm_watch sys_inotify_rm_watch
-287 common migrate_pages sys_migrate_pages sys_migrate_pages
-288 common openat sys_openat compat_sys_openat
-289 common mkdirat sys_mkdirat sys_mkdirat
-290 common mknodat sys_mknodat sys_mknodat
-291 common fchownat sys_fchownat sys_fchownat
-292 common futimesat sys_futimesat sys_futimesat_time32
-293 32 fstatat64 - compat_sys_s390_fstatat64
-293 64 newfstatat sys_newfstatat -
-294 common unlinkat sys_unlinkat sys_unlinkat
-295 common renameat sys_renameat sys_renameat
-296 common linkat sys_linkat sys_linkat
-297 common symlinkat sys_symlinkat sys_symlinkat
-298 common readlinkat sys_readlinkat sys_readlinkat
-299 common fchmodat sys_fchmodat sys_fchmodat
-300 common faccessat sys_faccessat sys_faccessat
-301 common pselect6 sys_pselect6 compat_sys_pselect6_time32
-302 common ppoll sys_ppoll compat_sys_ppoll_time32
-303 common unshare sys_unshare sys_unshare
-304 common set_robust_list sys_set_robust_list compat_sys_set_robust_list
-305 common get_robust_list sys_get_robust_list compat_sys_get_robust_list
-306 common splice sys_splice sys_splice
-307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range
-308 common tee sys_tee sys_tee
-309 common vmsplice sys_vmsplice sys_vmsplice
-310 common move_pages sys_move_pages sys_move_pages
-311 common getcpu sys_getcpu sys_getcpu
-312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait
-313 common utimes sys_utimes sys_utimes_time32
-314 common fallocate sys_fallocate compat_sys_s390_fallocate
-315 common utimensat sys_utimensat sys_utimensat_time32
-316 common signalfd sys_signalfd compat_sys_signalfd
-317 common timerfd - -
-318 common eventfd sys_eventfd sys_eventfd
-319 common timerfd_create sys_timerfd_create sys_timerfd_create
-320 common timerfd_settime sys_timerfd_settime sys_timerfd_settime32
-321 common timerfd_gettime sys_timerfd_gettime sys_timerfd_gettime32
-322 common signalfd4 sys_signalfd4 compat_sys_signalfd4
-323 common eventfd2 sys_eventfd2 sys_eventfd2
-324 common inotify_init1 sys_inotify_init1 sys_inotify_init1
-325 common pipe2 sys_pipe2 sys_pipe2
-326 common dup3 sys_dup3 sys_dup3
-327 common epoll_create1 sys_epoll_create1 sys_epoll_create1
-328 common preadv sys_preadv compat_sys_preadv
-329 common pwritev sys_pwritev compat_sys_pwritev
-330 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
-331 common perf_event_open sys_perf_event_open sys_perf_event_open
-332 common fanotify_init sys_fanotify_init sys_fanotify_init
-333 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark
-334 common prlimit64 sys_prlimit64 sys_prlimit64
-335 common name_to_handle_at sys_name_to_handle_at sys_name_to_handle_at
-336 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at
-337 common clock_adjtime sys_clock_adjtime sys_clock_adjtime32
-338 common syncfs sys_syncfs sys_syncfs
-339 common setns sys_setns sys_setns
-340 common process_vm_readv sys_process_vm_readv sys_process_vm_readv
-341 common process_vm_writev sys_process_vm_writev sys_process_vm_writev
-342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr
-343 common kcmp sys_kcmp sys_kcmp
-344 common finit_module sys_finit_module sys_finit_module
-345 common sched_setattr sys_sched_setattr sys_sched_setattr
-346 common sched_getattr sys_sched_getattr sys_sched_getattr
-347 common renameat2 sys_renameat2 sys_renameat2
-348 common seccomp sys_seccomp sys_seccomp
-349 common getrandom sys_getrandom sys_getrandom
-350 common memfd_create sys_memfd_create sys_memfd_create
-351 common bpf sys_bpf sys_bpf
-352 common s390_pci_mmio_write sys_s390_pci_mmio_write sys_s390_pci_mmio_write
-353 common s390_pci_mmio_read sys_s390_pci_mmio_read sys_s390_pci_mmio_read
-354 common execveat sys_execveat compat_sys_execveat
-355 common userfaultfd sys_userfaultfd sys_userfaultfd
-356 common membarrier sys_membarrier sys_membarrier
-357 common recvmmsg sys_recvmmsg compat_sys_recvmmsg_time32
-358 common sendmmsg sys_sendmmsg compat_sys_sendmmsg
-359 common socket sys_socket sys_socket
-360 common socketpair sys_socketpair sys_socketpair
-361 common bind sys_bind sys_bind
-362 common connect sys_connect sys_connect
-363 common listen sys_listen sys_listen
-364 common accept4 sys_accept4 sys_accept4
-365 common getsockopt sys_getsockopt sys_getsockopt
-366 common setsockopt sys_setsockopt sys_setsockopt
-367 common getsockname sys_getsockname sys_getsockname
-368 common getpeername sys_getpeername sys_getpeername
-369 common sendto sys_sendto sys_sendto
-370 common sendmsg sys_sendmsg compat_sys_sendmsg
-371 common recvfrom sys_recvfrom compat_sys_recvfrom
-372 common recvmsg sys_recvmsg compat_sys_recvmsg
-373 common shutdown sys_shutdown sys_shutdown
-374 common mlock2 sys_mlock2 sys_mlock2
-375 common copy_file_range sys_copy_file_range sys_copy_file_range
-376 common preadv2 sys_preadv2 compat_sys_preadv2
-377 common pwritev2 sys_pwritev2 compat_sys_pwritev2
-378 common s390_guarded_storage sys_s390_guarded_storage sys_s390_guarded_storage
-379 common statx sys_statx sys_statx
-380 common s390_sthyi sys_s390_sthyi sys_s390_sthyi
-381 common kexec_file_load sys_kexec_file_load sys_kexec_file_load
-382 common io_pgetevents sys_io_pgetevents compat_sys_io_pgetevents
-383 common rseq sys_rseq sys_rseq
-384 common pkey_mprotect sys_pkey_mprotect sys_pkey_mprotect
-385 common pkey_alloc sys_pkey_alloc sys_pkey_alloc
-386 common pkey_free sys_pkey_free sys_pkey_free
+1 common exit sys_exit
+2 common fork sys_fork
+3 common read sys_read
+4 common write sys_write
+5 common open sys_open
+6 common close sys_close
+7 common restart_syscall sys_restart_syscall
+8 common creat sys_creat
+9 common link sys_link
+10 common unlink sys_unlink
+11 common execve sys_execve
+12 common chdir sys_chdir
+14 common mknod sys_mknod
+15 common chmod sys_chmod
+19 common lseek sys_lseek
+20 common getpid sys_getpid
+21 common mount sys_mount
+22 common umount sys_oldumount
+26 common ptrace sys_ptrace
+27 common alarm sys_alarm
+29 common pause sys_pause
+30 common utime sys_utime
+33 common access sys_access
+34 common nice sys_nice
+36 common sync sys_sync
+37 common kill sys_kill
+38 common rename sys_rename
+39 common mkdir sys_mkdir
+40 common rmdir sys_rmdir
+41 common dup sys_dup
+42 common pipe sys_pipe
+43 common times sys_times
+45 common brk sys_brk
+48 common signal sys_signal
+51 common acct sys_acct
+52 common umount2 sys_umount
+54 common ioctl sys_ioctl
+55 common fcntl sys_fcntl
+57 common setpgid sys_setpgid
+60 common umask sys_umask
+61 common chroot sys_chroot
+62 common ustat sys_ustat
+63 common dup2 sys_dup2
+64 common getppid sys_getppid
+65 common getpgrp sys_getpgrp
+66 common setsid sys_setsid
+67 common sigaction sys_sigaction
+72 common sigsuspend sys_sigsuspend
+73 common sigpending sys_sigpending
+74 common sethostname sys_sethostname
+75 common setrlimit sys_setrlimit
+77 common getrusage sys_getrusage
+78 common gettimeofday sys_gettimeofday
+79 common settimeofday sys_settimeofday
+83 common symlink sys_symlink
+85 common readlink sys_readlink
+86 common uselib sys_uselib
+87 common swapon sys_swapon
+88 common reboot sys_reboot
+89 common readdir sys_ni_syscall
+90 common mmap sys_old_mmap
+91 common munmap sys_munmap
+92 common truncate sys_truncate
+93 common ftruncate sys_ftruncate
+94 common fchmod sys_fchmod
+96 common getpriority sys_getpriority
+97 common setpriority sys_setpriority
+99 common statfs sys_statfs
+100 common fstatfs sys_fstatfs
+102 common socketcall sys_socketcall
+103 common syslog sys_syslog
+104 common setitimer sys_setitimer
+105 common getitimer sys_getitimer
+106 common stat sys_newstat
+107 common lstat sys_newlstat
+108 common fstat sys_newfstat
+110 common lookup_dcookie sys_ni_syscall
+111 common vhangup sys_vhangup
+112 common idle sys_ni_syscall
+114 common wait4 sys_wait4
+115 common swapoff sys_swapoff
+116 common sysinfo sys_sysinfo
+117 common ipc sys_s390_ipc
+118 common fsync sys_fsync
+119 common sigreturn sys_sigreturn
+120 common clone sys_clone
+121 common setdomainname sys_setdomainname
+122 common uname sys_newuname
+124 common adjtimex sys_adjtimex
+125 common mprotect sys_mprotect
+126 common sigprocmask sys_sigprocmask
+127 common create_module sys_ni_syscall
+128 common init_module sys_init_module
+129 common delete_module sys_delete_module
+130 common get_kernel_syms sys_ni_syscall
+131 common quotactl sys_quotactl
+132 common getpgid sys_getpgid
+133 common fchdir sys_fchdir
+134 common bdflush sys_ni_syscall
+135 common sysfs sys_sysfs
+136 common personality sys_s390_personality
+137 common afs_syscall sys_ni_syscall
+141 common getdents sys_getdents
+142 common select sys_select
+143 common flock sys_flock
+144 common msync sys_msync
+145 common readv sys_readv
+146 common writev sys_writev
+147 common getsid sys_getsid
+148 common fdatasync sys_fdatasync
+149 common _sysctl sys_ni_syscall
+150 common mlock sys_mlock
+151 common munlock sys_munlock
+152 common mlockall sys_mlockall
+153 common munlockall sys_munlockall
+154 common sched_setparam sys_sched_setparam
+155 common sched_getparam sys_sched_getparam
+156 common sched_setscheduler sys_sched_setscheduler
+157 common sched_getscheduler sys_sched_getscheduler
+158 common sched_yield sys_sched_yield
+159 common sched_get_priority_max sys_sched_get_priority_max
+160 common sched_get_priority_min sys_sched_get_priority_min
+161 common sched_rr_get_interval sys_sched_rr_get_interval
+162 common nanosleep sys_nanosleep
+163 common mremap sys_mremap
+167 common query_module sys_ni_syscall
+168 common poll sys_poll
+169 common nfsservctl sys_ni_syscall
+172 common prctl sys_prctl
+173 common rt_sigreturn sys_rt_sigreturn
+174 common rt_sigaction sys_rt_sigaction
+175 common rt_sigprocmask sys_rt_sigprocmask
+176 common rt_sigpending sys_rt_sigpending
+177 common rt_sigtimedwait sys_rt_sigtimedwait
+178 common rt_sigqueueinfo sys_rt_sigqueueinfo
+179 common rt_sigsuspend sys_rt_sigsuspend
+180 common pread64 sys_pread64
+181 common pwrite64 sys_pwrite64
+183 common getcwd sys_getcwd
+184 common capget sys_capget
+185 common capset sys_capset
+186 common sigaltstack sys_sigaltstack
+187 common sendfile sys_sendfile64
+188 common getpmsg sys_ni_syscall
+189 common putpmsg sys_ni_syscall
+190 common vfork sys_vfork
+191 common getrlimit sys_getrlimit
+198 common lchown sys_lchown
+199 common getuid sys_getuid
+200 common getgid sys_getgid
+201 common geteuid sys_geteuid
+202 common getegid sys_getegid
+203 common setreuid sys_setreuid
+204 common setregid sys_setregid
+205 common getgroups sys_getgroups
+206 common setgroups sys_setgroups
+207 common fchown sys_fchown
+208 common setresuid sys_setresuid
+209 common getresuid sys_getresuid
+210 common setresgid sys_setresgid
+211 common getresgid sys_getresgid
+212 common chown sys_chown
+213 common setuid sys_setuid
+214 common setgid sys_setgid
+215 common setfsuid sys_setfsuid
+216 common setfsgid sys_setfsgid
+217 common pivot_root sys_pivot_root
+218 common mincore sys_mincore
+219 common madvise sys_madvise
+220 common getdents64 sys_getdents64
+222 common readahead sys_readahead
+224 common setxattr sys_setxattr
+225 common lsetxattr sys_lsetxattr
+226 common fsetxattr sys_fsetxattr
+227 common getxattr sys_getxattr
+228 common lgetxattr sys_lgetxattr
+229 common fgetxattr sys_fgetxattr
+230 common listxattr sys_listxattr
+231 common llistxattr sys_llistxattr
+232 common flistxattr sys_flistxattr
+233 common removexattr sys_removexattr
+234 common lremovexattr sys_lremovexattr
+235 common fremovexattr sys_fremovexattr
+236 common gettid sys_gettid
+237 common tkill sys_tkill
+238 common futex sys_futex
+239 common sched_setaffinity sys_sched_setaffinity
+240 common sched_getaffinity sys_sched_getaffinity
+241 common tgkill sys_tgkill
+243 common io_setup sys_io_setup
+244 common io_destroy sys_io_destroy
+245 common io_getevents sys_io_getevents
+246 common io_submit sys_io_submit
+247 common io_cancel sys_io_cancel
+248 common exit_group sys_exit_group
+249 common epoll_create sys_epoll_create
+250 common epoll_ctl sys_epoll_ctl
+251 common epoll_wait sys_epoll_wait
+252 common set_tid_address sys_set_tid_address
+253 common fadvise64 sys_fadvise64_64
+254 common timer_create sys_timer_create
+255 common timer_settime sys_timer_settime
+256 common timer_gettime sys_timer_gettime
+257 common timer_getoverrun sys_timer_getoverrun
+258 common timer_delete sys_timer_delete
+259 common clock_settime sys_clock_settime
+260 common clock_gettime sys_clock_gettime
+261 common clock_getres sys_clock_getres
+262 common clock_nanosleep sys_clock_nanosleep
+265 common statfs64 sys_statfs64
+266 common fstatfs64 sys_fstatfs64
+267 common remap_file_pages sys_remap_file_pages
+268 common mbind sys_mbind
+269 common get_mempolicy sys_get_mempolicy
+270 common set_mempolicy sys_set_mempolicy
+271 common mq_open sys_mq_open
+272 common mq_unlink sys_mq_unlink
+273 common mq_timedsend sys_mq_timedsend
+274 common mq_timedreceive sys_mq_timedreceive
+275 common mq_notify sys_mq_notify
+276 common mq_getsetattr sys_mq_getsetattr
+277 common kexec_load sys_kexec_load
+278 common add_key sys_add_key
+279 common request_key sys_request_key
+280 common keyctl sys_keyctl
+281 common waitid sys_waitid
+282 common ioprio_set sys_ioprio_set
+283 common ioprio_get sys_ioprio_get
+284 common inotify_init sys_inotify_init
+285 common inotify_add_watch sys_inotify_add_watch
+286 common inotify_rm_watch sys_inotify_rm_watch
+287 common migrate_pages sys_migrate_pages
+288 common openat sys_openat
+289 common mkdirat sys_mkdirat
+290 common mknodat sys_mknodat
+291 common fchownat sys_fchownat
+292 common futimesat sys_futimesat
+293 common newfstatat sys_newfstatat
+294 common unlinkat sys_unlinkat
+295 common renameat sys_renameat
+296 common linkat sys_linkat
+297 common symlinkat sys_symlinkat
+298 common readlinkat sys_readlinkat
+299 common fchmodat sys_fchmodat
+300 common faccessat sys_faccessat
+301 common pselect6 sys_pselect6
+302 common ppoll sys_ppoll
+303 common unshare sys_unshare
+304 common set_robust_list sys_set_robust_list
+305 common get_robust_list sys_get_robust_list
+306 common splice sys_splice
+307 common sync_file_range sys_sync_file_range
+308 common tee sys_tee
+309 common vmsplice sys_vmsplice
+310 common move_pages sys_move_pages
+311 common getcpu sys_getcpu
+312 common epoll_pwait sys_epoll_pwait
+313 common utimes sys_utimes
+314 common fallocate sys_fallocate
+315 common utimensat sys_utimensat
+316 common signalfd sys_signalfd
+317 common timerfd sys_ni_syscall
+318 common eventfd sys_eventfd
+319 common timerfd_create sys_timerfd_create
+320 common timerfd_settime sys_timerfd_settime
+321 common timerfd_gettime sys_timerfd_gettime
+322 common signalfd4 sys_signalfd4
+323 common eventfd2 sys_eventfd2
+324 common inotify_init1 sys_inotify_init1
+325 common pipe2 sys_pipe2
+326 common dup3 sys_dup3
+327 common epoll_create1 sys_epoll_create1
+328 common preadv sys_preadv
+329 common pwritev sys_pwritev
+330 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo
+331 common perf_event_open sys_perf_event_open
+332 common fanotify_init sys_fanotify_init
+333 common fanotify_mark sys_fanotify_mark
+334 common prlimit64 sys_prlimit64
+335 common name_to_handle_at sys_name_to_handle_at
+336 common open_by_handle_at sys_open_by_handle_at
+337 common clock_adjtime sys_clock_adjtime
+338 common syncfs sys_syncfs
+339 common setns sys_setns
+340 common process_vm_readv sys_process_vm_readv
+341 common process_vm_writev sys_process_vm_writev
+342 common s390_runtime_instr sys_s390_runtime_instr
+343 common kcmp sys_kcmp
+344 common finit_module sys_finit_module
+345 common sched_setattr sys_sched_setattr
+346 common sched_getattr sys_sched_getattr
+347 common renameat2 sys_renameat2
+348 common seccomp sys_seccomp
+349 common getrandom sys_getrandom
+350 common memfd_create sys_memfd_create
+351 common bpf sys_bpf
+352 common s390_pci_mmio_write sys_s390_pci_mmio_write
+353 common s390_pci_mmio_read sys_s390_pci_mmio_read
+354 common execveat sys_execveat
+355 common userfaultfd sys_userfaultfd
+356 common membarrier sys_membarrier
+357 common recvmmsg sys_recvmmsg
+358 common sendmmsg sys_sendmmsg
+359 common socket sys_socket
+360 common socketpair sys_socketpair
+361 common bind sys_bind
+362 common connect sys_connect
+363 common listen sys_listen
+364 common accept4 sys_accept4
+365 common getsockopt sys_getsockopt
+366 common setsockopt sys_setsockopt
+367 common getsockname sys_getsockname
+368 common getpeername sys_getpeername
+369 common sendto sys_sendto
+370 common sendmsg sys_sendmsg
+371 common recvfrom sys_recvfrom
+372 common recvmsg sys_recvmsg
+373 common shutdown sys_shutdown
+374 common mlock2 sys_mlock2
+375 common copy_file_range sys_copy_file_range
+376 common preadv2 sys_preadv2
+377 common pwritev2 sys_pwritev2
+378 common s390_guarded_storage sys_s390_guarded_storage
+379 common statx sys_statx
+380 common s390_sthyi sys_s390_sthyi
+381 common kexec_file_load sys_kexec_file_load
+382 common io_pgetevents sys_io_pgetevents
+383 common rseq sys_rseq
+384 common pkey_mprotect sys_pkey_mprotect
+385 common pkey_alloc sys_pkey_alloc
+386 common pkey_free sys_pkey_free
# room for arch specific syscalls
-392 64 semtimedop sys_semtimedop -
-393 common semget sys_semget sys_semget
-394 common semctl sys_semctl compat_sys_semctl
-395 common shmget sys_shmget sys_shmget
-396 common shmctl sys_shmctl compat_sys_shmctl
-397 common shmat sys_shmat compat_sys_shmat
-398 common shmdt sys_shmdt sys_shmdt
-399 common msgget sys_msgget sys_msgget
-400 common msgsnd sys_msgsnd compat_sys_msgsnd
-401 common msgrcv sys_msgrcv compat_sys_msgrcv
-402 common msgctl sys_msgctl compat_sys_msgctl
-403 32 clock_gettime64 - sys_clock_gettime
-404 32 clock_settime64 - sys_clock_settime
-405 32 clock_adjtime64 - sys_clock_adjtime
-406 32 clock_getres_time64 - sys_clock_getres
-407 32 clock_nanosleep_time64 - sys_clock_nanosleep
-408 32 timer_gettime64 - sys_timer_gettime
-409 32 timer_settime64 - sys_timer_settime
-410 32 timerfd_gettime64 - sys_timerfd_gettime
-411 32 timerfd_settime64 - sys_timerfd_settime
-412 32 utimensat_time64 - sys_utimensat
-413 32 pselect6_time64 - compat_sys_pselect6_time64
-414 32 ppoll_time64 - compat_sys_ppoll_time64
-416 32 io_pgetevents_time64 - compat_sys_io_pgetevents_time64
-417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64
-418 32 mq_timedsend_time64 - sys_mq_timedsend
-419 32 mq_timedreceive_time64 - sys_mq_timedreceive
-420 32 semtimedop_time64 - sys_semtimedop
-421 32 rt_sigtimedwait_time64 - compat_sys_rt_sigtimedwait_time64
-422 32 futex_time64 - sys_futex
-423 32 sched_rr_get_interval_time64 - sys_sched_rr_get_interval
-424 common pidfd_send_signal sys_pidfd_send_signal sys_pidfd_send_signal
-425 common io_uring_setup sys_io_uring_setup sys_io_uring_setup
-426 common io_uring_enter sys_io_uring_enter sys_io_uring_enter
-427 common io_uring_register sys_io_uring_register sys_io_uring_register
-428 common open_tree sys_open_tree sys_open_tree
-429 common move_mount sys_move_mount sys_move_mount
-430 common fsopen sys_fsopen sys_fsopen
-431 common fsconfig sys_fsconfig sys_fsconfig
-432 common fsmount sys_fsmount sys_fsmount
-433 common fspick sys_fspick sys_fspick
-434 common pidfd_open sys_pidfd_open sys_pidfd_open
-435 common clone3 sys_clone3 sys_clone3
-436 common close_range sys_close_range sys_close_range
-437 common openat2 sys_openat2 sys_openat2
-438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd
-439 common faccessat2 sys_faccessat2 sys_faccessat2
-440 common process_madvise sys_process_madvise sys_process_madvise
-441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
-442 common mount_setattr sys_mount_setattr sys_mount_setattr
-443 common quotactl_fd sys_quotactl_fd sys_quotactl_fd
-444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset
-445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule
-446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self
-447 common memfd_secret sys_memfd_secret sys_memfd_secret
-448 common process_mrelease sys_process_mrelease sys_process_mrelease
-449 common futex_waitv sys_futex_waitv sys_futex_waitv
-450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node
-451 common cachestat sys_cachestat sys_cachestat
-452 common fchmodat2 sys_fchmodat2 sys_fchmodat2
-453 common map_shadow_stack sys_map_shadow_stack sys_map_shadow_stack
-454 common futex_wake sys_futex_wake sys_futex_wake
-455 common futex_wait sys_futex_wait sys_futex_wait
-456 common futex_requeue sys_futex_requeue sys_futex_requeue
-457 common statmount sys_statmount sys_statmount
-458 common listmount sys_listmount sys_listmount
-459 common lsm_get_self_attr sys_lsm_get_self_attr sys_lsm_get_self_attr
-460 common lsm_set_self_attr sys_lsm_set_self_attr sys_lsm_set_self_attr
-461 common lsm_list_modules sys_lsm_list_modules sys_lsm_list_modules
-462 common mseal sys_mseal sys_mseal
+392 common semtimedop sys_semtimedop
+393 common semget sys_semget
+394 common semctl sys_semctl
+395 common shmget sys_shmget
+396 common shmctl sys_shmctl
+397 common shmat sys_shmat
+398 common shmdt sys_shmdt
+399 common msgget sys_msgget
+400 common msgsnd sys_msgsnd
+401 common msgrcv sys_msgrcv
+402 common msgctl sys_msgctl
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
+428 common open_tree sys_open_tree
+429 common move_mount sys_move_mount
+430 common fsopen sys_fsopen
+431 common fsconfig sys_fsconfig
+432 common fsmount sys_fsmount
+433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
+435 common clone3 sys_clone3
+436 common close_range sys_close_range
+437 common openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
+440 common process_madvise sys_process_madvise
+441 common epoll_pwait2 sys_epoll_pwait2
+442 common mount_setattr sys_mount_setattr
+443 common quotactl_fd sys_quotactl_fd
+444 common landlock_create_ruleset sys_landlock_create_ruleset
+445 common landlock_add_rule sys_landlock_add_rule
+446 common landlock_restrict_self sys_landlock_restrict_self
+447 common memfd_secret sys_memfd_secret
+448 common process_mrelease sys_process_mrelease
+449 common futex_waitv sys_futex_waitv
+450 common set_mempolicy_home_node sys_set_mempolicy_home_node
+451 common cachestat sys_cachestat
+452 common fchmodat2 sys_fchmodat2
+453 common map_shadow_stack sys_map_shadow_stack
+454 common futex_wake sys_futex_wake
+455 common futex_wait sys_futex_wait
+456 common futex_requeue sys_futex_requeue
+457 common statmount sys_statmount
+458 common listmount sys_listmount
+459 common lsm_get_self_attr sys_lsm_get_self_attr
+460 common lsm_set_self_attr sys_lsm_set_self_attr
+461 common lsm_list_modules sys_lsm_list_modules
+462 common mseal sys_mseal
+463 common setxattrat sys_setxattrat
+464 common getxattrat sys_getxattrat
+465 common listxattrat sys_listxattrat
+466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr
+470 common listns sys_listns
diff --git a/arch/s390/kernel/syscalls/syscalltbl b/arch/s390/kernel/syscalls/syscalltbl
deleted file mode 100755
index fbac1732f874..000000000000
--- a/arch/s390/kernel/syscalls/syscalltbl
+++ /dev/null
@@ -1,232 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-#
-# Generate system call table and header files
-#
-# Copyright IBM Corp. 2018
-# Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
-
-#
-# File path to the system call table definition.
-# You can set the path with the -i option. If omitted,
-# system call table definitions are read from standard input.
-#
-SYSCALL_TBL=""
-
-
-create_syscall_table_entries()
-{
- local nr abi name entry64 entry32 _ignore
- local temp=$(mktemp ${TMPDIR:-/tmp}/syscalltbl-common.XXXXXXXXX)
-
- (
- #
- # Initialize with 0 to create an NI_SYSCALL for 0
- #
- local prev_nr=0 prev_32=sys_ni_syscall prev_64=sys_ni_syscall
- while read nr abi name entry64 entry32 _ignore; do
- test x$entry32 = x- && entry32=sys_ni_syscall
- test x$entry64 = x- && entry64=sys_ni_syscall
-
- if test $prev_nr -eq $nr; then
- #
- # Same syscall but different ABI, just update
- # the respective entry point
- #
- case $abi in
- 32)
- prev_32=$entry32
- ;;
- 64)
- prev_64=$entry64
- ;;
- esac
- continue;
- else
- printf "%d\t%s\t%s\n" $prev_nr $prev_64 $prev_32
- fi
-
- prev_nr=$nr
- prev_64=$entry64
- prev_32=$entry32
- done
- printf "%d\t%s\t%s\n" $prev_nr $prev_64 $prev_32
- ) >> $temp
-
- #
- # Check for duplicate syscall numbers
- #
- if ! cat $temp |cut -f1 |uniq -d 2>&1; then
- echo "Error: generated system call table contains duplicate entries: $temp" >&2
- exit 1
- fi
-
- #
- # Generate syscall table
- #
- prev_nr=0
- while read nr entry64 entry32; do
- while test $prev_nr -lt $((nr - 1)); do
- printf "NI_SYSCALL\n"
- prev_nr=$((prev_nr + 1))
- done
- if test x$entry64 = xsys_ni_syscall &&
- test x$entry32 = xsys_ni_syscall; then
- printf "NI_SYSCALL\n"
- else
- printf "SYSCALL(%s,%s)\n" $entry64 $entry32
- fi
- prev_nr=$nr
- done < $temp
- rm $temp
-}
-
-generate_syscall_table()
-{
- cat <<-EoHEADER
- /* SPDX-License-Identifier: GPL-2.0 */
- /*
- * Definitions for sys_call_table, each line represents an
- * entry in the table in the form
- * SYSCALL(64 bit syscall, 31 bit emulated syscall)
- *
- * This file is meant to be included from entry.S.
- */
-
- #define NI_SYSCALL SYSCALL(sys_ni_syscall,sys_ni_syscall)
-
-EoHEADER
- grep -Ev '^(#|[[:blank:]]*$)' $SYSCALL_TBL \
- |sort -k1 -n \
- |create_syscall_table_entries
-}
-
-create_header_defines()
-{
- local nr abi name _ignore
-
- while read nr abi name _ignore; do
- printf "#define __NR_%s %d\n" $name $nr
- done
-}
-
-normalize_fileguard()
-{
- local fileguard="$1"
-
- echo "$1" |tr '[[:lower:]]' '[[:upper:]]' \
- |sed -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'
-}
-
-generate_syscall_header()
-{
- local abis=$(echo "($1)" | tr ',' '|')
- local filename="$2"
- local fileguard suffix
-
- if test "$filename"; then
- fileguard=$(normalize_fileguard "__UAPI_ASM_S390_$2")
- else
- case "$abis" in
- *64*) suffix=64 ;;
- *32*) suffix=32 ;;
- esac
- fileguard=$(normalize_fileguard "__UAPI_ASM_S390_SYSCALLS_$suffix")
- fi
-
- cat <<-EoHEADER
- /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
- #ifndef ${fileguard}
- #define ${fileguard}
-
-EoHEADER
-
- grep -E "^[[:digit:]]+[[:space:]]+${abis}" $SYSCALL_TBL \
- |sort -k1 -n \
- |create_header_defines
-
- cat <<-EoFOOTER
-
- #endif /* ${fileguard} */
-EoFOOTER
-}
-
-__max_syscall_nr()
-{
- local abis=$(echo "($1)" | tr ',' '|')
-
- grep -E "^[[:digit:]]+[[:space:]]+${abis}" $SYSCALL_TBL \
- |sed -ne 's/^\([[:digit:]]*\)[[:space:]].*/\1/p' \
- |sort -n \
- |tail -1
-}
-
-
-generate_syscall_nr()
-{
- local abis="$1"
- local max_syscall_nr num_syscalls
-
- max_syscall_nr=$(__max_syscall_nr "$abis")
- num_syscalls=$((max_syscall_nr + 1))
-
- cat <<-EoHEADER
- /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
- #ifndef __ASM_S390_SYSCALLS_NR
- #define __ASM_S390_SYSCALLS_NR
-
- #define NR_syscalls ${num_syscalls}
-
- #endif /* __ASM_S390_SYSCALLS_NR */
-EoHEADER
-}
-
-
-#
-# Parse command line arguments
-#
-do_syscall_header=""
-do_syscall_table=""
-do_syscall_nr=""
-output_file=""
-abi_list="common,64"
-filename=""
-while getopts ":HNSXi:a:f:" arg; do
- case $arg in
- a)
- abi_list="$OPTARG"
- ;;
- i)
- SYSCALL_TBL="$OPTARG"
- ;;
- f)
- filename=${OPTARG##*/}
- ;;
- H)
- do_syscall_header=1
- ;;
- N)
- do_syscall_nr=1
- ;;
- S)
- do_syscall_table=1
- ;;
- X)
- set -x
- ;;
- :)
- echo "Missing argument for -$OPTARG" >&2
- exit 1
- ;;
- \?)
- echo "Invalid option specified" >&2
- exit 1
- ;;
- esac
-done
-
-test "$do_syscall_header" && generate_syscall_header "$abi_list" "$filename"
-test "$do_syscall_table" && generate_syscall_table
-test "$do_syscall_nr" && generate_syscall_nr "$abi_list"
-
-exit 0
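For reference, the deleted syscalltbl helper expanded each table line into either a SYSCALL() entry-point pair or an NI_SYSCALL gap filler, plus a matching __NR_ define for the uapi headers. A minimal sketch of its output, assuming a hypothetical input that defines syscalls 1 and 3 but leaves 0 and 2 unassigned:

/* generated table fragment (included from entry.S) */
NI_SYSCALL                      /* 0: unassigned, emitted as sys_ni_syscall */
SYSCALL(sys_exit,sys_exit)      /* 1: 64-bit entry point, 31-bit emulated entry point */
NI_SYSCALL                      /* 2: gap filled by the prev_nr loop */
SYSCALL(sys_read,sys_read)      /* 3 */

/* generated header fragment */
#define __NR_exit 1
#define __NR_read 3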
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 2be30a96696a..33ca3e47a0e6 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -5,6 +5,7 @@
* Martin Schwidefsky <schwidefsky@de.ibm.com>,
*/
+#include <linux/cpufeature.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
@@ -15,54 +16,17 @@
#include <linux/export.h>
#include <linux/slab.h>
#include <asm/asm-extable.h>
+#include <asm/machine.h>
#include <asm/ebcdic.h>
#include <asm/debug.h>
#include <asm/sysinfo.h>
#include <asm/cpcmd.h>
#include <asm/topology.h>
#include <asm/fpu.h>
+#include <asm/asm.h>
int topology_max_mnest;
-static inline int __stsi(void *sysinfo, int fc, int sel1, int sel2, int *lvl)
-{
- int r0 = (fc << 28) | sel1;
- int rc = 0;
-
- asm volatile(
- " lr 0,%[r0]\n"
- " lr 1,%[r1]\n"
- " stsi 0(%[sysinfo])\n"
- "0: jz 2f\n"
- "1: lhi %[rc],%[retval]\n"
- "2: lr %[r0],0\n"
- EX_TABLE(0b, 1b)
- : [r0] "+d" (r0), [rc] "+d" (rc)
- : [r1] "d" (sel2),
- [sysinfo] "a" (sysinfo),
- [retval] "K" (-EOPNOTSUPP)
- : "cc", "0", "1", "memory");
- *lvl = ((unsigned int) r0) >> 28;
- return rc;
-}
-
-/*
- * stsi - store system information
- *
- * Returns the current configuration level if function code 0 was specified.
- * Otherwise returns 0 on success or a negative value on error.
- */
-int stsi(void *sysinfo, int fc, int sel1, int sel2)
-{
- int lvl, rc;
-
- rc = __stsi(sysinfo, fc, sel1, sel2, &lvl);
- if (rc)
- return rc;
- return fc ? 0 : lvl;
-}
-EXPORT_SYMBOL(stsi);
-
#ifdef CONFIG_PROC_FS
static bool convert_ext_name(unsigned char encoding, char *name, size_t len)
@@ -154,7 +118,7 @@ static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info)
int i;
seq_putc(m, '\n');
- if (!MACHINE_HAS_TOPOLOGY)
+ if (!cpu_has_topology())
return;
if (stsi(info, 15, 1, topology_max_mnest))
return;
@@ -415,7 +379,7 @@ static struct service_level service_level_vm = {
static __init int create_proc_service_level(void)
{
proc_create_seq("service_levels", 0, NULL, &service_level_seq_ops);
- if (MACHINE_IS_VM)
+ if (machine_is_vm())
register_service_level(&service_level_vm);
return 0;
}
@@ -498,7 +462,6 @@ static const struct file_operations stsi_##fc##_##s1##_##s2##_fs_ops = { \
.open = stsi_open_##fc##_##s1##_##s2, \
.release = stsi_release, \
.read = stsi_read, \
- .llseek = no_llseek, \
};
static int stsi_release(struct inode *inode, struct file *file)
@@ -560,10 +523,10 @@ static __init int stsi_init_debugfs(void)
sf = &stsi_file[i];
debugfs_create_file(sf->name, 0400, stsi_root, NULL, sf->fops);
}
- if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && MACHINE_HAS_TOPOLOGY) {
+ if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && cpu_has_topology()) {
char link_to[10];
- sprintf(link_to, "15_1_%d", topology_mnest_limit());
+ snprintf(link_to, sizeof(link_to), "15_1_%d", topology_mnest_limit());
debugfs_create_symlink("topology", stsi_root, link_to);
}
return 0;
diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S
index c0a70efa2426..26f2981aa09e 100644
--- a/arch/s390/kernel/text_amode31.S
+++ b/arch/s390/kernel/text_amode31.S
@@ -18,8 +18,7 @@
* affects a few functions that are not performance-relevant.
*/
.macro BR_EX_AMODE31_r14
- larl %r1,0f
- ex 0,0(%r1)
+ exrl 0,0f
j .
0: br %r14
.endm
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index fb9f31f36628..bd0df61d1907 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -12,8 +12,7 @@
* Copyright (C) 1991, 1992, 1995 Linus Torvalds
*/
-#define KMSG_COMPONENT "time"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "time: " fmt
#include <linux/kernel_stat.h>
#include <linux/errno.h>
@@ -36,7 +35,6 @@
#include <linux/profile.h>
#include <linux/timex.h>
#include <linux/notifier.h>
-#include <linux/timekeeper_internal.h>
#include <linux/clockchips.h>
#include <linux/gfp.h>
#include <linux/kprobes.h>
@@ -55,10 +53,10 @@
#include <asm/cio.h>
#include "entry.h"
-union tod_clock tod_clock_base __section(".data");
+union tod_clock __bootdata_preserved(tod_clock_base);
EXPORT_SYMBOL_GPL(tod_clock_base);
-u64 clock_comparator_max = -1ULL;
+u64 __bootdata_preserved(clock_comparator_max);
EXPORT_SYMBOL_GPL(clock_comparator_max);
static DEFINE_PER_CPU(struct clock_event_device, comparators);
@@ -70,8 +68,6 @@ unsigned char ptff_function_mask[16];
static unsigned long lpar_offset;
static unsigned long initial_leap_seconds;
-static unsigned long tod_steering_end;
-static long tod_steering_delta;
/*
* Get time offsets with PTFF
@@ -80,12 +76,8 @@ void __init time_early_init(void)
{
struct ptff_qto qto;
struct ptff_qui qui;
- int cs;
- /* Initialize TOD steering parameters */
- tod_steering_end = tod_clock_base.tod;
- for (cs = 0; cs < CS_BASES; cs++)
- vdso_data[cs].arch_data.tod_steering_end = tod_steering_end;
+ vdso_k_time_data->arch_data.tod_delta = tod_clock_base.tod;
if (!test_facility(28))
return;
@@ -131,7 +123,7 @@ void clock_comparator_work(void)
{
struct clock_event_device *cd;
- S390_lowcore.clock_comparator = clock_comparator_max;
+ get_lowcore()->clock_comparator = clock_comparator_max;
cd = this_cpu_ptr(&comparators);
cd->event_handler(cd);
}
@@ -139,8 +131,8 @@ void clock_comparator_work(void)
static int s390_next_event(unsigned long delta,
struct clock_event_device *evt)
{
- S390_lowcore.clock_comparator = get_tod_clock() + delta;
- set_clock_comparator(S390_lowcore.clock_comparator);
+ get_lowcore()->clock_comparator = get_tod_clock() + delta;
+ set_clock_comparator(get_lowcore()->clock_comparator);
return 0;
}
@@ -153,8 +145,8 @@ void init_cpu_timer(void)
struct clock_event_device *cd;
int cpu;
- S390_lowcore.clock_comparator = clock_comparator_max;
- set_clock_comparator(S390_lowcore.clock_comparator);
+ get_lowcore()->clock_comparator = clock_comparator_max;
+ set_clock_comparator(get_lowcore()->clock_comparator);
cpu = smp_processor_id();
cd = &per_cpu(comparators, cpu);
@@ -184,8 +176,8 @@ static void clock_comparator_interrupt(struct ext_code ext_code,
unsigned long param64)
{
inc_irq_stat(IRQEXT_CLK);
- if (S390_lowcore.clock_comparator == clock_comparator_max)
- set_clock_comparator(S390_lowcore.clock_comparator);
+ if (get_lowcore()->clock_comparator == clock_comparator_max)
+ set_clock_comparator(get_lowcore()->clock_comparator);
}
static void stp_timing_alert(struct stp_irq_parm *);
@@ -229,21 +221,7 @@ void __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
static u64 read_tod_clock(struct clocksource *cs)
{
- unsigned long now, adj;
-
- preempt_disable(); /* protect from changes to steering parameters */
- now = get_tod_clock();
- adj = tod_steering_end - now;
- if (unlikely((s64) adj > 0))
- /*
- * manually steer by 1 cycle every 2^16 cycles. This
- * corresponds to shifting the tod delta by 15. 1s is
- * therefore steered in ~9h. The adjust will decrease
- * over time, until it finally reaches 0.
- */
- now += (tod_steering_delta < 0) ? (adj >> 15) : -(adj >> 15);
- preempt_enable();
- return now;
+ return get_tod_clock_monotonic();
}
static struct clocksource clocksource_tod = {
@@ -255,6 +233,7 @@ static struct clocksource clocksource_tod = {
.shift = 24,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
.vdso_clock_mode = VDSO_CLOCKMODE_TOD,
+ .id = CSID_S390_TOD,
};
struct clocksource * __init clocksource_default_clock(void)
@@ -371,29 +350,11 @@ static inline int check_sync_clock(void)
*/
static void clock_sync_global(long delta)
{
- unsigned long now, adj;
struct ptff_qto qto;
- int cs;
/* Fixup the monotonic sched clock. */
tod_clock_base.eitod += delta;
- /* Adjust TOD steering parameters. */
- now = get_tod_clock();
- adj = tod_steering_end - now;
- if (unlikely((s64) adj >= 0))
- /* Calculate how much of the old adjustment is left. */
- tod_steering_delta = (tod_steering_delta < 0) ?
- -(adj >> 15) : (adj >> 15);
- tod_steering_delta += delta;
- if ((abs(tod_steering_delta) >> 48) != 0)
- panic("TOD clock sync offset %li is too large to drift\n",
- tod_steering_delta);
- tod_steering_end = now + (abs(tod_steering_delta) << 15);
- for (cs = 0; cs < CS_BASES; cs++) {
- vdso_data[cs].arch_data.tod_steering_end = tod_steering_end;
- vdso_data[cs].arch_data.tod_steering_delta = tod_steering_delta;
- }
-
+ vdso_k_time_data->arch_data.tod_delta = tod_clock_base.tod;
/* Update LPAR offset. */
if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
lpar_offset = qto.tod_epoch_difference;
@@ -408,12 +369,12 @@ static void clock_sync_global(long delta)
static void clock_sync_local(long delta)
{
/* Add the delta to the clock comparator. */
- if (S390_lowcore.clock_comparator != clock_comparator_max) {
- S390_lowcore.clock_comparator += delta;
- set_clock_comparator(S390_lowcore.clock_comparator);
+ if (get_lowcore()->clock_comparator != clock_comparator_max) {
+ get_lowcore()->clock_comparator += delta;
+ set_clock_comparator(get_lowcore()->clock_comparator);
}
/* Adjust the last_update_clock time-stamp. */
- S390_lowcore.last_update_clock += delta;
+ get_lowcore()->last_update_clock += delta;
}
/* Single threaded workqueue used for stp sync events */
@@ -435,7 +396,7 @@ struct clock_sync_data {
/*
* Server Time Protocol (STP) code.
*/
-static bool stp_online;
+static bool stp_online = true;
static struct stp_sstpi stp_info;
static void *stp_page;
@@ -461,13 +422,18 @@ static void __init stp_reset(void)
if (rc == 0)
set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags);
else if (stp_online) {
- pr_warn("The real or virtual hardware system does not provide an STP interface\n");
free_page((unsigned long) stp_page);
stp_page = NULL;
stp_online = false;
}
}
+bool stp_enabled(void)
+{
+ return test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags) && stp_online;
+}
+EXPORT_SYMBOL(stp_enabled);
+
static void stp_timeout(struct timer_list *unused)
{
queue_work(time_sync_wq, &stp_work);
@@ -579,7 +545,7 @@ static int stp_sync_clock(void *data)
atomic_dec(&sync->cpus);
/* Wait for in_sync to be set. */
while (READ_ONCE(sync->in_sync) == 0)
- __udelay(1);
+ ;
}
if (sync->in_sync != 1)
/* Didn't work. Clear per-cpu in sync bit again. */
@@ -590,81 +556,6 @@ static int stp_sync_clock(void *data)
return 0;
}
-static int stp_clear_leap(void)
-{
- struct __kernel_timex txc;
- int ret;
-
- memset(&txc, 0, sizeof(txc));
-
- ret = do_adjtimex(&txc);
- if (ret < 0)
- return ret;
-
- txc.modes = ADJ_STATUS;
- txc.status &= ~(STA_INS|STA_DEL);
- return do_adjtimex(&txc);
-}
-
-static void stp_check_leap(void)
-{
- struct stp_stzi stzi;
- struct stp_lsoib *lsoib = &stzi.lsoib;
- struct __kernel_timex txc;
- int64_t timediff;
- int leapdiff, ret;
-
- if (!stp_info.lu || !check_sync_clock()) {
- /*
- * Either a scheduled leap second was removed by the operator,
- * or STP is out of sync. In both cases, clear the leap second
- * kernel flags.
- */
- if (stp_clear_leap() < 0)
- pr_err("failed to clear leap second flags\n");
- return;
- }
-
- if (chsc_stzi(stp_page, &stzi, sizeof(stzi))) {
- pr_err("stzi failed\n");
- return;
- }
-
- timediff = tod_to_ns(lsoib->nlsout - get_tod_clock()) / NSEC_PER_SEC;
- leapdiff = lsoib->nlso - lsoib->also;
-
- if (leapdiff != 1 && leapdiff != -1) {
- pr_err("Cannot schedule %d leap seconds\n", leapdiff);
- return;
- }
-
- if (timediff < 0) {
- if (stp_clear_leap() < 0)
- pr_err("failed to clear leap second flags\n");
- } else if (timediff < 7200) {
- memset(&txc, 0, sizeof(txc));
- ret = do_adjtimex(&txc);
- if (ret < 0)
- return;
-
- txc.modes = ADJ_STATUS;
- if (leapdiff > 0)
- txc.status |= STA_INS;
- else
- txc.status |= STA_DEL;
- ret = do_adjtimex(&txc);
- if (ret < 0)
- pr_err("failed to set leap second flags\n");
- /* arm Timer to clear leap second flags */
- mod_timer(&stp_timer, jiffies + msecs_to_jiffies(14400 * MSEC_PER_SEC));
- } else {
- /* The day the leap second is scheduled for hasn't been reached. Retry
- * in one hour.
- */
- mod_timer(&stp_timer, jiffies + msecs_to_jiffies(3600 * MSEC_PER_SEC));
- }
-}
-
/*
* STP work. Check for the STP state and take over the clock
* synchronization if the STP clock source is usable.
@@ -679,7 +570,7 @@ static void stp_work_fn(struct work_struct *work)
if (!stp_online) {
chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL);
- del_timer_sync(&stp_timer);
+ timer_delete_sync(&stp_timer);
goto out_unlock;
}
@@ -706,8 +597,6 @@ static void stp_work_fn(struct work_struct *work)
* Retry after a second.
*/
mod_timer(&stp_timer, jiffies + msecs_to_jiffies(MSEC_PER_SEC));
- else if (stp_info.lu)
- stp_check_leap();
out_unlock:
mutex_unlock(&stp_mutex);
@@ -729,8 +618,8 @@ static ssize_t ctn_id_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid())
- ret = sprintf(buf, "%016lx\n",
- *(unsigned long *) stp_info.ctnid);
+ ret = sysfs_emit(buf, "%016lx\n",
+ *(unsigned long *)stp_info.ctnid);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -745,7 +634,7 @@ static ssize_t ctn_type_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid())
- ret = sprintf(buf, "%i\n", stp_info.ctn);
+ ret = sysfs_emit(buf, "%i\n", stp_info.ctn);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -760,7 +649,7 @@ static ssize_t dst_offset_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid() && (stp_info.vbits & 0x2000))
- ret = sprintf(buf, "%i\n", (int)(s16) stp_info.dsto);
+ ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.dsto);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -775,7 +664,7 @@ static ssize_t leap_seconds_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid() && (stp_info.vbits & 0x8000))
- ret = sprintf(buf, "%i\n", (int)(s16) stp_info.leaps);
+ ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.leaps);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -801,11 +690,11 @@ static ssize_t leap_seconds_scheduled_show(struct device *dev,
return ret;
if (!stzi.lsoib.p)
- return sprintf(buf, "0,0\n");
+ return sysfs_emit(buf, "0,0\n");
- return sprintf(buf, "%lu,%d\n",
- tod_to_ns(stzi.lsoib.nlsout - TOD_UNIX_EPOCH) / NSEC_PER_SEC,
- stzi.lsoib.nlso - stzi.lsoib.also);
+ return sysfs_emit(buf, "%lu,%d\n",
+ tod_to_ns(stzi.lsoib.nlsout - TOD_UNIX_EPOCH) / NSEC_PER_SEC,
+ stzi.lsoib.nlso - stzi.lsoib.also);
}
static DEVICE_ATTR_RO(leap_seconds_scheduled);
@@ -818,7 +707,7 @@ static ssize_t stratum_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid())
- ret = sprintf(buf, "%i\n", (int)(s16) stp_info.stratum);
+ ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.stratum);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -833,7 +722,7 @@ static ssize_t time_offset_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid() && (stp_info.vbits & 0x0800))
- ret = sprintf(buf, "%i\n", (int) stp_info.tto);
+ ret = sysfs_emit(buf, "%i\n", (int)stp_info.tto);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -848,7 +737,7 @@ static ssize_t time_zone_offset_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid() && (stp_info.vbits & 0x4000))
- ret = sprintf(buf, "%i\n", (int)(s16) stp_info.tzo);
+ ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.tzo);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -863,7 +752,7 @@ static ssize_t timing_mode_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid())
- ret = sprintf(buf, "%i\n", stp_info.tmd);
+ ret = sysfs_emit(buf, "%i\n", stp_info.tmd);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -878,7 +767,7 @@ static ssize_t timing_state_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid())
- ret = sprintf(buf, "%i\n", stp_info.tst);
+ ret = sysfs_emit(buf, "%i\n", stp_info.tst);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -889,7 +778,7 @@ static ssize_t online_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- return sprintf(buf, "%i\n", stp_online);
+ return sysfs_emit(buf, "%i\n", stp_online);
}
static ssize_t online_store(struct device *dev,
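The sprintf() to sysfs_emit() conversions above follow the standard sysfs show() pattern: sysfs_emit() knows that buf is a page-sized sysfs buffer and bounds the write accordingly, so an attribute cannot overflow the page. A minimal generic sketch (hypothetical attribute, not part of this patch):

static ssize_t example_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	/* buf is a PAGE_SIZE buffer provided by sysfs; output is bounded */
	return sysfs_emit(buf, "%d\n", 42);
}
static DEVICE_ATTR_RO(example);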
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 89e91b8ce842..1913a5566ac2 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -3,9 +3,9 @@
* Copyright IBM Corp. 2007, 2011
*/
-#define KMSG_COMPONENT "cpu"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "cpu: " fmt
+#include <linux/cpufeature.h>
#include <linux/workqueue.h>
#include <linux/memblock.h>
#include <linux/uaccess.h>
@@ -24,7 +24,9 @@
#include <linux/mm.h>
#include <linux/nodemask.h>
#include <linux/node.h>
+#include <asm/hiperdispatch.h>
#include <asm/sysinfo.h>
+#include <asm/asm.h>
#define PTF_HORIZONTAL (0UL)
#define PTF_VERTICAL (1UL)
@@ -47,6 +49,7 @@ static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED;
static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
static struct sysinfo_15_1_x *tl_info;
+static int cpu_management;
static DECLARE_WORK(topology_work, topology_work_fn);
@@ -144,6 +147,7 @@ static void add_cpus_to_mask(struct topology_core *tl_core,
cpumask_set_cpu(cpu, &book->mask);
cpumask_set_cpu(cpu, &socket->mask);
smp_cpu_set_polarization(cpu, tl_core->pp);
+ smp_cpu_set_capacity(cpu, CPU_CAPACITY_HIGH);
}
}
}
@@ -221,22 +225,22 @@ static void topology_update_polarization_simple(void)
static int ptf(unsigned long fc)
{
- int rc;
+ int cc;
asm volatile(
- " .insn rre,0xb9a20000,%1,%1\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (rc)
- : "d" (fc) : "cc");
- return rc;
+ " .insn rre,0xb9a20000,%[fc],%[fc]\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc)
+ : [fc] "d" (fc)
+ : CC_CLOBBER);
+ return CC_TRANSFORM(cc);
}
int topology_set_cpu_management(int fc)
{
int cpu, rc;
- if (!MACHINE_HAS_TOPOLOGY)
+ if (!cpu_has_topology())
return -EOPNOTSUPP;
if (fc)
rc = ptf(PTF_VERTICAL);
@@ -270,6 +274,7 @@ void update_cpu_masks(void)
topo->drawer_id = id;
}
}
+ hd_reset_state();
for_each_online_cpu(cpu) {
topo = &cpu_topology[cpu];
pkg_first = cpumask_first(&topo->core_mask);
@@ -278,8 +283,10 @@ void update_cpu_masks(void)
for_each_cpu(sibling, &topo->core_mask) {
topo_sibling = &cpu_topology[sibling];
smt_first = cpumask_first(&topo_sibling->thread_mask);
- if (sibling == smt_first)
+ if (sibling == smt_first) {
topo_package->booted_cores++;
+ hd_add_core(sibling);
+ }
}
} else {
topo->booted_cores = topo_package->booted_cores;
@@ -303,33 +310,33 @@ static void __arch_update_dedicated_flag(void *arg)
static int __arch_update_cpu_topology(void)
{
struct sysinfo_15_1_x *info = tl_info;
- int rc = 0;
+ int rc, hd_status;
+ hd_status = 0;
+ rc = 0;
mutex_lock(&smp_cpu_state_mutex);
- if (MACHINE_HAS_TOPOLOGY) {
+ if (cpu_has_topology()) {
rc = 1;
store_topology(info);
tl_to_masks(info);
}
update_cpu_masks();
- if (!MACHINE_HAS_TOPOLOGY)
+ if (!cpu_has_topology())
topology_update_polarization_simple();
+ if (cpu_management == 1)
+ hd_status = hd_enable_hiperdispatch();
mutex_unlock(&smp_cpu_state_mutex);
+ if (hd_status == 0)
+ hd_disable_hiperdispatch();
return rc;
}
int arch_update_cpu_topology(void)
{
- struct device *dev;
- int cpu, rc;
+ int rc;
rc = __arch_update_cpu_topology();
on_each_cpu(__arch_update_dedicated_flag, NULL, 0);
- for_each_online_cpu(cpu) {
- dev = get_cpu_device(cpu);
- if (dev)
- kobject_uevent(&dev->kobj, KOBJ_CHANGE);
- }
return rc;
}
@@ -364,12 +371,12 @@ static void set_topology_timer(void)
if (atomic_add_unless(&topology_poll, -1, 0))
mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100));
else
- mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC));
+ mod_timer(&topology_timer, jiffies + secs_to_jiffies(60));
}
void topology_expect_change(void)
{
- if (!MACHINE_HAS_TOPOLOGY)
+ if (!cpu_has_topology())
return;
/* This is racy, but it doesn't matter since it is just a heuristic.
* Worst case is that we poll in a higher frequency for a bit longer.
@@ -380,7 +387,24 @@ void topology_expect_change(void)
set_topology_timer();
}
-static int cpu_management;
+static int set_polarization(int polarization)
+{
+ int rc = 0;
+
+ cpus_read_lock();
+ mutex_lock(&smp_cpu_state_mutex);
+ if (cpu_management == polarization)
+ goto out;
+ rc = topology_set_cpu_management(polarization);
+ if (rc)
+ goto out;
+ cpu_management = polarization;
+ topology_expect_change();
+out:
+ mutex_unlock(&smp_cpu_state_mutex);
+ cpus_read_unlock();
+ return rc;
+}
static ssize_t dispatching_show(struct device *dev,
struct device_attribute *attr,
@@ -389,7 +413,7 @@ static ssize_t dispatching_show(struct device *dev,
ssize_t count;
mutex_lock(&smp_cpu_state_mutex);
- count = sprintf(buf, "%d\n", cpu_management);
+ count = sysfs_emit(buf, "%d\n", cpu_management);
mutex_unlock(&smp_cpu_state_mutex);
return count;
}
@@ -406,19 +430,7 @@ static ssize_t dispatching_store(struct device *dev,
return -EINVAL;
if (val != 0 && val != 1)
return -EINVAL;
- rc = 0;
- cpus_read_lock();
- mutex_lock(&smp_cpu_state_mutex);
- if (cpu_management == val)
- goto out;
- rc = topology_set_cpu_management(val);
- if (rc)
- goto out;
- cpu_management = val;
- topology_expect_change();
-out:
- mutex_unlock(&smp_cpu_state_mutex);
- cpus_read_unlock();
+ rc = set_polarization(val);
return rc ? rc : count;
}
static DEVICE_ATTR_RW(dispatching);
@@ -432,19 +444,19 @@ static ssize_t cpu_polarization_show(struct device *dev,
mutex_lock(&smp_cpu_state_mutex);
switch (smp_cpu_get_polarization(cpu)) {
case POLARIZATION_HRZ:
- count = sprintf(buf, "horizontal\n");
+ count = sysfs_emit(buf, "horizontal\n");
break;
case POLARIZATION_VL:
- count = sprintf(buf, "vertical:low\n");
+ count = sysfs_emit(buf, "vertical:low\n");
break;
case POLARIZATION_VM:
- count = sprintf(buf, "vertical:medium\n");
+ count = sysfs_emit(buf, "vertical:medium\n");
break;
case POLARIZATION_VH:
- count = sprintf(buf, "vertical:high\n");
+ count = sysfs_emit(buf, "vertical:high\n");
break;
default:
- count = sprintf(buf, "unknown\n");
+ count = sysfs_emit(buf, "unknown\n");
break;
}
mutex_unlock(&smp_cpu_state_mutex);
@@ -468,7 +480,7 @@ static ssize_t cpu_dedicated_show(struct device *dev,
ssize_t count;
mutex_lock(&smp_cpu_state_mutex);
- count = sprintf(buf, "%d\n", topology_cpu_dedicated(cpu));
+ count = sysfs_emit(buf, "%d\n", topology_cpu_dedicated(cpu));
mutex_unlock(&smp_cpu_state_mutex);
return count;
}
@@ -488,7 +500,7 @@ int topology_cpu_init(struct cpu *cpu)
int rc;
rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
- if (rc || !MACHINE_HAS_TOPOLOGY)
+ if (rc || !cpu_has_topology())
return rc;
rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group);
if (rc)
@@ -496,33 +508,27 @@ int topology_cpu_init(struct cpu *cpu)
return rc;
}
-static const struct cpumask *cpu_thread_mask(int cpu)
-{
- return &cpu_topology[cpu].thread_mask;
-}
-
-
const struct cpumask *cpu_coregroup_mask(int cpu)
{
return &cpu_topology[cpu].core_mask;
}
-static const struct cpumask *cpu_book_mask(int cpu)
+static const struct cpumask *tl_book_mask(struct sched_domain_topology_level *tl, int cpu)
{
return &cpu_topology[cpu].book_mask;
}
-static const struct cpumask *cpu_drawer_mask(int cpu)
+static const struct cpumask *tl_drawer_mask(struct sched_domain_topology_level *tl, int cpu)
{
return &cpu_topology[cpu].drawer_mask;
}
static struct sched_domain_topology_level s390_topology[] = {
- { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
- { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
- { cpu_book_mask, SD_INIT_NAME(BOOK) },
- { cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
- { cpu_cpu_mask, SD_INIT_NAME(PKG) },
+ SDTL_INIT(tl_smt_mask, cpu_smt_flags, SMT),
+ SDTL_INIT(tl_mc_mask, cpu_core_flags, MC),
+ SDTL_INIT(tl_book_mask, NULL, BOOK),
+ SDTL_INIT(tl_drawer_mask, NULL, DRAWER),
+ SDTL_INIT(tl_pkg_mask, NULL, PKG),
{ NULL, },
};
@@ -536,33 +542,38 @@ static void __init alloc_masks(struct sysinfo_15_1_x *info,
nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
nr_masks = max(nr_masks, 1);
for (i = 0; i < nr_masks; i++) {
- mask->next = memblock_alloc(sizeof(*mask->next), 8);
- if (!mask->next)
- panic("%s: Failed to allocate %zu bytes align=0x%x\n",
- __func__, sizeof(*mask->next), 8);
+ mask->next = memblock_alloc_or_panic(sizeof(*mask->next), 8);
mask = mask->next;
}
}
+static int __init detect_polarization(union topology_entry *tle)
+{
+ struct topology_core *tl_core;
+
+ while (tle->nl)
+ tle = next_tle(tle);
+ tl_core = (struct topology_core *)tle;
+ return tl_core->pp != POLARIZATION_HRZ;
+}
+
void __init topology_init_early(void)
{
struct sysinfo_15_1_x *info;
set_sched_topology(s390_topology);
if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) {
- if (MACHINE_HAS_TOPOLOGY)
+ if (cpu_has_topology())
topology_mode = TOPOLOGY_MODE_HW;
else
topology_mode = TOPOLOGY_MODE_SINGLE;
}
- if (!MACHINE_HAS_TOPOLOGY)
+ if (!cpu_has_topology())
goto out;
- tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- if (!tl_info)
- panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
- __func__, PAGE_SIZE, PAGE_SIZE);
+ tl_info = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
info = tl_info;
store_topology(info);
+ cpu_management = detect_polarization(info->tle);
pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n",
info->mag[0], info->mag[1], info->mag[2], info->mag[3],
info->mag[4], info->mag[5], info->mnest);
@@ -579,7 +590,7 @@ static inline int topology_get_mode(int enabled)
{
if (!enabled)
return TOPOLOGY_MODE_SINGLE;
- return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
+ return cpu_has_topology() ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
}
static inline int topology_is_enabled(void)
@@ -600,7 +611,7 @@ static int __init topology_setup(char *str)
}
early_param("topology", topology_setup);
-static int topology_ctl_handler(struct ctl_table *ctl, int write,
+static int topology_ctl_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int enabled = topology_is_enabled();
@@ -630,12 +641,37 @@ static int topology_ctl_handler(struct ctl_table *ctl, int write,
return rc;
}
-static struct ctl_table topology_ctl_table[] = {
+static int polarization_ctl_handler(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int polarization;
+ int rc;
+ struct ctl_table ctl_entry = {
+ .procname = ctl->procname,
+ .data = &polarization,
+ .maxlen = sizeof(int),
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ };
+
+ polarization = cpu_management;
+ rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write)
+ return rc;
+ return set_polarization(polarization);
+}
+
+static const struct ctl_table topology_ctl_table[] = {
{
.procname = "topology",
.mode = 0644,
.proc_handler = topology_ctl_handler,
},
+ {
+ .procname = "polarization",
+ .mode = 0644,
+ .proc_handler = polarization_ctl_handler,
+ },
};
static int __init topology_init(void)
@@ -644,10 +680,12 @@ static int __init topology_init(void)
int rc = 0;
timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE);
- if (MACHINE_HAS_TOPOLOGY)
+ if (cpu_has_topology())
set_topology_timer();
else
topology_update_polarization_simple();
+ if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY_VERTICAL))
+ set_polarization(1);
register_sysctl("s390", topology_ctl_table);
dev_root = bus_get_dev_root(&cpu_subsys);
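The new "polarization" entry is registered via the existing register_sysctl("s390", ...) call, so it should appear next to the "topology" knob. A hypothetical user-space sketch (path and semantics inferred from set_polarization() above, where 0 requests horizontal and 1 requests vertical polarization):

#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	/* assumed path: /proc/sys/s390/polarization */
	int fd = open("/proc/sys/s390/polarization", O_WRONLY);

	if (fd < 0)
		return 1;
	write(fd, "1", 1);	/* request vertical polarization */
	close(fd);
	return 0;
}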
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 52578b5cecbd..19687dab32f7 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -3,18 +3,13 @@
* S390 version
* Copyright IBM Corp. 1999, 2000
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
+ * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
*
* Derived from "arch/i386/kernel/traps.c"
* Copyright (C) 1991, 1992 Linus Torvalds
*/
-/*
- * 'Traps.c' handles hardware traps and faults after we have saved some
- * state in 'asm.s'.
- */
-#include "asm/irqflags.h"
-#include "asm/ptrace.h"
+#include <linux/cpufeature.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/randomize_kstack.h>
@@ -27,9 +22,13 @@
#include <linux/uaccess.h>
#include <linux/cpu.h>
#include <linux/entry-common.h>
+#include <linux/kmsan.h>
#include <asm/asm-extable.h>
+#include <asm/irqflags.h>
+#include <asm/ptrace.h>
#include <asm/vtime.h>
#include <asm/fpu.h>
+#include <asm/fault.h>
#include "entry.h"
static inline void __user *get_trap_ip(struct pt_regs *regs)
@@ -40,7 +39,7 @@ static inline void __user *get_trap_ip(struct pt_regs *regs)
address = current->thread.trap_tdb.data[3];
else
address = regs->psw.addr;
- return (void __user *) (address - (regs->int_code >> 16));
+ return (void __user *)(address - (regs->int_code >> 16));
}
#ifdef CONFIG_GENERIC_BUG
@@ -55,16 +54,15 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
if (user_mode(regs)) {
force_sig_fault(si_signo, si_code, get_trap_ip(regs));
report_user_fault(regs, si_signo, 0);
- } else {
+ } else {
if (!fixup_exception(regs))
die(regs, str);
- }
+ }
}
static void do_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
{
- if (notify_die(DIE_TRAP, str, regs, 0,
- regs->int_code, si_signo) == NOTIFY_STOP)
+ if (notify_die(DIE_TRAP, str, regs, 0, regs->int_code, si_signo) == NOTIFY_STOP)
return;
do_report_trap(regs, si_signo, si_code, str);
}
@@ -76,8 +74,7 @@ void do_per_trap(struct pt_regs *regs)
return;
if (!current->ptrace)
return;
- force_sig_fault(SIGTRAP, TRAP_HWBKPT,
- (void __force __user *) current->thread.per_event.address);
+ force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __force __user *)current->thread.per_event.address);
}
NOKPROBE_SYMBOL(do_per_trap);
@@ -96,36 +93,25 @@ static void name(struct pt_regs *regs) \
do_trap(regs, signr, sicode, str); \
}
-DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR,
- "addressing exception")
-DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN,
- "execute exception")
-DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV,
- "fixpoint divide exception")
-DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF,
- "fixpoint overflow exception")
-DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF,
- "HFP overflow exception")
-DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND,
- "HFP underflow exception")
-DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES,
- "HFP significance exception")
-DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV,
- "HFP divide exception")
-DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV,
- "HFP square root exception")
-DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN,
- "operand exception")
-DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC,
- "privileged operation")
-DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN,
- "special operation exception")
-DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN,
- "transaction constraint exception")
+DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, "addressing exception")
+DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, "fixpoint divide exception")
+DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, "execute exception")
+DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, "HFP divide exception")
+DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, "HFP overflow exception")
+DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, "HFP significance exception")
+DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, "HFP square root exception")
+DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, "HFP underflow exception")
+DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, "operand exception")
+DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, "fixpoint overflow exception")
+DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, "privileged operation")
+DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, "special operation exception")
+DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, "specification exception");
+DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, "transaction constraint exception")
static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc)
{
int si_code = 0;
+
/* FPC[2] is Data Exception Code */
if ((fpc & 0x00000300) == 0) {
/* bits 6 and 7 of DXC are 0 iff IEEE exception */
@@ -151,36 +137,35 @@ static void translation_specification_exception(struct pt_regs *regs)
static void illegal_op(struct pt_regs *regs)
{
- __u8 opcode[6];
- __u16 __user *location;
int is_uprobe_insn = 0;
+ u16 __user *location;
int signal = 0;
+ u16 opcode;
location = get_trap_ip(regs);
-
if (user_mode(regs)) {
- if (get_user(*((__u16 *) opcode), (__u16 __user *) location))
+ if (get_user(opcode, location))
return;
- if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) {
+ if (opcode == S390_BREAKPOINT_U16) {
if (current->ptrace)
force_sig_fault(SIGTRAP, TRAP_BRKPT, location);
else
signal = SIGILL;
#ifdef CONFIG_UPROBES
- } else if (*((__u16 *) opcode) == UPROBE_SWBP_INSN) {
+ } else if (opcode == UPROBE_SWBP_INSN) {
is_uprobe_insn = 1;
#endif
- } else
+ } else {
signal = SIGILL;
+ }
}
/*
- * We got either an illegal op in kernel mode, or user space trapped
+ * This is either an illegal op in kernel mode, or user space trapped
* on a uprobes illegal instruction. See if kprobes or uprobes picks
* it up. If not, SIGILL.
*/
if (is_uprobe_insn || !user_mode(regs)) {
- if (notify_die(DIE_BPT, "bpt", regs, 0,
- 3, SIGTRAP) != NOTIFY_STOP)
+ if (notify_die(DIE_BPT, "bpt", regs, 0, 3, SIGTRAP) != NOTIFY_STOP)
signal = SIGILL;
}
if (signal)
@@ -188,18 +173,10 @@ static void illegal_op(struct pt_regs *regs)
}
NOKPROBE_SYMBOL(illegal_op);
-DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN,
- "specification exception");
-
static void vector_exception(struct pt_regs *regs)
{
int si_code, vic;
- if (!cpu_has_vx()) {
- do_trap(regs, SIGILL, ILL_ILLOPN, "illegal operation");
- return;
- }
-
/* get vector interrupt code from fpc */
save_user_fpu_regs();
vic = (current->thread.ufpu.fpc & 0xf00) >> 8;
@@ -247,7 +224,6 @@ static void monitor_event_exception(struct pt_regs *regs)
{
if (user_mode(regs))
return;
-
switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) {
case BUG_TRAP_TYPE_NONE:
fixup_exception(regs);
@@ -260,15 +236,20 @@ static void monitor_event_exception(struct pt_regs *regs)
}
}
-void kernel_stack_overflow(struct pt_regs *regs)
+void kernel_stack_invalid(struct pt_regs *regs)
{
+ /*
+ * Normally regs are unpoisoned by the generic entry code, but
+ * kernel_stack_overflow() is a rare case that is called bypassing it.
+ */
+ kmsan_unpoison_entry_regs(regs);
bust_spinlocks(1);
- printk("Kernel stack overflow.\n");
+ pr_emerg("Kernel stack pointer invalid\n");
show_regs(regs);
bust_spinlocks(0);
- panic("Corrupt kernel stack, can't continue.");
+ panic("Invalid kernel stack pointer, cannot continue");
}
-NOKPROBE_SYMBOL(kernel_stack_overflow);
+NOKPROBE_SYMBOL(kernel_stack_invalid);
static void __init test_monitor_call(void)
{
@@ -276,27 +257,28 @@ static void __init test_monitor_call(void)
if (!IS_ENABLED(CONFIG_BUG))
return;
- asm volatile(
+ asm_inline volatile(
" mc 0,0\n"
- "0: xgr %0,%0\n"
+ "0: lhi %[val],0\n"
"1:\n"
- EX_TABLE(0b,1b)
- : "+d" (val));
+ EX_TABLE(0b, 1b)
+ : [val] "+d" (val));
if (!val)
panic("Monitor call doesn't work!\n");
}
void __init trap_init(void)
{
+ struct lowcore *lc = get_lowcore();
unsigned long flags;
struct ctlreg cr0;
local_irq_save(flags);
cr0 = local_ctl_clear_bit(0, CR0_LOW_ADDRESS_PROTECTION_BIT);
- psw_bits(S390_lowcore.external_new_psw).mcheck = 1;
- psw_bits(S390_lowcore.program_new_psw).mcheck = 1;
- psw_bits(S390_lowcore.svc_new_psw).mcheck = 1;
- psw_bits(S390_lowcore.io_new_psw).mcheck = 1;
+ psw_bits(lc->external_new_psw).mcheck = 1;
+ psw_bits(lc->program_new_psw).mcheck = 1;
+ psw_bits(lc->svc_new_psw).mcheck = 1;
+ psw_bits(lc->io_new_psw).mcheck = 1;
local_ctl_load(0, &cr0);
local_irq_restore(flags);
local_mcck_enable();
@@ -307,36 +289,47 @@ static void (*pgm_check_table[128])(struct pt_regs *regs);
void noinstr __do_pgm_check(struct pt_regs *regs)
{
- unsigned int trapnr;
+ struct lowcore *lc = get_lowcore();
irqentry_state_t state;
+ unsigned int trapnr;
+ union teid teid;
- regs->int_code = S390_lowcore.pgm_int_code;
- regs->int_parm_long = S390_lowcore.trans_exc_code;
-
+ teid.val = lc->trans_exc_code;
+ regs->int_code = lc->pgm_int_code;
+ regs->int_parm_long = teid.val;
+ /*
+ * In case of a guest fault, short-circuit the fault handler and return.
+ * This way the sie64a() function will return 0; fault address and
+ * other relevant bits are saved in current->thread.gmap_teid, and
+ * the fault number in current->thread.gmap_int_code. KVM will be
+ * able to use this information to handle the fault.
+ */
+ if (test_pt_regs_flag(regs, PIF_GUEST_FAULT)) {
+ current->thread.gmap_teid.val = regs->int_parm_long;
+ current->thread.gmap_int_code = regs->int_code & 0xffff;
+ return;
+ }
state = irqentry_enter(regs);
-
if (user_mode(regs)) {
update_timer_sys();
- if (!static_branch_likely(&cpu_has_bear)) {
+ if (!cpu_has_bear()) {
if (regs->last_break < 4096)
regs->last_break = 1;
}
current->thread.last_break = regs->last_break;
}
-
- if (S390_lowcore.pgm_code & 0x0200) {
+ if (lc->pgm_code & 0x0200) {
/* transaction abort */
- current->thread.trap_tdb = S390_lowcore.pgm_tdb;
+ current->thread.trap_tdb = lc->pgm_tdb;
}
-
- if (S390_lowcore.pgm_code & PGM_INT_CODE_PER) {
+ if (lc->pgm_code & PGM_INT_CODE_PER) {
if (user_mode(regs)) {
struct per_event *ev = &current->thread.per_event;
set_thread_flag(TIF_PER_TRAP);
- ev->address = S390_lowcore.per_address;
- ev->cause = S390_lowcore.per_code_combined;
- ev->paid = S390_lowcore.per_access_id;
+ ev->address = lc->per_address;
+ ev->cause = lc->per_code_combined;
+ ev->paid = lc->per_access_id;
} else {
/* PER event in kernel is kprobes */
__arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER);
@@ -344,11 +337,9 @@ void noinstr __do_pgm_check(struct pt_regs *regs)
goto out;
}
}
-
if (!irqs_disabled_flags(regs->psw.mask))
trace_hardirqs_on();
__arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER);
-
trapnr = regs->int_code & PGM_INT_CODE_MASK;
if (trapnr)
pgm_check_table[trapnr](regs);
@@ -400,8 +391,8 @@ static void (*pgm_check_table[128])(struct pt_regs *regs) = {
[0x3b] = do_dat_exception,
[0x3c] = default_trap_handler,
[0x3d] = do_secure_storage_access,
- [0x3e] = do_non_secure_storage_access,
- [0x3f] = do_secure_storage_violation,
+ [0x3e] = default_trap_handler,
+ [0x3f] = default_trap_handler,
[0x40] = monitor_event_exception,
[0x41 ... 0x7f] = default_trap_handler,
};
@@ -412,5 +403,3 @@ static void (*pgm_check_table[128])(struct pt_regs *regs) = {
__stringify(default_trap_handler))
COND_TRAP(do_secure_storage_access);
-COND_TRAP(do_non_secure_storage_access);
-COND_TRAP(do_secure_storage_violation);
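The remaining COND_TRAP() line ties do_secure_storage_access to default_trap_handler as a fallback when no real handler is built in. A generic sketch of that weak-alias technique in plain C (illustration only, not the kernel's exact macro):

struct pt_regs;

void default_handler(struct pt_regs *regs)
{
	/* catch-all used when no specific handler is linked in */
}

/* resolves to default_handler unless a strong definition exists elsewhere */
void optional_handler(struct pt_regs *regs)
	__attribute__((weak, alias("default_handler")));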
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index 0ece156fdd7c..0f88caca4eaf 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -1,4 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
@@ -49,6 +51,8 @@ static inline bool is_final_pt_regs(struct unwind_state *state,
READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE;
}
+/* Avoid KMSAN false positives from touching uninitialized frames. */
+__no_kmsan_checks
bool unwind_next_frame(struct unwind_state *state)
{
struct stack_info *info = &state->stack_info;
@@ -118,6 +122,8 @@ out_stop:
}
EXPORT_SYMBOL_GPL(unwind_next_frame);
+/* Avoid KMSAN false positives from touching uninitialized frames. */
+__no_kmsan_checks
void __unwind_start(struct unwind_state *state, struct task_struct *task,
struct pt_regs *regs, unsigned long first_frame)
{
diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c
index 5b0633ea8d93..c624f3361e43 100644
--- a/arch/s390/kernel/uprobes.c
+++ b/arch/s390/kernel/uprobes.c
@@ -8,7 +8,6 @@
#include <linux/uaccess.h>
#include <linux/uprobes.h>
-#include <linux/compat.h>
#include <linux/kdebug.h>
#include <linux/sched/task_stack.h>
@@ -29,7 +28,7 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
if (psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT)
return -EINVAL;
- if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT)
+ if (psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT)
return -EINVAL;
clear_thread_flag(TIF_PER_TRAP);
auprobe->saved_per = psw_bits(regs->psw).per;
@@ -161,11 +160,6 @@ bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
/* Instruction Emulation */
-static void adjust_psw_addr(psw_t *psw, unsigned long len)
-{
- psw->addr = __rewind_psw(*psw, -len);
-}
-
#define EMU_ILLEGAL_OP 1
#define EMU_SPECIFICATION 2
#define EMU_ADDRESSING 3
@@ -353,7 +347,7 @@ static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs)
}
break;
}
- adjust_psw_addr(&regs->psw, ilen);
+ regs->psw.addr = __forward_psw(regs->psw, ilen);
switch (rc) {
case EMU_ILLEGAL_OP:
regs->int_code = ilen << 16 | 0x0001;
@@ -373,8 +367,7 @@ static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs)
bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
if ((psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT) ||
- ((psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT) &&
- !is_compat_task())) {
+ (psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT)) {
regs->psw.addr = __rewind_psw(regs->psw, UPROBE_SWBP_INSN_SIZE);
do_report_trap(regs, SIGILL, ILL_ILLADR, NULL);
return true;
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 265fea37e030..ed46950be86f 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -2,11 +2,11 @@
/*
* Common Ultravisor functions and initialization
*
- * Copyright IBM Corp. 2019, 2020
+ * Copyright IBM Corp. 2019, 2024
*/
-#define KMSG_COMPONENT "prot_virt"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "prot_virt: " fmt
+#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/sizes.h>
@@ -14,15 +14,15 @@
#include <linux/memblock.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
+#include <linux/pagewalk.h>
+#include <linux/backing-dev.h>
#include <asm/facility.h>
#include <asm/sections.h>
#include <asm/uv.h>
/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
int __bootdata_preserved(prot_virt_guest);
EXPORT_SYMBOL(prot_virt_guest);
-#endif
/*
* uv_info contains both host and guest information but it's currently only
@@ -35,7 +35,6 @@ EXPORT_SYMBOL(prot_virt_guest);
struct uv_info __bootdata_preserved(uv_info);
EXPORT_SYMBOL(uv_info);
-#if IS_ENABLED(CONFIG_KVM)
int __bootdata_preserved(prot_virt_host);
EXPORT_SYMBOL(prot_virt_host);
@@ -110,7 +109,7 @@ EXPORT_SYMBOL_GPL(uv_pin_shared);
*
* @paddr: Absolute host address of page to be destroyed
*/
-static int uv_destroy_page(unsigned long paddr)
+static int uv_destroy(unsigned long paddr)
{
struct uv_cb_cfs uvcb = {
.header.cmd = UVC_CMD_DESTR_SEC_STOR,
@@ -131,20 +130,33 @@ static int uv_destroy_page(unsigned long paddr)
}
/*
- * The caller must already hold a reference to the page
+ * The caller must already hold a reference to the folio
*/
-int uv_destroy_owned_page(unsigned long paddr)
+int uv_destroy_folio(struct folio *folio)
{
- struct page *page = phys_to_page(paddr);
int rc;
- get_page(page);
- rc = uv_destroy_page(paddr);
+ /* Large folios cannot be secure */
+ if (unlikely(folio_test_large(folio)))
+ return 0;
+
+ folio_get(folio);
+ rc = uv_destroy(folio_to_phys(folio));
if (!rc)
- clear_bit(PG_arch_1, &page->flags);
- put_page(page);
+ clear_bit(PG_arch_1, &folio->flags.f);
+ folio_put(folio);
return rc;
}
+EXPORT_SYMBOL(uv_destroy_folio);
+
+/*
+ * The present PTE still indirectly holds a folio reference through the mapping.
+ */
+int uv_destroy_pte(pte_t pte)
+{
+ VM_WARN_ON(!pte_present(pte));
+ return uv_destroy_folio(pfn_folio(pte_pfn(pte)));
+}
/*
* Requests the Ultravisor to encrypt a guest page and make it
@@ -164,22 +176,69 @@ int uv_convert_from_secure(unsigned long paddr)
return -EINVAL;
return 0;
}
+EXPORT_SYMBOL_GPL(uv_convert_from_secure);
/*
- * The caller must already hold a reference to the page
+ * The caller must already hold a reference to the folio.
*/
-int uv_convert_owned_from_secure(unsigned long paddr)
+int uv_convert_from_secure_folio(struct folio *folio)
{
- struct page *page = phys_to_page(paddr);
int rc;
- get_page(page);
- rc = uv_convert_from_secure(paddr);
+ /* Large folios cannot be secure */
+ if (unlikely(folio_test_large(folio)))
+ return 0;
+
+ folio_get(folio);
+ rc = uv_convert_from_secure(folio_to_phys(folio));
if (!rc)
- clear_bit(PG_arch_1, &page->flags);
- put_page(page);
+ clear_bit(PG_arch_1, &folio->flags.f);
+ folio_put(folio);
return rc;
}
+EXPORT_SYMBOL_GPL(uv_convert_from_secure_folio);
+
+/*
+ * The present PTE still indirectly holds a folio reference through the mapping.
+ */
+int uv_convert_from_secure_pte(pte_t pte)
+{
+ VM_WARN_ON(!pte_present(pte));
+ return uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte)));
+}
+
+/**
+ * should_export_before_import - Determine whether an export is needed
+ * before an import-like operation
+ * @uvcb: the Ultravisor control block of the UVC to be performed
+ * @mm: the mm of the process
+ *
+ * Returns whether an export is needed before every import-like operation.
+ * This is needed for shared pages, which don't trigger a secure storage
+ * exception when accessed from a different guest.
+ *
+ * Although considered as one, the Unpin Page UVC is not an actual import,
+ * so it is not affected.
+ *
+ * No export is needed also when there is only one protected VM, because the
+ * page cannot belong to the wrong VM in that case (there is no "other VM"
+ * it can belong to).
+ *
+ * Return: true if an export is needed before every import, otherwise false.
+ */
+static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
+{
+ /*
+ * The misc feature indicates, among other things, that importing a
+ * shared page from a different protected VM will automatically also
+ * transfer its ownership.
+ */
+ if (uv_has_feature(BIT_UV_FEAT_MISC))
+ return false;
+ if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
+ return false;
+ return atomic_read(&mm->context.protected_count) > 1;
+}
/*
* Calculate the expected ref_count for a folio that would otherwise have no
@@ -202,16 +261,34 @@ static int expected_folio_refs(struct folio *folio)
return res;
}
-static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
+/**
+ * __make_folio_secure() - make a folio secure
+ * @folio: the folio to make secure
+ * @uvcb: the uvcb that describes the UVC to be used
+ *
+ * The folio @folio will be made secure if possible, @uvcb will be passed
+ * as-is to the UVC.
+ *
+ * Return: 0 on success;
+ * -EBUSY if the folio is in writeback or has too many references;
+ * -EAGAIN if the UVC needs to be attempted again;
+ * -ENXIO if the address is not mapped;
+ * -EINVAL if the UVC failed for other reasons.
+ *
+ * Context: The caller must hold exactly one extra reference on the folio
+ * (it's the same logic as split_folio()), and the folio must be
+ * locked.
+ */
+static int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
{
int expected, cc = 0;
if (folio_test_writeback(folio))
- return -EAGAIN;
- expected = expected_folio_refs(folio);
+ return -EBUSY;
+ expected = expected_folio_refs(folio) + 1;
if (!folio_ref_freeze(folio, expected))
return -EBUSY;
- set_bit(PG_arch_1, &folio->flags);
+ set_bit(PG_arch_1, &folio->flags.f);
/*
* If the UVC does not succeed or fail immediately, we don't want to
* loop for long, or we might get stall notifications.
@@ -233,248 +310,197 @@ static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
return uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
}
-/**
- * should_export_before_import - Determine whether an export is needed
- * before an import-like operation
- * @uvcb: the Ultravisor control block of the UVC to be performed
- * @mm: the mm of the process
- *
- * Returns whether an export is needed before every import-like operation.
- * This is needed for shared pages, which don't trigger a secure storage
- * exception when accessed from a different guest.
- *
- * Although considered as one, the Unpin Page UVC is not an actual import,
- * so it is not affected.
- *
- * No export is needed also when there is only one protected VM, because the
- * page cannot belong to the wrong VM in that case (there is no "other VM"
- * it can belong to).
- *
- * Return: true if an export is needed before every import, otherwise false.
- */
-static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
+static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct uv_cb_header *uvcb)
{
- /*
- * The misc feature indicates, among other things, that importing a
- * shared page from a different protected VM will automatically also
- * transfer its ownership.
- */
- if (uv_has_feature(BIT_UV_FEAT_MISC))
- return false;
- if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
- return false;
- return atomic_read(&mm->context.protected_count) > 1;
+ int rc;
+
+ if (!folio_trylock(folio))
+ return -EAGAIN;
+ if (should_export_before_import(uvcb, mm))
+ uv_convert_from_secure(folio_to_phys(folio));
+ rc = __make_folio_secure(folio, uvcb);
+ folio_unlock(folio);
+
+ return rc;
}
-/*
- * Requests the Ultravisor to make a page accessible to a guest.
- * If it's brought in the first time, it will be cleared. If
- * it has been exported before, it will be decrypted and integrity
- * checked.
+/**
+ * s390_wiggle_split_folio() - try to drain extra references to a folio and
+ * split the folio if it is large.
+ * @mm: the mm containing the folio to work on
+ * @folio: the folio
+ *
+ * Context: Must be called while holding an extra reference to the folio;
+ * the mm lock should not be held.
+ * Return: 0 if the operation was successful;
+ * -EAGAIN if splitting the large folio was not successful,
+ * but another attempt can be made;
+ * -EINVAL in case of other folio splitting errors. See split_folio().
*/
-int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
+static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio)
{
- struct vm_area_struct *vma;
- bool local_drain = false;
- spinlock_t *ptelock;
- unsigned long uaddr;
- struct folio *folio;
- pte_t *ptep;
- int rc;
+ int rc, tried_splits;
-again:
- rc = -EFAULT;
- mmap_read_lock(gmap->mm);
+ lockdep_assert_not_held(&mm->mmap_lock);
+ folio_wait_writeback(folio);
+ lru_add_drain_all();
- uaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(uaddr))
- goto out;
- vma = vma_lookup(gmap->mm, uaddr);
- if (!vma)
- goto out;
- /*
- * Secure pages cannot be huge and userspace should not combine both.
- * In case userspace does it anyway this will result in an -EFAULT for
- * the unpack. The guest is thus never reaching secure mode. If
- * userspace is playing dirty tricky with mapping huge pages later
- * on this will result in a segmentation fault.
- */
- if (is_vm_hugetlb_page(vma))
- goto out;
-
- rc = -ENXIO;
- ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
- if (!ptep)
- goto out;
- if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
- folio = page_folio(pte_page(*ptep));
- rc = -EINVAL;
- if (folio_test_large(folio))
- goto unlock;
- rc = -EAGAIN;
- if (folio_trylock(folio)) {
- if (should_export_before_import(uvcb, gmap->mm))
- uv_convert_from_secure(PFN_PHYS(folio_pfn(folio)));
- rc = make_folio_secure(folio, uvcb);
+ if (!folio_test_large(folio))
+ return 0;
+
+ for (tried_splits = 0; tried_splits < 2; tried_splits++) {
+ struct address_space *mapping;
+ loff_t lstart, lend;
+ struct inode *inode;
+
+ folio_lock(folio);
+ rc = split_folio(folio);
+ if (rc != -EBUSY) {
folio_unlock(folio);
+ return rc;
}
- }
-unlock:
- pte_unmap_unlock(ptep, ptelock);
-out:
- mmap_read_unlock(gmap->mm);
- if (rc == -EAGAIN) {
/*
- * If we are here because the UVC returned busy or partial
- * completion, this is just a useless check, but it is safe.
+ * Splitting with -EBUSY can fail for various reasons, but we
+ * have to handle one case explicitly for now: some mappings
+ * don't allow for splitting dirty folios; writeback will
+ * mark them clean again, including marking all page table
+ * entries mapping the folio read-only, to catch future write
+ * attempts.
+ *
+ * While the system should be writing back dirty folios in the
+ * background, we obtained this folio by looking up a writable
+ * page table entry. On these problematic mappings, writable
+ * page table entries imply dirty folios, preventing the
+ * split in the first place.
+ *
+ * To prevent a livelock when trigger writeback manually and
+ * letting the caller look up the folio again in the page
+ * table (turning it dirty), immediately try to split again.
+ *
+ * This is only a problem for some mappings (e.g., XFS);
+ * mappings that do not support writeback (e.g., shmem) are not
+ * affected.
*/
- folio_wait_writeback(folio);
- } else if (rc == -EBUSY) {
- /*
- * If we have tried a local drain and the folio refcount
- * still does not match our expected safe value, try with a
- * system wide drain. This is needed if the pagevecs holding
- * the page are on a different CPU.
- */
- if (local_drain) {
- lru_add_drain_all();
- /* We give up here, and let the caller try again */
- return -EAGAIN;
+ if (!folio_test_dirty(folio) || folio_test_anon(folio) ||
+ !folio->mapping || !mapping_can_writeback(folio->mapping)) {
+ folio_unlock(folio);
+ break;
}
+
/*
- * We are here if the folio refcount does not match the
- * expected safe value. The main culprits are usually
- * pagevecs. With lru_add_drain() we drain the pagevecs
- * on the local CPU so that hopefully the refcount will
- * reach the expected safe value.
+ * Ideally, we'd only trigger writeback on this exact folio. But
+ * there is no easy way to do that, so we'll stabilize the
+ * mapping while we still hold the folio lock, so we can drop
+ * the folio lock to trigger writeback on the range currently
+ * covered by the folio instead.
*/
- lru_add_drain();
- local_drain = true;
- /* And now we try again immediately after draining */
- goto again;
- } else if (rc == -ENXIO) {
- if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE))
- return -EFAULT;
- return -EAGAIN;
- }
- return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_make_secure);
+ mapping = folio->mapping;
+ lstart = folio_pos(folio);
+ lend = lstart + folio_size(folio) - 1;
+ inode = igrab(mapping->host);
+ folio_unlock(folio);
-int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
-{
- struct uv_cb_cts uvcb = {
- .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
- .header.len = sizeof(uvcb),
- .guest_handle = gmap->guest_handle,
- .gaddr = gaddr,
- };
+ if (unlikely(!inode))
+ break;
- return gmap_make_secure(gmap, gaddr, &uvcb);
+ filemap_write_and_wait_range(mapping, lstart, lend);
+ iput(mapping->host);
+ }
+ return -EAGAIN;
}
-EXPORT_SYMBOL_GPL(gmap_convert_to_secure);
-/**
- * gmap_destroy_page - Destroy a guest page.
- * @gmap: the gmap of the guest
- * @gaddr: the guest address to destroy
- *
- * An attempt will be made to destroy the given guest page. If the attempt
- * fails, an attempt is made to export the page. If both attempts fail, an
- * appropriate error is returned.
- */
-int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
+int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb)
{
struct vm_area_struct *vma;
- unsigned long uaddr;
- struct page *page;
+ struct folio_walk fw;
+ struct folio *folio;
int rc;
- rc = -EFAULT;
- mmap_read_lock(gmap->mm);
+ mmap_read_lock(mm);
+ vma = vma_lookup(mm, hva);
+ if (!vma) {
+ mmap_read_unlock(mm);
+ return -EFAULT;
+ }
+ folio = folio_walk_start(&fw, vma, hva, 0);
+ if (!folio) {
+ mmap_read_unlock(mm);
+ return -ENXIO;
+ }
- uaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(uaddr))
- goto out;
- vma = vma_lookup(gmap->mm, uaddr);
- if (!vma)
- goto out;
- /*
- * Huge pages should not be able to become secure
- */
- if (is_vm_hugetlb_page(vma))
- goto out;
-
- rc = 0;
- /* we take an extra reference here */
- page = follow_page(vma, uaddr, FOLL_WRITE | FOLL_GET);
- if (IS_ERR_OR_NULL(page))
- goto out;
- rc = uv_destroy_owned_page(page_to_phys(page));
+ folio_get(folio);
/*
- * Fault handlers can race; it is possible that two CPUs will fault
- * on the same secure page. One CPU can destroy the page, reboot,
- * re-enter secure mode and import it, while the second CPU was
- * stuck at the beginning of the handler. At some point the second
- * CPU will be able to progress, and it will not be able to destroy
- * the page. In that case we do not want to terminate the process,
- * we instead try to export the page.
+ * Secure pages cannot be huge and userspace should not combine both.
+ * In case userspace does it anyway this will result in an -EFAULT for
+ * the unpack. The guest thus never reaches secure mode.
+ * If userspace plays dirty tricks and decides to map huge pages at a
+ * later point in time, it will receive a segmentation fault or
+ * KVM_RUN will return -EFAULT.
*/
- if (rc)
- rc = uv_convert_owned_from_secure(page_to_phys(page));
- put_page(page);
-out:
- mmap_read_unlock(gmap->mm);
+ if (folio_test_hugetlb(folio))
+ rc = -EFAULT;
+ else if (folio_test_large(folio))
+ rc = -E2BIG;
+ else if (!pte_write(fw.pte) || (pte_val(fw.pte) & _PAGE_INVALID))
+ rc = -ENXIO;
+ else
+ rc = make_folio_secure(mm, folio, uvcb);
+ folio_walk_end(&fw, vma);
+ mmap_read_unlock(mm);
+
+ if (rc == -E2BIG || rc == -EBUSY) {
+ rc = s390_wiggle_split_folio(mm, folio);
+ if (!rc)
+ rc = -EAGAIN;
+ }
+ folio_put(folio);
+
return rc;
}
-EXPORT_SYMBOL_GPL(gmap_destroy_page);
+EXPORT_SYMBOL_GPL(make_hva_secure);
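
Callers of make_hva_secure() are expected to treat -EAGAIN as a transient condition and retry the fault. A minimal, hypothetical caller sketch (not part of this patch; the helper name and the cond_resched() placement are illustrative only):

	/* Hypothetical retry wrapper around make_hva_secure(); illustrative only. */
	static int make_hva_secure_retry(struct mm_struct *mm, unsigned long hva,
					 struct uv_cb_header *uvcb)
	{
		int rc;

		do {
			rc = make_hva_secure(mm, hva, uvcb);
			/* -EAGAIN: folio was drained/split, look it up again */
			if (rc == -EAGAIN)
				cond_resched();
		} while (rc == -EAGAIN);

		return rc;
	}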
/*
- * To be called with the page locked or with an extra reference! This will
- * prevent gmap_make_secure from touching the page concurrently. Having 2
- * parallel make_page_accessible is fine, as the UV calls will become a
- * no-op if the page is already exported.
+ * To be called with the folio locked or with an extra reference! This will
+ * prevent kvm_s390_pv_make_secure() from touching the folio concurrently.
+ * Having two parallel arch_make_folio_accessible() calls is fine, as the UV
+ * calls will become a no-op if the folio is already exported.
*/
-int arch_make_page_accessible(struct page *page)
+int arch_make_folio_accessible(struct folio *folio)
{
int rc = 0;
- /* Hugepage cannot be protected, so nothing to do */
- if (PageHuge(page))
+ /* Large folios cannot be secure */
+ if (unlikely(folio_test_large(folio)))
return 0;
/*
- * PG_arch_1 is used in 3 places:
- * 1. for kernel page tables during early boot
- * 2. for storage keys of huge pages and KVM
- * 3. As an indication that this page might be secure. This can
+ * PG_arch_1 is used in 2 places:
+ * 1. For storage keys of hugetlb folios and KVM
+ * 2. As an indication that this small folio might be secure. This can
* overindicate, e.g. we set the bit before calling
* convert_to_secure.
- * As secure pages are never huge, all 3 variants can co-exists.
+ * As secure pages are never large folios, both variants can co-exist.
*/
- if (!test_bit(PG_arch_1, &page->flags))
+ if (!test_bit(PG_arch_1, &folio->flags.f))
return 0;
- rc = uv_pin_shared(page_to_phys(page));
+ rc = uv_pin_shared(folio_to_phys(folio));
if (!rc) {
- clear_bit(PG_arch_1, &page->flags);
+ clear_bit(PG_arch_1, &folio->flags.f);
return 0;
}
- rc = uv_convert_from_secure(page_to_phys(page));
+ rc = uv_convert_from_secure(folio_to_phys(folio));
if (!rc) {
- clear_bit(PG_arch_1, &page->flags);
+ clear_bit(PG_arch_1, &folio->flags.f);
return 0;
}
return rc;
}
-EXPORT_SYMBOL_GPL(arch_make_page_accessible);
-
-#endif
+EXPORT_SYMBOL_GPL(arch_make_folio_accessible);
-#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
static ssize_t uv_query_facilities(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@@ -617,12 +643,32 @@ static struct kobj_attribute uv_query_supp_secret_types_attr =
static ssize_t uv_query_max_secrets(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- return sysfs_emit(buf, "%d\n", uv_info.max_secrets);
+ return sysfs_emit(buf, "%d\n",
+ uv_info.max_assoc_secrets + uv_info.max_retr_secrets);
}
static struct kobj_attribute uv_query_max_secrets_attr =
__ATTR(max_secrets, 0444, uv_query_max_secrets, NULL);
+static ssize_t uv_query_max_retr_secrets(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", uv_info.max_retr_secrets);
+}
+
+static struct kobj_attribute uv_query_max_retr_secrets_attr =
+ __ATTR(max_retr_secrets, 0444, uv_query_max_retr_secrets, NULL);
+
+static ssize_t uv_query_max_assoc_secrets(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%d\n", uv_info.max_assoc_secrets);
+}
+
+static struct kobj_attribute uv_query_max_assoc_secrets_attr =
+ __ATTR(max_assoc_secrets, 0444, uv_query_max_assoc_secrets, NULL);
+
static struct attribute *uv_query_attrs[] = {
&uv_query_facilities_attr.attr,
&uv_query_feature_indications_attr.attr,
@@ -640,34 +686,91 @@ static struct attribute *uv_query_attrs[] = {
&uv_query_supp_add_secret_pcf_attr.attr,
&uv_query_supp_secret_types_attr.attr,
&uv_query_max_secrets_attr.attr,
+ &uv_query_max_assoc_secrets_attr.attr,
+ &uv_query_max_retr_secrets_attr.attr,
NULL,
};
+static inline struct uv_cb_query_keys uv_query_keys(void)
+{
+ struct uv_cb_query_keys uvcb = {
+ .header.cmd = UVC_CMD_QUERY_KEYS,
+ .header.len = sizeof(uvcb)
+ };
+
+ uv_call(0, (uint64_t)&uvcb);
+ return uvcb;
+}
+
+static inline ssize_t emit_hash(struct uv_key_hash *hash, char *buf, int at)
+{
+ return sysfs_emit_at(buf, at, "%016llx%016llx%016llx%016llx\n",
+ hash->dword[0], hash->dword[1], hash->dword[2], hash->dword[3]);
+}
+
+static ssize_t uv_keys_host_key(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct uv_cb_query_keys uvcb = uv_query_keys();
+
+ return emit_hash(&uvcb.key_hashes[UVC_QUERY_KEYS_IDX_HK], buf, 0);
+}
+
+static struct kobj_attribute uv_keys_host_key_attr =
+ __ATTR(host_key, 0444, uv_keys_host_key, NULL);
+
+static ssize_t uv_keys_backup_host_key(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct uv_cb_query_keys uvcb = uv_query_keys();
+
+ return emit_hash(&uvcb.key_hashes[UVC_QUERY_KEYS_IDX_BACK_HK], buf, 0);
+}
+
+static struct kobj_attribute uv_keys_backup_host_key_attr =
+ __ATTR(backup_host_key, 0444, uv_keys_backup_host_key, NULL);
+
+static ssize_t uv_keys_all(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct uv_cb_query_keys uvcb = uv_query_keys();
+ ssize_t len = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(uvcb.key_hashes); i++)
+ len += emit_hash(uvcb.key_hashes + i, buf, len);
+
+ return len;
+}
+
+static struct kobj_attribute uv_keys_all_attr =
+ __ATTR(all, 0444, uv_keys_all, NULL);
+
static struct attribute_group uv_query_attr_group = {
.attrs = uv_query_attrs,
};
+static struct attribute *uv_keys_attrs[] = {
+ &uv_keys_host_key_attr.attr,
+ &uv_keys_backup_host_key_attr.attr,
+ &uv_keys_all_attr.attr,
+ NULL,
+};
+
+static struct attribute_group uv_keys_attr_group = {
+ .attrs = uv_keys_attrs,
+};
+
static ssize_t uv_is_prot_virt_guest(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- int val = 0;
-
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
- val = prot_virt_guest;
-#endif
- return sysfs_emit(buf, "%d\n", val);
+ return sysfs_emit(buf, "%d\n", prot_virt_guest);
}
static ssize_t uv_is_prot_virt_host(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- int val = 0;
-
-#if IS_ENABLED(CONFIG_KVM)
- val = prot_virt_host;
-#endif
-
- return sysfs_emit(buf, "%d\n", val);
+ return sysfs_emit(buf, "%d\n", prot_virt_host);
}
static struct kobj_attribute uv_prot_virt_guest =
@@ -683,9 +786,27 @@ static const struct attribute *uv_prot_virt_attrs[] = {
};
static struct kset *uv_query_kset;
+static struct kset *uv_keys_kset;
static struct kobject *uv_kobj;
-static int __init uv_info_init(void)
+static int __init uv_sysfs_dir_init(const struct attribute_group *grp,
+ struct kset **uv_dir_kset, const char *name)
+{
+ struct kset *kset;
+ int rc;
+
+ kset = kset_create_and_add(name, NULL, uv_kobj);
+ if (!kset)
+ return -ENOMEM;
+ *uv_dir_kset = kset;
+
+ rc = sysfs_create_group(&kset->kobj, grp);
+ if (rc)
+ kset_unregister(kset);
+ return rc;
+}
+
+static int __init uv_sysfs_init(void)
{
int rc = -ENOMEM;
@@ -700,17 +821,16 @@ static int __init uv_info_init(void)
if (rc)
goto out_kobj;
- uv_query_kset = kset_create_and_add("query", NULL, uv_kobj);
- if (!uv_query_kset) {
- rc = -ENOMEM;
+ rc = uv_sysfs_dir_init(&uv_query_attr_group, &uv_query_kset, "query");
+ if (rc)
goto out_ind_files;
- }
- rc = sysfs_create_group(&uv_query_kset->kobj, &uv_query_attr_group);
- if (!rc)
- return 0;
+ /* Get installed key hashes if available, ignore any errors */
+ if (test_bit_inv(BIT_UVC_CMD_QUERY_KEYS, uv_info.inst_calls_list))
+ uv_sysfs_dir_init(&uv_keys_attr_group, &uv_keys_kset, "keys");
+
+ return 0;
- kset_unregister(uv_query_kset);
out_ind_files:
sysfs_remove_files(uv_kobj, uv_prot_virt_attrs);
out_kobj:
@@ -718,5 +838,110 @@ out_kobj:
kobject_put(uv_kobj);
return rc;
}
-device_initcall(uv_info_init);
-#endif
+device_initcall(uv_sysfs_init);
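
The new attributes sit in sysfs next to the existing query directory. Assuming the uv kobject remains rooted at /sys/firmware/uv (an assumption, not mandated by this patch), a minimal userspace sketch to dump the installed key hashes could look like this:

	/* Userspace sketch: dump UV key hashes, assuming /sys/firmware/uv/. */
	#include <stdio.h>

	static void print_file(const char *path)
	{
		char line[256];
		FILE *f = fopen(path, "r");

		if (!f)
			return;
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
	}

	int main(void)
	{
		print_file("/sys/firmware/uv/keys/host_key");
		print_file("/sys/firmware/uv/keys/backup_host_key");
		print_file("/sys/firmware/uv/query/max_retr_secrets");
		return 0;
	}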
+
+/*
+ * find_secret_in_page() - search one page of secret metadata for a secret id.
+ * @secret_id: search pattern.
+ * @list: one page of secret metadata, as returned by the list-secrets UVC.
+ * @secret: output data, containing the secret's metadata.
+ *
+ * Return: 0 if a secret with the given id was found in @list; -ENOENT otherwise.
+ */
+static int find_secret_in_page(const u8 secret_id[UV_SECRET_ID_LEN],
+ const struct uv_secret_list *list,
+ struct uv_secret_list_item_hdr *secret)
+{
+ u16 i;
+
+ for (i = 0; i < list->total_num_secrets; i++) {
+ if (memcmp(secret_id, list->secrets[i].id, UV_SECRET_ID_LEN) == 0) {
+ *secret = list->secrets[i].hdr;
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+/**
+ * uv_find_secret() - search secret metadata for a given secret id.
+ * @secret_id: search pattern.
+ * @list: ephemeral buffer space
+ * @secret: output data, containing the secret's metadata.
+ *
+ * Searches the Ultravisor secret store for a secret with the given secret_id,
+ * walking the list in page-sized chunks.
+ *
+ * Context: might sleep.
+ *
+ * Return:
+ * * %0       - Entry found; @secret contains valid metadata.
+ * * %ENOENT  - No entry found.
+ * * %ENODEV  - Not supported: UV not available or command not available.
+ * * %EIO     - Other unexpected UV error.
+ */
+int uv_find_secret(const u8 secret_id[UV_SECRET_ID_LEN],
+ struct uv_secret_list *list,
+ struct uv_secret_list_item_hdr *secret)
+{
+ u16 start_idx = 0;
+ u16 list_rc;
+ int ret;
+
+ do {
+ uv_list_secrets(list, start_idx, &list_rc, NULL);
+ if (list_rc != UVC_RC_EXECUTED && list_rc != UVC_RC_MORE_DATA) {
+ if (list_rc == UVC_RC_INV_CMD)
+ return -ENODEV;
+ else
+ return -EIO;
+ }
+ ret = find_secret_in_page(secret_id, list, secret);
+ if (ret == 0)
+ return ret;
+ start_idx = list->next_secret_idx;
+ } while (list_rc == UVC_RC_MORE_DATA && start_idx < list->next_secret_idx);
+
+ return -ENOENT;
+}
+EXPORT_SYMBOL_GPL(uv_find_secret);
+
+/**
+ * uv_retrieve_secret() - get the secret value for the secret index.
+ * @secret_idx: Secret index for which the secret should be retrieved.
+ * @buf: Buffer to store retrieved secret.
+ * @buf_size: Size of the buffer. The correct buffer size is reported as part
+ *            of the secret metadata returned by uv_find_secret().
+ *
+ * Calls the Retrieve Secret UVC and translates the UV return code into an errno.
+ *
+ * Context: might sleep.
+ *
+ * Return:
+ * * %0 - Entry found; buffer contains a valid secret.
+ * * %ENOENT - No entry found or secret at the index is non-retrievable.
+ * * %ENODEV - Not supported: UV not available or command not available.
+ * * %EINVAL - Buffer too small for content.
+ * * %EIO - Other unexpected UV error.
+ */
+int uv_retrieve_secret(u16 secret_idx, u8 *buf, size_t buf_size)
+{
+ struct uv_cb_retr_secr uvcb = {
+ .header.len = sizeof(uvcb),
+ .header.cmd = UVC_CMD_RETR_SECRET,
+ .secret_idx = secret_idx,
+ .buf_addr = (u64)buf,
+ .buf_size = buf_size,
+ };
+
+ uv_call_sched(0, (u64)&uvcb);
+
+ switch (uvcb.header.rc) {
+ case UVC_RC_EXECUTED:
+ return 0;
+ case UVC_RC_INV_CMD:
+ return -ENODEV;
+ case UVC_RC_RETR_SECR_STORE_EMPTY:
+ case UVC_RC_RETR_SECR_INV_SECRET:
+ case UVC_RC_RETR_SECR_INV_IDX:
+ return -ENOENT;
+ case UVC_RC_RETR_SECR_BUF_SMALL:
+ return -EINVAL;
+ default:
+ return -EIO;
+ }
+}
+EXPORT_SYMBOL_GPL(uv_retrieve_secret);
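
uv_find_secret() and uv_retrieve_secret() are intended to be used as a pair: resolve the metadata by id, then fetch the value by index. The following in-kernel sketch is illustrative only; the ->index and ->length members of struct uv_secret_list_item_hdr are assumptions to be checked against <asm/uv.h>:

	/* Illustrative consumer sketch (not part of this patch). */
	static int fetch_secret_by_id(const u8 id[UV_SECRET_ID_LEN],
				      u8 *buf, size_t buf_size)
	{
		struct uv_secret_list_item_hdr hdr;
		struct uv_secret_list *list;
		int rc;

		list = kmalloc(sizeof(*list), GFP_KERNEL);
		if (!list)
			return -ENOMEM;
		rc = uv_find_secret(id, list, &hdr);
		kfree(list);
		if (rc)
			return rc;
		/* hdr.index/hdr.length are assumed field names, see <asm/uv.h> */
		if (hdr.length > buf_size)
			return -EINVAL;
		return uv_retrieve_secret(hdr.index, buf, buf_size);
	}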
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 2f967ac2b8e3..a27a90a199be 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -7,106 +7,22 @@
*/
#include <linux/binfmts.h>
-#include <linux/compat.h>
#include <linux/elf.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/smp.h>
-#include <linux/time_namespace.h>
#include <linux/random.h>
+#include <linux/vdso_datastore.h>
#include <vdso/datapage.h>
+#include <asm/vdso/vsyscall.h>
+#include <asm/alternative.h>
#include <asm/vdso.h>
-extern char vdso64_start[], vdso64_end[];
-extern char vdso32_start[], vdso32_end[];
-
-static struct vm_special_mapping vvar_mapping;
-
-static union vdso_data_store vdso_data_store __page_aligned_data;
-
-struct vdso_data *vdso_data = vdso_data_store.data;
-
-enum vvar_pages {
- VVAR_DATA_PAGE_OFFSET,
- VVAR_TIMENS_PAGE_OFFSET,
- VVAR_NR_PAGES,
-};
-
-#ifdef CONFIG_TIME_NS
-struct vdso_data *arch_get_vdso_data(void *vvar_page)
-{
- return (struct vdso_data *)(vvar_page);
-}
-
-/*
- * The VVAR page layout depends on whether a task belongs to the root or
- * non-root time namespace. Whenever a task changes its namespace, the VVAR
- * page tables are cleared and then they will be re-faulted with a
- * corresponding layout.
- * See also the comment near timens_setup_vdso_data() for details.
- */
-int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
-{
- struct mm_struct *mm = task->mm;
- VMA_ITERATOR(vmi, mm, 0);
- struct vm_area_struct *vma;
-
- mmap_read_lock(mm);
- for_each_vma(vmi, vma) {
- if (!vma_is_special_mapping(vma, &vvar_mapping))
- continue;
- zap_vma_pages(vma);
- break;
- }
- mmap_read_unlock(mm);
- return 0;
-}
-#endif
-
-static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
- struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- struct page *timens_page = find_timens_vvar_page(vma);
- unsigned long addr, pfn;
- vm_fault_t err;
-
- switch (vmf->pgoff) {
- case VVAR_DATA_PAGE_OFFSET:
- pfn = virt_to_pfn(vdso_data);
- if (timens_page) {
- /*
- * Fault in VVAR page too, since it will be accessed
- * to get clock data anyway.
- */
- addr = vmf->address + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE;
- err = vmf_insert_pfn(vma, addr, pfn);
- if (unlikely(err & VM_FAULT_ERROR))
- return err;
- pfn = page_to_pfn(timens_page);
- }
- break;
-#ifdef CONFIG_TIME_NS
- case VVAR_TIMENS_PAGE_OFFSET:
- /*
- * If a task belongs to a time namespace then a namespace
- * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
- * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
- * offset.
- * See also the comment near timens_setup_vdso_data().
- */
- if (!timens_page)
- return VM_FAULT_SIGBUS;
- pfn = virt_to_pfn(vdso_data);
- break;
-#endif /* CONFIG_TIME_NS */
- default:
- return VM_FAULT_SIGBUS;
- }
- return vmf_insert_pfn(vma, vmf->address, pfn);
-}
+extern char vdso_start[], vdso_end[];
static int vdso_mremap(const struct vm_special_mapping *sm,
struct vm_area_struct *vma)
@@ -115,17 +31,7 @@ static int vdso_mremap(const struct vm_special_mapping *sm,
return 0;
}
-static struct vm_special_mapping vvar_mapping = {
- .name = "[vvar]",
- .fault = vvar_fault,
-};
-
-static struct vm_special_mapping vdso64_mapping = {
- .name = "[vdso]",
- .mremap = vdso_mremap,
-};
-
-static struct vm_special_mapping vdso32_mapping = {
+static struct vm_special_mapping vdso_mapping = {
.name = "[vdso]",
.mremap = vdso_mremap,
};
@@ -140,39 +46,29 @@ early_initcall(vdso_getcpu_init); /* Must be called before SMP init */
static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len)
{
unsigned long vvar_start, vdso_text_start, vdso_text_len;
- struct vm_special_mapping *vdso_mapping;
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
int rc;
- BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
+ BUILD_BUG_ON(VDSO_NR_PAGES != __VDSO_PAGES);
if (mmap_write_lock_killable(mm))
return -EINTR;
- if (is_compat_task()) {
- vdso_text_len = vdso32_end - vdso32_start;
- vdso_mapping = &vdso32_mapping;
- } else {
- vdso_text_len = vdso64_end - vdso64_start;
- vdso_mapping = &vdso64_mapping;
- }
+ vdso_text_len = vdso_end - vdso_start;
vvar_start = get_unmapped_area(NULL, addr, vdso_mapping_len, 0, 0);
rc = vvar_start;
if (IS_ERR_VALUE(vvar_start))
goto out;
- vma = _install_special_mapping(mm, vvar_start, VVAR_NR_PAGES*PAGE_SIZE,
- VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
- VM_PFNMAP,
- &vvar_mapping);
+ vma = vdso_install_vvar_mapping(mm, vvar_start);
rc = PTR_ERR(vma);
if (IS_ERR(vma))
goto out;
- vdso_text_start = vvar_start + VVAR_NR_PAGES * PAGE_SIZE;
+ vdso_text_start = vvar_start + VDSO_NR_PAGES * PAGE_SIZE;
/* VM_MAYWRITE for COW so gdb can set breakpoints */
vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len,
- VM_READ|VM_EXEC|
+ VM_READ|VM_EXEC|VM_SEALED_SYSMAP|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- vdso_mapping);
+ &vdso_mapping);
if (IS_ERR(vma)) {
do_munmap(mm, vvar_start, PAGE_SIZE, NULL);
rc = PTR_ERR(vma);
@@ -212,18 +108,12 @@ static unsigned long vdso_addr(unsigned long start, unsigned long len)
unsigned long vdso_text_size(void)
{
- unsigned long size;
-
- if (is_compat_task())
- size = vdso32_end - vdso32_start;
- else
- size = vdso64_end - vdso64_start;
- return PAGE_ALIGN(size);
+ return PAGE_ALIGN(vdso_end - vdso_start);
}
unsigned long vdso_size(void)
{
- return vdso_text_size() + VVAR_NR_PAGES * PAGE_SIZE;
+ return vdso_text_size() + VDSO_NR_PAGES * PAGE_SIZE;
}
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
@@ -250,11 +140,26 @@ static struct page ** __init vdso_setup_pages(void *start, void *end)
return pagelist;
}
+static void vdso_apply_alternatives(void)
+{
+ const struct elf64_shdr *alt, *shdr;
+ struct alt_instr *start, *end;
+ const struct elf64_hdr *hdr;
+
+ hdr = (struct elf64_hdr *)vdso_start;
+ shdr = (void *)hdr + hdr->e_shoff;
+ alt = find_section(hdr, shdr, ".altinstructions");
+ if (!alt)
+ return;
+ start = (void *)hdr + alt->sh_offset;
+ end = (void *)hdr + alt->sh_offset + alt->sh_size;
+ apply_alternatives(start, end);
+}
+
static int __init vdso_init(void)
{
- vdso64_mapping.pages = vdso_setup_pages(vdso64_start, vdso64_end);
- if (IS_ENABLED(CONFIG_COMPAT))
- vdso32_mapping.pages = vdso_setup_pages(vdso32_start, vdso32_end);
+ vdso_apply_alternatives();
+ vdso_mapping.pages = vdso_setup_pages(vdso_start, vdso_end);
return 0;
}
arch_initcall(vdso_init);
diff --git a/arch/s390/kernel/vdso32/.gitignore b/arch/s390/kernel/vdso/.gitignore
index 5167384843b9..652e31d82582 100644
--- a/arch/s390/kernel/vdso32/.gitignore
+++ b/arch/s390/kernel/vdso/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-vdso32.lds
+vdso.lds
diff --git a/arch/s390/kernel/vdso/Makefile b/arch/s390/kernel/vdso/Makefile
new file mode 100644
index 000000000000..2fa12d4ac106
--- /dev/null
+++ b/arch/s390/kernel/vdso/Makefile
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: GPL-2.0
+# List of files in the vdso
+
+# Include the generic Makefile to check the built vdso.
+include $(srctree)/lib/vdso/Makefile.include
+obj-vdso = vdso_user_wrapper.o note.o vgetrandom-chacha.o
+obj-cvdso = vdso_generic.o getcpu.o vgetrandom.o
+VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE)
+CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE)
+CFLAGS_REMOVE_vgetrandom.o = $(VDSO_CFLAGS_REMOVE)
+CFLAGS_REMOVE_vdso_generic.o = $(VDSO_CFLAGS_REMOVE)
+
+ifneq ($(c-getrandom-y),)
+ CFLAGS_vgetrandom.o += -include $(c-getrandom-y)
+endif
+
+# Build rules
+
+targets := $(obj-vdso) $(obj-cvdso) vdso.so vdso.so.dbg
+obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
+obj-cvdso := $(addprefix $(obj)/, $(obj-cvdso))
+
+KBUILD_AFLAGS_VDSO := $(KBUILD_AFLAGS) -DBUILD_VDSO
+
+KBUILD_CFLAGS_VDSO := $(KBUILD_CFLAGS) -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING
+KBUILD_CFLAGS_VDSO := $(filter-out -mpacked-stack,$(KBUILD_CFLAGS_VDSO))
+KBUILD_CFLAGS_VDSO := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_VDSO))
+KBUILD_CFLAGS_VDSO := $(filter-out -munaligned-symbols,$(KBUILD_CFLAGS_VDSO))
+KBUILD_CFLAGS_VDSO := $(filter-out -fno-asynchronous-unwind-tables,$(KBUILD_CFLAGS_VDSO))
+KBUILD_CFLAGS_VDSO += -fPIC -fno-common -fno-builtin -fasynchronous-unwind-tables
+KBUILD_CFLAGS_VDSO += -fno-stack-protector
+ldflags-y := -shared -soname=linux-vdso.so.1 \
+ --hash-style=both --build-id=sha1 -T
+
+$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_VDSO)
+$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_VDSO)
+
+obj-y += vdso_wrapper.o
+targets += vdso.lds
+CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
+
+# Force dependency (incbin is bad)
+$(obj)/vdso_wrapper.o : $(obj)/vdso.so
+
+quiet_cmd_vdso_and_check = VDSO $@
+ cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check)
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) $(obj-cvdso) FORCE
+ $(call if_changed,vdso_and_check)
+
+# strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+ $(call if_changed,objcopy)
+
+# assembly rules for the .S files
+$(obj-vdso): %.o: %.S FORCE
+ $(call if_changed_dep,vdsoas)
+
+$(obj-cvdso): %.o: %.c FORCE
+ $(call if_changed_dep,vdsocc)
+
+# actual build commands
+quiet_cmd_vdsoas = VDSOA $@
+ cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $<
+quiet_cmd_vdsocc = VDSOC $@
+ cmd_vdsocc = $(CC) $(c_flags) -c -o $@ $<
+
+# Generate VDSO offsets using helper script
+gen-vdsosym := $(src)/gen_vdso_offsets.sh
+quiet_cmd_vdsosym = VDSOSYM $@
+ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
+
+include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
+ $(call if_changed,vdsosym)
diff --git a/arch/s390/kernel/vdso64/gen_vdso_offsets.sh b/arch/s390/kernel/vdso/gen_vdso_offsets.sh
index 37f05cb38dad..359982fb002d 100755
--- a/arch/s390/kernel/vdso64/gen_vdso_offsets.sh
+++ b/arch/s390/kernel/vdso/gen_vdso_offsets.sh
@@ -12,4 +12,4 @@
#
LC_ALL=C
-sed -n 's/\([0-9a-f]*\) . __kernel_\(.*\)/\#define vdso64_offset_\2\t0x\1/p'
+sed -n 's/\([0-9a-f]*\) . __kernel_\(.*\)/\#define vdso_offset_\2\t0x\1/p'
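
With the merged 64-bit-only vDSO the generated offsets lose the "64" infix; for a symbol such as __kernel_clock_gettime the script now emits a line of the following form (the offset value is illustrative, not taken from an actual build):

	#define vdso_offset_clock_gettime	0x11a0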
diff --git a/arch/s390/kernel/vdso64/getcpu.c b/arch/s390/kernel/vdso/getcpu.c
index 5c5d4a848b76..5c5d4a848b76 100644
--- a/arch/s390/kernel/vdso64/getcpu.c
+++ b/arch/s390/kernel/vdso/getcpu.c
diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso/note.S
index db19d0680a0a..db19d0680a0a 100644
--- a/arch/s390/kernel/vdso32/note.S
+++ b/arch/s390/kernel/vdso/note.S
diff --git a/arch/s390/kernel/vdso64/vdso.h b/arch/s390/kernel/vdso/vdso.h
index 34c7a2312f9d..8cff033dd854 100644
--- a/arch/s390/kernel/vdso64/vdso.h
+++ b/arch/s390/kernel/vdso/vdso.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ARCH_S390_KERNEL_VDSO64_VDSO_H
-#define __ARCH_S390_KERNEL_VDSO64_VDSO_H
+#ifndef __ARCH_S390_KERNEL_VDSO_VDSO_H
+#define __ARCH_S390_KERNEL_VDSO_VDSO_H
#include <vdso/datapage.h>
@@ -10,5 +10,6 @@ int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unuse
int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts);
int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts);
+ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len);
-#endif /* __ARCH_S390_KERNEL_VDSO64_VDSO_H */
+#endif /* __ARCH_S390_KERNEL_VDSO_VDSO_H */
diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso/vdso.lds.S
index 37e2a505e81d..7bec4de0e8e0 100644
--- a/arch/s390/kernel/vdso64/vdso64.lds.S
+++ b/arch/s390/kernel/vdso/vdso.lds.S
@@ -4,19 +4,20 @@
* library
*/
+#include <asm/vdso/vsyscall.h>
#include <asm/page.h>
#include <asm/vdso.h>
+#include <asm-generic/vmlinux.lds.h>
+#include <vdso/datapage.h>
OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
OUTPUT_ARCH(s390:64-bit)
SECTIONS
{
- PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
-#ifdef CONFIG_TIME_NS
- PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
-#endif
- . = VDSO_LBASE + SIZEOF_HEADERS;
+ VDSO_VVAR_SYMS
+
+ . = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
@@ -42,6 +43,10 @@ SECTIONS
.rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
.rodata1 : { *(.rodata1) }
+ . = ALIGN(8);
+ .altinstructions : { *(.altinstructions) }
+ .altinstr_replacement : { *(.altinstr_replacement) }
+
.dynamic : { *(.dynamic) } :text :dynamic
.eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
@@ -55,47 +60,9 @@ SECTIONS
_end = .;
PROVIDE(end = .);
- /*
- * Stabs debugging sections are here too.
- */
- .stab 0 : { *(.stab) }
- .stabstr 0 : { *(.stabstr) }
- .stab.excl 0 : { *(.stab.excl) }
- .stab.exclstr 0 : { *(.stab.exclstr) }
- .stab.index 0 : { *(.stab.index) }
- .stab.indexstr 0 : { *(.stab.indexstr) }
+ STABS_DEBUG
+ DWARF_DEBUG
.comment 0 : { *(.comment) }
-
- /*
- * DWARF debug sections.
- * Symbols in the DWARF debugging sections are relative to the
- * beginning of the section so we begin them at 0.
- */
- /* DWARF 1 */
- .debug 0 : { *(.debug) }
- .line 0 : { *(.line) }
- /* GNU DWARF 1 extensions */
- .debug_srcinfo 0 : { *(.debug_srcinfo) }
- .debug_sfnames 0 : { *(.debug_sfnames) }
- /* DWARF 1.1 and DWARF 2 */
- .debug_aranges 0 : { *(.debug_aranges) }
- .debug_pubnames 0 : { *(.debug_pubnames) }
- /* DWARF 2 */
- .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
- .debug_abbrev 0 : { *(.debug_abbrev) }
- .debug_line 0 : { *(.debug_line) }
- .debug_frame 0 : { *(.debug_frame) }
- .debug_str 0 : { *(.debug_str) }
- .debug_loc 0 : { *(.debug_loc) }
- .debug_macinfo 0 : { *(.debug_macinfo) }
- /* SGI/MIPS DWARF 2 extensions */
- .debug_weaknames 0 : { *(.debug_weaknames) }
- .debug_funcnames 0 : { *(.debug_funcnames) }
- .debug_typenames 0 : { *(.debug_typenames) }
- .debug_varnames 0 : { *(.debug_varnames) }
- /* DWARF 3 */
- .debug_pubtypes 0 : { *(.debug_pubtypes) }
- .debug_ranges 0 : { *(.debug_ranges) }
.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
/DISCARD/ : {
@@ -140,6 +107,7 @@ VERSION
__kernel_restart_syscall;
__kernel_rt_sigreturn;
__kernel_sigreturn;
+ __kernel_getrandom;
local: *;
};
}
diff --git a/arch/s390/kernel/vdso64/vdso64_generic.c b/arch/s390/kernel/vdso/vdso_generic.c
index a9aa75643c08..a9aa75643c08 100644
--- a/arch/s390/kernel/vdso64/vdso64_generic.c
+++ b/arch/s390/kernel/vdso/vdso_generic.c
diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso/vdso_user_wrapper.S
index e26e68675c08..aa06c85bcbd3 100644
--- a/arch/s390/kernel/vdso64/vdso_user_wrapper.S
+++ b/arch/s390/kernel/vdso/vdso_user_wrapper.S
@@ -13,10 +13,7 @@
* for details.
*/
.macro vdso_func func
- .globl __kernel_\func
- .type __kernel_\func,@function
- __ALIGN
-__kernel_\func:
+SYM_FUNC_START(__kernel_\func)
CFI_STARTPROC
aghi %r15,-STACK_FRAME_VDSO_OVERHEAD
CFI_DEF_CFA_OFFSET (STACK_FRAME_USER_OVERHEAD + STACK_FRAME_VDSO_OVERHEAD)
@@ -32,7 +29,7 @@ __kernel_\func:
CFI_RESTORE 15
br %r14
CFI_ENDPROC
- .size __kernel_\func,.-__kernel_\func
+SYM_FUNC_END(__kernel_\func)
.endm
vdso_func gettimeofday
@@ -41,16 +38,13 @@ vdso_func clock_gettime
vdso_func getcpu
.macro vdso_syscall func,syscall
- .globl __kernel_\func
- .type __kernel_\func,@function
- __ALIGN
-__kernel_\func:
+SYM_FUNC_START(__kernel_\func)
CFI_STARTPROC
svc \syscall
/* Make sure we notice when a syscall returns, which shouldn't happen */
.word 0
CFI_ENDPROC
- .size __kernel_\func,.-__kernel_\func
+SYM_FUNC_END(__kernel_\func)
.endm
vdso_syscall restart_syscall,__NR_restart_syscall
diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso/vdso_wrapper.S
index de2fb930471a..f69e62a14978 100644
--- a/arch/s390/kernel/vdso32/vdso32_wrapper.S
+++ b/arch/s390/kernel/vdso/vdso_wrapper.S
@@ -5,11 +5,11 @@
__PAGE_ALIGNED_DATA
- .globl vdso32_start, vdso32_end
+ .globl vdso_start, vdso_end
.balign PAGE_SIZE
-vdso32_start:
- .incbin "arch/s390/kernel/vdso32/vdso32.so"
+vdso_start:
+ .incbin "arch/s390/kernel/vdso/vdso.so"
.balign PAGE_SIZE
-vdso32_end:
+vdso_end:
.previous
diff --git a/arch/s390/kernel/vdso/vgetrandom-chacha.S b/arch/s390/kernel/vdso/vgetrandom-chacha.S
new file mode 100644
index 000000000000..09c034c2f853
--- /dev/null
+++ b/arch/s390/kernel/vdso/vgetrandom-chacha.S
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/stringify.h>
+#include <linux/linkage.h>
+#include <asm/alternative.h>
+#include <asm/dwarf.h>
+#include <asm/fpu-insn.h>
+
+#define STATE0 %v0
+#define STATE1 %v1
+#define STATE2 %v2
+#define STATE3 %v3
+#define COPY0 %v4
+#define COPY1 %v5
+#define COPY2 %v6
+#define COPY3 %v7
+#define BEPERM %v19
+#define TMP0 %v20
+#define TMP1 %v21
+#define TMP2 %v22
+#define TMP3 %v23
+
+ .section .rodata
+
+ .balign 32
+SYM_DATA_START_LOCAL(chacha20_constants)
+ .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral
+ .long 0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap
+SYM_DATA_END(chacha20_constants)
+
+ .text
+/*
+ * s390 ChaCha20 implementation meant for vDSO. Produces a given positive
+ * number of blocks of output with nonce 0, taking an input key and an
+ * 8-byte counter. Does not spill to the stack.
+ *
+ * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes,
+ * const uint8_t *key,
+ * uint32_t *counter,
+ * size_t nblocks)
+ */
+SYM_FUNC_START(__arch_chacha20_blocks_nostack)
+ CFI_STARTPROC
+ larl %r1,chacha20_constants
+
+ /* COPY0 = "expand 32-byte k" */
+ VL COPY0,0,,%r1
+
+ /* BEPERM = byte selectors for VPERM */
+ ALTERNATIVE __stringify(VL BEPERM,16,,%r1), "brcl 0,0", ALT_FACILITY(148)
+
+ /* COPY1,COPY2 = key */
+ VLM COPY1,COPY2,0,%r3
+
+ /* COPY3 = counter || zero nonce */
+ lg %r3,0(%r4)
+ VZERO COPY3
+ VLVGG COPY3,%r3,0
+
+ lghi %r1,0
+.Lblock:
+ VLR STATE0,COPY0
+ VLR STATE1,COPY1
+ VLR STATE2,COPY2
+ VLR STATE3,COPY3
+
+ lghi %r0,10
+.Ldoubleround:
+ /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
+ VAF STATE0,STATE0,STATE1
+ VX STATE3,STATE3,STATE0
+ VERLLF STATE3,STATE3,16
+
+ /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */
+ VAF STATE2,STATE2,STATE3
+ VX STATE1,STATE1,STATE2
+ VERLLF STATE1,STATE1,12
+
+ /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */
+ VAF STATE0,STATE0,STATE1
+ VX STATE3,STATE3,STATE0
+ VERLLF STATE3,STATE3,8
+
+ /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */
+ VAF STATE2,STATE2,STATE3
+ VX STATE1,STATE1,STATE2
+ VERLLF STATE1,STATE1,7
+
+ /* STATE1[0,1,2,3] = STATE1[1,2,3,0] */
+ VSLDB STATE1,STATE1,STATE1,4
+ /* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
+ VSLDB STATE2,STATE2,STATE2,8
+ /* STATE3[0,1,2,3] = STATE3[3,0,1,2] */
+ VSLDB STATE3,STATE3,STATE3,12
+
+ /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
+ VAF STATE0,STATE0,STATE1
+ VX STATE3,STATE3,STATE0
+ VERLLF STATE3,STATE3,16
+
+ /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */
+ VAF STATE2,STATE2,STATE3
+ VX STATE1,STATE1,STATE2
+ VERLLF STATE1,STATE1,12
+
+ /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */
+ VAF STATE0,STATE0,STATE1
+ VX STATE3,STATE3,STATE0
+ VERLLF STATE3,STATE3,8
+
+ /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */
+ VAF STATE2,STATE2,STATE3
+ VX STATE1,STATE1,STATE2
+ VERLLF STATE1,STATE1,7
+
+ /* STATE1[0,1,2,3] = STATE1[3,0,1,2] */
+ VSLDB STATE1,STATE1,STATE1,12
+ /* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
+ VSLDB STATE2,STATE2,STATE2,8
+ /* STATE3[0,1,2,3] = STATE3[1,2,3,0] */
+ VSLDB STATE3,STATE3,STATE3,4
+ brctg %r0,.Ldoubleround
+
+ /* OUTPUT0 = STATE0 + COPY0 */
+ VAF STATE0,STATE0,COPY0
+ /* OUTPUT1 = STATE1 + COPY1 */
+ VAF STATE1,STATE1,COPY1
+ /* OUTPUT2 = STATE2 + COPY2 */
+ VAF STATE2,STATE2,COPY2
+ /* OUTPUT3 = STATE3 + COPY3 */
+ VAF STATE3,STATE3,COPY3
+
+ ALTERNATIVE \
+ __stringify( \
+ /* Convert STATE to little endian and store to OUTPUT */\
+ VPERM TMP0,STATE0,STATE0,BEPERM; \
+ VPERM TMP1,STATE1,STATE1,BEPERM; \
+ VPERM TMP2,STATE2,STATE2,BEPERM; \
+ VPERM TMP3,STATE3,STATE3,BEPERM; \
+ VSTM TMP0,TMP3,0,%r2), \
+ __stringify( \
+ /* 32 bit wise little endian store to OUTPUT */ \
+ VSTBRF STATE0,0,,%r2; \
+ VSTBRF STATE1,16,,%r2; \
+ VSTBRF STATE2,32,,%r2; \
+ VSTBRF STATE3,48,,%r2; \
+ brcl 0,0), \
+ ALT_FACILITY(148)
+
+ /* ++COPY3.COUNTER */
+ /* alsih %r3,1 */
+ .insn rilu,0xcc0a00000000,%r3,1
+ alcr %r3,%r1
+ VLVGG COPY3,%r3,0
+
+ /* OUTPUT += 64, --NBLOCKS */
+ aghi %r2,64
+ brctg %r5,.Lblock
+
+ /* COUNTER = COPY3.COUNTER */
+ stg %r3,0(%r4)
+
+ /* Zero out potentially sensitive regs */
+ VZERO STATE0
+ VZERO STATE1
+ VZERO STATE2
+ VZERO STATE3
+ VZERO COPY1
+ VZERO COPY2
+
+ /* Early exit if TMP0-TMP3 have not been used */
+ ALTERNATIVE "nopr", "br %r14", ALT_FACILITY(148)
+
+ VZERO TMP0
+ VZERO TMP1
+ VZERO TMP2
+ VZERO TMP3
+
+ br %r14
+ CFI_ENDPROC
+SYM_FUNC_END(__arch_chacha20_blocks_nostack)
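
For reference, the vector double round above corresponds to the standard ChaCha20 formulation over a 4x4 word state; the VSLDB rotations take the place of the diagonal-round indexing. A plain C sketch of the same double round (an illustrative reference, not the code path the vDSO executes):

	#include <stdint.h>

	#define ROTL32(v, n) (((v) << (n)) | ((v) >> (32 - (n))))

	static void quarter_round(uint32_t x[16], int a, int b, int c, int d)
	{
		x[a] += x[b]; x[d] = ROTL32(x[d] ^ x[a], 16);
		x[c] += x[d]; x[b] = ROTL32(x[b] ^ x[c], 12);
		x[a] += x[b]; x[d] = ROTL32(x[d] ^ x[a], 8);
		x[c] += x[d]; x[b] = ROTL32(x[b] ^ x[c], 7);
	}

	/* One double round: four column rounds, then four diagonal rounds. */
	static void double_round(uint32_t x[16])
	{
		quarter_round(x, 0, 4,  8, 12);
		quarter_round(x, 1, 5,  9, 13);
		quarter_round(x, 2, 6, 10, 14);
		quarter_round(x, 3, 7, 11, 15);
		quarter_round(x, 0, 5, 10, 15);
		quarter_round(x, 1, 6, 11, 12);
		quarter_round(x, 2, 7,  8, 13);
		quarter_round(x, 3, 4,  9, 14);
	}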
diff --git a/arch/s390/kernel/vdso/vgetrandom.c b/arch/s390/kernel/vdso/vgetrandom.c
new file mode 100644
index 000000000000..b5268b507fb5
--- /dev/null
+++ b/arch/s390/kernel/vdso/vgetrandom.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <asm/facility.h>
+#include <uapi/asm-generic/errno.h>
+#include "vdso.h"
+
+ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
+{
+ if (test_facility(129))
+ return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
+ if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags))
+ return -ENOSYS;
+ return getrandom_syscall(buffer, len, flags);
+}
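
Userspace normally reaches __kernel_getrandom() through the C library's getrandom() wrapper once a sufficiently new libc picks up the vDSO symbol; the acceleration is transparent to the caller. A minimal, portable usage example:

	#include <stddef.h>
	#include <stdio.h>
	#include <sys/random.h>

	int main(void)
	{
		unsigned char buf[32];

		/* With a vDSO-aware libc this avoids a syscall on the fast path. */
		if (getrandom(buf, sizeof(buf), 0) != sizeof(buf))
			return 1;
		for (size_t i = 0; i < sizeof(buf); i++)
			printf("%02x", buf[i]);
		printf("\n");
		return 0;
	}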
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
deleted file mode 100644
index 2c5afb88d298..000000000000
--- a/arch/s390/kernel/vdso32/Makefile
+++ /dev/null
@@ -1,64 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# List of files in the vdso
-
-# Include the generic Makefile to check the built vdso.
-include $(srctree)/lib/vdso/Makefile
-obj-vdso32 = vdso_user_wrapper-32.o note-32.o
-
-# Build rules
-
-targets := $(obj-vdso32) vdso32.so vdso32.so.dbg
-obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
-
-KBUILD_AFLAGS += -DBUILD_VDSO
-KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING
-
-KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
-KBUILD_AFLAGS_32 += -m31 -s
-
-KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
-KBUILD_CFLAGS_32 := $(filter-out -mpacked-stack,$(KBUILD_CFLAGS))
-KBUILD_CFLAGS_32 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_32))
-KBUILD_CFLAGS_32 := $(filter-out -fno-asynchronous-unwind-tables,$(KBUILD_CFLAGS_32))
-KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin -fasynchronous-unwind-tables
-
-LDFLAGS_vdso32.so.dbg += -shared -soname=linux-vdso32.so.1 \
- --hash-style=both --build-id=sha1 -melf_s390 -T
-
-$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
-$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
-
-obj-y += vdso32_wrapper.o
-targets += vdso32.lds
-CPPFLAGS_vdso32.lds += -P -C -U$(ARCH)
-
-# Force dependency (incbin is bad)
-$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
-
-quiet_cmd_vdso_and_check = VDSO $@
- cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check)
-
-$(obj)/vdso32.so.dbg: $(obj)/vdso32.lds $(obj-vdso32) FORCE
- $(call if_changed,vdso_and_check)
-
-# strip rule for the .so file
-$(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg FORCE
- $(call if_changed,objcopy)
-
-$(obj-vdso32): %-32.o: %.S FORCE
- $(call if_changed_dep,vdso32as)
-
-# actual build commands
-quiet_cmd_vdso32as = VDSO32A $@
- cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $<
-quiet_cmd_vdso32cc = VDSO32C $@
- cmd_vdso32cc = $(CC) $(c_flags) -c -o $@ $<
-
-# Generate VDSO offsets using helper script
-gen-vdsosym := $(src)/gen_vdso_offsets.sh
-quiet_cmd_vdsosym = VDSOSYM $@
- cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
-
-include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE
- $(call if_changed,vdsosym)
diff --git a/arch/s390/kernel/vdso32/gen_vdso_offsets.sh b/arch/s390/kernel/vdso32/gen_vdso_offsets.sh
deleted file mode 100755
index 9c4f951e227d..000000000000
--- a/arch/s390/kernel/vdso32/gen_vdso_offsets.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-#
-# Match symbols in the DSO that look like VDSO_*; produce a header file
-# of constant offsets into the shared object.
-#
-# Doing this inside the Makefile will break the $(filter-out) function,
-# causing Kbuild to rebuild the vdso-offsets header file every time.
-#
-# Inspired by arm64 version.
-#
-
-LC_ALL=C
-sed -n 's/\([0-9a-f]*\) . __kernel_compat_\(.*\)/\#define vdso32_offset_\2\t0x\1/p'
diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S
deleted file mode 100644
index 65b9513a5a0e..000000000000
--- a/arch/s390/kernel/vdso32/vdso32.lds.S
+++ /dev/null
@@ -1,141 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This is the infamous ld script for the 64 bits vdso
- * library
- */
-
-#include <asm/page.h>
-#include <asm/vdso.h>
-
-OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
-OUTPUT_ARCH(s390:31-bit)
-
-SECTIONS
-{
- PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
-#ifdef CONFIG_TIME_NS
- PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
-#endif
- . = VDSO_LBASE + SIZEOF_HEADERS;
-
- .hash : { *(.hash) } :text
- .gnu.hash : { *(.gnu.hash) }
- .dynsym : { *(.dynsym) }
- .dynstr : { *(.dynstr) }
- .gnu.version : { *(.gnu.version) }
- .gnu.version_d : { *(.gnu.version_d) }
- .gnu.version_r : { *(.gnu.version_r) }
-
- .note : { *(.note.*) } :text :note
-
- . = ALIGN(16);
- .text : {
- *(.text .stub .text.* .gnu.linkonce.t.*)
- } :text
- PROVIDE(__etext = .);
- PROVIDE(_etext = .);
- PROVIDE(etext = .);
-
- /*
- * Other stuff is appended to the text segment:
- */
- .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
- .rodata1 : { *(.rodata1) }
-
- .dynamic : { *(.dynamic) } :text :dynamic
-
- .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
- .eh_frame : { KEEP (*(.eh_frame)) } :text
- .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) }
-
- .rela.dyn ALIGN(8) : { *(.rela.dyn) }
- .got ALIGN(8) : { *(.got .toc) }
- .got.plt ALIGN(8) : { *(.got.plt) }
-
- _end = .;
- PROVIDE(end = .);
-
- /*
- * Stabs debugging sections are here too.
- */
- .stab 0 : { *(.stab) }
- .stabstr 0 : { *(.stabstr) }
- .stab.excl 0 : { *(.stab.excl) }
- .stab.exclstr 0 : { *(.stab.exclstr) }
- .stab.index 0 : { *(.stab.index) }
- .stab.indexstr 0 : { *(.stab.indexstr) }
- .comment 0 : { *(.comment) }
-
- /*
- * DWARF debug sections.
- * Symbols in the DWARF debugging sections are relative to the
- * beginning of the section so we begin them at 0.
- */
- /* DWARF 1 */
- .debug 0 : { *(.debug) }
- .line 0 : { *(.line) }
- /* GNU DWARF 1 extensions */
- .debug_srcinfo 0 : { *(.debug_srcinfo) }
- .debug_sfnames 0 : { *(.debug_sfnames) }
- /* DWARF 1.1 and DWARF 2 */
- .debug_aranges 0 : { *(.debug_aranges) }
- .debug_pubnames 0 : { *(.debug_pubnames) }
- /* DWARF 2 */
- .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
- .debug_abbrev 0 : { *(.debug_abbrev) }
- .debug_line 0 : { *(.debug_line) }
- .debug_frame 0 : { *(.debug_frame) }
- .debug_str 0 : { *(.debug_str) }
- .debug_loc 0 : { *(.debug_loc) }
- .debug_macinfo 0 : { *(.debug_macinfo) }
- /* SGI/MIPS DWARF 2 extensions */
- .debug_weaknames 0 : { *(.debug_weaknames) }
- .debug_funcnames 0 : { *(.debug_funcnames) }
- .debug_typenames 0 : { *(.debug_typenames) }
- .debug_varnames 0 : { *(.debug_varnames) }
- /* DWARF 3 */
- .debug_pubtypes 0 : { *(.debug_pubtypes) }
- .debug_ranges 0 : { *(.debug_ranges) }
- .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
-
- /DISCARD/ : {
- *(.note.GNU-stack)
- *(.branch_lt)
- *(.data .data.* .gnu.linkonce.d.* .sdata*)
- *(.bss .sbss .dynbss .dynsbss)
- }
-}
-
-/*
- * Very old versions of ld do not recognize this name token; use the constant.
- */
-#define PT_GNU_EH_FRAME 0x6474e550
-
-/*
- * We must supply the ELF program headers explicitly to get just one
- * PT_LOAD segment, and set the flags explicitly to make segments read-only.
- */
-PHDRS
-{
- text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
- dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
- note PT_NOTE FLAGS(4); /* PF_R */
- eh_frame_hdr PT_GNU_EH_FRAME;
-}
-
-/*
- * This controls what symbols we export from the DSO.
- */
-VERSION
-{
- VDSO_VERSION_STRING {
- global:
- /*
- * Has to be there for the kernel to find
- */
- __kernel_compat_restart_syscall;
- __kernel_compat_rt_sigreturn;
- __kernel_compat_sigreturn;
- local: *;
- };
-}
diff --git a/arch/s390/kernel/vdso32/vdso_user_wrapper.S b/arch/s390/kernel/vdso32/vdso_user_wrapper.S
deleted file mode 100644
index 2e645003fdaf..000000000000
--- a/arch/s390/kernel/vdso32/vdso_user_wrapper.S
+++ /dev/null
@@ -1,22 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#include <linux/linkage.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-
-.macro vdso_syscall func,syscall
- .globl __kernel_compat_\func
- .type __kernel_compat_\func,@function
- __ALIGN
-__kernel_compat_\func:
- CFI_STARTPROC
- svc \syscall
- /* Make sure we notice when a syscall returns, which shouldn't happen */
- .word 0
- CFI_ENDPROC
- .size __kernel_compat_\func,.-__kernel_compat_\func
-.endm
-
-vdso_syscall restart_syscall,__NR_restart_syscall
-vdso_syscall sigreturn,__NR_sigreturn
-vdso_syscall rt_sigreturn,__NR_rt_sigreturn
diff --git a/arch/s390/kernel/vdso64/.gitignore b/arch/s390/kernel/vdso64/.gitignore
deleted file mode 100644
index 4ec80685fecc..000000000000
--- a/arch/s390/kernel/vdso64/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-vdso64.lds
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
deleted file mode 100644
index ba19c0ca7c87..000000000000
--- a/arch/s390/kernel/vdso64/Makefile
+++ /dev/null
@@ -1,74 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# List of files in the vdso
-
-# Include the generic Makefile to check the built vdso.
-include $(srctree)/lib/vdso/Makefile
-obj-vdso64 = vdso_user_wrapper.o note.o
-obj-cvdso64 = vdso64_generic.o getcpu.o
-VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK)
-CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE)
-CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE)
-
-# Build rules
-
-targets := $(obj-vdso64) $(obj-cvdso64) vdso64.so vdso64.so.dbg
-obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
-obj-cvdso64 := $(addprefix $(obj)/, $(obj-cvdso64))
-
-KBUILD_AFLAGS += -DBUILD_VDSO
-KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING
-
-KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS))
-KBUILD_AFLAGS_64 += -m64
-
-KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
-KBUILD_CFLAGS_64 := $(filter-out -mpacked-stack,$(KBUILD_CFLAGS_64))
-KBUILD_CFLAGS_64 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_64))
-KBUILD_CFLAGS_64 := $(filter-out -munaligned-symbols,$(KBUILD_CFLAGS_64))
-KBUILD_CFLAGS_64 := $(filter-out -fno-asynchronous-unwind-tables,$(KBUILD_CFLAGS_64))
-KBUILD_CFLAGS_64 += -m64 -fPIC -fno-common -fno-builtin -fasynchronous-unwind-tables
-ldflags-y := -shared -soname=linux-vdso64.so.1 \
- --hash-style=both --build-id=sha1 -T
-
-$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
-$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64)
-
-obj-y += vdso64_wrapper.o
-targets += vdso64.lds
-CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
-
-# Force dependency (incbin is bad)
-$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
-
-quiet_cmd_vdso_and_check = VDSO $@
- cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check)
-
-# link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) $(obj-cvdso64) FORCE
- $(call if_changed,vdso_and_check)
-
-# strip rule for the .so file
-$(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg FORCE
- $(call if_changed,objcopy)
-
-# assembly rules for the .S files
-$(obj-vdso64): %.o: %.S FORCE
- $(call if_changed_dep,vdso64as)
-
-$(obj-cvdso64): %.o: %.c FORCE
- $(call if_changed_dep,vdso64cc)
-
-# actual build commands
-quiet_cmd_vdso64as = VDSO64A $@
- cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
-quiet_cmd_vdso64cc = VDSO64C $@
- cmd_vdso64cc = $(CC) $(c_flags) -c -o $@ $<
-
-# Generate VDSO offsets using helper script
-gen-vdsosym := $(src)/gen_vdso_offsets.sh
-quiet_cmd_vdsosym = VDSOSYM $@
- cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
-
-include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE
- $(call if_changed,vdsosym)
diff --git a/arch/s390/kernel/vdso64/note.S b/arch/s390/kernel/vdso64/note.S
deleted file mode 100644
index db19d0680a0a..000000000000
--- a/arch/s390/kernel/vdso64/note.S
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
- * Here we can supply some information useful to userland.
- */
-
-#include <linux/uts.h>
-#include <linux/version.h>
-#include <linux/elfnote.h>
-
-ELFNOTE_START(Linux, 0, "a")
- .long LINUX_VERSION_CODE
-ELFNOTE_END
diff --git a/arch/s390/kernel/vdso64/vdso64_wrapper.S b/arch/s390/kernel/vdso64/vdso64_wrapper.S
deleted file mode 100644
index 672184998623..000000000000
--- a/arch/s390/kernel/vdso64/vdso64_wrapper.S
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <asm/page.h>
-
- __PAGE_ALIGNED_DATA
-
- .globl vdso64_start, vdso64_end
- .balign PAGE_SIZE
-vdso64_start:
- .incbin "arch/s390/kernel/vdso64/vdso64.so"
- .balign PAGE_SIZE
-vdso64_end:
-
- .previous
diff --git a/arch/s390/kernel/vmcore_info.c b/arch/s390/kernel/vmcore_info.c
index 23f7d7619a99..cc8933e04ff7 100644
--- a/arch/s390/kernel/vmcore_info.c
+++ b/arch/s390/kernel/vmcore_info.c
@@ -1,8 +1,9 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/vmcore_info.h>
-#include <asm/abs_lowcore.h>
#include <linux/mm.h>
+#include <asm/abs_lowcore.h>
+#include <asm/sections.h>
#include <asm/setup.h>
void arch_crash_save_vmcoreinfo(void)
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index a1ce3925ec71..53bcbb91bb9b 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -39,7 +39,7 @@ PHDRS {
SECTIONS
{
- . = __START_KERNEL;
+ . = TEXT_OFFSET;
.text : {
_stext = .; /* Start of text section */
_text = .; /* Text and read-only data */
@@ -51,22 +51,13 @@ SECTIONS
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
FTRACE_HOTPATCH_TRAMPOLINES_TEXT
- *(.text.*_indirect_*)
- *(.fixup)
+ *(.text..*_indirect_*)
*(.gnu.warning)
. = ALIGN(PAGE_SIZE);
_etext = .; /* End of text section */
} :text = 0x0700
RO_DATA(PAGE_SIZE)
- .data.rel.ro : {
- *(.data.rel.ro .data.rel.ro.*)
- }
- .got : {
- __got_start = .;
- *(.got)
- __got_end = .;
- }
. = ALIGN(PAGE_SIZE);
_sdata = .; /* Start of data section */
@@ -80,6 +71,22 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
__end_ro_after_init = .;
+ . = ALIGN(8);
+ .skey_region_table : {
+ __skey_region_start = .;
+ KEEP(*(.skey_region))
+ __skey_region_end = .;
+ }
+
+ .data.rel.ro : {
+ *(.data.rel.ro .data.rel.ro.*)
+ }
+ .got : {
+ __got_start = .;
+ *(.got)
+ __got_end = .;
+ }
+
RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE)
.data.rel : {
*(.data.rel*)
@@ -143,6 +150,15 @@ SECTIONS
*(.altinstr_replacement)
}
+#ifdef CONFIG_STACKPROTECTOR
+ . = ALIGN(8);
+ .stack_prot_table : {
+ __stack_prot_start = .;
+ KEEP(*(__stack_protector_loc))
+ __stack_prot_end = .;
+ }
+#endif
+
/*
* Table with the patch locations to undo expolines
*/
@@ -190,6 +206,8 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
INIT_DATA_SECTION(0x100)
+ RUNTIME_CONST_VARIABLES
+
PERCPU_SECTION(0x100)
. = ALIGN(PAGE_SIZE);
@@ -200,6 +218,33 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
_end = . ;
+ /* Debugging sections. */
+ STABS_DEBUG
+ DWARF_DEBUG
+ ELF_DETAILS
+
+ /*
+ * Make sure that the .got.plt is either completely empty or it
+ * contains only the three reserved double words.
+ */
+ .got.plt : {
+ *(.got.plt)
+ }
+ ASSERT(SIZEOF(.got.plt) == 0 || SIZEOF(.got.plt) == 0x18, "Unexpected GOT/PLT entries detected!")
+
+ /*
+ * Sections that should stay zero sized, which is safer to
+ * explicitly check instead of blindly discarding.
+ */
+ .plt : {
+ *(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt)
+ }
+ ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
+ .rela.dyn : {
+ *(.rela.*) *(.rela_*)
+ }
+ ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!")
+
/*
* uncompressed image info used by the decompressor
* it should match struct vmlinux_info
@@ -219,6 +264,12 @@ SECTIONS
QUAD(init_mm)
QUAD(swapper_pg_dir)
QUAD(invalid_pg_dir)
+ QUAD(__alt_instructions)
+ QUAD(__alt_instructions_end)
+#ifdef CONFIG_STACKPROTECTOR
+ QUAD(__stack_prot_start)
+ QUAD(__stack_prot_end)
+#endif
#ifdef CONFIG_KASAN
QUAD(kasan_early_shadow_page)
QUAD(kasan_early_shadow_pte)
@@ -228,33 +279,6 @@ SECTIONS
#endif
} :NONE
- /* Debugging sections. */
- STABS_DEBUG
- DWARF_DEBUG
- ELF_DETAILS
-
- /*
- * Make sure that the .got.plt is either completely empty or it
- * contains only the three reserved double words.
- */
- .got.plt : {
- *(.got.plt)
- }
- ASSERT(SIZEOF(.got.plt) == 0 || SIZEOF(.got.plt) == 0x18, "Unexpected GOT/PLT entries detected!")
-
- /*
- * Sections that should stay zero sized, which is safer to
- * explicitly check instead of blindly discarding.
- */
- .plt : {
- *(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt)
- }
- ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
- .rela.dyn : {
- *(.rela.*) *(.rela_*)
- }
- ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!")
-
/* Sections to be discarded */
DISCARDS
/DISCARD/ : {
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index ffc1db0cbf9c..234a0ba30510 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -35,14 +35,15 @@ static DEFINE_PER_CPU(u64, mt_scaling_jiffies);
static inline void set_vtimer(u64 expires)
{
+ struct lowcore *lc = get_lowcore();
u64 timer;
asm volatile(
" stpt %0\n" /* Store current cpu timer value */
" spt %1" /* Set new value imm. afterwards */
: "=Q" (timer) : "Q" (expires));
- S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
- S390_lowcore.last_update_timer = expires;
+ lc->system_timer += lc->last_update_timer - timer;
+ lc->last_update_timer = expires;
}
static inline int virt_timer_forward(u64 elapsed)
@@ -117,22 +118,23 @@ static void account_system_index_scaled(struct task_struct *p, u64 cputime,
static int do_account_vtime(struct task_struct *tsk)
{
u64 timer, clock, user, guest, system, hardirq, softirq;
+ struct lowcore *lc = get_lowcore();
- timer = S390_lowcore.last_update_timer;
- clock = S390_lowcore.last_update_clock;
+ timer = lc->last_update_timer;
+ clock = lc->last_update_clock;
asm volatile(
" stpt %0\n" /* Store current cpu timer value */
" stckf %1" /* Store current tod clock value */
- : "=Q" (S390_lowcore.last_update_timer),
- "=Q" (S390_lowcore.last_update_clock)
+ : "=Q" (lc->last_update_timer),
+ "=Q" (lc->last_update_clock)
: : "cc");
- clock = S390_lowcore.last_update_clock - clock;
- timer -= S390_lowcore.last_update_timer;
+ clock = lc->last_update_clock - clock;
+ timer -= lc->last_update_timer;
if (hardirq_count())
- S390_lowcore.hardirq_timer += timer;
+ lc->hardirq_timer += timer;
else
- S390_lowcore.system_timer += timer;
+ lc->system_timer += timer;
/* Update MT utilization calculation */
if (smp_cpu_mtid &&
@@ -141,16 +143,16 @@ static int do_account_vtime(struct task_struct *tsk)
/* Calculate cputime delta */
user = update_tsk_timer(&tsk->thread.user_timer,
- READ_ONCE(S390_lowcore.user_timer));
+ READ_ONCE(lc->user_timer));
guest = update_tsk_timer(&tsk->thread.guest_timer,
- READ_ONCE(S390_lowcore.guest_timer));
+ READ_ONCE(lc->guest_timer));
system = update_tsk_timer(&tsk->thread.system_timer,
- READ_ONCE(S390_lowcore.system_timer));
+ READ_ONCE(lc->system_timer));
hardirq = update_tsk_timer(&tsk->thread.hardirq_timer,
- READ_ONCE(S390_lowcore.hardirq_timer));
+ READ_ONCE(lc->hardirq_timer));
softirq = update_tsk_timer(&tsk->thread.softirq_timer,
- READ_ONCE(S390_lowcore.softirq_timer));
- S390_lowcore.steal_timer +=
+ READ_ONCE(lc->softirq_timer));
+ lc->steal_timer +=
clock - user - guest - system - hardirq - softirq;
/* Push account value */
@@ -176,17 +178,19 @@ static int do_account_vtime(struct task_struct *tsk)
void vtime_task_switch(struct task_struct *prev)
{
+ struct lowcore *lc = get_lowcore();
+
do_account_vtime(prev);
- prev->thread.user_timer = S390_lowcore.user_timer;
- prev->thread.guest_timer = S390_lowcore.guest_timer;
- prev->thread.system_timer = S390_lowcore.system_timer;
- prev->thread.hardirq_timer = S390_lowcore.hardirq_timer;
- prev->thread.softirq_timer = S390_lowcore.softirq_timer;
- S390_lowcore.user_timer = current->thread.user_timer;
- S390_lowcore.guest_timer = current->thread.guest_timer;
- S390_lowcore.system_timer = current->thread.system_timer;
- S390_lowcore.hardirq_timer = current->thread.hardirq_timer;
- S390_lowcore.softirq_timer = current->thread.softirq_timer;
+ prev->thread.user_timer = lc->user_timer;
+ prev->thread.guest_timer = lc->guest_timer;
+ prev->thread.system_timer = lc->system_timer;
+ prev->thread.hardirq_timer = lc->hardirq_timer;
+ prev->thread.softirq_timer = lc->softirq_timer;
+ lc->user_timer = current->thread.user_timer;
+ lc->guest_timer = current->thread.guest_timer;
+ lc->system_timer = current->thread.system_timer;
+ lc->hardirq_timer = current->thread.hardirq_timer;
+ lc->softirq_timer = current->thread.softirq_timer;
}
/*
@@ -196,28 +200,29 @@ void vtime_task_switch(struct task_struct *prev)
*/
void vtime_flush(struct task_struct *tsk)
{
+ struct lowcore *lc = get_lowcore();
u64 steal, avg_steal;
if (do_account_vtime(tsk))
virt_timer_expire();
- steal = S390_lowcore.steal_timer;
- avg_steal = S390_lowcore.avg_steal_timer;
+ steal = lc->steal_timer;
+ avg_steal = lc->avg_steal_timer;
if ((s64) steal > 0) {
- S390_lowcore.steal_timer = 0;
+ lc->steal_timer = 0;
account_steal_time(cputime_to_nsecs(steal));
avg_steal += steal;
}
- S390_lowcore.avg_steal_timer = avg_steal / 2;
+ lc->avg_steal_timer = avg_steal / 2;
}
static u64 vtime_delta(void)
{
- u64 timer = S390_lowcore.last_update_timer;
-
- S390_lowcore.last_update_timer = get_cpu_timer();
+ struct lowcore *lc = get_lowcore();
+ u64 timer = lc->last_update_timer;
- return timer - S390_lowcore.last_update_timer;
+ lc->last_update_timer = get_cpu_timer();
+ return timer - lc->last_update_timer;
}
/*
@@ -226,12 +231,13 @@ static u64 vtime_delta(void)
*/
void vtime_account_kernel(struct task_struct *tsk)
{
+ struct lowcore *lc = get_lowcore();
u64 delta = vtime_delta();
if (tsk->flags & PF_VCPU)
- S390_lowcore.guest_timer += delta;
+ lc->guest_timer += delta;
else
- S390_lowcore.system_timer += delta;
+ lc->system_timer += delta;
virt_timer_forward(delta);
}
@@ -241,7 +247,7 @@ void vtime_account_softirq(struct task_struct *tsk)
{
u64 delta = vtime_delta();
- S390_lowcore.softirq_timer += delta;
+ get_lowcore()->softirq_timer += delta;
virt_timer_forward(delta);
}
@@ -250,7 +256,7 @@ void vtime_account_hardirq(struct task_struct *tsk)
{
u64 delta = vtime_delta();
- S390_lowcore.hardirq_timer += delta;
+ get_lowcore()->hardirq_timer += delta;
virt_timer_forward(delta);
}
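The vtime.c hunks above all apply the same mechanical conversion: resolve the per-CPU lowcore pointer once via get_lowcore() and turn every S390_lowcore.<field> access into an access through that pointer. A minimal sketch of the pattern, with a made-up helper name for illustration:

static inline void account_delta_sketch(u64 delta)
{
	struct lowcore *lc = get_lowcore();	/* fetch the per-CPU lowcore once */

	/* was: S390_lowcore.system_timer += delta; */
	lc->system_timer += delta;
}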
diff --git a/arch/s390/kernel/wti.c b/arch/s390/kernel/wti.c
new file mode 100644
index 000000000000..949fdbf0e8b6
--- /dev/null
+++ b/arch/s390/kernel/wti.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for warning-track interruption
+ *
+ * Copyright IBM Corp. 2023
+ */
+
+#include <linux/cpu.h>
+#include <linux/debugfs.h>
+#include <linux/kallsyms.h>
+#include <linux/smpboot.h>
+#include <linux/irq.h>
+#include <uapi/linux/sched/types.h>
+#include <asm/debug.h>
+#include <asm/diag.h>
+#include <asm/sclp.h>
+
+#define WTI_DBF_LEN 64
+
+struct wti_debug {
+ unsigned long missed;
+ unsigned long addr;
+ pid_t pid;
+};
+
+struct wti_state {
+ /* debug data for s390dbf */
+ struct wti_debug dbg;
+ /*
+	 * Represents the real-time thread responsible for
+	 * acknowledging the warning-track interrupt and for
+	 * triggering precautions before and after the grace period.
+ */
+ struct task_struct *thread;
+ /*
+	 * If pending is true, the real-time thread must be scheduled.
+	 * If not, a wake-up of that thread remains a no-op.
+ */
+ bool pending;
+};
+
+static DEFINE_PER_CPU(struct wti_state, wti_state);
+
+static debug_info_t *wti_dbg;
+
+/*
+ * During a warning-track grace period, I/O interrupts are disabled
+ * to avoid delaying the warning-track acknowledgment.
+ *
+ * Once the CPU is physically dispatched again, I/O interrupts are
+ * re-enabled.
+ */
+
+static void wti_irq_disable(void)
+{
+ unsigned long flags;
+ struct ctlreg cr6;
+
+ local_irq_save(flags);
+ local_ctl_store(6, &cr6);
+ /* disable all I/O interrupts */
+ cr6.val &= ~0xff000000UL;
+ local_ctl_load(6, &cr6);
+ local_irq_restore(flags);
+}
+
+static void wti_irq_enable(void)
+{
+ unsigned long flags;
+ struct ctlreg cr6;
+
+ local_irq_save(flags);
+ local_ctl_store(6, &cr6);
+ /* enable all I/O interrupts */
+ cr6.val |= 0xff000000UL;
+ local_ctl_load(6, &cr6);
+ local_irq_restore(flags);
+}
+
+static void store_debug_data(struct wti_state *st)
+{
+ struct pt_regs *regs = get_irq_regs();
+
+ st->dbg.pid = current->pid;
+ st->dbg.addr = 0;
+ if (!user_mode(regs))
+ st->dbg.addr = regs->psw.addr;
+}
+
+static void wti_interrupt(struct ext_code ext_code,
+ unsigned int param32, unsigned long param64)
+{
+ struct wti_state *st = this_cpu_ptr(&wti_state);
+
+ inc_irq_stat(IRQEXT_WTI);
+ wti_irq_disable();
+ store_debug_data(st);
+ st->pending = true;
+ wake_up_process(st->thread);
+}
+
+static int wti_pending(unsigned int cpu)
+{
+ struct wti_state *st = per_cpu_ptr(&wti_state, cpu);
+
+ return st->pending;
+}
+
+static void wti_dbf_grace_period(struct wti_state *st)
+{
+ struct wti_debug *wdi = &st->dbg;
+ char buf[WTI_DBF_LEN];
+
+ if (wdi->addr)
+ snprintf(buf, sizeof(buf), "%d %pS", wdi->pid, (void *)wdi->addr);
+ else
+ snprintf(buf, sizeof(buf), "%d <user>", wdi->pid);
+ debug_text_event(wti_dbg, 2, buf);
+ wdi->missed++;
+}
+
+static int wti_show(struct seq_file *seq, void *v)
+{
+ struct wti_state *st;
+ int cpu;
+
+ cpus_read_lock();
+ seq_puts(seq, " ");
+ for_each_online_cpu(cpu)
+ seq_printf(seq, "CPU%-8d", cpu);
+ seq_putc(seq, '\n');
+ for_each_online_cpu(cpu) {
+ st = per_cpu_ptr(&wti_state, cpu);
+ seq_printf(seq, " %10lu", st->dbg.missed);
+ }
+ seq_putc(seq, '\n');
+ cpus_read_unlock();
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(wti);
+
+static void wti_thread_fn(unsigned int cpu)
+{
+ struct wti_state *st = per_cpu_ptr(&wti_state, cpu);
+
+ st->pending = false;
+ /*
+	 * Yield the CPU voluntarily to the hypervisor. Control
+	 * resumes when the hypervisor dispatches the CPU to this
+	 * LPAR again.
+ */
+ if (diag49c(DIAG49C_SUBC_ACK))
+ wti_dbf_grace_period(st);
+ wti_irq_enable();
+}
+
+static struct smp_hotplug_thread wti_threads = {
+ .store = &wti_state.thread,
+ .thread_should_run = wti_pending,
+ .thread_fn = wti_thread_fn,
+ .thread_comm = "cpuwti/%u",
+ .selfparking = false,
+};
+
+static int __init wti_init(void)
+{
+ struct sched_param wti_sched_param = { .sched_priority = MAX_RT_PRIO - 1 };
+ struct dentry *wti_dir;
+ struct wti_state *st;
+ int cpu, rc;
+
+ rc = -EOPNOTSUPP;
+ if (!sclp.has_wti)
+ goto out;
+ rc = smpboot_register_percpu_thread(&wti_threads);
+ if (WARN_ON(rc))
+ goto out;
+ for_each_online_cpu(cpu) {
+ st = per_cpu_ptr(&wti_state, cpu);
+ sched_setscheduler(st->thread, SCHED_FIFO, &wti_sched_param);
+ }
+ rc = register_external_irq(EXT_IRQ_WARNING_TRACK, wti_interrupt);
+ if (rc) {
+ pr_warn("Couldn't request external interrupt 0x1007\n");
+ goto out_thread;
+ }
+ irq_subclass_register(IRQ_SUBCLASS_WARNING_TRACK);
+ rc = diag49c(DIAG49C_SUBC_REG);
+ if (rc) {
+ pr_warn("Failed to register warning track interrupt through DIAG 49C\n");
+ rc = -EOPNOTSUPP;
+ goto out_subclass;
+ }
+ wti_dir = debugfs_create_dir("wti", arch_debugfs_dir);
+ debugfs_create_file("stat", 0400, wti_dir, NULL, &wti_fops);
+ wti_dbg = debug_register("wti", 1, 1, WTI_DBF_LEN);
+ if (!wti_dbg) {
+ rc = -ENOMEM;
+ goto out_debug_register;
+ }
+ rc = debug_register_view(wti_dbg, &debug_hex_ascii_view);
+ if (rc)
+ goto out_debug_register;
+ goto out;
+out_debug_register:
+ debug_unregister(wti_dbg);
+out_subclass:
+ irq_subclass_unregister(IRQ_SUBCLASS_WARNING_TRACK);
+ unregister_external_irq(EXT_IRQ_WARNING_TRACK, wti_interrupt);
+out_thread:
+ smpboot_unregister_percpu_thread(&wti_threads);
+out:
+ return rc;
+}
+late_initcall(wti_init);
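The pending flag and the comments in wti_state above rely on the smpboot per-CPU thread contract: wake_up_process() only wakes the thread, and work is done solely when thread_should_run() reports true, so spurious wake-ups are harmless. A heavily simplified sketch of that loop (not part of the patch; parking and stop handling omitted):

static void smpboot_loop_sketch(struct smp_hotplug_thread *ht, unsigned int cpu)
{
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (!ht->thread_should_run(cpu)) {	/* wti_pending() */
			schedule();			/* sleep until the next wake-up */
			continue;
		}
		__set_current_state(TASK_RUNNING);
		ht->thread_fn(cpu);			/* wti_thread_fn(): ack via diag 0x49c */
	}
}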
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index cae908d64550..f4ec8c1ce214 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -20,7 +20,6 @@ config KVM
def_tristate y
prompt "Kernel-based Virtual Machine (KVM) support"
select HAVE_KVM_CPU_RELAX_INTERCEPT
- select HAVE_KVM_VCPU_ASYNC_IOCTL
select KVM_ASYNC_PF
select KVM_ASYNC_PF_SYNC
select KVM_COMMON
@@ -30,6 +29,7 @@ config KVM
select HAVE_KVM_NO_POLL
select KVM_VFIO
select MMU_NOTIFIER
+ select VIRT_XFER_TO_GUEST_WORK
help
Support hosting paravirtualized guest machines using the SIE
virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 02217fb4ae10..9a723c48b05a 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -8,7 +8,7 @@ include $(srctree)/virt/kvm/Makefile.kvm
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o
+kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap-vsie.o
kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 2a32438e09ce..53233dec8cad 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -11,12 +11,30 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/gmap.h>
+#include <asm/gmap_helpers.h>
#include <asm/virtio-ccw.h>
#include "kvm-s390.h"
#include "trace.h"
#include "trace-s390.h"
#include "gaccess.h"
+static void do_discard_gfn_range(struct kvm_vcpu *vcpu, gfn_t gfn_start, gfn_t gfn_end)
+{
+ struct kvm_memslot_iter iter;
+ struct kvm_memory_slot *slot;
+ struct kvm_memslots *slots;
+ unsigned long start, end;
+
+ slots = kvm_vcpu_memslots(vcpu);
+
+ kvm_for_each_memslot_in_gfn_range(&iter, slots, gfn_start, gfn_end) {
+ slot = iter.slot;
+ start = __gfn_to_hva_memslot(slot, max(gfn_start, slot->base_gfn));
+ end = __gfn_to_hva_memslot(slot, min(gfn_end, slot->base_gfn + slot->npages));
+ gmap_helper_discard(vcpu->kvm->mm, start, end);
+ }
+}
+
static int diag_release_pages(struct kvm_vcpu *vcpu)
{
unsigned long start, end;
@@ -32,12 +50,13 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end);
+ mmap_read_lock(vcpu->kvm->mm);
/*
	 * We checked for start >= end above, so let's check for the
* fast path (no prefix swap page involved)
*/
if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) {
- gmap_discard(vcpu->arch.gmap, start, end);
+ do_discard_gfn_range(vcpu, gpa_to_gfn(start), gpa_to_gfn(end));
} else {
/*
	 * This is the slow path. gmap_discard will check for start
@@ -45,13 +64,14 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
* prefix and let gmap_discard make some of these calls
* NOPs.
*/
- gmap_discard(vcpu->arch.gmap, start, prefix);
+ do_discard_gfn_range(vcpu, gpa_to_gfn(start), gpa_to_gfn(prefix));
if (start <= prefix)
- gmap_discard(vcpu->arch.gmap, 0, PAGE_SIZE);
+ do_discard_gfn_range(vcpu, 0, 1);
if (end > prefix + PAGE_SIZE)
- gmap_discard(vcpu->arch.gmap, PAGE_SIZE, 2 * PAGE_SIZE);
- gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end);
+ do_discard_gfn_range(vcpu, 1, 2);
+ do_discard_gfn_range(vcpu, gpa_to_gfn(prefix) + 2, gpa_to_gfn(end));
}
+ mmap_read_unlock(vcpu->kvm->mm);
return 0;
}
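diag_release_pages() now works on guest frame numbers instead of guest physical addresses, which is why the prefix special cases above use the constants 0, 1 and 2 where the old gmap_discard() calls used 0, PAGE_SIZE and 2 * PAGE_SIZE. The conversion assumed here is the generic KVM one, sketched below:

/* Sketch: one guest frame == one page, so gpa_to_gfn(PAGE_SIZE) == 1. */
static inline gfn_t gpa_to_gfn_sketch(gpa_t gpa)
{
	return (gfn_t)(gpa >> PAGE_SHIFT);
}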
@@ -77,7 +97,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
vcpu->stat.instruction_diagnose_258++;
if (vcpu->run->s.regs.gprs[rx] & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm));
+ rc = read_guest_real(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 5bf3d94e9dda..41ca6b0ee7a9 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -14,166 +14,11 @@
#include <asm/access-regs.h>
#include <asm/fault.h>
#include <asm/gmap.h>
+#include <asm/dat-bits.h>
#include "kvm-s390.h"
#include "gaccess.h"
-union asce {
- unsigned long val;
- struct {
- unsigned long origin : 52; /* Region- or Segment-Table Origin */
- unsigned long : 2;
- unsigned long g : 1; /* Subspace Group Control */
- unsigned long p : 1; /* Private Space Control */
- unsigned long s : 1; /* Storage-Alteration-Event Control */
- unsigned long x : 1; /* Space-Switch-Event Control */
- unsigned long r : 1; /* Real-Space Control */
- unsigned long : 1;
- unsigned long dt : 2; /* Designation-Type Control */
- unsigned long tl : 2; /* Region- or Segment-Table Length */
- };
-};
-
-enum {
- ASCE_TYPE_SEGMENT = 0,
- ASCE_TYPE_REGION3 = 1,
- ASCE_TYPE_REGION2 = 2,
- ASCE_TYPE_REGION1 = 3
-};
-
-union region1_table_entry {
- unsigned long val;
- struct {
- unsigned long rto: 52;/* Region-Table Origin */
- unsigned long : 2;
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long : 1;
- unsigned long tf : 2; /* Region-Second-Table Offset */
- unsigned long i : 1; /* Region-Invalid Bit */
- unsigned long : 1;
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long tl : 2; /* Region-Second-Table Length */
- };
-};
-
-union region2_table_entry {
- unsigned long val;
- struct {
- unsigned long rto: 52;/* Region-Table Origin */
- unsigned long : 2;
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long : 1;
- unsigned long tf : 2; /* Region-Third-Table Offset */
- unsigned long i : 1; /* Region-Invalid Bit */
- unsigned long : 1;
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long tl : 2; /* Region-Third-Table Length */
- };
-};
-
-struct region3_table_entry_fc0 {
- unsigned long sto: 52;/* Segment-Table Origin */
- unsigned long : 1;
- unsigned long fc : 1; /* Format-Control */
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long : 1;
- unsigned long tf : 2; /* Segment-Table Offset */
- unsigned long i : 1; /* Region-Invalid Bit */
- unsigned long cr : 1; /* Common-Region Bit */
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long tl : 2; /* Segment-Table Length */
-};
-
-struct region3_table_entry_fc1 {
- unsigned long rfaa : 33; /* Region-Frame Absolute Address */
- unsigned long : 14;
- unsigned long av : 1; /* ACCF-Validity Control */
- unsigned long acc: 4; /* Access-Control Bits */
- unsigned long f : 1; /* Fetch-Protection Bit */
- unsigned long fc : 1; /* Format-Control */
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long iep: 1; /* Instruction-Execution-Protection */
- unsigned long : 2;
- unsigned long i : 1; /* Region-Invalid Bit */
- unsigned long cr : 1; /* Common-Region Bit */
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long : 2;
-};
-
-union region3_table_entry {
- unsigned long val;
- struct region3_table_entry_fc0 fc0;
- struct region3_table_entry_fc1 fc1;
- struct {
- unsigned long : 53;
- unsigned long fc : 1; /* Format-Control */
- unsigned long : 4;
- unsigned long i : 1; /* Region-Invalid Bit */
- unsigned long cr : 1; /* Common-Region Bit */
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long : 2;
- };
-};
-
-struct segment_entry_fc0 {
- unsigned long pto: 53;/* Page-Table Origin */
- unsigned long fc : 1; /* Format-Control */
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long : 3;
- unsigned long i : 1; /* Segment-Invalid Bit */
- unsigned long cs : 1; /* Common-Segment Bit */
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long : 2;
-};
-
-struct segment_entry_fc1 {
- unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
- unsigned long : 3;
- unsigned long av : 1; /* ACCF-Validity Control */
- unsigned long acc: 4; /* Access-Control Bits */
- unsigned long f : 1; /* Fetch-Protection Bit */
- unsigned long fc : 1; /* Format-Control */
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long iep: 1; /* Instruction-Execution-Protection */
- unsigned long : 2;
- unsigned long i : 1; /* Segment-Invalid Bit */
- unsigned long cs : 1; /* Common-Segment Bit */
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long : 2;
-};
-
-union segment_table_entry {
- unsigned long val;
- struct segment_entry_fc0 fc0;
- struct segment_entry_fc1 fc1;
- struct {
- unsigned long : 53;
- unsigned long fc : 1; /* Format-Control */
- unsigned long : 4;
- unsigned long i : 1; /* Segment-Invalid Bit */
- unsigned long cs : 1; /* Common-Segment Bit */
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long : 2;
- };
-};
-
-enum {
- TABLE_TYPE_SEGMENT = 0,
- TABLE_TYPE_REGION3 = 1,
- TABLE_TYPE_REGION2 = 2,
- TABLE_TYPE_REGION1 = 3
-};
-
-union page_table_entry {
- unsigned long val;
- struct {
- unsigned long pfra : 52; /* Page-Frame Real Address */
- unsigned long z : 1; /* Zero Bit */
- unsigned long i : 1; /* Page-Invalid Bit */
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long iep: 1; /* Instruction-Execution-Protection */
- unsigned long : 8;
- };
-};
+#define GMAP_SHADOW_FAKE_TABLE 1ULL
/*
* vaddress union in order to easily decode a virtual address into its
@@ -264,14 +109,9 @@ struct aste {
int ipte_lock_held(struct kvm *kvm)
{
- if (sclp.has_siif) {
- int rc;
+ if (sclp.has_siif)
+ return kvm->arch.sca->ipte_control.kh != 0;
- read_lock(&kvm->arch.sca_lock);
- rc = kvm_s390_get_ipte_control(kvm)->kh != 0;
- read_unlock(&kvm->arch.sca_lock);
- return rc;
- }
return kvm->arch.ipte_lock_count != 0;
}
@@ -284,19 +124,16 @@ static void ipte_lock_simple(struct kvm *kvm)
if (kvm->arch.ipte_lock_count > 1)
goto out;
retry:
- read_lock(&kvm->arch.sca_lock);
- ic = kvm_s390_get_ipte_control(kvm);
+ ic = &kvm->arch.sca->ipte_control;
+ old = READ_ONCE(*ic);
do {
- old = READ_ONCE(*ic);
if (old.k) {
- read_unlock(&kvm->arch.sca_lock);
cond_resched();
goto retry;
}
new = old;
new.k = 1;
- } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
- read_unlock(&kvm->arch.sca_lock);
+ } while (!try_cmpxchg(&ic->val, &old.val, new.val));
out:
mutex_unlock(&kvm->arch.ipte_mutex);
}
@@ -309,14 +146,12 @@ static void ipte_unlock_simple(struct kvm *kvm)
kvm->arch.ipte_lock_count--;
if (kvm->arch.ipte_lock_count)
goto out;
- read_lock(&kvm->arch.sca_lock);
- ic = kvm_s390_get_ipte_control(kvm);
+ ic = &kvm->arch.sca->ipte_control;
+ old = READ_ONCE(*ic);
do {
- old = READ_ONCE(*ic);
new = old;
new.k = 0;
- } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
- read_unlock(&kvm->arch.sca_lock);
+ } while (!try_cmpxchg(&ic->val, &old.val, new.val));
wake_up(&kvm->arch.ipte_wq);
out:
mutex_unlock(&kvm->arch.ipte_mutex);
@@ -327,36 +162,31 @@ static void ipte_lock_siif(struct kvm *kvm)
union ipte_control old, new, *ic;
retry:
- read_lock(&kvm->arch.sca_lock);
- ic = kvm_s390_get_ipte_control(kvm);
+ ic = &kvm->arch.sca->ipte_control;
+ old = READ_ONCE(*ic);
do {
- old = READ_ONCE(*ic);
if (old.kg) {
- read_unlock(&kvm->arch.sca_lock);
cond_resched();
goto retry;
}
new = old;
new.k = 1;
new.kh++;
- } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
- read_unlock(&kvm->arch.sca_lock);
+ } while (!try_cmpxchg(&ic->val, &old.val, new.val));
}
static void ipte_unlock_siif(struct kvm *kvm)
{
union ipte_control old, new, *ic;
- read_lock(&kvm->arch.sca_lock);
- ic = kvm_s390_get_ipte_control(kvm);
+ ic = &kvm->arch.sca->ipte_control;
+ old = READ_ONCE(*ic);
do {
- old = READ_ONCE(*ic);
new = old;
new.kh--;
if (!new.kh)
new.k = 0;
- } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
- read_unlock(&kvm->arch.sca_lock);
+ } while (!try_cmpxchg(&ic->val, &old.val, new.val));
if (!new.kh)
wake_up(&kvm->arch.ipte_wq);
}
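The ipte lock rework above, and several later hunks (gisa_set_iam(), gisa_clear_ipm(), kvm_s390_update_topology_change_report()), apply the same cmpxchg-loop modernization. A generic before/after sketch of the pattern on a hypothetical counter:

	unsigned long old, new, *ptr = &counter;	/* 'counter' is a made-up example */

	/* Before: re-read the location and recompare the returned value on every retry. */
	do {
		old = READ_ONCE(*ptr);
		new = old + 1;
	} while (cmpxchg(ptr, old, new) != old);

	/* After: read once up front; on failure try_cmpxchg() refreshes 'old' in place. */
	old = READ_ONCE(*ptr);
	do {
		new = old + 1;
	} while (!try_cmpxchg(ptr, &old, new));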
@@ -474,7 +304,7 @@ enum prot_type {
PROT_TYPE_DAT = 3,
PROT_TYPE_IEP = 4,
/* Dummy value for passing an initialized value when code != PGM_PROTECTION */
- PROT_NONE,
+ PROT_TYPE_DUMMY,
};
static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
@@ -490,7 +320,7 @@ static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva,
switch (code) {
case PGM_PROTECTION:
switch (prot) {
- case PROT_NONE:
+ case PROT_TYPE_DUMMY:
/* We should never get here, acts like termination */
WARN_ON_ONCE(1);
break;
@@ -632,7 +462,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
if (asce.r)
goto real_address;
- ptr = asce.origin * PAGE_SIZE;
+ ptr = asce.rsto * PAGE_SIZE;
switch (asce.dt) {
case ASCE_TYPE_REGION1:
if (vaddr.rfx01 > asce.tl)
@@ -960,7 +790,7 @@ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
gpa = kvm_s390_real_to_abs(vcpu, ga);
if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) {
rc = PGM_ADDRESSING;
- prot = PROT_NONE;
+ prot = PROT_TYPE_DUMMY;
}
}
if (rc)
@@ -985,6 +815,8 @@ static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
const gfn_t gfn = gpa_to_gfn(gpa);
int rc;
+ if (!gfn_to_memslot(kvm, gfn))
+ return PGM_ADDRESSING;
if (mode == GACC_STORE)
rc = kvm_write_guest_page(kvm, gfn, data, offset, len);
else
@@ -1116,7 +948,7 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
if (rc == PGM_PROTECTION)
prot = PROT_TYPE_KEYC;
else
- prot = PROT_NONE;
+ prot = PROT_TYPE_DUMMY;
rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate);
}
out_unlock:
@@ -1142,6 +974,8 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
gra += fragment_len;
data += fragment_len;
}
+ if (rc > 0)
+ vcpu->arch.pgm.code = rc;
return rc;
}
@@ -1379,7 +1213,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
parent = sg->parent;
vaddr.addr = saddr;
asce.val = sg->orig_asce;
- ptr = asce.origin * PAGE_SIZE;
+ ptr = asce.rsto * PAGE_SIZE;
if (asce.r) {
*fake = 1;
ptr = 0;
@@ -1546,6 +1380,44 @@ shadow_pgt:
}
/**
+ * shadow_pgt_lookup() - find a shadow page table
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: the address in the shadow guest address space
+ * @pgt: parent gmap address of the page table to get shadowed
+ * @dat_protection: if the pgtable is marked as protected by dat
+ * @fake: pgt references a contiguous guest memory block, not a pgtable
+ *
+ * Returns 0 if the shadow page table was found and -EAGAIN if the page
+ * table was not found.
+ *
+ * Called with sg->mm->mmap_lock held in read mode.
+ */
+static int shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, unsigned long *pgt,
+ int *dat_protection, int *fake)
+{
+ unsigned long pt_index;
+ unsigned long *table;
+ struct page *page;
+ int rc;
+
+ spin_lock(&sg->guest_table_lock);
+ table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
+ if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
+ /* Shadow page tables are full pages (pte+pgste) */
+ page = pfn_to_page(*table >> PAGE_SHIFT);
+ pt_index = gmap_pgste_get_pgt_addr(page_to_virt(page));
+ *pgt = pt_index & ~GMAP_SHADOW_FAKE_TABLE;
+ *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
+ *fake = !!(pt_index & GMAP_SHADOW_FAKE_TABLE);
+ rc = 0;
+ } else {
+ rc = -EAGAIN;
+ }
+ spin_unlock(&sg->guest_table_lock);
+ return rc;
+}
+
+/**
* kvm_s390_shadow_fault - handle fault on a shadow page table
* @vcpu: virtual cpu
* @sg: pointer to the shadow guest address space structure
@@ -1568,6 +1440,9 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
int dat_protection, fake;
int rc;
+ if (KVM_BUG_ON(!gmap_is_shadow(sg), vcpu->kvm))
+ return -EFAULT;
+
mmap_read_lock(sg->mm);
/*
* We don't want any guest-2 tables to change - so the parent
@@ -1576,7 +1451,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
*/
ipte_lock(vcpu->kvm);
- rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
+ rc = shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
if (rc)
rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection,
&fake);
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index b320d12aa049..3fde45a151f2 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -405,11 +405,12 @@ int read_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
* @len: number of bytes to copy
*
* Copy @len bytes from @data (kernel space) to @gra (guest real address).
- * It is up to the caller to ensure that the entire guest memory range is
- * valid memory before calling this function.
* Guest low address and key protection are not checked.
*
- * Returns zero on success or -EFAULT on error.
+ * Returns zero on success, -EFAULT when copying from @data failed, or
+ * PGM_ADDRESSING in case @gra is outside a memslot. In this case, pgm check info
+ * is also stored to allow injecting into the guest (if applicable) using
+ * kvm_s390_inject_prog_cond().
*
* If an error occurs data may have been copied partially to guest memory.
*/
@@ -428,11 +429,12 @@ int write_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
* @len: number of bytes to copy
*
* Copy @len bytes from @gra (guest real address) to @data (kernel space).
- * It is up to the caller to ensure that the entire guest memory range is
- * valid memory before calling this function.
* Guest key protection is not checked.
*
- * Returns zero on success or -EFAULT on error.
+ * Returns zero on success, -EFAULT when copying to @data failed, or
+ * PGM_ADDRESSING in case @gra is outside a memslot. In this case, pgm check info
+ * is also stored to allow injecting into the guest (if applicable) using
+ * kvm_s390_inject_prog_cond().
*
* If an error occurs data may have been copied partially to kernel space.
*/
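With the documentation change above, a positive return value from read_guest_real()/write_guest_real() is a program interruption code that the caller may forward to the guest, exactly as diag.c does earlier in this patch. A sketch of the calling convention, with a made-up wrapper name:

static int fetch_from_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
				 void *buf, unsigned long len)
{
	int rc;

	rc = read_guest_real(vcpu, gra, buf, len);
	if (rc)		/* -EFAULT stays an error, PGM_* codes may be injected */
		return kvm_s390_inject_prog_cond(vcpu, rc);
	return 0;
}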
diff --git a/arch/s390/kvm/gmap-vsie.c b/arch/s390/kvm/gmap-vsie.c
new file mode 100644
index 000000000000..56ef153eb8fe
--- /dev/null
+++ b/arch/s390/kvm/gmap-vsie.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Guest memory management for KVM/s390 nested VMs.
+ *
+ * Copyright IBM Corp. 2008, 2020, 2024
+ *
+ * Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * David Hildenbrand <david@redhat.com>
+ * Janosch Frank <frankja@linux.vnet.ibm.com>
+ */
+
+#include <linux/compiler.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/pgtable.h>
+#include <linux/pagemap.h>
+#include <linux/mman.h>
+
+#include <asm/lowcore.h>
+#include <asm/gmap.h>
+#include <asm/uv.h>
+
+#include "kvm-s390.h"
+
+/**
+ * gmap_find_shadow - find a specific asce in the list of shadow tables
+ * @parent: pointer to the parent gmap
+ * @asce: ASCE for which the shadow table is created
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * Returns the pointer to a gmap if a shadow table with the given asce is
+ * already available, ERR_PTR(-EAGAIN) if another one is currently being
+ * created, and NULL otherwise.
+ *
+ * Context: Called with parent->shadow_lock held
+ */
+static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce, int edat_level)
+{
+ struct gmap *sg;
+
+ lockdep_assert_held(&parent->shadow_lock);
+ list_for_each_entry(sg, &parent->children, list) {
+ if (!gmap_shadow_valid(sg, asce, edat_level))
+ continue;
+ if (!sg->initialized)
+ return ERR_PTR(-EAGAIN);
+ refcount_inc(&sg->ref_count);
+ return sg;
+ }
+ return NULL;
+}
+
+/**
+ * gmap_shadow - create/find a shadow guest address space
+ * @parent: pointer to the parent gmap
+ * @asce: ASCE for which the shadow table is created
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * The pages of the top level page table referred to by the asce parameter
+ * will be set to read-only and marked in the PGSTEs of the kvm process.
+ * The shadow table will be removed automatically on any change to the
+ * PTE mapping for the source table.
+ *
+ * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory,
+ * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the
+ * parent gmap table could not be protected.
+ */
+struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level)
+{
+ struct gmap *sg, *new;
+ unsigned long limit;
+ int rc;
+
+ if (KVM_BUG_ON(parent->mm->context.allow_gmap_hpage_1m, (struct kvm *)parent->private) ||
+ KVM_BUG_ON(gmap_is_shadow(parent), (struct kvm *)parent->private))
+ return ERR_PTR(-EFAULT);
+ spin_lock(&parent->shadow_lock);
+ sg = gmap_find_shadow(parent, asce, edat_level);
+ spin_unlock(&parent->shadow_lock);
+ if (sg)
+ return sg;
+ /* Create a new shadow gmap */
+ limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11));
+ if (asce & _ASCE_REAL_SPACE)
+ limit = -1UL;
+ new = gmap_alloc(limit);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+ new->mm = parent->mm;
+ new->parent = gmap_get(parent);
+ new->private = parent->private;
+ new->orig_asce = asce;
+ new->edat_level = edat_level;
+ new->initialized = false;
+ spin_lock(&parent->shadow_lock);
+ /* Recheck if another CPU created the same shadow */
+ sg = gmap_find_shadow(parent, asce, edat_level);
+ if (sg) {
+ spin_unlock(&parent->shadow_lock);
+ gmap_free(new);
+ return sg;
+ }
+ if (asce & _ASCE_REAL_SPACE) {
+ /* only allow one real-space gmap shadow */
+ list_for_each_entry(sg, &parent->children, list) {
+ if (sg->orig_asce & _ASCE_REAL_SPACE) {
+ spin_lock(&sg->guest_table_lock);
+ gmap_unshadow(sg);
+ spin_unlock(&sg->guest_table_lock);
+ list_del(&sg->list);
+ gmap_put(sg);
+ break;
+ }
+ }
+ }
+ refcount_set(&new->ref_count, 2);
+ list_add(&new->list, &parent->children);
+ if (asce & _ASCE_REAL_SPACE) {
+ /* nothing to protect, return right away */
+ new->initialized = true;
+ spin_unlock(&parent->shadow_lock);
+ return new;
+ }
+ spin_unlock(&parent->shadow_lock);
+ /* protect after insertion, so it will get properly invalidated */
+ mmap_read_lock(parent->mm);
+ rc = __kvm_s390_mprotect_many(parent, asce & _ASCE_ORIGIN,
+ ((asce & _ASCE_TABLE_LENGTH) + 1),
+ PROT_READ, GMAP_NOTIFY_SHADOW);
+ mmap_read_unlock(parent->mm);
+ spin_lock(&parent->shadow_lock);
+ new->initialized = true;
+ if (rc) {
+ list_del(&new->list);
+ gmap_free(new);
+ new = ERR_PTR(rc);
+ }
+ spin_unlock(&parent->shadow_lock);
+ return new;
+}
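gmap_shadow() hands back a reference owned by the caller: a newly created shadow starts with ref_count == 2 (one for the parent's children list, one for the caller), and gmap_find_shadow() takes an extra reference before returning an existing shadow. A sketch of the expected pairing in a hypothetical caller:

static int with_shadow_sketch(struct gmap *parent, unsigned long asce, int edat_level)
{
	struct gmap *sg;

	sg = gmap_shadow(parent, asce, edat_level);
	if (IS_ERR(sg))
		return PTR_ERR(sg);	/* -ENOMEM, -EAGAIN or -EFAULT */
	/* ... use the shadow address space ... */
	gmap_put(sg);			/* drop the caller's reference */
	return 0;
}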
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index b16352083ff9..420ae62977e2 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -94,7 +94,7 @@ static int handle_validity(struct kvm_vcpu *vcpu)
vcpu->stat.exit_validity++;
trace_kvm_s390_intercept_validity(vcpu, viwhy);
- KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%pK)", viwhy,
+ KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%p)", viwhy,
current->pid, vcpu->kvm);
/* do not warn on invalid runtime instrumentation mode */
@@ -367,7 +367,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
reg2, &srcaddr, GACC_FETCH, 0);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
- rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
+ rc = kvm_s390_handle_dat_fault(vcpu, srcaddr, 0);
if (rc != 0)
return rc;
@@ -376,7 +376,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
reg1, &dstaddr, GACC_STORE, 0);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
- rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
+ rc = kvm_s390_handle_dat_fault(vcpu, dstaddr, FOLL_WRITE);
if (rc != 0)
return rc;
@@ -471,6 +471,9 @@ static int handle_operexc(struct kvm_vcpu *vcpu)
if (vcpu->arch.sie_block->ipa == 0xb256)
return handle_sthyi(vcpu);
+ if (vcpu->kvm->arch.user_operexec)
+ return -EOPNOTSUPP;
+
if (vcpu->arch.sie_block->ipa == 0 && vcpu->kvm->arch.user_instr0)
return -EOPNOTSUPP;
rc = read_guest_lc(vcpu, __LC_PGM_NEW_PSW, &newpsw, sizeof(psw_t));
@@ -544,12 +547,12 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu)
guest_uvcb->header.cmd);
return 0;
}
- rc = gmap_make_secure(vcpu->arch.gmap, uvcb.gaddr, &uvcb);
+ rc = kvm_s390_pv_make_secure(vcpu->kvm, uvcb.gaddr, &uvcb);
/*
* If the unpin did not succeed, the guest will exit again for the UVC
* and we will retry the unpin.
*/
- if (rc == -EINVAL)
+ if (rc == -EINVAL || rc == -ENXIO)
return 0;
/*
* If we got -EAGAIN here, we simply return it. It will eventually
@@ -652,10 +655,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
break;
case ICPT_PV_PREF:
rc = 0;
- gmap_convert_to_secure(vcpu->arch.gmap,
- kvm_s390_get_prefix(vcpu));
- gmap_convert_to_secure(vcpu->arch.gmap,
- kvm_s390_get_prefix(vcpu) + PAGE_SIZE);
+ kvm_s390_pv_convert_to_secure(vcpu->kvm, kvm_s390_get_prefix(vcpu));
+ kvm_s390_pv_convert_to_secure(vcpu->kvm, kvm_s390_get_prefix(vcpu) + PAGE_SIZE);
break;
default:
return -EOPNOTSUPP;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 4f0e7f61edf7..249cdc822ec5 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -7,12 +7,13 @@
* Author(s): Carsten Otte <cotte@de.ibm.com>
*/
-#define KMSG_COMPONENT "kvm-s390"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "kvm-s390: " fmt
+#include <linux/cpufeature.h>
#include <linux/interrupt.h>
#include <linux/kvm_host.h>
#include <linux/hrtimer.h>
+#include <linux/export.h>
#include <linux/mmu_context.h>
#include <linux/nospec.h>
#include <linux/signal.h>
@@ -43,70 +44,34 @@ static struct kvm_s390_gib *gib;
/* handle external calls via sigp interpretation facility */
static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id)
{
- int c, scn;
+ struct esca_block *sca = vcpu->kvm->arch.sca;
+ union esca_sigp_ctrl sigp_ctrl = sca->cpu[vcpu->vcpu_id].sigp_ctrl;
if (!kvm_s390_test_cpuflags(vcpu, CPUSTAT_ECALL_PEND))
return 0;
BUG_ON(!kvm_s390_use_sca_entries());
- read_lock(&vcpu->kvm->arch.sca_lock);
- if (vcpu->kvm->arch.use_esca) {
- struct esca_block *sca = vcpu->kvm->arch.sca;
- union esca_sigp_ctrl sigp_ctrl =
- sca->cpu[vcpu->vcpu_id].sigp_ctrl;
-
- c = sigp_ctrl.c;
- scn = sigp_ctrl.scn;
- } else {
- struct bsca_block *sca = vcpu->kvm->arch.sca;
- union bsca_sigp_ctrl sigp_ctrl =
- sca->cpu[vcpu->vcpu_id].sigp_ctrl;
-
- c = sigp_ctrl.c;
- scn = sigp_ctrl.scn;
- }
- read_unlock(&vcpu->kvm->arch.sca_lock);
if (src_id)
- *src_id = scn;
+ *src_id = sigp_ctrl.scn;
- return c;
+ return sigp_ctrl.c;
}
static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
{
+ struct esca_block *sca = vcpu->kvm->arch.sca;
+ union esca_sigp_ctrl *sigp_ctrl = &sca->cpu[vcpu->vcpu_id].sigp_ctrl;
+ union esca_sigp_ctrl old_val, new_val = {.scn = src_id, .c = 1};
int expect, rc;
BUG_ON(!kvm_s390_use_sca_entries());
- read_lock(&vcpu->kvm->arch.sca_lock);
- if (vcpu->kvm->arch.use_esca) {
- struct esca_block *sca = vcpu->kvm->arch.sca;
- union esca_sigp_ctrl *sigp_ctrl =
- &(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
- union esca_sigp_ctrl new_val = {0}, old_val;
-
- old_val = READ_ONCE(*sigp_ctrl);
- new_val.scn = src_id;
- new_val.c = 1;
- old_val.c = 0;
-
- expect = old_val.value;
- rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value);
- } else {
- struct bsca_block *sca = vcpu->kvm->arch.sca;
- union bsca_sigp_ctrl *sigp_ctrl =
- &(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
- union bsca_sigp_ctrl new_val = {0}, old_val;
- old_val = READ_ONCE(*sigp_ctrl);
- new_val.scn = src_id;
- new_val.c = 1;
- old_val.c = 0;
+ old_val = READ_ONCE(*sigp_ctrl);
+ old_val.c = 0;
- expect = old_val.value;
- rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value);
- }
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ expect = old_val.value;
+ rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value);
if (rc != expect) {
/* another external call is pending */
@@ -118,33 +83,14 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
{
- int rc, expect;
+ struct esca_block *sca = vcpu->kvm->arch.sca;
+ union esca_sigp_ctrl *sigp_ctrl = &sca->cpu[vcpu->vcpu_id].sigp_ctrl;
if (!kvm_s390_use_sca_entries())
return;
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_ECALL_PEND);
- read_lock(&vcpu->kvm->arch.sca_lock);
- if (vcpu->kvm->arch.use_esca) {
- struct esca_block *sca = vcpu->kvm->arch.sca;
- union esca_sigp_ctrl *sigp_ctrl =
- &(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
- union esca_sigp_ctrl old;
-
- old = READ_ONCE(*sigp_ctrl);
- expect = old.value;
- rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
- } else {
- struct bsca_block *sca = vcpu->kvm->arch.sca;
- union bsca_sigp_ctrl *sigp_ctrl =
- &(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
- union bsca_sigp_ctrl old;
- old = READ_ONCE(*sigp_ctrl);
- expect = old.value;
- rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
- }
- read_unlock(&vcpu->kvm->arch.sca_lock);
- WARN_ON(rc != expect); /* cannot clear? */
+ WRITE_ONCE(sigp_ctrl->value, 0);
}
int psw_extint_disabled(struct kvm_vcpu *vcpu)
@@ -247,12 +193,12 @@ static inline int gisa_set_iam(struct kvm_s390_gisa *gisa, u8 iam)
{
u64 word, _word;
+ word = READ_ONCE(gisa->u64.word[0]);
do {
- word = READ_ONCE(gisa->u64.word[0]);
if ((u64)gisa != word >> 32)
return -EBUSY;
_word = (word & ~0xffUL) | iam;
- } while (cmpxchg(&gisa->u64.word[0], word, _word) != word);
+ } while (!try_cmpxchg(&gisa->u64.word[0], &word, _word));
return 0;
}
@@ -270,10 +216,10 @@ static inline void gisa_clear_ipm(struct kvm_s390_gisa *gisa)
{
u64 word, _word;
+ word = READ_ONCE(gisa->u64.word[0]);
do {
- word = READ_ONCE(gisa->u64.word[0]);
_word = word & ~(0xffUL << 24);
- } while (cmpxchg(&gisa->u64.word[0], word, _word) != word);
+ } while (!try_cmpxchg(&gisa->u64.word[0], &word, _word));
}
/**
@@ -291,14 +237,14 @@ static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi)
u8 pending_mask, alert_mask;
u64 word, _word;
+ word = READ_ONCE(gi->origin->u64.word[0]);
do {
- word = READ_ONCE(gi->origin->u64.word[0]);
alert_mask = READ_ONCE(gi->alert.mask);
pending_mask = (u8)(word >> 24) & alert_mask;
if (pending_mask)
return pending_mask;
_word = (word & ~0xffUL) | alert_mask;
- } while (cmpxchg(&gi->origin->u64.word[0], word, _word) != word);
+ } while (!try_cmpxchg(&gi->origin->u64.word[0], &word, _word));
return 0;
}
@@ -586,7 +532,7 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
/* take care of lazy register loading */
kvm_s390_fpu_store(vcpu->run);
save_access_regs(vcpu->run->s.regs.acrs);
- if (MACHINE_HAS_GS && vcpu->arch.gs_enabled)
+ if (cpu_has_gs() && vcpu->arch.gs_enabled)
save_gs_cb(current->thread.gs_cb);
/* Extended save area */
@@ -957,8 +903,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
rc |= put_guest_lc(vcpu, ilen, (u16 *) __LC_PGM_ILC);
rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea,
(u64 *) __LC_PGM_LAST_BREAK);
- rc |= put_guest_lc(vcpu, pgm_info.code,
- (u16 *)__LC_PGM_INT_CODE);
+ rc |= put_guest_lc(vcpu, pgm_info.code, (u16 *)__LC_PGM_CODE);
rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW,
@@ -1232,7 +1177,7 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- if (!sclp.has_sigpif)
+ if (!kvm_s390_use_sca_entries())
return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
return sca_ext_call_pending(vcpu, NULL);
@@ -1331,6 +1276,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
VCPU_EVENT(vcpu, 4, "enabled wait: %llu ns", sltime);
no_timer:
kvm_vcpu_srcu_read_unlock(vcpu);
+ vcpu->kvm->arch.float_int.last_sleep_cpu = vcpu->vcpu_idx;
kvm_vcpu_halt(vcpu);
vcpu->valid_wakeup = false;
__unset_cpu_idle(vcpu);
@@ -1556,7 +1502,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
if (kvm_get_vcpu_by_id(vcpu->kvm, src_id) == NULL)
return -EINVAL;
- if (sclp.has_sigpif && !kvm_s390_pv_cpu_get_handle(vcpu))
+ if (kvm_s390_use_sca_entries() && !kvm_s390_pv_cpu_get_handle(vcpu))
return sca_inject_ext_call(vcpu, src_id);
if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
@@ -1957,18 +1903,15 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type)
if (!online_vcpus)
return;
- /* find idle VCPUs first, then round robin */
- sigcpu = find_first_bit(kvm->arch.idle_mask, online_vcpus);
- if (sigcpu == online_vcpus) {
- do {
- sigcpu = kvm->arch.float_int.next_rr_cpu++;
- kvm->arch.float_int.next_rr_cpu %= online_vcpus;
- /* avoid endless loops if all vcpus are stopped */
- if (nr_tries++ >= online_vcpus)
- return;
- } while (is_vcpu_stopped(kvm_get_vcpu(kvm, sigcpu)));
+ for (sigcpu = kvm->arch.float_int.last_sleep_cpu; ; sigcpu++) {
+ sigcpu %= online_vcpus;
+ dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
+ if (!is_vcpu_stopped(dst_vcpu))
+ break;
+ /* avoid endless loops if all vcpus are stopped */
+ if (nr_tries++ >= online_vcpus)
+ return;
}
- dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
/* make the VCPU drop out of the SIE, or wake it up if sleeping */
switch (type) {
@@ -2687,9 +2630,13 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
kvm_s390_clear_float_irqs(dev->kvm);
break;
case KVM_DEV_FLIC_APF_ENABLE:
+ if (kvm_is_ucontrol(dev->kvm))
+ return -EINVAL;
dev->kvm->arch.gmap->pfault_enabled = 1;
break;
case KVM_DEV_FLIC_APF_DISABLE_WAIT:
+ if (kvm_is_ucontrol(dev->kvm))
+ return -EINVAL;
dev->kvm->arch.gmap->pfault_enabled = 0;
/*
* Make sure no async faults are in transition when
@@ -2782,12 +2729,19 @@ static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap)
static struct page *get_map_page(struct kvm *kvm, u64 uaddr)
{
+ struct mm_struct *mm = kvm->mm;
struct page *page = NULL;
+ int locked = 1;
+
+ if (mmget_not_zero(mm)) {
+ mmap_read_lock(mm);
+ get_user_pages_remote(mm, uaddr, 1, FOLL_WRITE,
+ &page, &locked);
+ if (locked)
+ mmap_read_unlock(mm);
+ mmput(mm);
+ }
- mmap_read_lock(kvm->mm);
- get_user_pages_remote(kvm->mm, uaddr, 1, FOLL_WRITE,
- &page, NULL);
- mmap_read_unlock(kvm->mm);
return page;
}
@@ -2898,20 +2852,25 @@ int kvm_set_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
- u64 uaddr;
+ u64 uaddr_s, uaddr_i;
+ int idx;
switch (ue->type) {
/* we store the userspace addresses instead of the guest addresses */
case KVM_IRQ_ROUTING_S390_ADAPTER:
+ if (kvm_is_ucontrol(kvm))
+ return -EINVAL;
e->set = set_adapter_int;
- uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr);
- if (uaddr == -EFAULT)
- return -EFAULT;
- e->adapter.summary_addr = uaddr;
- uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.ind_addr);
- if (uaddr == -EFAULT)
+
+ idx = srcu_read_lock(&kvm->srcu);
+ uaddr_s = gpa_to_hva(kvm, ue->u.adapter.summary_addr);
+ uaddr_i = gpa_to_hva(kvm, ue->u.adapter.ind_addr);
+ srcu_read_unlock(&kvm->srcu, idx);
+
+ if (kvm_is_error_hva(uaddr_s) || kvm_is_error_hva(uaddr_i))
return -EFAULT;
- e->adapter.ind_addr = uaddr;
+ e->adapter.summary_addr = uaddr_s;
+ e->adapter.ind_addr = uaddr_i;
e->adapter.summary_offset = ue->u.adapter.summary_offset;
e->adapter.ind_offset = ue->u.adapter.ind_offset;
e->adapter.adapter_id = ue->u.adapter.adapter_id;
@@ -3161,7 +3120,7 @@ void kvm_s390_gisa_clear(struct kvm *kvm)
if (!gi->origin)
return;
gisa_clear_ipm(gi->origin);
- VM_EVENT(kvm, 3, "gisa 0x%pK cleared", gi->origin);
+ VM_EVENT(kvm, 3, "gisa 0x%p cleared", gi->origin);
}
void kvm_s390_gisa_init(struct kvm *kvm)
@@ -3174,11 +3133,10 @@ void kvm_s390_gisa_init(struct kvm *kvm)
gi->alert.mask = 0;
spin_lock_init(&gi->alert.ref_lock);
gi->expires = 50 * 1000; /* 50 usec */
- hrtimer_init(&gi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- gi->timer.function = gisa_vcpu_kicker;
+ hrtimer_setup(&gi->timer, gisa_vcpu_kicker, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
memset(gi->origin, 0, sizeof(struct kvm_s390_gisa));
gi->origin->next_alert = (u32)virt_to_phys(gi->origin);
- VM_EVENT(kvm, 3, "gisa 0x%pK initialized", gi->origin);
+ VM_EVENT(kvm, 3, "gisa 0x%p initialized", gi->origin);
}
void kvm_s390_gisa_enable(struct kvm *kvm)
@@ -3219,7 +3177,7 @@ void kvm_s390_gisa_destroy(struct kvm *kvm)
process_gib_alert_list();
hrtimer_cancel(&gi->timer);
gi->origin = NULL;
- VM_EVENT(kvm, 3, "gisa 0x%pK destroyed", gisa);
+ VM_EVENT(kvm, 3, "gisa 0x%p destroyed", gisa);
}
void kvm_s390_gisa_disable(struct kvm *kvm)
@@ -3468,7 +3426,7 @@ int __init kvm_s390_gib_init(u8 nisc)
}
}
- KVM_EVENT(3, "gib 0x%pK (nisc=%d) initialized", gib, gib->nisc);
+ KVM_EVENT(3, "gib 0x%p (nisc=%d) initialized", gib, gib->nisc);
goto out;
out_unreg_gal:
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 54b5b2565df8..56a50524b3ee 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -10,10 +10,11 @@
* Jason J. Herne <jjherne@us.ibm.com>
*/
-#define KMSG_COMPONENT "kvm-s390"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "kvm-s390: " fmt
#include <linux/compiler.h>
+#include <linux/entry-virt.h>
+#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
@@ -23,6 +24,7 @@
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
+#include <linux/cpufeature.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
@@ -36,13 +38,16 @@
#include <asm/access-regs.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
+#include <asm/machine.h>
#include <asm/stp.h>
#include <asm/gmap.h>
+#include <asm/gmap_helpers.h>
#include <asm/nmi.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
+#include <asm/asm.h>
#include <asm/fpu.h>
#include <asm/ap.h>
#include <asm/uv.h>
@@ -180,7 +185,8 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
- STATS_DESC_COUNTER(VCPU, pfault_sync)
+ STATS_DESC_COUNTER(VCPU, pfault_sync),
+ STATS_DESC_COUNTER(VCPU, signal_exits)
};
const struct kvm_stats_header kvm_vcpu_stats_header = {
@@ -267,7 +273,6 @@ debug_info_t *kvm_s390_dbf_uv;
/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
unsigned long end);
-static int sca_switch_to_extended(struct kvm *kvm);
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
@@ -340,28 +345,46 @@ static inline int plo_test_bit(unsigned char nr)
" lgr 0,%[function]\n"
/* Parameter registers are ignored for "test bit" */
" plo 0,0,0,0(0)\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (cc)
+ CC_IPM(cc)
+ : CC_OUT(cc, cc)
: [function] "d" (function)
+ : CC_CLOBBER_LIST("0"));
+ return CC_TRANSFORM(cc) == 0;
+}
+
+static __always_inline void pfcr_query(u8 (*query)[16])
+{
+ asm volatile(
+ " lghi 0,0\n"
+ " .insn rsy,0xeb0000000016,0,0,%[query]"
+ : [query] "=QS" (*query)
+ :
: "cc", "0");
- return cc == 0;
}
-static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
+static __always_inline void __sortl_query(u8 (*query)[32])
{
asm volatile(
" lghi 0,0\n"
- " lgr 1,%[query]\n"
+ " la 1,%[query]\n"
/* Parameter registers are ignored */
- " .insn rrf,%[opc] << 16,2,4,6,0\n"
+ " .insn rre,0xb9380000,2,4"
+ : [query] "=R" (*query)
:
- : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
- : "cc", "memory", "0", "1");
+ : "cc", "0", "1");
}
-#define INSN_SORTL 0xb938
-#define INSN_DFLTCC 0xb939
+static __always_inline void __dfltcc_query(u8 (*query)[32])
+{
+ asm volatile(
+ " lghi 0,0\n"
+ " la 1,%[query]\n"
+ /* Parameter registers are ignored */
+ " .insn rrf,0xb9390000,2,4,6,0"
+ : [query] "=R" (*query)
+ :
+ : "cc", "0", "1");
+}
static void __init kvm_s390_cpu_feat_init(void)
{
@@ -415,18 +438,21 @@ static void __init kvm_s390_cpu_feat_init(void)
kvm_s390_available_subfunc.kdsa);
if (test_facility(150)) /* SORTL */
- __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
+ __sortl_query(&kvm_s390_available_subfunc.sortl);
if (test_facility(151)) /* DFLTCC */
- __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
+ __dfltcc_query(&kvm_s390_available_subfunc.dfltcc);
- if (MACHINE_HAS_ESOP)
+ if (test_facility(201)) /* PFCR */
+ pfcr_query(&kvm_s390_available_subfunc.pfcr);
+
+ if (machine_has_esop())
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
/*
* We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
* 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
*/
- if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
+ if (!sclp.has_sief2 || !machine_has_esop() || !sclp.has_64bscao ||
!test_facility(3) || !nested)
return;
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
@@ -581,6 +607,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SET_GUEST_DEBUG:
case KVM_CAP_S390_DIAG318:
case KVM_CAP_IRQFD_RESAMPLE:
+ case KVM_CAP_S390_USER_OPEREXEC:
r = 1;
break;
case KVM_CAP_SET_GUEST_DEBUG2:
@@ -606,16 +633,18 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_NR_VCPUS:
case KVM_CAP_MAX_VCPUS:
case KVM_CAP_MAX_VCPU_ID:
- r = KVM_S390_BSCA_CPU_SLOTS;
+ /*
+ * Return the same value for KVM_CAP_MAX_VCPUS and
+ * KVM_CAP_MAX_VCPU_ID to conform with the KVM API.
+ */
+ r = KVM_S390_ESCA_CPU_SLOTS;
if (!kvm_s390_use_sca_entries())
r = KVM_MAX_VCPUS;
- else if (sclp.has_esca && sclp.has_64bscao)
- r = KVM_S390_ESCA_CPU_SLOTS;
if (ext == KVM_CAP_NR_VCPUS)
r = min_t(unsigned int, num_online_cpus(), r);
break;
case KVM_CAP_S390_COW:
- r = MACHINE_HAS_ESOP;
+ r = machine_has_esop();
break;
case KVM_CAP_S390_VECTOR_REGISTERS:
r = test_facility(129);
@@ -790,6 +819,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
set_kvm_facility(kvm->arch.model.fac_mask, 192);
set_kvm_facility(kvm->arch.model.fac_list, 192);
}
+ if (test_facility(198)) {
+ set_kvm_facility(kvm->arch.model.fac_mask, 198);
+ set_kvm_facility(kvm->arch.model.fac_list, 198);
+ }
+ if (test_facility(199)) {
+ set_kvm_facility(kvm->arch.model.fac_mask, 199);
+ set_kvm_facility(kvm->arch.model.fac_list, 199);
+ }
r = 0;
} else
r = -EINVAL;
@@ -886,6 +923,12 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
r ? "(not available)" : "(success)");
break;
+ case KVM_CAP_S390_USER_OPEREXEC:
+ VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_OPEREXEC");
+ kvm->arch.user_operexec = 1;
+ icpt_operexc_on_all_vcpus(kvm);
+ r = 0;
+ break;
default:
r = -EINVAL;
break;
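KVM_CAP_S390_USER_OPEREXEC is enabled per VM; once set, operation-exception intercepts exit to userspace instead of being handled in the kernel, analogous to the existing user_instr0 handling in intercept.c. A hedged userspace sketch, assuming the UAPI headers of a kernel that carries this patch:

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_user_operexec(int vm_fd)
{
	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_OPEREXEC };

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);	/* 0 on success, -1 and errno on failure */
}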
@@ -989,7 +1032,7 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
}
mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
- VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
+ VM_EVENT(kvm, 3, "New guest asce: 0x%p",
(void *) kvm->arch.gmap->asce);
break;
}
@@ -1534,6 +1577,9 @@ static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
+ VM_EVENT(kvm, 3, "GET: guest PFCR subfunc 0x%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[0],
+ ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[1]);
return 0;
}
@@ -1748,6 +1794,9 @@ static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
+ VM_EVENT(kvm, 3, "GET: guest PFCR subfunc 0x%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[0],
+ ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[1]);
return 0;
}
@@ -1816,6 +1865,9 @@ static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
+ VM_EVENT(kvm, 3, "GET: host PFCR subfunc 0x%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[0],
+ ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[1]);
return 0;
}
@@ -1888,22 +1940,18 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
* Updates the Multiprocessor Topology-Change-Report bit to signal
* the guest with a topology change.
* This is only relevant if the topology facility is present.
- *
- * The SCA version, bsca or esca, doesn't matter as offset is the same.
*/
static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
{
union sca_utility new, old;
- struct bsca_block *sca;
+ struct esca_block *sca;
- read_lock(&kvm->arch.sca_lock);
sca = kvm->arch.sca;
+ old = READ_ONCE(sca->utility);
do {
- old = READ_ONCE(sca->utility);
new = old;
new.mtcr = val;
- } while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val);
- read_unlock(&kvm->arch.sca_lock);
+ } while (!try_cmpxchg(&sca->utility.val, &old.val, new.val));
}
static int kvm_s390_set_topo_change_indication(struct kvm *kvm,
@@ -1924,9 +1972,7 @@ static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
if (!test_kvm_facility(kvm, 11))
return -ENXIO;
- read_lock(&kvm->arch.sca_lock);
- topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
- read_unlock(&kvm->arch.sca_lock);
+ topo = kvm->arch.sca->utility.mtcr;
return put_user(topo, (u8 __user *)attr->addr);
}
@@ -2624,15 +2670,9 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
if (kvm_s390_pv_is_protected(kvm))
break;
- /*
- * FMT 4 SIE needs esca. As we never switch back to bsca from
- * esca, we need no cleanup in the error cases below
- */
- r = sca_switch_to_extended(kvm);
- if (r)
- break;
-
- r = s390_disable_cow_sharing();
+ mmap_write_lock(kvm->mm);
+ r = gmap_helper_disable_cow_sharing();
+ mmap_write_unlock(kvm->mm);
if (r)
break;
@@ -2997,14 +3037,9 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
break;
}
case KVM_CREATE_IRQCHIP: {
- struct kvm_irq_routing_entry routing;
-
r = -EINVAL;
- if (kvm->arch.use_irqchip) {
- /* Set up dummy routing. */
- memset(&routing, 0, sizeof(routing));
- r = kvm_set_irq_routing(kvm, &routing, 0, 0);
- }
+ if (kvm->arch.use_irqchip)
+ r = 0;
break;
}
case KVM_SET_DEVICE_ATTR: {
@@ -3277,10 +3312,7 @@ static void kvm_s390_crypto_init(struct kvm *kvm)
static void sca_dispose(struct kvm *kvm)
{
- if (kvm->arch.use_esca)
- free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
- else
- free_page((unsigned long)(kvm->arch.sca));
+ free_pages_exact(kvm->arch.sca, sizeof(*kvm->arch.sca));
kvm->arch.sca = NULL;
}
@@ -3294,10 +3326,9 @@ void kvm_arch_free_vm(struct kvm *kvm)
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
- gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
- int i, rc;
+ gfp_t alloc_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO;
char debug_name[16];
- static unsigned long sca_offset;
+ int i, rc;
rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
@@ -3318,20 +3349,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (!sclp.has_64bscao)
alloc_flags |= GFP_DMA;
- rwlock_init(&kvm->arch.sca_lock);
- /* start with basic SCA */
- kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
- if (!kvm->arch.sca)
- goto out_err;
mutex_lock(&kvm_lock);
- sca_offset += 16;
- if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
- sca_offset = 0;
- kvm->arch.sca = (struct bsca_block *)
- ((char *) kvm->arch.sca + sca_offset);
+
+ kvm->arch.sca = alloc_pages_exact(sizeof(*kvm->arch.sca), alloc_flags);
mutex_unlock(&kvm_lock);
+ if (!kvm->arch.sca)
+ goto out_err;
- sprintf(debug_name, "kvm-%u", current->pid);
+ snprintf(debug_name, sizeof(debug_name), "kvm-%u", current->pid);
kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
if (!kvm->arch.dbf)
@@ -3361,7 +3386,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
/* we emulate STHYI in kvm */
set_kvm_facility(kvm->arch.model.fac_mask, 74);
set_kvm_facility(kvm->arch.model.fac_list, 74);
- if (MACHINE_HAS_TLB_GUEST) {
+ if (machine_has_tlb_guest()) {
set_kvm_facility(kvm->arch.model.fac_mask, 147);
set_kvm_facility(kvm->arch.model.fac_list, 147);
}
@@ -3394,8 +3419,20 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
VM_EVENT(kvm, 3, "vm created with type %lu", type);
if (type & KVM_VM_S390_UCONTROL) {
+ struct kvm_userspace_memory_region2 fake_memslot = {
+ .slot = KVM_S390_UCONTROL_MEMSLOT,
+ .guest_phys_addr = 0,
+ .userspace_addr = 0,
+ .memory_size = ALIGN_DOWN(TASK_SIZE, _SEGMENT_SIZE),
+ .flags = 0,
+ };
+
kvm->arch.gmap = NULL;
kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
+		/* one flat fake memslot covering the whole address space */
+ mutex_lock(&kvm->slots_lock);
+ KVM_BUG_ON(kvm_set_internal_memslot(kvm, &fake_memslot), kvm);
+ mutex_unlock(&kvm->slots_lock);
} else {
if (sclp.hamax == U64_MAX)
kvm->arch.mem_limit = TASK_SIZE_MAX;
@@ -3417,7 +3454,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_s390_gisa_init(kvm);
INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
kvm->arch.pv.set_aside = NULL;
- KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
+ KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
return 0;
out_err:
@@ -3480,7 +3517,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_s390_destroy_adapters(kvm);
kvm_s390_clear_float_irqs(kvm);
kvm_s390_vsie_destroy(kvm);
- KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
+ KVM_EVENT(3, "vm 0x%p destroyed", kvm);
}
/* Section: vcpu related */
@@ -3496,133 +3533,38 @@ static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
+ struct esca_block *sca = vcpu->kvm->arch.sca;
+
if (!kvm_s390_use_sca_entries())
return;
- read_lock(&vcpu->kvm->arch.sca_lock);
- if (vcpu->kvm->arch.use_esca) {
- struct esca_block *sca = vcpu->kvm->arch.sca;
-
- clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
- sca->cpu[vcpu->vcpu_id].sda = 0;
- } else {
- struct bsca_block *sca = vcpu->kvm->arch.sca;
- clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
- sca->cpu[vcpu->vcpu_id].sda = 0;
- }
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ clear_bit_inv(vcpu->vcpu_id, (unsigned long *)sca->mcn);
+ sca->cpu[vcpu->vcpu_id].sda = 0;
}
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
- if (!kvm_s390_use_sca_entries()) {
- phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca);
-
- /* we still need the basic sca for the ipte control */
- vcpu->arch.sie_block->scaoh = sca_phys >> 32;
- vcpu->arch.sie_block->scaol = sca_phys;
- return;
- }
- read_lock(&vcpu->kvm->arch.sca_lock);
- if (vcpu->kvm->arch.use_esca) {
- struct esca_block *sca = vcpu->kvm->arch.sca;
- phys_addr_t sca_phys = virt_to_phys(sca);
+ struct esca_block *sca = vcpu->kvm->arch.sca;
+ phys_addr_t sca_phys = virt_to_phys(sca);
- sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
- vcpu->arch.sie_block->scaoh = sca_phys >> 32;
- vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK;
- vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
- set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
- } else {
- struct bsca_block *sca = vcpu->kvm->arch.sca;
- phys_addr_t sca_phys = virt_to_phys(sca);
-
- sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
- vcpu->arch.sie_block->scaoh = sca_phys >> 32;
- vcpu->arch.sie_block->scaol = sca_phys;
- set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
- }
- read_unlock(&vcpu->kvm->arch.sca_lock);
-}
+ /* we still need the sca header for the ipte control */
+ vcpu->arch.sie_block->scaoh = sca_phys >> 32;
+ vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK;
+ vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
-/* Basic SCA to Extended SCA data copy routines */
-static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
-{
- d->sda = s->sda;
- d->sigp_ctrl.c = s->sigp_ctrl.c;
- d->sigp_ctrl.scn = s->sigp_ctrl.scn;
-}
-
-static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
-{
- int i;
-
- d->ipte_control = s->ipte_control;
- d->mcn[0] = s->mcn;
- for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
- sca_copy_entry(&d->cpu[i], &s->cpu[i]);
-}
-
-static int sca_switch_to_extended(struct kvm *kvm)
-{
- struct bsca_block *old_sca = kvm->arch.sca;
- struct esca_block *new_sca;
- struct kvm_vcpu *vcpu;
- unsigned long vcpu_idx;
- u32 scaol, scaoh;
- phys_addr_t new_sca_phys;
-
- if (kvm->arch.use_esca)
- return 0;
-
- new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
- if (!new_sca)
- return -ENOMEM;
-
- new_sca_phys = virt_to_phys(new_sca);
- scaoh = new_sca_phys >> 32;
- scaol = new_sca_phys & ESCA_SCAOL_MASK;
-
- kvm_s390_vcpu_block_all(kvm);
- write_lock(&kvm->arch.sca_lock);
-
- sca_copy_b_to_e(new_sca, old_sca);
-
- kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
- vcpu->arch.sie_block->scaoh = scaoh;
- vcpu->arch.sie_block->scaol = scaol;
- vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
- }
- kvm->arch.sca = new_sca;
- kvm->arch.use_esca = 1;
-
- write_unlock(&kvm->arch.sca_lock);
- kvm_s390_vcpu_unblock_all(kvm);
-
- free_page((unsigned long)old_sca);
+ if (!kvm_s390_use_sca_entries())
+ return;
- VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
- old_sca, kvm->arch.sca);
- return 0;
+ set_bit_inv(vcpu->vcpu_id, (unsigned long *)sca->mcn);
+ sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
}
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
- int rc;
-
- if (!kvm_s390_use_sca_entries()) {
- if (id < KVM_MAX_VCPUS)
- return true;
- return false;
- }
- if (id < KVM_S390_BSCA_CPU_SLOTS)
- return true;
- if (!sclp.has_esca || !sclp.has_64bscao)
- return false;
-
- rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
+ if (!kvm_s390_use_sca_entries())
+ return id < KVM_MAX_VCPUS;
- return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
+ return id < KVM_S390_ESCA_CPU_SLOTS;
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
@@ -3715,7 +3657,6 @@ __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
- gmap_enable(vcpu->arch.enabled_gmap);
kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
__start_cpu_timer_accounting(vcpu);
@@ -3728,8 +3669,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
__stop_cpu_timer_accounting(vcpu);
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
- vcpu->arch.enabled_gmap = gmap_get_enabled();
- gmap_disable(vcpu->arch.enabled_gmap);
}
@@ -3747,8 +3686,6 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
}
if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
- /* make vcpu_load load the right gmap on the first trigger */
- vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}
static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
@@ -3770,6 +3707,13 @@ static bool kvm_has_pckmo_ecc(struct kvm *kvm)
}
+static bool kvm_has_pckmo_hmac(struct kvm *kvm)
+{
+ /* At least one HMAC subfunction must be present */
+ return kvm_has_pckmo_subfunc(kvm, 118) ||
+ kvm_has_pckmo_subfunc(kvm, 122);
+}
+
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
/*
@@ -3782,7 +3726,7 @@ static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
vcpu->arch.sie_block->eca &= ~ECA_APIE;
- vcpu->arch.sie_block->ecd &= ~ECD_ECC;
+ vcpu->arch.sie_block->ecd &= ~(ECD_ECC | ECD_HMAC);
if (vcpu->kvm->arch.crypto.apie)
vcpu->arch.sie_block->eca |= ECA_APIE;
@@ -3790,9 +3734,11 @@ static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
/* Set up protected key support */
if (vcpu->kvm->arch.crypto.aes_kw) {
vcpu->arch.sie_block->ecb3 |= ECB3_AES;
- /* ecc is also wrapped with AES key */
+ /* ecc/hmac is also wrapped with AES key */
if (kvm_has_pckmo_ecc(vcpu->kvm))
vcpu->arch.sie_block->ecd |= ECD_ECC;
+ if (kvm_has_pckmo_hmac(vcpu->kvm))
+ vcpu->arch.sie_block->ecd |= ECD_HMAC;
}
if (vcpu->kvm->arch.crypto.dea_kw)
@@ -3841,8 +3787,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
kvm_s390_vcpu_setup_model(vcpu);
- /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
- if (MACHINE_HAS_ESOP)
+ /* pgste_set_pte has special handling for !machine_has_esop() */
+ if (machine_has_esop())
vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
if (test_kvm_facility(vcpu->kvm, 9))
vcpu->arch.sie_block->ecb |= ECB_SRSI;
@@ -3864,7 +3810,7 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->eca |= ECA_IB;
if (sclp.has_siif)
vcpu->arch.sie_block->eca |= ECA_SII;
- if (sclp.has_sigpif)
+ if (kvm_s390_use_sca_entries())
vcpu->arch.sie_block->eca |= ECA_SIGPI;
if (test_kvm_facility(vcpu->kvm, 129)) {
vcpu->arch.sie_block->eca |= ECA_VX;
@@ -3892,8 +3838,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
if (rc)
return rc;
}
- hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
+ hrtimer_setup(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
vcpu->arch.sie_block->hpid = HPID_KVM;
@@ -3974,7 +3920,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
goto out_free_sie_block;
}
- VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
+ VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%p, sie block at 0x%p",
vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
@@ -4080,7 +4026,7 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
{
/* do not poll with more than halt_poll_max_steal percent of steal time */
- if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
+ if (get_lowcore()->avg_steal_timer * 100 / (TICK_USEC << 12) >=
READ_ONCE(halt_poll_max_steal)) {
vcpu->stat.halt_no_poll_steal++;
return true;
@@ -4311,8 +4257,6 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- int ret = 0;
-
vcpu_load(vcpu);
vcpu->run->s.regs.fpc = fpu->fpc;
@@ -4323,7 +4267,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
vcpu_put(vcpu);
- return ret;
+ return 0;
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
@@ -4460,6 +4404,75 @@ static bool ibs_enabled(struct kvm_vcpu *vcpu)
return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}
+static int __kvm_s390_fixup_fault_sync(struct gmap *gmap, gpa_t gaddr, unsigned int flags)
+{
+ struct kvm *kvm = gmap->private;
+ gfn_t gfn = gpa_to_gfn(gaddr);
+ bool unlocked;
+ hva_t vmaddr;
+ gpa_t tmp;
+ int rc;
+
+ if (kvm_is_ucontrol(kvm)) {
+ tmp = __gmap_translate(gmap, gaddr);
+ gfn = gpa_to_gfn(tmp);
+ }
+
+ vmaddr = gfn_to_hva(kvm, gfn);
+ rc = fixup_user_fault(gmap->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
+ if (!rc)
+ rc = __gmap_link(gmap, gaddr, vmaddr);
+ return rc;
+}
+
+/**
+ * __kvm_s390_mprotect_many() - Apply specified protection to guest pages
+ * @gmap: the gmap of the guest
+ * @gpa: the starting guest address
+ * @npages: how many pages to protect
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ * @bits: pgste notification bits to set
+ *
+ * Returns: 0 in case of success, < 0 in case of error - see gmap_protect_one()
+ *
+ * Context: kvm->srcu and gmap->mm need to be held in read mode
+ */
+int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot,
+ unsigned long bits)
+{
+ unsigned int fault_flag = (prot & PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
+ gpa_t end = gpa + npages * PAGE_SIZE;
+ int rc;
+
+ for (; gpa < end; gpa = ALIGN(gpa + 1, rc)) {
+ rc = gmap_protect_one(gmap, gpa, prot, bits);
+ if (rc == -EAGAIN) {
+ __kvm_s390_fixup_fault_sync(gmap, gpa, fault_flag);
+ rc = gmap_protect_one(gmap, gpa, prot, bits);
+ }
+ if (rc < 0)
+ return rc;
+ }
+
+ return 0;
+}
+
+static int kvm_s390_mprotect_notify_prefix(struct kvm_vcpu *vcpu)
+{
+ gpa_t gaddr = kvm_s390_get_prefix(vcpu);
+ int idx, rc;
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ mmap_read_lock(vcpu->arch.gmap->mm);
+
+ rc = __kvm_s390_mprotect_many(vcpu->arch.gmap, gaddr, 2, PROT_WRITE, GMAP_NOTIFY_MPROT);
+
+ mmap_read_unlock(vcpu->arch.gmap->mm);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ return rc;
+}
+
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
@@ -4475,9 +4488,8 @@ retry:
*/
if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
int rc;
- rc = gmap_mprotect_notify(vcpu->arch.gmap,
- kvm_s390_get_prefix(vcpu),
- PAGE_SIZE * 2, PROT_WRITE);
+
+ rc = kvm_s390_mprotect_notify_prefix(vcpu);
if (rc) {
kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
return rc;
@@ -4575,22 +4587,6 @@ int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clo
return 1;
}
-/**
- * kvm_arch_fault_in_page - fault-in guest page if necessary
- * @vcpu: The corresponding virtual cpu
- * @gpa: Guest physical address
- * @writable: Whether the page should be writable or not
- *
- * Make sure that a guest page has been faulted-in on the host.
- *
- * Return: Zero on success, negative error code otherwise.
- */
-long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
-{
- return gmap_fault(vcpu->arch.gmap, gpa,
- writable ? FAULT_FLAG_WRITE : 0);
-}
-
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
unsigned long token)
{
@@ -4658,12 +4654,11 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
if (!vcpu->arch.gmap->pfault_enabled)
return false;
- hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
- hva += current->thread.gmap_addr & ~PAGE_MASK;
+ hva = gfn_to_hva(vcpu->kvm, current->thread.gmap_teid.addr);
if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
return false;
- return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
+ return kvm_setup_async_pf(vcpu, current->thread.gmap_teid.addr * PAGE_SIZE, hva, &arch);
}
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
@@ -4680,9 +4675,6 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
- if (need_resched())
- schedule();
-
if (!kvm_is_ucontrol(vcpu->kvm)) {
rc = kvm_s390_deliver_pending_interrupts(vcpu);
if (rc || guestdbg_exit_pending(vcpu))
@@ -4701,6 +4693,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
vcpu->arch.sie_block->icptcode = 0;
+ current->thread.gmap_int_code = 0;
cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
trace_kvm_s390_sie_enter(vcpu, cpuflags);
@@ -4708,7 +4701,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
return 0;
}
-static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
+static int vcpu_post_run_addressing_exception(struct kvm_vcpu *vcpu)
{
struct kvm_s390_pgm_info pgm_info = {
.code = PGM_ADDRESSING,
@@ -4744,10 +4737,182 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
+static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu)
+{
+ KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
+ "Unexpected program interrupt 0x%x, TEID 0x%016lx",
+ current->thread.gmap_int_code, current->thread.gmap_teid.val);
+}
+
+/*
+ * __kvm_s390_handle_dat_fault() - handle a dat fault for the gmap of a vcpu
+ * @vcpu: the vCPU whose gmap is to be fixed up
+ * @gfn: the guest frame number used for memslots (including fake memslots)
+ * @gaddr: the gmap address, does not have to match @gfn for ucontrol gmaps
+ * @foll: FOLL_* flags
+ *
+ * Return: 0 on success, < 0 in case of error.
+ * Context: The mm lock must not be held before calling. May sleep.
+ */
+int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int foll)
+{
+ struct kvm_memory_slot *slot;
+ unsigned int fault_flags;
+ bool writable, unlocked;
+ unsigned long vmaddr;
+ struct page *page;
+ kvm_pfn_t pfn;
+ int rc;
+
+ slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+ if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
+ return vcpu_post_run_addressing_exception(vcpu);
+
+ fault_flags = foll & FOLL_WRITE ? FAULT_FLAG_WRITE : 0;
+ if (vcpu->arch.gmap->pfault_enabled)
+ foll |= FOLL_NOWAIT;
+ vmaddr = __gfn_to_hva_memslot(slot, gfn);
+
+try_again:
+ pfn = __kvm_faultin_pfn(slot, gfn, foll, &writable, &page);
+
+ /* Access outside memory, inject addressing exception */
+ if (is_noslot_pfn(pfn))
+ return vcpu_post_run_addressing_exception(vcpu);
+ /* Signal pending: try again */
+ if (pfn == KVM_PFN_ERR_SIGPENDING)
+ return -EAGAIN;
+
+ /* Needs I/O, try to setup async pfault (only possible with FOLL_NOWAIT) */
+ if (pfn == KVM_PFN_ERR_NEEDS_IO) {
+ trace_kvm_s390_major_guest_pfault(vcpu);
+ if (kvm_arch_setup_async_pf(vcpu))
+ return 0;
+ vcpu->stat.pfault_sync++;
+ /* Could not setup async pfault, try again synchronously */
+ foll &= ~FOLL_NOWAIT;
+ goto try_again;
+ }
+ /* Any other error */
+ if (is_error_pfn(pfn))
+ return -EFAULT;
+
+ /* Success */
+ mmap_read_lock(vcpu->arch.gmap->mm);
+ /* Mark the userspace PTEs as young and/or dirty, to avoid page fault loops */
+ rc = fixup_user_fault(vcpu->arch.gmap->mm, vmaddr, fault_flags, &unlocked);
+ if (!rc)
+ rc = __gmap_link(vcpu->arch.gmap, gaddr, vmaddr);
+ scoped_guard(spinlock, &vcpu->kvm->mmu_lock) {
+ kvm_release_faultin_page(vcpu->kvm, page, false, writable);
+ }
+ mmap_read_unlock(vcpu->arch.gmap->mm);
+ return rc;
+}
+
+static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int foll)
+{
+ unsigned long gaddr_tmp;
+ gfn_t gfn;
+
+ gfn = gpa_to_gfn(gaddr);
+ if (kvm_is_ucontrol(vcpu->kvm)) {
+ /*
+ * This translates the per-vCPU guest address into a
+ * fake guest address, which can then be used with the
+ * fake memslots that are identity mapping userspace.
+ * This allows ucontrol VMs to use the normal fault
+ * resolution path, like normal VMs.
+ */
+ mmap_read_lock(vcpu->arch.gmap->mm);
+ gaddr_tmp = __gmap_translate(vcpu->arch.gmap, gaddr);
+ mmap_read_unlock(vcpu->arch.gmap->mm);
+ if (gaddr_tmp == -EFAULT) {
+ vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
+ vcpu->run->s390_ucontrol.trans_exc_code = gaddr;
+ vcpu->run->s390_ucontrol.pgm_code = PGM_SEGMENT_TRANSLATION;
+ return -EREMOTE;
+ }
+ gfn = gpa_to_gfn(gaddr_tmp);
+ }
+ return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, foll);
+}
+
+static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
+{
+ unsigned int foll = 0;
+ unsigned long gaddr;
+ int rc;
+
+ gaddr = current->thread.gmap_teid.addr * PAGE_SIZE;
+ if (kvm_s390_cur_gmap_fault_is_write())
+ foll = FOLL_WRITE;
+
+ switch (current->thread.gmap_int_code & PGM_INT_CODE_MASK) {
+ case 0:
+ vcpu->stat.exit_null++;
+ break;
+ case PGM_SECURE_STORAGE_ACCESS:
+ case PGM_SECURE_STORAGE_VIOLATION:
+ kvm_s390_assert_primary_as(vcpu);
+ /*
+ * This can happen after a reboot with asynchronous teardown;
+ * the new guest (normal or protected) will run on top of the
+ * previous protected guest. The old pages need to be destroyed
+ * so the new guest can use them.
+ */
+ if (kvm_s390_pv_destroy_page(vcpu->kvm, gaddr)) {
+ /*
+ * Either KVM messed up the secure guest mapping or the
+ * same page is mapped into multiple secure guests.
+ *
+ * This exception is only triggered when a guest 2 is
+ * running and can therefore never occur in kernel
+ * context.
+ */
+ pr_warn_ratelimited("Secure storage violation (%x) in task: %s, pid %d\n",
+ current->thread.gmap_int_code, current->comm,
+ current->pid);
+ send_sig(SIGSEGV, current, 0);
+ }
+ break;
+ case PGM_NON_SECURE_STORAGE_ACCESS:
+ kvm_s390_assert_primary_as(vcpu);
+ /*
+ * This is normal operation; a page belonging to a protected
+ * guest has not been imported yet. Try to import the page into
+ * the protected guest.
+ */
+ rc = kvm_s390_pv_convert_to_secure(vcpu->kvm, gaddr);
+ if (rc == -EINVAL)
+ send_sig(SIGSEGV, current, 0);
+ if (rc != -ENXIO)
+ break;
+ foll = FOLL_WRITE;
+ fallthrough;
+ case PGM_PROTECTION:
+ case PGM_SEGMENT_TRANSLATION:
+ case PGM_PAGE_TRANSLATION:
+ case PGM_ASCE_TYPE:
+ case PGM_REGION_FIRST_TRANS:
+ case PGM_REGION_SECOND_TRANS:
+ case PGM_REGION_THIRD_TRANS:
+ kvm_s390_assert_primary_as(vcpu);
+ return vcpu_dat_fault_handler(vcpu, gaddr, foll);
+ default:
+ KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx",
+ current->thread.gmap_int_code, current->thread.gmap_teid.val);
+ send_sig(SIGSEGV, current, 0);
+ break;
+ }
+ return 0;
+}
+
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
struct mcck_volatile_info *mcck_info;
struct sie_page *sie_page;
+ int rc;
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
vcpu->arch.sie_block->icptcode);
@@ -4769,7 +4934,7 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
}
if (vcpu->arch.sie_block->icptcode > 0) {
- int rc = kvm_handle_sie_intercept(vcpu);
+ rc = kvm_handle_sie_intercept(vcpu);
if (rc != -EOPNOTSUPP)
return rc;
@@ -4778,24 +4943,28 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
return -EREMOTE;
- } else if (exit_reason != -EFAULT) {
- vcpu->stat.exit_null++;
- return 0;
- } else if (kvm_is_ucontrol(vcpu->kvm)) {
- vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
- vcpu->run->s390_ucontrol.trans_exc_code =
- current->thread.gmap_addr;
- vcpu->run->s390_ucontrol.pgm_code = 0x10;
- return -EREMOTE;
- } else if (current->thread.gmap_pfault) {
- trace_kvm_s390_major_guest_pfault(vcpu);
- current->thread.gmap_pfault = 0;
- if (kvm_arch_setup_async_pf(vcpu))
- return 0;
- vcpu->stat.pfault_sync++;
- return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
}
- return vcpu_post_run_fault_in_sie(vcpu);
+
+ return vcpu_post_run_handle_fault(vcpu);
+}
+
+int noinstr kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb,
+ u64 *gprs, unsigned long gasce)
+{
+ int ret;
+
+ guest_state_enter_irqoff();
+
+ /*
+ * The guest_state_{enter,exit}_irqoff() functions inform lockdep and
+ * tracing that entry to the guest will enable host IRQs, and exit from
+ * the guest will disable host IRQs.
+ */
+ ret = sie64a(scb, gprs, gasce);
+
+ guest_state_exit_irqoff();
+
+ return ret;
}
#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
@@ -4810,27 +4979,45 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
*/
kvm_vcpu_srcu_read_lock(vcpu);
- do {
+ while (true) {
rc = vcpu_pre_run(vcpu);
+ kvm_vcpu_srcu_read_unlock(vcpu);
if (rc || guestdbg_exit_pending(vcpu))
break;
- kvm_vcpu_srcu_read_unlock(vcpu);
/*
* As PF_VCPU will be used in fault handler, between
- * guest_enter and guest_exit should be no uaccess.
+ * guest_timing_enter_irqoff and guest_timing_exit_irqoff
+ * should be no uaccess.
*/
- local_irq_disable();
- guest_enter_irqoff();
- __disable_cpu_timer_accounting(vcpu);
- local_irq_enable();
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
memcpy(sie_page->pv_grregs,
vcpu->run->s.regs.gprs,
sizeof(sie_page->pv_grregs));
}
- exit_reason = sie64a(vcpu->arch.sie_block,
- vcpu->run->s.regs.gprs);
+
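+ /*
+ * Check for pending work (signals, rescheduling, notify-resume) with
+ * IRQs disabled; if anything is pending, handle it with IRQs enabled
+ * and retry, so SIE is only entered once no work remains.
+ */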
+xfer_to_guest_mode_check:
+ local_irq_disable();
+ xfer_to_guest_mode_prepare();
+ if (xfer_to_guest_mode_work_pending()) {
+ local_irq_enable();
+ rc = kvm_xfer_to_guest_mode_handle_work(vcpu);
+ if (rc)
+ break;
+ goto xfer_to_guest_mode_check;
+ }
+
+ guest_timing_enter_irqoff();
+ __disable_cpu_timer_accounting(vcpu);
+
+ exit_reason = kvm_s390_enter_exit_sie(vcpu->arch.sie_block,
+ vcpu->run->s.regs.gprs,
+ vcpu->arch.gmap->asce);
+
+ __enable_cpu_timer_accounting(vcpu);
+ guest_timing_exit_irqoff();
+ local_irq_enable();
+
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
memcpy(vcpu->run->s.regs.gprs,
sie_page->pv_grregs,
@@ -4846,16 +5033,15 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
}
}
- local_irq_disable();
- __enable_cpu_timer_accounting(vcpu);
- guest_exit_irqoff();
- local_irq_enable();
kvm_vcpu_srcu_read_lock(vcpu);
rc = vcpu_post_run(vcpu, exit_reason);
- } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
+ if (rc || guestdbg_exit_pending(vcpu)) {
+ kvm_vcpu_srcu_read_unlock(vcpu);
+ break;
+ }
+ }
- kvm_vcpu_srcu_read_unlock(vcpu);
return rc;
}
@@ -4915,7 +5101,7 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
}
- if (MACHINE_HAS_GS) {
+ if (cpu_has_gs()) {
preempt_disable();
local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
if (current->thread.gs_cb) {
@@ -4981,7 +5167,7 @@ static void store_regs_fmt2(struct kvm_vcpu *vcpu)
kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
- if (MACHINE_HAS_GS) {
+ if (cpu_has_gs()) {
preempt_disable();
local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
if (vcpu->arch.gs_enabled)
@@ -5032,7 +5218,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
if (vcpu->kvm->arch.pv.dumping)
return -EINVAL;
- if (kvm_run->immediate_exit)
+ if (!vcpu->wants_to_run)
return -EINTR;
if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
@@ -5071,6 +5257,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
if (signal_pending(current) && !rc) {
kvm_run->exit_reason = KVM_EXIT_INTR;
+ vcpu->stat.signal_exits++;
rc = -EINTR;
}
@@ -5437,8 +5624,8 @@ static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
return r;
}
-long kvm_arch_vcpu_async_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+long kvm_arch_vcpu_unlocked_ioctl(struct file *filp, unsigned int ioctl,
+ unsigned long arg)
{
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
@@ -5633,7 +5820,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
#endif
case KVM_S390_VCPU_FAULT: {
- r = gmap_fault(vcpu->arch.gmap, arg, 0);
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ r = vcpu_dat_fault_handler(vcpu, arg, 0);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
}
case KVM_ENABLE_CAP:
@@ -5749,6 +5938,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
{
gpa_t size;
+ if (kvm_is_ucontrol(kvm) && new->id < KVM_USER_MEM_SLOTS)
+ return -EINVAL;
+
/* When we are protected, we should not change the memory slots */
if (kvm_s390_pv_get_handle(kvm))
return -EINVAL;
@@ -5798,6 +5990,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
{
int rc = 0;
+ if (kvm_is_ucontrol(kvm))
+ return;
+
switch (change) {
case KVM_MR_DELETE:
rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index bf8534218af3..65c950760993 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -20,6 +20,8 @@
#include <asm/processor.h>
#include <asm/sclp.h>
+#define KVM_S390_UCONTROL_MEMSLOT (KVM_USER_MEM_SLOTS + 0)
+
static inline void kvm_s390_fpu_store(struct kvm_run *run)
{
fpu_stfpc(&run->s.regs.fpc);
@@ -267,13 +269,27 @@ static inline unsigned long kvm_s390_get_gfn_end(struct kvm_memslots *slots)
static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm)
{
- u32 gd = virt_to_phys(kvm->arch.gisa_int.origin);
+ u32 gd;
+
+ if (!kvm->arch.gisa_int.origin)
+ return 0;
+
+ gd = virt_to_phys(kvm->arch.gisa_int.origin);
if (gd && sclp.has_gisaf)
gd |= GISA_FORMAT1;
return gd;
}
+static inline hva_t gpa_to_hva(struct kvm *kvm, gpa_t gpa)
+{
+ hva_t hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
+
+ if (!kvm_is_error_hva(hva))
+ hva |= offset_in_page(gpa);
+ return hva;
+}
+
/* implemented in pv.c */
int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
@@ -292,6 +308,9 @@ int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc);
int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
u16 *rc, u16 *rrc);
+int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr);
+int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr);
+int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb);
static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm)
{
@@ -303,6 +322,41 @@ static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu)
return vcpu->arch.pv.handle;
}
+/**
+ * __kvm_s390_pv_destroy_page() - Destroy a guest page.
+ * @page: the page to destroy
+ *
+ * An attempt will be made to destroy the given guest page. If the attempt
+ * fails, an attempt is made to export the page. If both attempts fail, an
+ * appropriate error is returned.
+ *
+ * Context: must be called holding the mm lock for gmap->mm
+ */
+static inline int __kvm_s390_pv_destroy_page(struct page *page)
+{
+ struct folio *folio = page_folio(page);
+ int rc;
+
+ /* Large folios cannot be secure. Small folio implies FW_LEVEL_PTE. */
+ if (folio_test_large(folio))
+ return -EFAULT;
+
+ rc = uv_destroy_folio(folio);
+ /*
+ * Fault handlers can race; it is possible that two CPUs will fault
+ * on the same secure page. One CPU can destroy the page, reboot,
+ * re-enter secure mode and import it, while the second CPU is still
+ * stuck at the beginning of the handler. At some point the second
+ * CPU will be able to progress, and it will not be able to destroy
+ * the page. In that case we do not want to terminate the process;
+ * instead, we try to export the page.
+ */
+ if (rc)
+ rc = uv_convert_from_secure_folio(folio);
+
+ return rc;
+}
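+
+/*
+ * A minimal usage sketch (mirroring kvm_s390_pv_destroy_page() in pv.c),
+ * assuming gaddr is a guest physical address:
+ *
+ *	mmap_read_lock(kvm->mm);
+ *	page = gfn_to_page(kvm, gpa_to_gfn(gaddr));
+ *	if (page)
+ *		rc = __kvm_s390_pv_destroy_page(page);
+ *	kvm_release_page_clean(page);
+ *	mmap_read_unlock(kvm->mm);
+ */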
+
/* implemented in interrupt.c */
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
@@ -382,6 +436,10 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
unsigned long end);
void kvm_s390_vsie_init(struct kvm *kvm);
void kvm_s390_vsie_destroy(struct kvm *kvm);
+int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level);
+
+/* implemented in gmap-vsie.c */
+struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level);
/* implemented in sigp.c */
int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
@@ -389,7 +447,6 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
/* implemented in kvm-s390.c */
int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
-long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
@@ -404,6 +461,14 @@ void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm);
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu);
int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc);
+int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags);
+int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot,
+ unsigned long bits);
+
+static inline int kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gpa_t gaddr, unsigned int flags)
+{
+ return __kvm_s390_handle_dat_fault(vcpu, gpa_to_gfn(gaddr), gaddr, flags);
+}
/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
@@ -505,13 +570,6 @@ void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu);
int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu);
int kvm_s390_handle_per_event(struct kvm_vcpu *vcpu);
-/* support for Basic/Extended SCA handling */
-static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm)
-{
- struct bsca_block *sca = kvm->arch.sca; /* SCA version doesn't matter */
-
- return &sca->ipte_control;
-}
static inline int kvm_s390_use_sca_entries(void)
{
/*
@@ -519,11 +577,18 @@ static inline int kvm_s390_use_sca_entries(void)
* might use the entries. By not setting the entries and keeping them
* invalid, hardware will not access them but intercept.
*/
- return sclp.has_sigpif;
+ return sclp.has_sigpif && sclp.has_esca;
}
void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
struct mcck_volatile_info *mcck_info);
+static inline bool kvm_s390_cur_gmap_fault_is_write(void)
+{
+ if (current->thread.gmap_int_code == PGM_PROTECTION)
+ return true;
+ return test_facility(75) && (current->thread.gmap_teid.fsi == TEID_FSI_STORE);
+}
+
/**
* kvm_s390_vcpu_crypto_reset_all
*
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index ffa7739c7a28..8c40154ff50f 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -103,7 +103,7 @@ static int zpci_reset_aipb(u8 nisc)
/*
* AEN registration can only happen once per system boot. If
* an aipb already exists then AEN was already registered and
- * we can re-use the aipb contents. This can only happen if
+ * we can reuse the aipb contents. This can only happen if
* the KVM module was removed and re-inserted. However, we must
* ensure that the same forwarding ISC is used as this is assigned
* during KVM module load.
@@ -208,13 +208,12 @@ static inline int account_mem(unsigned long nr_pages)
page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ cur_pages = atomic_long_read(&user->locked_vm);
do {
- cur_pages = atomic_long_read(&user->locked_vm);
new_pages = cur_pages + nr_pages;
if (new_pages > page_limit)
return -ENOMEM;
- } while (atomic_long_cmpxchg(&user->locked_vm, cur_pages,
- new_pages) != cur_pages);
+ } while (!atomic_long_try_cmpxchg(&user->locked_vm, &cur_pages, new_pages));
atomic64_add(nr_pages, &current->mm->pinned_vm);
@@ -434,7 +433,6 @@ static void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
{
struct zpci_dev *zdev = opaque;
- u8 status;
int rc;
if (!zdev)
@@ -481,13 +479,7 @@ static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
*/
zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);
- rc = zpci_enable_device(zdev);
- if (rc)
- goto clear_gisa;
-
- /* Re-register the IOMMU that was already created */
- rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table), &status);
+ rc = zpci_reenable_device(zdev);
if (rc)
goto clear_gisa;
@@ -517,7 +509,6 @@ static void kvm_s390_pci_unregister_kvm(void *opaque)
{
struct zpci_dev *zdev = opaque;
struct kvm *kvm;
- u8 status;
if (!zdev)
return;
@@ -551,12 +542,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque)
goto out;
}
- if (zpci_enable_device(zdev))
- goto out;
-
- /* Re-register the IOMMU that was already created */
- zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table), &status);
+ zpci_reenable_device(zdev);
out:
spin_lock(&kvm->arch.kzdev_list_lock);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 1a49b89706f8..0b14d894f38a 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -605,6 +605,14 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
}
}
+#if IS_ENABLED(CONFIG_VFIO_AP)
+bool kvm_s390_is_gpa_in_memslot(struct kvm *kvm, gpa_t gpa)
+{
+ return kvm_is_gpa_in_memslot(kvm, gpa);
+}
+EXPORT_SYMBOL_FOR_MODULES(kvm_s390_is_gpa_in_memslot, "vfio_ap");
+#endif
+
/*
* handle_pqap: Handling pqap interception
* @vcpu: the vcpu having issue the pqap instruction
@@ -746,7 +754,7 @@ int is_valid_psw(psw_t *psw)
int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
{
psw_t *gpsw = &vcpu->arch.sie_block->gpsw;
- psw_compat_t new_psw;
+ psw32_t new_psw;
u64 addr;
int rc;
u8 ar;
@@ -1248,6 +1256,8 @@ static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
static int handle_essa(struct kvm_vcpu *vcpu)
{
+ lockdep_assert_held(&vcpu->kvm->srcu);
+
/* entries expected to be 1FF */
int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
unsigned long *cbrlo;
@@ -1297,12 +1307,8 @@ static int handle_essa(struct kvm_vcpu *vcpu)
/* Retry the ESSA instruction */
kvm_s390_retry_instr(vcpu);
} else {
- int srcu_idx;
-
mmap_read_lock(vcpu->kvm->mm);
- srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
i = __do_essa(vcpu, orc);
- srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
mmap_read_unlock(vcpu->kvm->mm);
if (i < 0)
return i;
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 75e81ba26d04..6ba5a0305e25 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -5,6 +5,8 @@
* Copyright IBM Corp. 2019, 2020
* Author(s): Janosch Frank <frankja@linux.ibm.com>
*/
+
+#include <linux/export.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/minmax.h>
@@ -33,6 +35,64 @@ bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);
/**
+ * kvm_s390_pv_make_secure() - make one guest page secure
+ * @kvm: the guest
+ * @gaddr: the guest address that needs to be made secure
+ * @uvcb: the UVCB specifying which operation needs to be performed
+ *
+ * Context: needs to be called with kvm->srcu held.
+ * Return: 0 on success, < 0 in case of error.
+ */
+int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb)
+{
+ unsigned long vmaddr;
+
+ lockdep_assert_held(&kvm->srcu);
+
+ vmaddr = gfn_to_hva(kvm, gpa_to_gfn(gaddr));
+ if (kvm_is_error_hva(vmaddr))
+ return -EFAULT;
+ return make_hva_secure(kvm->mm, vmaddr, uvcb);
+}
+
+int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr)
+{
+ struct uv_cb_cts uvcb = {
+ .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
+ .header.len = sizeof(uvcb),
+ .guest_handle = kvm_s390_pv_get_handle(kvm),
+ .gaddr = gaddr,
+ };
+
+ return kvm_s390_pv_make_secure(kvm, gaddr, &uvcb);
+}
+
+/**
+ * kvm_s390_pv_destroy_page() - Destroy a guest page.
+ * @kvm: the guest
+ * @gaddr: the guest address to destroy
+ *
+ * An attempt will be made to destroy the given guest page. If the attempt
+ * fails, an attempt is made to export the page. If both attempts fail, an
+ * appropriate error is returned.
+ *
+ * Context: may sleep.
+ */
+int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr)
+{
+ struct page *page;
+ int rc = 0;
+
+ mmap_read_lock(kvm->mm);
+ page = gfn_to_page(kvm, gpa_to_gfn(gaddr));
+ if (page)
+ rc = __kvm_s390_pv_destroy_page(page);
+ kvm_release_page_clean(page);
+ mmap_read_unlock(kvm->mm);
+ return rc;
+}
+
+/**
* struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
* be destroyed
*
@@ -564,6 +624,17 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
int cc, ret;
u16 dummy;
+ /* Add the notifier only once. No races because we hold kvm->lock */
+ if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
+ /* The notifier will be unregistered when the VM is destroyed */
+ kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
+ ret = mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
+ if (ret) {
+ kvm->arch.pv.mmu_notifier.ops = NULL;
+ return ret;
+ }
+ }
+
ret = kvm_s390_pv_alloc_vm(kvm);
if (ret)
return ret;
@@ -599,11 +670,6 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
return -EIO;
}
kvm->arch.gmap->guest_handle = uvcb.guest_handle;
- /* Add the notifier only once. No races because we hold kvm->lock */
- if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
- kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
- mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
- }
return 0;
}
@@ -637,11 +703,29 @@ static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
.tweak[0] = tweak,
.tweak[1] = offset,
};
- int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);
+ int ret = kvm_s390_pv_make_secure(kvm, addr, &uvcb);
+ unsigned long vmaddr;
+ bool unlocked;
*rc = uvcb.header.rc;
*rrc = uvcb.header.rrc;
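+ /*
+ * -ENXIO means the page could not be made secure because it is not
+ * mapped in the gmap yet: fault in the userspace page, link it into
+ * the gmap, and return -EAGAIN so the caller retries the unpack.
+ */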
+ if (ret == -ENXIO) {
+ mmap_read_lock(kvm->mm);
+ vmaddr = gfn_to_hva(kvm, gpa_to_gfn(addr));
+ if (kvm_is_error_hva(vmaddr)) {
+ ret = -EFAULT;
+ } else {
+ ret = fixup_user_fault(kvm->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
+ if (!ret)
+ ret = __gmap_link(kvm->arch.gmap, addr, vmaddr);
+ }
+ mmap_read_unlock(kvm->mm);
+ if (!ret)
+ return -EAGAIN;
+ return ret;
+ }
+
if (ret && ret != -EAGAIN)
KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
uvcb.gaddr, *rc, *rrc);
@@ -660,6 +744,8 @@ int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
addr, size);
+ guard(srcu)(&kvm->srcu);
+
while (offset < size) {
ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
if (ret == -EAGAIN) {
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
index 9ac92dbf680d..9e28f165c114 100644
--- a/arch/s390/kvm/trace-s390.h
+++ b/arch/s390/kvm/trace-s390.h
@@ -56,7 +56,7 @@ TRACE_EVENT(kvm_s390_create_vcpu,
__entry->sie_block = sie_block;
),
- TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK",
+ TP_printk("create cpu %d at 0x%p, sie block at 0x%p",
__entry->id, __entry->vcpu, __entry->sie_block)
);
@@ -255,7 +255,7 @@ TRACE_EVENT(kvm_s390_enable_css,
__entry->kvm = kvm;
),
- TP_printk("enabling channel I/O support (kvm @ %pK)\n",
+ TP_printk("enabling channel I/O support (kvm @ %p)\n",
__entry->kvm)
);
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index c9ecae830634..b526621d2a1b 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -13,6 +13,7 @@
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/io.h>
+#include <linux/mman.h>
#include <asm/gmap.h>
#include <asm/mmu_context.h>
@@ -23,6 +24,10 @@
#include "kvm-s390.h"
#include "gaccess.h"
+enum vsie_page_flags {
+ VSIE_PAGE_IN_USE = 0,
+};
+
struct vsie_page {
struct kvm_s390_sie_block scb_s; /* 0x0000 */
/*
@@ -46,11 +51,40 @@ struct vsie_page {
gpa_t gvrd_gpa; /* 0x0240 */
gpa_t riccbd_gpa; /* 0x0248 */
gpa_t sdnx_gpa; /* 0x0250 */
- __u8 reserved[0x0700 - 0x0258]; /* 0x0258 */
+ /*
+ * guest address of the original SCB. Remains set for free vsie
+ * pages, so we can properly look them up in our addr_to_page
+ * radix tree.
+ */
+ gpa_t scb_gpa; /* 0x0258 */
+ /*
+ * Flags: must be set/cleared atomically after the vsie page can be
+ * looked up by other CPUs.
+ */
+ unsigned long flags; /* 0x0260 */
+ __u8 reserved[0x0700 - 0x0268]; /* 0x0268 */
struct kvm_s390_crypto_cb crycb; /* 0x0700 */
__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */
};
+/**
+ * gmap_shadow_valid() - check if a shadow guest address space matches the
+ * given properties and is still valid
+ * @sg: pointer to the shadow guest address space structure
+ * @asce: ASCE for which the shadow table is requested
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * Returns 1 if the gmap shadow is still valid and matches the given
+ * properties, in which case the caller can continue using it. Returns 0
+ * otherwise; the caller has to request a new shadow gmap in this case.
+ */
+int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
+{
+ if (sg->removed)
+ return 0;
+ return sg->orig_asce == asce && sg->edat_level == edat_level;
+}
+
/* trigger a validity icpt for the given scb */
static int set_validity_icpt(struct kvm_s390_sie_block *scb,
__u16 reason_code)
@@ -335,7 +369,8 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
/* we may only allow it if enabled for guest 2 */
ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 &
(ECB3_AES | ECB3_DEA);
- ecd_flags = scb_o->ecd & vcpu->arch.sie_block->ecd & ECD_ECC;
+ ecd_flags = scb_o->ecd & vcpu->arch.sie_block->ecd &
+ (ECD_ECC | ECD_HMAC);
if (!ecb3_flags && !ecd_flags)
goto end;
@@ -583,7 +618,6 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
struct kvm *kvm = gmap->private;
struct vsie_page *cur;
unsigned long prefix;
- struct page *page;
int i;
if (!gmap_is_shadow(gmap))
@@ -593,10 +627,9 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
* therefore we can safely reference them all the time.
*/
for (i = 0; i < kvm->arch.vsie.page_count; i++) {
- page = READ_ONCE(kvm->arch.vsie.pages[i]);
- if (!page)
+ cur = READ_ONCE(kvm->arch.vsie.pages[i]);
+ if (!cur)
continue;
- cur = page_to_virt(page);
if (READ_ONCE(cur->gmap) != gmap)
continue;
prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT;
@@ -661,7 +694,7 @@ static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
struct page *page;
page = gfn_to_page(kvm, gpa_to_gfn(gpa));
- if (is_error_page(page))
+ if (!page)
return -EINVAL;
*hpa = (hpa_t)page_to_phys(page) + (gpa & ~PAGE_MASK);
return 0;
@@ -670,7 +703,7 @@ static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
/* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */
static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
{
- kvm_release_pfn_dirty(hpa >> PAGE_SHIFT);
+ kvm_release_page_dirty(pfn_to_page(hpa >> PAGE_SHIFT));
/* mark the page always as dirty for migration */
mark_page_dirty(kvm, gpa_to_gfn(gpa));
}
@@ -749,7 +782,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
else if ((gpa & ~0x1fffUL) == kvm_s390_get_prefix(vcpu))
rc = set_validity_icpt(scb_s, 0x0011U);
else if ((gpa & PAGE_MASK) !=
- ((gpa + sizeof(struct bsca_block) - 1) & PAGE_MASK))
+ ((gpa + offsetof(struct bsca_block, cpu[0]) - 1) & PAGE_MASK))
rc = set_validity_icpt(scb_s, 0x003bU);
if (!rc) {
rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
@@ -853,7 +886,7 @@ unpin:
static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
gpa_t gpa)
{
- hpa_t hpa = (hpa_t) vsie_page->scb_o;
+ hpa_t hpa = virt_to_phys(vsie_page->scb_o);
if (hpa)
unpin_guest_page(vcpu->kvm, gpa, hpa);
@@ -922,19 +955,19 @@ static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
int rc;
- if (current->thread.gmap_int_code == PGM_PROTECTION)
+ if ((current->thread.gmap_int_code & PGM_INT_CODE_MASK) == PGM_PROTECTION)
/* we can directly forward all protection exceptions */
return inject_fault(vcpu, PGM_PROTECTION,
- current->thread.gmap_addr, 1);
+ current->thread.gmap_teid.addr * PAGE_SIZE, 1);
rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
- current->thread.gmap_addr, NULL);
+ current->thread.gmap_teid.addr * PAGE_SIZE, NULL);
if (rc > 0) {
rc = inject_fault(vcpu, rc,
- current->thread.gmap_addr,
- current->thread.gmap_write_flag);
+ current->thread.gmap_teid.addr * PAGE_SIZE,
+ kvm_s390_cur_gmap_fault_is_write());
if (rc >= 0)
- vsie_page->fault_addr = current->thread.gmap_addr;
+ vsie_page->fault_addr = current->thread.gmap_teid.addr * PAGE_SIZE;
}
return rc;
}
@@ -1137,10 +1170,6 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
vcpu->arch.sie_block->fpf & FPF_BPBC)
set_thread_flag(TIF_ISOLATE_BP_GUEST);
- local_irq_disable();
- guest_enter_irqoff();
- local_irq_enable();
-
/*
* Simulate a SIE entry of the VCPU (see sie64a), so VCPU blocking
* and VCPU requests also hinder the vSIE from running and lead
@@ -1148,16 +1177,29 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
* also kick the vSIE.
*/
vcpu->arch.sie_block->prog0c |= PROG_IN_SIE;
+ current->thread.gmap_int_code = 0;
barrier();
- if (!kvm_s390_vcpu_sie_inhibited(vcpu))
- rc = sie64a(scb_s, vcpu->run->s.regs.gprs);
+ if (!kvm_s390_vcpu_sie_inhibited(vcpu)) {
+xfer_to_guest_mode_check:
+ local_irq_disable();
+ xfer_to_guest_mode_prepare();
+ if (xfer_to_guest_mode_work_pending()) {
+ local_irq_enable();
+ rc = kvm_xfer_to_guest_mode_handle_work(vcpu);
+ if (rc)
+ goto skip_sie;
+ goto xfer_to_guest_mode_check;
+ }
+ guest_timing_enter_irqoff();
+ rc = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, vsie_page->gmap->asce);
+ guest_timing_exit_irqoff();
+ local_irq_enable();
+ }
+
+skip_sie:
barrier();
vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE;
- local_irq_disable();
- guest_exit_irqoff();
- local_irq_enable();
-
/* restore guest state for bp isolation override */
if (!guest_bp_isolation)
clear_thread_flag(TIF_ISOLATE_BP_GUEST);
@@ -1172,7 +1214,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if (rc > 0)
rc = 0; /* we could still have an icpt */
- else if (rc == -EFAULT)
+ else if (current->thread.gmap_int_code)
return handle_fault(vcpu, vsie_page);
switch (scb_s->icptcode) {
@@ -1295,20 +1337,30 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if (!rc)
rc = map_prefix(vcpu, vsie_page);
if (!rc) {
- gmap_enable(vsie_page->gmap);
update_intervention_requests(vsie_page);
rc = do_vsie_run(vcpu, vsie_page);
- gmap_enable(vcpu->arch.gmap);
}
atomic_andnot(PROG_BLOCK_SIE, &scb_s->prog20);
if (rc == -EAGAIN)
rc = 0;
- if (rc || scb_s->icptcode || signal_pending(current) ||
- kvm_s390_vcpu_has_irq(vcpu, 0) ||
- kvm_s390_vcpu_sie_inhibited(vcpu))
+
+ /*
+ * Exit the loop if the guest needs to process the intercept
+ */
+ if (rc || scb_s->icptcode)
+ break;
+
+ /*
+ * Exit the loop if the host needs to process an intercept,
+ * but rewind the PSW to re-enter SIE once that's completed
+ * instead of passing a "no action" intercept to the guest.
+ */
+ if (kvm_s390_vcpu_has_irq(vcpu, 0) ||
+ kvm_s390_vcpu_sie_inhibited(vcpu)) {
+ kvm_s390_rewind_psw(vcpu, 4);
break;
- cond_resched();
+ }
}
if (rc == -EFAULT) {
@@ -1331,6 +1383,20 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
return rc;
}
+/* Try getting a given vsie page, returning "true" on success. */
+static inline bool try_get_vsie_page(struct vsie_page *vsie_page)
+{
+ if (test_bit(VSIE_PAGE_IN_USE, &vsie_page->flags))
+ return false;
+ return !test_and_set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags);
+}
+
+/* Put a vsie page acquired through get_vsie_page / try_get_vsie_page. */
+static void put_vsie_page(struct vsie_page *vsie_page)
+{
+ clear_bit(VSIE_PAGE_IN_USE, &vsie_page->flags);
+}
+
/*
* Get or create a vsie page for a scb address.
*
@@ -1341,16 +1407,21 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
{
struct vsie_page *vsie_page;
- struct page *page;
int nr_vcpus;
rcu_read_lock();
- page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
+ vsie_page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
rcu_read_unlock();
- if (page) {
- if (page_ref_inc_return(page) == 2)
- return page_to_virt(page);
- page_ref_dec(page);
+ if (vsie_page) {
+ if (try_get_vsie_page(vsie_page)) {
+ if (vsie_page->scb_gpa == addr)
+ return vsie_page;
+ /*
+ * We raced with someone reusing + putting this vsie
+ * page before we grabbed it.
+ */
+ put_vsie_page(vsie_page);
+ }
}
/*
@@ -1361,36 +1432,40 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
mutex_lock(&kvm->arch.vsie.mutex);
if (kvm->arch.vsie.page_count < nr_vcpus) {
- page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA);
- if (!page) {
+ vsie_page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA);
+ if (!vsie_page) {
mutex_unlock(&kvm->arch.vsie.mutex);
return ERR_PTR(-ENOMEM);
}
- page_ref_inc(page);
- kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page;
+ __set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags);
+ kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = vsie_page;
kvm->arch.vsie.page_count++;
} else {
/* reuse an existing entry that belongs to nobody */
while (true) {
- page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
- if (page_ref_inc_return(page) == 2)
+ vsie_page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
+ if (try_get_vsie_page(vsie_page))
break;
- page_ref_dec(page);
kvm->arch.vsie.next++;
kvm->arch.vsie.next %= nr_vcpus;
}
- radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
+ if (vsie_page->scb_gpa != ULONG_MAX)
+ radix_tree_delete(&kvm->arch.vsie.addr_to_page,
+ vsie_page->scb_gpa >> 9);
}
- page->index = addr;
- /* double use of the same address */
- if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) {
- page_ref_dec(page);
+ /* Mark it as invalid until it resides in the tree. */
+ vsie_page->scb_gpa = ULONG_MAX;
+
+ /* Double use of the same address or allocation failure. */
+ if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9,
+ vsie_page)) {
+ put_vsie_page(vsie_page);
mutex_unlock(&kvm->arch.vsie.mutex);
return NULL;
}
+ vsie_page->scb_gpa = addr;
mutex_unlock(&kvm->arch.vsie.mutex);
- vsie_page = page_to_virt(page);
memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block));
release_gmap_shadow(vsie_page);
vsie_page->fault_addr = 0;
@@ -1398,14 +1473,6 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
return vsie_page;
}
-/* put a vsie page acquired via get_vsie_page */
-static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page)
-{
- struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT);
-
- page_ref_dec(page);
-}
-
int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
{
struct vsie_page *vsie_page;
@@ -1425,9 +1492,10 @@ int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
if (unlikely(scb_addr & 0x1ffUL))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0) ||
- kvm_s390_vcpu_sie_inhibited(vcpu))
+ if (kvm_s390_vcpu_has_irq(vcpu, 0) || kvm_s390_vcpu_sie_inhibited(vcpu)) {
+ kvm_s390_rewind_psw(vcpu, 4);
return 0;
+ }
vsie_page = get_vsie_page(vcpu->kvm, scb_addr);
if (IS_ERR(vsie_page))
@@ -1454,7 +1522,7 @@ out_unshadow:
out_unpin_scb:
unpin_scb(vcpu, vsie_page, scb_addr);
out_put:
- put_vsie_page(vcpu->kvm, vsie_page);
+ put_vsie_page(vsie_page);
return rc < 0 ? rc : 0;
}
@@ -1470,18 +1538,18 @@ void kvm_s390_vsie_init(struct kvm *kvm)
void kvm_s390_vsie_destroy(struct kvm *kvm)
{
struct vsie_page *vsie_page;
- struct page *page;
int i;
mutex_lock(&kvm->arch.vsie.mutex);
for (i = 0; i < kvm->arch.vsie.page_count; i++) {
- page = kvm->arch.vsie.pages[i];
+ vsie_page = kvm->arch.vsie.pages[i];
kvm->arch.vsie.pages[i] = NULL;
- vsie_page = page_to_virt(page);
release_gmap_shadow(vsie_page);
/* free the radix tree entry */
- radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
- __free_page(page);
+ if (vsie_page->scb_gpa != ULONG_MAX)
+ radix_tree_delete(&kvm->arch.vsie.addr_to_page,
+ vsie_page->scb_gpa >> 9);
+ free_page((unsigned long)vsie_page);
}
kvm->arch.vsie.page_count = 0;
mutex_unlock(&kvm->arch.vsie.mutex);
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index be14c58cb989..c1ea14e3c927 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -7,6 +7,7 @@
*/
#include <linux/processor.h>
+#include <linux/export.h>
#include <linux/delay.h>
#include <asm/div64.h>
#include <asm/timex.h>
diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S
index 08f60a42b9a6..d026debf250c 100644
--- a/arch/s390/lib/mem.S
+++ b/arch/s390/lib/mem.S
@@ -34,8 +34,7 @@ SYM_FUNC_START(__memmove)
la %r3,256(%r3)
brctg %r0,.Lmemmove_forward_loop
.Lmemmove_forward_remainder:
- larl %r5,.Lmemmove_mvc
- ex %r4,0(%r5)
+ exrl %r4,.Lmemmove_mvc
.Lmemmove_exit:
BR_EX %r14
.Lmemmove_reverse:
@@ -83,8 +82,7 @@ SYM_FUNC_START(__memset)
la %r1,256(%r1)
brctg %r3,.Lmemset_clear_loop
.Lmemset_clear_remainder:
- larl %r3,.Lmemset_xc
- ex %r4,0(%r3)
+ exrl %r4,.Lmemset_xc
.Lmemset_exit:
BR_EX %r14
.Lmemset_fill:
@@ -102,8 +100,7 @@ SYM_FUNC_START(__memset)
brctg %r5,.Lmemset_fill_loop
.Lmemset_fill_remainder:
stc %r3,0(%r1)
- larl %r5,.Lmemset_mvc
- ex %r4,0(%r5)
+ exrl %r4,.Lmemset_mvc
BR_EX %r14
.Lmemset_fill_exit:
stc %r3,0(%r1)
@@ -132,8 +129,7 @@ SYM_FUNC_START(__memcpy)
lgr %r1,%r2
jnz .Lmemcpy_loop
.Lmemcpy_remainder:
- larl %r5,.Lmemcpy_mvc
- ex %r4,0(%r5)
+ exrl %r4,.Lmemcpy_mvc
.Lmemcpy_exit:
BR_EX %r14
.Lmemcpy_loop:
@@ -175,8 +171,7 @@ SYM_FUNC_START(__memset\bits)
brctg %r5,.L__memset_loop\bits
.L__memset_remainder\bits:
\insn %r3,0(%r1)
- larl %r5,.L__memset_mvc\bits
- ex %r4,0(%r5)
+ exrl %r4,.L__memset_mvc\bits
BR_EX %r14
.L__memset_store\bits:
\insn %r3,0(%r2)
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index 81c53440b3e6..10db1e56a811 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -10,11 +10,14 @@
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/jiffies.h>
+#include <linux/sysctl.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/percpu.h>
#include <linux/io.h>
#include <asm/alternative.h>
+#include <asm/machine.h>
+#include <asm/asm.h>
int spin_retry = -1;
@@ -36,6 +39,23 @@ static int __init spin_retry_setup(char *str)
}
__setup("spin_retry=", spin_retry_setup);
+static const struct ctl_table s390_spin_sysctl_table[] = {
+ {
+ .procname = "spin_retry",
+ .data = &spin_retry,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+};
+
+static int __init init_s390_spin_sysctls(void)
+{
+ register_sysctl_init("kernel", s390_spin_sysctl_table);
+ return 0;
+}
+arch_initcall(init_s390_spin_sysctls);
+
struct spin_wait {
struct spin_wait *next, *prev;
int node_id;
@@ -75,25 +95,44 @@ static inline int arch_load_niai4(int *lock)
int owner;
asm_inline volatile(
- ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", 49) /* NIAI 4 */
- " l %0,%1\n"
- : "=d" (owner) : "Q" (*lock) : "memory");
+ ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", ALT_FACILITY(49)) /* NIAI 4 */
+ " l %[owner],%[lock]"
+ : [owner] "=d" (owner) : [lock] "R" (*lock) : "memory");
return owner;
}
-static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
+#ifdef __HAVE_ASM_FLAG_OUTPUTS__
+
+static inline int arch_try_cmpxchg_niai8(int *lock, int old, int new)
+{
+ int cc;
+
+ asm_inline volatile(
+ ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */
+ " cs %[old],%[new],%[lock]"
+ : [old] "+d" (old), [lock] "+Q" (*lock), "=@cc" (cc)
+ : [new] "d" (new)
+ : "memory");
+ return cc == 0;
+}
+
+#else /* __HAVE_ASM_FLAG_OUTPUTS__ */
+
+static inline int arch_try_cmpxchg_niai8(int *lock, int old, int new)
{
int expected = old;
asm_inline volatile(
- ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", 49) /* NIAI 8 */
- " cs %0,%3,%1\n"
- : "=d" (old), "=Q" (*lock)
- : "0" (old), "d" (new), "Q" (*lock)
+ ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */
+ " cs %[old],%[new],%[lock]"
+ : [old] "+d" (old), [lock] "+Q" (*lock)
+ : [new] "d" (new)
: "cc", "memory");
return expected == old;
}
+#endif /* __HAVE_ASM_FLAG_OUTPUTS__ */
+
static inline struct spin_wait *arch_spin_decode_tail(int lock)
{
int ix, cpu;
@@ -119,16 +158,16 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp)
struct spin_wait *node, *next;
int lockval, ix, node_id, tail_id, old, new, owner, count;
- ix = S390_lowcore.spinlock_index++;
+ ix = get_lowcore()->spinlock_index++;
barrier();
- lockval = SPINLOCK_LOCKVAL; /* cpu + 1 */
+ lockval = spinlock_lockval(); /* cpu + 1 */
node = this_cpu_ptr(&spin_wait[ix]);
node->prev = node->next = NULL;
node_id = node->node_id;
/* Enqueue the node for this CPU in the spinlock wait queue */
+ old = READ_ONCE(lp->lock);
while (1) {
- old = READ_ONCE(lp->lock);
if ((old & _Q_LOCK_CPU_MASK) == 0 &&
(old & _Q_LOCK_STEAL_MASK) != _Q_LOCK_STEAL_MASK) {
/*
@@ -139,7 +178,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp)
* waiter will get the lock.
*/
new = (old ? (old + _Q_LOCK_STEAL_ADD) : 0) | lockval;
- if (__atomic_cmpxchg_bool(&lp->lock, old, new))
+ if (arch_try_cmpxchg(&lp->lock, &old, new))
/* Got the lock */
goto out;
/* lock passing in progress */
@@ -147,7 +186,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp)
}
/* Make the node of this CPU the new tail. */
new = node_id | (old & _Q_LOCK_MASK);
- if (__atomic_cmpxchg_bool(&lp->lock, old, new))
+ if (arch_try_cmpxchg(&lp->lock, &old, new))
break;
}
/* Set the 'next' pointer of the tail node in the queue */
@@ -184,7 +223,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp)
if (!owner) {
tail_id = old & _Q_TAIL_MASK;
new = ((tail_id != node_id) ? tail_id : 0) | lockval;
- if (__atomic_cmpxchg_bool(&lp->lock, old, new))
+ if (arch_try_cmpxchg(&lp->lock, &old, new))
/* Got the lock */
break;
continue;
@@ -192,7 +231,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp)
if (count-- >= 0)
continue;
count = spin_retry;
- if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1))
+ if (!machine_is_lpar() || arch_vcpu_is_preempted(owner - 1))
smp_yield_cpu(owner - 1);
}
@@ -205,14 +244,14 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp)
}
out:
- S390_lowcore.spinlock_index--;
+ get_lowcore()->spinlock_index--;
}
static inline void arch_spin_lock_classic(arch_spinlock_t *lp)
{
int lockval, old, new, owner, count;
- lockval = SPINLOCK_LOCKVAL; /* cpu + 1 */
+ lockval = spinlock_lockval(); /* cpu + 1 */
/* Pass the virtual CPU to the lock holder if it is not running */
owner = arch_spin_yield_target(READ_ONCE(lp->lock), NULL);
@@ -226,7 +265,7 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp)
/* Try to get the lock if it is free. */
if (!owner) {
new = (old & _Q_TAIL_MASK) | lockval;
- if (arch_cmpxchg_niai8(&lp->lock, old, new)) {
+ if (arch_try_cmpxchg_niai8(&lp->lock, old, new)) {
/* Got the lock */
return;
}
@@ -235,7 +274,7 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp)
if (count-- >= 0)
continue;
count = spin_retry;
- if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1))
+ if (!machine_is_lpar() || arch_vcpu_is_preempted(owner - 1))
smp_yield_cpu(owner - 1);
}
}
@@ -251,14 +290,14 @@ EXPORT_SYMBOL(arch_spin_lock_wait);
int arch_spin_trylock_retry(arch_spinlock_t *lp)
{
- int cpu = SPINLOCK_LOCKVAL;
+ int cpu = spinlock_lockval();
int owner, count;
for (count = spin_retry; count > 0; count--) {
owner = READ_ONCE(lp->lock);
/* Try to get the lock if it is free. */
if (!owner) {
- if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu))
+ if (arch_try_cmpxchg(&lp->lock, &owner, cpu))
return 1;
}
}
@@ -300,7 +339,7 @@ void arch_write_lock_wait(arch_rwlock_t *rw)
while (1) {
old = READ_ONCE(rw->cnts);
if ((old & 0x1ffff) == 0 &&
- __atomic_cmpxchg_bool(&rw->cnts, old, old | 0x10000))
+ arch_try_cmpxchg(&rw->cnts, &old, old | 0x10000))
/* Got the lock */
break;
barrier();
@@ -317,7 +356,7 @@ void arch_spin_relax(arch_spinlock_t *lp)
cpu = READ_ONCE(lp->lock) & _Q_LOCK_CPU_MASK;
if (!cpu)
return;
- if (MACHINE_IS_LPAR && !arch_vcpu_is_preempted(cpu - 1))
+ if (machine_is_lpar() && !arch_vcpu_is_preempted(cpu - 1))
return;
smp_yield_cpu(cpu - 1);
}
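
For reference, the __atomic_cmpxchg_bool() to arch_try_cmpxchg() conversions above rely on the try-cmpxchg convention: on failure the current value is written back into the 'old' variable, which is why the READ_ONCE() could be moved out of the retry loop. A minimal user-space sketch of that pattern, with GCC's __atomic builtins standing in for the s390 cs instruction (function and variable names here are made up for illustration):

#include <stdio.h>

static int lock_word;

/* Mirrors the try-cmpxchg convention: returns nonzero on success and, on
 * failure, leaves the current value of the lock word in *old. */
static int try_cmpxchg(int *ptr, int *old, int new)
{
	return __atomic_compare_exchange_n(ptr, old, new, 0,
					   __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}

static void lock_with_retry(int lockval)
{
	int old = __atomic_load_n(&lock_word, __ATOMIC_RELAXED);

	while (1) {
		if (old == 0) {
			/* Lock is free: try to grab it. */
			if (try_cmpxchg(&lock_word, &old, lockval))
				return;
			/* Failure updated 'old'; no separate re-read needed. */
			continue;
		}
		/* Lock is held; a real implementation would spin or queue here. */
		old = __atomic_load_n(&lock_word, __ATOMIC_RELAXED);
	}
}

int main(void)
{
	lock_with_retry(1);
	printf("lock word after acquire: %d\n", lock_word);
	return 0;
}
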
diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c
index 7d8741818239..757f58960198 100644
--- a/arch/s390/lib/string.c
+++ b/arch/s390/lib/string.c
@@ -15,6 +15,7 @@
#include <linux/types.h>
#include <linux/string.h>
#include <linux/export.h>
+#include <asm/asm.h>
/*
* Helper functions to find the end of a string
@@ -26,7 +27,7 @@ static inline char *__strend(const char *s)
asm volatile(
" lghi 0,0\n"
"0: srst %[e],%[s]\n"
- " jo 0b\n"
+ " jo 0b"
: [e] "+&a" (e), [s] "+&a" (s)
:
: "cc", "memory", "0");
@@ -40,7 +41,7 @@ static inline char *__strnend(const char *s, size_t n)
asm volatile(
" lghi 0,0\n"
"0: srst %[p],%[s]\n"
- " jo 0b\n"
+ " jo 0b"
: [p] "+&d" (p), [s] "+&a" (s)
:
: "cc", "memory", "0");
@@ -77,50 +78,6 @@ EXPORT_SYMBOL(strnlen);
#endif
/**
- * strcpy - Copy a %NUL terminated string
- * @dest: Where to copy the string to
- * @src: Where to copy the string from
- *
- * returns a pointer to @dest
- */
-#ifdef __HAVE_ARCH_STRCPY
-char *strcpy(char *dest, const char *src)
-{
- char *ret = dest;
-
- asm volatile(
- " lghi 0,0\n"
- "0: mvst %[dest],%[src]\n"
- " jo 0b\n"
- : [dest] "+&a" (dest), [src] "+&a" (src)
- :
- : "cc", "memory", "0");
- return ret;
-}
-EXPORT_SYMBOL(strcpy);
-#endif
-
-/**
- * strncpy - Copy a length-limited, %NUL-terminated string
- * @dest: Where to copy the string to
- * @src: Where to copy the string from
- * @n: The maximum number of bytes to copy
- *
- * The result is not %NUL-terminated if the source exceeds
- * @n bytes.
- */
-#ifdef __HAVE_ARCH_STRNCPY
-char *strncpy(char *dest, const char *src, size_t n)
-{
- size_t len = __strnend(src, n) - src;
- memset(dest + len, 0, n - len);
- memcpy(dest, src, len);
- return dest;
-}
-EXPORT_SYMBOL(strncpy);
-#endif
-
-/**
* strcat - Append one %NUL-terminated string to another
* @dest: The string to be appended to
* @src: The string to append to it
@@ -138,7 +95,7 @@ char *strcat(char *dest, const char *src)
"0: srst %[dummy],%[dest]\n"
" jo 0b\n"
"1: mvst %[dummy],%[src]\n"
- " jo 1b\n"
+ " jo 1b"
: [dummy] "+&a" (dummy), [dest] "+&a" (dest), [src] "+&a" (src)
:
: "cc", "memory", "0");
@@ -180,9 +137,6 @@ EXPORT_SYMBOL(strlcat);
* @n: The maximum numbers of bytes to copy
*
* returns a pointer to @dest
- *
- * Note that in contrast to strncpy, strncat ensures the result is
- * terminated.
*/
#ifdef __HAVE_ARCH_STRNCAT
char *strncat(char *dest, const char *src, size_t n)
@@ -238,12 +192,11 @@ static inline int clcle(const char *s1, unsigned long l1,
asm volatile(
"0: clcle %[r1],%[r3],0\n"
" jo 0b\n"
- " ipm %[cc]\n"
- " srl %[cc],28\n"
- : [cc] "=&d" (cc), [r1] "+&d" (r1.pair), [r3] "+&d" (r3.pair)
+ CC_IPM(cc)
+ : CC_OUT(cc, cc), [r1] "+d" (r1.pair), [r3] "+d" (r3.pair)
:
- : "cc", "memory");
- return cc;
+ : CC_CLOBBER_LIST("memory"));
+ return CC_TRANSFORM(cc);
}
/**
@@ -338,7 +291,7 @@ void *memscan(void *s, int c, size_t n)
asm volatile(
" lgr 0,%[c]\n"
"0: srst %[ret],%[s]\n"
- " jo 0b\n"
+ " jo 0b"
: [ret] "+&a" (ret), [s] "+&a" (s)
: [c] "d" (c)
: "cc", "memory", "0");
diff --git a/arch/s390/lib/test_kprobes.c b/arch/s390/lib/test_kprobes.c
index 9e62d62812e5..9021298c3e8a 100644
--- a/arch/s390/lib/test_kprobes.c
+++ b/arch/s390/lib/test_kprobes.c
@@ -72,4 +72,5 @@ static struct kunit_suite kprobes_test_suite = {
kunit_test_suites(&kprobes_test_suite);
+MODULE_DESCRIPTION("KUnit tests for kprobes");
MODULE_LICENSE("GPL");
diff --git a/arch/s390/lib/test_modules.c b/arch/s390/lib/test_modules.c
index 9894009fc1f2..f96b6a3737e7 100644
--- a/arch/s390/lib/test_modules.c
+++ b/arch/s390/lib/test_modules.c
@@ -29,4 +29,5 @@ static struct kunit_suite modules_test_suite = {
kunit_test_suites(&modules_test_suite);
+MODULE_DESCRIPTION("KUnit test that modules with many relocations are loaded properly");
MODULE_LICENSE("GPL");
diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c
index 2848e3fb2ff5..6bb3fa5bf925 100644
--- a/arch/s390/lib/test_unwind.c
+++ b/arch/s390/lib/test_unwind.c
@@ -150,7 +150,7 @@ static __always_inline struct pt_regs fake_pt_regs(void)
regs.gprs[15] = current_stack_pointer;
asm volatile(
- "basr %[psw_addr],0\n"
+ "basr %[psw_addr],0"
: [psw_addr] "=d" (regs.psw.addr));
return regs;
}
@@ -232,7 +232,7 @@ static noinline void test_unwind_kprobed_func(void)
asm volatile(
" nopr %%r7\n"
"test_unwind_kprobed_insn:\n"
- " nopr %%r7\n"
+ " nopr %%r7"
:);
}
@@ -270,9 +270,9 @@ static void notrace __used test_unwind_ftrace_handler(unsigned long ip,
struct ftrace_ops *fops,
struct ftrace_regs *fregs)
{
- struct unwindme *u = (struct unwindme *)fregs->regs.gprs[2];
+ struct unwindme *u = (struct unwindme *)arch_ftrace_regs(fregs)->regs.gprs[2];
- u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? &fregs->regs : NULL,
+ u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? &arch_ftrace_regs(fregs)->regs : NULL,
(u->flags & UWM_SP) ? u->sp : 0);
}
@@ -356,7 +356,7 @@ static noinline int unwindme_func2(struct unwindme *u)
if (u->flags & UWM_SWITCH_STACK) {
local_irq_save(flags);
local_mcck_save(mflags);
- rc = call_on_stack(1, S390_lowcore.nodat_stack,
+ rc = call_on_stack(1, get_lowcore()->nodat_stack,
int, unwindme_func3, struct unwindme *, u);
local_mcck_restore(mflags);
local_irq_restore(flags);
@@ -519,4 +519,5 @@ static struct kunit_suite test_unwind_suite = {
kunit_test_suites(&test_unwind_suite);
+MODULE_DESCRIPTION("KUnit test for unwind_for_each_frame");
MODULE_LICENSE("GPL");
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index 61d8dcd95bbc..1a6ba105e071 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -8,73 +8,85 @@
* Gerald Schaefer (gerald.schaefer@de.ibm.com)
*/
+#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <asm/asm-extable.h>
#include <asm/ctlreg.h>
+#include <asm/skey.h>
#ifdef CONFIG_DEBUG_ENTRY
void debug_user_asce(int exit)
{
+ struct lowcore *lc = get_lowcore();
struct ctlreg cr1, cr7;
local_ctl_store(1, &cr1);
local_ctl_store(7, &cr7);
- if (cr1.val == S390_lowcore.kernel_asce.val && cr7.val == S390_lowcore.user_asce.val)
+ if (cr1.val == lc->user_asce.val && cr7.val == lc->user_asce.val)
return;
panic("incorrect ASCE on kernel %s\n"
"cr1: %016lx cr7: %016lx\n"
"kernel: %016lx user: %016lx\n",
exit ? "exit" : "entry", cr1.val, cr7.val,
- S390_lowcore.kernel_asce.val, S390_lowcore.user_asce.val);
+ lc->kernel_asce.val, lc->user_asce.val);
}
#endif /*CONFIG_DEBUG_ENTRY */
-static unsigned long raw_copy_from_user_key(void *to, const void __user *from,
- unsigned long size, unsigned long key)
+union oac {
+ unsigned int val;
+ struct {
+ struct {
+ unsigned short key : 4;
+ unsigned short : 4;
+ unsigned short as : 2;
+ unsigned short : 4;
+ unsigned short k : 1;
+ unsigned short a : 1;
+ } oac1;
+ struct {
+ unsigned short key : 4;
+ unsigned short : 4;
+ unsigned short as : 2;
+ unsigned short : 4;
+ unsigned short k : 1;
+ unsigned short a : 1;
+ } oac2;
+ };
+};
+
+static uaccess_kmsan_or_inline __must_check unsigned long
+raw_copy_from_user_key(void *to, const void __user *from, unsigned long size, unsigned long key)
{
- unsigned long rem;
+ unsigned long osize;
union oac spec = {
.oac2.key = key,
.oac2.as = PSW_BITS_AS_SECONDARY,
.oac2.k = 1,
.oac2.a = 1,
};
+ int cc;
- asm volatile(
- " lr 0,%[spec]\n"
- "0: mvcos 0(%[to]),0(%[from]),%[size]\n"
- "1: jz 5f\n"
- " algr %[size],%[val]\n"
- " slgr %[from],%[val]\n"
- " slgr %[to],%[val]\n"
- " j 0b\n"
- "2: la %[rem],4095(%[from])\n" /* rem = from + 4095 */
- " nr %[rem],%[val]\n" /* rem = (from + 4095) & -4096 */
- " slgr %[rem],%[from]\n"
- " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */
- " jnh 6f\n"
- "3: mvcos 0(%[to]),0(%[from]),%[rem]\n"
- "4: slgr %[size],%[rem]\n"
- " j 6f\n"
- "5: slgr %[size],%[size]\n"
- "6:\n"
- EX_TABLE(0b, 2b)
- EX_TABLE(1b, 2b)
- EX_TABLE(3b, 6b)
- EX_TABLE(4b, 6b)
- : [size] "+&a" (size), [from] "+&a" (from), [to] "+&a" (to), [rem] "=&a" (rem)
- : [val] "a" (-4096UL), [spec] "d" (spec.val)
- : "cc", "memory", "0");
- return size;
-}
-
-unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n)
-{
- return raw_copy_from_user_key(to, from, n, 0);
+ while (1) {
+ osize = size;
+ asm_inline volatile(
+ " lr %%r0,%[spec]\n"
+ "0: mvcos %[to],%[from],%[size]\n"
+ "1: nopr %%r7\n"
+ CC_IPM(cc)
+ EX_TABLE_UA_MVCOS_FROM(0b, 0b)
+ EX_TABLE_UA_MVCOS_FROM(1b, 0b)
+ : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char *)to)
+ : [spec] "d" (spec.val), [from] "Q" (*(const char __user *)from)
+ : CC_CLOBBER_LIST("memory", "0"));
+ if (CC_TRANSFORM(cc) == 0)
+ return osize - size;
+ size -= 4096;
+ to += 4096;
+ from += 4096;
+ }
}
-EXPORT_SYMBOL(raw_copy_from_user);
unsigned long _copy_from_user_key(void *to, const void __user *from,
unsigned long n, unsigned long key)
@@ -93,50 +105,37 @@ unsigned long _copy_from_user_key(void *to, const void __user *from,
}
EXPORT_SYMBOL(_copy_from_user_key);
-static unsigned long raw_copy_to_user_key(void __user *to, const void *from,
- unsigned long size, unsigned long key)
+static uaccess_kmsan_or_inline __must_check unsigned long
+raw_copy_to_user_key(void __user *to, const void *from, unsigned long size, unsigned long key)
{
- unsigned long rem;
+ unsigned long osize;
union oac spec = {
.oac1.key = key,
.oac1.as = PSW_BITS_AS_SECONDARY,
.oac1.k = 1,
.oac1.a = 1,
};
+ int cc;
- asm volatile(
- " lr 0,%[spec]\n"
- "0: mvcos 0(%[to]),0(%[from]),%[size]\n"
- "1: jz 5f\n"
- " algr %[size],%[val]\n"
- " slgr %[to],%[val]\n"
- " slgr %[from],%[val]\n"
- " j 0b\n"
- "2: la %[rem],4095(%[to])\n" /* rem = to + 4095 */
- " nr %[rem],%[val]\n" /* rem = (to + 4095) & -4096 */
- " slgr %[rem],%[to]\n"
- " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */
- " jnh 6f\n"
- "3: mvcos 0(%[to]),0(%[from]),%[rem]\n"
- "4: slgr %[size],%[rem]\n"
- " j 6f\n"
- "5: slgr %[size],%[size]\n"
- "6:\n"
- EX_TABLE(0b, 2b)
- EX_TABLE(1b, 2b)
- EX_TABLE(3b, 6b)
- EX_TABLE(4b, 6b)
- : [size] "+&a" (size), [to] "+&a" (to), [from] "+&a" (from), [rem] "=&a" (rem)
- : [val] "a" (-4096UL), [spec] "d" (spec.val)
- : "cc", "memory", "0");
- return size;
-}
-
-unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n)
-{
- return raw_copy_to_user_key(to, from, n, 0);
+ while (1) {
+ osize = size;
+ asm_inline volatile(
+ " lr %%r0,%[spec]\n"
+ "0: mvcos %[to],%[from],%[size]\n"
+ "1: nopr %%r7\n"
+ CC_IPM(cc)
+ EX_TABLE_UA_MVCOS_TO(0b, 0b)
+ EX_TABLE_UA_MVCOS_TO(1b, 0b)
+ : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char __user *)to)
+ : [spec] "d" (spec.val), [from] "Q" (*(const char *)from)
+ : CC_CLOBBER_LIST("memory", "0"));
+ if (CC_TRANSFORM(cc) == 0)
+ return osize - size;
+ size -= 4096;
+ to += 4096;
+ from += 4096;
+ }
}
-EXPORT_SYMBOL(raw_copy_to_user);
unsigned long _copy_to_user_key(void __user *to, const void *from,
unsigned long n, unsigned long key)
@@ -149,38 +148,188 @@ unsigned long _copy_to_user_key(void __user *to, const void *from,
}
EXPORT_SYMBOL(_copy_to_user_key);
-unsigned long __clear_user(void __user *to, unsigned long size)
+#define CMPXCHG_USER_KEY_MAX_LOOPS 128
+
+static nokprobe_inline int __cmpxchg_user_key_small(unsigned long address, unsigned int *uval,
+ unsigned int old, unsigned int new,
+ unsigned int mask, unsigned long key)
{
- unsigned long rem;
- union oac spec = {
- .oac1.as = PSW_BITS_AS_SECONDARY,
- .oac1.a = 1,
- };
+ unsigned long count;
+ unsigned int prev;
+ bool sacf_flag;
+ int rc = 0;
+
+ skey_regions_initialize();
+ sacf_flag = enable_sacf_uaccess();
+ asm_inline volatile(
+ "20: spka 0(%[key])\n"
+ " sacf 256\n"
+ " llill %[count],%[max_loops]\n"
+ "0: l %[prev],%[address]\n"
+ "1: nr %[prev],%[mask]\n"
+ " xilf %[mask],0xffffffff\n"
+ " or %[new],%[prev]\n"
+ " or %[prev],%[tmp]\n"
+ "2: lr %[tmp],%[prev]\n"
+ "3: cs %[prev],%[new],%[address]\n"
+ "4: jnl 5f\n"
+ " xr %[tmp],%[prev]\n"
+ " xr %[new],%[tmp]\n"
+ " nr %[tmp],%[mask]\n"
+ " jnz 5f\n"
+ " brct %[count],2b\n"
+ "5: sacf 768\n"
+ " spka %[default_key]\n"
+ "21:\n"
+ EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev])
+ SKEY_REGION(20b, 21b)
+ : [rc] "+&d" (rc),
+ [prev] "=&d" (prev),
+ [address] "+Q" (*(int *)address),
+ [tmp] "+&d" (old),
+ [new] "+&d" (new),
+ [mask] "+&d" (mask),
+ [count] "=a" (count)
+ : [key] "%[count]" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY),
+ [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
+ : "memory", "cc");
+ disable_sacf_uaccess(sacf_flag);
+ *uval = prev;
+ if (!count)
+ rc = -EAGAIN;
+ return rc;
+}
+
+int __kprobes __cmpxchg_user_key1(unsigned long address, unsigned char *uval,
+ unsigned char old, unsigned char new, unsigned long key)
+{
+ unsigned int prev, shift, mask, _old, _new;
+ int rc;
+
+ shift = (3 ^ (address & 3)) << 3;
+ address ^= address & 3;
+ _old = (unsigned int)old << shift;
+ _new = (unsigned int)new << shift;
+ mask = ~(0xff << shift);
+ rc = __cmpxchg_user_key_small(address, &prev, _old, _new, mask, key);
+ *uval = prev >> shift;
+ return rc;
+}
+EXPORT_SYMBOL(__cmpxchg_user_key1);
+
+int __kprobes __cmpxchg_user_key2(unsigned long address, unsigned short *uval,
+ unsigned short old, unsigned short new, unsigned long key)
+{
+ unsigned int prev, shift, mask, _old, _new;
+ int rc;
+
+ shift = (2 ^ (address & 2)) << 3;
+ address ^= address & 2;
+ _old = (unsigned int)old << shift;
+ _new = (unsigned int)new << shift;
+ mask = ~(0xffff << shift);
+ rc = __cmpxchg_user_key_small(address, &prev, _old, _new, mask, key);
+ *uval = prev >> shift;
+ return rc;
+}
+EXPORT_SYMBOL(__cmpxchg_user_key2);
+
+int __kprobes __cmpxchg_user_key4(unsigned long address, unsigned int *uval,
+ unsigned int old, unsigned int new, unsigned long key)
+{
+ unsigned int prev = old;
+ bool sacf_flag;
+ int rc = 0;
+
+ skey_regions_initialize();
+ sacf_flag = enable_sacf_uaccess();
+ asm_inline volatile(
+ "20: spka 0(%[key])\n"
+ " sacf 256\n"
+ "0: cs %[prev],%[new],%[address]\n"
+ "1: sacf 768\n"
+ " spka %[default_key]\n"
+ "21:\n"
+ EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
+ SKEY_REGION(20b, 21b)
+ : [rc] "+&d" (rc),
+ [prev] "+&d" (prev),
+ [address] "+Q" (*(int *)address)
+ : [new] "d" (new),
+ [key] "a" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY)
+ : "memory", "cc");
+ disable_sacf_uaccess(sacf_flag);
+ *uval = prev;
+ return rc;
+}
+EXPORT_SYMBOL(__cmpxchg_user_key4);
+
+int __kprobes __cmpxchg_user_key8(unsigned long address, unsigned long *uval,
+ unsigned long old, unsigned long new, unsigned long key)
+{
+ unsigned long prev = old;
+ bool sacf_flag;
+ int rc = 0;
+
+ skey_regions_initialize();
+ sacf_flag = enable_sacf_uaccess();
+ asm_inline volatile(
+ "20: spka 0(%[key])\n"
+ " sacf 256\n"
+ "0: csg %[prev],%[new],%[address]\n"
+ "1: sacf 768\n"
+ " spka %[default_key]\n"
+ "21:\n"
+ EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
+ SKEY_REGION(20b, 21b)
+ : [rc] "+&d" (rc),
+ [prev] "+&d" (prev),
+ [address] "+QS" (*(long *)address)
+ : [new] "d" (new),
+ [key] "a" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY)
+ : "memory", "cc");
+ disable_sacf_uaccess(sacf_flag);
+ *uval = prev;
+ return rc;
+}
+EXPORT_SYMBOL(__cmpxchg_user_key8);
+
+int __kprobes __cmpxchg_user_key16(unsigned long address, __uint128_t *uval,
+ __uint128_t old, __uint128_t new, unsigned long key)
+{
+ __uint128_t prev = old;
+ bool sacf_flag;
+ int rc = 0;
- asm volatile(
- " lr 0,%[spec]\n"
- "0: mvcos 0(%[to]),0(%[zeropg]),%[size]\n"
- "1: jz 5f\n"
- " algr %[size],%[val]\n"
- " slgr %[to],%[val]\n"
- " j 0b\n"
- "2: la %[rem],4095(%[to])\n" /* rem = to + 4095 */
- " nr %[rem],%[val]\n" /* rem = (to + 4095) & -4096 */
- " slgr %[rem],%[to]\n"
- " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */
- " jnh 6f\n"
- "3: mvcos 0(%[to]),0(%[zeropg]),%[rem]\n"
- "4: slgr %[size],%[rem]\n"
- " j 6f\n"
- "5: slgr %[size],%[size]\n"
- "6:\n"
- EX_TABLE(0b, 2b)
- EX_TABLE(1b, 2b)
- EX_TABLE(3b, 6b)
- EX_TABLE(4b, 6b)
- : [size] "+&a" (size), [to] "+&a" (to), [rem] "=&a" (rem)
- : [val] "a" (-4096UL), [zeropg] "a" (empty_zero_page), [spec] "d" (spec.val)
- : "cc", "memory", "0");
- return size;
+ skey_regions_initialize();
+ sacf_flag = enable_sacf_uaccess();
+ asm_inline volatile(
+ "20: spka 0(%[key])\n"
+ " sacf 256\n"
+ "0: cdsg %[prev],%[new],%[address]\n"
+ "1: sacf 768\n"
+ " spka %[default_key]\n"
+ "21:\n"
+ EX_TABLE_UA_LOAD_REGPAIR(0b, 1b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REGPAIR(1b, 1b, %[rc], %[prev])
+ SKEY_REGION(20b, 21b)
+ : [rc] "+&d" (rc),
+ [prev] "+&d" (prev),
+ [address] "+QS" (*(__int128_t *)address)
+ : [new] "d" (new),
+ [key] "a" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY)
+ : "memory", "cc");
+ disable_sacf_uaccess(sacf_flag);
+ *uval = prev;
+ return rc;
}
-EXPORT_SYMBOL(__clear_user);
+EXPORT_SYMBOL(__cmpxchg_user_key16);
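
The new __cmpxchg_user_key1() and __cmpxchg_user_key2() helpers emulate byte and halfword compare-and-swap on top of a word-sized cs on the containing aligned word: the old and new values are shifted into place, the surrounding bytes are masked off and preserved, and the word-sized exchange is retried while only unrelated bytes change. A user-space sketch of the same shift/mask arithmetic, with a GCC builtin in place of cs and without the storage-key and fault handling (names and values are made up for the example):

#include <stdint.h>
#include <stdio.h>

/* Emulate a 1-byte compare-and-swap inside an aligned 32-bit word. Byte
 * numbering follows the big-endian layout used by the kernel code; this is
 * a sketch of the technique, not the kernel function. */
static int cmpxchg_byte(uint32_t *word, unsigned int byte_idx,
			uint8_t *uval, uint8_t old, uint8_t new)
{
	unsigned int shift = (3 - byte_idx) * 8;
	uint32_t mask = ~(0xffu << shift);
	uint32_t _new = (uint32_t)new << shift;
	uint32_t cur = __atomic_load_n(word, __ATOMIC_RELAXED);

	while (1) {
		if (((cur >> shift) & 0xff) != old) {
			*uval = (cur >> shift) & 0xff;
			return 0;	/* target byte differs: genuine failure */
		}
		uint32_t desired = (cur & mask) | _new;

		if (__atomic_compare_exchange_n(word, &cur, desired, 0,
						__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
			*uval = old;
			return 1;	/* exchanged */
		}
		/* 'cur' now holds the current word value; only unrelated
		 * bytes changed, so retry the word-sized exchange. */
	}
}

int main(void)
{
	uint32_t word = 0x11223344;
	uint8_t prev;

	/* Replace byte 1 (big-endian numbering): expect 0x22, store 0x99. */
	int ok = cmpxchg_byte(&word, 1, &prev, 0x22, 0x99);

	printf("success=%d prev=0x%02x word=0x%08x\n", ok, prev, word);
	return 0;
}
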
diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c
index fb924a8041dc..1721b73b7803 100644
--- a/arch/s390/lib/xor.c
+++ b/arch/s390/lib/xor.c
@@ -15,7 +15,6 @@ static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2)
{
asm volatile(
- " larl 1,2f\n"
" aghi %0,-1\n"
" jm 3f\n"
" srlg 0,%0,8\n"
@@ -25,12 +24,12 @@ static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1,
" la %1,256(%1)\n"
" la %2,256(%2)\n"
" brctg 0,0b\n"
- "1: ex %0,0(1)\n"
+ "1: exrl %0,2f\n"
" j 3f\n"
"2: xc 0(1,%1),0(%2)\n"
- "3:\n"
+ "3:"
: : "d" (bytes), "a" (p1), "a" (p2)
- : "0", "1", "cc", "memory");
+ : "0", "cc", "memory");
}
static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1,
@@ -38,9 +37,8 @@ static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p3)
{
asm volatile(
- " larl 1,2f\n"
" aghi %0,-1\n"
- " jm 3f\n"
+ " jm 4f\n"
" srlg 0,%0,8\n"
" ltgr 0,0\n"
" jz 1f\n"
@@ -50,14 +48,14 @@ static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1,
" la %2,256(%2)\n"
" la %3,256(%3)\n"
" brctg 0,0b\n"
- "1: ex %0,0(1)\n"
- " ex %0,6(1)\n"
- " j 3f\n"
+ "1: exrl %0,2f\n"
+ " exrl %0,3f\n"
+ " j 4f\n"
"2: xc 0(1,%1),0(%2)\n"
- " xc 0(1,%1),0(%3)\n"
- "3:\n"
+ "3: xc 0(1,%1),0(%3)\n"
+ "4:"
: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3)
- : : "0", "1", "cc", "memory");
+ : : "0", "cc", "memory");
}
static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1,
@@ -66,9 +64,8 @@ static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p4)
{
asm volatile(
- " larl 1,2f\n"
" aghi %0,-1\n"
- " jm 3f\n"
+ " jm 5f\n"
" srlg 0,%0,8\n"
" ltgr 0,0\n"
" jz 1f\n"
@@ -80,16 +77,16 @@ static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1,
" la %3,256(%3)\n"
" la %4,256(%4)\n"
" brctg 0,0b\n"
- "1: ex %0,0(1)\n"
- " ex %0,6(1)\n"
- " ex %0,12(1)\n"
- " j 3f\n"
+ "1: exrl %0,2f\n"
+ " exrl %0,3f\n"
+ " exrl %0,4f\n"
+ " j 5f\n"
"2: xc 0(1,%1),0(%2)\n"
- " xc 0(1,%1),0(%3)\n"
- " xc 0(1,%1),0(%4)\n"
- "3:\n"
+ "3: xc 0(1,%1),0(%3)\n"
+ "4: xc 0(1,%1),0(%4)\n"
+ "5:"
: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4)
- : : "0", "1", "cc", "memory");
+ : : "0", "cc", "memory");
}
static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1,
@@ -101,7 +98,7 @@ static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1,
asm volatile(
" larl 1,2f\n"
" aghi %0,-1\n"
- " jm 3f\n"
+ " jm 6f\n"
" srlg 0,%0,8\n"
" ltgr 0,0\n"
" jz 1f\n"
@@ -115,19 +112,19 @@ static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1,
" la %4,256(%4)\n"
" la %5,256(%5)\n"
" brctg 0,0b\n"
- "1: ex %0,0(1)\n"
- " ex %0,6(1)\n"
- " ex %0,12(1)\n"
- " ex %0,18(1)\n"
- " j 3f\n"
+ "1: exrl %0,2f\n"
+ " exrl %0,3f\n"
+ " exrl %0,4f\n"
+ " exrl %0,5f\n"
+ " j 6f\n"
"2: xc 0(1,%1),0(%2)\n"
- " xc 0(1,%1),0(%3)\n"
- " xc 0(1,%1),0(%4)\n"
- " xc 0(1,%1),0(%5)\n"
- "3:\n"
+ "3: xc 0(1,%1),0(%3)\n"
+ "4: xc 0(1,%1),0(%4)\n"
+ "5: xc 0(1,%1),0(%5)\n"
+ "6:"
: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4),
"+a" (p5)
- : : "0", "1", "cc", "memory");
+ : : "0", "cc", "memory");
}
struct xor_block_template xor_block_xc = {
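
Functionally, xor_xc_2() XORs 'bytes' bytes of the second buffer into the first: full 256-byte blocks are processed in the loop, and the remainder is handled by the executed xc with the leftover length. A plain C model of that behaviour (a sketch for illustration, not a replacement for the assembly):

#include <stdio.h>
#include <stddef.h>

/* XOR 'bytes' bytes of src into dst, in 256-byte chunks plus a remainder,
 * mirroring the block structure of the xc-based assembly loop. */
static void xor_sketch(unsigned char *dst, const unsigned char *src, size_t bytes)
{
	while (bytes >= 256) {
		for (size_t i = 0; i < 256; i++)
			dst[i] ^= src[i];
		dst += 256;
		src += 256;
		bytes -= 256;
	}
	for (size_t i = 0; i < bytes; i++)	/* remainder, like the executed xc */
		dst[i] ^= src[i];
}

int main(void)
{
	unsigned char a[300], b[300];

	for (size_t i = 0; i < sizeof(a); i++) {
		a[i] = (unsigned char)i;
		b[i] = 0xff;
	}
	xor_sketch(a, b, sizeof(a));
	printf("a[0]=0x%02x a[299]=0x%02x\n", a[0], a[299]);
	return 0;
}
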
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index f6c2db7a8669..bd0401cc7ca5 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -9,6 +9,8 @@ obj-y += page-states.o pageattr.o pgtable.o pgalloc.o extable.o
obj-$(CONFIG_CMM) += cmm.o
obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o
+obj-$(CONFIG_PTDUMP) += dump_pagetables.o
obj-$(CONFIG_PGSTE) += gmap.o
obj-$(CONFIG_PFAULT) += pfault.o
+
+obj-$(subst m,y,$(CONFIG_KVM)) += gmap_helpers.o
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index f8b13f247646..eb7ef63fab1e 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -95,11 +95,12 @@ static long cmm_alloc_pages(long nr, long *counter,
(*counter)++;
spin_unlock(&cmm_lock);
nr--;
+ cond_resched();
}
return nr;
}
-static long cmm_free_pages(long nr, long *counter, struct cmm_page_array **list)
+static long __cmm_free_pages(long nr, long *counter, struct cmm_page_array **list)
{
struct cmm_page_array *pa;
unsigned long addr;
@@ -123,6 +124,21 @@ static long cmm_free_pages(long nr, long *counter, struct cmm_page_array **list)
return nr;
}
+static long cmm_free_pages(long nr, long *counter, struct cmm_page_array **list)
+{
+ long inc = 0;
+
+ while (nr) {
+ inc = min(256L, nr);
+ nr -= inc;
+ inc = __cmm_free_pages(inc, counter, list);
+ if (inc)
+ break;
+ cond_resched();
+ }
+ return nr + inc;
+}
+
static int cmm_oom_notify(struct notifier_block *self,
unsigned long dummy, void *parm)
{
@@ -185,10 +201,10 @@ static void cmm_set_timer(void)
{
if (cmm_timed_pages_target <= 0 || cmm_timeout_seconds <= 0) {
if (timer_pending(&cmm_timer))
- del_timer(&cmm_timer);
+ timer_delete(&cmm_timer);
return;
}
- mod_timer(&cmm_timer, jiffies + msecs_to_jiffies(cmm_timeout_seconds * MSEC_PER_SEC));
+ mod_timer(&cmm_timer, jiffies + secs_to_jiffies(cmm_timeout_seconds));
}
static void cmm_timer_fn(struct timer_list *unused)
@@ -243,7 +259,7 @@ static int cmm_skip_blanks(char *cp, char **endp)
return str != cp;
}
-static int cmm_pages_handler(struct ctl_table *ctl, int write,
+static int cmm_pages_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
long nr = cmm_get_pages();
@@ -262,7 +278,7 @@ static int cmm_pages_handler(struct ctl_table *ctl, int write,
return 0;
}
-static int cmm_timed_pages_handler(struct ctl_table *ctl, int write,
+static int cmm_timed_pages_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
{
@@ -282,7 +298,7 @@ static int cmm_timed_pages_handler(struct ctl_table *ctl, int write,
return 0;
}
-static int cmm_timeout_handler(struct ctl_table *ctl, int write,
+static int cmm_timeout_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
char buf[64], *p;
@@ -305,8 +321,8 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write,
cmm_set_timeout(nr, seconds);
*ppos += *lenp;
} else {
- len = sprintf(buf, "%ld %ld\n",
- cmm_timeout_pages, cmm_timeout_seconds);
+ len = scnprintf(buf, sizeof(buf), "%ld %ld\n",
+ cmm_timeout_pages, cmm_timeout_seconds);
if (len > *lenp)
len = *lenp;
memcpy(buffer, buf, len);
@@ -316,7 +332,7 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write,
return 0;
}
-static struct ctl_table cmm_table[] = {
+static const struct ctl_table cmm_table[] = {
{
.procname = "cmm_pages",
.mode = 0644,
@@ -408,7 +424,7 @@ out_smsg:
#endif
unregister_sysctl_table(cmm_sysctl_header);
out_sysctl:
- del_timer_sync(&cmm_timer);
+ timer_delete_sync(&cmm_timer);
return rc;
}
module_init(cmm_init);
@@ -421,10 +437,11 @@ static void __exit cmm_exit(void)
#endif
unregister_oom_notifier(&cmm_oom_nb);
kthread_stop(cmm_thread_ptr);
- del_timer_sync(&cmm_timer);
+ timer_delete_sync(&cmm_timer);
cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list);
cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list);
}
module_exit(cmm_exit);
+MODULE_DESCRIPTION("Cooperative memory management interface");
MODULE_LICENSE("GPL");
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index ffd07ed7b4af..89badbe72ae7 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -1,8 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/cpufeature.h>
#include <linux/set_memory.h>
#include <linux/ptdump.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
+#include <linux/sort.h>
#include <linux/mm.h>
#include <linux/kfence.h>
#include <linux/kasan.h>
@@ -15,68 +18,14 @@
static unsigned long max_addr;
struct addr_marker {
+ int is_start;
unsigned long start_address;
+ unsigned long size;
const char *name;
};
-enum address_markers_idx {
- IDENTITY_BEFORE_NR = 0,
- IDENTITY_BEFORE_END_NR,
- AMODE31_START_NR,
- AMODE31_END_NR,
- KERNEL_START_NR,
- KERNEL_END_NR,
-#ifdef CONFIG_KFENCE
- KFENCE_START_NR,
- KFENCE_END_NR,
-#endif
- IDENTITY_AFTER_NR,
- IDENTITY_AFTER_END_NR,
- VMEMMAP_NR,
- VMEMMAP_END_NR,
- VMALLOC_NR,
- VMALLOC_END_NR,
- MODULES_NR,
- MODULES_END_NR,
- ABS_LOWCORE_NR,
- ABS_LOWCORE_END_NR,
- MEMCPY_REAL_NR,
- MEMCPY_REAL_END_NR,
-#ifdef CONFIG_KASAN
- KASAN_SHADOW_START_NR,
- KASAN_SHADOW_END_NR,
-#endif
-};
-
-static struct addr_marker address_markers[] = {
- [IDENTITY_BEFORE_NR] = {0, "Identity Mapping Start"},
- [IDENTITY_BEFORE_END_NR] = {(unsigned long)_stext, "Identity Mapping End"},
- [AMODE31_START_NR] = {0, "Amode31 Area Start"},
- [AMODE31_END_NR] = {0, "Amode31 Area End"},
- [KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"},
- [KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"},
-#ifdef CONFIG_KFENCE
- [KFENCE_START_NR] = {0, "KFence Pool Start"},
- [KFENCE_END_NR] = {0, "KFence Pool End"},
-#endif
- [IDENTITY_AFTER_NR] = {(unsigned long)_end, "Identity Mapping Start"},
- [IDENTITY_AFTER_END_NR] = {0, "Identity Mapping End"},
- [VMEMMAP_NR] = {0, "vmemmap Area Start"},
- [VMEMMAP_END_NR] = {0, "vmemmap Area End"},
- [VMALLOC_NR] = {0, "vmalloc Area Start"},
- [VMALLOC_END_NR] = {0, "vmalloc Area End"},
- [MODULES_NR] = {0, "Modules Area Start"},
- [MODULES_END_NR] = {0, "Modules Area End"},
- [ABS_LOWCORE_NR] = {0, "Lowcore Area Start"},
- [ABS_LOWCORE_END_NR] = {0, "Lowcore Area End"},
- [MEMCPY_REAL_NR] = {0, "Real Memory Copy Area Start"},
- [MEMCPY_REAL_END_NR] = {0, "Real Memory Copy Area End"},
-#ifdef CONFIG_KASAN
- [KASAN_SHADOW_START_NR] = {KASAN_SHADOW_START, "Kasan Shadow Start"},
- [KASAN_SHADOW_END_NR] = {KASAN_SHADOW_END, "Kasan Shadow End"},
-#endif
- { -1, NULL }
-};
+static struct addr_marker *markers;
+static unsigned int markers_cnt;
struct pg_state {
struct ptdump_state ptdump;
@@ -102,7 +51,7 @@ struct pg_state {
struct seq_file *__m = (m); \
\
if (__m) \
- seq_printf(__m, fmt); \
+ seq_puts(__m, fmt); \
})
static void print_prot(struct seq_file *m, unsigned int pr, int level)
@@ -135,7 +84,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
* in which case we have two lpswe instructions in lowcore that need
* to be executable.
*/
- if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)))
+ if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !cpu_has_bear()))
return;
WARN_ONCE(IS_ENABLED(CONFIG_DEBUG_WX),
"s390/mm: Found insecure W+X mapping at address %pS\n",
@@ -143,6 +92,20 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
}
+static void note_page_update_state(struct pg_state *st, unsigned long addr, unsigned int prot, int level)
+{
+ struct seq_file *m = st->seq;
+
+ while (addr >= st->marker[1].start_address) {
+ st->marker++;
+ pt_dump_seq_printf(m, "---[ %s %s ]---\n", st->marker->name,
+ st->marker->is_start ? "Start" : "End");
+ }
+ st->start_address = addr;
+ st->current_prot = prot;
+ st->level = level;
+}
+
static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val)
{
int width = sizeof(unsigned long) * 2;
@@ -165,10 +128,8 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
if (level == -1)
addr = max_addr;
if (st->level == -1) {
- pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name);
- st->start_address = addr;
- st->current_prot = prot;
- st->level = level;
+ pt_dump_seq_puts(m, "---[ Kernel Virtual Address Space ]---\n");
+ note_page_update_state(st, addr, prot, level);
} else if (prot != st->current_prot || level != st->level ||
addr >= st->marker[1].start_address) {
note_prot_wx(st, addr);
@@ -182,21 +143,52 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
}
pt_dump_seq_printf(m, "%9lu%c ", delta, *unit);
print_prot(m, st->current_prot, st->level);
- while (addr >= st->marker[1].start_address) {
- st->marker++;
- pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name);
- }
- st->start_address = addr;
- st->current_prot = prot;
- st->level = level;
+ note_page_update_state(st, addr, prot, level);
}
}
+static void note_page_pte(struct ptdump_state *pt_st, unsigned long addr, pte_t pte)
+{
+ note_page(pt_st, addr, 4, pte_val(pte));
+}
+
+static void note_page_pmd(struct ptdump_state *pt_st, unsigned long addr, pmd_t pmd)
+{
+ note_page(pt_st, addr, 3, pmd_val(pmd));
+}
+
+static void note_page_pud(struct ptdump_state *pt_st, unsigned long addr, pud_t pud)
+{
+ note_page(pt_st, addr, 2, pud_val(pud));
+}
+
+static void note_page_p4d(struct ptdump_state *pt_st, unsigned long addr, p4d_t p4d)
+{
+ note_page(pt_st, addr, 1, p4d_val(p4d));
+}
+
+static void note_page_pgd(struct ptdump_state *pt_st, unsigned long addr, pgd_t pgd)
+{
+ note_page(pt_st, addr, 0, pgd_val(pgd));
+}
+
+static void note_page_flush(struct ptdump_state *pt_st)
+{
+ pte_t pte_zero = {0};
+
+ note_page(pt_st, 0, -1, pte_val(pte_zero));
+}
+
bool ptdump_check_wx(void)
{
struct pg_state st = {
.ptdump = {
- .note_page = note_page,
+ .note_page_pte = note_page_pte,
+ .note_page_pmd = note_page_pmd,
+ .note_page_pud = note_page_pud,
+ .note_page_p4d = note_page_p4d,
+ .note_page_pgd = note_page_pgd,
+ .note_page_flush = note_page_flush,
.range = (struct ptdump_range[]) {
{.start = 0, .end = max_addr},
{.start = 0, .end = 0},
@@ -214,7 +206,7 @@ bool ptdump_check_wx(void)
},
};
- if (!MACHINE_HAS_NX)
+ if (!cpu_has_nx())
return true;
ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
if (st.wx_pages) {
@@ -223,7 +215,7 @@ bool ptdump_check_wx(void)
return false;
} else {
pr_info("Checked W+X mappings: passed, no %sW+X pages found\n",
- (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) ?
+ (nospec_uses_trampoline() || !cpu_has_bear()) ?
"unexpected " : "");
return true;
@@ -235,7 +227,12 @@ static int ptdump_show(struct seq_file *m, void *v)
{
struct pg_state st = {
.ptdump = {
- .note_page = note_page,
+ .note_page_pte = note_page_pte,
+ .note_page_pmd = note_page_pmd,
+ .note_page_pud = note_page_pud,
+ .note_page_p4d = note_page_p4d,
+ .note_page_pgd = note_page_pgd,
+ .note_page_flush = note_page_flush,
.range = (struct ptdump_range[]) {
{.start = 0, .end = max_addr},
{.start = 0, .end = 0},
@@ -247,35 +244,72 @@ static int ptdump_show(struct seq_file *m, void *v)
.check_wx = false,
.wx_pages = 0,
.start_address = 0,
- .marker = address_markers,
+ .marker = markers,
};
- get_online_mems();
mutex_lock(&cpa_mutex);
ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
mutex_unlock(&cpa_mutex);
- put_online_mems();
return 0;
}
DEFINE_SHOW_ATTRIBUTE(ptdump);
#endif /* CONFIG_PTDUMP_DEBUGFS */
-/*
- * Heapsort from lib/sort.c is not a stable sorting algorithm, do a simple
- * insertion sort to preserve the original order of markers with the same
- * start address.
- */
-static void sort_address_markers(void)
+static int ptdump_cmp(const void *a, const void *b)
{
- struct addr_marker tmp;
- int i, j;
-
- for (i = 1; i < ARRAY_SIZE(address_markers) - 1; i++) {
- tmp = address_markers[i];
- for (j = i - 1; j >= 0 && address_markers[j].start_address > tmp.start_address; j--)
- address_markers[j + 1] = address_markers[j];
- address_markers[j + 1] = tmp;
+ const struct addr_marker *ama = a;
+ const struct addr_marker *amb = b;
+
+ if (ama->start_address > amb->start_address)
+ return 1;
+ if (ama->start_address < amb->start_address)
+ return -1;
+ /*
+ * If the start addresses of two markers are identical, sort the markers
+ * in an order that handles areas contained within other areas correctly.
+ */
+ if (ama->is_start && amb->is_start) {
+ if (ama->size > amb->size)
+ return -1;
+ if (ama->size < amb->size)
+ return 1;
+ return 0;
+ }
+ if (!ama->is_start && !amb->is_start) {
+ if (ama->size > amb->size)
+ return 1;
+ if (ama->size < amb->size)
+ return -1;
+ return 0;
}
+ if (ama->is_start)
+ return 1;
+ if (amb->is_start)
+ return -1;
+ return 0;
+}
+
+static int add_marker(unsigned long start, unsigned long end, const char *name)
+{
+ struct addr_marker *new;
+ size_t newsize;
+
+ newsize = (markers_cnt + 2) * sizeof(*markers);
+ new = kvrealloc(markers, newsize, GFP_KERNEL);
+ if (!new)
+ return -ENOMEM;
+ markers = new;
+ markers[markers_cnt].is_start = 1;
+ markers[markers_cnt].start_address = start;
+ markers[markers_cnt].size = end - start;
+ markers[markers_cnt].name = name;
+ markers_cnt++;
+ markers[markers_cnt].is_start = 0;
+ markers[markers_cnt].start_address = end;
+ markers[markers_cnt].size = end - start;
+ markers[markers_cnt].name = name;
+ markers_cnt++;
+ return 0;
}
static int pt_dump_init(void)
@@ -283,34 +317,48 @@ static int pt_dump_init(void)
#ifdef CONFIG_KFENCE
unsigned long kfence_start = (unsigned long)__kfence_pool;
#endif
+ unsigned long lowcore = (unsigned long)get_lowcore();
+ int rc;
+
/*
* Figure out the maximum virtual address being accessible with the
* kernel ASCE. We need this to keep the page table walker functions
* from accessing non-existent entries.
*/
- max_addr = (S390_lowcore.kernel_asce.val & _REGION_ENTRY_TYPE_MASK) >> 2;
+ max_addr = (get_lowcore()->kernel_asce.val & _REGION_ENTRY_TYPE_MASK) >> 2;
max_addr = 1UL << (max_addr * 11 + 31);
- address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size;
- address_markers[AMODE31_START_NR].start_address = (unsigned long)__samode31;
- address_markers[AMODE31_END_NR].start_address = (unsigned long)__eamode31;
- address_markers[MODULES_NR].start_address = MODULES_VADDR;
- address_markers[MODULES_END_NR].start_address = MODULES_END;
- address_markers[ABS_LOWCORE_NR].start_address = __abs_lowcore;
- address_markers[ABS_LOWCORE_END_NR].start_address = __abs_lowcore + ABS_LOWCORE_MAP_SIZE;
- address_markers[MEMCPY_REAL_NR].start_address = __memcpy_real_area;
- address_markers[MEMCPY_REAL_END_NR].start_address = __memcpy_real_area + MEMCPY_REAL_SIZE;
- address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
- address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size;
- address_markers[VMALLOC_NR].start_address = VMALLOC_START;
- address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
+ /* start + end markers - must be added first */
+ rc = add_marker(0, -1UL, NULL);
+ rc |= add_marker((unsigned long)_stext, (unsigned long)_end, "Kernel Image");
+ rc |= add_marker(lowcore, lowcore + sizeof(struct lowcore), "Lowcore");
+ rc |= add_marker(__identity_base, __identity_base + ident_map_size, "Identity Mapping");
+ rc |= add_marker((unsigned long)__samode31, (unsigned long)__eamode31, "Amode31 Area");
+ rc |= add_marker(MODULES_VADDR, MODULES_END, "Modules Area");
+ rc |= add_marker(__abs_lowcore, __abs_lowcore + ABS_LOWCORE_MAP_SIZE, "Lowcore Area");
+ rc |= add_marker(__memcpy_real_area, __memcpy_real_area + MEMCPY_REAL_SIZE, "Real Memory Copy Area");
+ rc |= add_marker((unsigned long)vmemmap, (unsigned long)vmemmap + vmemmap_size, "vmemmap Area");
+ rc |= add_marker(VMALLOC_START, VMALLOC_END, "vmalloc Area");
#ifdef CONFIG_KFENCE
- address_markers[KFENCE_START_NR].start_address = kfence_start;
- address_markers[KFENCE_END_NR].start_address = kfence_start + KFENCE_POOL_SIZE;
+ rc |= add_marker(kfence_start, kfence_start + KFENCE_POOL_SIZE, "KFence Pool");
+#endif
+#ifdef CONFIG_KMSAN
+ rc |= add_marker(KMSAN_VMALLOC_SHADOW_START, KMSAN_VMALLOC_SHADOW_END, "Kmsan vmalloc Shadow");
+ rc |= add_marker(KMSAN_VMALLOC_ORIGIN_START, KMSAN_VMALLOC_ORIGIN_END, "Kmsan vmalloc Origins");
+ rc |= add_marker(KMSAN_MODULES_SHADOW_START, KMSAN_MODULES_SHADOW_END, "Kmsan Modules Shadow");
+ rc |= add_marker(KMSAN_MODULES_ORIGIN_START, KMSAN_MODULES_ORIGIN_END, "Kmsan Modules Origins");
+#endif
+#ifdef CONFIG_KASAN
+ rc |= add_marker(KASAN_SHADOW_START, KASAN_SHADOW_END, "Kasan Shadow");
#endif
- sort_address_markers();
+ if (rc)
+ goto error;
+ sort(&markers[1], markers_cnt - 1, sizeof(*markers), ptdump_cmp, NULL);
#ifdef CONFIG_PTDUMP_DEBUGFS
debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
#endif /* CONFIG_PTDUMP_DEBUGFS */
return 0;
+error:
+ kvfree(markers);
+ return -ENOMEM;
}
device_initcall(pt_dump_init);
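
The reworked marker handling emits separate start and end markers per area and sorts them so that, at equal addresses, larger areas open before and close after the areas they contain. A user-space demonstration of the same comparator rules with qsort() (the area names and addresses are hypothetical):

#include <stdio.h>
#include <stdlib.h>

struct marker {
	int is_start;
	unsigned long addr;
	unsigned long size;
	const char *name;
};

/* Same ordering rules as ptdump_cmp(): by address; at equal addresses a
 * larger start marker sorts first, a larger end marker sorts last, and end
 * markers sort before start markers. */
static int marker_cmp(const void *a, const void *b)
{
	const struct marker *ma = a, *mb = b;

	if (ma->addr != mb->addr)
		return ma->addr > mb->addr ? 1 : -1;
	if (ma->is_start && mb->is_start)
		return ma->size > mb->size ? -1 : (ma->size < mb->size ? 1 : 0);
	if (!ma->is_start && !mb->is_start)
		return ma->size > mb->size ? 1 : (ma->size < mb->size ? -1 : 0);
	return ma->is_start ? 1 : -1;
}

int main(void)
{
	struct marker m[] = {
		{ 1, 0x1000, 0x3000, "Outer Area" },
		{ 0, 0x4000, 0x3000, "Outer Area" },
		{ 1, 0x1000, 0x1000, "Inner Area" },
		{ 0, 0x2000, 0x1000, "Inner Area" },
	};

	qsort(m, sizeof(m) / sizeof(m[0]), sizeof(m[0]), marker_cmp);
	for (unsigned int i = 0; i < sizeof(m) / sizeof(m[0]); i++)
		printf("%#lx %s %s\n", m[i].addr, m[i].name,
		       m[i].is_start ? "Start" : "End");
	return 0;
}
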
diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c
index 0a0738a473af..7498e858c401 100644
--- a/arch/s390/mm/extable.c
+++ b/arch/s390/mm/extable.c
@@ -7,6 +7,7 @@
#include <linux/panic.h>
#include <asm/asm-extable.h>
#include <asm/extable.h>
+#include <asm/fpu.h>
const struct exception_table_entry *s390_search_extables(unsigned long addr)
{
@@ -26,7 +27,7 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex, struct pt_r
return true;
}
-static bool ex_handler_ua_store(const struct exception_table_entry *ex, struct pt_regs *regs)
+static bool ex_handler_ua_fault(const struct exception_table_entry *ex, struct pt_regs *regs)
{
unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
@@ -35,18 +36,6 @@ static bool ex_handler_ua_store(const struct exception_table_entry *ex, struct p
return true;
}
-static bool ex_handler_ua_load_mem(const struct exception_table_entry *ex, struct pt_regs *regs)
-{
- unsigned int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
- unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
- size_t len = FIELD_GET(EX_DATA_LEN, ex->data);
-
- regs->gprs[reg_err] = -EFAULT;
- memset((void *)regs->gprs[reg_addr], 0, len);
- regs->psw.addr = extable_fixup(ex);
- return true;
-}
-
static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex,
bool pair, struct pt_regs *regs)
{
@@ -77,6 +66,56 @@ static bool ex_handler_zeropad(const struct exception_table_entry *ex, struct pt
return true;
}
+static bool ex_handler_fpc(const struct exception_table_entry *ex, struct pt_regs *regs)
+{
+ fpu_sfpc(0);
+ regs->psw.addr = extable_fixup(ex);
+ return true;
+}
+
+struct insn_ssf {
+ u64 opc1 : 8;
+ u64 r3 : 4;
+ u64 opc2 : 4;
+ u64 b1 : 4;
+ u64 d1 : 12;
+ u64 b2 : 4;
+ u64 d2 : 12;
+} __packed;
+
+static bool ex_handler_ua_mvcos(const struct exception_table_entry *ex,
+ bool from, struct pt_regs *regs)
+{
+ unsigned long uaddr, remainder;
+ struct insn_ssf *insn;
+
+ /*
+ * If the faulting user space access crossed a page boundary, retry by
+ * limiting the access to the first page (adjust the length accordingly).
+ * The mvcos instruction will then either complete with condition code
+ * zero, or generate another fault where the user space access did not
+ * cross a page boundary.
+ * If the faulting user space access did not cross a page boundary, set
+ * the length to zero and retry. In this case no user space access will
+ * happen, and the mvcos instruction will complete with condition code
+ * zero.
+ * In both cases the instruction completes with condition code zero
+ * (copying finished), and the register that contains the length
+ * indicates the number of bytes copied.
+ */
+ regs->psw.addr = extable_fixup(ex);
+ insn = (struct insn_ssf *)regs->psw.addr;
+ if (from)
+ uaddr = regs->gprs[insn->b2] + insn->d2;
+ else
+ uaddr = regs->gprs[insn->b1] + insn->d1;
+ remainder = PAGE_SIZE - (uaddr & (PAGE_SIZE - 1));
+ if (regs->gprs[insn->r3] <= remainder)
+ remainder = 0;
+ regs->gprs[insn->r3] = remainder;
+ return true;
+}
+
bool fixup_exception(struct pt_regs *regs)
{
const struct exception_table_entry *ex;
@@ -89,16 +128,20 @@ bool fixup_exception(struct pt_regs *regs)
return ex_handler_fixup(ex, regs);
case EX_TYPE_BPF:
return ex_handler_bpf(ex, regs);
- case EX_TYPE_UA_STORE:
- return ex_handler_ua_store(ex, regs);
- case EX_TYPE_UA_LOAD_MEM:
- return ex_handler_ua_load_mem(ex, regs);
+ case EX_TYPE_UA_FAULT:
+ return ex_handler_ua_fault(ex, regs);
case EX_TYPE_UA_LOAD_REG:
return ex_handler_ua_load_reg(ex, false, regs);
case EX_TYPE_UA_LOAD_REGPAIR:
return ex_handler_ua_load_reg(ex, true, regs);
case EX_TYPE_ZEROPAD:
return ex_handler_zeropad(ex, regs);
+ case EX_TYPE_FPC:
+ return ex_handler_fpc(ex, regs);
+ case EX_TYPE_UA_MVCOS_TO:
+ return ex_handler_ua_mvcos(ex, false, regs);
+ case EX_TYPE_UA_MVCOS_FROM:
+ return ex_handler_ua_mvcos(ex, true, regs);
}
panic("invalid exception table entry");
}
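
The fixup in ex_handler_ua_mvcos() reduces the remaining length either to the part of the access that fits into the first page, or to zero when the access did not cross a page boundary, so the retried mvcos completes with condition code zero. The clamping arithmetic in isolation (a sketch with made-up addresses, not the exception handler itself):

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Given the faulting user address and the requested length, compute the
 * retry length the same way the mvcos fixup does: bytes up to the end of
 * the first page, or zero if the access stayed within one page. */
static unsigned long mvcos_retry_len(unsigned long uaddr, unsigned long len)
{
	unsigned long remainder = PAGE_SIZE - (uaddr & (PAGE_SIZE - 1));

	if (len <= remainder)
		remainder = 0;
	return remainder;
}

int main(void)
{
	/* Access crosses a page boundary: retry with the first-page part. */
	printf("%lu\n", mvcos_retry_len(0x1ff0, 0x40));
	/* Access fits in one page: retry with length zero. */
	printf("%lu\n", mvcos_retry_len(0x1000, 0x40));
	return 0;
}
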
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 282fefe107a2..6cc33c705de2 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -7,8 +7,7 @@
* Copyright IBM Corp. 2002, 2004
*/
-#define KMSG_COMPONENT "extmem"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "extmem: " fmt
#include <linux/kernel.h>
#include <linux/string.h>
@@ -21,6 +20,7 @@
#include <linux/ioport.h>
#include <linux/refcount.h>
#include <linux/pgtable.h>
+#include <asm/machine.h>
#include <asm/diag.h>
#include <asm/page.h>
#include <asm/ebcdic.h>
@@ -28,6 +28,7 @@
#include <asm/extmem.h>
#include <asm/cpcmd.h>
#include <asm/setup.h>
+#include <asm/asm.h>
#define DCSS_PURGESEG 0x08
#define DCSS_LOADSHRX 0x20
@@ -134,20 +135,21 @@ dcss_diag(int *func, void *parameter,
unsigned long *ret1, unsigned long *ret2)
{
unsigned long rx, ry;
- int rc;
+ int cc;
rx = virt_to_phys(parameter);
ry = (unsigned long) *func;
diag_stat_inc(DIAG_STAT_X064);
asm volatile(
- " diag %0,%1,0x64\n"
- " ipm %2\n"
- " srl %2,28\n"
- : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
+ " diag %[rx],%[ry],0x64\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc), [rx] "+d" (rx), [ry] "+d" (ry)
+ :
+ : CC_CLOBBER);
*ret1 = rx;
*ret2 = ry;
- return rc;
+ return CC_TRANSFORM(cc);
}
static inline int
@@ -253,7 +255,7 @@ segment_type (char* name)
int rc;
struct dcss_segment seg;
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return -ENOSYS;
dcss_mkname(name, seg.dcss_name);
@@ -416,7 +418,7 @@ segment_load (char *name, int do_nonshared, unsigned long *addr,
struct dcss_segment *seg;
int rc;
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return -ENOSYS;
mutex_lock(&dcss_lock);
@@ -527,6 +529,14 @@ segment_modify_shared (char *name, int do_nonshared)
return rc;
}
+static void __dcss_diag_purge_on_cpu_0(void *data)
+{
+ struct dcss_segment *seg = (struct dcss_segment *)data;
+ unsigned long dummy;
+
+ dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
+}
+
/*
* Decrease the use count of a DCSS segment and remove
* it from the address space if nobody is using it
@@ -535,10 +545,9 @@ segment_modify_shared (char *name, int do_nonshared)
void
segment_unload(char *name)
{
- unsigned long dummy;
struct dcss_segment *seg;
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return;
mutex_lock(&dcss_lock);
@@ -553,7 +562,14 @@ segment_unload(char *name)
kfree(seg->res);
vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
list_del(&seg->list);
- dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
+ /*
+ * Workaround for a z/VM issue where calling the DCSS unload diag on
+ * a non-IPL CPU would cause bogus sclp maximum memory detection on
+ * the next IPL.
+ * CPU 0 (the IPL CPU) cannot be set offline, so the dcss_diag() call
+ * can be scheduled directly to that CPU.
+ */
+ smp_call_function_single(0, __dcss_diag_purge_on_cpu_0, seg, 1);
kfree(seg);
out_unlock:
mutex_unlock(&dcss_lock);
@@ -570,7 +586,7 @@ segment_save(char *name)
char cmd2[80];
int i, response;
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return;
mutex_lock(&dcss_lock);
@@ -581,14 +597,16 @@ segment_save(char *name)
goto out;
}
- sprintf(cmd1, "DEFSEG %s", name);
+ snprintf(cmd1, sizeof(cmd1), "DEFSEG %s", name);
for (i=0; i<seg->segcnt; i++) {
- sprintf(cmd1+strlen(cmd1), " %lX-%lX %s",
- seg->range[i].start >> PAGE_SHIFT,
- seg->range[i].end >> PAGE_SHIFT,
- segtype_string[seg->range[i].start & 0xff]);
+ size_t len = strlen(cmd1);
+
+ snprintf(cmd1 + len, sizeof(cmd1) - len, " %lX-%lX %s",
+ seg->range[i].start >> PAGE_SHIFT,
+ seg->range[i].end >> PAGE_SHIFT,
+ segtype_string[seg->range[i].start & 0xff]);
}
- sprintf(cmd2, "SAVESEG %s", name);
+ snprintf(cmd2, sizeof(cmd2), "SAVESEG %s", name);
response = 0;
cpcmd(cmd1, NULL, 0, &response);
if (response) {
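
The segment_save() change replaces unbounded sprintf() calls with snprintf() plus a running length, so appending the per-range parameters can never overflow the command buffer. The same append pattern in standalone form (buffer size, segment name, and ranges are invented for the example):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char cmd[64];
	unsigned long ranges[][2] = { { 0x100, 0x1ff }, { 0x400, 0x4ff } };

	/* Start the command, then append each range while respecting the
	 * remaining buffer space, as segment_save() now does. */
	snprintf(cmd, sizeof(cmd), "DEFSEG %s", "TESTSEG");
	for (unsigned int i = 0; i < 2; i++) {
		size_t len = strlen(cmd);

		snprintf(cmd + len, sizeof(cmd) - len, " %lX-%lX %s",
			 ranges[i][0], ranges[i][1], "SR");
	}
	printf("%s\n", cmd);
	return 0;
}
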
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 65747f15dbec..e2e13778c36a 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -11,11 +11,11 @@
#include <linux/kernel_stat.h>
#include <linux/mmu_context.h>
+#include <linux/cpufeature.h>
#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
-#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
@@ -23,7 +23,6 @@
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
-#include <linux/compat.h>
#include <linux/smp.h>
#include <linux/kdebug.h>
#include <linux/init.h>
@@ -34,58 +33,29 @@
#include <linux/uaccess.h>
#include <linux/hugetlb.h>
#include <linux/kfence.h>
+#include <linux/pagewalk.h>
#include <asm/asm-extable.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
#include <asm/fault.h>
#include <asm/diag.h>
-#include <asm/gmap.h>
#include <asm/irq.h>
#include <asm/facility.h>
#include <asm/uv.h>
#include "../kernel/entry.h"
-enum fault_type {
- KERNEL_FAULT,
- USER_FAULT,
- GMAP_FAULT,
-};
-
-static DEFINE_STATIC_KEY_FALSE(have_store_indication);
-
-static int __init fault_init(void)
-{
- if (test_facility(75))
- static_branch_enable(&have_store_indication);
- return 0;
-}
-early_initcall(fault_init);
-
/*
* Find out which address space caused the exception.
*/
-static enum fault_type get_fault_type(struct pt_regs *regs)
+static bool is_kernel_fault(struct pt_regs *regs)
{
union teid teid = { .val = regs->int_parm_long };
- struct gmap *gmap;
- if (likely(teid.as == PSW_BITS_AS_PRIMARY)) {
- if (user_mode(regs))
- return USER_FAULT;
- if (!IS_ENABLED(CONFIG_PGSTE))
- return KERNEL_FAULT;
- gmap = (struct gmap *)S390_lowcore.gmap;
- if (gmap && gmap->asce == regs->cr1)
- return GMAP_FAULT;
- return KERNEL_FAULT;
- }
+ if (user_mode(regs))
+ return false;
if (teid.as == PSW_BITS_AS_SECONDARY)
- return USER_FAULT;
- /* Access register mode, not used in the kernel */
- if (teid.as == PSW_BITS_AS_ACCREG)
- return USER_FAULT;
- /* Home space -> access via kernel ASCE */
- return KERNEL_FAULT;
+ return false;
+ return true;
}
static unsigned long get_fault_address(struct pt_regs *regs)
@@ -99,7 +69,7 @@ static __always_inline bool fault_is_write(struct pt_regs *regs)
{
union teid teid = { .val = regs->int_parm_long };
- if (static_branch_likely(&have_store_indication))
+ if (test_facility(75))
return teid.fsi == TEID_FSI_STORE;
return false;
}
@@ -146,7 +116,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
goto out;
table = __va(entry & _SEGMENT_ENTRY_ORIGIN);
}
- table += (address & _PAGE_INDEX) >> _PAGE_SHIFT;
+ table += (address & _PAGE_INDEX) >> PAGE_SHIFT;
if (get_kernel_nofault(entry, table))
goto bad;
pr_cont("P:%016lx ", entry);
@@ -162,8 +132,17 @@ static void dump_fault_info(struct pt_regs *regs)
union teid teid = { .val = regs->int_parm_long };
unsigned long asce;
- pr_alert("Failing address: %016lx TEID: %016lx\n",
+ pr_alert("Failing address: %016lx TEID: %016lx",
get_fault_address(regs), teid.val);
+ if (test_facility(131))
+ pr_cont(" ESOP-2");
+ else if (machine_has_esop())
+ pr_cont(" ESOP-1");
+ else
+ pr_cont(" SOP");
+ if (test_facility(75))
+ pr_cont(" FSI");
+ pr_cont("\n");
pr_alert("Fault in ");
switch (teid.as) {
case PSW_BITS_AS_HOME:
@@ -180,21 +159,12 @@ static void dump_fault_info(struct pt_regs *regs)
break;
}
pr_cont("mode while using ");
- switch (get_fault_type(regs)) {
- case USER_FAULT:
- asce = S390_lowcore.user_asce.val;
- pr_cont("user ");
- break;
- case GMAP_FAULT:
- asce = ((struct gmap *)S390_lowcore.gmap)->asce;
- pr_cont("gmap ");
- break;
- case KERNEL_FAULT:
- asce = S390_lowcore.kernel_asce.val;
+ if (is_kernel_fault(regs)) {
+ asce = get_lowcore()->kernel_asce.val;
pr_cont("kernel ");
- break;
- default:
- unreachable();
+ } else {
+ asce = get_lowcore()->user_asce.val;
+ pr_cont("user ");
}
pr_cont("ASCE.\n");
dump_pagetable(asce, get_fault_address(regs));
@@ -202,6 +172,23 @@ static void dump_fault_info(struct pt_regs *regs)
int show_unhandled_signals = 1;
+static const struct ctl_table s390_fault_sysctl_table[] = {
+ {
+ .procname = "userprocess_debug",
+ .data = &show_unhandled_signals,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+};
+
+static int __init init_s390_fault_sysctls(void)
+{
+ register_sysctl_init("kernel", s390_fault_sysctl_table);
+ return 0;
+}
+arch_initcall(init_s390_fault_sysctls);
+
void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault)
{
static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);
@@ -229,7 +216,6 @@ static void do_sigsegv(struct pt_regs *regs, int si_code)
static void handle_fault_error_nolock(struct pt_regs *regs, int si_code)
{
- enum fault_type fault_type;
unsigned long address;
bool is_write;
@@ -240,17 +226,15 @@ static void handle_fault_error_nolock(struct pt_regs *regs, int si_code)
}
if (fixup_exception(regs))
return;
- fault_type = get_fault_type(regs);
- if (fault_type == KERNEL_FAULT) {
+ if (is_kernel_fault(regs)) {
address = get_fault_address(regs);
is_write = fault_is_write(regs);
if (kfence_handle_page_fault(address, is_write, regs))
return;
- }
- if (fault_type == KERNEL_FAULT)
pr_alert("Unable to handle kernel pointer dereference in virtual kernel address space\n");
- else
+ } else {
pr_alert("Unable to handle kernel paging request in virtual user address space\n");
+ }
dump_fault_info(regs);
die(regs, "Oops");
}
@@ -284,9 +268,7 @@ static void do_exception(struct pt_regs *regs, int access)
struct vm_area_struct *vma;
unsigned long address;
struct mm_struct *mm;
- enum fault_type type;
unsigned int flags;
- struct gmap *gmap;
vm_fault_t fault;
bool is_write;
@@ -300,16 +282,8 @@ static void do_exception(struct pt_regs *regs, int access)
mm = current->mm;
address = get_fault_address(regs);
is_write = fault_is_write(regs);
- type = get_fault_type(regs);
- switch (type) {
- case KERNEL_FAULT:
+ if (is_kernel_fault(regs) || faulthandler_disabled() || !mm)
return handle_fault_error_nolock(regs, 0);
- case USER_FAULT:
- case GMAP_FAULT:
- if (faulthandler_disabled() || !mm)
- return handle_fault_error_nolock(regs, 0);
- break;
- }
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
flags = FAULT_FLAG_DEFAULT;
if (user_mode(regs))
@@ -333,14 +307,11 @@ static void do_exception(struct pt_regs *regs, int access)
vma_end_read(vma);
if (!(fault & VM_FAULT_RETRY)) {
count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
- if (unlikely(fault & VM_FAULT_ERROR))
- goto error;
- return;
+ goto done;
}
count_vm_vma_lock_event(VMA_LOCK_RETRY);
if (fault & VM_FAULT_MAJOR)
flags |= FAULT_FLAG_TRIED;
-
/* Quick path to respond to signals */
if (fault_signal_pending(fault, regs)) {
if (!user_mode(regs))
@@ -348,81 +319,29 @@ static void do_exception(struct pt_regs *regs, int access)
return;
}
lock_mmap:
- mmap_read_lock(mm);
- gmap = NULL;
- if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) {
- gmap = (struct gmap *)S390_lowcore.gmap;
- current->thread.gmap_addr = address;
- current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE);
- current->thread.gmap_int_code = regs->int_code & 0xffff;
- address = __gmap_translate(gmap, address);
- if (address == -EFAULT)
- return handle_fault_error(regs, SEGV_MAPERR);
- if (gmap->pfault_enabled)
- flags |= FAULT_FLAG_RETRY_NOWAIT;
- }
retry:
- vma = find_vma(mm, address);
+ vma = lock_mm_and_find_vma(mm, address, regs);
if (!vma)
- return handle_fault_error(regs, SEGV_MAPERR);
- if (unlikely(vma->vm_start > address)) {
- if (!(vma->vm_flags & VM_GROWSDOWN))
- return handle_fault_error(regs, SEGV_MAPERR);
- vma = expand_stack(mm, address);
- if (!vma)
- return handle_fault_error_nolock(regs, SEGV_MAPERR);
- }
+ return handle_fault_error_nolock(regs, SEGV_MAPERR);
if (unlikely(!(vma->vm_flags & access)))
return handle_fault_error(regs, SEGV_ACCERR);
fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs)) {
- if (flags & FAULT_FLAG_RETRY_NOWAIT)
- mmap_read_unlock(mm);
if (!user_mode(regs))
handle_fault_error_nolock(regs, 0);
return;
}
/* The fault is fully completed (including releasing mmap lock) */
- if (fault & VM_FAULT_COMPLETED) {
- if (gmap) {
- mmap_read_lock(mm);
- goto gmap;
- }
+ if (fault & VM_FAULT_COMPLETED)
return;
- }
- if (unlikely(fault & VM_FAULT_ERROR)) {
- mmap_read_unlock(mm);
- goto error;
- }
if (fault & VM_FAULT_RETRY) {
- if (IS_ENABLED(CONFIG_PGSTE) && gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) {
- /*
- * FAULT_FLAG_RETRY_NOWAIT has been set,
- * mmap_lock has not been released
- */
- current->thread.gmap_pfault = 1;
- return handle_fault_error(regs, 0);
- }
- flags &= ~FAULT_FLAG_RETRY_NOWAIT;
flags |= FAULT_FLAG_TRIED;
- mmap_read_lock(mm);
goto retry;
}
-gmap:
- if (IS_ENABLED(CONFIG_PGSTE) && gmap) {
- address = __gmap_link(gmap, current->thread.gmap_addr,
- address);
- if (address == -EFAULT)
- return handle_fault_error(regs, SEGV_MAPERR);
- if (address == -ENOMEM) {
- fault = VM_FAULT_OOM;
- mmap_read_unlock(mm);
- goto error;
- }
- }
mmap_read_unlock(mm);
- return;
-error:
+done:
+ if (!(fault & VM_FAULT_ERROR))
+ return;
if (fault & VM_FAULT_OOM) {
if (!user_mode(regs))
handle_fault_error_nolock(regs, 0);
@@ -433,12 +352,14 @@ error:
handle_fault_error_nolock(regs, 0);
else
do_sigsegv(regs, SEGV_MAPERR);
- } else if (fault & VM_FAULT_SIGBUS) {
+ } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON |
+ VM_FAULT_HWPOISON_LARGE)) {
if (!user_mode(regs))
handle_fault_error_nolock(regs, 0);
else
do_sigbus(regs);
} else {
+ pr_emerg("Unexpected fault flags: %08x\n", fault);
BUG();
}
}
@@ -452,25 +373,23 @@ void do_protection_exception(struct pt_regs *regs)
* The exception to this rule are aborted transactions, for these
* the PSW already points to the correct location.
*/
- if (!(regs->int_code & 0x200))
+ if (!(regs->int_code & 0x200)) {
regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
+ set_pt_regs_flag(regs, PIF_PSW_ADDR_ADJUSTED);
+ }
/*
- * Check for low-address protection. This needs to be treated
- * as a special case because the translation exception code
- * field is not guaranteed to contain valid data in this case.
+	 * If bit 61 of the TEID is not set, the remainder of the
+ * TEID is unpredictable. Special handling is required.
*/
if (unlikely(!teid.b61)) {
if (user_mode(regs)) {
- /* Low-address protection in user mode: cannot happen */
- die(regs, "Low-address protection");
+ dump_fault_info(regs);
+ die(regs, "Unexpected TEID");
}
- /*
- * Low-address protection in kernel mode means
- * NULL pointer write access in kernel mode.
- */
+ /* Assume low-address protection in kernel mode. */
return handle_fault_error_nolock(regs, 0);
}
- if (unlikely(MACHINE_HAS_NX && teid.b56)) {
+ if (unlikely(cpu_has_nx() && teid.b56)) {
regs->int_parm_long = (teid.addr * PAGE_SIZE) | (regs->psw.addr & PAGE_MASK);
return handle_fault_error_nolock(regs, SEGV_ACCERR);
}
@@ -491,9 +410,9 @@ void do_secure_storage_access(struct pt_regs *regs)
union teid teid = { .val = regs->int_parm_long };
unsigned long addr = get_fault_address(regs);
struct vm_area_struct *vma;
+ struct folio_walk fw;
struct mm_struct *mm;
- struct page *page;
- struct gmap *gmap;
+ struct folio *folio;
int rc;
/*
@@ -518,81 +437,37 @@ void do_secure_storage_access(struct pt_regs *regs)
*/
panic("Unexpected PGM 0x3d with TEID bit 61=0");
}
- switch (get_fault_type(regs)) {
- case GMAP_FAULT:
- mm = current->mm;
- gmap = (struct gmap *)S390_lowcore.gmap;
- mmap_read_lock(mm);
- addr = __gmap_translate(gmap, addr);
- mmap_read_unlock(mm);
- if (IS_ERR_VALUE(addr))
- return handle_fault_error_nolock(regs, SEGV_MAPERR);
- fallthrough;
- case USER_FAULT:
+ if (is_kernel_fault(regs)) {
+ folio = phys_to_folio(addr);
+ if (unlikely(!folio_try_get(folio)))
+ return;
+ rc = arch_make_folio_accessible(folio);
+ folio_put(folio);
+ if (rc)
+ BUG();
+ } else {
+ if (faulthandler_disabled())
+ return handle_fault_error_nolock(regs, 0);
mm = current->mm;
mmap_read_lock(mm);
vma = find_vma(mm, addr);
if (!vma)
return handle_fault_error(regs, SEGV_MAPERR);
- page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET);
- if (IS_ERR_OR_NULL(page)) {
+ folio = folio_walk_start(&fw, vma, addr, 0);
+ if (!folio) {
mmap_read_unlock(mm);
- break;
+ return;
}
- if (arch_make_page_accessible(page))
+ /* arch_make_folio_accessible() needs a raised refcount. */
+ folio_get(folio);
+ rc = arch_make_folio_accessible(folio);
+ folio_put(folio);
+ folio_walk_end(&fw, vma);
+ if (rc)
send_sig(SIGSEGV, current, 0);
- put_page(page);
mmap_read_unlock(mm);
- break;
- case KERNEL_FAULT:
- page = phys_to_page(addr);
- if (unlikely(!try_get_page(page)))
- break;
- rc = arch_make_page_accessible(page);
- put_page(page);
- if (rc)
- BUG();
- break;
- default:
- unreachable();
}
}
NOKPROBE_SYMBOL(do_secure_storage_access);
-void do_non_secure_storage_access(struct pt_regs *regs)
-{
- struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
- unsigned long gaddr = get_fault_address(regs);
-
- if (WARN_ON_ONCE(get_fault_type(regs) != GMAP_FAULT))
- return handle_fault_error_nolock(regs, SEGV_MAPERR);
- if (gmap_convert_to_secure(gmap, gaddr) == -EINVAL)
- send_sig(SIGSEGV, current, 0);
-}
-NOKPROBE_SYMBOL(do_non_secure_storage_access);
-
-void do_secure_storage_violation(struct pt_regs *regs)
-{
- struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
- unsigned long gaddr = get_fault_address(regs);
-
- /*
- * If the VM has been rebooted, its address space might still contain
- * secure pages from the previous boot.
- * Clear the page so it can be reused.
- */
- if (!gmap_destroy_page(gmap, gaddr))
- return;
- /*
- * Either KVM messed up the secure guest mapping or the same
- * page is mapped into multiple secure guests.
- *
- * This exception is only triggered when a guest 2 is running
- * and can therefore never occur in kernel context.
- */
- pr_warn_ratelimited("Secure storage violation in task: %s, pid %d\n",
- current->comm, current->pid);
- send_sig(SIGSEGV, current, 0);
-}
-
#endif /* CONFIG_PGSTE */
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 474a25ca5c48..dd85bcca817d 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -8,6 +8,8 @@
* Janosch Frank <frankja@linux.vnet.ibm.com>
*/
+#include <linux/cpufeature.h>
+#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/pagewalk.h>
#include <linux/swap.h>
@@ -20,9 +22,20 @@
#include <linux/pgtable.h>
#include <asm/page-states.h>
#include <asm/pgalloc.h>
+#include <asm/machine.h>
+#include <asm/gmap_helpers.h>
#include <asm/gmap.h>
#include <asm/page.h>
-#include <asm/tlb.h>
+
+/*
+ * The address is saved in a radix tree directly; NULL would be ambiguous,
+ * since 0 is a valid address, and NULL is returned when nothing was found.
+ * The lower bits are ignored by all users of the macro, so the lowest bit
+ * can be used to distinguish a valid address 0 from a NULL lookup result.
+ */
+#define VALID_GADDR_FLAG 1
+#define IS_GADDR_VALID(gaddr) ((gaddr) & VALID_GADDR_FLAG)
+#define MAKE_VALID_GADDR(gaddr) (((gaddr) & HPAGE_MASK) | VALID_GADDR_FLAG)
#define GMAP_SHADOW_FAKE_TABLE 1ULL
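
The VALID_GADDR_FLAG scheme above tags every stored guest address with bit 0, so that a successful lookup of guest address 0 can be told apart from radix_tree_lookup() returning NULL. Below is a minimal stand-alone sketch of the same idea; the names, the 1 MB alignment assumption and the demo values are illustrative and not part of this patch.

#include <assert.h>
#include <stdio.h>

#define DEMO_VALID_FLAG    1UL
#define DEMO_HPAGE_MASK    (~0xfffffUL)            /* assumes 1 MB segment alignment */
#define DEMO_MAKE_VALID(g) (((g) & DEMO_HPAGE_MASK) | DEMO_VALID_FLAG)
#define DEMO_IS_VALID(g)   ((g) & DEMO_VALID_FLAG)

int main(void)
{
	unsigned long stored_for_gaddr0 = DEMO_MAKE_VALID(0x0UL);
	unsigned long not_found = 0UL;             /* what a failed lookup returns */

	assert(DEMO_IS_VALID(stored_for_gaddr0));  /* guest address 0 is still "found" */
	assert(!DEMO_IS_VALID(not_found));         /* plain 0 means "no entry" */
	printf("tagged value for gaddr 0: %#lx\n", stored_for_gaddr0);
	return 0;
}
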
@@ -43,7 +56,7 @@ static struct page *gmap_alloc_crst(void)
*
* Returns a guest address space structure.
*/
-static struct gmap *gmap_alloc(unsigned long limit)
+struct gmap *gmap_alloc(unsigned long limit)
{
struct gmap *gmap;
struct page *page;
@@ -70,9 +83,7 @@ static struct gmap *gmap_alloc(unsigned long limit)
gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT);
if (!gmap)
goto out;
- INIT_LIST_HEAD(&gmap->crst_list);
INIT_LIST_HEAD(&gmap->children);
- INIT_LIST_HEAD(&gmap->pt_list);
INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT);
INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT);
INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT);
@@ -82,8 +93,6 @@ static struct gmap *gmap_alloc(unsigned long limit)
page = gmap_alloc_crst();
if (!page)
goto out_free;
- page->index = 0;
- list_add(&page->lru, &gmap->crst_list);
table = page_to_virt(page);
crst_table_init(table, etype);
gmap->table = table;
@@ -97,6 +106,7 @@ out_free:
out:
return NULL;
}
+EXPORT_SYMBOL_GPL(gmap_alloc);
/**
* gmap_create - create a guest address space
@@ -128,10 +138,7 @@ EXPORT_SYMBOL_GPL(gmap_create);
static void gmap_flush_tlb(struct gmap *gmap)
{
- if (MACHINE_HAS_IDTE)
- __tlb_flush_idte(gmap->asce);
- else
- __tlb_flush_global();
+ __tlb_flush_idte(gmap->asce);
}
static void gmap_radix_tree_free(struct radix_tree_root *root)
@@ -185,32 +192,46 @@ static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
} while (nr > 0);
}
+static void gmap_free_crst(unsigned long *table, bool free_ptes)
+{
+ bool is_segment = (table[0] & _SEGMENT_ENTRY_TYPE_MASK) == 0;
+ int i;
+
+ if (is_segment) {
+ if (!free_ptes)
+ goto out;
+ for (i = 0; i < _CRST_ENTRIES; i++)
+ if (!(table[i] & _SEGMENT_ENTRY_INVALID))
+ page_table_free_pgste(page_ptdesc(phys_to_page(table[i])));
+ } else {
+ for (i = 0; i < _CRST_ENTRIES; i++)
+ if (!(table[i] & _REGION_ENTRY_INVALID))
+ gmap_free_crst(__va(table[i] & PAGE_MASK), free_ptes);
+ }
+
+out:
+ free_pages((unsigned long)table, CRST_ALLOC_ORDER);
+}
+
/**
* gmap_free - free a guest address space
* @gmap: pointer to the guest address space structure
*
* No locks required. There are no references to this gmap anymore.
*/
-static void gmap_free(struct gmap *gmap)
+void gmap_free(struct gmap *gmap)
{
- struct page *page, *next;
-
/* Flush tlb of all gmaps (if not already done for shadows) */
if (!(gmap_is_shadow(gmap) && gmap->removed))
gmap_flush_tlb(gmap);
/* Free all segment & region tables. */
- list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
- __free_pages(page, CRST_ALLOC_ORDER);
+ gmap_free_crst(gmap->table, gmap_is_shadow(gmap));
+
gmap_radix_tree_free(&gmap->guest_to_host);
gmap_radix_tree_free(&gmap->host_to_guest);
/* Free additional data for a shadow gmap */
if (gmap_is_shadow(gmap)) {
- struct ptdesc *ptdesc, *n;
-
- /* Free all page tables. */
- list_for_each_entry_safe(ptdesc, n, &gmap->pt_list, pt_list)
- page_table_free_pgste(ptdesc);
gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
/* Release reference to the parent */
gmap_put(gmap->parent);
@@ -218,6 +239,7 @@ static void gmap_free(struct gmap *gmap)
kfree(gmap);
}
+EXPORT_SYMBOL_GPL(gmap_free);
/**
* gmap_get - increase reference counter for guest address space
@@ -281,37 +303,6 @@ void gmap_remove(struct gmap *gmap)
}
EXPORT_SYMBOL_GPL(gmap_remove);
-/**
- * gmap_enable - switch primary space to the guest address space
- * @gmap: pointer to the guest address space structure
- */
-void gmap_enable(struct gmap *gmap)
-{
- S390_lowcore.gmap = (unsigned long) gmap;
-}
-EXPORT_SYMBOL_GPL(gmap_enable);
-
-/**
- * gmap_disable - switch back to the standard primary address space
- * @gmap: pointer to the guest address space structure
- */
-void gmap_disable(struct gmap *gmap)
-{
- S390_lowcore.gmap = 0UL;
-}
-EXPORT_SYMBOL_GPL(gmap_disable);
-
-/**
- * gmap_get_enabled - get a pointer to the currently enabled gmap
- *
- * Returns a pointer to the currently enabled gmap. 0 if none is enabled.
- */
-struct gmap *gmap_get_enabled(void)
-{
- return (struct gmap *) S390_lowcore.gmap;
-}
-EXPORT_SYMBOL_GPL(gmap_get_enabled);
-
/*
* gmap_alloc_table is assumed to be called with mmap_lock held
*/
@@ -329,10 +320,8 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
crst_table_init(new, init);
spin_lock(&gmap->guest_table_lock);
if (*table & _REGION_ENTRY_INVALID) {
- list_add(&page->lru, &gmap->crst_list);
*table = __pa(new) | _REGION_ENTRY_LENGTH |
(*table & _REGION_ENTRY_TYPE_MASK);
- page->index = gaddr;
page = NULL;
}
spin_unlock(&gmap->guest_table_lock);
@@ -341,21 +330,23 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
return 0;
}
-/**
- * __gmap_segment_gaddr - find virtual address from segment pointer
- * @entry: pointer to a segment table entry in the guest address space
- *
- * Returns the virtual address in the guest address space for the segment
- */
-static unsigned long __gmap_segment_gaddr(unsigned long *entry)
+static unsigned long host_to_guest_lookup(struct gmap *gmap, unsigned long vmaddr)
{
- struct page *page;
- unsigned long offset;
+ return (unsigned long)radix_tree_lookup(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
+}
+
+static unsigned long host_to_guest_delete(struct gmap *gmap, unsigned long vmaddr)
+{
+ return (unsigned long)radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
+}
- offset = (unsigned long) entry / sizeof(unsigned long);
- offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
- page = pmd_pgtable_page((pmd_t *) entry);
- return page->index + offset;
+static pmd_t *host_to_guest_pmd_delete(struct gmap *gmap, unsigned long vmaddr,
+ unsigned long *gaddr)
+{
+ *gaddr = host_to_guest_delete(gmap, vmaddr);
+ if (IS_GADDR_VALID(*gaddr))
+ return (pmd_t *)gmap_table_walk(gmap, *gaddr, 1);
+ return NULL;
}
/**
@@ -367,16 +358,19 @@ static unsigned long __gmap_segment_gaddr(unsigned long *entry)
*/
static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
{
- unsigned long *entry;
+ unsigned long gaddr;
int flush = 0;
+ pmd_t *pmdp;
BUG_ON(gmap_is_shadow(gmap));
spin_lock(&gmap->guest_table_lock);
- entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
- if (entry) {
- flush = (*entry != _SEGMENT_ENTRY_EMPTY);
- *entry = _SEGMENT_ENTRY_EMPTY;
+
+ pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
+ if (pmdp) {
+ flush = (pmd_val(*pmdp) != _SEGMENT_ENTRY_EMPTY);
+ *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
}
+
spin_unlock(&gmap->guest_table_lock);
return flush;
}
@@ -495,26 +489,6 @@ unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
EXPORT_SYMBOL_GPL(__gmap_translate);
/**
- * gmap_translate - translate a guest address to a user space address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- *
- * Returns user space address which corresponds to the guest address or
- * -EFAULT if no such mapping exists.
- * This function does not establish potentially missing page table entries.
- */
-unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
-{
- unsigned long rc;
-
- mmap_read_lock(gmap->mm);
- rc = __gmap_translate(gmap, gaddr);
- mmap_read_unlock(gmap->mm);
- return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_translate);
-
-/**
* gmap_unlink - disconnect a page table from the gmap shadow tables
* @mm: pointer to the parent mm_struct
* @table: pointer to the host page table
@@ -613,15 +587,18 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
spin_lock(&gmap->guest_table_lock);
if (*table == _SEGMENT_ENTRY_EMPTY) {
rc = radix_tree_insert(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT, table);
+ vmaddr >> PMD_SHIFT,
+ (void *)MAKE_VALID_GADDR(gaddr));
if (!rc) {
if (pmd_leaf(*pmd)) {
*table = (pmd_val(*pmd) &
_SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
- | _SEGMENT_ENTRY_GMAP_UC;
+ | _SEGMENT_ENTRY_GMAP_UC
+ | _SEGMENT_ENTRY;
} else
- *table = pmd_val(*pmd) &
- _SEGMENT_ENTRY_HARDWARE_BITS;
+ *table = (pmd_val(*pmd) &
+ _SEGMENT_ENTRY_HARDWARE_BITS)
+ | _SEGMENT_ENTRY;
}
} else if (*table & _SEGMENT_ENTRY_PROTECT &&
!(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) {
@@ -635,113 +612,27 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
radix_tree_preload_end();
return rc;
}
-
-/**
- * gmap_fault - resolve a fault on a guest address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- * @fault_flags: flags to pass down to handle_mm_fault()
- *
- * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
- * if the vm address is already mapped to a different guest segment.
- */
-int gmap_fault(struct gmap *gmap, unsigned long gaddr,
- unsigned int fault_flags)
-{
- unsigned long vmaddr;
- int rc;
- bool unlocked;
-
- mmap_read_lock(gmap->mm);
-
-retry:
- unlocked = false;
- vmaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(vmaddr)) {
- rc = vmaddr;
- goto out_up;
- }
- if (fixup_user_fault(gmap->mm, vmaddr, fault_flags,
- &unlocked)) {
- rc = -EFAULT;
- goto out_up;
- }
- /*
- * In the case that fixup_user_fault unlocked the mmap_lock during
- * faultin redo __gmap_translate to not race with a map/unmap_segment.
- */
- if (unlocked)
- goto retry;
-
- rc = __gmap_link(gmap, gaddr, vmaddr);
-out_up:
- mmap_read_unlock(gmap->mm);
- return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_fault);
+EXPORT_SYMBOL(__gmap_link);
/*
* this function is assumed to be called with mmap_lock held
*/
void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
- struct vm_area_struct *vma;
unsigned long vmaddr;
- spinlock_t *ptl;
- pte_t *ptep;
+
+ mmap_assert_locked(gmap->mm);
/* Find the vm address for the guest address */
vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
gaddr >> PMD_SHIFT);
if (vmaddr) {
vmaddr |= gaddr & ~PMD_MASK;
-
- vma = vma_lookup(gmap->mm, vmaddr);
- if (!vma || is_vm_hugetlb_page(vma))
- return;
-
- /* Get pointer to the page table entry */
- ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
- if (likely(ptep)) {
- ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
- pte_unmap_unlock(ptep, ptl);
- }
+ gmap_helper_zap_one_page(gmap->mm, vmaddr);
}
}
EXPORT_SYMBOL_GPL(__gmap_zap);
-void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
-{
- unsigned long gaddr, vmaddr, size;
- struct vm_area_struct *vma;
-
- mmap_read_lock(gmap->mm);
- for (gaddr = from; gaddr < to;
- gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
- /* Find the vm address for the guest address */
- vmaddr = (unsigned long)
- radix_tree_lookup(&gmap->guest_to_host,
- gaddr >> PMD_SHIFT);
- if (!vmaddr)
- continue;
- vmaddr |= gaddr & ~PMD_MASK;
- /* Find vma in the parent mm */
- vma = find_vma(gmap->mm, vmaddr);
- if (!vma)
- continue;
- /*
- * We do not discard pages that are backed by
- * hugetlbfs, so we don't have to refault them.
- */
- if (is_vm_hugetlb_page(vma))
- continue;
- size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
- zap_page_range_single(vma, vmaddr, size, NULL);
- }
- mmap_read_unlock(gmap->mm);
-}
-EXPORT_SYMBOL_GPL(gmap_discard);
-
static LIST_HEAD(gmap_notifier_list);
static DEFINE_SPINLOCK(gmap_notifier_lock);
@@ -803,8 +694,7 @@ static void gmap_call_notifier(struct gmap *gmap, unsigned long start,
*
* Note: Can also be called for shadow gmaps.
*/
-static inline unsigned long *gmap_table_walk(struct gmap *gmap,
- unsigned long gaddr, int level)
+unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level)
{
const int asce_type = gmap->asce & _ASCE_TYPE_MASK;
unsigned long *table = gmap->table;
@@ -851,10 +741,11 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
if (*table & _REGION_ENTRY_INVALID)
return NULL;
table = __va(*table & _SEGMENT_ENTRY_ORIGIN);
- table += (gaddr & _PAGE_INDEX) >> _PAGE_SHIFT;
+ table += (gaddr & _PAGE_INDEX) >> PAGE_SHIFT;
}
return table;
}
+EXPORT_SYMBOL(gmap_table_walk);
/**
* gmap_pte_op_walk - walk the gmap page table, get the page table lock
@@ -1051,86 +942,40 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
* @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
* @bits: pgste notification bits to set
*
- * Returns 0 if successfully protected, -ENOMEM if out of memory and
- * -EFAULT if gaddr is invalid (or mapping for shadows is missing).
+ * Returns:
+ * PAGE_SIZE if a small page was successfully protected;
+ * HPAGE_SIZE if a large page was successfully protected;
+ * -ENOMEM if out of memory;
+ * -EFAULT if gaddr is invalid (or mapping for shadows is missing);
+ * -EAGAIN if the guest mapping is missing and should be fixed by the caller.
*
- * Called with sg->mm->mmap_lock in read.
+ * Context: Called with gmap->mm->mmap_lock in read.
*/
-static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
- unsigned long len, int prot, unsigned long bits)
+int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits)
{
- unsigned long vmaddr, dist;
pmd_t *pmdp;
- int rc;
+ int rc = 0;
BUG_ON(gmap_is_shadow(gmap));
- while (len) {
- rc = -EAGAIN;
- pmdp = gmap_pmd_op_walk(gmap, gaddr);
- if (pmdp) {
- if (!pmd_leaf(*pmdp)) {
- rc = gmap_protect_pte(gmap, gaddr, pmdp, prot,
- bits);
- if (!rc) {
- len -= PAGE_SIZE;
- gaddr += PAGE_SIZE;
- }
- } else {
- rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot,
- bits);
- if (!rc) {
- dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK);
- len = len < dist ? 0 : len - dist;
- gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE;
- }
- }
- gmap_pmd_op_end(gmap, pmdp);
- }
- if (rc) {
- if (rc == -EINVAL)
- return rc;
- /* -EAGAIN, fixup of userspace mm and gmap */
- vmaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(vmaddr))
- return vmaddr;
- rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot);
- if (rc)
- return rc;
- }
- }
- return 0;
-}
+ pmdp = gmap_pmd_op_walk(gmap, gaddr);
+ if (!pmdp)
+ return -EAGAIN;
-/**
- * gmap_mprotect_notify - change access rights for a range of ptes and
- * call the notifier if any pte changes again
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: virtual address in the guest address space
- * @len: size of area
- * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
- *
- * Returns 0 if for each page in the given range a gmap mapping exists,
- * the new access rights could be set and the notifier could be armed.
- * If the gmap mapping is missing for one or more pages -EFAULT is
- * returned. If no memory could be allocated -ENOMEM is returned.
- * This function establishes missing page table entries.
- */
-int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr,
- unsigned long len, int prot)
-{
- int rc;
+ if (!pmd_leaf(*pmdp)) {
+ rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, bits);
+ if (!rc)
+ rc = PAGE_SIZE;
+ } else {
+ rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, bits);
+ if (!rc)
+ rc = HPAGE_SIZE;
+ }
+ gmap_pmd_op_end(gmap, pmdp);
- if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK) || gmap_is_shadow(gmap))
- return -EINVAL;
- if (!MACHINE_HAS_ESOP && prot == PROT_READ)
- return -EINVAL;
- mmap_read_lock(gmap->mm);
- rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT);
- mmap_read_unlock(gmap->mm);
return rc;
}
-EXPORT_SYMBOL_GPL(gmap_mprotect_notify);
+EXPORT_SYMBOL_GPL(gmap_protect_one);
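
With gmap_protect_range() removed, callers are expected to drive gmap_protect_one() themselves and advance by the number of bytes it reports. The following is only a hedged sketch of such a loop, modelled on the removed range helper: the function name is hypothetical, the -EAGAIN path reuses __gmap_translate() and gmap_pte_op_fixup() the way the old code did, and the partial-large-page alignment handling is deliberately simplified.

static int demo_protect_range(struct gmap *gmap, unsigned long gaddr,
			      unsigned long len, int prot, unsigned long bits)
{
	unsigned long vmaddr;
	int rc;

	while (len) {
		rc = gmap_protect_one(gmap, gaddr, prot, bits);
		if (rc == -EAGAIN) {
			/* Guest mapping missing: fix up the host mapping, then retry. */
			vmaddr = __gmap_translate(gmap, gaddr);
			if (IS_ERR_VALUE(vmaddr))
				return vmaddr;
			rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot);
			if (rc)
				return rc;
			continue;
		}
		if (rc < 0)
			return rc;
		/* rc is PAGE_SIZE or HPAGE_SIZE; simplified: assumes gaddr is rc-aligned. */
		gaddr += rc;
		len -= min(len, (unsigned long)rc);
	}
	return 0;
}
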
/**
* gmap_read_table - get an unsigned long value from a guest page table using
@@ -1317,7 +1162,7 @@ static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr)
table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */
if (!table || *table & _PAGE_INVALID)
return;
- gmap_call_notifier(sg, raddr, raddr + _PAGE_SIZE - 1);
+ gmap_call_notifier(sg, raddr, raddr + PAGE_SIZE - 1);
ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table);
}
@@ -1335,7 +1180,7 @@ static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,
int i;
BUG_ON(!gmap_is_shadow(sg));
- for (i = 0; i < _PAGE_ENTRIES; i++, raddr += _PAGE_SIZE)
+ for (i = 0; i < _PAGE_ENTRIES; i++, raddr += PAGE_SIZE)
pgt[i] = _PAGE_INVALID;
}
@@ -1364,7 +1209,6 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
__gmap_unshadow_pgt(sg, raddr, __va(pgt));
/* Free page table */
ptdesc = page_ptdesc(phys_to_page(pgt));
- list_del(&ptdesc->pt_list);
page_table_free_pgste(ptdesc);
}
@@ -1392,7 +1236,6 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
__gmap_unshadow_pgt(sg, raddr, __va(pgt));
/* Free page table */
ptdesc = page_ptdesc(phys_to_page(pgt));
- list_del(&ptdesc->pt_list);
page_table_free_pgste(ptdesc);
}
}
@@ -1422,7 +1265,6 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
__gmap_unshadow_sgt(sg, raddr, __va(sgt));
/* Free segment table */
page = phys_to_page(sgt);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
@@ -1450,7 +1292,6 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
__gmap_unshadow_sgt(sg, raddr, __va(sgt));
/* Free segment table */
page = phys_to_page(sgt);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
}
@@ -1480,7 +1321,6 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
__gmap_unshadow_r3t(sg, raddr, __va(r3t));
/* Free region 3 table */
page = phys_to_page(r3t);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
@@ -1508,7 +1348,6 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
__gmap_unshadow_r3t(sg, raddr, __va(r3t));
/* Free region 3 table */
page = phys_to_page(r3t);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
}
@@ -1538,7 +1377,6 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
__gmap_unshadow_r2t(sg, raddr, __va(r2t));
/* Free region 2 table */
page = phys_to_page(r2t);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
@@ -1570,7 +1408,6 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
r1t[i] = _REGION1_ENTRY_EMPTY;
/* Free region 2 table */
page = phys_to_page(r2t);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
}
@@ -1581,7 +1418,7 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
*
* Called with sg->guest_table_lock
*/
-static void gmap_unshadow(struct gmap *sg)
+void gmap_unshadow(struct gmap *sg)
{
unsigned long *table;
@@ -1607,143 +1444,7 @@ static void gmap_unshadow(struct gmap *sg)
break;
}
}
-
-/**
- * gmap_find_shadow - find a specific asce in the list of shadow tables
- * @parent: pointer to the parent gmap
- * @asce: ASCE for which the shadow table is created
- * @edat_level: edat level to be used for the shadow translation
- *
- * Returns the pointer to a gmap if a shadow table with the given asce is
- * already available, ERR_PTR(-EAGAIN) if another one is just being created,
- * otherwise NULL
- */
-static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce,
- int edat_level)
-{
- struct gmap *sg;
-
- list_for_each_entry(sg, &parent->children, list) {
- if (sg->orig_asce != asce || sg->edat_level != edat_level ||
- sg->removed)
- continue;
- if (!sg->initialized)
- return ERR_PTR(-EAGAIN);
- refcount_inc(&sg->ref_count);
- return sg;
- }
- return NULL;
-}
-
-/**
- * gmap_shadow_valid - check if a shadow guest address space matches the
- * given properties and is still valid
- * @sg: pointer to the shadow guest address space structure
- * @asce: ASCE for which the shadow table is requested
- * @edat_level: edat level to be used for the shadow translation
- *
- * Returns 1 if the gmap shadow is still valid and matches the given
- * properties, the caller can continue using it. Returns 0 otherwise, the
- * caller has to request a new shadow gmap in this case.
- *
- */
-int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
-{
- if (sg->removed)
- return 0;
- return sg->orig_asce == asce && sg->edat_level == edat_level;
-}
-EXPORT_SYMBOL_GPL(gmap_shadow_valid);
-
-/**
- * gmap_shadow - create/find a shadow guest address space
- * @parent: pointer to the parent gmap
- * @asce: ASCE for which the shadow table is created
- * @edat_level: edat level to be used for the shadow translation
- *
- * The pages of the top level page table referred by the asce parameter
- * will be set to read-only and marked in the PGSTEs of the kvm process.
- * The shadow table will be removed automatically on any change to the
- * PTE mapping for the source table.
- *
- * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory,
- * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the
- * parent gmap table could not be protected.
- */
-struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
- int edat_level)
-{
- struct gmap *sg, *new;
- unsigned long limit;
- int rc;
-
- BUG_ON(parent->mm->context.allow_gmap_hpage_1m);
- BUG_ON(gmap_is_shadow(parent));
- spin_lock(&parent->shadow_lock);
- sg = gmap_find_shadow(parent, asce, edat_level);
- spin_unlock(&parent->shadow_lock);
- if (sg)
- return sg;
- /* Create a new shadow gmap */
- limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11));
- if (asce & _ASCE_REAL_SPACE)
- limit = -1UL;
- new = gmap_alloc(limit);
- if (!new)
- return ERR_PTR(-ENOMEM);
- new->mm = parent->mm;
- new->parent = gmap_get(parent);
- new->private = parent->private;
- new->orig_asce = asce;
- new->edat_level = edat_level;
- new->initialized = false;
- spin_lock(&parent->shadow_lock);
- /* Recheck if another CPU created the same shadow */
- sg = gmap_find_shadow(parent, asce, edat_level);
- if (sg) {
- spin_unlock(&parent->shadow_lock);
- gmap_free(new);
- return sg;
- }
- if (asce & _ASCE_REAL_SPACE) {
- /* only allow one real-space gmap shadow */
- list_for_each_entry(sg, &parent->children, list) {
- if (sg->orig_asce & _ASCE_REAL_SPACE) {
- spin_lock(&sg->guest_table_lock);
- gmap_unshadow(sg);
- spin_unlock(&sg->guest_table_lock);
- list_del(&sg->list);
- gmap_put(sg);
- break;
- }
- }
- }
- refcount_set(&new->ref_count, 2);
- list_add(&new->list, &parent->children);
- if (asce & _ASCE_REAL_SPACE) {
- /* nothing to protect, return right away */
- new->initialized = true;
- spin_unlock(&parent->shadow_lock);
- return new;
- }
- spin_unlock(&parent->shadow_lock);
- /* protect after insertion, so it will get properly invalidated */
- mmap_read_lock(parent->mm);
- rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
- ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
- PROT_READ, GMAP_NOTIFY_SHADOW);
- mmap_read_unlock(parent->mm);
- spin_lock(&parent->shadow_lock);
- new->initialized = true;
- if (rc) {
- list_del(&new->list);
- gmap_free(new);
- new = ERR_PTR(rc);
- }
- spin_unlock(&parent->shadow_lock);
- return new;
-}
-EXPORT_SYMBOL_GPL(gmap_shadow);
+EXPORT_SYMBOL(gmap_unshadow);
/**
* gmap_shadow_r2t - create an empty shadow region 2 table
@@ -1777,9 +1478,6 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
page = gmap_alloc_crst();
if (!page)
return -ENOMEM;
- page->index = r2t & _REGION_ENTRY_ORIGIN;
- if (fake)
- page->index |= GMAP_SHADOW_FAKE_TABLE;
s_r2t = page_to_phys(page);
/* Install shadow region second table */
spin_lock(&sg->guest_table_lock);
@@ -1801,7 +1499,6 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;
if (sg->edat_level >= 1)
*table |= (r2t & _REGION_ENTRY_PROTECT);
- list_add(&page->lru, &sg->crst_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_REGION_ENTRY_INVALID;
@@ -1861,9 +1558,6 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
page = gmap_alloc_crst();
if (!page)
return -ENOMEM;
- page->index = r3t & _REGION_ENTRY_ORIGIN;
- if (fake)
- page->index |= GMAP_SHADOW_FAKE_TABLE;
s_r3t = page_to_phys(page);
/* Install shadow region second table */
spin_lock(&sg->guest_table_lock);
@@ -1885,7 +1579,6 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
if (sg->edat_level >= 1)
*table |= (r3t & _REGION_ENTRY_PROTECT);
- list_add(&page->lru, &sg->crst_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_REGION_ENTRY_INVALID;
@@ -1945,9 +1638,6 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
page = gmap_alloc_crst();
if (!page)
return -ENOMEM;
- page->index = sgt & _REGION_ENTRY_ORIGIN;
- if (fake)
- page->index |= GMAP_SHADOW_FAKE_TABLE;
s_sgt = page_to_phys(page);
/* Install shadow region second table */
spin_lock(&sg->guest_table_lock);
@@ -1969,7 +1659,6 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
if (sg->edat_level >= 1)
*table |= sgt & _REGION_ENTRY_PROTECT;
- list_add(&page->lru, &sg->crst_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_REGION_ENTRY_INVALID;
@@ -2002,45 +1691,22 @@ out_free:
}
EXPORT_SYMBOL_GPL(gmap_shadow_sgt);
-/**
- * gmap_shadow_pgt_lookup - find a shadow page table
- * @sg: pointer to the shadow guest address space structure
- * @saddr: the address in the shadow aguest address space
- * @pgt: parent gmap address of the page table to get shadowed
- * @dat_protection: if the pgtable is marked as protected by dat
- * @fake: pgt references contiguous guest memory block, not a pgtable
- *
- * Returns 0 if the shadow page table was found and -EAGAIN if the page
- * table was not found.
- *
- * Called with sg->mm->mmap_lock in read.
- */
-int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
- unsigned long *pgt, int *dat_protection,
- int *fake)
+static void gmap_pgste_set_pgt_addr(struct ptdesc *ptdesc, unsigned long pgt_addr)
{
- unsigned long *table;
- struct page *page;
- int rc;
+ unsigned long *pgstes = page_to_virt(ptdesc_page(ptdesc));
- BUG_ON(!gmap_is_shadow(sg));
- spin_lock(&sg->guest_table_lock);
- table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
- if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
- /* Shadow page tables are full pages (pte+pgste) */
- page = pfn_to_page(*table >> PAGE_SHIFT);
- *pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE;
- *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
- *fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE);
- rc = 0;
- } else {
- rc = -EAGAIN;
- }
- spin_unlock(&sg->guest_table_lock);
- return rc;
+ pgstes += _PAGE_ENTRIES;
+
+ pgstes[0] &= ~PGSTE_ST2_MASK;
+ pgstes[1] &= ~PGSTE_ST2_MASK;
+ pgstes[2] &= ~PGSTE_ST2_MASK;
+ pgstes[3] &= ~PGSTE_ST2_MASK;
+ pgstes[0] |= (pgt_addr >> 16) & PGSTE_ST2_MASK;
+ pgstes[1] |= pgt_addr & PGSTE_ST2_MASK;
+ pgstes[2] |= (pgt_addr << 16) & PGSTE_ST2_MASK;
+ pgstes[3] |= (pgt_addr << 32) & PGSTE_ST2_MASK;
}
-EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup);
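
gmap_pgste_set_pgt_addr() spreads the 64-bit parent page-table origin over the ST2 software field of the first four PGSTEs, 16 bits per entry, replacing the previous page->index bookkeeping. Below is a stand-alone round-trip demonstration of that packing; the mask value is assumed to match the kernel's PGSTE_ST2_MASK, and the unpack helper and sample origin are purely illustrative.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_ST2_MASK 0x0000ffff00000000ULL	/* assumed PGSTE_ST2_MASK layout */

static void demo_pack(uint64_t pgstes[4], uint64_t pgt_addr)
{
	pgstes[0] = (pgstes[0] & ~DEMO_ST2_MASK) | ((pgt_addr >> 16) & DEMO_ST2_MASK);
	pgstes[1] = (pgstes[1] & ~DEMO_ST2_MASK) | (pgt_addr & DEMO_ST2_MASK);
	pgstes[2] = (pgstes[2] & ~DEMO_ST2_MASK) | ((pgt_addr << 16) & DEMO_ST2_MASK);
	pgstes[3] = (pgstes[3] & ~DEMO_ST2_MASK) | ((pgt_addr << 32) & DEMO_ST2_MASK);
}

static uint64_t demo_unpack(const uint64_t pgstes[4])
{
	return ((pgstes[0] & DEMO_ST2_MASK) << 16) |
	       (pgstes[1] & DEMO_ST2_MASK) |
	       ((pgstes[2] & DEMO_ST2_MASK) >> 16) |
	       ((pgstes[3] & DEMO_ST2_MASK) >> 32);
}

int main(void)
{
	uint64_t pgstes[4] = { 0 };
	uint64_t origin = 0x0000027634567000ULL | 1ULL;	/* e.g. origin | fake-table flag */

	demo_pack(pgstes, origin);
	assert(demo_unpack(pgstes) == origin);
	printf("round-tripped origin: %#llx\n", (unsigned long long)origin);
	return 0;
}
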
/**
* gmap_shadow_pgt - instantiate a shadow page table
@@ -2069,9 +1735,10 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
ptdesc = page_table_alloc_pgste(sg->mm);
if (!ptdesc)
return -ENOMEM;
- ptdesc->pt_index = pgt & _SEGMENT_ENTRY_ORIGIN;
+ origin = pgt & _SEGMENT_ENTRY_ORIGIN;
if (fake)
- ptdesc->pt_index |= GMAP_SHADOW_FAKE_TABLE;
+ origin |= GMAP_SHADOW_FAKE_TABLE;
+ gmap_pgste_set_pgt_addr(ptdesc, origin);
s_pgt = page_to_phys(ptdesc_page(ptdesc));
/* Install shadow page table */
spin_lock(&sg->guest_table_lock);
@@ -2090,7 +1757,6 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
/* mark as invalid as long as the parent table is not protected */
*table = (unsigned long) s_pgt | _SEGMENT_ENTRY |
(pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;
- list_add(&ptdesc->pt_list, &sg->pt_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_SEGMENT_ENTRY_INVALID;
@@ -2268,7 +1934,6 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
pte_t *pte, unsigned long bits)
{
unsigned long offset, gaddr = 0;
- unsigned long *table;
struct gmap *gmap, *sg, *next;
offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
@@ -2276,12 +1941,9 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
rcu_read_lock();
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
spin_lock(&gmap->guest_table_lock);
- table = radix_tree_lookup(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT);
- if (table)
- gaddr = __gmap_segment_gaddr(table) + offset;
+ gaddr = host_to_guest_lookup(gmap, vmaddr) + offset;
spin_unlock(&gmap->guest_table_lock);
- if (!table)
+ if (!IS_GADDR_VALID(gaddr))
continue;
if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
@@ -2321,13 +1983,11 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
gaddr &= HPAGE_MASK;
pmdp_notify_gmap(gmap, pmdp, gaddr);
new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_GMAP_IN));
- if (MACHINE_HAS_TLB_GUEST)
+ if (machine_has_tlb_guest())
__pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce,
IDTE_GLOBAL);
- else if (MACHINE_HAS_IDTE)
- __pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL);
else
- __pmdp_csp(pmdp);
+ __pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL);
set_pmd(pmdp, new);
}
@@ -2341,15 +2001,14 @@ static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
rcu_read_lock();
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
spin_lock(&gmap->guest_table_lock);
- pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT);
+ pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
if (pmdp) {
- gaddr = __gmap_segment_gaddr((unsigned long *)pmdp);
pmdp_notify_gmap(gmap, pmdp, gaddr);
WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
- _SEGMENT_ENTRY_GMAP_UC));
+ _SEGMENT_ENTRY_GMAP_UC |
+ _SEGMENT_ENTRY));
if (purge)
- __pmdp_csp(pmdp);
+ __pmdp_cspg(pmdp);
set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
}
spin_unlock(&gmap->guest_table_lock);
@@ -2370,44 +2029,31 @@ void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr)
EXPORT_SYMBOL_GPL(gmap_pmdp_invalidate);
/**
- * gmap_pmdp_csp - csp all affected guest pmd entries
- * @mm: pointer to the process mm_struct
- * @vmaddr: virtual address in the process address space
- */
-void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr)
-{
- gmap_pmdp_clear(mm, vmaddr, 1);
-}
-EXPORT_SYMBOL_GPL(gmap_pmdp_csp);
-
-/**
* gmap_pmdp_idte_local - invalidate and clear a guest pmd entry
* @mm: pointer to the process mm_struct
* @vmaddr: virtual address in the process address space
*/
void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
{
- unsigned long *entry, gaddr;
+ unsigned long gaddr;
struct gmap *gmap;
pmd_t *pmdp;
rcu_read_lock();
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
spin_lock(&gmap->guest_table_lock);
- entry = radix_tree_delete(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT);
- if (entry) {
- pmdp = (pmd_t *)entry;
- gaddr = __gmap_segment_gaddr(entry);
+ pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
+ if (pmdp) {
pmdp_notify_gmap(gmap, pmdp, gaddr);
- WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
- _SEGMENT_ENTRY_GMAP_UC));
- if (MACHINE_HAS_TLB_GUEST)
+ WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
+ _SEGMENT_ENTRY_GMAP_UC |
+ _SEGMENT_ENTRY));
+ if (machine_has_tlb_guest())
__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
gmap->asce, IDTE_LOCAL);
- else if (MACHINE_HAS_IDTE)
+ else
__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL);
- *entry = _SEGMENT_ENTRY_EMPTY;
+ *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
}
spin_unlock(&gmap->guest_table_lock);
}
@@ -2422,29 +2068,25 @@ EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local);
*/
void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
{
- unsigned long *entry, gaddr;
+ unsigned long gaddr;
struct gmap *gmap;
pmd_t *pmdp;
rcu_read_lock();
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
spin_lock(&gmap->guest_table_lock);
- entry = radix_tree_delete(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT);
- if (entry) {
- pmdp = (pmd_t *)entry;
- gaddr = __gmap_segment_gaddr(entry);
+ pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
+ if (pmdp) {
pmdp_notify_gmap(gmap, pmdp, gaddr);
- WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
- _SEGMENT_ENTRY_GMAP_UC));
- if (MACHINE_HAS_TLB_GUEST)
+ WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
+ _SEGMENT_ENTRY_GMAP_UC |
+ _SEGMENT_ENTRY));
+ if (machine_has_tlb_guest())
__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
gmap->asce, IDTE_GLOBAL);
- else if (MACHINE_HAS_IDTE)
- __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL);
else
- __pmdp_csp(pmdp);
- *entry = _SEGMENT_ENTRY_EMPTY;
+ __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL);
+ *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
}
spin_unlock(&gmap->guest_table_lock);
}
@@ -2559,9 +2201,6 @@ int s390_enable_sie(void)
/* Do we have pgstes? if yes, we are done */
if (mm_has_pgste(mm))
return 0;
- /* Fail if the page tables are 2K */
- if (!mm_alloc_pgste(mm))
- return -EINVAL;
mmap_write_lock(mm);
mm->context.has_pgste = 1;
/* split thp mappings and disable thp for future mappings */
@@ -2571,138 +2210,6 @@ int s390_enable_sie(void)
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
-static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
- unsigned long end, struct mm_walk *walk)
-{
- unsigned long *found_addr = walk->private;
-
- /* Return 1 of the page is a zeropage. */
- if (is_zero_pfn(pte_pfn(*pte))) {
- /*
- * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
- * right thing and likely don't care: FAULT_FLAG_UNSHARE
- * currently only works in COW mappings, which is also where
- * mm_forbids_zeropage() is checked.
- */
- if (!is_cow_mapping(walk->vma->vm_flags))
- return -EFAULT;
-
- *found_addr = addr;
- return 1;
- }
- return 0;
-}
-
-static const struct mm_walk_ops find_zeropage_ops = {
- .pte_entry = find_zeropage_pte_entry,
- .walk_lock = PGWALK_WRLOCK,
-};
-
-/*
- * Unshare all shared zeropages, replacing them by anonymous pages. Note that
- * we cannot simply zap all shared zeropages, because this could later
- * trigger unexpected userfaultfd missing events.
- *
- * This must be called after mm->context.allow_cow_sharing was
- * set to 0, to avoid future mappings of shared zeropages.
- *
- * mm contracts with s390, that even if mm were to remove a page table,
- * and racing with walk_page_range_vma() calling pte_offset_map_lock()
- * would fail, it will never insert a page table containing empty zero
- * pages once mm_forbids_zeropage(mm) i.e.
- * mm->context.allow_cow_sharing is set to 0.
- */
-static int __s390_unshare_zeropages(struct mm_struct *mm)
-{
- struct vm_area_struct *vma;
- VMA_ITERATOR(vmi, mm, 0);
- unsigned long addr;
- vm_fault_t fault;
- int rc;
-
- for_each_vma(vmi, vma) {
- /*
- * We could only look at COW mappings, but it's more future
- * proof to catch unexpected zeropages in other mappings and
- * fail.
- */
- if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
- continue;
- addr = vma->vm_start;
-
-retry:
- rc = walk_page_range_vma(vma, addr, vma->vm_end,
- &find_zeropage_ops, &addr);
- if (rc < 0)
- return rc;
- else if (!rc)
- continue;
-
- /* addr was updated by find_zeropage_pte_entry() */
- fault = handle_mm_fault(vma, addr,
- FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
- NULL);
- if (fault & VM_FAULT_OOM)
- return -ENOMEM;
- /*
- * See break_ksm(): even after handle_mm_fault() returned 0, we
- * must start the lookup from the current address, because
- * handle_mm_fault() may back out if there's any difficulty.
- *
- * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
- * maybe they could trigger in the future on concurrent
- * truncation. In that case, the shared zeropage would be gone
- * and we can simply retry and make progress.
- */
- cond_resched();
- goto retry;
- }
-
- return 0;
-}
-
-static int __s390_disable_cow_sharing(struct mm_struct *mm)
-{
- int rc;
-
- if (!mm->context.allow_cow_sharing)
- return 0;
-
- mm->context.allow_cow_sharing = 0;
-
- /* Replace all shared zeropages by anonymous pages. */
- rc = __s390_unshare_zeropages(mm);
- /*
- * Make sure to disable KSM (if enabled for the whole process or
- * individual VMAs). Note that nothing currently hinders user space
- * from re-enabling it.
- */
- if (!rc)
- rc = ksm_disable(mm);
- if (rc)
- mm->context.allow_cow_sharing = 1;
- return rc;
-}
-
-/*
- * Disable most COW-sharing of memory pages for the whole process:
- * (1) Disable KSM and unmerge/unshare any KSM pages.
- * (2) Disallow shared zeropages and unshare any zerpages that are mapped.
- *
- * Not that we currently don't bother with COW-shared pages that are shared
- * with parent/child processes due to fork().
- */
-int s390_disable_cow_sharing(void)
-{
- int rc;
-
- mmap_write_lock(current->mm);
- rc = __s390_disable_cow_sharing(current->mm);
- mmap_write_unlock(current->mm);
- return rc;
-}
-EXPORT_SYMBOL_GPL(s390_disable_cow_sharing);
-
/*
* Enable storage key handling from now on and initialize the storage
* keys with the default key.
@@ -2733,7 +2240,7 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
{
pmd_t *pmd = (pmd_t *)pte;
unsigned long start, end;
- struct page *page = pmd_page(*pmd);
+ struct folio *folio = page_folio(pmd_page(*pmd));
/*
* The write check makes sure we do not set a key on shared
@@ -2748,7 +2255,7 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
start = pmd_val(*pmd) & HPAGE_MASK;
end = start + HPAGE_SIZE;
__storage_key_init_range(start, end);
- set_bit(PG_arch_1, &page->flags);
+ set_bit(PG_arch_1, &folio->flags.f);
cond_resched();
return 0;
}
@@ -2770,7 +2277,7 @@ int s390_enable_skey(void)
goto out_up;
mm->context.uses_skeys = 1;
- rc = __s390_disable_cow_sharing(mm);
+ rc = gmap_helper_disable_cow_sharing();
if (rc) {
mm->context.uses_skeys = 0;
goto out_up;
@@ -2841,13 +2348,15 @@ static const struct mm_walk_ops gather_pages_ops = {
*/
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns)
{
+ struct folio *folio;
unsigned long i;
for (i = 0; i < count; i++) {
+ folio = pfn_folio(pfns[i]);
/* we always have an extra reference */
- uv_destroy_owned_page(pfn_to_phys(pfns[i]));
+ uv_destroy_folio(folio);
/* get rid of the extra reference */
- put_page(pfn_to_page(pfns[i]));
+ folio_put(folio);
cond_resched();
}
}
@@ -2888,49 +2397,6 @@ int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
EXPORT_SYMBOL_GPL(__s390_uv_destroy_range);
/**
- * s390_unlist_old_asce - Remove the topmost level of page tables from the
- * list of page tables of the gmap.
- * @gmap: the gmap whose table is to be removed
- *
- * On s390x, KVM keeps a list of all pages containing the page tables of the
- * gmap (the CRST list). This list is used at tear down time to free all
- * pages that are now not needed anymore.
- *
- * This function removes the topmost page of the tree (the one pointed to by
- * the ASCE) from the CRST list.
- *
- * This means that it will not be freed when the VM is torn down, and needs
- * to be handled separately by the caller, unless a leak is actually
- * intended. Notice that this function will only remove the page from the
- * list, the page will still be used as a top level page table (and ASCE).
- */
-void s390_unlist_old_asce(struct gmap *gmap)
-{
- struct page *old;
-
- old = virt_to_page(gmap->table);
- spin_lock(&gmap->guest_table_lock);
- list_del(&old->lru);
- /*
- * Sometimes the topmost page might need to be "removed" multiple
- * times, for example if the VM is rebooted into secure mode several
- * times concurrently, or if s390_replace_asce fails after calling
- * s390_remove_old_asce and is attempted again later. In that case
- * the old asce has been removed from the list, and therefore it
- * will not be freed when the VM terminates, but the ASCE is still
- * in use and still pointed to.
- * A subsequent call to replace_asce will follow the pointer and try
- * to remove the same page from the list again.
- * Therefore it's necessary that the page of the ASCE has valid
- * pointers, so list_del can work (and do nothing) without
- * dereferencing stale or invalid pointers.
- */
- INIT_LIST_HEAD(&old->lru);
- spin_unlock(&gmap->guest_table_lock);
-}
-EXPORT_SYMBOL_GPL(s390_unlist_old_asce);
-
-/**
* s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
* @gmap: the gmap whose ASCE needs to be replaced
*
@@ -2949,8 +2415,6 @@ int s390_replace_asce(struct gmap *gmap)
struct page *page;
void *table;
- s390_unlist_old_asce(gmap);
-
/* Replacing segment type ASCEs would cause serious issues */
if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
return -EINVAL;
@@ -2958,19 +2422,9 @@ int s390_replace_asce(struct gmap *gmap)
page = gmap_alloc_crst();
if (!page)
return -ENOMEM;
- page->index = 0;
table = page_to_virt(page);
memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));
- /*
- * The caller has to deal with the old ASCE, but here we make sure
- * the new one is properly added to the CRST list, so that
- * it will be freed when the VM is torn down.
- */
- spin_lock(&gmap->guest_table_lock);
- list_add(&page->lru, &gmap->crst_list);
- spin_unlock(&gmap->guest_table_lock);
-
/* Set new table origin while preserving existing ASCE control bits */
asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
WRITE_ONCE(gmap->asce, asce);
diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c
new file mode 100644
index 000000000000..549f14ad08af
--- /dev/null
+++ b/arch/s390/mm/gmap_helpers.c
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Helper functions for KVM guest address space mapping code
+ *
+ * Copyright IBM Corp. 2007, 2025
+ */
+
+#include <linux/export.h>
+#include <linux/mm_types.h>
+#include <linux/mmap_lock.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/swap.h>
+#include <linux/leafops.h>
+#include <linux/pagewalk.h>
+#include <linux/ksm.h>
+#include <asm/gmap_helpers.h>
+#include <asm/pgtable.h>
+
+/**
+ * ptep_zap_softleaf_entry() - discard a software leaf entry.
+ * @mm: the mm
+ * @entry: the software leaf entry that needs to be zapped
+ *
+ * Discards the given software leaf entry. If the leaf entry was an actual
+ * swap entry (and not a migration entry, for example), the actual swapped
+ * page is also discarded from swap.
+ */
+static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry)
+{
+ if (softleaf_is_swap(entry))
+ dec_mm_counter(mm, MM_SWAPENTS);
+ else if (softleaf_is_migration(entry))
+ dec_mm_counter(mm, mm_counter(softleaf_to_folio(entry)));
+ free_swap_and_cache(entry);
+}
+
+/**
+ * gmap_helper_zap_one_page() - discard a page if it was swapped.
+ * @mm: the mm
+ * @vmaddr: the userspace virtual address that needs to be discarded
+ *
+ * If the given address maps to a swap entry, discard it.
+ *
+ * Context: needs to be called while holding the mmap lock.
+ */
+void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
+{
+ struct vm_area_struct *vma;
+ spinlock_t *ptl;
+ pgste_t pgste;
+ pte_t *ptep;
+
+ mmap_assert_locked(mm);
+
+ /* Find the vm address for the guest address */
+ vma = vma_lookup(mm, vmaddr);
+ if (!vma || is_vm_hugetlb_page(vma))
+ return;
+
+ /* Get pointer to the page table entry */
+ ptep = get_locked_pte(mm, vmaddr, &ptl);
+ if (unlikely(!ptep))
+ return;
+ if (pte_swap(*ptep)) {
+ preempt_disable();
+ pgste = pgste_get_lock(ptep);
+
+ ptep_zap_softleaf_entry(mm, softleaf_from_pte(*ptep));
+ pte_clear(mm, vmaddr, ptep);
+
+ pgste_set_unlock(ptep, pgste);
+ preempt_enable();
+ }
+ pte_unmap_unlock(ptep, ptl);
+}
+EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page);
+
+/**
+ * gmap_helper_discard() - discard user pages in the given range
+ * @mm: the mm
+ * @vmaddr: starting userspace address
+ * @end: end address (first address outside the range)
+ *
+ * All userspace pages in the range [@vmaddr, @end) are discarded and unmapped.
+ *
+ * Context: needs to be called while holding the mmap lock.
+ */
+void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
+{
+ struct vm_area_struct *vma;
+
+ mmap_assert_locked(mm);
+
+ while (vmaddr < end) {
+ vma = find_vma_intersection(mm, vmaddr, end);
+ if (!vma)
+ return;
+ if (!is_vm_hugetlb_page(vma))
+ zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL);
+ vmaddr = vma->vm_end;
+ }
+}
+EXPORT_SYMBOL_GPL(gmap_helper_discard);
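
Both helpers above expect the caller to already hold the mmap lock of the target mm. A minimal, purely illustrative calling sequence (the wrapper name and the address range are hypothetical):

static void demo_discard_guest_backing(struct mm_struct *mm,
				       unsigned long start, unsigned long end)
{
	mmap_read_lock(mm);
	gmap_helper_discard(mm, start, end);	/* unmaps all pages in [start, end) */
	mmap_read_unlock(mm);
}
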
+
+static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+{
+ unsigned long *found_addr = walk->private;
+
+	/* Return 1 if the page is a zeropage. */
+ if (is_zero_pfn(pte_pfn(*pte))) {
+ /*
+ * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
+ * right thing and likely don't care: FAULT_FLAG_UNSHARE
+ * currently only works in COW mappings, which is also where
+ * mm_forbids_zeropage() is checked.
+ */
+ if (!is_cow_mapping(walk->vma->vm_flags))
+ return -EFAULT;
+
+ *found_addr = addr;
+ return 1;
+ }
+ return 0;
+}
+
+static const struct mm_walk_ops find_zeropage_ops = {
+ .pte_entry = find_zeropage_pte_entry,
+ .walk_lock = PGWALK_WRLOCK,
+};
+
+/**
+ * __gmap_helper_unshare_zeropages() - unshare all shared zeropages
+ * @mm: the mm whose zero pages are to be unshared
+ *
+ * Unshare all shared zeropages, replacing them by anonymous pages. Note that
+ * we cannot simply zap all shared zeropages, because this could later
+ * trigger unexpected userfaultfd missing events.
+ *
+ * This must be called after mm->context.allow_cow_sharing was
+ * set to 0, to avoid future mappings of shared zeropages.
+ *
+ * mm contracts with s390, that even if mm were to remove a page table,
+ * and racing with walk_page_range_vma() calling pte_offset_map_lock()
+ * would fail, it will never insert a page table containing empty zero
+ * pages once mm_forbids_zeropage(mm) i.e.
+ * mm->context.allow_cow_sharing is set to 0.
+ */
+static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
+{
+ struct vm_area_struct *vma;
+ VMA_ITERATOR(vmi, mm, 0);
+ unsigned long addr;
+ vm_fault_t fault;
+ int rc;
+
+ for_each_vma(vmi, vma) {
+ /*
+ * We could only look at COW mappings, but it's more future
+ * proof to catch unexpected zeropages in other mappings and
+ * fail.
+ */
+ if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
+ continue;
+ addr = vma->vm_start;
+
+retry:
+ rc = walk_page_range_vma(vma, addr, vma->vm_end,
+ &find_zeropage_ops, &addr);
+ if (rc < 0)
+ return rc;
+ else if (!rc)
+ continue;
+
+ /* addr was updated by find_zeropage_pte_entry() */
+ fault = handle_mm_fault(vma, addr,
+ FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
+ NULL);
+ if (fault & VM_FAULT_OOM)
+ return -ENOMEM;
+ /*
+ * See break_ksm(): even after handle_mm_fault() returned 0, we
+ * must start the lookup from the current address, because
+ * handle_mm_fault() may back out if there's any difficulty.
+ *
+ * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
+ * maybe they could trigger in the future on concurrent
+ * truncation. In that case, the shared zeropage would be gone
+ * and we can simply retry and make progress.
+ */
+ cond_resched();
+ goto retry;
+ }
+
+ return 0;
+}
+
+/**
+ * gmap_helper_disable_cow_sharing() - disable all COW sharing
+ *
+ * Disable most COW-sharing of memory pages for the whole process:
+ * (1) Disable KSM and unmerge/unshare any KSM pages.
+ * (2) Disallow shared zeropages and unshare any zeropages that are mapped.
+ *
+ * Note that we currently don't bother with COW-shared pages that are shared
+ * with parent/child processes due to fork().
+ */
+int gmap_helper_disable_cow_sharing(void)
+{
+ struct mm_struct *mm = current->mm;
+ int rc;
+
+ mmap_assert_write_locked(mm);
+
+ if (!mm->context.allow_cow_sharing)
+ return 0;
+
+ mm->context.allow_cow_sharing = 0;
+
+ /* Replace all shared zeropages by anonymous pages. */
+ rc = __gmap_helper_unshare_zeropages(mm);
+ /*
+ * Make sure to disable KSM (if enabled for the whole process or
+ * individual VMAs). Note that nothing currently hinders user space
+ * from re-enabling it.
+ */
+ if (!rc)
+ rc = ksm_disable(mm);
+ if (rc)
+ mm->context.allow_cow_sharing = 1;
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);
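
gmap_helper_disable_cow_sharing() only asserts that the mmap lock is held for writing; taking and dropping the lock is left to the caller, much like the removed s390_disable_cow_sharing() wrapper used to do. An illustrative caller (the wrapper name is hypothetical):

static int demo_disable_cow_sharing(void)
{
	int rc;

	mmap_write_lock(current->mm);
	rc = gmap_helper_disable_cow_sharing();
	mmap_write_unlock(current->mm);
	return rc;
}
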
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 2675aab4acc7..d42e61c7594e 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -6,15 +6,15 @@
* Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
*/
-#define KMSG_COMPONENT "hugetlb"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "hugetlb: " fmt
-#include <asm/pgalloc.h>
+#include <linux/cpufeature.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/mman.h>
#include <linux/sched/mm.h>
#include <linux/security.h>
+#include <asm/pgalloc.h>
/*
* If the bit selected by single-bit bitmask "a" is set within "x", move
@@ -24,6 +24,7 @@
static inline unsigned long __pte_to_rste(pte_t pte)
{
+ swp_entry_t arch_entry;
unsigned long rste;
/*
@@ -48,6 +49,7 @@ static inline unsigned long __pte_to_rste(pte_t pte)
*/
if (pte_present(pte)) {
rste = pte_val(pte) & PAGE_MASK;
+ rste |= _SEGMENT_ENTRY_PRESENT;
rste |= move_set_bit(pte_val(pte), _PAGE_READ,
_SEGMENT_ENTRY_READ);
rste |= move_set_bit(pte_val(pte), _PAGE_WRITE,
@@ -66,6 +68,10 @@ static inline unsigned long __pte_to_rste(pte_t pte)
#endif
rste |= move_set_bit(pte_val(pte), _PAGE_NOEXEC,
_SEGMENT_ENTRY_NOEXEC);
+ } else if (!pte_none(pte)) {
+ /* swap pte */
+ arch_entry = __pte_to_swp_entry(pte);
+ rste = mk_swap_rste(__swp_type(arch_entry), __swp_offset(arch_entry));
} else
rste = _SEGMENT_ENTRY_EMPTY;
return rste;
@@ -73,13 +79,18 @@ static inline unsigned long __pte_to_rste(pte_t pte)
static inline pte_t __rste_to_pte(unsigned long rste)
{
+ swp_entry_t arch_entry;
unsigned long pteval;
- int present;
+ int present, none;
+ pte_t pte;
- if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+ if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
present = pud_present(__pud(rste));
- else
+ none = pud_none(__pud(rste));
+ } else {
present = pmd_present(__pmd(rste));
+ none = pmd_none(__pmd(rste));
+ }
/*
* Convert encoding pmd / pud bits pte bits
@@ -114,6 +125,11 @@ static inline pte_t __rste_to_pte(unsigned long rste)
pteval |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, _PAGE_SOFT_DIRTY);
#endif
pteval |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, _PAGE_NOEXEC);
+ } else if (!none) {
+ /* swap rste */
+ arch_entry = __rste_to_swp_entry(rste);
+ pte = mk_swap_pte(__swp_type_rste(arch_entry), __swp_offset_rste(arch_entry));
+ pteval = pte_val(pte);
} else
pteval = _PAGE_INVALID;
return __pte(pteval);
@@ -121,7 +137,7 @@ static inline pte_t __rste_to_pte(unsigned long rste)
static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
{
- struct page *page;
+ struct folio *folio;
unsigned long size, paddr;
if (!mm_uses_skeys(mm) ||
@@ -129,16 +145,16 @@ static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
return;
if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
- page = pud_page(__pud(rste));
+ folio = page_folio(pud_page(__pud(rste)));
size = PUD_SIZE;
paddr = rste & PUD_MASK;
} else {
- page = pmd_page(__pmd(rste));
+ folio = page_folio(pmd_page(__pmd(rste)));
size = PMD_SIZE;
paddr = rste & PMD_MASK;
}
- if (!test_and_set_bit(PG_arch_1, &page->flags))
+ if (!test_and_set_bit(PG_arch_1, &folio->flags.f))
__storage_key_init_range(paddr, paddr + size);
}
@@ -148,8 +164,6 @@ void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
unsigned long rste;
rste = __pte_to_rste(pte);
- if (!MACHINE_HAS_NX)
- rste &= ~_SEGMENT_ENTRY_NOEXEC;
/* Set correct table type for 2G hugepages */
if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
@@ -169,15 +183,15 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
__set_huge_pte_at(mm, addr, ptep, pte);
}
-pte_t huge_ptep_get(pte_t *ptep)
+pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
return __rste_to_pte(pte_val(*ptep));
}
-pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
+pte_t __huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
{
- pte_t pte = huge_ptep_get(ptep);
+ pte_t pte = huge_ptep_get(mm, addr, ptep);
pmd_t *pmdp = (pmd_t *) ptep;
pud_t *pudp = (pud_t *) ptep;
@@ -223,11 +237,10 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
p4dp = p4d_offset(pgdp, addr);
if (p4d_present(*p4dp)) {
pudp = pud_offset(p4dp, addr);
- if (pud_present(*pudp)) {
- if (pud_leaf(*pudp))
- return (pte_t *) pudp;
+ if (sz == PUD_SIZE)
+ return (pte_t *)pudp;
+ if (pud_present(*pudp))
pmdp = pmd_offset(pudp, addr);
- }
}
}
return (pte_t *) pmdp;
@@ -235,95 +248,10 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
bool __init arch_hugetlb_valid_size(unsigned long size)
{
- if (MACHINE_HAS_EDAT1 && size == PMD_SIZE)
+ if (cpu_has_edat1() && size == PMD_SIZE)
return true;
- else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE)
+ else if (cpu_has_edat2() && size == PUD_SIZE)
return true;
else
return false;
}
-
-static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
- unsigned long addr, unsigned long len,
- unsigned long pgoff, unsigned long flags)
-{
- struct hstate *h = hstate_file(file);
- struct vm_unmapped_area_info info = {};
-
- info.length = len;
- info.low_limit = current->mm->mmap_base;
- info.high_limit = TASK_SIZE;
- info.align_mask = PAGE_MASK & ~huge_page_mask(h);
- return vm_unmapped_area(&info);
-}
-
-static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
- unsigned long addr0, unsigned long len,
- unsigned long pgoff, unsigned long flags)
-{
- struct hstate *h = hstate_file(file);
- struct vm_unmapped_area_info info = {};
- unsigned long addr;
-
- info.flags = VM_UNMAPPED_AREA_TOPDOWN;
- info.length = len;
- info.low_limit = PAGE_SIZE;
- info.high_limit = current->mm->mmap_base;
- info.align_mask = PAGE_MASK & ~huge_page_mask(h);
- addr = vm_unmapped_area(&info);
-
- /*
- * A failed mmap() very likely causes application failure,
- * so fall back to the bottom-up function here. This scenario
- * can happen with large stack limits and large mmap()
- * allocations.
- */
- if (addr & ~PAGE_MASK) {
- VM_BUG_ON(addr != -ENOMEM);
- info.flags = 0;
- info.low_limit = TASK_UNMAPPED_BASE;
- info.high_limit = TASK_SIZE;
- addr = vm_unmapped_area(&info);
- }
-
- return addr;
-}
-
-unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
-{
- struct hstate *h = hstate_file(file);
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
-
- if (len & ~huge_page_mask(h))
- return -EINVAL;
- if (len > TASK_SIZE - mmap_min_addr)
- return -ENOMEM;
-
- if (flags & MAP_FIXED) {
- if (prepare_hugepage_range(file, addr, len))
- return -EINVAL;
- goto check_asce_limit;
- }
-
- if (addr) {
- addr = ALIGN(addr, huge_page_size(h));
- vma = find_vma(mm, addr);
- if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
- (!vma || addr + len <= vm_start_gap(vma)))
- goto check_asce_limit;
- }
-
- if (!test_bit(MMF_TOPDOWN, &mm->flags))
- addr = hugetlb_get_unmapped_area_bottomup(file, addr, len,
- pgoff, flags);
- else
- addr = hugetlb_get_unmapped_area_topdown(file, addr, len,
- pgoff, flags);
- if (offset_in_page(addr))
- return addr;
-
-check_asce_limit:
- return check_asce_limit(mm, addr, len);
-}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index e769d2726f4e..e4953453d254 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -8,6 +8,7 @@
* Copyright (C) 1995 Linus Torvalds
*/
+#include <linux/cpufeature.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
@@ -39,7 +40,6 @@
#include <asm/kfence.h>
#include <asm/dma.h>
#include <asm/abs_lowcore.h>
-#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/sclp.h>
@@ -56,33 +56,32 @@ pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir");
struct ctlreg __bootdata_preserved(s390_invalid_asce);
+unsigned long __bootdata_preserved(page_noexec_mask);
+EXPORT_SYMBOL(page_noexec_mask);
+
+unsigned long __bootdata_preserved(segment_noexec_mask);
+EXPORT_SYMBOL(segment_noexec_mask);
+
+unsigned long __bootdata_preserved(region_noexec_mask);
+EXPORT_SYMBOL(region_noexec_mask);
+
unsigned long empty_zero_page, zero_page_mask;
EXPORT_SYMBOL(empty_zero_page);
EXPORT_SYMBOL(zero_page_mask);
static void __init setup_zero_pages(void)
{
+ unsigned long total_pages = memblock_estimated_nr_free_pages();
unsigned int order;
- struct page *page;
- int i;
/* Latest machines require a mapping granularity of 512KB */
order = 7;
/* Limit number of empty zero pages for small memory sizes */
- while (order > 2 && (totalram_pages() >> 10) < (1UL << order))
+ while (order > 2 && (total_pages >> 10) < (1UL << order))
order--;
- empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
- if (!empty_zero_page)
- panic("Out of memory in setup_zero_pages");
-
- page = virt_to_page((void *) empty_zero_page);
- split_page(page, order);
- for (i = 1 << order; i > 0; i--) {
- mark_page_reserved(page);
- page++;
- }
+ empty_zero_page = (unsigned long)memblock_alloc_or_panic(PAGE_SIZE << order, PAGE_SIZE);
zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
}
@@ -96,7 +95,7 @@ void __init paging_init(void)
vmem_map_init();
sparse_init();
- zone_dma_bits = 31;
+ zone_dma_limit = DMA_BIT_MASK(31);
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
max_zone_pfns[ZONE_DMA] = virt_to_pfn(MAX_DMA_ADDRESS);
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
@@ -107,6 +106,8 @@ void mark_rodata_ro(void)
{
unsigned long size = __end_ro_after_init - __start_ro_after_init;
+ if (cpu_has_nx())
+ system_ctl_set_bit(0, CR0_INSTRUCTION_EXEC_PROTECTION_BIT);
__set_memory_ro(__start_ro_after_init, __end_ro_after_init);
pr_info("Write protected read-only-after-init data: %luk\n", size >> 10);
}
@@ -141,7 +142,7 @@ bool force_dma_unencrypted(struct device *dev)
}
/* protected virtualization */
-static void pv_init(void)
+static void __init pv_init(void)
{
if (!is_prot_virt_guest())
return;
@@ -153,29 +154,16 @@ static void pv_init(void)
swiotlb_update_mem_attributes();
}
-void __init mem_init(void)
+void __init arch_mm_preinit(void)
{
cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask);
cpumask_set_cpu(0, mm_cpumask(&init_mm));
- set_max_mapnr(max_low_pfn);
- high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
-
pv_init();
- kfence_split_mapping();
- /* this will put all low memory onto the freelists */
- memblock_free_all();
setup_zero_pages(); /* Setup zeroed pages. */
}
-void free_initmem(void)
-{
- set_memory_rwnx((unsigned long)_sinittext,
- (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT);
- free_initmem_default(POISON_FREE_INITMEM);
-}
-
unsigned long memory_block_size_bytes(void)
{
/*
@@ -234,16 +222,13 @@ struct s390_cma_mem_data {
static int s390_cma_check_range(struct cma *cma, void *data)
{
struct s390_cma_mem_data *mem_data;
- unsigned long start, end;
mem_data = data;
- start = cma_get_base(cma);
- end = start + cma_get_size(cma);
- if (end < mem_data->start)
- return 0;
- if (start >= mem_data->end)
- return 0;
- return -EBUSY;
+
+ if (cma_intersects(cma, mem_data->start, mem_data->end))
+ return -EBUSY;
+
+ return 0;
}
static int s390_cma_mem_notifier(struct notifier_block *nb,
@@ -280,7 +265,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
unsigned long size_pages = PFN_DOWN(size);
int rc;
- if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot))
+ if (WARN_ON_ONCE(pgprot_val(params->pgprot) != pgprot_val(PAGE_KERNEL)))
return -EINVAL;
VM_BUG_ON(!mhp_range_allowed(start, size, true));
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 632c3a55feed..cfd219fe495c 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -17,6 +17,7 @@
#include <asm/asm-extable.h>
#include <asm/abs_lowcore.h>
#include <asm/stacktrace.h>
+#include <asm/sections.h>
#include <asm/maccess.h>
#include <asm/ctlreg.h>
@@ -40,7 +41,7 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz
" ex %1,0(1)\n"
" lg %1,0(%3)\n"
" lra %0,0(%0)\n"
- " sturg %1,%0\n"
+ " sturg %1,%0"
: "+&a" (aligned), "+&a" (count), "=m" (tmp)
: "a" (&tmp), "a" (&tmp[offset]), "a" (src)
: "cc", "memory", "1");
@@ -48,7 +49,7 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz
}
/*
- * s390_kernel_write - write to kernel memory bypassing DAT
+ * __s390_kernel_write - write to kernel memory bypassing DAT
* @dst: destination address
* @src: source address
* @size: number of bytes to copy
@@ -61,7 +62,7 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz
*/
static DEFINE_SPINLOCK(s390_kernel_write_lock);
-notrace void *s390_kernel_write(void *dst, const void *src, size_t size)
+notrace void *__s390_kernel_write(void *dst, const void *src, size_t size)
{
void *tmp = dst;
unsigned long flags;
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 206756946589..2a222a7e14f4 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -15,8 +15,8 @@
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/random.h>
-#include <linux/compat.h>
#include <linux/security.h>
+#include <linux/hugetlb.h>
#include <asm/elf.h>
static unsigned long stack_maxrandom_size(void)
@@ -26,7 +26,7 @@ static unsigned long stack_maxrandom_size(void)
return STACK_RND_MASK << PAGE_SHIFT;
}
-static inline int mmap_is_legacy(struct rlimit *rlim_stack)
+static inline int mmap_is_legacy(const struct rlimit *rlim_stack)
{
if (current->personality & ADDR_COMPAT_LAYOUT)
return 1;
@@ -46,11 +46,10 @@ static unsigned long mmap_base_legacy(unsigned long rnd)
}
static inline unsigned long mmap_base(unsigned long rnd,
- struct rlimit *rlim_stack)
+ const struct rlimit *rlim_stack)
{
unsigned long gap = rlim_stack->rlim_cur;
unsigned long pad = stack_maxrandom_size() + stack_guard_gap;
- unsigned long gap_min, gap_max;
/* Values close to RLIM_INFINITY can overflow. */
if (gap + pad > gap)
@@ -60,19 +59,15 @@ static inline unsigned long mmap_base(unsigned long rnd,
* Top of mmap area (just below the process stack).
* Leave at least a ~128 MB hole.
*/
- gap_min = SZ_128M;
- gap_max = (STACK_TOP / 6) * 5;
-
- if (gap < gap_min)
- gap = gap_min;
- else if (gap > gap_max)
- gap = gap_max;
+ gap = clamp(gap, SZ_128M, (STACK_TOP / 6) * 5);
return PAGE_ALIGN(STACK_TOP - gap - rnd);
}
static int get_align_mask(struct file *filp, unsigned long flags)
{
+ if (filp && is_file_hugepages(filp))
+ return huge_page_mask_align(filp);
if (!(current->flags & PF_RANDOMIZE))
return 0;
if (filp || (flags & MAP_SHARED))
@@ -82,7 +77,7 @@ static int get_align_mask(struct file *filp, unsigned long flags)
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff,
- unsigned long flags)
+ unsigned long flags, vm_flags_t vm_flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
@@ -106,7 +101,8 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
info.low_limit = mm->mmap_base;
info.high_limit = TASK_SIZE;
info.align_mask = get_align_mask(filp, flags);
- info.align_offset = pgoff << PAGE_SHIFT;
+ if (!(filp && is_file_hugepages(filp)))
+ info.align_offset = pgoff << PAGE_SHIFT;
addr = vm_unmapped_area(&info);
if (offset_in_page(addr))
return addr;
@@ -117,7 +113,7 @@ check_asce_limit:
unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff,
- unsigned long flags)
+ unsigned long flags, vm_flags_t vm_flags)
{
struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
@@ -144,7 +140,8 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long ad
info.low_limit = PAGE_SIZE;
info.high_limit = mm->mmap_base;
info.align_mask = get_align_mask(filp, flags);
- info.align_offset = pgoff << PAGE_SHIFT;
+ if (!(filp && is_file_hugepages(filp)))
+ info.align_offset = pgoff << PAGE_SHIFT;
addr = vm_unmapped_area(&info);
/*
@@ -171,7 +168,7 @@ check_asce_limit:
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
*/
-void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
+void arch_pick_mmap_layout(struct mm_struct *mm, const struct rlimit *rlim_stack)
{
unsigned long random_factor = 0UL;
@@ -184,29 +181,35 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
*/
if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = mmap_base_legacy(random_factor);
- clear_bit(MMF_TOPDOWN, &mm->flags);
+ mm_flags_clear(MMF_TOPDOWN, mm);
} else {
mm->mmap_base = mmap_base(random_factor, rlim_stack);
- set_bit(MMF_TOPDOWN, &mm->flags);
+ mm_flags_set(MMF_TOPDOWN, mm);
}
}
-static const pgprot_t protection_map[16] = {
- [VM_NONE] = PAGE_NONE,
- [VM_READ] = PAGE_RO,
- [VM_WRITE] = PAGE_RO,
- [VM_WRITE | VM_READ] = PAGE_RO,
- [VM_EXEC] = PAGE_RX,
- [VM_EXEC | VM_READ] = PAGE_RX,
- [VM_EXEC | VM_WRITE] = PAGE_RX,
- [VM_EXEC | VM_WRITE | VM_READ] = PAGE_RX,
- [VM_SHARED] = PAGE_NONE,
- [VM_SHARED | VM_READ] = PAGE_RO,
- [VM_SHARED | VM_WRITE] = PAGE_RW,
- [VM_SHARED | VM_WRITE | VM_READ] = PAGE_RW,
- [VM_SHARED | VM_EXEC] = PAGE_RX,
- [VM_SHARED | VM_EXEC | VM_READ] = PAGE_RX,
- [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_RWX,
- [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_RWX
-};
+static pgprot_t protection_map[16] __ro_after_init;
+
+void __init setup_protection_map(void)
+{
+ pgprot_t *pm = protection_map;
+
+ pm[VM_NONE] = PAGE_NONE;
+ pm[VM_READ] = PAGE_RO;
+ pm[VM_WRITE] = PAGE_RO;
+ pm[VM_WRITE | VM_READ] = PAGE_RO;
+ pm[VM_EXEC] = PAGE_RX;
+ pm[VM_EXEC | VM_READ] = PAGE_RX;
+ pm[VM_EXEC | VM_WRITE] = PAGE_RX;
+ pm[VM_EXEC | VM_WRITE | VM_READ] = PAGE_RX;
+ pm[VM_SHARED] = PAGE_NONE;
+ pm[VM_SHARED | VM_READ] = PAGE_RO;
+ pm[VM_SHARED | VM_WRITE] = PAGE_RW;
+ pm[VM_SHARED | VM_WRITE | VM_READ] = PAGE_RW;
+ pm[VM_SHARED | VM_EXEC] = PAGE_RX;
+ pm[VM_SHARED | VM_EXEC | VM_READ] = PAGE_RX;
+ pm[VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_RWX;
+ pm[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_RWX;
+}
+
DECLARE_VM_GET_PAGE_PROT
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 01bc8fad64d6..3042647c9dbf 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -3,6 +3,7 @@
* Copyright IBM Corp. 2011
* Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
*/
+#include <linux/cpufeature.h>
#include <linux/hugetlb.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
@@ -12,6 +13,7 @@
#include <asm/pgalloc.h>
#include <asm/kfence.h>
#include <asm/page.h>
+#include <asm/asm.h>
#include <asm/set_memory.h>
static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
@@ -26,7 +28,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end)
unsigned long boundary, size;
while (start < end) {
- if (MACHINE_HAS_EDAT1) {
+ if (cpu_has_edat1()) {
/* set storage keys for a 1MB frame */
size = 1UL << 20;
boundary = (start + size) & ~(size - 1);
@@ -62,7 +64,7 @@ static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr,
unsigned long *table, mask;
mask = 0;
- if (MACHINE_HAS_EDAT2) {
+ if (cpu_has_edat2()) {
switch (dtt) {
case CRDTE_DTT_REGION3:
mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1);
@@ -75,11 +77,9 @@ static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr,
break;
}
table = (unsigned long *)((unsigned long)old & mask);
- crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce.val);
- } else if (MACHINE_HAS_IDTE) {
- cspg(old, *old, new);
+ crdte(*old, new, table, dtt, addr, get_lowcore()->kernel_asce.val);
} else {
- csp((unsigned int *)old + 1, *old, new);
+ cspg(old, *old, new);
}
}
@@ -108,8 +108,6 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
} else if (flags & SET_MEMORY_DEF) {
new = __pte(pte_val(new) & PAGE_MASK);
new = set_pte_bit(new, PAGE_KERNEL);
- if (!MACHINE_HAS_NX)
- new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC));
}
pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE);
ptep++;
@@ -166,8 +164,6 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
} else if (flags & SET_MEMORY_DEF) {
new = __pmd(pmd_val(new) & PMD_MASK);
new = set_pmd_bit(new, SEGMENT_KERNEL);
- if (!MACHINE_HAS_NX)
- new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
}
pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
}
@@ -255,8 +251,6 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr,
} else if (flags & SET_MEMORY_DEF) {
new = __pud(pud_val(new) & PUD_MASK);
new = set_pud_bit(new, REGION3_KERNEL);
- if (!MACHINE_HAS_NX)
- new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
}
pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
}
@@ -378,7 +372,7 @@ int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags
unsigned long end;
int rc;
- if (!MACHINE_HAS_NX)
+ if (!cpu_has_nx())
flags &= ~(SET_MEMORY_NX | SET_MEMORY_X);
if (!flags)
return 0;
@@ -406,6 +400,33 @@ int set_direct_map_default_noflush(struct page *page)
return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_DEF);
}
+int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid)
+{
+ unsigned long flags;
+
+ if (valid)
+ flags = SET_MEMORY_DEF;
+ else
+ flags = SET_MEMORY_INV;
+
+ return __set_memory((unsigned long)page_to_virt(page), nr, flags);
+}
+
+bool kernel_page_present(struct page *page)
+{
+ unsigned long addr;
+ unsigned int cc;
+
+ addr = (unsigned long)page_address(page);
+ asm volatile(
+ " lra %[addr],0(%[addr])\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc), [addr] "+a" (addr)
+ :
+ : CC_CLOBBER);
+ return CC_TRANSFORM(cc) == 0;
+}
+
#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
static void ipte_range(pte_t *pte, unsigned long address, int nr)
diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c
index 1aac13bb8f53..2f829448c719 100644
--- a/arch/s390/mm/pfault.c
+++ b/arch/s390/mm/pfault.c
@@ -9,6 +9,7 @@
#include <linux/init.h>
#include <linux/irq.h>
#include <asm/asm-extable.h>
+#include <asm/asm-offsets.h>
#include <asm/pfault.h>
#include <asm/diag.h>
@@ -56,7 +57,7 @@ int __pfault_init(void)
if (pfault_disable)
return rc;
diag_stat_inc(DIAG_STAT_X258);
- asm volatile(
+ asm_inline volatile(
" diag %[refbk],%[rc],0x258\n"
"0: nopr %%r7\n"
EX_TABLE(0b, 0b)
@@ -78,7 +79,7 @@ void __pfault_fini(void)
if (pfault_disable)
return;
diag_stat_inc(DIAG_STAT_X258);
- asm volatile(
+ asm_inline volatile(
" diag %[refbk],0,0x258\n"
"0: nopr %%r7\n"
EX_TABLE(0b, 0b)
@@ -198,8 +199,7 @@ block:
* return to userspace schedule() to block.
*/
__set_current_state(TASK_UNINTERRUPTIBLE);
- set_tsk_need_resched(tsk);
- set_preempt_need_resched();
+ set_need_resched_current();
}
}
out:
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 7e3e767ab87d..7df23528c01b 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -12,43 +12,20 @@
#include <asm/mmu_context.h>
#include <asm/page-states.h>
#include <asm/pgalloc.h>
-#include <asm/gmap.h>
-#include <asm/tlb.h>
#include <asm/tlbflush.h>
-#ifdef CONFIG_PGSTE
-
-int page_table_allocate_pgste = 0;
-EXPORT_SYMBOL(page_table_allocate_pgste);
-
-static struct ctl_table page_table_sysctl[] = {
- {
- .procname = "allocate_pgste",
- .data = &page_table_allocate_pgste,
- .maxlen = sizeof(int),
- .mode = S_IRUGO | S_IWUSR,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
-};
-
-static int __init page_table_register_sysctl(void)
-{
- return register_sysctl("vm", page_table_sysctl) ? 0 : -ENOMEM;
-}
-__initcall(page_table_register_sysctl);
-
-#endif /* CONFIG_PGSTE */
-
-unsigned long *crst_table_alloc(struct mm_struct *mm)
+unsigned long *crst_table_alloc_noprof(struct mm_struct *mm)
{
- struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER);
+ gfp_t gfp = GFP_KERNEL_ACCOUNT;
+ struct ptdesc *ptdesc;
unsigned long *table;
+ if (mm == &init_mm)
+ gfp &= ~__GFP_ACCOUNT;
+ ptdesc = pagetable_alloc_noprof(gfp, CRST_ALLOC_ORDER);
if (!ptdesc)
return NULL;
- table = ptdesc_to_virt(ptdesc);
+ table = ptdesc_address(ptdesc);
__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
return table;
}
@@ -63,11 +40,15 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table)
static void __crst_table_upgrade(void *arg)
{
struct mm_struct *mm = arg;
+ struct ctlreg asce;
/* change all active ASCEs to avoid the creation of new TLBs */
if (current->active_mm == mm) {
- S390_lowcore.user_asce.val = mm->context.asce;
- local_ctl_load(7, &S390_lowcore.user_asce);
+ asce.val = mm->context.asce;
+ get_lowcore()->user_asce = asce;
+ local_ctl_load(7, &asce);
+ if (!test_thread_flag(TIF_ASCE_PRIMARY))
+ local_ctl_load(1, &asce);
}
__tlb_flush_local();
}
@@ -77,6 +58,8 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
unsigned long *pgd = NULL, *p4d = NULL, *__pgd;
unsigned long asce_limit = mm->context.asce_limit;
+ mmap_assert_write_locked(mm);
+
/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
VM_BUG_ON(asce_limit < _REGION2_SIZE);
@@ -88,23 +71,18 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
if (unlikely(!p4d))
goto err_p4d;
crst_table_init(p4d, _REGION2_ENTRY_EMPTY);
+ pagetable_p4d_ctor(virt_to_ptdesc(p4d));
}
if (end > _REGION1_SIZE) {
pgd = crst_table_alloc(mm);
if (unlikely(!pgd))
goto err_pgd;
crst_table_init(pgd, _REGION1_ENTRY_EMPTY);
+ pagetable_pgd_ctor(virt_to_ptdesc(pgd));
}
spin_lock_bh(&mm->page_table_lock);
- /*
- * This routine gets called with mmap_lock lock held and there is
- * no reason to optimize for the case of otherwise. However, if
- * that would ever change, the below check will let us know.
- */
- VM_BUG_ON(asce_limit != mm->context.asce_limit);
-
if (p4d) {
__pgd = (unsigned long *) mm->pgd;
p4d_populate(mm, (p4d_t *) p4d, (pud_t *) __pgd);
@@ -130,6 +108,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
return 0;
err_pgd:
+ pagetable_dtor(virt_to_ptdesc(p4d));
crst_table_free(mm, p4d);
err_p4d:
return -ENOMEM;
@@ -137,14 +116,14 @@ err_p4d:
#ifdef CONFIG_PGSTE
-struct ptdesc *page_table_alloc_pgste(struct mm_struct *mm)
+struct ptdesc *page_table_alloc_pgste_noprof(struct mm_struct *mm)
{
struct ptdesc *ptdesc;
u64 *table;
- ptdesc = pagetable_alloc(GFP_KERNEL, 0);
+ ptdesc = pagetable_alloc_noprof(GFP_KERNEL_ACCOUNT, 0);
if (ptdesc) {
- table = (u64 *)ptdesc_to_virt(ptdesc);
+ table = (u64 *)ptdesc_address(ptdesc);
__arch_set_page_dat(table, 1);
memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
@@ -159,51 +138,35 @@ void page_table_free_pgste(struct ptdesc *ptdesc)
#endif /* CONFIG_PGSTE */
-unsigned long *page_table_alloc(struct mm_struct *mm)
+unsigned long *page_table_alloc_noprof(struct mm_struct *mm)
{
+ gfp_t gfp = GFP_KERNEL_ACCOUNT;
struct ptdesc *ptdesc;
unsigned long *table;
- ptdesc = pagetable_alloc(GFP_KERNEL, 0);
+ if (mm == &init_mm)
+ gfp &= ~__GFP_ACCOUNT;
+ ptdesc = pagetable_alloc_noprof(gfp, 0);
if (!ptdesc)
return NULL;
- if (!pagetable_pte_ctor(ptdesc)) {
+ if (!pagetable_pte_ctor(mm, ptdesc)) {
pagetable_free(ptdesc);
return NULL;
}
- table = ptdesc_to_virt(ptdesc);
+ table = ptdesc_address(ptdesc);
__arch_set_page_dat(table, 1);
- /* pt_list is used by gmap only */
- INIT_LIST_HEAD(&ptdesc->pt_list);
memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
return table;
}
-static void pagetable_pte_dtor_free(struct ptdesc *ptdesc)
-{
- pagetable_pte_dtor(ptdesc);
- pagetable_free(ptdesc);
-}
-
void page_table_free(struct mm_struct *mm, unsigned long *table)
{
struct ptdesc *ptdesc = virt_to_ptdesc(table);
- pagetable_pte_dtor_free(ptdesc);
-}
-
-void __tlb_remove_table(void *table)
-{
- struct ptdesc *ptdesc = virt_to_ptdesc(table);
- struct page *page = ptdesc_page(ptdesc);
-
- if (compound_order(page) == CRST_ALLOC_ORDER) {
- /* pmd, pud, or p4d */
- pagetable_free(ptdesc);
- return;
- }
- pagetable_pte_dtor_free(ptdesc);
+ if (pagetable_is_reserved(ptdesc))
+ return free_reserved_ptdesc(ptdesc);
+ pagetable_dtor_free(ptdesc);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -211,7 +174,7 @@ static void pte_free_now(struct rcu_head *head)
{
struct ptdesc *ptdesc = container_of(head, struct ptdesc, pt_rcu_head);
- pagetable_pte_dtor_free(ptdesc);
+ pagetable_dtor_free(ptdesc);
}
void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
@@ -219,11 +182,6 @@ void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
struct ptdesc *ptdesc = virt_to_ptdesc(pgtable);
call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
- /*
- * THPs are not allowed for KVM guests. Warn if pgste ever reaches here.
- * Turn to the generic pte_free_defer() version once gmap is removed.
- */
- WARN_ON_ONCE(mm_has_pgste(mm));
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -278,7 +236,7 @@ static inline unsigned long base_##NAME##_addr_end(unsigned long addr, \
return (next - 1) < (end - 1) ? next : end; \
}
-BASE_ADDR_END_FUNC(page, _PAGE_SIZE)
+BASE_ADDR_END_FUNC(page, PAGE_SIZE)
BASE_ADDR_END_FUNC(segment, _SEGMENT_SIZE)
BASE_ADDR_END_FUNC(region3, _REGION3_SIZE)
BASE_ADDR_END_FUNC(region2, _REGION2_SIZE)
@@ -289,7 +247,7 @@ static inline unsigned long base_lra(unsigned long address)
unsigned long real;
asm volatile(
- " lra %0,0(%1)\n"
+ " lra %0,0(%1)"
: "=d" (real) : "a" (address) : "cc");
return real;
}
@@ -302,7 +260,7 @@ static int base_page_walk(unsigned long *origin, unsigned long addr,
if (!alloc)
return 0;
pte = origin;
- pte += (addr & _PAGE_INDEX) >> _PAGE_SHIFT;
+ pte += (addr & _PAGE_INDEX) >> PAGE_SHIFT;
do {
next = base_page_addr_end(addr, end);
*pte = base_lra(addr);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 2c944bafb030..666adcd681ab 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -4,6 +4,8 @@
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
*/
+#include <linux/cpufeature.h>
+#include <linux/export.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
@@ -14,15 +16,16 @@
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
-#include <linux/swapops.h>
+#include <linux/leafops.h>
#include <linux/sysctl.h>
#include <linux/ksm.h>
#include <linux/mman.h>
-#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/page-states.h>
+#include <asm/pgtable.h>
+#include <asm/machine.h>
pgprot_t pgprot_writecombine(pgprot_t prot)
{
@@ -34,22 +37,12 @@ pgprot_t pgprot_writecombine(pgprot_t prot)
}
EXPORT_SYMBOL_GPL(pgprot_writecombine);
-pgprot_t pgprot_writethrough(pgprot_t prot)
-{
- /*
- * mio_wb_bit_mask may be set on a different CPU, but it is only set
- * once at init and only read afterwards.
- */
- return __pgprot(pgprot_val(prot) & ~mio_wb_bit_mask);
-}
-EXPORT_SYMBOL_GPL(pgprot_writethrough);
-
static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, int nodat)
{
unsigned long opt, asce;
- if (MACHINE_HAS_TLB_GUEST) {
+ if (machine_has_tlb_guest()) {
opt = 0;
asce = READ_ONCE(mm->context.gmap_asce);
if (asce == 0UL || nodat)
@@ -69,7 +62,7 @@ static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr,
{
unsigned long opt, asce;
- if (MACHINE_HAS_TLB_GUEST) {
+ if (machine_has_tlb_guest()) {
opt = 0;
asce = READ_ONCE(mm->context.gmap_asce);
if (asce == 0UL || nodat)
@@ -94,7 +87,7 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm,
if (unlikely(pte_val(old) & _PAGE_INVALID))
return old;
atomic_inc(&mm->context.flush_count);
- if (MACHINE_HAS_TLB_LC &&
+ if (cpu_has_tlb_lc() &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
ptep_ipte_local(mm, addr, ptep, nodat);
else
@@ -123,28 +116,6 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
return old;
}
-static inline pgste_t pgste_get_lock(pte_t *ptep)
-{
- unsigned long value = 0;
-#ifdef CONFIG_PGSTE
- unsigned long *ptr = (unsigned long *)(ptep + PTRS_PER_PTE);
-
- do {
- value = __atomic64_or_barrier(PGSTE_PCL_BIT, ptr);
- } while (value & PGSTE_PCL_BIT);
- value |= PGSTE_PCL_BIT;
-#endif
- return __pgste(value);
-}
-
-static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
-{
-#ifdef CONFIG_PGSTE
- barrier();
- WRITE_ONCE(*(unsigned long *)(ptep + PTRS_PER_PTE), pgste_val(pgste) & ~PGSTE_PCL_BIT);
-#endif
-}
-
static inline pgste_t pgste_get(pte_t *ptep)
{
unsigned long pgste = 0;
@@ -173,10 +144,10 @@ static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
skey = (unsigned long) page_get_storage_key(address);
bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
/* Transfer page changed & referenced bit to guest bits in pgste */
- pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
+ pgste = set_pgste_bit(pgste, bits << 48); /* GR bit & GC bit */
/* Copy page access key and fetch protection bit to pgste */
- pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
- pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+ pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT);
+ pgste = set_pgste_bit(pgste, (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56);
#endif
return pgste;
@@ -210,7 +181,7 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
if ((pte_val(entry) & _PAGE_PRESENT) &&
(pte_val(entry) & _PAGE_WRITE) &&
!(pte_val(entry) & _PAGE_INVALID)) {
- if (!MACHINE_HAS_ESOP) {
+ if (!machine_has_esop()) {
/*
* Without enhanced suppression-on-protection force
* the dirty bit on for all writable ptes.
@@ -220,7 +191,7 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
}
if (!(pte_val(entry) & _PAGE_PROTECT))
/* This pte allows write access, set user-dirty */
- pgste_val(pgste) |= PGSTE_UC_BIT;
+ pgste = set_pgste_bit(pgste, PGSTE_UC_BIT);
}
#endif
set_pte(ptep, entry);
@@ -236,7 +207,7 @@ static inline pgste_t pgste_pte_notify(struct mm_struct *mm,
bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT);
if (bits) {
- pgste_val(pgste) ^= bits;
+ pgste = __pgste(pgste_val(pgste) ^ bits);
ptep_notify(mm, addr, ptep, bits);
}
#endif
@@ -303,9 +274,9 @@ void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
preempt_disable();
atomic_inc(&mm->context.flush_count);
if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
- __ptep_rdp(addr, ptep, 0, 0, 1);
+ __ptep_rdp(addr, ptep, 1);
else
- __ptep_rdp(addr, ptep, 0, 0, 0);
+ __ptep_rdp(addr, ptep, 0);
/*
* PTE is not invalidated by RDP, only _PAGE_PROTECT is cleared. That
* means it is still valid and active, and must not be changed according
@@ -343,7 +314,6 @@ pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
int nodat;
struct mm_struct *mm = vma->vm_mm;
- preempt_disable();
pgste = ptep_xchg_start(mm, addr, ptep);
nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
old = ptep_flush_lazy(mm, addr, ptep, nodat);
@@ -360,8 +330,6 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
pgste_t pgste;
struct mm_struct *mm = vma->vm_mm;
- if (!MACHINE_HAS_NX)
- pte = clear_pte_bit(pte, __pgprot(_PAGE_NOEXEC));
if (mm_has_pgste(mm)) {
pgste = pgste_get(ptep);
pgste_set_key(ptep, pgste, pte, mm);
@@ -370,13 +338,12 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
} else {
set_pte(ptep, pte);
}
- preempt_enable();
}
static inline void pmdp_idte_local(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
{
- if (MACHINE_HAS_TLB_GUEST)
+ if (machine_has_tlb_guest())
__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
mm->context.asce, IDTE_LOCAL);
else
@@ -388,19 +355,15 @@ static inline void pmdp_idte_local(struct mm_struct *mm,
static inline void pmdp_idte_global(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
{
- if (MACHINE_HAS_TLB_GUEST) {
+ if (machine_has_tlb_guest()) {
__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
mm->context.asce, IDTE_GLOBAL);
if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
gmap_pmdp_idte_global(mm, addr);
- } else if (MACHINE_HAS_IDTE) {
+ } else {
__pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
gmap_pmdp_idte_global(mm, addr);
- } else {
- __pmdp_csp(pmdp);
- if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
- gmap_pmdp_csp(mm, addr);
}
}
@@ -413,7 +376,7 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
return old;
atomic_inc(&mm->context.flush_count);
- if (MACHINE_HAS_TLB_LC &&
+ if (cpu_has_tlb_lc() &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
pmdp_idte_local(mm, addr, pmdp);
else
@@ -507,7 +470,7 @@ EXPORT_SYMBOL(pmdp_xchg_lazy);
static inline void pudp_idte_local(struct mm_struct *mm,
unsigned long addr, pud_t *pudp)
{
- if (MACHINE_HAS_TLB_GUEST)
+ if (machine_has_tlb_guest())
__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
mm->context.asce, IDTE_LOCAL);
else
@@ -517,17 +480,11 @@ static inline void pudp_idte_local(struct mm_struct *mm,
static inline void pudp_idte_global(struct mm_struct *mm,
unsigned long addr, pud_t *pudp)
{
- if (MACHINE_HAS_TLB_GUEST)
+ if (machine_has_tlb_guest())
__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
mm->context.asce, IDTE_GLOBAL);
- else if (MACHINE_HAS_IDTE)
- __pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL);
else
- /*
- * Invalid bit position is the same for pmd and pud, so we can
- * re-use _pmd_csp() here
- */
- __pmdp_csp((pmd_t *) pudp);
+ __pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL);
}
static inline pud_t pudp_flush_direct(struct mm_struct *mm,
@@ -539,7 +496,7 @@ static inline pud_t pudp_flush_direct(struct mm_struct *mm,
if (pud_val(old) & _REGION_ENTRY_INVALID)
return old;
atomic_inc(&mm->context.flush_count);
- if (MACHINE_HAS_TLB_LC &&
+ if (cpu_has_tlb_lc() &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
pudp_idte_local(mm, addr, pudp);
else
@@ -611,7 +568,7 @@ void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
/* the mm_has_pgste() check is done in set_pte_at() */
preempt_disable();
pgste = pgste_get_lock(ptep);
- pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
+ pgste = clear_pgste_bit(pgste, _PGSTE_GPS_ZERO);
pgste_set_key(ptep, pgste, entry, mm);
pgste = pgste_set_pte(ptep, pgste, entry);
pgste_set_unlock(ptep, pgste);
@@ -624,7 +581,7 @@ void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
preempt_disable();
pgste = pgste_get_lock(ptep);
- pgste_val(pgste) |= PGSTE_IN_BIT;
+ pgste = set_pgste_bit(pgste, PGSTE_IN_BIT);
pgste_set_unlock(ptep, pgste);
preempt_enable();
}
@@ -669,7 +626,7 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
entry = clear_pte_bit(entry, __pgprot(_PAGE_INVALID));
entry = set_pte_bit(entry, __pgprot(_PAGE_PROTECT));
}
- pgste_val(pgste) |= bit;
+ pgste = set_pgste_bit(pgste, bit);
pgste = pgste_set_pte(ptep, pgste, entry);
pgste_set_unlock(ptep, pgste);
return 0;
@@ -689,7 +646,7 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
if (!(pte_val(spte) & _PAGE_INVALID) &&
!((pte_val(spte) & _PAGE_PROTECT) &&
!(pte_val(pte) & _PAGE_PROTECT))) {
- pgste_val(spgste) |= PGSTE_VSIE_BIT;
+ spgste = set_pgste_bit(spgste, PGSTE_VSIE_BIT);
tpgste = pgste_get_lock(tptep);
tpte = __pte((pte_val(spte) & PAGE_MASK) |
(pte_val(pte) & _PAGE_PROTECT));
@@ -716,12 +673,12 @@ void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)
pgste_set_unlock(ptep, pgste);
}
-static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
+static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry)
{
- if (!non_swap_entry(entry))
+ if (softleaf_is_swap(entry))
dec_mm_counter(mm, MM_SWAPENTS);
- else if (is_migration_entry(entry)) {
- struct folio *folio = pfn_swap_entry_folio(entry);
+ else if (softleaf_is_migration(entry)) {
+ struct folio *folio = softleaf_to_folio(entry);
dec_mm_counter(mm, mm_counter(folio));
}
@@ -743,11 +700,11 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
if (!reset && pte_swap(pte) &&
((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
(pgstev & _PGSTE_GPS_ZERO))) {
- ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
+ ptep_zap_softleaf_entry(mm, softleaf_from_pte(pte));
pte_clear(mm, addr, ptep);
}
if (reset)
- pgste_val(pgste) &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
+ pgste = clear_pgste_bit(pgste, _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
pgste_set_unlock(ptep, pgste);
preempt_enable();
}
@@ -760,8 +717,8 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
/* Clear storage key ACC and F, but set R/C */
preempt_disable();
pgste = pgste_get_lock(ptep);
- pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
- pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT;
+ pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT);
+ pgste = set_pgste_bit(pgste, PGSTE_GR_BIT | PGSTE_GC_BIT);
ptev = pte_val(*ptep);
if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0);
@@ -782,13 +739,13 @@ bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr,
pgste = pgste_get_lock(ptep);
dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
- pgste_val(pgste) &= ~PGSTE_UC_BIT;
+ pgste = clear_pgste_bit(pgste, PGSTE_UC_BIT);
pte = *ptep;
if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
pgste = pgste_pte_notify(mm, addr, ptep, pgste);
nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
ptep_ipte_global(mm, addr, ptep, nodat);
- if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
+ if (machine_has_esop() || !(pte_val(pte) & _PAGE_WRITE))
pte = set_pte_bit(pte, __pgprot(_PAGE_PROTECT));
else
pte = set_pte_bit(pte, __pgprot(_PAGE_INVALID));
@@ -844,11 +801,11 @@ again:
if (!ptep)
goto again;
new = old = pgste_get_lock(ptep);
- pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
- PGSTE_ACC_BITS | PGSTE_FP_BIT);
+ new = clear_pgste_bit(new, PGSTE_GR_BIT | PGSTE_GC_BIT |
+ PGSTE_ACC_BITS | PGSTE_FP_BIT);
keyul = (unsigned long) key;
- pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
- pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+ new = set_pgste_bit(new, (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48);
+ new = set_pgste_bit(new, (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56);
if (!(pte_val(*ptep) & _PAGE_INVALID)) {
unsigned long bits, skey;
@@ -859,12 +816,12 @@ again:
/* Set storage key ACC and FP */
page_set_storage_key(paddr, skey, !nq);
/* Merge host changed & referenced into pgste */
- pgste_val(new) |= bits << 52;
+ new = set_pgste_bit(new, bits << 52);
}
/* changing the guest storage key is considered a change of the page */
if ((pgste_val(new) ^ pgste_val(old)) &
(PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
- pgste_val(new) |= PGSTE_UC_BIT;
+ new = set_pgste_bit(new, PGSTE_UC_BIT);
pgste_set_unlock(ptep, new);
pte_unmap_unlock(ptep, ptl);
@@ -952,19 +909,19 @@ again:
goto again;
new = old = pgste_get_lock(ptep);
/* Reset guest reference bit only */
- pgste_val(new) &= ~PGSTE_GR_BIT;
+ new = clear_pgste_bit(new, PGSTE_GR_BIT);
if (!(pte_val(*ptep) & _PAGE_INVALID)) {
paddr = pte_val(*ptep) & PAGE_MASK;
cc = page_reset_referenced(paddr);
/* Merge real referenced bit into host-set */
- pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT;
+ new = set_pgste_bit(new, ((unsigned long)cc << 53) & PGSTE_HR_BIT);
}
/* Reflect guest's logical view, not physical */
cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49;
/* Changing the guest storage key is considered a change of the page */
if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT)
- pgste_val(new) |= PGSTE_UC_BIT;
+ new = set_pgste_bit(new, PGSTE_UC_BIT);
pgste_set_unlock(ptep, new);
pte_unmap_unlock(ptep, ptl);
@@ -1128,7 +1085,7 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
if (res)
pgstev |= _PGSTE_GPS_ZERO;
- pgste_val(pgste) = pgstev;
+ pgste = __pgste(pgstev);
pgste_set_unlock(ptep, pgste);
pte_unmap_unlock(ptep, ptl);
return res;
@@ -1161,8 +1118,8 @@ int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
return -EFAULT;
new = pgste_get_lock(ptep);
- pgste_val(new) &= ~bits;
- pgste_val(new) |= value & bits;
+ new = clear_pgste_bit(new, bits);
+ new = set_pgste_bit(new, value & bits);
pgste_set_unlock(ptep, new);
pte_unmap_unlock(ptep, ptl);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 41c714e21292..d96587b84e81 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -4,6 +4,8 @@
*/
#include <linux/memory_hotplug.h>
+#include <linux/bootmem_info.h>
+#include <linux/cpufeature.h>
#include <linux/memblock.h>
#include <linux/pfn.h>
#include <linux/mm.h>
@@ -38,15 +40,21 @@ static void __ref *vmem_alloc_pages(unsigned int order)
static void vmem_free_pages(unsigned long addr, int order, struct vmem_altmap *altmap)
{
+ unsigned int nr_pages = 1 << order;
+ struct page *page;
+
if (altmap) {
vmem_altmap_free(altmap, 1 << order);
return;
}
- /* We don't expect boot memory to be removed ever. */
- if (!slab_is_available() ||
- WARN_ON_ONCE(PageReserved(virt_to_page((void *)addr))))
- return;
- free_pages(addr, order);
+ page = virt_to_page((void *)addr);
+ if (PageReserved(page)) {
+ /* allocated from memblock */
+ while (nr_pages--)
+ free_bootmem_page(page++);
+ } else {
+ free_pages(addr, order);
+ }
}
void *vmem_crst_alloc(unsigned long val)
@@ -63,13 +71,12 @@ void *vmem_crst_alloc(unsigned long val)
pte_t __ref *vmem_pte_alloc(void)
{
- unsigned long size = PTRS_PER_PTE * sizeof(pte_t);
pte_t *pte;
if (slab_is_available())
- pte = (pte_t *) page_table_alloc(&init_mm);
+ pte = (pte_t *)page_table_alloc(&init_mm);
else
- pte = (pte_t *) memblock_alloc(size, size);
+ pte = (pte_t *)memblock_alloc(PAGE_SIZE, PAGE_SIZE);
if (!pte)
return NULL;
memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
@@ -79,10 +86,6 @@ pte_t __ref *vmem_pte_alloc(void)
static void vmem_pte_free(unsigned long *table)
{
- /* We don't expect boot memory to be removed ever. */
- if (!slab_is_available() ||
- WARN_ON_ONCE(PageReserved(virt_to_page(table))))
- return;
page_table_free(&init_mm, table);
}
@@ -171,9 +174,6 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
pte_t *pte;
prot = pgprot_val(PAGE_KERNEL);
- if (!MACHINE_HAS_NX)
- prot &= ~_PAGE_NOEXEC;
-
pte = pte_offset_kernel(pmd, addr);
for (; addr < end; addr += PAGE_SIZE, pte++) {
if (!add) {
@@ -230,9 +230,6 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
pte_t *pte;
prot = pgprot_val(SEGMENT_KERNEL);
- if (!MACHINE_HAS_NX)
- prot &= ~_SEGMENT_ENTRY_NOEXEC;
-
pmd = pmd_offset(pud, addr);
for (; addr < end; addr = next, pmd++) {
next = pmd_addr_end(addr, end);
@@ -255,12 +252,12 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
} else if (pmd_none(*pmd)) {
if (IS_ALIGNED(addr, PMD_SIZE) &&
IS_ALIGNED(next, PMD_SIZE) &&
- MACHINE_HAS_EDAT1 && direct &&
+ cpu_has_edat1() && direct &&
!debug_pagealloc_enabled()) {
set_pmd(pmd, __pmd(__pa(addr) | prot));
pages++;
continue;
- } else if (!direct && MACHINE_HAS_EDAT1) {
+ } else if (!direct && cpu_has_edat1()) {
void *new_page;
/*
@@ -324,8 +321,6 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
pmd_t *pmd;
prot = pgprot_val(REGION3_KERNEL);
- if (!MACHINE_HAS_NX)
- prot &= ~_REGION_ENTRY_NOEXEC;
pud = pud_offset(p4d, addr);
for (; addr < end; addr = next, pud++) {
next = pud_addr_end(addr, end);
@@ -343,7 +338,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
} else if (pud_none(*pud)) {
if (IS_ALIGNED(addr, PUD_SIZE) &&
IS_ALIGNED(next, PUD_SIZE) &&
- MACHINE_HAS_EDAT2 && direct &&
+ cpu_has_edat2() && direct &&
!debug_pagealloc_enabled()) {
set_pud(pud, __pud(__pa(addr) | prot));
pages++;
@@ -661,25 +656,16 @@ void __init vmem_map_init(void)
{
__set_memory_rox(_stext, _etext);
__set_memory_ro(_etext, __end_rodata);
- __set_memory_rox(_sinittext, _einittext);
__set_memory_rox(__stext_amode31, __etext_amode31);
/*
* If the BEAR-enhancement facility is not installed the first
* prefix page is used to return to the previous context with
* an LPSWE instruction and therefore must be executable.
*/
- if (!static_key_enabled(&cpu_has_bear))
+ if (!cpu_has_bear())
set_memory_x(0, 1);
- if (debug_pagealloc_enabled()) {
- /*
- * Use RELOC_HIDE() as long as __va(0) translates to NULL,
- * since performing pointer arithmetic on a NULL pointer
- * has undefined behavior and generates compiler warnings.
- */
- __set_memory_4k(__va(0), RELOC_HIDE(__va(0), ident_map_size));
- }
- if (MACHINE_HAS_NX)
- system_ctl_set_bit(0, CR0_INSTRUCTION_EXEC_PROTECTION_BIT);
+ if (debug_pagealloc_enabled())
+ __set_memory_4k(__va(0), absolute_pointer(__va(0)) + ident_map_size);
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);
}
diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile
index 8cab6deb0403..9275cf63192a 100644
--- a/arch/s390/net/Makefile
+++ b/arch/s390/net/Makefile
@@ -2,5 +2,5 @@
#
# Arch-specific network modules
#
-obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_timed_may_goto.o
obj-$(CONFIG_HAVE_PNETID) += pnet.o
diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h
deleted file mode 100644
index 7822ea92e54a..000000000000
--- a/arch/s390/net/bpf_jit.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * BPF Jit compiler defines
- *
- * Copyright IBM Corp. 2012,2015
- *
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- * Michael Holzheu <holzheu@linux.vnet.ibm.com>
- */
-
-#ifndef __ARCH_S390_NET_BPF_JIT_H
-#define __ARCH_S390_NET_BPF_JIT_H
-
-#ifndef __ASSEMBLY__
-
-#include <linux/filter.h>
-#include <linux/types.h>
-
-#endif /* __ASSEMBLY__ */
-
-/*
- * Stackframe layout (packed stack):
- *
- * ^ high
- * +---------------+ |
- * | old backchain | |
- * +---------------+ |
- * | r15 - r6 | |
- * +---------------+ |
- * | 4 byte align | |
- * | tail_call_cnt | |
- * BFP -> +===============+ |
- * | | |
- * | BPF stack | |
- * | | |
- * R15+160 -> +---------------+ |
- * | new backchain | |
- * R15+152 -> +---------------+ |
- * | + 152 byte SA | |
- * R15 -> +---------------+ + low
- *
- * We get 160 bytes stack space from calling function, but only use
- * 12 * 8 byte for old backchain, r15..r6, and tail_call_cnt.
- *
- * The stack size used by the BPF program ("BPF stack" above) is passed
- * via "aux->stack_depth".
- */
-#define STK_SPACE_ADD (160)
-#define STK_160_UNUSED (160 - 12 * 8)
-#define STK_OFF (STK_SPACE_ADD - STK_160_UNUSED)
-
-#define STK_OFF_R6 (160 - 11 * 8) /* Offset of r6 on stack */
-#define STK_OFF_TCCNT (160 - 12 * 8) /* Offset of tail_call_cnt on stack */
-
-#endif /* __ARCH_S390_NET_BPF_JIT_H */
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 9d440a0b729e..579461d471bb 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -15,8 +15,7 @@
* Michael Holzheu <holzheu@linux.vnet.ibm.com>
*/
-#define KMSG_COMPONENT "bpf_jit"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "bpf_jit: " fmt
#include <linux/netdevice.h>
#include <linux/filter.h>
@@ -32,7 +31,6 @@
#include <asm/set_memory.h>
#include <asm/text-patching.h>
#include <asm/unwind.h>
-#include "bpf_jit.h"
struct bpf_jit {
u32 seen; /* Flags to remember seen eBPF instructions */
@@ -48,14 +46,13 @@ struct bpf_jit {
int lit64; /* Current position in 64-bit literal pool */
int base_ip; /* Base address for literal pool */
int exit_ip; /* Address of exit */
- int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */
- int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */
int tail_call_start; /* Tail call start offset */
int excnt; /* Number of exception table entries */
int prologue_plt_ret; /* Return address for prologue hotpatch PLT */
int prologue_plt; /* Start of prologue hotpatch PLT */
int kern_arena; /* Pool offset of kernel arena address */
u64 user_arena; /* User arena address */
+ u32 frame_off; /* Offset of struct bpf_prog from %r15 */
};
#define SEEN_MEM BIT(0) /* use mem[] for temporary storage */
@@ -127,6 +124,18 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
jit->seen_regs |= (1 << r1);
}
+static s32 off_to_pcrel(struct bpf_jit *jit, u32 off)
+{
+ return off - jit->prg;
+}
+
+static s64 ptr_to_pcrel(struct bpf_jit *jit, const void *ptr)
+{
+ if (jit->prg_buf)
+ return (const u8 *)ptr - ((const u8 *)jit->prg_buf + jit->prg);
+ return 0;
+}
+
#define REG_SET_SEEN(b1) \
({ \
reg_set_seen(jit, b1); \
@@ -201,7 +210,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT4_PCREL_RIC(op, mask, target) \
({ \
- int __rel = ((target) - jit->prg) / 2; \
+ int __rel = off_to_pcrel(jit, target) / 2; \
_EMIT4((op) | (mask) << 20 | (__rel & 0xffff)); \
})
@@ -239,7 +248,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT6_PCREL_RIEB(op1, op2, b1, b2, mask, target) \
({ \
- unsigned int rel = (int)((target) - jit->prg) / 2; \
+ unsigned int rel = off_to_pcrel(jit, target) / 2; \
_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), \
(op2) | (mask) << 12); \
REG_SET_SEEN(b1); \
@@ -248,7 +257,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT6_PCREL_RIEC(op1, op2, b1, imm, mask, target) \
({ \
- unsigned int rel = (int)((target) - jit->prg) / 2; \
+ unsigned int rel = off_to_pcrel(jit, target) / 2; \
_EMIT6((op1) | (reg_high(b1) | (mask)) << 16 | \
(rel & 0xffff), (op2) | ((imm) & 0xff) << 8); \
REG_SET_SEEN(b1); \
@@ -257,29 +266,41 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask) \
({ \
- int rel = (addrs[(i) + (off) + 1] - jit->prg) / 2; \
+ int rel = off_to_pcrel(jit, addrs[(i) + (off) + 1]) / 2;\
_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
+static void emit6_pcrel_ril(struct bpf_jit *jit, u32 op, s64 pcrel)
+{
+ u32 pc32dbl = (s32)(pcrel / 2);
+
+ _EMIT6(op | pc32dbl >> 16, pc32dbl & 0xffff);
+}
+
+static void emit6_pcrel_rilb(struct bpf_jit *jit, u32 op, u8 b, s64 pcrel)
+{
+ emit6_pcrel_ril(jit, op | reg_high(b) << 16, pcrel);
+ REG_SET_SEEN(b);
+}
+
#define EMIT6_PCREL_RILB(op, b, target) \
-({ \
- unsigned int rel = (int)((target) - jit->prg) / 2; \
- _EMIT6((op) | reg_high(b) << 16 | rel >> 16, rel & 0xffff);\
- REG_SET_SEEN(b); \
-})
+ emit6_pcrel_rilb(jit, op, b, off_to_pcrel(jit, target))
-#define EMIT6_PCREL_RIL(op, target) \
-({ \
- unsigned int rel = (int)((target) - jit->prg) / 2; \
- _EMIT6((op) | rel >> 16, rel & 0xffff); \
-})
+#define EMIT6_PCREL_RILB_PTR(op, b, target_ptr) \
+ emit6_pcrel_rilb(jit, op, b, ptr_to_pcrel(jit, target_ptr))
+
+static void emit6_pcrel_rilc(struct bpf_jit *jit, u32 op, u8 mask, s64 pcrel)
+{
+ emit6_pcrel_ril(jit, op | mask << 20, pcrel);
+}
#define EMIT6_PCREL_RILC(op, mask, target) \
-({ \
- EMIT6_PCREL_RIL((op) | (mask) << 20, (target)); \
-})
+ emit6_pcrel_rilc(jit, op, mask, off_to_pcrel(jit, target))
+
+#define EMIT6_PCREL_RILC_PTR(op, mask, target_ptr) \
+ emit6_pcrel_rilc(jit, op, mask, ptr_to_pcrel(jit, target_ptr))
#define _EMIT6_IMM(op, imm) \
({ \
@@ -404,11 +425,25 @@ static void jit_fill_hole(void *area, unsigned int size)
}
/*
+ * Caller-allocated part of the frame.
+ * Thanks to packed stack, its otherwise unused initial part can be used for
+ * the BPF stack and for the next frame.
+ */
+struct prog_frame {
+ u64 unused[8];
+ /* BPF stack starts here and grows towards 0 */
+ u32 tail_call_cnt;
+ u32 pad;
+ u64 r6[10]; /* r6 - r15 */
+ u64 backchain;
+} __packed;
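(Illustration, not part of the patch: the offsets implied by struct prog_frame line up with the constants from the deleted bpf_jit.h, and its size equals the 160-byte STACK_FRAME_OVERHEAD checked by the BUILD_BUG_ON in the prologue below.)

    /* Illustrative compile-time checks; values follow from the layout above. */
    _Static_assert(offsetof(struct prog_frame, tail_call_cnt) == 64,  "was STK_OFF_TCCNT");
    _Static_assert(offsetof(struct prog_frame, r6)            == 72,  "was STK_OFF_R6");
    _Static_assert(offsetof(struct prog_frame, backchain)     == 152, "backchain save slot");
    _Static_assert(sizeof(struct prog_frame)                  == 160, "STACK_FRAME_OVERHEAD");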
+
+/*
* Save registers from "rs" (register start) to "re" (register end) on stack
*/
static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
{
- u32 off = STK_OFF_R6 + (rs - 6) * 8;
+ u32 off = offsetof(struct prog_frame, r6) + (rs - 6) * 8;
if (rs == re)
/* stg %rs,off(%r15) */
@@ -421,12 +456,9 @@ static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
/*
* Restore registers from "rs" (register start) to "re" (register end) on stack
*/
-static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re, u32 stack_depth)
+static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re)
{
- u32 off = STK_OFF_R6 + (rs - 6) * 8;
-
- if (jit->seen & SEEN_STACK)
- off += STK_OFF + stack_depth;
+ u32 off = jit->frame_off + offsetof(struct prog_frame, r6) + (rs - 6) * 8;
if (rs == re)
/* lg %rs,off(%r15) */
@@ -470,8 +502,7 @@ static int get_end(u16 seen_regs, int start)
* Save and restore clobbered registers (6-15) on stack.
* We save/restore registers in chunks with gap >= 2 registers.
*/
-static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth,
- u16 extra_regs)
+static void save_restore_regs(struct bpf_jit *jit, int op, u16 extra_regs)
{
u16 seen_regs = jit->seen_regs | extra_regs;
const int last = 15, save_restore_size = 6;
@@ -494,7 +525,7 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth,
if (op == REGS_SAVE)
save_regs(jit, rs, re);
else
- restore_regs(jit, rs, re, stack_depth);
+ restore_regs(jit, rs, re);
re++;
} while (re <= last);
}
@@ -503,7 +534,7 @@ static void bpf_skip(struct bpf_jit *jit, int size)
{
if (size >= 6 && !is_valid_rel(size)) {
/* brcl 0xf,size */
- EMIT6_PCREL_RIL(0xc0f4000000, size);
+ EMIT6_PCREL_RILC(0xc0040000, 0xf, size);
size -= 6;
} else if (size >= 4 && is_valid_rel(size)) {
/* brc 0xf,size */
@@ -544,18 +575,27 @@ static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target)
{
memcpy(plt, &bpf_plt, sizeof(*plt));
plt->ret = ret;
- plt->target = target;
+ /*
+ * (target == NULL) implies that the branch to this PLT entry was
+ * patched and became a no-op. However, a CPU could have jumped
+ * to this PLT entry before patching and may still be executing it.
+ *
+ * Since the intention in this case is to make the PLT entry a no-op,
+ * make the target point to the return label instead of NULL.
+ */
+ plt->target = target ?: ret;
}
/*
* Emit function prologue
*
* Save registers and create stack frame if necessary.
- * See stack frame layout description in "bpf_jit.h"!
+ * Stack frame layout is described by struct prog_frame.
*/
-static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
- u32 stack_depth)
+static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp)
{
+ BUILD_BUG_ON(sizeof(struct prog_frame) != STACK_FRAME_OVERHEAD);
+
/* No-op for hotpatching */
/* brcl 0,prologue_plt */
EMIT6_PCREL_RILC(0xc0040000, 0, jit->prologue_plt);
@@ -563,8 +603,9 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
if (!bpf_is_subprog(fp)) {
/* Initialize the tail call counter in the main program. */
- /* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
- _EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
+ /* xc tail_call_cnt(4,%r15),tail_call_cnt(%r15) */
+ _EMIT6(0xd703f000 | offsetof(struct prog_frame, tail_call_cnt),
+ 0xf000 | offsetof(struct prog_frame, tail_call_cnt));
} else {
/*
* Skip the tail call counter initialization in subprograms.
@@ -587,7 +628,7 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
jit->seen_regs |= NVREGS;
} else {
/* Save registers */
- save_restore_regs(jit, REGS_SAVE, stack_depth,
+ save_restore_regs(jit, REGS_SAVE,
fp->aux->exception_boundary ? NVREGS : 0);
}
/* Setup literal pool */
@@ -605,77 +646,44 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
}
/* Setup stack and backchain */
if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) {
- if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
- /* lgr %w1,%r15 (backchain) */
- EMIT4(0xb9040000, REG_W1, REG_15);
- /* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
- EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
- /* aghi %r15,-STK_OFF */
- EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth));
- if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
- /* stg %w1,152(%r15) (backchain) */
- EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
- REG_15, 152);
- }
-}
-
-/*
- * Emit an expoline for a jump that follows
- */
-static void emit_expoline(struct bpf_jit *jit)
-{
- /* exrl %r0,.+10 */
- EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
- /* j . */
- EMIT4_PCREL(0xa7f40000, 0);
-}
-
-/*
- * Emit __s390_indirect_jump_r1 thunk if necessary
- */
-static void emit_r1_thunk(struct bpf_jit *jit)
-{
- if (nospec_uses_trampoline()) {
- jit->r1_thunk_ip = jit->prg;
- emit_expoline(jit);
- /* br %r1 */
- _EMIT2(0x07f1);
+ /* lgr %w1,%r15 (backchain) */
+ EMIT4(0xb9040000, REG_W1, REG_15);
+ /* la %bfp,unused_end(%r15) (BPF frame pointer) */
+ EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15,
+ offsetofend(struct prog_frame, unused));
+ /* aghi %r15,-frame_off */
+ EMIT4_IMM(0xa70b0000, REG_15, -jit->frame_off);
+ /* stg %w1,backchain(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
+ REG_15,
+ offsetof(struct prog_frame, backchain));
}
}
/*
- * Call r1 either directly or via __s390_indirect_jump_r1 thunk
+ * Jump using a register either directly or via an expoline thunk
*/
-static void call_r1(struct bpf_jit *jit)
-{
- if (nospec_uses_trampoline())
- /* brasl %r14,__s390_indirect_jump_r1 */
- EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
- else
- /* basr %r14,%r1 */
- EMIT2(0x0d00, REG_14, REG_1);
-}
+#define EMIT_JUMP_REG(reg) do { \
+ if (nospec_uses_trampoline()) \
+ /* brcl 0xf,__s390_indirect_jump_rN */ \
+ EMIT6_PCREL_RILC_PTR(0xc0040000, 0x0f, \
+ __s390_indirect_jump_r ## reg); \
+ else \
+ /* br %rN */ \
+ _EMIT2(0x07f0 | reg); \
+} while (0)
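EMIT_JUMP_REG() folds the old emit_expoline()/emit_r1_thunk()/call_r1() helpers into one place and jumps through the kernel's shared __s390_indirect_jump_rN thunks instead of emitting a thunk per program. Instantiated for %r14 (the shape used by the epilogue and the trampoline exit below), the macro body reduces to roughly:

	if (nospec_uses_trampoline())
		/* brcl 0xf,__s390_indirect_jump_r14 */
		EMIT6_PCREL_RILC_PTR(0xc0040000, 0x0f, __s390_indirect_jump_r14);
	else
		/* br %r14 */
		_EMIT2(0x07f0 | 14);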
/*
* Function epilogue
*/
-static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
+static void bpf_jit_epilogue(struct bpf_jit *jit)
{
jit->exit_ip = jit->prg;
/* Load exit code: lgr %r2,%b0 */
EMIT4(0xb9040000, REG_2, BPF_REG_0);
/* Restore registers */
- save_restore_regs(jit, REGS_RESTORE, stack_depth, 0);
- if (nospec_uses_trampoline()) {
- jit->r14_thunk_ip = jit->prg;
- /* Generate __s390_indirect_jump_r14 thunk */
- emit_expoline(jit);
- }
- /* br %r14 */
- _EMIT2(0x07fe);
-
- if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
- emit_r1_thunk(jit);
+ save_restore_regs(jit, REGS_RESTORE, 0);
+ EMIT_JUMP_REG(14);
jit->prg = ALIGN(jit->prg, 8);
jit->prologue_plt = jit->prg;
@@ -856,7 +864,7 @@ static int sign_extend(struct bpf_jit *jit, int r, u8 size, u8 flags)
* stack space for the large switch statement.
*/
static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
- int i, bool extra_pass, u32 stack_depth)
+ int i, bool extra_pass)
{
struct bpf_insn *insn = &fp->insnsi[i];
s32 branch_oc_off = insn->off;
@@ -1767,19 +1775,21 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
REG_SET_SEEN(BPF_REG_5);
jit->seen |= SEEN_FUNC;
+
/*
* Copy the tail call counter to where the callee expects it.
- *
- * Note 1: The callee can increment the tail call counter, but
- * we do not load it back, since the x86 JIT does not do this
- * either.
- *
- * Note 2: We assume that the verifier does not let us call the
- * main program, which clears the tail call counter on entry.
*/
- /* mvc STK_OFF_TCCNT(4,%r15),N(%r15) */
- _EMIT6(0xd203f000 | STK_OFF_TCCNT,
- 0xf000 | (STK_OFF_TCCNT + STK_OFF + stack_depth));
+
+ if (insn->src_reg == BPF_PSEUDO_CALL)
+ /*
+ * mvc tail_call_cnt(4,%r15),
+ * frame_off+tail_call_cnt(%r15)
+ */
+ _EMIT6(0xd203f000 | offsetof(struct prog_frame,
+ tail_call_cnt),
+ 0xf000 | (jit->frame_off +
+ offsetof(struct prog_frame,
+ tail_call_cnt)));
/* Sign-extend the kfunc arguments. */
if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
@@ -1795,12 +1805,38 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
}
}
- /* lgrl %w1,func */
- EMIT6_PCREL_RILB(0xc4080000, REG_W1, _EMIT_CONST_U64(func));
- /* %r1() */
- call_r1(jit);
- /* lgr %b0,%r2: load return value into %b0 */
- EMIT4(0xb9040000, BPF_REG_0, REG_2);
+ if ((void *)func == arch_bpf_timed_may_goto) {
+ /*
+ * arch_bpf_timed_may_goto() has a special ABI: the
+ * parameters are in BPF_REG_AX and BPF_REG_10; the
+ * return value is in BPF_REG_AX; and all GPRs except
+ * REG_W0, REG_W1, and BPF_REG_AX are callee-saved.
+ */
+
+ /* brasl %r0,func */
+ EMIT6_PCREL_RILB_PTR(0xc0050000, REG_0, (void *)func);
+ } else {
+ /* brasl %r14,func */
+ EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, (void *)func);
+ /* lgr %b0,%r2: load return value into %b0 */
+ EMIT4(0xb9040000, BPF_REG_0, REG_2);
+ }
+
+ /*
+ * Copy the potentially updated tail call counter back.
+ */
+
+ if (insn->src_reg == BPF_PSEUDO_CALL)
+ /*
+ * mvc frame_off+tail_call_cnt(4,%r15),
+ * tail_call_cnt(%r15)
+ */
+ _EMIT6(0xd203f000 | (jit->frame_off +
+ offsetof(struct prog_frame,
+ tail_call_cnt)),
+ 0xf000 | offsetof(struct prog_frame,
+ tail_call_cnt));
+
break;
}
case BPF_JMP | BPF_TAIL_CALL: {
@@ -1830,10 +1866,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
* goto out;
*/
- if (jit->seen & SEEN_STACK)
- off = STK_OFF_TCCNT + STK_OFF + stack_depth;
- else
- off = STK_OFF_TCCNT;
+ off = jit->frame_off +
+ offsetof(struct prog_frame, tail_call_cnt);
/* lhi %w0,1 */
EMIT4_IMM(0xa7080000, REG_W0, 1);
/* laal %w1,%w0,off(%r15) */
@@ -1863,7 +1897,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
/*
* Restore registers before calling function
*/
- save_restore_regs(jit, REGS_RESTORE, stack_depth, 0);
+ save_restore_regs(jit, REGS_RESTORE, 0);
/*
* goto *(prog->bpf_func + tail_call_start);
@@ -1877,7 +1911,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
/* aghi %r1,tail_call_start */
EMIT4_IMM(0xa70b0000, REG_1, jit->tail_call_start);
/* brcl 0xf,__s390_indirect_jump_r1 */
- EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->r1_thunk_ip);
+ EMIT6_PCREL_RILC_PTR(0xc0040000, 0xf,
+ __s390_indirect_jump_r1);
} else {
/* bc 0xf,tail_call_start(%r1) */
_EMIT4(0x47f01000 + jit->tail_call_start);
@@ -2155,7 +2190,7 @@ static int bpf_set_addr(struct bpf_jit *jit, int i)
* Compile eBPF program into s390x code
*/
static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
- bool extra_pass, u32 stack_depth)
+ bool extra_pass)
{
int i, insn_count, lit32_size, lit64_size;
u64 kern_arena;
@@ -2164,24 +2199,30 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
jit->lit64 = jit->lit64_start;
jit->prg = 0;
jit->excnt = 0;
+ if (is_first_pass(jit) || (jit->seen & SEEN_STACK))
+ jit->frame_off = sizeof(struct prog_frame) -
+ offsetofend(struct prog_frame, unused) +
+ round_up(fp->aux->stack_depth, 8);
+ else
+ jit->frame_off = 0;
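frame_off, computed above, is the distance between the JITed program's %r15 and its caller's frame once the prologue has allocated stack; it stays 0 when no frame is needed. A worked example with assumed numbers:

/*
 * Worked example (values assumed for illustration):
 *   sizeof(struct prog_frame)              == 160
 *   offsetofend(struct prog_frame, unused) ==  64
 *   fp->aux->stack_depth                   ==  40
 * gives frame_off = 160 - 64 + round_up(40, 8) = 136, i.e. the prologue's
 * "aghi %r15,-frame_off" lowers %r15 by 136 bytes, and caller-frame slots
 * are then reached at frame_off + offsetof(struct prog_frame, ...).
 */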
kern_arena = bpf_arena_get_kern_vm_start(fp->aux->arena);
if (kern_arena)
jit->kern_arena = _EMIT_CONST_U64(kern_arena);
jit->user_arena = bpf_arena_get_user_vm_start(fp->aux->arena);
- bpf_jit_prologue(jit, fp, stack_depth);
+ bpf_jit_prologue(jit, fp);
if (bpf_set_addr(jit, 0) < 0)
return -1;
for (i = 0; i < fp->len; i += insn_count) {
- insn_count = bpf_jit_insn(jit, fp, i, extra_pass, stack_depth);
+ insn_count = bpf_jit_insn(jit, fp, i, extra_pass);
if (insn_count < 0)
return -1;
/* Next instruction address */
if (bpf_set_addr(jit, i + insn_count) < 0)
return -1;
}
- bpf_jit_epilogue(jit, stack_depth);
+ bpf_jit_epilogue(jit);
lit32_size = jit->lit32 - jit->lit32_start;
lit64_size = jit->lit64 - jit->lit64_start;
@@ -2257,7 +2298,6 @@ static struct bpf_binary_header *bpf_jit_alloc(struct bpf_jit *jit,
*/
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
- u32 stack_depth = round_up(fp->aux->stack_depth, 8);
struct bpf_prog *tmp, *orig_fp = fp;
struct bpf_binary_header *header;
struct s390_jit_data *jit_data;
@@ -2310,7 +2350,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
* - 3: Calculate program size and addrs array
*/
for (pass = 1; pass <= 3; pass++) {
- if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
+ if (bpf_jit_prog(&jit, fp, extra_pass)) {
fp = orig_fp;
goto free_addrs;
}
@@ -2324,7 +2364,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
goto free_addrs;
}
skip_init_ctx:
- if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
+ if (bpf_jit_prog(&jit, fp, extra_pass)) {
bpf_jit_binary_free(header);
fp = orig_fp;
goto free_addrs;
@@ -2372,8 +2412,9 @@ bool bpf_jit_supports_far_kfunc_call(void)
return true;
}
-int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
- void *old_addr, void *new_addr)
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
+ enum bpf_text_poke_type new_t, void *old_addr,
+ void *new_addr)
{
struct bpf_plt expected_plt, current_plt, new_plt, *plt;
struct {
@@ -2390,7 +2431,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
if (insn.opc != (0xc004 | (old_addr ? 0xf0 : 0)))
return -EINVAL;
- if (t == BPF_MOD_JUMP &&
+ if ((new_t == BPF_MOD_JUMP || old_t == BPF_MOD_JUMP) &&
insn.disp == ((char *)new_addr - (char *)ip) >> 1) {
/*
* The branch already points to the destination,
@@ -2489,14 +2530,12 @@ static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
* goto skip;
*/
- /* %r1 = __bpf_prog_enter */
- load_imm64(jit, REG_1, (u64)bpf_trampoline_enter(p));
/* %r2 = p */
load_imm64(jit, REG_2, (u64)p);
/* la %r3,run_ctx_off(%r15) */
EMIT4_DISP(0x41000000, REG_3, REG_15, tjit->run_ctx_off);
- /* %r1() */
- call_r1(jit);
+ /* brasl %r14,__bpf_prog_enter */
+ EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, bpf_trampoline_enter(p));
/* ltgr %r7,%r2 */
EMIT4(0xb9020000, REG_7, REG_2);
/* brcl 8,skip */
@@ -2507,15 +2546,13 @@ static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
* retval = bpf_func(args, p->insnsi);
*/
- /* %r1 = p->bpf_func */
- load_imm64(jit, REG_1, (u64)p->bpf_func);
/* la %r2,bpf_args_off(%r15) */
EMIT4_DISP(0x41000000, REG_2, REG_15, tjit->bpf_args_off);
/* %r3 = p->insnsi */
if (!p->jited)
load_imm64(jit, REG_3, (u64)p->insnsi);
- /* %r1() */
- call_r1(jit);
+ /* brasl %r14,p->bpf_func */
+ EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, p->bpf_func);
/* stg %r2,retval_off(%r15) */
if (save_ret) {
if (sign_extend(jit, REG_2, m->ret_size, m->ret_flags))
@@ -2532,16 +2569,14 @@ static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
* __bpf_prog_exit(p, start, &run_ctx);
*/
- /* %r1 = __bpf_prog_exit */
- load_imm64(jit, REG_1, (u64)bpf_trampoline_exit(p));
/* %r2 = p */
load_imm64(jit, REG_2, (u64)p);
/* lgr %r3,%r7 */
EMIT4(0xb9040000, REG_3, REG_7);
/* la %r4,run_ctx_off(%r15) */
EMIT4_DISP(0x41000000, REG_4, REG_15, tjit->run_ctx_off);
- /* %r1() */
- call_r1(jit);
+ /* brasl %r14,__bpf_prog_exit */
+ EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, bpf_trampoline_exit(p));
return 0;
}
@@ -2585,9 +2620,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
if (nr_stack_args > MAX_NR_STACK_ARGS)
return -ENOTSUPP;
- /* Return to %r14, since func_addr and %r0 are not available. */
- if ((!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK)) ||
- (flags & BPF_TRAMP_F_INDIRECT))
+ /* Return to %r14 in the struct_ops case. */
+ if (flags & BPF_TRAMP_F_INDIRECT)
flags |= BPF_TRAMP_F_SKIP_FRAME;
/*
@@ -2645,9 +2679,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
/* stg %r1,backchain_off(%r15) */
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_1, REG_0, REG_15,
tjit->backchain_off);
- /* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */
+ /* mvc tccnt_off(4,%r15),stack_size+tail_call_cnt(%r15) */
_EMIT6(0xd203f000 | tjit->tccnt_off,
- 0xf000 | (tjit->stack_size + STK_OFF_TCCNT));
+ 0xf000 | (tjit->stack_size +
+ offsetof(struct prog_frame, tail_call_cnt)));
/* stmg %r2,%rN,fwd_reg_args_off(%r15) */
if (nr_reg_args)
EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2,
@@ -2701,9 +2736,6 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
/* lgr %r8,%r0 */
EMIT4(0xb9040000, REG_8, REG_0);
- } else {
- /* %r8 = func_addr + S390X_PATCH_SIZE */
- load_imm64(jit, REG_8, (u64)func_addr + S390X_PATCH_SIZE);
}
/*
@@ -2729,12 +2761,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
* __bpf_tramp_enter(im);
*/
- /* %r1 = __bpf_tramp_enter */
- load_imm64(jit, REG_1, (u64)__bpf_tramp_enter);
/* %r2 = im */
load_imm64(jit, REG_2, (u64)im);
- /* %r1() */
- call_r1(jit);
+ /* brasl %r14,__bpf_tramp_enter */
+ EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, __bpf_tramp_enter);
}
for (i = 0; i < fentry->nr_links; i++)
@@ -2784,15 +2814,28 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
(nr_stack_args * sizeof(u64) - 1) << 16 |
tjit->stack_args_off,
0xf000 | tjit->orig_stack_args_off);
- /* mvc STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
- _EMIT6(0xd203f000 | STK_OFF_TCCNT, 0xf000 | tjit->tccnt_off);
- /* lgr %r1,%r8 */
- EMIT4(0xb9040000, REG_1, REG_8);
- /* %r1() */
- call_r1(jit);
+ /* mvc tail_call_cnt(4,%r15),tccnt_off(%r15) */
+ _EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt),
+ 0xf000 | tjit->tccnt_off);
+ if (flags & BPF_TRAMP_F_ORIG_STACK) {
+ if (nospec_uses_trampoline())
+ /* brasl %r14,__s390_indirect_jump_r8 */
+ EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14,
+ __s390_indirect_jump_r8);
+ else
+ /* basr %r14,%r8 */
+ EMIT2(0x0d00, REG_14, REG_8);
+ } else {
+ /* brasl %r14,func_addr+S390X_PATCH_SIZE */
+ EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14,
+ func_addr + S390X_PATCH_SIZE);
+ }
/* stg %r2,retval_off(%r15) */
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
tjit->retval_off);
+ /* mvc tccnt_off(4,%r15),tail_call_cnt(%r15) */
+ _EMIT6(0xd203f000 | tjit->tccnt_off,
+ 0xf000 | offsetof(struct prog_frame, tail_call_cnt));
im->ip_after_call = jit->prg_buf + jit->prg;
@@ -2817,12 +2860,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
* __bpf_tramp_exit(im);
*/
- /* %r1 = __bpf_tramp_exit */
- load_imm64(jit, REG_1, (u64)__bpf_tramp_exit);
/* %r2 = im */
load_imm64(jit, REG_2, (u64)im);
- /* %r1() */
- call_r1(jit);
+ /* brasl %r14,__bpf_tramp_exit */
+ EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, __bpf_tramp_exit);
}
/* lmg %r2,%rN,reg_args_off(%r15) */
@@ -2831,7 +2872,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
REG_2 + (nr_reg_args - 1), REG_15,
tjit->reg_args_off);
/* lgr %r1,%r8 */
- if (!(flags & BPF_TRAMP_F_SKIP_FRAME))
+ if (!(flags & BPF_TRAMP_F_SKIP_FRAME) &&
+ (flags & BPF_TRAMP_F_ORIG_STACK))
EMIT4(0xb9040000, REG_1, REG_8);
/* lmg %r7,%r8,r7_r8_off(%r15) */
EMIT6_DISP_LH(0xeb000000, 0x0004, REG_7, REG_8, REG_15,
@@ -2842,23 +2884,20 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET))
EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15,
tjit->retval_off);
- /* mvc stack_size+STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
- _EMIT6(0xd203f000 | (tjit->stack_size + STK_OFF_TCCNT),
+ /* mvc stack_size+tail_call_cnt(4,%r15),tccnt_off(%r15) */
+ _EMIT6(0xd203f000 | (tjit->stack_size +
+ offsetof(struct prog_frame, tail_call_cnt)),
0xf000 | tjit->tccnt_off);
/* aghi %r15,stack_size */
EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
- /* Emit an expoline for the following indirect jump. */
- if (nospec_uses_trampoline())
- emit_expoline(jit);
if (flags & BPF_TRAMP_F_SKIP_FRAME)
- /* br %r14 */
- _EMIT2(0x07fe);
+ EMIT_JUMP_REG(14);
+ else if (flags & BPF_TRAMP_F_ORIG_STACK)
+ EMIT_JUMP_REG(1);
else
- /* br %r1 */
- _EMIT2(0x07f1);
-
- emit_r1_thunk(jit);
-
+ /* brcl 0xf,func_addr+S390X_PATCH_SIZE */
+ EMIT6_PCREL_RILC_PTR(0xc0040000, 0xf,
+ func_addr + S390X_PATCH_SIZE);
return 0;
}
@@ -2919,10 +2958,21 @@ bool bpf_jit_supports_arena(void)
bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
{
- /*
- * Currently the verifier uses this function only to check which
- * atomic stores to arena are supported, and they all are.
- */
+ if (!in_arena)
+ return true;
+ switch (insn->code) {
+ case BPF_STX | BPF_ATOMIC | BPF_B:
+ case BPF_STX | BPF_ATOMIC | BPF_H:
+ case BPF_STX | BPF_ATOMIC | BPF_W:
+ case BPF_STX | BPF_ATOMIC | BPF_DW:
+ if (bpf_atomic_is_load_store(insn))
+ return false;
+ break;
+ case BPF_LDX | BPF_MEMSX | BPF_B:
+ case BPF_LDX | BPF_MEMSX | BPF_H:
+ case BPF_LDX | BPF_MEMSX | BPF_W:
+ return false;
+ }
return true;
}
@@ -2960,3 +3010,8 @@ void arch_bpf_stack_walk(bool (*consume_fn)(void *, u64, u64, u64),
prev_addr = addr;
}
}
+
+bool bpf_jit_supports_timed_may_goto(void)
+{
+ return true;
+}
diff --git a/arch/s390/net/bpf_timed_may_goto.S b/arch/s390/net/bpf_timed_may_goto.S
new file mode 100644
index 000000000000..06f567a460d7
--- /dev/null
+++ b/arch/s390/net/bpf_timed_may_goto.S
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/export.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/nospec-insn.h>
+
+#define R2_OFF 0
+#define R5_OFF (R2_OFF + (5 - 2 + 1) * 8)
+#define R14_OFF (R5_OFF + 8)
+#define RETADDR_OFF (R14_OFF + 8)
+#define R15_OFF (RETADDR_OFF + 8)
+#define BACKCHAIN_OFF (R15_OFF + 8)
+#define FRAME_SIZE (BACKCHAIN_OFF + 8)
+#define FRAME_OFF (STACK_FRAME_OVERHEAD - FRAME_SIZE)
+#if (FRAME_OFF + BACKCHAIN_OFF) != __SF_BACKCHAIN
+#error Stack frame layout calculation is broken
+#endif
+
+ GEN_BR_THUNK %r1
+
+SYM_FUNC_START(arch_bpf_timed_may_goto)
+ /*
+ * This function has a special ABI: the parameters are in %r12 and
+ * %r13; the return value is in %r12; all GPRs except %r0, %r1, and
+ * %r12 are callee-saved; and the return address is in %r0.
+ */
+ stmg %r2,%r5,FRAME_OFF+R2_OFF(%r15)
+ stg %r14,FRAME_OFF+R14_OFF(%r15)
+ stg %r0,FRAME_OFF+RETADDR_OFF(%r15)
+ stg %r15,FRAME_OFF+R15_OFF(%r15)
+ lgr %r1,%r15
+ lay %r15,-FRAME_SIZE(%r15)
+ stg %r1,__SF_BACKCHAIN(%r15)
+
+ lay %r2,0(%r12,%r13)
+ brasl %r14,bpf_check_timed_may_goto
+ lgr %r12,%r2
+
+ lg %r15,FRAME_SIZE+FRAME_OFF+R15_OFF(%r15)
+ lmg %r2,%r5,FRAME_OFF+R2_OFF(%r15)
+ lg %r14,FRAME_OFF+R14_OFF(%r15)
+ lg %r1,FRAME_OFF+RETADDR_OFF(%r15)
+ BR_EX %r1
+SYM_FUNC_END(arch_bpf_timed_may_goto)
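The assembly above only adapts the JIT-side register convention (arguments in %r12/%r13, result in %r12, return address in %r0) to the normal C ABI of bpf_check_timed_may_goto(). A rough C model under assumed types (kernel u64; the callee is treated as taking one pointer-sized argument, as the register usage suggests):

/* Assumed prototype for illustration; the real one is in kernel/bpf/core.c. */
extern u64 bpf_check_timed_may_goto(void *p);

/* Illustration only; the real entry point must stay in assembly. */
static u64 timed_may_goto_model(u64 reg_ax, u64 reg_10)
{
	/* lay %r2,0(%r12,%r13): the single C argument is the registers' sum */
	u64 ret = bpf_check_timed_may_goto((void *)(reg_ax + reg_10));

	/* lgr %r12,%r2: the result travels back in BPF_REG_AX */
	return ret;
}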
diff --git a/arch/s390/net/pnet.c b/arch/s390/net/pnet.c
index 79211bec0fc8..03089ef479b2 100644
--- a/arch/s390/net/pnet.c
+++ b/arch/s390/net/pnet.c
@@ -6,6 +6,7 @@
*/
#include <linux/device.h>
+#include <linux/export.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/types.h>
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index 0547a10406e7..1810e0944a4e 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -3,7 +3,8 @@
# Makefile for the s390 PCI subsystem.
#
-obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o pci_sysfs.o \
+obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o \
pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
- pci_bus.o pci_kvm_hook.o
+ pci_bus.o pci_kvm_hook.o pci_report.o pci_fixup.o
obj-$(CONFIG_PCI_IOV) += pci_iov.o
+obj-$(CONFIG_SYSFS) += pci_sysfs.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 0de0f6e405b5..93d2c9c780fc 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -16,8 +16,7 @@
* Thomas Klein
*/
-#define KMSG_COMPONENT "zpci"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "zpci: " fmt
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -29,7 +28,9 @@
#include <linux/pci.h>
#include <linux/printk.h>
#include <linux/lockdep.h>
+#include <linux/list_sort.h>
+#include <asm/machine.h>
#include <asm/isc.h>
#include <asm/airq.h>
#include <asm/facility.h>
@@ -43,6 +44,7 @@
/* list of all detected zpci devices */
static LIST_HEAD(zpci_list);
static DEFINE_SPINLOCK(zpci_list_lock);
+static DEFINE_MUTEX(zpci_add_remove_lock);
static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE);
static DEFINE_SPINLOCK(zpci_domain_lock);
@@ -68,6 +70,15 @@ EXPORT_SYMBOL_GPL(zpci_aipb);
struct airq_iv *zpci_aif_sbv;
EXPORT_SYMBOL_GPL(zpci_aif_sbv);
+void zpci_zdev_put(struct zpci_dev *zdev)
+{
+ if (!zdev)
+ return;
+ mutex_lock(&zpci_add_remove_lock);
+ kref_put_lock(&zdev->kref, zpci_release_device, &zpci_list_lock);
+ mutex_unlock(&zpci_add_remove_lock);
+}
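zpci_zdev_put() now serializes final puts against zpci_add_device() via zpci_add_remove_lock, and kref_put_lock() only grabs zpci_list_lock when the last reference drops, leaving zpci_release_device() to unlock it after unlinking the zdev. A generic sketch of that kref_put_lock() pattern, using hypothetical obj/obj_list names:

#include <linux/kref.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct obj {
	struct kref kref;
	struct list_head entry;
};

static LIST_HEAD(obj_list);
static DEFINE_SPINLOCK(obj_list_lock);

static void obj_release(struct kref *kref)
{
	struct obj *o = container_of(kref, struct obj, kref);

	/* Called with obj_list_lock held; the release must drop it. */
	list_del(&o->entry);
	spin_unlock(&obj_list_lock);
	kfree(o);
}

static void obj_put(struct obj *o)
{
	/* Takes obj_list_lock only if this put drops the last reference. */
	kref_put_lock(&o->kref, obj_release, &obj_list_lock);
}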
+
struct zpci_dev *get_zdev_by_fid(u32 fid)
{
struct zpci_dev *tmp, *zdev = NULL;
@@ -123,14 +134,13 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
struct zpci_fib fib = {0};
u8 cc;
- WARN_ON_ONCE(iota & 0x3fff);
fib.pba = base;
/* Work around off by one in ISM virt device */
if (zdev->pft == PCI_FUNC_TYPE_ISM && limit > base)
fib.pal = limit + (1 << 12);
else
fib.pal = limit;
- fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
+ fib.iota = iota;
fib.gd = zdev->gisa;
cc = zpci_mod_fc(req, &fib, status);
if (cc)
@@ -160,6 +170,7 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE);
struct zpci_iommu_ctrs *ctrs;
struct zpci_fib fib = {0};
+ unsigned long flags;
u8 cc, status;
if (zdev->fmb || sizeof(*zdev->fmb) < zdev->fmb_length)
@@ -171,6 +182,7 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
WARN_ON((u64) zdev->fmb & 0xf);
/* reset software counters */
+ spin_lock_irqsave(&zdev->dom_lock, flags);
ctrs = zpci_get_iommu_ctrs(zdev);
if (ctrs) {
atomic64_set(&ctrs->mapped_pages, 0);
@@ -179,6 +191,7 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
atomic64_set(&ctrs->sync_map_rpcits, 0);
atomic64_set(&ctrs->sync_rpcits, 0);
}
+ spin_unlock_irqrestore(&zdev->dom_lock, flags);
fib.fmb_addr = virt_to_phys(zdev->fmb);
@@ -251,7 +264,7 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
}
void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
- unsigned long prot)
+ pgprot_t prot)
{
/*
* When PCI MIO instructions are unavailable the "physical" address
@@ -261,7 +274,7 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
if (!static_branch_unlikely(&have_mio))
return (void __iomem *)phys_addr;
- return generic_ioremap_prot(phys_addr, size, __pgprot(prot));
+ return generic_ioremap_prot(phys_addr, size, prot);
}
EXPORT_SYMBOL(ioremap_prot);
@@ -587,7 +600,6 @@ int pcibios_device_add(struct pci_dev *pdev)
if (pdev->is_physfn)
pdev->no_vf_scan = 1;
- pdev->dev.groups = zpci_attr_groups;
zpci_map_resources(pdev);
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
@@ -687,6 +699,23 @@ int zpci_enable_device(struct zpci_dev *zdev)
}
EXPORT_SYMBOL_GPL(zpci_enable_device);
+int zpci_reenable_device(struct zpci_dev *zdev)
+{
+ u8 status;
+ int rc;
+
+ rc = zpci_enable_device(zdev);
+ if (rc)
+ return rc;
+
+ rc = zpci_iommu_register_ioat(zdev, &status);
+ if (rc)
+ zpci_disable_device(zdev);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(zpci_reenable_device);
+
int zpci_disable_device(struct zpci_dev *zdev)
{
u32 fh = zdev->fh;
@@ -736,7 +765,6 @@ EXPORT_SYMBOL_GPL(zpci_disable_device);
*/
int zpci_hot_reset_device(struct zpci_dev *zdev)
{
- u8 status;
int rc;
lockdep_assert_held(&zdev->state_lock);
@@ -755,19 +783,9 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
return rc;
}
- rc = zpci_enable_device(zdev);
- if (rc)
- return rc;
-
- if (zdev->dma_table)
- rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table), &status);
- if (rc) {
- zpci_disable_device(zdev);
- return rc;
- }
+ rc = zpci_reenable_device(zdev);
- return 0;
+ return rc;
}
/**
@@ -776,8 +794,9 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
* @fh: Current Function Handle of the device to be created
* @state: Initial state after creation either Standby or Configured
*
- * Creates a new zpci device and adds it to its, possibly newly created, zbus
- * as well as zpci_list.
+ * Allocates a new struct zpci_dev and queries the platform for its details.
+ * If successful the device can subsequently be added to the zPCI subsystem
+ * using zpci_add_device().
*
* Returns: the zdev on success or an error pointer otherwise
*/
@@ -786,7 +805,6 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
struct zpci_dev *zdev;
int rc;
- zpci_dbg(1, "add fid:%x, fh:%x, c:%d\n", fid, fh, state);
zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
if (!zdev)
return ERR_PTR(-ENOMEM);
@@ -801,11 +819,35 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
goto error;
zdev->state = state;
- kref_init(&zdev->kref);
mutex_init(&zdev->state_lock);
mutex_init(&zdev->fmb_lock);
mutex_init(&zdev->kzdev_lock);
+ return zdev;
+
+error:
+ zpci_dbg(0, "crt fid:%x, rc:%d\n", fid, rc);
+ kfree(zdev);
+ return ERR_PTR(rc);
+}
+
+/**
+ * zpci_add_device() - Add a previously created zPCI device to the zPCI subsystem
+ * @zdev: The zPCI device to be added
+ *
+ * A struct zpci_dev is added to the zPCI subsystem and to a virtual PCI bus,
+ * creating a new one as necessary. A hotplug slot is created and events start
+ * to be handled. If successful, from this point on zpci_zdev_get() and
+ * zpci_zdev_put() must be used. If adding the struct zpci_dev fails, the
+ * device was not added and should be freed.
+ *
+ * Return: 0 on success, or an error code otherwise
+ */
+int zpci_add_device(struct zpci_dev *zdev)
+{
+ int rc;
+
+ mutex_lock(&zpci_add_remove_lock);
+ zpci_dbg(1, "add fid:%x, fh:%x, c:%d\n", zdev->fid, zdev->fh, zdev->state);
rc = zpci_init_iommu(zdev);
if (rc)
goto error;
@@ -814,18 +856,19 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
if (rc)
goto error_destroy_iommu;
+ kref_init(&zdev->kref);
spin_lock(&zpci_list_lock);
list_add_tail(&zdev->entry, &zpci_list);
spin_unlock(&zpci_list_lock);
-
- return zdev;
+ mutex_unlock(&zpci_add_remove_lock);
+ return 0;
error_destroy_iommu:
zpci_destroy_iommu(zdev);
error:
- zpci_dbg(0, "add fid:%x, rc:%d\n", fid, rc);
- kfree(zdev);
- return ERR_PTR(rc);
+ zpci_dbg(0, "add fid:%x, rc:%d\n", zdev->fid, rc);
+ mutex_unlock(&zpci_add_remove_lock);
+ return rc;
}
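With creation and registration split, callers follow the same shape (see __clp_add() and the availability-event handlers later in this patch): allocate, add, and free the zdev manually only if adding failed. A condensed sketch of that calling convention, with error propagation abbreviated:

	zdev = zpci_create_device(fid, fh, ZPCI_FN_STATE_STANDBY);
	if (IS_ERR(zdev))
		return PTR_ERR(zdev);
	rc = zpci_add_device(zdev);
	if (rc) {
		kfree(zdev);	/* not added; the caller still owns the memory */
		return rc;
	}
	/* From here on, lifetime is managed via zpci_zdev_get()/zpci_zdev_put(). */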
bool zpci_is_device_configured(struct zpci_dev *zdev)
@@ -894,60 +937,44 @@ int zpci_deconfigure_device(struct zpci_dev *zdev)
* @zdev: the zpci_dev that was reserved
*
* Handle the case that a given zPCI function was reserved by another system.
- * After a call to this function the zpci_dev can not be found via
- * get_zdev_by_fid() anymore but may still be accessible via existing
- * references though it will not be functional anymore.
*/
void zpci_device_reserved(struct zpci_dev *zdev)
{
- /*
- * Remove device from zpci_list as it is going away. This also
- * makes sure we ignore subsequent zPCI events for this device.
- */
- spin_lock(&zpci_list_lock);
- list_del(&zdev->entry);
- spin_unlock(&zpci_list_lock);
+ lockdep_assert_held(&zdev->state_lock);
+ /* We may declare the device reserved multiple times */
+ if (zdev->state == ZPCI_FN_STATE_RESERVED)
+ return;
zdev->state = ZPCI_FN_STATE_RESERVED;
zpci_dbg(3, "rsv fid:%x\n", zdev->fid);
+ /*
+ * The underlying device is gone. Allow the zdev to be freed
+ * as soon as all other references are gone by accounting for
+ * the removal as a dropped reference.
+ */
zpci_zdev_put(zdev);
}
void zpci_release_device(struct kref *kref)
{
struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref);
- int ret;
+
+ lockdep_assert_held(&zpci_add_remove_lock);
+ WARN_ON(zdev->state != ZPCI_FN_STATE_RESERVED);
+ /*
+ * We already hold zpci_list_lock thanks to kref_put_lock().
+ * This makes sure no new reference can be taken from the list.
+ */
+ list_del(&zdev->entry);
+ spin_unlock(&zpci_list_lock);
if (zdev->has_hp_slot)
zpci_exit_slot(zdev);
- if (zdev->zbus->bus)
- zpci_bus_remove_device(zdev, false);
-
- if (zdev_enabled(zdev))
- zpci_disable_device(zdev);
+ if (zdev->has_resources)
+ zpci_cleanup_bus_resources(zdev);
- switch (zdev->state) {
- case ZPCI_FN_STATE_CONFIGURED:
- ret = sclp_pci_deconfigure(zdev->fid);
- zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret);
- fallthrough;
- case ZPCI_FN_STATE_STANDBY:
- if (zdev->has_hp_slot)
- zpci_exit_slot(zdev);
- spin_lock(&zpci_list_lock);
- list_del(&zdev->entry);
- spin_unlock(&zpci_list_lock);
- zpci_dbg(3, "rsv fid:%x\n", zdev->fid);
- fallthrough;
- case ZPCI_FN_STATE_RESERVED:
- if (zdev->has_resources)
- zpci_cleanup_bus_resources(zdev);
- zpci_bus_device_unregister(zdev);
- zpci_destroy_iommu(zdev);
- fallthrough;
- default:
- break;
- }
+ zpci_bus_device_unregister(zdev);
+ zpci_destroy_iommu(zdev);
zpci_dbg(3, "rem fid:%x\n", zdev->fid);
kfree_rcu(zdev, rcu);
}
@@ -1064,7 +1091,7 @@ char * __init pcibios_setup(char *str)
return NULL;
}
if (!strcmp(str, "nomio")) {
- S390_lowcore.machine_flags &= ~MACHINE_FLAG_PCI_MIO;
+ clear_machine_feature(MFEATURE_PCI_MIO);
return NULL;
}
if (!strcmp(str, "force_floating")) {
@@ -1083,6 +1110,50 @@ bool zpci_is_enabled(void)
return s390_pci_initialized;
}
+static int zpci_cmp_rid(void *priv, const struct list_head *a,
+ const struct list_head *b)
+{
+ struct zpci_dev *za = container_of(a, struct zpci_dev, entry);
+ struct zpci_dev *zb = container_of(b, struct zpci_dev, entry);
+
+ /*
+ * PCI functions without RID available maintain original order
+ * between themselves but sort before those with RID.
+ */
+ if (za->rid == zb->rid)
+ return za->rid_available > zb->rid_available;
+ /*
+ * PCI functions with RID sort by RID ascending.
+ */
+ return za->rid > zb->rid;
+}
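list_sort() with this comparator yields the order the following helper relies on; a concrete illustration with made-up values:

/*
 * Illustrative ordering of three functions:
 *   A: rid_available=1, rid=0x28
 *   B: rid_available=0
 *   C: rid_available=1, rid=0x08
 * sorts as B, C, A -- RID-less functions first, then ascending RID.
 */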
+
+static void zpci_add_devices(struct list_head *scan_list)
+{
+ struct zpci_dev *zdev, *tmp;
+
+ list_sort(NULL, scan_list, &zpci_cmp_rid);
+ list_for_each_entry_safe(zdev, tmp, scan_list, entry) {
+ list_del_init(&zdev->entry);
+ if (zpci_add_device(zdev))
+ kfree(zdev);
+ }
+}
+
+int zpci_scan_devices(void)
+{
+ LIST_HEAD(scan_list);
+ int rc;
+
+ rc = clp_scan_pci_devices(&scan_list);
+ if (rc)
+ return rc;
+
+ zpci_add_devices(&scan_list);
+ zpci_bus_scan_busses();
+ return 0;
+}
+
static int __init pci_base_init(void)
{
int rc;
@@ -1095,7 +1166,7 @@ static int __init pci_base_init(void)
return 0;
}
- if (MACHINE_HAS_PCI_MIO) {
+ if (test_machine_feature(MFEATURE_PCI_MIO)) {
static_branch_enable(&have_mio);
system_ctl_set_bit(2, CR2_MIO_ADDRESSING_BIT);
}
@@ -1112,10 +1183,13 @@ static int __init pci_base_init(void)
if (rc)
goto out_irq;
- rc = clp_scan_pci_devices();
+ rc = zpci_scan_devices();
+ if (rc)
+ goto out_find;
+
+ rc = zpci_fw_sysfs_init();
if (rc)
goto out_find;
- zpci_bus_scan_busses();
s390_pci_initialized = 1;
return 0;
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index daa5d7450c7d..72adc8f6e94f 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -7,18 +7,17 @@
*
*/
-#define KMSG_COMPONENT "zpci"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "zpci: " fmt
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/err.h>
-#include <linux/export.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
#include <linux/jump_label.h>
#include <linux/pci.h>
#include <linux/printk.h>
+#include <linux/dma-direct.h>
#include <asm/pci_clp.h>
#include <asm/pci_dma.h>
@@ -45,15 +44,17 @@ static int zpci_bus_prepare_device(struct zpci_dev *zdev)
if (!zdev_enabled(zdev)) {
rc = zpci_enable_device(zdev);
- if (rc)
+ if (rc) {
+ pr_err("Enabling PCI function %08x failed\n", zdev->fid);
return rc;
+ }
}
if (!zdev->has_resources) {
zpci_setup_bus_resources(zdev);
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
if (zdev->bars[i].res)
- pci_bus_add_resource(zdev->zbus->bus, zdev->bars[i].res, 0);
+ pci_bus_add_resource(zdev->zbus->bus, zdev->bars[i].res);
}
}
@@ -168,9 +169,15 @@ void zpci_bus_scan_busses(void)
mutex_unlock(&zbus_list_lock);
}
+static bool zpci_bus_is_multifunction_root(struct zpci_dev *zdev)
+{
+ return !s390_pci_no_rid && zdev->rid_available &&
+ !zdev->vfn;
+}
+
/* zpci_bus_create_pci_bus - Create the PCI bus associated with this zbus
* @zbus: the zbus holding the zdevices
- * @fr: PCI root function that will determine the bus's domain, and bus speeed
+ * @fr: PCI root function that will determine the bus's domain, and bus speed
* @ops: the pci operations
*
* The PCI function @fr determines the domain (its UID), multifunction property
@@ -188,7 +195,7 @@ static int zpci_bus_create_pci_bus(struct zpci_bus *zbus, struct zpci_dev *fr, s
return domain;
zbus->domain_nr = domain;
- zbus->multifunction = fr->rid_available;
+ zbus->multifunction = zpci_bus_is_multifunction_root(fr);
zbus->max_bus_speed = fr->max_bus_speed;
/*
@@ -232,13 +239,15 @@ static void zpci_bus_put(struct zpci_bus *zbus)
kref_put(&zbus->kref, zpci_bus_release);
}
-static struct zpci_bus *zpci_bus_get(int pchid)
+static struct zpci_bus *zpci_bus_get(int topo, bool topo_is_tid)
{
struct zpci_bus *zbus;
mutex_lock(&zbus_list_lock);
list_for_each_entry(zbus, &zbus_list, bus_next) {
- if (pchid == zbus->pchid) {
+ if (!zbus->multifunction)
+ continue;
+ if (topo_is_tid == zbus->topo_is_tid && topo == zbus->topo) {
kref_get(&zbus->kref);
goto out_unlock;
}
@@ -249,7 +258,7 @@ out_unlock:
return zbus;
}
-static struct zpci_bus *zpci_bus_alloc(int pchid)
+static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
{
struct zpci_bus *zbus;
@@ -257,7 +266,8 @@ static struct zpci_bus *zpci_bus_alloc(int pchid)
if (!zbus)
return NULL;
- zbus->pchid = pchid;
+ zbus->topo = topo;
+ zbus->topo_is_tid = topo_is_tid;
INIT_LIST_HEAD(&zbus->bus_next);
mutex_lock(&zbus_list_lock);
list_add_tail(&zbus->bus_next, &zbus_list);
@@ -274,10 +284,32 @@ static struct zpci_bus *zpci_bus_alloc(int pchid)
return zbus;
}
+static void pci_dma_range_setup(struct pci_dev *pdev)
+{
+ struct zpci_dev *zdev = to_zpci(pdev);
+ u64 aligned_end, size;
+ dma_addr_t dma_start;
+ int ret;
+
+ dma_start = PAGE_ALIGN(zdev->start_dma);
+ aligned_end = PAGE_ALIGN_DOWN(zdev->end_dma + 1);
+ if (aligned_end >= dma_start)
+ size = aligned_end - dma_start;
+ else
+ size = 0;
+ WARN_ON_ONCE(size == 0);
+
+ ret = dma_direct_set_offset(&pdev->dev, 0, dma_start, size);
+ if (ret)
+ pr_err("Failed to allocate DMA range map for %s\n", pci_name(pdev));
+}
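The window handed to dma_direct_set_offset() is the page-aligned part of [start_dma, end_dma]; a worked example with made-up values and 4 KiB pages:

/*
 * Worked example (made-up values, 4 KiB pages):
 *   zdev->start_dma = 0x100000900 -> dma_start   = 0x100001000
 *   zdev->end_dma   = 0x1ffffffff -> aligned_end = 0x200000000
 *   size = 0x200000000 - 0x100001000 = 0xfffff000
 * so DMA addresses map 1:1 inside that page-aligned window.
 */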
+
void pcibios_bus_add_device(struct pci_dev *pdev)
{
struct zpci_dev *zdev = to_zpci(pdev);
+ pci_dma_range_setup(pdev);
+
/*
* With pdev->no_vf_scan the common PCI probing code does not
* perform PF/VF linking.
@@ -292,19 +324,22 @@ static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev)
{
int rc = -EINVAL;
+ if (zbus->multifunction) {
+ if (!zdev->rid_available) {
+ WARN_ONCE(1, "rid_available not set for multifunction\n");
+ return rc;
+ }
+ zdev->devfn = zdev->rid & ZPCI_RID_MASK_DEVFN;
+ }
+
if (zbus->function[zdev->devfn]) {
pr_err("devfn %04x is already assigned\n", zdev->devfn);
return rc;
}
-
zdev->zbus = zbus;
zbus->function[zdev->devfn] = zdev;
zpci_nb_devices++;
- if (zbus->multifunction && !zdev->rid_available) {
- WARN_ONCE(1, "rid_available not set for multifunction\n");
- goto error;
- }
rc = zpci_init_slot(zdev);
if (rc)
goto error;
@@ -319,10 +354,25 @@ error:
return rc;
}
+static bool zpci_bus_is_isolated_vf(struct zpci_bus *zbus, struct zpci_dev *zdev)
+{
+ struct pci_dev *pdev;
+
+ if (!zdev->vfn)
+ return false;
+
+ pdev = zpci_iov_find_parent_pf(zbus, zdev);
+ if (!pdev)
+ return true;
+ pci_dev_put(pdev);
+ return false;
+}
+
int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops)
{
+ bool topo_is_tid = zdev->tid_avail;
struct zpci_bus *zbus = NULL;
- int rc = -EBADF;
+ int topo, rc = -EBADF;
if (zpci_nb_devices == ZPCI_NR_DEVICES) {
pr_warn("Adding PCI function %08x failed because the configured limit of %d is reached\n",
@@ -330,14 +380,19 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops)
return -ENOSPC;
}
- if (zdev->devfn >= ZPCI_FUNCTIONS_PER_BUS)
- return -EINVAL;
-
- if (!s390_pci_no_rid && zdev->rid_available)
- zbus = zpci_bus_get(zdev->pchid);
+ topo = topo_is_tid ? zdev->tid : zdev->pchid;
+ zbus = zpci_bus_get(topo, topo_is_tid);
+ /*
+ * An isolated VF gets its own domain/bus even if there exists
+ * a matching domain/bus already
+ */
+ if (zbus && zpci_bus_is_isolated_vf(zbus, zdev)) {
+ zpci_bus_put(zbus);
+ zbus = NULL;
+ }
if (!zbus) {
- zbus = zpci_bus_alloc(zdev->pchid);
+ zbus = zpci_bus_alloc(topo, topo_is_tid);
if (!zbus)
return -ENOMEM;
}
diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h
index af9f0ac79a1b..ae3d7a9159bd 100644
--- a/arch/s390/pci/pci_bus.h
+++ b/arch/s390/pci/pci_bus.h
@@ -6,6 +6,10 @@
* Pierre Morel <pmorel@linux.ibm.com>
*
*/
+#ifndef __S390_PCI_BUS_H
+#define __S390_PCI_BUS_H
+
+#include <linux/pci.h>
int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops);
void zpci_bus_device_unregister(struct zpci_dev *zdev);
@@ -17,11 +21,8 @@ int zpci_bus_scan_device(struct zpci_dev *zdev);
void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error);
void zpci_release_device(struct kref *kref);
-static inline void zpci_zdev_put(struct zpci_dev *zdev)
-{
- if (zdev)
- kref_put(&zdev->kref, zpci_release_device);
-}
+
+void zpci_zdev_put(struct zpci_dev *zdev);
static inline void zpci_zdev_get(struct zpci_dev *zdev)
{
@@ -40,3 +41,4 @@ static inline struct zpci_dev *zdev_from_bus(struct pci_bus *bus,
return (devfn >= ZPCI_FUNCTIONS_PER_BUS) ? NULL : zbus->function[devfn];
}
+#endif /* __S390_PCI_BUS_H */
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index ee90a91ed888..177aa0214547 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -6,10 +6,8 @@
* Jan Glauber <jang@linux.vnet.ibm.com>
*/
-#define KMSG_COMPONENT "zpci"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "zpci: " fmt
-#include <linux/compat.h>
#include <linux/kernel.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
@@ -20,6 +18,7 @@
#include <asm/asm-extable.h>
#include <asm/pci_debug.h>
#include <asm/pci_clp.h>
+#include <asm/asm.h>
#include <asm/clp.h>
#include <uapi/asm/clp.h>
@@ -52,18 +51,20 @@ static inline void zpci_err_clp(unsigned int rsp, int rc)
static inline int clp_get_ilp(unsigned long *ilp)
{
unsigned long mask;
- int cc = 3;
+ int cc, exception;
- asm volatile (
+ exception = 1;
+ asm_inline volatile (
" .insn rrf,0xb9a00000,%[mask],%[cmd],8,0\n"
- "0: ipm %[cc]\n"
- " srl %[cc],28\n"
+ "0: lhi %[exc],0\n"
"1:\n"
+ CC_IPM(cc)
EX_TABLE(0b, 1b)
- : [cc] "+d" (cc), [mask] "=d" (mask) : [cmd] "a" (1)
- : "cc");
+ : CC_OUT(cc, cc), [mask] "=d" (mask), [exc] "+d" (exception)
+ : [cmd] "a" (1)
+ : CC_CLOBBER);
*ilp = mask;
- return cc;
+ return exception ? 3 : CC_TRANSFORM(cc);
}
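clp_req() below and the pci_insn.c helpers later in this patch are converted to the same shape: cc is captured with CC_IPM()/CC_OUT(), and a separate exception flag, preset to 1, is cleared only when the instruction did not fault, because the extable fixup resumes past the clearing lhi. A stripped-down sketch of the pattern (the clp helpers map a fault to cc 3, the PCI load/store helpers to -ENXIO):

	int cc, exception;

	exception = 1;
	asm_inline volatile(
		"	<instruction that may fault>\n"
		"0:	lhi	%[exc],0\n"	/* only reached on success */
		"1:\n"
		CC_IPM(cc)
		EX_TABLE(0b, 1b)		/* on a fault, resume at 1: */
		: CC_OUT(cc, cc), [exc] "+d" (exception)
		:
		: CC_CLOBBER);
	return exception ? 3 : CC_TRANSFORM(cc);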
/*
@@ -72,19 +73,20 @@ static inline int clp_get_ilp(unsigned long *ilp)
static __always_inline int clp_req(void *data, unsigned int lps)
{
struct { u8 _[CLP_BLK_SIZE]; } *req = data;
+ int cc, exception;
u64 ignored;
- int cc = 3;
- asm volatile (
+ exception = 1;
+ asm_inline volatile (
" .insn rrf,0xb9a00000,%[ign],%[req],0,%[lps]\n"
- "0: ipm %[cc]\n"
- " srl %[cc],28\n"
+ "0: lhi %[exc],0\n"
"1:\n"
+ CC_IPM(cc)
EX_TABLE(0b, 1b)
- : [cc] "+d" (cc), [ign] "=d" (ignored), "+m" (*req)
+ : CC_OUT(cc, cc), [ign] "=d" (ignored), "+m" (*req), [exc] "+d" (exception)
: [req] "a" (req), [lps] "i" (lps)
- : "cc");
- return cc;
+ : CC_CLOBBER);
+ return exception ? 3 : CC_TRANSFORM(cc);
}
static void *clp_alloc_block(gfp_t gfp_mask)
@@ -108,6 +110,7 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
zdev->version = response->version;
zdev->maxstbl = response->maxstbl;
zdev->dtsm = response->dtsm;
+ zdev->rtr_avail = response->rtr;
switch (response->version) {
case 1:
@@ -162,12 +165,16 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev,
zdev->pft = response->pft;
zdev->vfn = response->vfn;
zdev->port = response->port;
+ zdev->fidparm = response->fidparm;
zdev->uid = response->uid;
zdev->fmb_length = sizeof(u32) * response->fmb_len;
- zdev->rid_available = response->rid_avail;
zdev->is_physfn = response->is_physfn;
- if (!s390_pci_no_rid && zdev->rid_available)
- zdev->devfn = response->rid & ZPCI_RID_MASK_DEVFN;
+ zdev->rid_available = response->rid_avail;
+ if (zdev->rid_available)
+ zdev->rid = response->rid;
+ zdev->tid_avail = response->tid_avail;
+ if (zdev->tid_avail)
+ zdev->tid = response->tid;
memcpy(zdev->pfip, response->pfip, sizeof(zdev->pfip));
if (response->util_str_avail) {
@@ -407,6 +414,7 @@ static int clp_find_pci(struct clp_req_rsp_list_pci *rrb, u32 fid,
static void __clp_add(struct clp_fh_list_entry *entry, void *data)
{
+ struct list_head *scan_list = data;
struct zpci_dev *zdev;
if (!entry->vendor_id)
@@ -417,10 +425,13 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data)
zpci_zdev_put(zdev);
return;
}
- zpci_create_device(entry->fid, entry->fh, entry->config_state);
+ zdev = zpci_create_device(entry->fid, entry->fh, entry->config_state);
+ if (IS_ERR(zdev))
+ return;
+ list_add_tail(&zdev->entry, scan_list);
}
-int clp_scan_pci_devices(void)
+int clp_scan_pci_devices(struct list_head *scan_list)
{
struct clp_req_rsp_list_pci *rrb;
int rc;
@@ -429,7 +440,7 @@ int clp_scan_pci_devices(void)
if (!rrb)
return -ENOMEM;
- rc = clp_list_pci(rrb, NULL, __clp_add);
+ rc = clp_list_pci(rrb, scan_list, __clp_add);
clp_free_block(rrb);
return rc;
@@ -638,7 +649,7 @@ static long clp_misc_ioctl(struct file *filp, unsigned int cmd,
if (cmd != CLP_SYNC)
return -EINVAL;
- argp = is_compat_task() ? compat_ptr(arg) : (void __user *) arg;
+ argp = (void __user *)arg;
if (copy_from_user(&req, argp, sizeof(req)))
return -EFAULT;
if (req.r != 0)
@@ -656,8 +667,6 @@ static const struct file_operations clp_misc_fops = {
.open = nonseekable_open,
.release = clp_misc_release,
.unlocked_ioctl = clp_misc_ioctl,
- .compat_ioctl = clp_misc_ioctl,
- .llseek = no_llseek,
};
static struct miscdevice clp_misc_device = {
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index 2cb5043a997d..c7ed7bf254b5 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -6,8 +6,7 @@
* Jan Glauber <jang@linux.vnet.ibm.com>
*/
-#define KMSG_COMPONENT "zpci"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "zpci: " fmt
#include <linux/kernel.h>
#include <linux/seq_file.h>
@@ -71,17 +70,23 @@ static void pci_fmb_show(struct seq_file *m, char *name[], int length,
static void pci_sw_counter_show(struct seq_file *m)
{
- struct zpci_iommu_ctrs *ctrs = zpci_get_iommu_ctrs(m->private);
+ struct zpci_dev *zdev = m->private;
+ struct zpci_iommu_ctrs *ctrs;
atomic64_t *counter;
+ unsigned long flags;
int i;
+ spin_lock_irqsave(&zdev->dom_lock, flags);
+ ctrs = zpci_get_iommu_ctrs(m->private);
if (!ctrs)
- return;
+ goto unlock;
counter = &ctrs->mapped_pages;
for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++)
seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i],
atomic64_read(counter));
+unlock:
+ spin_unlock_irqrestore(&zdev->dom_lock, flags);
}
static int pci_perf_show(struct seq_file *m, void *v)
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index dbe95ec5917e..839bd91c056e 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -6,8 +6,7 @@
* Jan Glauber <jang@linux.vnet.ibm.com>
*/
-#define KMSG_COMPONENT "zpci"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "zpci: " fmt
#include <linux/kernel.h>
#include <linux/pci.h>
@@ -16,6 +15,7 @@
#include <asm/sclp.h>
#include "pci_bus.h"
+#include "pci_report.h"
/* Content Code Description for PCI Function Error */
struct zpci_ccdf_err {
@@ -53,6 +53,7 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
case PCI_ERS_RESULT_CAN_RECOVER:
case PCI_ERS_RESULT_RECOVERED:
case PCI_ERS_RESULT_NEED_RESET:
+ case PCI_ERS_RESULT_NONE:
return false;
default:
return true;
@@ -77,10 +78,6 @@ static bool is_driver_supported(struct pci_driver *driver)
return false;
if (!driver->err_handler->error_detected)
return false;
- if (!driver->err_handler->slot_reset)
- return false;
- if (!driver->err_handler->resume)
- return false;
return true;
}
@@ -90,6 +87,7 @@ static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev,
pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
ers_res = driver->err_handler->error_detected(pdev, pdev->error_state);
+ pci_uevent_ers(pdev, ers_res);
if (ers_result_indicates_abort(ers_res))
pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev));
else if (ers_res == PCI_ERS_RESULT_NEED_RESET)
@@ -105,6 +103,10 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
struct zpci_dev *zdev = to_zpci(pdev);
int rc;
+ /* The underlying device may have been disabled by the event */
+ if (!zdev_enabled(zdev))
+ return PCI_ERS_RESULT_NEED_RESET;
+
pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
rc = zpci_reset_load_store_blocked(zdev);
if (rc) {
@@ -113,16 +115,18 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
return PCI_ERS_RESULT_NEED_RESET;
}
- if (driver->err_handler->mmio_enabled) {
+ if (driver->err_handler->mmio_enabled)
ers_res = driver->err_handler->mmio_enabled(pdev);
- if (ers_result_indicates_abort(ers_res)) {
- pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
- pci_name(pdev));
- return ers_res;
- } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
- pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
- return ers_res;
- }
+ else
+ ers_res = PCI_ERS_RESULT_NONE;
+
+ if (ers_result_indicates_abort(ers_res)) {
+ pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
+ pci_name(pdev));
+ return ers_res;
+ } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
+ pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
+ return ers_res;
}
pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
@@ -149,7 +153,12 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
return ers_res;
}
pdev->error_state = pci_channel_io_normal;
- ers_res = driver->err_handler->slot_reset(pdev);
+
+ if (driver->err_handler->slot_reset)
+ ers_res = driver->err_handler->slot_reset(pdev);
+ else
+ ers_res = PCI_ERS_RESULT_NONE;
+
if (ers_result_indicates_abort(ers_res)) {
pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));
return ers_res;
@@ -169,6 +178,8 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
{
pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+ struct zpci_dev *zdev = to_zpci(pdev);
+ char *status_str = "success";
struct pci_driver *driver;
/*
@@ -176,7 +187,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
* is unbound or probed and that userspace can't access its
* configuration space while we perform recovery.
*/
- pci_dev_lock(pdev);
+ device_lock(&pdev->dev);
if (pdev->error_state == pci_channel_io_perm_failure) {
ers_res = PCI_ERS_RESULT_DISCONNECT;
goto out_unlock;
@@ -186,45 +197,67 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
if (is_passed_through(pdev)) {
pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
pci_name(pdev));
+ status_str = "failed (pass-through)";
goto out_unlock;
}
driver = to_pci_driver(pdev->dev.driver);
if (!is_driver_supported(driver)) {
- if (!driver)
+ if (!driver) {
pr_info("%s: Cannot be recovered because no driver is bound to the device\n",
pci_name(pdev));
- else
+ status_str = "failed (no driver)";
+ } else {
pr_info("%s: The %s driver bound to the device does not support error recovery\n",
pci_name(pdev),
driver->name);
+ status_str = "failed (no driver support)";
+ }
goto out_unlock;
}
ers_res = zpci_event_notify_error_detected(pdev, driver);
- if (ers_result_indicates_abort(ers_res))
+ if (ers_result_indicates_abort(ers_res)) {
+ status_str = "failed (abort on detection)";
goto out_unlock;
+ }
- if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
+ if (ers_res != PCI_ERS_RESULT_NEED_RESET) {
ers_res = zpci_event_do_error_state_clear(pdev, driver);
- if (ers_result_indicates_abort(ers_res))
+ if (ers_result_indicates_abort(ers_res)) {
+ status_str = "failed (abort on MMIO enable)";
goto out_unlock;
+ }
}
if (ers_res == PCI_ERS_RESULT_NEED_RESET)
ers_res = zpci_event_do_reset(pdev, driver);
+ /*
+ * ers_res can be PCI_ERS_RESULT_NONE either because the driver
+ * decided to return it, indicating that it abstains from voting
+ * on how to recover, or because it didn't implement the callback.
+ * Both cases assume that if there is nothing else causing a
+ * disconnect, we recovered successfully.
+ */
+ if (ers_res == PCI_ERS_RESULT_NONE)
+ ers_res = PCI_ERS_RESULT_RECOVERED;
+
if (ers_res != PCI_ERS_RESULT_RECOVERED) {
+ pci_uevent_ers(pdev, PCI_ERS_RESULT_DISCONNECT);
pr_err("%s: Automatic recovery failed; operator intervention is required\n",
pci_name(pdev));
+ status_str = "failed (driver can't recover)";
goto out_unlock;
}
pr_info("%s: The device is ready to resume operations\n", pci_name(pdev));
if (driver->err_handler->resume)
driver->err_handler->resume(pdev);
+ pci_uevent_ers(pdev, PCI_ERS_RESULT_RECOVERED);
out_unlock:
- pci_dev_unlock(pdev);
+ device_unlock(&pdev->dev);
+ zpci_report_status(zdev, "recovery", status_str);
return ers_res;
}
@@ -260,6 +293,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
struct pci_dev *pdev = NULL;
pci_ers_result_t ers_res;
+ u32 fh = 0;
+ int rc;
zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
ccdf->fid, ccdf->fh, ccdf->pec);
@@ -268,6 +303,15 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
if (zdev) {
mutex_lock(&zdev->state_lock);
+ rc = clp_refresh_fh(zdev->fid, &fh);
+ if (rc)
+ goto no_pdev;
+ if (!fh || ccdf->fh != fh) {
+ /* Ignore events with stale handles */
+ zpci_dbg(3, "err fid:%x, fh:%x (stale %x)\n",
+ ccdf->fid, fh, ccdf->fh);
+ goto no_pdev;
+ }
zpci_update_fh(zdev, ccdf->fh);
if (zdev->zbus->bus)
pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
@@ -280,18 +324,19 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
goto no_pdev;
switch (ccdf->pec) {
- case 0x003a: /* Service Action or Error Recovery Successful */
+ case 0x002a: /* Error event concerns FMB */
+ case 0x002b:
+ case 0x002c:
+ break;
+ case 0x0040: /* Service Action or Error Recovery Failed */
+ case 0x003b:
+ zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
+ break;
+ default: /* PCI function left in the error state, attempt to recover */
ers_res = zpci_event_attempt_error_recovery(pdev);
if (ers_res != PCI_ERS_RESULT_RECOVERED)
zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
break;
- default:
- /*
- * Mark as frozen not permanently failed because the device
- * could be subsequently recovered by the platform.
- */
- zpci_event_io_failure(pdev, pci_channel_io_frozen);
- break;
}
pci_dev_put(pdev);
no_pdev:
@@ -321,6 +366,22 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
zdev->state = ZPCI_FN_STATE_STANDBY;
}
+static void zpci_event_reappear(struct zpci_dev *zdev)
+{
+ lockdep_assert_held(&zdev->state_lock);
+ /*
+ * The zdev is in the reserved state. This means that it was presumed to
+ * go away but there are still undropped references. Now, the platform
+ * announced its availability again. Bring back the lingering zdev
+ * to standby. This is safe because we hold a temporary reference
+ * now so that it won't go away. Account for the re-appearance of the
+ * underlying device by incrementing the reference count.
+ */
+ zdev->state = ZPCI_FN_STATE_STANDBY;
+ zpci_zdev_get(zdev);
+ zpci_dbg(1, "rea fid:%x, fh:%x\n", zdev->fid, zdev->fh);
+}
+
static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
@@ -339,19 +400,34 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED);
if (IS_ERR(zdev))
break;
+ if (zpci_add_device(zdev)) {
+ kfree(zdev);
+ break;
+ }
} else {
+ if (zdev->state == ZPCI_FN_STATE_RESERVED)
+ zpci_event_reappear(zdev);
/* the configuration request may be stale */
- if (zdev->state != ZPCI_FN_STATE_STANDBY)
+ else if (zdev->state != ZPCI_FN_STATE_STANDBY)
break;
zdev->state = ZPCI_FN_STATE_CONFIGURED;
}
zpci_scan_configured_device(zdev, ccdf->fh);
break;
case 0x0302: /* Reserved -> Standby */
- if (!zdev)
- zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
- else
+ if (!zdev) {
+ zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
+ if (IS_ERR(zdev))
+ break;
+ if (zpci_add_device(zdev)) {
+ kfree(zdev);
+ break;
+ }
+ } else {
+ if (zdev->state == ZPCI_FN_STATE_RESERVED)
+ zpci_event_reappear(zdev);
zpci_update_fh(zdev, ccdf->fh);
+ }
break;
case 0x0303: /* Deconfiguration requested */
if (zdev) {
@@ -380,7 +456,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
break;
case 0x0306: /* 0x308 or 0x302 for multiple devices */
zpci_remove_reserved_devices();
- clp_scan_pci_devices();
+ zpci_scan_devices();
break;
case 0x0308: /* Standby -> Reserved */
if (!zdev)
diff --git a/arch/s390/pci/pci_fixup.c b/arch/s390/pci/pci_fixup.c
new file mode 100644
index 000000000000..35688b645098
--- /dev/null
+++ b/arch/s390/pci/pci_fixup.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Exceptions for specific devices.
+ *
+ * Copyright IBM Corp. 2025
+ *
+ * Author(s):
+ * Niklas Schnelle <schnelle@linux.ibm.com>
+ */
+#include <linux/pci.h>
+
+static void zpci_ism_bar_no_mmap(struct pci_dev *pdev)
+{
+ /*
+ * ISM's BAR is special. Drivers written for ISM know
+ * how to handle this, but others need to be aware of its
+ * special nature, e.g. to prevent attempts to mmap() it.
+ */
+ pdev->non_mappable_bars = 1;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM,
+ PCI_DEVICE_ID_IBM_ISM,
+ zpci_ism_bar_no_mmap);
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 56480be48244..35ceb1bea1c6 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -15,6 +15,7 @@
#include <asm/pci_debug.h>
#include <asm/pci_io.h>
#include <asm/processor.h>
+#include <asm/asm.h>
#define ZPCI_INSN_BUSY_DELAY 1 /* 1 microsecond */
@@ -57,16 +58,16 @@ static inline void zpci_err_insn_addr(int lvl, u8 insn, u8 cc, u8 status,
/* Modify PCI Function Controls */
static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
{
- u8 cc;
+ int cc;
asm volatile (
" .insn rxy,0xe300000000d0,%[req],%[fib]\n"
- " ipm %[cc]\n"
- " srl %[cc],28\n"
- : [cc] "=d" (cc), [req] "+d" (req), [fib] "+Q" (*fib)
- : : "cc");
+ CC_IPM(cc)
+ : CC_OUT(cc, cc), [req] "+d" (req), [fib] "+Q" (*fib)
+ :
+ : CC_CLOBBER);
*status = req >> 24 & 0xff;
- return cc;
+ return CC_TRANSFORM(cc);
}
u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status)
@@ -98,17 +99,16 @@ EXPORT_SYMBOL_GPL(zpci_mod_fc);
static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
{
union register_pair addr_range = {.even = addr, .odd = range};
- u8 cc;
+ int cc;
asm volatile (
" .insn rre,0xb9d30000,%[fn],%[addr_range]\n"
- " ipm %[cc]\n"
- " srl %[cc],28\n"
- : [cc] "=d" (cc), [fn] "+d" (fn)
+ CC_IPM(cc)
+ : CC_OUT(cc, cc), [fn] "+d" (fn)
: [addr_range] "d" (addr_range.pair)
- : "cc");
+ : CC_CLOBBER);
*status = fn >> 24 & 0xff;
- return cc;
+ return CC_TRANSFORM(cc);
}
int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
@@ -145,7 +145,7 @@ int zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
return -EIO;
asm volatile(
- ".insn rsy,0xeb00000000d1,%[ctl],%[isc],%[iib]\n"
+ ".insn rsy,0xeb00000000d1,%[ctl],%[isc],%[iib]"
: : [ctl] "d" (ctl), [isc] "d" (isc << 27), [iib] "Q" (*iib));
return 0;
@@ -156,20 +156,23 @@ EXPORT_SYMBOL_GPL(zpci_set_irq_ctrl);
static inline int ____pcilg(u64 *data, u64 req, u64 offset, u8 *status)
{
union register_pair req_off = {.even = req, .odd = offset};
- int cc = -ENXIO;
+ int cc, exception;
u64 __data;
- asm volatile (
+ exception = 1;
+ asm_inline volatile (
" .insn rre,0xb9d20000,%[data],%[req_off]\n"
- "0: ipm %[cc]\n"
- " srl %[cc],28\n"
+ "0: lhi %[exc],0\n"
"1:\n"
+ CC_IPM(cc)
EX_TABLE(0b, 1b)
- : [cc] "+d" (cc), [data] "=d" (__data),
- [req_off] "+&d" (req_off.pair) :: "cc");
+ : CC_OUT(cc, cc), [data] "=d" (__data),
+ [req_off] "+d" (req_off.pair), [exc] "+d" (exception)
+ :
+ : CC_CLOBBER);
*status = req_off.even >> 24 & 0xff;
*data = __data;
- return cc;
+ return exception ? -ENXIO : CC_TRANSFORM(cc);
}
static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status)
@@ -222,20 +225,23 @@ static inline int zpci_load_fh(u64 *data, const volatile void __iomem *addr,
static inline int __pcilg_mio(u64 *data, u64 ioaddr, u64 len, u8 *status)
{
union register_pair ioaddr_len = {.even = ioaddr, .odd = len};
- int cc = -ENXIO;
+ int cc, exception;
u64 __data;
- asm volatile (
+ exception = 1;
+ asm_inline volatile (
" .insn rre,0xb9d60000,%[data],%[ioaddr_len]\n"
- "0: ipm %[cc]\n"
- " srl %[cc],28\n"
+ "0: lhi %[exc],0\n"
"1:\n"
+ CC_IPM(cc)
EX_TABLE(0b, 1b)
- : [cc] "+d" (cc), [data] "=d" (__data),
- [ioaddr_len] "+&d" (ioaddr_len.pair) :: "cc");
+ : CC_OUT(cc, cc), [data] "=d" (__data),
+ [ioaddr_len] "+d" (ioaddr_len.pair), [exc] "+d" (exception)
+ :
+ : CC_CLOBBER);
*status = ioaddr_len.odd >> 24 & 0xff;
*data = __data;
- return cc;
+ return exception ? -ENXIO : CC_TRANSFORM(cc);
}
int zpci_load(u64 *data, const volatile void __iomem *addr, unsigned long len)
@@ -258,19 +264,20 @@ EXPORT_SYMBOL_GPL(zpci_load);
static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
{
union register_pair req_off = {.even = req, .odd = offset};
- int cc = -ENXIO;
+ int cc, exception;
- asm volatile (
+ exception = 1;
+ asm_inline volatile (
" .insn rre,0xb9d00000,%[data],%[req_off]\n"
- "0: ipm %[cc]\n"
- " srl %[cc],28\n"
+ "0: lhi %[exc],0\n"
"1:\n"
+ CC_IPM(cc)
EX_TABLE(0b, 1b)
- : [cc] "+d" (cc), [req_off] "+&d" (req_off.pair)
+ : CC_OUT(cc, cc), [req_off] "+d" (req_off.pair), [exc] "+d" (exception)
: [data] "d" (data)
- : "cc");
+ : CC_CLOBBER);
*status = req_off.even >> 24 & 0xff;
- return cc;
+ return exception ? -ENXIO : CC_TRANSFORM(cc);
}
int __zpci_store(u64 data, u64 req, u64 offset)
@@ -311,19 +318,20 @@ static inline int zpci_store_fh(const volatile void __iomem *addr, u64 data,
static inline int __pcistg_mio(u64 data, u64 ioaddr, u64 len, u8 *status)
{
union register_pair ioaddr_len = {.even = ioaddr, .odd = len};
- int cc = -ENXIO;
+ int cc, exception;
- asm volatile (
+ exception = 1;
+ asm_inline volatile (
" .insn rre,0xb9d40000,%[data],%[ioaddr_len]\n"
- "0: ipm %[cc]\n"
- " srl %[cc],28\n"
+ "0: lhi %[exc],0\n"
"1:\n"
+ CC_IPM(cc)
EX_TABLE(0b, 1b)
- : [cc] "+d" (cc), [ioaddr_len] "+&d" (ioaddr_len.pair)
+ : CC_OUT(cc, cc), [ioaddr_len] "+d" (ioaddr_len.pair), [exc] "+d" (exception)
: [data] "d" (data)
- : "cc", "memory");
+ : CC_CLOBBER_LIST("memory"));
*status = ioaddr_len.odd >> 24 & 0xff;
- return cc;
+ return exception ? -ENXIO : CC_TRANSFORM(cc);
}
int zpci_store(const volatile void __iomem *addr, u64 data, unsigned long len)
@@ -345,19 +353,20 @@ EXPORT_SYMBOL_GPL(zpci_store);
/* PCI Store Block */
static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
{
- int cc = -ENXIO;
+ int cc, exception;
- asm volatile (
+ exception = 1;
+ asm_inline volatile (
" .insn rsy,0xeb00000000d0,%[req],%[offset],%[data]\n"
- "0: ipm %[cc]\n"
- " srl %[cc],28\n"
+ "0: lhi %[exc],0\n"
"1:\n"
+ CC_IPM(cc)
EX_TABLE(0b, 1b)
- : [cc] "+d" (cc), [req] "+d" (req)
+ : CC_OUT(cc, cc), [req] "+d" (req), [exc] "+d" (exception)
: [offset] "d" (offset), [data] "Q" (*data)
- : "cc");
+ : CC_CLOBBER);
*status = req >> 24 & 0xff;
- return cc;
+ return exception ? -ENXIO : CC_TRANSFORM(cc);
}
int __zpci_store_block(const u64 *data, u64 req, u64 offset)
@@ -398,19 +407,20 @@ static inline int zpci_write_block_fh(volatile void __iomem *dst,
static inline int __pcistb_mio(const u64 *data, u64 ioaddr, u64 len, u8 *status)
{
- int cc = -ENXIO;
+ int cc, exception;
- asm volatile (
+ exception = 1;
+ asm_inline volatile (
" .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[data]\n"
- "0: ipm %[cc]\n"
- " srl %[cc],28\n"
+ "0: lhi %[exc],0\n"
"1:\n"
+ CC_IPM(cc)
EX_TABLE(0b, 1b)
- : [cc] "+d" (cc), [len] "+d" (len)
+ : CC_OUT(cc, cc), [len] "+d" (len), [exc] "+d" (exception)
: [ioaddr] "d" (ioaddr), [data] "Q" (*data)
- : "cc");
+ : CC_CLOBBER);
*status = len >> 24 & 0xff;
- return cc;
+ return exception ? -ENXIO : CC_TRANSFORM(cc);
}
int zpci_write_block(volatile void __iomem *dst,
@@ -432,7 +442,7 @@ EXPORT_SYMBOL_GPL(zpci_write_block);
static inline void __pciwb_mio(void)
{
- asm volatile (".insn rre,0xb9d50000,0,0\n");
+ asm volatile (".insn rre,0xb9d50000,0,0");
}
void zpci_barrier(void)
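
All of the converted helpers above share the same shape: the exception flag starts at 1, is cleared by the lhi that only executes when the PCI instruction completed without a program check, and the CC_* macros from <asm/asm.h> handle condition-code extraction and clobbers. As a reading aid, here is the ____pcilg() pattern restated with comments; it assumes the same includes as pci_insn.c and is purely illustrative, not a replacement for the code above.

    static inline int example_pcilg_pattern(u64 *data, u64 req, u64 offset, u8 *status)
    {
            union register_pair req_off = {.even = req, .odd = offset};
            int cc, exception;
            u64 __data;

            /* Assume a program check until the instruction demonstrably completed. */
            exception = 1;
            asm_inline volatile (
                    "       .insn   rre,0xb9d20000,%[data],%[req_off]\n"
                    "0:     lhi     %[exc],0\n"     /* only reached if pcilg did not fault */
                    "1:\n"
                    CC_IPM(cc)                      /* cc extraction; may be empty with flag outputs */
                    EX_TABLE(0b, 1b)                /* on a fault resume at 1:, leaving exc == 1 */
                    : CC_OUT(cc, cc), [data] "=d" (__data),
                      [req_off] "+d" (req_off.pair), [exc] "+d" (exception)
                    :
                    : CC_CLOBBER);
            *status = req_off.even >> 24 & 0xff;
            *data = __data;
            /* -ENXIO on a program check, otherwise the architectural cc (0..3). */
            return exception ? -ENXIO : CC_TRANSFORM(cc);
    }
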
diff --git a/arch/s390/pci/pci_iov.c b/arch/s390/pci/pci_iov.c
index ead062bf2b41..13050ce5c3e9 100644
--- a/arch/s390/pci/pci_iov.c
+++ b/arch/s390/pci/pci_iov.c
@@ -7,8 +7,7 @@
*
*/
-#define KMSG_COMPONENT "zpci"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "zpci: " fmt
#include <linux/kernel.h>
#include <linux/pci.h>
@@ -60,18 +59,35 @@ static int zpci_iov_link_virtfn(struct pci_dev *pdev, struct pci_dev *virtfn, in
return 0;
}
-int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn)
+/**
+ * zpci_iov_find_parent_pf - Find the parent PF, if any, of the given function
+ * @zbus: The bus that the PCI function is on, or would be added on
+ * @zdev: The PCI function
+ *
+ * Finds the parent PF, if it exists and is configured, of the given PCI function
+ * and increments its refcount. The PF is searched for on the provided bus, so the
+ * caller has to ensure that this is the correct bus to search. This function may
+ * be used before adding the PCI function to a zbus.
+ *
+ * Return: Pointer to the struct pci_dev of the parent PF, or NULL if it is not
+ * found. If the function is not a VF or has no RequesterID information,
+ * NULL is returned as well.
+ */
+struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev)
{
- int i, cand_devfn;
- struct zpci_dev *zdev;
+ int i, vfid, devfn, cand_devfn;
struct pci_dev *pdev;
- int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/
- int rc = 0;
if (!zbus->multifunction)
- return 0;
-
- /* If the parent PF for the given VF is also configured in the
+ return NULL;
+ /* Non-VFs and VFs without RID available don't have a parent */
+ if (!zdev->vfn || !zdev->rid_available)
+ return NULL;
+ /* Linux vfid starts at 0, vfn at 1 */
+ vfid = zdev->vfn - 1;
+ devfn = zdev->rid & ZPCI_RID_MASK_DEVFN;
+ /*
+ * If the parent PF for the given VF is also configured in the
* instance, it must be on the same zbus.
* We can then identify the parent PF by checking what
* devfn the VF would have if it belonged to that PF using the PF's
@@ -85,15 +101,26 @@ int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn
if (!pdev)
continue;
cand_devfn = pci_iov_virtfn_devfn(pdev, vfid);
- if (cand_devfn == virtfn->devfn) {
- rc = zpci_iov_link_virtfn(pdev, virtfn, vfid);
- /* balance pci_get_slot() */
- pci_dev_put(pdev);
- break;
- }
+ if (cand_devfn == devfn)
+ return pdev;
/* balance pci_get_slot() */
pci_dev_put(pdev);
}
}
+ return NULL;
+}
+
+int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn)
+{
+ struct zpci_dev *zdev = to_zpci(virtfn);
+ struct pci_dev *pdev_pf;
+ int rc = 0;
+
+ pdev_pf = zpci_iov_find_parent_pf(zbus, zdev);
+ if (pdev_pf) {
+ /* Linux' vfids start at 0 while zdev->vfn starts at 1 */
+ rc = zpci_iov_link_virtfn(pdev_pf, virtfn, zdev->vfn - 1);
+ pci_dev_put(pdev_pf);
+ }
return rc;
}
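
Because zpci_iov_find_parent_pf() hands back the reference taken by pci_get_slot(), every caller has to balance it with pci_dev_put(), exactly as zpci_iov_setup_virtfn() does above. A hypothetical additional caller, purely for illustration:

    /* Hypothetical caller (not part of this patch). */
    static void example_log_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev)
    {
            struct pci_dev *pf = zpci_iov_find_parent_pf(zbus, zdev);

            if (!pf)
                    return;         /* not a VF, no RID available, or PF not configured */
            dev_info(&pf->dev, "is the parent PF of VF %u\n", zdev->vfn);
            pci_dev_put(pf);        /* balance the pci_get_slot() inside the helper */
    }
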
diff --git a/arch/s390/pci/pci_iov.h b/arch/s390/pci/pci_iov.h
index b2c828003bad..d2c2793eb0f3 100644
--- a/arch/s390/pci/pci_iov.h
+++ b/arch/s390/pci/pci_iov.h
@@ -10,6 +10,8 @@
#ifndef __S390_PCI_IOV_H
#define __S390_PCI_IOV_H
+#include <linux/pci.h>
+
#ifdef CONFIG_PCI_IOV
void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn);
@@ -17,6 +19,8 @@ void zpci_iov_map_resources(struct pci_dev *pdev);
int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn);
+struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev);
+
#else /* CONFIG_PCI_IOV */
static inline void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn) {}
@@ -26,5 +30,10 @@ static inline int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *v
{
return 0;
}
+
+static inline struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev)
+{
+ return NULL;
+}
#endif /* CONFIG_PCI_IOV */
#endif /* __S390_PCI_IOV_h */
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index 0ef83b6ac0db..2a06df8c2498 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-#define KMSG_COMPONENT "zpci"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "zpci: " fmt
#include <linux/kernel.h>
#include <linux/irq.h>
@@ -107,9 +106,6 @@ static int zpci_set_irq(struct zpci_dev *zdev)
else
rc = zpci_set_airq(zdev);
- if (!rc)
- zdev->irqs_registered = 1;
-
return rc;
}
@@ -123,9 +119,6 @@ static int zpci_clear_irq(struct zpci_dev *zdev)
else
rc = zpci_clear_airq(zdev);
- if (!rc)
- zdev->irqs_registered = 0;
-
return rc;
}
@@ -268,33 +261,20 @@ static void zpci_floating_irq_handler(struct airq_struct *airq,
}
}
-int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+static int __alloc_airq(struct zpci_dev *zdev, int msi_vecs,
+ unsigned long *bit)
{
- struct zpci_dev *zdev = to_zpci(pdev);
- unsigned int hwirq, msi_vecs, cpu;
- unsigned long bit;
- struct msi_desc *msi;
- struct msi_msg msg;
- int cpu_addr;
- int rc, irq;
-
- zdev->aisb = -1UL;
- zdev->msi_first_bit = -1U;
- if (type == PCI_CAP_ID_MSI && nvec > 1)
- return 1;
- msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
-
if (irq_delivery == DIRECTED) {
/* Allocate cpu vector bits */
- bit = airq_iv_alloc(zpci_ibv[0], msi_vecs);
- if (bit == -1UL)
+ *bit = airq_iv_alloc(zpci_ibv[0], msi_vecs);
+ if (*bit == -1UL)
return -EIO;
} else {
/* Allocate adapter summary indicator bit */
- bit = airq_iv_alloc_bit(zpci_sbv);
- if (bit == -1UL)
+ *bit = airq_iv_alloc_bit(zpci_sbv);
+ if (*bit == -1UL)
return -EIO;
- zdev->aisb = bit;
+ zdev->aisb = *bit;
/* Create adapter interrupt vector */
zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK, NULL);
@@ -302,27 +282,66 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
return -ENOMEM;
/* Wire up shortcut pointer */
- zpci_ibv[bit] = zdev->aibv;
+ zpci_ibv[*bit] = zdev->aibv;
/* Each function has its own interrupt vector */
- bit = 0;
+ *bit = 0;
}
+ return 0;
+}
- /* Request MSI interrupts */
+int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+ unsigned int hwirq, msi_vecs, irqs_per_msi, i, cpu;
+ struct zpci_dev *zdev = to_zpci(pdev);
+ struct msi_desc *msi;
+ struct msi_msg msg;
+ unsigned long bit;
+ int cpu_addr;
+ int rc, irq;
+
+ zdev->aisb = -1UL;
+ zdev->msi_first_bit = -1U;
+
+ msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
+ if (msi_vecs < nvec) {
+ pr_info("%s requested %d irqs, allocating system limit of %d",
+ pci_name(pdev), nvec, zdev->max_msi);
+ }
+
+ rc = __alloc_airq(zdev, msi_vecs, &bit);
+ if (rc < 0)
+ return rc;
+
+ /*
+ * Request MSI interrupts:
+ * When using MSI, nvec_used interrupt sources and their irq
+ * descriptors are controlled through one msi descriptor.
+ * Thus the outer loop over msi descriptors shall run only once,
+ * while two inner loops iterate over the interrupt vectors.
+ * When using MSI-X, each interrupt vector/irq descriptor
+ * is bound to exactly one msi descriptor (nvec_used is one).
+ * So the inner loops are executed once, while the outer iterates
+ * over the MSI-X descriptors.
+ */
hwirq = bit;
msi_for_each_desc(msi, &pdev->dev, MSI_DESC_NOTASSOCIATED) {
- rc = -EIO;
if (hwirq - bit >= msi_vecs)
break;
- irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE,
- (irq_delivery == DIRECTED) ?
- msi->affinity : NULL);
+ irqs_per_msi = min_t(unsigned int, msi_vecs, msi->nvec_used);
+ irq = __irq_alloc_descs(-1, 0, irqs_per_msi, 0, THIS_MODULE,
+ (irq_delivery == DIRECTED) ?
+ msi->affinity : NULL);
if (irq < 0)
return -ENOMEM;
- rc = irq_set_msi_desc(irq, msi);
- if (rc)
- return rc;
- irq_set_chip_and_handler(irq, &zpci_irq_chip,
- handle_percpu_irq);
+
+ for (i = 0; i < irqs_per_msi; i++) {
+ rc = irq_set_msi_desc_off(irq, i, msi);
+ if (rc)
+ return rc;
+ irq_set_chip_and_handler(irq + i, &zpci_irq_chip,
+ handle_percpu_irq);
+ }
+
msg.data = hwirq - bit;
if (irq_delivery == DIRECTED) {
if (msi->affinity)
@@ -335,31 +354,35 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
msg.address_lo |= (cpu_addr << 8);
for_each_possible_cpu(cpu) {
- airq_iv_set_data(zpci_ibv[cpu], hwirq, irq);
+ for (i = 0; i < irqs_per_msi; i++)
+ airq_iv_set_data(zpci_ibv[cpu],
+ hwirq + i, irq + i);
}
} else {
msg.address_lo = zdev->msi_addr & 0xffffffff;
- airq_iv_set_data(zdev->aibv, hwirq, irq);
+ for (i = 0; i < irqs_per_msi; i++)
+ airq_iv_set_data(zdev->aibv, hwirq + i, irq + i);
}
msg.address_hi = zdev->msi_addr >> 32;
pci_write_msi_msg(irq, &msg);
- hwirq++;
+ hwirq += irqs_per_msi;
}
zdev->msi_first_bit = bit;
- zdev->msi_nr_irqs = msi_vecs;
+ zdev->msi_nr_irqs = hwirq - bit;
rc = zpci_set_irq(zdev);
if (rc)
return rc;
- return (msi_vecs == nvec) ? 0 : msi_vecs;
+ return (zdev->msi_nr_irqs == nvec) ? 0 : zdev->msi_nr_irqs;
}
void arch_teardown_msi_irqs(struct pci_dev *pdev)
{
struct zpci_dev *zdev = to_zpci(pdev);
struct msi_desc *msi;
+ unsigned int i;
int rc;
/* Disable interrupts */
@@ -369,8 +392,10 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
/* Release MSI interrupts */
msi_for_each_desc(msi, &pdev->dev, MSI_DESC_ASSOCIATED) {
- irq_set_msi_desc(msi->irq, NULL);
- irq_free_desc(msi->irq);
+ for (i = 0; i < msi->nvec_used; i++) {
+ irq_set_msi_desc(msi->irq + i, NULL);
+ irq_free_desc(msi->irq + i);
+ }
msi->msg.address_lo = 0;
msi->msg.address_hi = 0;
msi->msg.data = 0;
@@ -395,8 +420,7 @@ bool arch_restore_msi_irqs(struct pci_dev *pdev)
{
struct zpci_dev *zdev = to_zpci(pdev);
- if (!zdev->irqs_registered)
- zpci_set_irq(zdev);
+ zpci_set_irq(zdev);
return true;
}
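
The block comment in arch_setup_msi_irqs() above explains why multi-vector MSI (one descriptor carrying nvec_used vectors) and MSI-X (one descriptor per vector) end up in the same loops. The inner association step boils down to the sketch below; it merely restates the hunk with the surrounding bookkeeping stripped, assuming the includes already present in pci_irq.c.

    /* Restated for illustration: bind irqs_per_msi consecutive Linux irqs to
     * one MSI descriptor. For MSI-X irqs_per_msi is 1; for multi-vector MSI it
     * is the number of vectors carried by the single descriptor. */
    static int example_bind_one_msi_desc(struct msi_desc *msi, int irq,
                                         unsigned int irqs_per_msi)
    {
            unsigned int i;
            int rc;

            for (i = 0; i < irqs_per_msi; i++) {
                    rc = irq_set_msi_desc_off(irq, i, msi); /* associates irq + i */
                    if (rc)
                            return rc;
                    irq_set_chip_and_handler(irq + i, &zpci_irq_chip, handle_percpu_irq);
            }
            return 0;
    }
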
diff --git a/arch/s390/pci/pci_kvm_hook.c b/arch/s390/pci/pci_kvm_hook.c
index ff34baf50a3e..df5b25dbe9ca 100644
--- a/arch/s390/pci/pci_kvm_hook.c
+++ b/arch/s390/pci/pci_kvm_hook.c
@@ -5,7 +5,9 @@
* Copyright (C) IBM Corp. 2022. All rights reserved.
* Author(s): Pierre Morel <pmorel@linux.ibm.com>
*/
+
#include <linux/kvm_host.h>
+#include <linux/export.h>
struct zpci_kvm_hook zpci_kvm_hook;
EXPORT_SYMBOL_GPL(zpci_kvm_hook);
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
index 5398729bfe1b..51e7a28af899 100644
--- a/arch/s390/pci/pci_mmio.c
+++ b/arch/s390/pci/pci_mmio.c
@@ -14,6 +14,7 @@
#include <asm/asm-extable.h>
#include <asm/pci_io.h>
#include <asm/pci_debug.h>
+#include <asm/asm.h>
static inline void zpci_err_mmio(u8 cc, u8 status, u64 offset)
{
@@ -30,20 +31,24 @@ static inline int __pcistb_mio_inuser(
void __iomem *ioaddr, const void __user *src,
u64 len, u8 *status)
{
- int cc = -ENXIO;
-
- asm volatile (
- " sacf 256\n"
- "0: .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[src]\n"
- "1: ipm %[cc]\n"
- " srl %[cc],28\n"
- "2: sacf 768\n"
+ int cc, exception;
+ bool sacf_flag;
+
+ exception = 1;
+ sacf_flag = enable_sacf_uaccess();
+ asm_inline volatile (
+ " sacf 256\n"
+ "0: .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[src]\n"
+ "1: lhi %[exc],0\n"
+ "2: sacf 768\n"
+ CC_IPM(cc)
EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
- : [cc] "+d" (cc), [len] "+d" (len)
+ : CC_OUT(cc, cc), [len] "+d" (len), [exc] "+d" (exception)
: [ioaddr] "a" (ioaddr), [src] "Q" (*((u8 __force *)src))
- : "cc", "memory");
+ : CC_CLOBBER_LIST("memory"));
+ disable_sacf_uaccess(sacf_flag);
*status = len >> 24 & 0xff;
- return cc;
+ return exception ? -ENXIO : CC_TRANSFORM(cc);
}
static inline int __pcistg_mio_inuser(
@@ -51,7 +56,8 @@ static inline int __pcistg_mio_inuser(
u64 ulen, u8 *status)
{
union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen};
- int cc = -ENXIO;
+ int cc, exception;
+ bool sacf_flag;
u64 val = 0;
u64 cnt = ulen;
u8 tmp;
@@ -61,25 +67,29 @@ static inline int __pcistg_mio_inuser(
* a register, then store it to PCI at @ioaddr while in secondary
* address space. pcistg then uses the user mappings.
*/
- asm volatile (
- " sacf 256\n"
- "0: llgc %[tmp],0(%[src])\n"
+ exception = 1;
+ sacf_flag = enable_sacf_uaccess();
+ asm_inline volatile (
+ " sacf 256\n"
+ "0: llgc %[tmp],0(%[src])\n"
"4: sllg %[val],%[val],8\n"
- " aghi %[src],1\n"
- " ogr %[val],%[tmp]\n"
- " brctg %[cnt],0b\n"
- "1: .insn rre,0xb9d40000,%[val],%[ioaddr_len]\n"
- "2: ipm %[cc]\n"
- " srl %[cc],28\n"
- "3: sacf 768\n"
+ " aghi %[src],1\n"
+ " ogr %[val],%[tmp]\n"
+ " brctg %[cnt],0b\n"
+ "1: .insn rre,0xb9d40000,%[val],%[ioaddr_len]\n"
+ "2: lhi %[exc],0\n"
+ "3: sacf 768\n"
+ CC_IPM(cc)
EX_TABLE(0b, 3b) EX_TABLE(4b, 3b) EX_TABLE(1b, 3b) EX_TABLE(2b, 3b)
+ : [src] "+a" (src), [cnt] "+d" (cnt),
+ [val] "+d" (val), [tmp] "=d" (tmp), [exc] "+d" (exception),
+ CC_OUT(cc, cc), [ioaddr_len] "+&d" (ioaddr_len.pair)
:
- [src] "+a" (src), [cnt] "+d" (cnt),
- [val] "+d" (val), [tmp] "=d" (tmp),
- [cc] "+d" (cc), [ioaddr_len] "+&d" (ioaddr_len.pair)
- :: "cc", "memory");
+ : CC_CLOBBER_LIST("memory"));
+ disable_sacf_uaccess(sacf_flag);
*status = ioaddr_len.odd >> 24 & 0xff;
+ cc = exception ? -ENXIO : CC_TRANSFORM(cc);
/* did we read everything from user memory? */
if (!cc && cnt != 0)
cc = -EFAULT;
@@ -118,12 +128,11 @@ static inline int __memcpy_toio_inuser(void __iomem *dst,
SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
const void __user *, user_buffer, size_t, length)
{
+ struct follow_pfnmap_args args = { };
u8 local_buf[64];
void __iomem *io_addr;
void *buf;
struct vm_area_struct *vma;
- pte_t *ptep;
- spinlock_t *ptl;
long ret;
if (!zpci_is_enabled())
@@ -169,11 +178,17 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
if (!(vma->vm_flags & VM_WRITE))
goto out_unlock_mmap;
- ret = follow_pte(vma, mmio_addr, &ptep, &ptl);
- if (ret)
- goto out_unlock_mmap;
+ args.address = mmio_addr;
+ args.vma = vma;
+ ret = follow_pfnmap_start(&args);
+ if (ret) {
+ fixup_user_fault(current->mm, mmio_addr, FAULT_FLAG_WRITE, NULL);
+ ret = follow_pfnmap_start(&args);
+ if (ret)
+ goto out_unlock_mmap;
+ }
- io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) |
+ io_addr = (void __iomem *)((args.pfn << PAGE_SHIFT) |
(mmio_addr & ~PAGE_MASK));
if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE)
@@ -181,7 +196,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
ret = zpci_memcpy_toio(io_addr, buf, length);
out_unlock_pt:
- pte_unmap_unlock(ptep, ptl);
+ follow_pfnmap_end(&args);
out_unlock_mmap:
mmap_read_unlock(current->mm);
out_free:
@@ -195,9 +210,10 @@ static inline int __pcilg_mio_inuser(
u64 ulen, u8 *status)
{
union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen};
+ bool sacf_flag;
u64 cnt = ulen;
int shift = ulen * 8;
- int cc = -ENXIO;
+ int cc, exception;
u64 val, tmp;
/*
@@ -205,27 +221,34 @@ static inline int __pcilg_mio_inuser(
* user space) into a register using pcilg then store these bytes at
* user address @dst
*/
- asm volatile (
- " sacf 256\n"
- "0: .insn rre,0xb9d60000,%[val],%[ioaddr_len]\n"
- "1: ipm %[cc]\n"
- " srl %[cc],28\n"
- " ltr %[cc],%[cc]\n"
- " jne 4f\n"
- "2: ahi %[shift],-8\n"
- " srlg %[tmp],%[val],0(%[shift])\n"
- "3: stc %[tmp],0(%[dst])\n"
+ exception = 1;
+ sacf_flag = enable_sacf_uaccess();
+ asm_inline volatile (
+ " sacf 256\n"
+ "0: .insn rre,0xb9d60000,%[val],%[ioaddr_len]\n"
+ "1: lhi %[exc],0\n"
+ " jne 4f\n"
+ "2: ahi %[shift],-8\n"
+ " srlg %[tmp],%[val],0(%[shift])\n"
+ "3: stc %[tmp],0(%[dst])\n"
"5: aghi %[dst],1\n"
- " brctg %[cnt],2b\n"
- "4: sacf 768\n"
+ " brctg %[cnt],2b\n"
+ /*
+ * Use xr to clear exc and set condition code to zero
+ * to ensure flag output is correct for this branch.
+ */
+ " xr %[exc],%[exc]\n"
+ "4: sacf 768\n"
+ CC_IPM(cc)
EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b) EX_TABLE(5b, 4b)
+ : [ioaddr_len] "+&d" (ioaddr_len.pair), [exc] "+d" (exception),
+ CC_OUT(cc, cc), [val] "=d" (val),
+ [dst] "+a" (dst), [cnt] "+d" (cnt), [tmp] "=d" (tmp),
+ [shift] "+a" (shift)
:
- [ioaddr_len] "+&d" (ioaddr_len.pair),
- [cc] "+d" (cc), [val] "=d" (val),
- [dst] "+a" (dst), [cnt] "+d" (cnt), [tmp] "=d" (tmp),
- [shift] "+d" (shift)
- :: "cc", "memory");
-
+ : CC_CLOBBER_LIST("memory"));
+ disable_sacf_uaccess(sacf_flag);
+ cc = exception ? -ENXIO : CC_TRANSFORM(cc);
/* did we write everything to the user space buffer? */
if (!cc && cnt != 0)
cc = -EFAULT;
@@ -260,12 +283,11 @@ static inline int __memcpy_fromio_inuser(void __user *dst,
SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
void __user *, user_buffer, size_t, length)
{
+ struct follow_pfnmap_args args = { };
u8 local_buf[64];
void __iomem *io_addr;
void *buf;
struct vm_area_struct *vma;
- pte_t *ptep;
- spinlock_t *ptl;
long ret;
if (!zpci_is_enabled())
@@ -305,14 +327,20 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
goto out_unlock_mmap;
ret = -EACCES;
- if (!(vma->vm_flags & VM_WRITE))
+ if (!(vma->vm_flags & VM_READ))
goto out_unlock_mmap;
- ret = follow_pte(vma, mmio_addr, &ptep, &ptl);
- if (ret)
- goto out_unlock_mmap;
+ args.vma = vma;
+ args.address = mmio_addr;
+ ret = follow_pfnmap_start(&args);
+ if (ret) {
+ fixup_user_fault(current->mm, mmio_addr, 0, NULL);
+ ret = follow_pfnmap_start(&args);
+ if (ret)
+ goto out_unlock_mmap;
+ }
- io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) |
+ io_addr = (void __iomem *)((args.pfn << PAGE_SHIFT) |
(mmio_addr & ~PAGE_MASK));
if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) {
@@ -322,7 +350,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
ret = zpci_memcpy_fromio(buf, io_addr, length);
out_unlock_pt:
- pte_unmap_unlock(ptep, ptl);
+ follow_pfnmap_end(&args);
out_unlock_mmap:
mmap_read_unlock(current->mm);
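
Both mmio syscalls above now resolve the user address with the follow_pfnmap_*() interface rather than follow_pte(). Condensed to its core (the fixup_user_fault() retry stays in the hunks above and is omitted here), the lookup pattern looks roughly like this; the helper name is invented for the example:

    /* Hypothetical condensation of the lookup used above. Real callers do their
     * MMIO access before follow_pfnmap_end(), while the mapping is guaranteed
     * to stay valid. */
    static long example_resolve_pfnmap(struct vm_area_struct *vma,
                                       unsigned long mmio_addr, unsigned long *pfn)
    {
            struct follow_pfnmap_args args = { .vma = vma, .address = mmio_addr };
            long ret;

            ret = follow_pfnmap_start(&args);
            if (ret)
                    return ret;     /* caller may fixup_user_fault() and retry */
            *pfn = args.pfn;
            follow_pfnmap_end(&args);
            return 0;
    }
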
diff --git a/arch/s390/pci/pci_report.c b/arch/s390/pci/pci_report.c
new file mode 100644
index 000000000000..7030f7052926
--- /dev/null
+++ b/arch/s390/pci/pci_report.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * Author(s):
+ * Niklas Schnelle <schnelle@linux.ibm.com>
+ *
+ */
+
+#define pr_fmt(fmt) "zpci: " fmt
+
+#include <linux/kernel.h>
+#include <linux/sprintf.h>
+#include <linux/pci.h>
+
+#include <asm/sclp.h>
+#include <asm/debug.h>
+#include <asm/pci_debug.h>
+
+#include "pci_report.h"
+
+#define ZPCI_ERR_LOG_ID_KERNEL_REPORT 0x4714
+
+struct zpci_report_error_data {
+ u64 timestamp;
+ u64 err_log_id;
+ char log_data[];
+} __packed;
+
+#define ZPCI_REPORT_SIZE (PAGE_SIZE - sizeof(struct err_notify_sccb))
+#define ZPCI_REPORT_DATA_SIZE (ZPCI_REPORT_SIZE - sizeof(struct zpci_report_error_data))
+
+struct zpci_report_error {
+ struct zpci_report_error_header header;
+ struct zpci_report_error_data data;
+} __packed;
+
+static const char *zpci_state_str(pci_channel_state_t state)
+{
+ switch (state) {
+ case pci_channel_io_normal:
+ return "normal";
+ case pci_channel_io_frozen:
+ return "frozen";
+ case pci_channel_io_perm_failure:
+ return "permanent-failure";
+ default:
+ return "invalid";
+ };
+}
+
+static int debug_log_header_fn(debug_info_t *id, struct debug_view *view,
+ int area, debug_entry_t *entry, char *out_buf,
+ size_t out_buf_size)
+{
+ unsigned long sec, usec;
+ unsigned int level;
+ char *except_str;
+ int rc = 0;
+
+ level = entry->level;
+ sec = entry->clock;
+ usec = do_div(sec, USEC_PER_SEC);
+
+ if (entry->exception)
+ except_str = "*";
+ else
+ except_str = "-";
+ rc += scnprintf(out_buf, out_buf_size, "%011ld:%06lu %1u %1s %04u ",
+ sec, usec, level, except_str,
+ entry->cpu);
+ return rc;
+}
+
+static int debug_prolog_header(debug_info_t *id, struct debug_view *view,
+ char *out_buf, size_t out_buf_size)
+{
+ return scnprintf(out_buf, out_buf_size, "sec:usec level except cpu msg\n");
+}
+
+static struct debug_view debug_log_view = {
+ "pci_msg_log",
+ &debug_prolog_header,
+ &debug_log_header_fn,
+ &debug_sprintf_format_fn,
+ NULL,
+ NULL
+};
+
+/**
+ * zpci_report_status - Report the status of operations on a PCI device
+ * @zdev: The PCI device for which to report status
+ * @operation: A string representing the operation reported
+ * @status: A string representing the status of the operation
+ *
+ * This function creates a human readable report about an operation such as
+ * PCI device recovery and forwards this to the platform using the SCLP Write
+ * Event Data mechanism. Besides the operation and status strings the report
+ * also contains additional information about the device deemed useful for
+ * debug such as the currently bound device driver, if any, and error state.
+ * Additionally, a string representation of pci_debug_msg_id, or as much as fits,
+ * is included.
+ *
+ * Return: 0 on success, an error code < 0 otherwise.
+ */
+int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status)
+{
+ struct zpci_report_error *report;
+ struct pci_driver *driver = NULL;
+ struct pci_dev *pdev = NULL;
+ char *buf, *end;
+ int ret;
+
+ if (!zdev || !zdev->zbus)
+ return -ENODEV;
+
+ /* Protected virtualization hosts get nothing from us */
+ if (prot_virt_guest)
+ return -ENODATA;
+
+ report = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!report)
+ return -ENOMEM;
+ if (zdev->zbus->bus)
+ pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
+ if (pdev)
+ driver = to_pci_driver(pdev->dev.driver);
+
+ buf = report->data.log_data;
+ end = report->data.log_data + ZPCI_REPORT_DATA_SIZE;
+ buf += scnprintf(buf, end - buf, "report: %s\n", operation);
+ buf += scnprintf(buf, end - buf, "status: %s\n", status);
+ buf += scnprintf(buf, end - buf, "state: %s\n",
+ (pdev) ? zpci_state_str(pdev->error_state) : "n/a");
+ buf += scnprintf(buf, end - buf, "driver: %s\n", (driver) ? driver->name : "n/a");
+ ret = debug_dump(pci_debug_msg_id, &debug_log_view, buf, end - buf, true);
+ if (ret < 0)
+ pr_err("Reading PCI debug messages failed with code %d\n", ret);
+ else
+ buf += ret;
+
+ report->header.version = 1;
+ report->header.action = SCLP_ERRNOTIFY_AQ_INFO_LOG;
+ report->header.length = buf - (char *)&report->data;
+ report->data.timestamp = ktime_get_clocktai_seconds();
+ report->data.err_log_id = ZPCI_ERR_LOG_ID_KERNEL_REPORT;
+
+ ret = sclp_pci_report(&report->header, zdev->fh, zdev->fid);
+ if (ret)
+ pr_err("Reporting PCI status failed with code %d\n", ret);
+ else
+ pr_info("Reported PCI device status\n");
+
+ free_page((unsigned long)report);
+
+ return ret;
+}
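
A hypothetical caller of the new reporting helper; the operation and status strings, and the surrounding function, are invented for illustration and are not part of this series:

    /* Hypothetical recovery-path caller of zpci_report_status(). */
    static void example_report_recovery(struct zpci_dev *zdev, bool success)
    {
            int rc;

            rc = zpci_report_status(zdev, "error-recovery",
                                    success ? "successful" : "failed");
            /* -ENODATA simply means nothing is sent under protected virtualization. */
            if (rc < 0 && rc != -ENODATA)
                    pr_warn("status report could not be delivered (%d)\n", rc);
    }
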
diff --git a/arch/s390/pci/pci_report.h b/arch/s390/pci/pci_report.h
new file mode 100644
index 000000000000..e08003d51a97
--- /dev/null
+++ b/arch/s390/pci/pci_report.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * Author(s):
+ * Niklas Schnelle <schnelle@linux.ibm.com>
+ *
+ */
+#ifndef __S390_PCI_REPORT_H
+#define __S390_PCI_REPORT_H
+
+struct zpci_dev;
+
+int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status);
+
+#endif /* __S390_PCI_REPORT_H */
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index 0f4f1e8fc480..c2444a23e26c 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -6,8 +6,7 @@
* Jan Glauber <jang@linux.vnet.ibm.com>
*/
-#define KMSG_COMPONENT "zpci"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "zpci: " fmt
#include <linux/kernel.h>
#include <linux/stat.h>
@@ -23,7 +22,7 @@ static ssize_t name##_show(struct device *dev, \
{ \
struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); \
\
- return sprintf(buf, fmt, zdev->member); \
+ return sysfs_emit(buf, fmt, zdev->member); \
} \
static DEVICE_ATTR_RO(name)
@@ -34,24 +33,27 @@ zpci_attr(pfgid, "0x%02x\n", pfgid);
zpci_attr(vfn, "0x%04x\n", vfn);
zpci_attr(pft, "0x%02x\n", pft);
zpci_attr(port, "%d\n", port);
+zpci_attr(fidparm, "0x%02x\n", fidparm);
zpci_attr(uid, "0x%x\n", uid);
zpci_attr(segment0, "0x%02x\n", pfip[0]);
zpci_attr(segment1, "0x%02x\n", pfip[1]);
zpci_attr(segment2, "0x%02x\n", pfip[2]);
zpci_attr(segment3, "0x%02x\n", pfip[3]);
+#define ZPCI_FW_ATTR_RO(_name) \
+ static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
+
static ssize_t mio_enabled_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- return sprintf(buf, zpci_use_mio(zdev) ? "1\n" : "0\n");
+ return sysfs_emit(buf, zpci_use_mio(zdev) ? "1\n" : "0\n");
}
static DEVICE_ATTR_RO(mio_enabled);
static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev)
{
- u8 status;
int ret;
pci_stop_and_remove_bus_device(pdev);
@@ -69,16 +71,8 @@ static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev)
return ret;
}
- ret = zpci_enable_device(zdev);
- if (ret)
- return ret;
+ ret = zpci_reenable_device(zdev);
- if (zdev->dma_table) {
- ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table), &status);
- if (ret)
- zpci_disable_device(zdev);
- }
return ret;
}
@@ -134,7 +128,7 @@ out:
static DEVICE_ATTR_WO(recover);
static ssize_t util_string_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr, char *buf,
+ const struct bin_attribute *attr, char *buf,
loff_t off, size_t count)
{
struct device *dev = kobj_to_dev(kobj);
@@ -144,10 +138,10 @@ static ssize_t util_string_read(struct file *filp, struct kobject *kobj,
return memory_read_from_buffer(buf, count, &off, zdev->util_str,
sizeof(zdev->util_str));
}
-static BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN);
+static const BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN);
static ssize_t report_error_write(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr, char *buf,
+ const struct bin_attribute *attr, char *buf,
loff_t off, size_t count)
{
struct zpci_report_error_header *report = (void *) buf;
@@ -163,7 +157,7 @@ static ssize_t report_error_write(struct file *filp, struct kobject *kobj,
return ret ? ret : count;
}
-static BIN_ATTR(report_error, S_IWUSR, NULL, report_error_write, PAGE_SIZE);
+static const BIN_ATTR(report_error, S_IWUSR, NULL, report_error_write, PAGE_SIZE);
static ssize_t uid_is_unique_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -172,6 +166,13 @@ static ssize_t uid_is_unique_show(struct device *dev,
}
static DEVICE_ATTR_RO(uid_is_unique);
+static ssize_t uid_checking_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", zpci_unique_uid ? 1 : 0);
+}
+ZPCI_FW_ATTR_RO(uid_checking);
+
/* analogous to smbios index */
static ssize_t index_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -197,12 +198,12 @@ static struct attribute *zpci_ident_attrs[] = {
NULL,
};
-static struct attribute_group zpci_ident_attr_group = {
+const struct attribute_group zpci_ident_attr_group = {
.attrs = zpci_ident_attrs,
.is_visible = zpci_index_is_visible,
};
-static struct bin_attribute *zpci_bin_attrs[] = {
+static const struct bin_attribute *const zpci_bin_attrs[] = {
&bin_attr_util_string,
&bin_attr_report_error,
NULL,
@@ -215,6 +216,7 @@ static struct attribute *zpci_dev_attrs[] = {
&dev_attr_pfgid.attr,
&dev_attr_pft.attr,
&dev_attr_port.attr,
+ &dev_attr_fidparm.attr,
&dev_attr_vfn.attr,
&dev_attr_uid.attr,
&dev_attr_recover.attr,
@@ -223,7 +225,7 @@ static struct attribute *zpci_dev_attrs[] = {
NULL,
};
-static struct attribute_group zpci_attr_group = {
+const struct attribute_group zpci_attr_group = {
.attrs = zpci_dev_attrs,
.bin_attrs = zpci_bin_attrs,
};
@@ -235,14 +237,23 @@ static struct attribute *pfip_attrs[] = {
&dev_attr_segment3.attr,
NULL,
};
-static struct attribute_group pfip_attr_group = {
+
+const struct attribute_group pfip_attr_group = {
.name = "pfip",
.attrs = pfip_attrs,
};
-const struct attribute_group *zpci_attr_groups[] = {
- &zpci_attr_group,
- &pfip_attr_group,
- &zpci_ident_attr_group,
+static struct attribute *clp_fw_attrs[] = {
+ &uid_checking_attr.attr,
NULL,
};
+
+static struct attribute_group clp_fw_attr_group = {
+ .name = "clp",
+ .attrs = clp_fw_attrs,
+};
+
+int __init __zpci_fw_sysfs_init(void)
+{
+ return sysfs_create_group(firmware_kobj, &clp_fw_attr_group);
+}
diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
index 24eccaa29337..0c196a5b194a 100644
--- a/arch/s390/purgatory/Makefile
+++ b/arch/s390/purgatory/Makefile
@@ -8,20 +8,23 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
$(call if_changed_rule,cc_o_c)
-CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY
+CFLAGS_sha256.o := -D__NO_FORTIFY
$(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE
$(call if_changed_rule,as_o_S)
-KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes
+KBUILD_CFLAGS := -std=gnu11 -fms-extensions -fno-strict-aliasing -Wall -Wstrict-prototypes
KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare
KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding
KBUILD_CFLAGS += -Os -m64 -msoft-float -fno-common
KBUILD_CFLAGS += -fno-stack-protector
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
+KBUILD_CFLAGS += -D__DISABLE_EXPORTS
KBUILD_CFLAGS += $(CLANG_FLAGS)
+KBUILD_CFLAGS += $(if $(CONFIG_CC_IS_CLANG),-Wno-microsoft-anon-tag)
KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS))
+KBUILD_AFLAGS += -D__DISABLE_EXPORTS
# Since we link purgatory with -r unresolved symbols are not checked, so we
# also link a purgatory.chk binary without -r to check for unresolved symbols.
diff --git a/arch/s390/purgatory/head.S b/arch/s390/purgatory/head.S
index 0f93f2e72eba..db3ab2402621 100644
--- a/arch/s390/purgatory/head.S
+++ b/arch/s390/purgatory/head.S
@@ -156,7 +156,7 @@ SYM_CODE_START(purgatory_start)
agr %r10,%r9
/* Buffer location (in crash memory) and size. As the purgatory is
- * behind the point of no return it can re-use the stack as buffer.
+ * behind the point of no return it can reuse the stack as buffer.
*/
larl %r11,purgatory_end
larl %r12,stack
diff --git a/arch/s390/purgatory/purgatory.c b/arch/s390/purgatory/purgatory.c
index 030efda05dbe..ecb38102187c 100644
--- a/arch/s390/purgatory/purgatory.c
+++ b/arch/s390/purgatory/purgatory.c
@@ -16,7 +16,7 @@ int verify_sha256_digest(void)
{
struct kexec_sha_region *ptr, *end;
u8 digest[SHA256_DIGEST_SIZE];
- struct sha256_state sctx;
+ struct sha256_ctx sctx;
sha256_init(&sctx);
end = purgatory_sha_regions + ARRAY_SIZE(purgatory_sha_regions);
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index 68580cbea4e6..2d28a569f793 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -29,6 +29,7 @@ static struct facility_def facility_defs[] = {
.bits = (int[]){
0, /* N3 instructions */
1, /* z/Arch mode installed */
+ 3, /* dat-enhancement 1 */
18, /* long displacement facility */
21, /* extended-immediate facility */
25, /* store clock fast */
@@ -54,6 +55,9 @@ static struct facility_def facility_defs[] = {
#ifdef CONFIG_HAVE_MARCH_Z15_FEATURES
61, /* miscellaneous-instruction-extension 3 */
#endif
+#ifdef CONFIG_HAVE_MARCH_Z17_FEATURES
+ 84, /* miscellaneous-instruction-extension 4 */
+#endif
-1 /* END */
}
},
@@ -109,10 +113,12 @@ static struct facility_def facility_defs[] = {
15, /* AP Facilities Test */
156, /* etoken facility */
165, /* nnpa facility */
+ 170, /* ineffective-nonconstrained-transaction facility */
193, /* bear enhancement facility */
194, /* rdp enhancement facility */
196, /* processor activity instrumentation facility */
197, /* processor activity instrumentation extension 1 */
+ 201, /* concurrent-functions facility */
-1 /* END */
}
},
diff --git a/arch/s390/tools/gen_opcode_table.c b/arch/s390/tools/gen_opcode_table.c
index a1bc02b29c81..7d76c417f83f 100644
--- a/arch/s390/tools/gen_opcode_table.c
+++ b/arch/s390/tools/gen_opcode_table.c
@@ -201,6 +201,17 @@ static int cmp_long_insn(const void *a, const void *b)
return strcmp(((struct insn *)a)->name, ((struct insn *)b)->name);
}
+static void print_insn_name(const char *name)
+{
+ size_t i, len;
+
+ len = strlen(name);
+ printf("{");
+ for (i = 0; i < len; i++)
+ printf(" \'%c\',", name[i]);
+ printf(" }");
+}
+
static void print_long_insn(struct gen_opcode *desc)
{
struct insn *insn;
@@ -223,7 +234,9 @@ static void print_long_insn(struct gen_opcode *desc)
insn = &desc->insn[i];
if (insn->name_len < 6)
continue;
- printf("\t[LONG_INSN_%s] = \"%s\", \\\n", insn->upper, insn->name);
+ printf("\t[LONG_INSN_%s] = ", insn->upper);
+ print_insn_name(insn->name);
+ printf(", \\\n");
}
printf("}\n\n");
}
@@ -236,11 +249,13 @@ static void print_opcode(struct insn *insn, int nr)
if (insn->type->byte != 0)
opcode += 2;
printf("\t[%4d] = { .opfrag = 0x%s, .format = INSTR_%s, ", nr, opcode, insn->format);
- if (insn->name_len < 6)
- printf(".name = \"%s\" ", insn->name);
- else
- printf(".offset = LONG_INSN_%s ", insn->upper);
- printf("}, \\\n");
+ if (insn->name_len < 6) {
+ printf(".name = ");
+ print_insn_name(insn->name);
+ } else {
+ printf(".offset = LONG_INSN_%s", insn->upper);
+ }
+ printf(" }, \\\n");
}
static void add_to_group(struct gen_opcode *desc, struct insn *insn, int offset)
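
The print_insn_name() change above makes the generated tables spell long mnemonics out as character arrays instead of string literals, presumably so the fixed-size name fields are not initialised with a trailing NUL (this rationale is an assumption; the commit message is not shown here). Illustrative before/after output for a made-up mnemonic:

    /* before: [LONG_INSN_EXAMPLE1] = "example1", \
     * after:  [LONG_INSN_EXAMPLE1] = { 'e', 'x', 'a', 'm', 'p', 'l', 'e', '1', }, \
     */
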
diff --git a/arch/s390/tools/opcodes.txt b/arch/s390/tools/opcodes.txt
index 5f008e794898..def2659f6602 100644
--- a/arch/s390/tools/opcodes.txt
+++ b/arch/s390/tools/opcodes.txt
@@ -527,9 +527,9 @@ b938 sortl RRE_RR
b939 dfltcc RRF_R0RR2
b93a kdsa RRE_RR
b93b nnpa RRE_00
-b93c ppno RRE_RR
-b93e kimd RRE_RR
-b93f klmd RRE_RR
+b93c prno RRE_RR
+b93e kimd RRF_U0RR
+b93f klmd RRF_U0RR
b941 cfdtr RRF_UURF
b942 clgdtr RRF_UURF
b943 clfdtr RRF_UURF
@@ -549,6 +549,10 @@ b964 nngrk RRF_R0RR2
b965 ocgrk RRF_R0RR2
b966 nogrk RRF_R0RR2
b967 nxgrk RRF_R0RR2
+b968 clzg RRE_RR
+b969 ctzg RRE_RR
+b96c bextg RRF_R0RR2
+b96d bdepg RRF_R0RR2
b972 crt RRF_U0RR
b973 clrt RRF_U0RR
b974 nnrk RRF_R0RR2
@@ -796,6 +800,16 @@ e35b sy RXY_RRRD
e35c mfy RXY_RRRD
e35e aly RXY_RRRD
e35f sly RXY_RRRD
+e360 lxab RXY_RRRD
+e361 llxab RXY_RRRD
+e362 lxah RXY_RRRD
+e363 llxah RXY_RRRD
+e364 lxaf RXY_RRRD
+e365 llxaf RXY_RRRD
+e366 lxag RXY_RRRD
+e367 llxag RXY_RRRD
+e368 lxaq RXY_RRRD
+e369 llxaq RXY_RRRD
e370 sthy RXY_RRRD
e371 lay RXY_RRRD
e372 stcy RXY_RRRD
@@ -880,6 +894,8 @@ e63c vupkz VSI_URDV
e63d vstrl VSI_URDV
e63f vstrlr VRS_RRDV
e649 vlip VRI_V0UU2
+e64a vcvdq VRI_VV0UU
+e64e vcvbq VRR_VV0U2
e650 vcvb VRR_RV0UU
e651 vclzdp VRR_VV0U2
e652 vcvbg VRR_RV0UU
@@ -893,7 +909,7 @@ e65b vpsop VRI_VVUUU2
e65c vupkzl VRR_VV0U2
e65d vcfn VRR_VV0UU2
e65e vclfnl VRR_VV0UU2
-e65f vtp VRR_0V
+e65f vtp VRR_0V0U
e670 vpkzr VRI_VVV0UU2
e671 vap VRI_VVV0UU2
e672 vsrpr VRI_VVV0UU2
@@ -908,6 +924,7 @@ e67b vrp VRI_VVV0UU2
e67c vscshp VRR_VVV
e67d vcsph VRR_VVV0U0
e67e vsdp VRI_VVV0UU2
+e67f vtz VRR_0VVU
e700 vleb VRX_VRRDU
e701 vleh VRX_VRRDU
e702 vleg VRX_VRRDU
@@ -948,6 +965,7 @@ e74d vrep VRI_VVUU
e750 vpopct VRR_VV0U
e752 vctz VRR_VV0U
e753 vclz VRR_VV0U
+e754 vgem VRR_VV0U
e756 vlr VRX_VV
e75c vistr VRR_VV0U0U
e75f vseg VRR_VV0U
@@ -985,6 +1003,8 @@ e784 vpdi VRR_VVV0U
e785 vbperm VRR_VVV
e786 vsld VRI_VVV0U
e787 vsrd VRI_VVV0U
+e788 veval VRI_VVV0UV
+e789 vblend VRR_VVVU0V
e78a vstrc VRR_VVVUU0V
e78b vstrs VRR_VVVUU0V
e78c vperm VRR_VVV0V
@@ -1010,6 +1030,10 @@ e7ac vmale VRR_VVVU0V
e7ad vmalo VRR_VVVU0V
e7ae vmae VRR_VVVU0V
e7af vmao VRR_VVVU0V
+e7b0 vdl VRR_VVV0UU
+e7b1 vrl VRR_VVV0UU
+e7b2 vd VRR_VVV0UU
+e7b3 vr VRR_VVV0UU
e7b4 vgfm VRR_VVV0U
e7b8 vmsl VRR_VVVUU0V
e7b9 vaccc VRR_VVVU0V
@@ -1017,12 +1041,12 @@ e7bb vac VRR_VVVU0V
e7bc vgfma VRR_VVVU0V
e7bd vsbcbi VRR_VVVU0V
e7bf vsbi VRR_VVVU0V
-e7c0 vclgd VRR_VV0UUU
-e7c1 vcdlg VRR_VV0UUU
-e7c2 vcgd VRR_VV0UUU
-e7c3 vcdg VRR_VV0UUU
-e7c4 vlde VRR_VV0UU2
-e7c5 vled VRR_VV0UUU
+e7c0 vclfp VRR_VV0UUU
+e7c1 vcfpl VRR_VV0UUU
+e7c2 vcsfp VRR_VV0UUU
+e7c3 vcfps VRR_VV0UUU
+e7c4 vfll VRR_VV0UU2
+e7c5 vflr VRR_VV0UUU
e7c7 vfi VRR_VV0UUU
e7ca wfk VRR_VV0UU2
e7cb wfc VRR_VV0UU2
@@ -1094,9 +1118,9 @@ eb54 niy SIY_URD
eb55 cliy SIY_URD
eb56 oiy SIY_URD
eb57 xiy SIY_URD
-eb60 lric RSY_RDRU
-eb61 stric RSY_RDRU
-eb62 mric RSY_RDRU
+eb60 lric RSY_RURD2
+eb61 stric RSY_RURD2
+eb62 mric RSY_RURD2
eb6a asi SIY_IRD
eb6e alsi SIY_IRD
eb71 lpswey SIY_RD
@@ -1104,7 +1128,7 @@ eb7a agsi SIY_IRD
eb7e algsi SIY_IRD
eb80 icmh RSY_RURD
eb81 icmy RSY_RURD
-eb8a sqbs RSY_RDRU
+eb8a sqbs RSY_RURD2
eb8e mvclu RSY_RRRD
eb8f clclu RSY_RRRD
eb90 stmy RSY_RRRD
diff --git a/arch/s390/tools/relocs.c b/arch/s390/tools/relocs.c
index a74dbd5c9896..30a732c808f3 100644
--- a/arch/s390/tools/relocs.c
+++ b/arch/s390/tools/relocs.c
@@ -280,7 +280,7 @@ static int do_reloc(struct section *sec, Elf_Rel *rel)
case R_390_GOTOFF64:
break;
case R_390_64:
- add_reloc(&relocs64, offset - ehdr.e_entry);
+ add_reloc(&relocs64, offset);
break;
default:
die("Unsupported relocation type: %d\n", r_type);