Diffstat (limited to 'arch')
-rw-r--r--  arch/Kconfig | 31
-rw-r--r--  arch/alpha/include/asm/timex.h | 1
-rw-r--r--  arch/arm/Kconfig | 11
-rw-r--r--  arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts | 4
-rw-r--r--  arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi | 10
-rw-r--r--  arch/arm/boot/dts/aspeed-g6.dtsi | 10
-rw-r--r--  arch/arm/configs/lpc18xx_defconfig | 1
-rw-r--r--  arch/arm/configs/mps2_defconfig | 1
-rw-r--r--  arch/arm/configs/stm32_defconfig | 1
-rw-r--r--  arch/arm/configs/vf610m4_defconfig | 1
-rw-r--r--  arch/arm/include/asm/arch_gicv3.h | 7
-rw-r--r--  arch/arm/include/asm/assembler.h | 28
-rw-r--r--  arch/arm/include/asm/module.h | 17
-rw-r--r--  arch/arm/include/asm/timex.h | 1
-rw-r--r--  arch/arm/include/asm/unwind.h | 1
-rw-r--r--  arch/arm/kernel/entry-armv.S | 90
-rw-r--r--  arch/arm/kernel/entry-common.S | 12
-rw-r--r--  arch/arm/kernel/entry-header.S | 3
-rw-r--r--  arch/arm/kernel/hw_breakpoint.c | 26
-rw-r--r--  arch/arm/kernel/module.c | 78
-rw-r--r--  arch/arm/kernel/signal.c | 1
-rw-r--r--  arch/arm/mach-sunxi/Kconfig | 12
-rw-r--r--  arch/arm/mm/proc-v7-bugs.c | 1
-rw-r--r--  arch/arm/vdso/Makefile | 2
-rw-r--r--  arch/arm64/Kconfig | 119
-rw-r--r--  arch/arm64/Kconfig.platforms | 8
-rw-r--r--  arch/arm64/boot/dts/qcom/sm8250-mtp.dts | 12
-rw-r--r--  arch/arm64/boot/dts/qcom/sm8250.dtsi | 4
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts | 40
-rw-r--r--  arch/arm64/include/asm/Kbuild | 1
-rw-r--r--  arch/arm64/include/asm/arch_gicv3.h | 6
-rw-r--r--  arch/arm64/include/asm/archrandom.h | 2
-rw-r--r--  arch/arm64/include/asm/asm-bug.h | 4
-rw-r--r--  arch/arm64/include/asm/compiler.h | 16
-rw-r--r--  arch/arm64/include/asm/cpu.h | 4
-rw-r--r--  arch/arm64/include/asm/cpufeature.h | 24
-rw-r--r--  arch/arm64/include/asm/cputype.h | 2
-rw-r--r--  arch/arm64/include/asm/debug-monitors.h | 4
-rw-r--r--  arch/arm64/include/asm/el2_setup.h | 64
-rw-r--r--  arch/arm64/include/asm/esr.h | 21
-rw-r--r--  arch/arm64/include/asm/exception.h | 29
-rw-r--r--  arch/arm64/include/asm/fpsimd.h | 135
-rw-r--r--  arch/arm64/include/asm/fpsimdmacros.h | 87
-rw-r--r--  arch/arm64/include/asm/ftrace.h | 7
-rw-r--r--  arch/arm64/include/asm/hugetlb.h | 2
-rw-r--r--  arch/arm64/include/asm/hwcap.h | 8
-rw-r--r--  arch/arm64/include/asm/kvm_arm.h | 1
-rw-r--r--  arch/arm64/include/asm/kvm_emulate.h | 6
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 6
-rw-r--r--  arch/arm64/include/asm/kvm_ras.h | 2
-rw-r--r--  arch/arm64/include/asm/mte.h | 1
-rw-r--r--  arch/arm64/include/asm/pgtable-hwdef.h | 4
-rw-r--r--  arch/arm64/include/asm/pgtable.h | 3
-rw-r--r--  arch/arm64/include/asm/processor.h | 36
-rw-r--r--  arch/arm64/include/asm/stacktrace.h | 32
-rw-r--r--  arch/arm64/include/asm/sysreg.h | 181
-rw-r--r--  arch/arm64/include/asm/system_misc.h | 4
-rw-r--r--  arch/arm64/include/asm/thread_info.h | 2
-rw-r--r--  arch/arm64/include/asm/traps.h | 12
-rw-r--r--  arch/arm64/include/asm/uaccess.h | 15
-rw-r--r--  arch/arm64/include/uapi/asm/hwcap.h | 8
-rw-r--r--  arch/arm64/include/uapi/asm/kvm.h | 2
-rw-r--r--  arch/arm64/include/uapi/asm/ptrace.h | 69
-rw-r--r--  arch/arm64/include/uapi/asm/sigcontext.h | 55
-rw-r--r--  arch/arm64/kernel/cpu_errata.c | 2
-rw-r--r--  arch/arm64/kernel/cpufeature.c | 179
-rw-r--r--  arch/arm64/kernel/cpuinfo.c | 13
-rw-r--r--  arch/arm64/kernel/debug-monitors.c | 12
-rw-r--r--  arch/arm64/kernel/entry-common.c | 25
-rw-r--r--  arch/arm64/kernel/entry-fpsimd.S | 36
-rw-r--r--  arch/arm64/kernel/entry-ftrace.S | 17
-rw-r--r--  arch/arm64/kernel/entry.S | 2
-rw-r--r--  arch/arm64/kernel/fpsimd.c | 655
-rw-r--r--  arch/arm64/kernel/ftrace.c | 17
-rw-r--r--  arch/arm64/kernel/hw_breakpoint.c | 4
-rw-r--r--  arch/arm64/kernel/kgdb.c | 6
-rw-r--r--  arch/arm64/kernel/machine_kexec.c | 9
-rw-r--r--  arch/arm64/kernel/machine_kexec_file.c | 12
-rw-r--r--  arch/arm64/kernel/mte.c | 48
-rw-r--r--  arch/arm64/kernel/paravirt.c | 29
-rw-r--r--  arch/arm64/kernel/probes/kprobes.c | 4
-rw-r--r--  arch/arm64/kernel/probes/uprobes.c | 4
-rw-r--r--  arch/arm64/kernel/process.c | 44
-rw-r--r--  arch/arm64/kernel/ptrace.c | 366
-rw-r--r--  arch/arm64/kernel/relocate_kernel.S | 22
-rw-r--r--  arch/arm64/kernel/setup.c | 17
-rw-r--r--  arch/arm64/kernel/signal.c | 189
-rw-r--r--  arch/arm64/kernel/signal32.c | 1
-rw-r--r--  arch/arm64/kernel/smp.c | 1
-rw-r--r--  arch/arm64/kernel/stacktrace.c | 124
-rw-r--r--  arch/arm64/kernel/sys_compat.c | 2
-rw-r--r--  arch/arm64/kernel/syscall.c | 29
-rw-r--r--  arch/arm64/kernel/traps.c | 67
-rw-r--r--  arch/arm64/kernel/vdso/Makefile | 3
-rw-r--r--  arch/arm64/kernel/vmlinux.lds.S | 21
-rw-r--r--  arch/arm64/kvm/arm.c | 4
-rw-r--r--  arch/arm64/kvm/fpsimd.c | 43
-rw-r--r--  arch/arm64/kvm/handle_exit.c | 16
-rw-r--r--  arch/arm64/kvm/hyp/include/hyp/switch.h | 2
-rw-r--r--  arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 28
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/switch.c | 30
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/sys_regs.c | 2
-rw-r--r--  arch/arm64/kvm/hyp/vgic-v3-sr.c | 4
-rw-r--r--  arch/arm64/kvm/hyp/vhe/switch.c | 11
-rw-r--r--  arch/arm64/kvm/inject_fault.c | 4
-rw-r--r--  arch/arm64/kvm/sys_regs.c | 16
-rw-r--r--  arch/arm64/lib/mte.S | 4
-rw-r--r--  arch/arm64/mm/copypage.c | 4
-rw-r--r--  arch/arm64/mm/fault.c | 73
-rw-r--r--  arch/arm64/mm/hugetlbpage.c | 46
-rw-r--r--  arch/arm64/mm/init.c | 71
-rw-r--r--  arch/arm64/mm/trans_pgd.c | 2
-rw-r--r--  arch/arm64/tools/Makefile | 8
-rw-r--r--  arch/arm64/tools/cpucaps | 2
-rwxr-xr-x  arch/arm64/tools/gen-sysreg.awk | 268
-rw-r--r--  arch/arm64/tools/sysreg | 369
-rw-r--r--  arch/csky/Kbuild | 2
-rw-r--r--  arch/csky/Kconfig | 8
-rw-r--r--  arch/csky/Makefile | 3
-rw-r--r--  arch/csky/abiv1/Makefile | 2
-rw-r--r--  arch/csky/abiv1/memcpy.S | 347
-rw-r--r--  arch/csky/abiv1/strksyms.c | 6
-rw-r--r--  arch/csky/abiv2/Makefile | 2
-rw-r--r--  arch/csky/abiv2/strksyms.c | 4
-rw-r--r--  arch/csky/boot/Makefile | 1
-rw-r--r--  arch/csky/include/asm/atomic.h | 237
-rw-r--r--  arch/csky/include/asm/barrier.h | 11
-rw-r--r--  arch/csky/include/asm/cmpxchg.h | 64
-rw-r--r--  arch/csky/include/asm/io.h | 12
-rw-r--r--  arch/csky/kernel/Makefile | 2
-rw-r--r--  arch/csky/kernel/io.c | 91
-rw-r--r--  arch/csky/kernel/module.c | 2
-rw-r--r--  arch/csky/kernel/probes/kprobes.c | 2
-rw-r--r--  arch/csky/kernel/probes/uprobes.c | 2
-rw-r--r--  arch/csky/kernel/process.c | 1
-rw-r--r--  arch/csky/lib/Makefile | 3
-rw-r--r--  arch/csky/lib/string.c | 134
-rw-r--r--  arch/csky/mm/dma-mapping.c | 1
-rw-r--r--  arch/ia64/include/asm/timex.h | 1
-rw-r--r--  arch/m68k/Kbuild | 1
-rw-r--r--  arch/m68k/Kconfig.cpu | 2
-rw-r--r--  arch/m68k/Kconfig.machine | 17
-rw-r--r--  arch/m68k/configs/amiga_defconfig | 5
-rw-r--r--  arch/m68k/configs/apollo_defconfig | 5
-rw-r--r--  arch/m68k/configs/atari_defconfig | 5
-rw-r--r--  arch/m68k/configs/bvme6000_defconfig | 5
-rw-r--r--  arch/m68k/configs/hp300_defconfig | 5
-rw-r--r--  arch/m68k/configs/mac_defconfig | 5
-rw-r--r--  arch/m68k/configs/multi_defconfig | 5
-rw-r--r--  arch/m68k/configs/mvme147_defconfig | 5
-rw-r--r--  arch/m68k/configs/mvme16x_defconfig | 5
-rw-r--r--  arch/m68k/configs/q40_defconfig | 5
-rw-r--r--  arch/m68k/configs/sun3_defconfig | 5
-rw-r--r--  arch/m68k/configs/sun3x_defconfig | 5
-rw-r--r--  arch/m68k/configs/virt_defconfig | 68
-rw-r--r--  arch/m68k/include/asm/config.h | 2
-rw-r--r--  arch/m68k/include/asm/io.h | 3
-rw-r--r--  arch/m68k/include/asm/irq.h | 3
-rw-r--r--  arch/m68k/include/asm/pgtable_mm.h | 7
-rw-r--r--  arch/m68k/include/asm/raw_io.h | 6
-rw-r--r--  arch/m68k/include/asm/setup.h | 44
-rw-r--r--  arch/m68k/include/asm/timex.h | 2
-rw-r--r--  arch/m68k/include/asm/virt.h | 25
-rw-r--r--  arch/m68k/include/uapi/asm/bootinfo-virt.h | 18
-rw-r--r--  arch/m68k/include/uapi/asm/bootinfo.h | 1
-rw-r--r--  arch/m68k/kernel/Makefile | 1
-rw-r--r--  arch/m68k/kernel/entry.S | 4
-rw-r--r--  arch/m68k/kernel/head.S | 31
-rw-r--r--  arch/m68k/kernel/ptrace.c | 7
-rw-r--r--  arch/m68k/kernel/setup_mm.c | 7
-rw-r--r--  arch/m68k/kernel/signal.c | 1
-rw-r--r--  arch/m68k/math-emu/fp_arith.c | 2
-rw-r--r--  arch/m68k/mm/kmap.c | 21
-rw-r--r--  arch/m68k/virt/Makefile | 6
-rw-r--r--  arch/m68k/virt/config.c | 130
-rw-r--r--  arch/m68k/virt/ints.c | 155
-rw-r--r--  arch/m68k/virt/platform.c | 72
-rw-r--r--  arch/mips/include/asm/timex.h | 17
-rw-r--r--  arch/nios2/include/asm/timex.h | 3
-rw-r--r--  arch/openrisc/include/asm/timex.h | 1
-rw-r--r--  arch/openrisc/kernel/head.S | 9
-rw-r--r--  arch/parisc/include/asm/cacheflush.h | 31
-rw-r--r--  arch/parisc/include/asm/page.h | 6
-rw-r--r--  arch/parisc/include/asm/timex.h | 3
-rw-r--r--  arch/parisc/kernel/cache.c | 326
-rw-r--r--  arch/parisc/kernel/patch.c | 25
-rw-r--r--  arch/parisc/mm/fault.c | 6
-rw-r--r--  arch/powerpc/include/asm/bug.h | 14
-rw-r--r--  arch/powerpc/include/asm/timex.h | 1
-rw-r--r--  arch/powerpc/kernel/fadump.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/opal-core.c | 2
-rw-r--r--  arch/riscv/Kconfig | 2
-rw-r--r--  arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi | 2
-rw-r--r--  arch/riscv/boot/dts/sifive/fu540-c000.dtsi | 2
-rw-r--r--  arch/riscv/include/asm/bug.h | 4
-rw-r--r--  arch/riscv/include/asm/timex.h | 2
-rw-r--r--  arch/s390/Makefile | 2
-rw-r--r--  arch/s390/boot/.gitignore | 3
-rw-r--r--  arch/s390/boot/Makefile | 78
-rw-r--r--  arch/s390/boot/boot.h | 6
-rw-r--r--  arch/s390/boot/clz_ctz.c (renamed from arch/s390/boot/compressed/clz_ctz.c) | 0
-rw-r--r--  arch/s390/boot/compressed/.gitignore | 4
-rw-r--r--  arch/s390/boot/compressed/Makefile | 86
-rw-r--r--  arch/s390/boot/decompressor.c (renamed from arch/s390/boot/compressed/decompressor.c) | 0
-rw-r--r--  arch/s390/boot/decompressor.h (renamed from arch/s390/boot/compressed/decompressor.h) | 0
-rw-r--r--  arch/s390/boot/head.S | 366
-rw-r--r--  arch/s390/boot/ipl_data.c | 84
-rw-r--r--  arch/s390/boot/ipl_parm.c | 7
-rw-r--r--  arch/s390/boot/kaslr.c | 2
-rw-r--r--  arch/s390/boot/mem_detect.c | 2
-rw-r--r--  arch/s390/boot/startup.c | 2
-rw-r--r--  arch/s390/boot/vmlinux.lds.S (renamed from arch/s390/boot/compressed/vmlinux.lds.S) | 9
-rw-r--r--  arch/s390/crypto/des_s390.c | 2
-rw-r--r--  arch/s390/crypto/prng.c | 2
-rw-r--r--  arch/s390/hypfs/hypfs_vm.c | 2
-rw-r--r--  arch/s390/include/asm/alternative-asm.h | 76
-rw-r--r--  arch/s390/include/asm/alternative.h | 93
-rw-r--r--  arch/s390/include/asm/asm-extable.h | 12
-rw-r--r--  arch/s390/include/asm/barrier.h | 16
-rw-r--r--  arch/s390/include/asm/bug.h | 5
-rw-r--r--  arch/s390/include/asm/cio.h | 2
-rw-r--r--  arch/s390/include/asm/compat.h | 25
-rw-r--r--  arch/s390/include/asm/ctl_reg.h | 4
-rw-r--r--  arch/s390/include/asm/entry-common.h | 14
-rw-r--r--  arch/s390/include/asm/ipl.h | 6
-rw-r--r--  arch/s390/include/asm/lowcore.h | 5
-rw-r--r--  arch/s390/include/asm/nmi.h | 2
-rw-r--r--  arch/s390/include/asm/nospec-insn.h | 12
-rw-r--r--  arch/s390/include/asm/pai.h | 74
-rw-r--r--  arch/s390/include/asm/pci_debug.h | 7
-rw-r--r--  arch/s390/include/asm/preempt.h | 15
-rw-r--r--  arch/s390/include/asm/processor.h | 8
-rw-r--r--  arch/s390/include/asm/ptrace.h | 29
-rw-r--r--  arch/s390/include/asm/sclp.h | 1
-rw-r--r--  arch/s390/include/asm/scsw.h | 83
-rw-r--r--  arch/s390/include/asm/spinlock.h | 3
-rw-r--r--  arch/s390/include/asm/stp.h | 4
-rw-r--r--  arch/s390/include/asm/timex.h | 1
-rw-r--r--  arch/s390/include/asm/vx-insn.h | 6
-rw-r--r--  arch/s390/include/uapi/asm/pkey.h | 2
-rw-r--r--  arch/s390/include/uapi/asm/zcrypt.h | 42
-rw-r--r--  arch/s390/kernel/Makefile | 1
-rw-r--r--  arch/s390/kernel/alternative.c | 61
-rw-r--r--  arch/s390/kernel/compat_linux.h | 89
-rw-r--r--  arch/s390/kernel/entry.S | 40
-rw-r--r--  arch/s390/kernel/irq.c | 4
-rw-r--r--  arch/s390/kernel/machine_kexec.c | 10
-rw-r--r--  arch/s390/kernel/nmi.c | 6
-rw-r--r--  arch/s390/kernel/perf_cpum_cf_events.c | 148
-rw-r--r--  arch/s390/kernel/perf_pai_crypto.c | 688
-rw-r--r--  arch/s390/kernel/relocate_kernel.S | 3
-rw-r--r--  arch/s390/kernel/setup.c | 2
-rw-r--r--  arch/s390/kernel/time.c | 12
-rw-r--r--  arch/s390/kernel/vdso.c | 55
-rw-r--r--  arch/s390/kvm/priv.c | 1
-rw-r--r--  arch/s390/lib/spinlock.c | 4
-rw-r--r--  arch/s390/mm/mmap.c | 4
-rw-r--r--  arch/s390/pci/pci.c | 2
-rw-r--r--  arch/s390/pci/pci_clp.c | 2
-rw-r--r--  arch/s390/pci/pci_debug.c | 2
-rw-r--r--  arch/s390/pci/pci_event.c | 3
-rw-r--r--  arch/s390/pci/pci_insn.c | 108
-rw-r--r--  arch/s390/purgatory/head.S | 30
-rw-r--r--  arch/sh/boards/board-sh7757lcr.c | 2
-rw-r--r--  arch/sh/boards/mach-ecovec24/setup.c | 2
-rw-r--r--  arch/sh/boot/romimage/mmcif-sh7724.c | 2
-rw-r--r--  arch/sh/configs/rsk7201_defconfig | 1
-rw-r--r--  arch/sh/configs/rsk7203_defconfig | 1
-rw-r--r--  arch/sh/configs/se7206_defconfig | 1
-rw-r--r--  arch/sparc/include/asm/timex_32.h | 4
-rw-r--r--  arch/sparc/kernel/signal32.c | 1
-rw-r--r--  arch/sparc/kernel/signal_64.c | 1
-rw-r--r--  arch/sparc/vdso/Makefile | 3
-rw-r--r--  arch/um/drivers/ubd_kern.c | 3
-rw-r--r--  arch/um/include/asm/timex.h | 9
-rw-r--r--  arch/x86/Kconfig | 75
-rw-r--r--  arch/x86/Kconfig.debug | 2
-rw-r--r--  arch/x86/Makefile | 1
-rw-r--r--  arch/x86/boot/boot.h | 73
-rw-r--r--  arch/x86/boot/compressed/Makefile | 2
-rw-r--r--  arch/x86/boot/compressed/acpi.c | 176
-rw-r--r--  arch/x86/boot/compressed/early_serial_console.c | 3
-rw-r--r--  arch/x86/boot/compressed/efi.c | 234
-rw-r--r--  arch/x86/boot/compressed/efi.h | 126
-rw-r--r--  arch/x86/boot/compressed/head_64.S | 64
-rw-r--r--  arch/x86/boot/compressed/ident_map_64.c | 39
-rw-r--r--  arch/x86/boot/compressed/idt_64.c | 18
-rw-r--r--  arch/x86/boot/compressed/kaslr.c | 3
-rw-r--r--  arch/x86/boot/compressed/mem_encrypt.S | 36
-rw-r--r--  arch/x86/boot/compressed/misc.c | 17
-rw-r--r--  arch/x86/boot/compressed/misc.h | 60
-rw-r--r--  arch/x86/boot/compressed/pgtable.h | 2
-rw-r--r--  arch/x86/boot/compressed/pgtable_64.c | 3
-rw-r--r--  arch/x86/boot/compressed/sev.c | 263
-rw-r--r--  arch/x86/boot/compressed/tdcall.S | 3
-rw-r--r--  arch/x86/boot/compressed/tdx.c | 77
-rw-r--r--  arch/x86/boot/compressed/tdx.h | 13
-rw-r--r--  arch/x86/boot/cpucheck.c | 30
-rw-r--r--  arch/x86/boot/cpuflags.c | 3
-rw-r--r--  arch/x86/boot/cpuflags.h | 1
-rw-r--r--  arch/x86/boot/header.S | 4
-rw-r--r--  arch/x86/boot/io.h | 41
-rw-r--r--  arch/x86/boot/main.c | 6
-rw-r--r--  arch/x86/boot/msr.h | 26
-rw-r--r--  arch/x86/coco/Makefile | 2
-rw-r--r--  arch/x86/coco/core.c | 25
-rw-r--r--  arch/x86/coco/tdx/Makefile | 3
-rw-r--r--  arch/x86/coco/tdx/tdcall.S | 205
-rw-r--r--  arch/x86/coco/tdx/tdx.c | 692
-rw-r--r--  arch/x86/entry/calling.h | 19
-rw-r--r--  arch/x86/entry/entry_64.S | 62
-rw-r--r--  arch/x86/entry/entry_64_compat.S | 111
-rw-r--r--  arch/x86/entry/vdso/Makefile | 3
-rw-r--r--  arch/x86/entry/vdso/vma.c | 2
-rw-r--r--  arch/x86/entry/vsyscall/vsyscall_64.c | 2
-rw-r--r--  arch/x86/events/Kconfig | 8
-rw-r--r--  arch/x86/events/amd/Makefile | 1
-rw-r--r--  arch/x86/events/amd/brs.c | 367
-rw-r--r--  arch/x86/events/amd/core.c | 505
-rw-r--r--  arch/x86/events/amd/ibs.c | 209
-rw-r--r--  arch/x86/events/core.c | 12
-rw-r--r--  arch/x86/events/intel/core.c | 2
-rw-r--r--  arch/x86/events/intel/cstate.c | 2
-rw-r--r--  arch/x86/events/intel/lbr.c | 36
-rw-r--r--  arch/x86/events/intel/uncore.c | 2
-rw-r--r--  arch/x86/events/intel/uncore_snb.c | 454
-rw-r--r--  arch/x86/events/msr.c | 2
-rw-r--r--  arch/x86/events/perf_event.h | 125
-rw-r--r--  arch/x86/ia32/Makefile | 2
-rw-r--r--  arch/x86/ia32/ia32_aout.c | 325
-rw-r--r--  arch/x86/include/asm/acenv.h | 14
-rw-r--r--  arch/x86/include/asm/amd-ibs.h | 2
-rw-r--r--  arch/x86/include/asm/amd_nb.h | 1
-rw-r--r--  arch/x86/include/asm/apic.h | 7
-rw-r--r--  arch/x86/include/asm/apicdef.h | 6
-rw-r--r--  arch/x86/include/asm/bootparam_utils.h | 1
-rw-r--r--  arch/x86/include/asm/bug.h | 2
-rw-r--r--  arch/x86/include/asm/cmpxchg_32.h | 21
-rw-r--r--  arch/x86/include/asm/cmpxchg_64.h | 6
-rw-r--r--  arch/x86/include/asm/cpu.h | 22
-rw-r--r--  arch/x86/include/asm/cpu_entry_area.h | 2
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 7
-rw-r--r--  arch/x86/include/asm/cpufeatures.h | 6
-rw-r--r--  arch/x86/include/asm/cpuid.h | 34
-rw-r--r--  arch/x86/include/asm/disabled-features.h | 16
-rw-r--r--  arch/x86/include/asm/efi.h | 5
-rw-r--r--  arch/x86/include/asm/elf.h | 15
-rw-r--r--  arch/x86/include/asm/entry-common.h | 4
-rw-r--r--  arch/x86/include/asm/fpu/api.h | 3
-rw-r--r--  arch/x86/include/asm/fpu/internal.h | 0
-rw-r--r--  arch/x86/include/asm/highmem.h | 1
-rw-r--r--  arch/x86/include/asm/idtentry.h | 4
-rw-r--r--  arch/x86/include/asm/io.h | 42
-rw-r--r--  arch/x86/include/asm/irqflags.h | 8
-rw-r--r--  arch/x86/include/asm/jump_label.h | 6
-rw-r--r--  arch/x86/include/asm/kvm_para.h | 22
-rw-r--r--  arch/x86/include/asm/mem_encrypt.h | 6
-rw-r--r--  arch/x86/include/asm/mmu_context.h | 2
-rw-r--r--  arch/x86/include/asm/mmx.h | 0
-rw-r--r--  arch/x86/include/asm/msr-index.h | 19
-rw-r--r--  arch/x86/include/asm/msr.h | 11
-rw-r--r--  arch/x86/include/asm/nmi.h | 1
-rw-r--r--  arch/x86/include/asm/page_64.h | 2
-rw-r--r--  arch/x86/include/asm/pci_x86.h | 9
-rw-r--r--  arch/x86/include/asm/perf_event.h | 43
-rw-r--r--  arch/x86/include/asm/pkeys.h | 8
-rw-r--r--  arch/x86/include/asm/proto.h | 8
-rw-r--r--  arch/x86/include/asm/ptrace.h | 4
-rw-r--r--  arch/x86/include/asm/realmode.h | 1
-rw-r--r--  arch/x86/include/asm/segment.h | 12
-rw-r--r--  arch/x86/include/asm/setup.h | 31
-rw-r--r--  arch/x86/include/asm/sev-common.h | 82
-rw-r--r--  arch/x86/include/asm/sev.h | 137
-rw-r--r--  arch/x86/include/asm/shared/io.h | 34
-rw-r--r--  arch/x86/include/asm/shared/msr.h | 15
-rw-r--r--  arch/x86/include/asm/shared/tdx.h | 40
-rw-r--r--  arch/x86/include/asm/smap.h | 24
-rw-r--r--  arch/x86/include/asm/special_insns.h | 7
-rw-r--r--  arch/x86/include/asm/suspend_32.h | 2
-rw-r--r--  arch/x86/include/asm/suspend_64.h | 12
-rw-r--r--  arch/x86/include/asm/svm.h | 179
-rw-r--r--  arch/x86/include/asm/tdx.h | 91
-rw-r--r--  arch/x86/include/asm/thread_info.h | 4
-rw-r--r--  arch/x86/include/asm/timex.h | 9
-rw-r--r--  arch/x86/include/asm/topology.h | 23
-rw-r--r--  arch/x86/include/asm/traps.h | 2
-rw-r--r--  arch/x86/include/asm/tsc.h | 7
-rw-r--r--  arch/x86/include/uapi/asm/amd_hsmp.h | 114
-rw-r--r--  arch/x86/include/uapi/asm/bootparam.h | 4
-rw-r--r--  arch/x86/include/uapi/asm/svm.h | 13
-rw-r--r--  arch/x86/kernel/Makefile | 2
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 102
-rw-r--r--  arch/x86/kernel/acpi/cppc.c | 29
-rw-r--r--  arch/x86/kernel/alternative.c | 6
-rw-r--r--  arch/x86/kernel/amd_nb.c | 7
-rw-r--r--  arch/x86/kernel/apic/apic.c | 28
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 18
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 16
-rw-r--r--  arch/x86/kernel/asm-offsets.c | 17
-rw-r--r--  arch/x86/kernel/cpu/aperfmperf.c | 480
-rw-r--r--  arch/x86/kernel/cpu/bugs.c | 7
-rw-r--r--  arch/x86/kernel/cpu/common.c | 105
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 110
-rw-r--r--  arch/x86/kernel/cpu/mce/amd.c | 32
-rw-r--r--  arch/x86/kernel/cpu/mce/apei.c | 8
-rw-r--r--  arch/x86/kernel/cpu/mce/core.c | 4
-rw-r--r--  arch/x86/kernel/cpu/mce/severity.c | 110
-rw-r--r--  arch/x86/kernel/cpu/microcode/intel.c | 59
-rw-r--r--  arch/x86/kernel/cpu/proc.c | 11
-rw-r--r--  arch/x86/kernel/cpu/resctrl/rdtgroup.c | 14
-rw-r--r--  arch/x86/kernel/cpu/scattered.c | 1
-rw-r--r--  arch/x86/kernel/cpu/sgx/encl.c | 113
-rw-r--r--  arch/x86/kernel/cpu/sgx/encl.h | 2
-rw-r--r--  arch/x86/kernel/cpu/sgx/main.c | 13
-rw-r--r--  arch/x86/kernel/crash.c | 2
-rw-r--r--  arch/x86/kernel/fpu/xstate.c | 65
-rw-r--r--  arch/x86/kernel/fpu/xstate.h | 14
-rw-r--r--  arch/x86/kernel/ftrace.c | 17
-rw-r--r--  arch/x86/kernel/head64.c | 36
-rw-r--r--  arch/x86/kernel/head_64.S | 65
-rw-r--r--  arch/x86/kernel/idt.c | 3
-rw-r--r--  arch/x86/kernel/nmi.c | 12
-rw-r--r--  arch/x86/kernel/probe_roms.c | 13
-rw-r--r--  arch/x86/kernel/process.c | 23
-rw-r--r--  arch/x86/kernel/process_32.c | 13
-rw-r--r--  arch/x86/kernel/process_64.c | 4
-rw-r--r--  arch/x86/kernel/ptrace.c | 6
-rw-r--r--  arch/x86/kernel/setup.c | 28
-rw-r--r--  arch/x86/kernel/sev-shared.c | 534
-rw-r--r--  arch/x86/kernel/sev.c | 855
-rw-r--r--  arch/x86/kernel/signal.c | 8
-rw-r--r--  arch/x86/kernel/signal_compat.c | 2
-rw-r--r--  arch/x86/kernel/smpboot.c | 373
-rw-r--r--  arch/x86/kernel/sys_x86_64.c | 7
-rw-r--r--  arch/x86/kernel/traps.c | 162
-rw-r--r--  arch/x86/kernel/vm86_32.c | 4
-rw-r--r--  arch/x86/kvm/cpuid.c | 19
-rw-r--r--  arch/x86/kvm/hyperv.c | 4
-rw-r--r--  arch/x86/kvm/mmu/mmu.c | 16
-rw-r--r--  arch/x86/kvm/pmu.c | 7
-rw-r--r--  arch/x86/kvm/svm/sev.c | 22
-rw-r--r--  arch/x86/kvm/svm/svm.c | 8
-rw-r--r--  arch/x86/kvm/svm/svm.h | 4
-rw-r--r--  arch/x86/lib/delay.c | 4
-rw-r--r--  arch/x86/lib/insn-eval.c | 5
-rw-r--r--  arch/x86/lib/kaslr.c | 2
-rw-r--r--  arch/x86/lib/mmx_32.c | 0
-rw-r--r--  arch/x86/math-emu/get_address.c | 2
-rw-r--r--  arch/x86/mm/Makefile | 3
-rw-r--r--  arch/x86/mm/amdtopology.c | 2
-rw-r--r--  arch/x86/mm/fault.c | 2
-rw-r--r--  arch/x86/mm/init_64.c | 1
-rw-r--r--  arch/x86/mm/ioremap.c | 5
-rw-r--r--  arch/x86/mm/mem_encrypt.c | 13
-rw-r--r--  arch/x86/mm/mem_encrypt_amd.c | 71
-rw-r--r--  arch/x86/mm/mem_encrypt_identity.c | 8
-rw-r--r--  arch/x86/mm/mmio-mod.c | 2
-rw-r--r--  arch/x86/mm/numa_emulation.c | 4
-rw-r--r--  arch/x86/mm/pat/memtype.c | 2
-rw-r--r--  arch/x86/mm/pti.c | 2
-rw-r--r--  arch/x86/mm/setup_nx.c | 62
-rw-r--r--  arch/x86/pci/irq.c | 377
-rw-r--r--  arch/x86/platform/efi/efi.c | 3
-rw-r--r--  arch/x86/platform/uv/uv_nmi.c | 23
-rw-r--r--  arch/x86/realmode/init.c | 2
-rw-r--r--  arch/x86/realmode/rm/header.S | 1
-rw-r--r--  arch/x86/realmode/rm/trampoline_64.S | 57
-rw-r--r--  arch/x86/realmode/rm/trampoline_common.S | 12
-rw-r--r--  arch/x86/realmode/rm/wakemain.c | 4
-rw-r--r--  arch/x86/virt/vmx/tdx/tdxcall.S | 96
-rw-r--r--  arch/x86/xen/enlighten_pv.c | 2
-rw-r--r--  arch/x86/xen/smp_pv.c | 5
-rw-r--r--  arch/xtensa/Kconfig | 18
-rw-r--r--  arch/xtensa/boot/lib/Makefile | 1
-rw-r--r--  arch/xtensa/include/asm/barrier.h | 12
-rw-r--r--  arch/xtensa/include/asm/bitops.h | 10
-rw-r--r--  arch/xtensa/include/asm/coprocessor.h | 11
-rw-r--r--  arch/xtensa/include/asm/processor.h | 7
-rw-r--r--  arch/xtensa/include/asm/sections.h | 2
-rw-r--r--  arch/xtensa/include/asm/thread_info.h | 11
-rw-r--r--  arch/xtensa/include/asm/timex.h | 6
-rw-r--r--  arch/xtensa/include/asm/traps.h | 40
-rw-r--r--  arch/xtensa/kernel/Makefile | 1
-rw-r--r--  arch/xtensa/kernel/asm-offsets.c | 19
-rw-r--r--  arch/xtensa/kernel/coprocessor.S | 230
-rw-r--r--  arch/xtensa/kernel/entry.S | 335
-rw-r--r--  arch/xtensa/kernel/hibernate.c | 25
-rw-r--r--  arch/xtensa/kernel/process.c | 112
-rw-r--r--  arch/xtensa/kernel/ptrace.c | 3
-rw-r--r--  arch/xtensa/kernel/s32c1i_selftest.c | 7
-rw-r--r--  arch/xtensa/kernel/signal.c | 3
-rw-r--r--  arch/xtensa/kernel/smp.c | 7
-rw-r--r--  arch/xtensa/kernel/traps.c | 143
-rw-r--r--  arch/xtensa/lib/Makefile | 2
-rw-r--r--  arch/xtensa/lib/kcsan-stubs.c | 54
-rw-r--r--  arch/xtensa/lib/memcopy.S | 20
-rw-r--r--  arch/xtensa/mm/Makefile | 3
-rw-r--r--  arch/xtensa/mm/fault.c | 112
-rw-r--r--  arch/xtensa/mm/mmu.c | 2
-rw-r--r--  arch/xtensa/platforms/iss/network.c | 150
-rw-r--r--  arch/xtensa/platforms/iss/simdisk.c | 18
-rw-r--r--  arch/xtensa/platforms/xt2000/setup.c | 2
502 files changed, 15165 insertions, 5341 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 31c4fdc4a4ba..763b1b5e4f41 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -24,6 +24,13 @@ config KEXEC_ELF
config HAVE_IMA_KEXEC
bool
+config ARCH_HAS_SUBPAGE_FAULTS
+ bool
+ help
+ Select if the architecture can check permissions at sub-page
+ granularity (e.g. arm64 MTE). The probe_user_*() functions
+ must be implemented.
+
config HOTPLUG_SMT
bool
@@ -35,6 +42,7 @@ config KPROBES
depends on MODULES
depends on HAVE_KPROBES
select KALLSYMS
+ select TASKS_RCU if PREEMPTION
help
Kprobes allows you to trap at almost any kernel address and
execute a callback function. register_kprobe() establishes
@@ -46,6 +54,7 @@ config JUMP_LABEL
bool "Optimize very unlikely/likely branches"
depends on HAVE_ARCH_JUMP_LABEL
depends on CC_HAS_ASM_GOTO
+ select OBJTOOL if HAVE_JUMP_LABEL_HACK
help
This option enables a transparent branch optimization that
makes certain almost-always-true or almost-always-false branch
@@ -723,10 +732,7 @@ config ARCH_SUPPORTS_CFI_CLANG
config CFI_CLANG
bool "Use Clang's Control Flow Integrity (CFI)"
depends on LTO_CLANG && ARCH_SUPPORTS_CFI_CLANG
- # Clang >= 12:
- # - https://bugs.llvm.org/show_bug.cgi?id=46258
- # - https://bugs.llvm.org/show_bug.cgi?id=47479
- depends on CLANG_VERSION >= 120000
+ depends on CLANG_VERSION >= 140000
select KALLSYMS
help
This option enables Clang’s forward-edge Control Flow Integrity
@@ -1026,11 +1032,23 @@ config ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
depends on MMU
select ARCH_HAS_ELF_RANDOMIZE
+config HAVE_OBJTOOL
+ bool
+
+config HAVE_JUMP_LABEL_HACK
+ bool
+
+config HAVE_NOINSTR_HACK
+ bool
+
+config HAVE_NOINSTR_VALIDATION
+ bool
+
config HAVE_STACK_VALIDATION
bool
help
- Architecture supports the 'objtool check' host tool command, which
- performs compile-time stack metadata validation.
+ Architecture supports objtool compile-time frame pointer rule
+ validation.
config HAVE_RELIABLE_STACKTRACE
bool
@@ -1300,6 +1318,7 @@ config HAVE_STATIC_CALL
config HAVE_STATIC_CALL_INLINE
bool
depends on HAVE_STATIC_CALL
+ select OBJTOOL
config HAVE_PREEMPT_DYNAMIC
bool
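
The new ARCH_HAS_SUBPAGE_FAULTS symbol above exists because a page-granular fault-in is not enough for features such as arm64 MTE, where a permission (tag-check) fault can occur on any 16-byte granule inside an otherwise writable page. As a rough illustration only — the function and granule-check names below are hypothetical, not the kernel's API — a sub-page probe walks the buffer granule by granule and reports how many bytes could not be verified:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define GRANULE 16	/* arm64 MTE checks tags per 16-byte granule */

/* Hypothetical arch hook: is this granule writable? A real MTE
 * implementation would perform a tag-checked probe of the address. */
static int granule_writable(const char *addr)
{
	(void)addr;
	return 1;
}

/* Return the number of bytes that could not be probed, 0 on success. */
static size_t probe_subpage_writeable_sketch(char *uaddr, size_t size)
{
	char *p = uaddr, *end = uaddr + size;

	while (p < end) {
		if (!granule_writable(p))
			return (size_t)(end - p);
		/* step to the next granule boundary */
		p += GRANULE - ((uintptr_t)p % GRANULE);
	}
	return 0;
}

int main(void)
{
	char buf[64];

	printf("unprobed bytes: %zu\n",
	       probe_subpage_writeable_sketch(buf, sizeof(buf)));
	return 0;
}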
diff --git a/arch/alpha/include/asm/timex.h b/arch/alpha/include/asm/timex.h
index b565cc6f408e..f89798da8a14 100644
--- a/arch/alpha/include/asm/timex.h
+++ b/arch/alpha/include/asm/timex.h
@@ -28,5 +28,6 @@ static inline cycles_t get_cycles (void)
__asm__ __volatile__ ("rpcc %0" : "=r"(ret));
return ret;
}
+#define get_cycles get_cycles
#endif
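
The lone added line, #define get_cycles get_cycles, is the usual kernel idiom for marking an arch override: the generic timex header supplies its fallback only under #ifndef get_cycles, so defining the macro to its own name suppresses the fallback while leaving every call site pointing at the inline function. A minimal standalone sketch of the idiom (illustrative, not kernel code):

#include <stdio.h>

/* "arch" side: provide the real counter, then self-define the macro
 * so the generic side can see that an override exists. */
static inline unsigned long get_cycles(void)
{
	return 42;	/* stand-in for reading a cycle counter */
}
#define get_cycles get_cycles

/* "generic" side: supply a fallback only when no override is defined. */
#ifndef get_cycles
#define get_cycles() 0UL
#endif

int main(void)
{
	printf("%lu\n", get_cycles());	/* prints 42, not the fallback */
	return 0;
}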
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 2e8091e2d8a8..0dcf88e7f9cf 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -972,6 +972,17 @@ config ARM_ERRATA_764369
relevant cache maintenance functions and sets a specific bit
in the diagnostic control register of the SCU.
+config ARM_ERRATA_764319
+ bool "ARM errata: Read to DBGPRSR and DBGOSLSR may generate Undefined instruction"
+ depends on CPU_V7
+ help
+ This option enables the workaround for the 764319 Cortex-A9 erratum.
+ CP14 read accesses to the DBGPRSR and DBGOSLSR registers generate an
+ unexpected Undefined Instruction exception when the DBGSWENABLE
+ external pin is set to 0, even when the CP14 accesses are performed
+ from a privileged mode. This workaround catches the exception so
+ that the kernel does not stop execution.
+
config ARM_ERRATA_775420
bool "ARM errata: A data cache maintenance operation which aborts, might lead to deadlock"
depends on CPU_V7
diff --git a/arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts b/arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts
index e71ccfd1df63..ff4c07c69af1 100644
--- a/arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts
@@ -100,12 +100,14 @@
lm25066@40 {
compatible = "lm25066";
reg = <0x40>;
+ shunt-resistor-micro-ohms = <1000>;
};
/* 12VSB PMIC */
lm25066@41 {
compatible = "lm25066";
reg = <0x41>;
+ shunt-resistor-micro-ohms = <10000>;
};
};
@@ -196,7 +198,7 @@
gpio-line-names =
/* A */ "LOCATORLED_STATUS_N", "BMC_MAC2_INTB", "NMI_BTN_N", "BMC_NMI",
"", "", "", "",
- /* B */ "DDR_MEM_TEMP", "", "", "", "", "", "", "",
+ /* B */ "POST_COMPLETE_N", "", "", "", "", "", "", "",
/* C */ "", "", "", "", "PCIE_HP_SEL_N", "PCIE_SATA_SEL_N", "LOCATORBTN", "",
/* D */ "BMC_PSIN", "BMC_PSOUT", "BMC_RESETCON", "RESETCON",
"", "", "", "PSU_FAN_FAIL_N",
diff --git a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi
index e4775bbceecc..7cd4f075e325 100644
--- a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi
+++ b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi
@@ -117,9 +117,9 @@
groups = "FWSPID";
};
- pinctrl_fwqspid_default: fwqspid_default {
- function = "FWSPID";
- groups = "FWQSPID";
+ pinctrl_fwqspi_default: fwqspi_default {
+ function = "FWQSPI";
+ groups = "FWQSPI";
};
pinctrl_fwspiwp_default: fwspiwp_default {
@@ -653,12 +653,12 @@
};
pinctrl_qspi1_default: qspi1_default {
- function = "QSPI1";
+ function = "SPI1";
groups = "QSPI1";
};
pinctrl_qspi2_default: qspi2_default {
- function = "QSPI2";
+ function = "SPI2";
groups = "QSPI2";
};
diff --git a/arch/arm/boot/dts/aspeed-g6.dtsi b/arch/arm/boot/dts/aspeed-g6.dtsi
index 3d5ce9da42c3..9d2a0ce4ca06 100644
--- a/arch/arm/boot/dts/aspeed-g6.dtsi
+++ b/arch/arm/boot/dts/aspeed-g6.dtsi
@@ -389,6 +389,16 @@
reg = <0x1e6f2000 0x1000>;
};
+ video: video@1e700000 {
+ compatible = "aspeed,ast2600-video-engine";
+ reg = <0x1e700000 0x1000>;
+ clocks = <&syscon ASPEED_CLK_GATE_VCLK>,
+ <&syscon ASPEED_CLK_GATE_ECLK>;
+ clock-names = "vclk", "eclk";
+ interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
gpio0: gpio@1e780000 {
#gpio-cells = <2>;
gpio-controller;
diff --git a/arch/arm/configs/lpc18xx_defconfig b/arch/arm/configs/lpc18xx_defconfig
index be882ea0eee4..688c9849eec8 100644
--- a/arch/arm/configs/lpc18xx_defconfig
+++ b/arch/arm/configs/lpc18xx_defconfig
@@ -30,7 +30,6 @@ CONFIG_ARM_APPENDED_DTB=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_BINFMT_FLAT=y
CONFIG_BINFMT_ZFLAT=y
-CONFIG_BINFMT_SHARED_FLAT=y
# CONFIG_COREDUMP is not set
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/arm/configs/mps2_defconfig b/arch/arm/configs/mps2_defconfig
index 89f4a6ff30bd..c1e98e33a348 100644
--- a/arch/arm/configs/mps2_defconfig
+++ b/arch/arm/configs/mps2_defconfig
@@ -23,7 +23,6 @@ CONFIG_PREEMPT_VOLUNTARY=y
CONFIG_ZBOOT_ROM_TEXT=0x0
CONFIG_ZBOOT_ROM_BSS=0x0
CONFIG_BINFMT_FLAT=y
-CONFIG_BINFMT_SHARED_FLAT=y
# CONFIG_COREDUMP is not set
# CONFIG_SUSPEND is not set
CONFIG_NET=y
diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig
index 551db328009d..71d6bfcf4551 100644
--- a/arch/arm/configs/stm32_defconfig
+++ b/arch/arm/configs/stm32_defconfig
@@ -28,7 +28,6 @@ CONFIG_ZBOOT_ROM_BSS=0x0
CONFIG_XIP_KERNEL=y
CONFIG_XIP_PHYS_ADDR=0x08008000
CONFIG_BINFMT_FLAT=y
-CONFIG_BINFMT_SHARED_FLAT=y
# CONFIG_COREDUMP is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
diff --git a/arch/arm/configs/vf610m4_defconfig b/arch/arm/configs/vf610m4_defconfig
index a89f035c3b01..70fdbfd83484 100644
--- a/arch/arm/configs/vf610m4_defconfig
+++ b/arch/arm/configs/vf610m4_defconfig
@@ -18,7 +18,6 @@ CONFIG_XIP_KERNEL=y
CONFIG_XIP_PHYS_ADDR=0x0f000080
CONFIG_BINFMT_FLAT=y
CONFIG_BINFMT_ZFLAT=y
-CONFIG_BINFMT_SHARED_FLAT=y
# CONFIG_SUSPEND is not set
# CONFIG_UEVENT_HELPER is not set
# CONFIG_STANDALONE is not set
diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h
index 413abfb42989..f82a819eb0db 100644
--- a/arch/arm/include/asm/arch_gicv3.h
+++ b/arch/arm/include/asm/arch_gicv3.h
@@ -48,6 +48,7 @@ static inline u32 read_ ## a64(void) \
return read_sysreg(a32); \
} \
+CPUIF_MAP(ICC_EOIR1, ICC_EOIR1_EL1)
CPUIF_MAP(ICC_PMR, ICC_PMR_EL1)
CPUIF_MAP(ICC_AP0R0, ICC_AP0R0_EL1)
CPUIF_MAP(ICC_AP0R1, ICC_AP0R1_EL1)
@@ -63,12 +64,6 @@ CPUIF_MAP(ICC_AP1R3, ICC_AP1R3_EL1)
/* Low-level accessors */
-static inline void gic_write_eoir(u32 irq)
-{
- write_sysreg(irq, ICC_EOIR1);
- isb();
-}
-
static inline void gic_write_dir(u32 val)
{
write_sysreg(val, ICC_DIR);
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 34fe8d2dd5d1..90fbe4a3f9c8 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -666,12 +666,11 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
__adldst_l str, \src, \sym, \tmp, \cond
.endm
- .macro __ldst_va, op, reg, tmp, sym, cond
+ .macro __ldst_va, op, reg, tmp, sym, cond, offset
#if __LINUX_ARM_ARCH__ >= 7 || \
!defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \
(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
mov_l \tmp, \sym, \cond
- \op\cond \reg, [\tmp]
#else
/*
* Avoid a literal load, by emitting a sequence of ADD/LDR instructions
@@ -683,24 +682,29 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
.reloc .L0_\@, R_ARM_ALU_PC_G0_NC, \sym
.reloc .L1_\@, R_ARM_ALU_PC_G1_NC, \sym
.reloc .L2_\@, R_ARM_LDR_PC_G2, \sym
-.L0_\@: sub\cond \tmp, pc, #8
-.L1_\@: sub\cond \tmp, \tmp, #4
-.L2_\@: \op\cond \reg, [\tmp, #0]
+.L0_\@: sub\cond \tmp, pc, #8 - \offset
+.L1_\@: sub\cond \tmp, \tmp, #4 - \offset
+.L2_\@:
#endif
+ \op\cond \reg, [\tmp, #\offset]
.endm
/*
* ldr_va - load a 32-bit word from the virtual address of \sym
*/
- .macro ldr_va, rd:req, sym:req, cond
- __ldst_va ldr, \rd, \rd, \sym, \cond
+ .macro ldr_va, rd:req, sym:req, cond, tmp, offset=0
+ .ifnb \tmp
+ __ldst_va ldr, \rd, \tmp, \sym, \cond, \offset
+ .else
+ __ldst_va ldr, \rd, \rd, \sym, \cond, \offset
+ .endif
.endm
/*
* str_va - store a 32-bit word to the virtual address of \sym
*/
.macro str_va, rn:req, sym:req, tmp:req, cond
- __ldst_va str, \rn, \tmp, \sym, \cond
+ __ldst_va str, \rn, \tmp, \sym, \cond, 0
.endm
/*
@@ -727,9 +731,11 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
* are permitted to overlap with 'rd' if != sp
*/
.macro ldr_this_cpu, rd:req, sym:req, t1:req, t2:req
-#if __LINUX_ARM_ARCH__ >= 7 || \
- !defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \
- (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
+#ifndef CONFIG_SMP
+ ldr_va \rd, \sym, tmp=\t1
+#elif __LINUX_ARM_ARCH__ >= 7 || \
+ !defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \
+ (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
this_cpu_offset \t1
mov_l \t2, \sym
ldr \rd, [\t1, \t2]
diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h
index cfffae67c04e..5546c9751478 100644
--- a/arch/arm/include/asm/module.h
+++ b/arch/arm/include/asm/module.h
@@ -3,20 +3,10 @@
#define _ASM_ARM_MODULE_H
#include <asm-generic/module.h>
-
-struct unwind_table;
+#include <asm/unwind.h>
#ifdef CONFIG_ARM_UNWIND
-enum {
- ARM_SEC_INIT,
- ARM_SEC_DEVINIT,
- ARM_SEC_CORE,
- ARM_SEC_EXIT,
- ARM_SEC_DEVEXIT,
- ARM_SEC_HOT,
- ARM_SEC_UNLIKELY,
- ARM_SEC_MAX,
-};
+#define ELF_SECTION_UNWIND 0x70000001
#endif
#define PLT_ENT_STRIDE L1_CACHE_BYTES
@@ -36,7 +26,8 @@ struct mod_plt_sec {
struct mod_arch_specific {
#ifdef CONFIG_ARM_UNWIND
- struct unwind_table *unwind[ARM_SEC_MAX];
+ struct list_head unwind_list;
+ struct unwind_table *init_table;
#endif
#ifdef CONFIG_ARM_MODULE_PLTS
struct mod_plt_sec core;
diff --git a/arch/arm/include/asm/timex.h b/arch/arm/include/asm/timex.h
index 7c3b3671d6c2..6d1337c169cd 100644
--- a/arch/arm/include/asm/timex.h
+++ b/arch/arm/include/asm/timex.h
@@ -11,5 +11,6 @@
typedef unsigned long cycles_t;
#define get_cycles() ({ cycles_t c; read_current_timer(&c) ? 0 : c; })
+#define random_get_entropy() (((unsigned long)get_cycles()) ?: random_get_entropy_fallback())
#endif
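
The new random_get_entropy() definition leans on the GNU C "a ?: b" (elvis) extension: a is evaluated once and used unless it is zero, in which case b is evaluated instead, so random_get_entropy_fallback() only runs when the cycle counter reads 0 (absent or not yet running). A small standalone sketch of the operator (GCC/Clang only; values illustrative):

#include <stdio.h>

static unsigned long cycles;	/* 0 simulates a missing/stopped counter */

static unsigned long fallback_sketch(void)
{
	return 12345;	/* stand-in for the slower fallback source */
}

/* GNU C elvis operator: the counter is evaluated once and used
 * unless it is zero, in which case the fallback runs instead. */
#define entropy() (cycles ?: fallback_sketch())

int main(void)
{
	printf("%lu\n", entropy());	/* counter is 0 -> 12345 */
	cycles = 7;
	printf("%lu\n", entropy());	/* counter ticks -> 7 */
	return 0;
}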
diff --git a/arch/arm/include/asm/unwind.h b/arch/arm/include/asm/unwind.h
index 0f8a3439902d..b51f85417f58 100644
--- a/arch/arm/include/asm/unwind.h
+++ b/arch/arm/include/asm/unwind.h
@@ -24,6 +24,7 @@ struct unwind_idx {
struct unwind_table {
struct list_head list;
+ struct list_head mod_list;
const struct unwind_idx *start;
const struct unwind_idx *origin;
const struct unwind_idx *stop;
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 06508698abb8..c39303e5c234 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -61,9 +61,8 @@
.macro pabt_helper
@ PABORT handler takes pt_regs in r2, fault address in r4 and psr in r5
#ifdef MULTI_PABORT
- ldr ip, .LCprocfns
- mov lr, pc
- ldr pc, [ip, #PROCESSOR_PABT_FUNC]
+ ldr_va ip, processor, offset=PROCESSOR_PABT_FUNC
+ bl_r ip
#else
bl CPU_PABORT_HANDLER
#endif
@@ -82,9 +81,8 @@
@ the fault status register in r1. r9 must be preserved.
@
#ifdef MULTI_DABORT
- ldr ip, .LCprocfns
- mov lr, pc
- ldr pc, [ip, #PROCESSOR_DABT_FUNC]
+ ldr_va ip, processor, offset=PROCESSOR_DABT_FUNC
+ bl_r ip
#else
bl CPU_DABORT_HANDLER
#endif
@@ -302,16 +300,6 @@ __fiq_svc:
UNWIND(.fnend )
ENDPROC(__fiq_svc)
- .align 5
-.LCcralign:
- .word cr_alignment
-#ifdef MULTI_DABORT
-.LCprocfns:
- .word processor
-#endif
-.LCfp:
- .word fp_enter
-
/*
* Abort mode handlers
*/
@@ -370,7 +358,7 @@ ENDPROC(__fiq_abt)
THUMB( stmia sp, {r0 - r12} )
ATRAP( mrc p15, 0, r7, c1, c0, 0)
- ATRAP( ldr r8, .LCcralign)
+ ATRAP( ldr_va r8, cr_alignment)
ldmia r0, {r3 - r5}
add r0, sp, #S_PC @ here for interlock avoidance
@@ -379,8 +367,6 @@ ENDPROC(__fiq_abt)
str r3, [sp] @ save the "real" r0 copied
@ from the exception stack
- ATRAP( ldr r8, [r8, #0])
-
@
@ We are now ready to fill in the remaining blanks on the stack:
@
@@ -505,9 +491,7 @@ __und_usr_thumb:
*/
#if __LINUX_ARM_ARCH__ < 7
/* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */
-#define NEED_CPU_ARCHITECTURE
- ldr r5, .LCcpu_architecture
- ldr r5, [r5]
+ ldr_va r5, cpu_architecture
cmp r5, #CPU_ARCH_ARMv7
blo __und_usr_fault_16 @ 16bit undefined instruction
/*
@@ -654,12 +638,6 @@ call_fpe:
ret.w lr @ CP#14 (Debug)
ret.w lr @ CP#15 (Control)
-#ifdef NEED_CPU_ARCHITECTURE
- .align 2
-.LCcpu_architecture:
- .word __cpu_architecture
-#endif
-
#ifdef CONFIG_NEON
.align 6
@@ -685,9 +663,8 @@ call_fpe:
#endif
do_fpe:
- ldr r4, .LCfp
add r10, r10, #TI_FPSTATE @ r10 = workspace
- ldr pc, [r4] @ Call FP module USR entry point
+ ldr_va pc, fp_enter, tmp=r4 @ Call FP module USR entry point
/*
* The FP module is called with these registers set:
@@ -1101,6 +1078,12 @@ __kuser_helper_end:
*/
.macro vector_stub, name, mode, correction=0
.align 5
+#ifdef CONFIG_HARDEN_BRANCH_HISTORY
+vector_bhb_bpiall_\name:
+ mcr p15, 0, r0, c7, c5, 6 @ BPIALL
+ @ isb not needed due to "movs pc, lr" in the vector stub
+ @ which gives a "context synchronisation".
+#endif
vector_\name:
.if \correction
@@ -1111,7 +1094,8 @@ vector_\name:
stmia sp, {r0, lr} @ save r0, lr
@ Save spsr_<exception> (parent CPSR)
-2: mrs lr, spsr
+.Lvec_\name:
+ mrs lr, spsr
str lr, [sp, #8] @ save spsr
@
@@ -1145,28 +1129,14 @@ vector_bhb_loop8_\name:
@ bhb workaround
mov r0, #8
-3: b . + 4
+3: W(b) . + 4
subs r0, r0, #1
bne 3b
- dsb
- isb
- b 2b
-ENDPROC(vector_bhb_loop8_\name)
-
-vector_bhb_bpiall_\name:
- .if \correction
- sub lr, lr, #\correction
- .endif
-
- @ Save r0, lr_<exception> (parent PC)
- stmia sp, {r0, lr}
-
- @ bhb workaround
- mcr p15, 0, r0, c7, c5, 6 @ BPIALL
+ dsb nsh
@ isb not needed due to "movs pc, lr" in the vector stub
@ which gives a "context synchronisation".
- b 2b
-ENDPROC(vector_bhb_bpiall_\name)
+ b .Lvec_\name
+ENDPROC(vector_bhb_loop8_\name)
.previous
#endif
@@ -1176,10 +1146,15 @@ ENDPROC(vector_bhb_bpiall_\name)
.endm
.section .stubs, "ax", %progbits
- @ This must be the first word
+ @ These need to remain at the start of the section so that
+ @ they are in range of the 'SWI' entries in the vector tables
+ @ located 4k down.
+.L__vector_swi:
.word vector_swi
#ifdef CONFIG_HARDEN_BRANCH_HISTORY
+.L__vector_bhb_loop8_swi:
.word vector_bhb_loop8_swi
+.L__vector_bhb_bpiall_swi:
.word vector_bhb_bpiall_swi
#endif
@@ -1322,10 +1297,11 @@ vector_addrexcptn:
.globl vector_fiq
.section .vectors, "ax", %progbits
-.L__vectors_start:
W(b) vector_rst
W(b) vector_und
- W(ldr) pc, .L__vectors_start + 0x1000
+ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_swi )
+THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_swi )
+ W(ldr) pc, .
W(b) vector_pabt
W(b) vector_dabt
W(b) vector_addrexcptn
@@ -1334,10 +1310,11 @@ vector_addrexcptn:
#ifdef CONFIG_HARDEN_BRANCH_HISTORY
.section .vectors.bhb.loop8, "ax", %progbits
-.L__vectors_bhb_loop8_start:
W(b) vector_rst
W(b) vector_bhb_loop8_und
- W(ldr) pc, .L__vectors_bhb_loop8_start + 0x1004
+ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_bhb_loop8_swi )
+THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_bhb_loop8_swi )
+ W(ldr) pc, .
W(b) vector_bhb_loop8_pabt
W(b) vector_bhb_loop8_dabt
W(b) vector_addrexcptn
@@ -1345,10 +1322,11 @@ vector_addrexcptn:
W(b) vector_bhb_loop8_fiq
.section .vectors.bhb.bpiall, "ax", %progbits
-.L__vectors_bhb_bpiall_start:
W(b) vector_rst
W(b) vector_bhb_bpiall_und
- W(ldr) pc, .L__vectors_bhb_bpiall_start + 0x1008
+ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_bhb_bpiall_swi )
+THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_bhb_bpiall_swi )
+ W(ldr) pc, .
W(b) vector_bhb_bpiall_pabt
W(b) vector_bhb_bpiall_dabt
W(b) vector_addrexcptn
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 90d40f4d56cf..7aa3ded4af92 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -164,7 +164,7 @@ ENTRY(vector_bhb_loop8_swi)
1: b 2f
2: subs r8, r8, #1
bne 1b
- dsb
+ dsb nsh
isb
b 3f
ENDPROC(vector_bhb_loop8_swi)
@@ -198,7 +198,7 @@ ENTRY(vector_swi)
#endif
reload_current r10, ip
zero_fp
- alignment_trap r10, ip, __cr_alignment
+ alignment_trap r10, ip, cr_alignment
asm_trace_hardirqs_on save=0
enable_irq_notrace
ct_user_exit save=0
@@ -328,14 +328,6 @@ __sys_trace_return:
bl syscall_trace_exit
b ret_slow_syscall
- .align 5
-#ifdef CONFIG_ALIGNMENT_TRAP
- .type __cr_alignment, #object
-__cr_alignment:
- .word cr_alignment
-#endif
- .ltorg
-
.macro syscall_table_start, sym
.equ __sys_nr, 0
.type \sym, #object
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 9a1dc142f782..5865621bf691 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -48,8 +48,7 @@
.macro alignment_trap, rtmp1, rtmp2, label
#ifdef CONFIG_ALIGNMENT_TRAP
mrc p15, 0, \rtmp2, c1, c0, 0
- ldr \rtmp1, \label
- ldr \rtmp1, [\rtmp1]
+ ldr_va \rtmp1, \label
teq \rtmp1, \rtmp2
mcrne p15, 0, \rtmp1, c1, c0, 0
#endif
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index b1423fb130ea..054e9199f30d 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -941,6 +941,23 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
return ret;
}
+#ifdef CONFIG_ARM_ERRATA_764319
+static int oslsr_fault;
+
+static int debug_oslsr_trap(struct pt_regs *regs, unsigned int instr)
+{
+ oslsr_fault = 1;
+ instruction_pointer(regs) += 4;
+ return 0;
+}
+
+static struct undef_hook debug_oslsr_hook = {
+ .instr_mask = 0xffffffff,
+ .instr_val = 0xee115e91,
+ .fn = debug_oslsr_trap,
+};
+#endif
+
/*
* One-time initialisation.
*/
@@ -974,7 +991,16 @@ static bool core_has_os_save_restore(void)
case ARM_DEBUG_ARCH_V7_1:
return true;
case ARM_DEBUG_ARCH_V7_ECP14:
+#ifdef CONFIG_ARM_ERRATA_764319
+ oslsr_fault = 0;
+ register_undef_hook(&debug_oslsr_hook);
ARM_DBG_READ(c1, c1, 4, oslsr);
+ unregister_undef_hook(&debug_oslsr_hook);
+ if (oslsr_fault)
+ return false;
+#else
+ ARM_DBG_READ(c1, c1, 4, oslsr);
+#endif
if (oslsr & ARM_OSLSR_OSLM0)
return true;
fallthrough;
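
The erratum 764319 hunk above follows a probe-under-hook pattern: clear a flag, register an undef hook matching the exact MRC encoding, execute the possibly-faulting OSLSR read, unregister the hook, and test the flag; the hook advances the PC past the faulting instruction so execution continues. A userspace analogue of the same pattern, using SIGILL and sigsetjmp() in place of the undef hook (a sketch of the idea, not the kernel mechanism):

#include <setjmp.h>
#include <signal.h>
#include <stdio.h>

static sigjmp_buf env;

static void on_sigill(int sig)
{
	(void)sig;
	siglongjmp(env, 1);	/* like the hook skipping the instruction */
}

/* Run probe() and report whether it raised SIGILL. */
static int insn_faults(void (*probe)(void))
{
	struct sigaction sa = { 0 }, old;
	int faulted = 0;

	sa.sa_handler = on_sigill;
	sigemptyset(&sa.sa_mask);
	sigaction(SIGILL, &sa, &old);	/* "register_undef_hook" */
	if (sigsetjmp(env, 1))
		faulted = 1;		/* the hook fired */
	else
		probe();		/* may fault on affected hardware */
	sigaction(SIGILL, &old, NULL);	/* "unregister_undef_hook" */
	return faulted;
}

static void harmless_probe(void)
{
}

int main(void)
{
	printf("faulted=%d\n", insn_faults(harmless_probe));	/* 0 */
	return 0;
}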
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index 549abcedf795..d59c36dc0494 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -459,46 +459,40 @@ int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs,
#ifdef CONFIG_ARM_UNWIND
const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
const Elf_Shdr *sechdrs_end = sechdrs + hdr->e_shnum;
- struct mod_unwind_map maps[ARM_SEC_MAX];
- int i;
+ struct list_head *unwind_list = &mod->arch.unwind_list;
- memset(maps, 0, sizeof(maps));
+ INIT_LIST_HEAD(unwind_list);
+ mod->arch.init_table = NULL;
for (s = sechdrs; s < sechdrs_end; s++) {
const char *secname = secstrs + s->sh_name;
+ const char *txtname;
+ const Elf_Shdr *txt_sec;
- if (!(s->sh_flags & SHF_ALLOC))
+ if (!(s->sh_flags & SHF_ALLOC) ||
+ s->sh_type != ELF_SECTION_UNWIND)
continue;
- if (strcmp(".ARM.exidx.init.text", secname) == 0)
- maps[ARM_SEC_INIT].unw_sec = s;
- else if (strcmp(".ARM.exidx", secname) == 0)
- maps[ARM_SEC_CORE].unw_sec = s;
- else if (strcmp(".ARM.exidx.exit.text", secname) == 0)
- maps[ARM_SEC_EXIT].unw_sec = s;
- else if (strcmp(".ARM.exidx.text.unlikely", secname) == 0)
- maps[ARM_SEC_UNLIKELY].unw_sec = s;
- else if (strcmp(".ARM.exidx.text.hot", secname) == 0)
- maps[ARM_SEC_HOT].unw_sec = s;
- else if (strcmp(".init.text", secname) == 0)
- maps[ARM_SEC_INIT].txt_sec = s;
- else if (strcmp(".text", secname) == 0)
- maps[ARM_SEC_CORE].txt_sec = s;
- else if (strcmp(".exit.text", secname) == 0)
- maps[ARM_SEC_EXIT].txt_sec = s;
- else if (strcmp(".text.unlikely", secname) == 0)
- maps[ARM_SEC_UNLIKELY].txt_sec = s;
- else if (strcmp(".text.hot", secname) == 0)
- maps[ARM_SEC_HOT].txt_sec = s;
- }
+ if (!strcmp(".ARM.exidx", secname))
+ txtname = ".text";
+ else
+ txtname = secname + strlen(".ARM.exidx");
+ txt_sec = find_mod_section(hdr, sechdrs, txtname);
+
+ if (txt_sec) {
+ struct unwind_table *table =
+ unwind_table_add(s->sh_addr,
+ s->sh_size,
+ txt_sec->sh_addr,
+ txt_sec->sh_size);
- for (i = 0; i < ARM_SEC_MAX; i++)
- if (maps[i].unw_sec && maps[i].txt_sec)
- mod->arch.unwind[i] =
- unwind_table_add(maps[i].unw_sec->sh_addr,
- maps[i].unw_sec->sh_size,
- maps[i].txt_sec->sh_addr,
- maps[i].txt_sec->sh_size);
+ list_add(&table->mod_list, unwind_list);
+
+ /* save init table for module_arch_freeing_init */
+ if (strcmp(".ARM.exidx.init.text", secname) == 0)
+ mod->arch.init_table = table;
+ }
+ }
#endif
#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
s = find_mod_section(hdr, sechdrs, ".pv_table");
@@ -519,19 +513,27 @@ void
module_arch_cleanup(struct module *mod)
{
#ifdef CONFIG_ARM_UNWIND
- int i;
+ struct unwind_table *tmp;
+ struct unwind_table *n;
- for (i = 0; i < ARM_SEC_MAX; i++) {
- unwind_table_del(mod->arch.unwind[i]);
- mod->arch.unwind[i] = NULL;
+ list_for_each_entry_safe(tmp, n,
+ &mod->arch.unwind_list, mod_list) {
+ list_del(&tmp->mod_list);
+ unwind_table_del(tmp);
}
+ mod->arch.init_table = NULL;
#endif
}
void __weak module_arch_freeing_init(struct module *mod)
{
#ifdef CONFIG_ARM_UNWIND
- unwind_table_del(mod->arch.unwind[ARM_SEC_INIT]);
- mod->arch.unwind[ARM_SEC_INIT] = NULL;
+ struct unwind_table *init = mod->arch.init_table;
+
+ if (init) {
+ mod->arch.init_table = NULL;
+ list_del(&init->mod_list);
+ unwind_table_del(init);
+ }
#endif
}
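
The module.c rework above replaces the fixed ARM_SEC_* array with a per-module list of unwind tables, torn down with list_for_each_entry_safe(), which carries a second cursor so the current node can be freed mid-walk. A plain-C sketch of why that extra cursor matters (illustrative types, not the kernel's list API):

#include <stdio.h>
#include <stdlib.h>

struct table {
	int id;
	struct table *next;
};

/* Free every node: the next pointer is cached before the free, which
 * is exactly the role of the extra cursor in list_for_each_entry_safe(). */
static void teardown(struct table *head)
{
	struct table *t = head, *n;

	while (t) {
		n = t->next;	/* read before t is freed */
		printf("deleting table %d\n", t->id);
		free(t);
		t = n;
	}
}

int main(void)
{
	struct table *head = NULL;

	for (int i = 0; i < 3; i++) {
		struct table *t = malloc(sizeof(*t));

		t->id = i;
		t->next = head;
		head = t;
	}
	teardown(head);
	return 0;
}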
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 459abc5d1819..ea128e32e8ca 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -708,6 +708,7 @@ static_assert(offsetof(siginfo_t, si_upper) == 0x18);
static_assert(offsetof(siginfo_t, si_pkey) == 0x14);
static_assert(offsetof(siginfo_t, si_perf_data) == 0x10);
static_assert(offsetof(siginfo_t, si_perf_type) == 0x14);
+static_assert(offsetof(siginfo_t, si_perf_flags) == 0x18);
static_assert(offsetof(siginfo_t, si_band) == 0x0c);
static_assert(offsetof(siginfo_t, si_fd) == 0x10);
static_assert(offsetof(siginfo_t, si_call_addr) == 0x0c);
diff --git a/arch/arm/mach-sunxi/Kconfig b/arch/arm/mach-sunxi/Kconfig
index e5c2fce281cd..abdb99fe1e97 100644
--- a/arch/arm/mach-sunxi/Kconfig
+++ b/arch/arm/mach-sunxi/Kconfig
@@ -4,10 +4,7 @@ menuconfig ARCH_SUNXI
depends on ARCH_MULTI_V5 || ARCH_MULTI_V7
select ARCH_HAS_RESET_CONTROLLER
select CLKSRC_MMIO
- select GENERIC_IRQ_CHIP
select GPIOLIB
- select IRQ_DOMAIN_HIERARCHY
- select IRQ_FASTEOI_HIERARCHY_HANDLERS
select PINCTRL
select PM_OPP
select SUN4I_TIMER
@@ -22,10 +19,12 @@ if ARCH_MULTI_V7
config MACH_SUN4I
bool "Allwinner A10 (sun4i) SoCs support"
default ARCH_SUNXI
+ select SUN4I_INTC
config MACH_SUN5I
bool "Allwinner A10s / A13 (sun5i) SoCs support"
default ARCH_SUNXI
+ select SUN4I_INTC
select SUN5I_HSTIMER
config MACH_SUN6I
@@ -34,6 +33,8 @@ config MACH_SUN6I
select ARM_GIC
select MFD_SUN6I_PRCM
select SUN5I_HSTIMER
+ select SUN6I_R_INTC
+ select SUNXI_NMI_INTC
config MACH_SUN7I
bool "Allwinner A20 (sun7i) SoCs support"
@@ -43,17 +44,21 @@ config MACH_SUN7I
select ARCH_SUPPORTS_BIG_ENDIAN
select HAVE_ARM_ARCH_TIMER
select SUN5I_HSTIMER
+ select SUNXI_NMI_INTC
config MACH_SUN8I
bool "Allwinner sun8i Family SoCs support"
default ARCH_SUNXI
select ARM_GIC
select MFD_SUN6I_PRCM
+ select SUN6I_R_INTC
+ select SUNXI_NMI_INTC
config MACH_SUN9I
bool "Allwinner (sun9i) SoCs support"
default ARCH_SUNXI
select ARM_GIC
+ select SUNXI_NMI_INTC
config ARCH_SUNXI_MC_SMP
bool
@@ -69,6 +74,7 @@ if ARCH_MULTI_V5
config MACH_SUNIV
bool "Allwinner ARMv5 F-series (suniv) SoCs support"
default ARCH_SUNXI
+ select SUN4I_INTC
help
Support for Allwinner suniv ARMv5 SoCs.
(F1C100A, F1C100s, F1C200s, F1C500, F1C600)
diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c
index 06dbfb968182..fb9f3eb6bf48 100644
--- a/arch/arm/mm/proc-v7-bugs.c
+++ b/arch/arm/mm/proc-v7-bugs.c
@@ -288,6 +288,7 @@ void cpu_v7_ca15_ibe(void)
{
if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(0)))
cpu_v7_spectre_v2_init();
+ cpu_v7_spectre_bhb_init();
}
void cpu_v7_bugs_init(void)
diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile
index ec52b776f926..8ca1c9f262a2 100644
--- a/arch/arm/vdso/Makefile
+++ b/arch/arm/vdso/Makefile
@@ -28,7 +28,7 @@ CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
CFLAGS_REMOVE_vdso.o = -pg
# Force -O2 to avoid libgcc dependencies
-CFLAGS_REMOVE_vgettimeofday.o = -pg -Os $(GCC_PLUGINS_CFLAGS)
+CFLAGS_REMOVE_vgettimeofday.o = -pg -Os $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS)
ifeq ($(c-gettimeofday-y),)
CFLAGS_vgettimeofday.o = -O2
else
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 20ea89d9ac2f..d550f5acfaf3 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -262,31 +262,31 @@ config ARM64_CONT_PMD_SHIFT
default 4
config ARCH_MMAP_RND_BITS_MIN
- default 14 if ARM64_64K_PAGES
- default 16 if ARM64_16K_PAGES
- default 18
+ default 14 if ARM64_64K_PAGES
+ default 16 if ARM64_16K_PAGES
+ default 18
# max bits determined by the following formula:
# VA_BITS - PAGE_SHIFT - 3
config ARCH_MMAP_RND_BITS_MAX
- default 19 if ARM64_VA_BITS=36
- default 24 if ARM64_VA_BITS=39
- default 27 if ARM64_VA_BITS=42
- default 30 if ARM64_VA_BITS=47
- default 29 if ARM64_VA_BITS=48 && ARM64_64K_PAGES
- default 31 if ARM64_VA_BITS=48 && ARM64_16K_PAGES
- default 33 if ARM64_VA_BITS=48
- default 14 if ARM64_64K_PAGES
- default 16 if ARM64_16K_PAGES
- default 18
+ default 19 if ARM64_VA_BITS=36
+ default 24 if ARM64_VA_BITS=39
+ default 27 if ARM64_VA_BITS=42
+ default 30 if ARM64_VA_BITS=47
+ default 29 if ARM64_VA_BITS=48 && ARM64_64K_PAGES
+ default 31 if ARM64_VA_BITS=48 && ARM64_16K_PAGES
+ default 33 if ARM64_VA_BITS=48
+ default 14 if ARM64_64K_PAGES
+ default 16 if ARM64_16K_PAGES
+ default 18
config ARCH_MMAP_RND_COMPAT_BITS_MIN
- default 7 if ARM64_64K_PAGES
- default 9 if ARM64_16K_PAGES
- default 11
+ default 7 if ARM64_64K_PAGES
+ default 9 if ARM64_16K_PAGES
+ default 11
config ARCH_MMAP_RND_COMPAT_BITS_MAX
- default 16
+ default 16
config NO_IOPORT_MAP
def_bool y if !PCI
@@ -313,7 +313,7 @@ config GENERIC_HWEIGHT
def_bool y
config GENERIC_CSUM
- def_bool y
+ def_bool y
config GENERIC_CALIBRATE_DELAY
def_bool y
@@ -1046,8 +1046,7 @@ config SOCIONEXT_SYNQUACER_PREITS
If unsure, say Y.
-endmenu
-
+endmenu # "ARM errata workarounds via the alternatives framework"
choice
prompt "Page size"
@@ -1575,9 +1574,9 @@ config SETEND_EMULATION
be unexpected results in the applications.
If unsure, say Y
-endif
+endif # ARMV8_DEPRECATED
-endif
+endif # COMPAT
menu "ARMv8.1 architectural features"
@@ -1602,15 +1601,15 @@ config ARM64_PAN
bool "Enable support for Privileged Access Never (PAN)"
default y
help
- Privileged Access Never (PAN; part of the ARMv8.1 Extensions)
- prevents the kernel or hypervisor from accessing user-space (EL0)
- memory directly.
+ Privileged Access Never (PAN; part of the ARMv8.1 Extensions)
+ prevents the kernel or hypervisor from accessing user-space (EL0)
+ memory directly.
- Choosing this option will cause any unprotected (not using
- copy_to_user et al) memory access to fail with a permission fault.
+ Choosing this option will cause any unprotected (not using
+ copy_to_user et al) memory access to fail with a permission fault.
- The feature is detected at runtime, and will remain as a 'nop'
- instruction if the cpu does not implement the feature.
+ The feature is detected at runtime, and will remain as a 'nop'
+ instruction if the cpu does not implement the feature.
config AS_HAS_LDAPR
def_bool $(as-instr,.arch_extension rcpc)
@@ -1638,15 +1637,15 @@ config ARM64_USE_LSE_ATOMICS
built with binutils >= 2.25 in order for the new instructions
to be used.
-endmenu
+endmenu # "ARMv8.1 architectural features"
menu "ARMv8.2 architectural features"
config AS_HAS_ARMV8_2
- def_bool $(cc-option,-Wa$(comma)-march=armv8.2-a)
+ def_bool $(cc-option,-Wa$(comma)-march=armv8.2-a)
config AS_HAS_SHA3
- def_bool $(as-instr,.arch armv8.2-a+sha3)
+ def_bool $(as-instr,.arch armv8.2-a+sha3)
config ARM64_PMEM
bool "Enable support for persistent memory"
@@ -1690,7 +1689,7 @@ config ARM64_CNP
at runtime, and does not affect PEs that do not implement
this feature.
-endmenu
+endmenu # "ARMv8.2 architectural features"
menu "ARMv8.3 architectural features"
@@ -1753,7 +1752,7 @@ config AS_HAS_PAC
config AS_HAS_CFI_NEGATE_RA_STATE
def_bool $(as-instr,.cfi_startproc\n.cfi_negate_ra_state\n.cfi_endproc\n)
-endmenu
+endmenu # "ARMv8.3 architectural features"
menu "ARMv8.4 architectural features"
@@ -1794,7 +1793,7 @@ config ARM64_TLB_RANGE
The feature introduces new assembly instructions, which are
supported by binutils >= 2.30.
-endmenu
+endmenu # "ARMv8.4 architectural features"
menu "ARMv8.5 architectural features"
@@ -1880,6 +1879,7 @@ config ARM64_MTE
depends on AS_HAS_LSE_ATOMICS
# Required for tag checking in the uaccess routines
depends on ARM64_PAN
+ select ARCH_HAS_SUBPAGE_FAULTS
select ARCH_USES_HIGH_VMA_FLAGS
help
Memory Tagging (part of the ARMv8.5 Extensions) provides
@@ -1901,7 +1901,7 @@ config ARM64_MTE
Documentation/arm64/memory-tagging-extension.rst.
-endmenu
+endmenu # "ARMv8.5 architectural features"
menu "ARMv8.7 architectural features"
@@ -1910,12 +1910,12 @@ config ARM64_EPAN
default y
depends on ARM64_PAN
help
- Enhanced Privileged Access Never (EPAN) allows Privileged
- Access Never to be used with Execute-only mappings.
+ Enhanced Privileged Access Never (EPAN) allows Privileged
+ Access Never to be used with Execute-only mappings.
- The feature is detected at runtime, and will remain disabled
- if the cpu does not implement the feature.
-endmenu
+ The feature is detected at runtime, and will remain disabled
+ if the cpu does not implement the feature.
+endmenu # "ARMv8.7 architectural features"
config ARM64_SVE
bool "ARM Scalable Vector Extension support"
@@ -1948,6 +1948,17 @@ config ARM64_SVE
booting the kernel. If unsure and you are not observing these
symptoms, you should assume that it is safe to say Y.
+config ARM64_SME
+ bool "ARM Scalable Matrix Extension support"
+ default y
+ depends on ARM64_SVE
+ help
+ The Scalable Matrix Extension (SME) is an extension to the AArch64
+ execution state which utilises a substantial subset of the SVE
+ instruction set, together with the addition of new architectural
+ register state capable of holding two-dimensional matrix tiles to
+ enable various matrix operations.
+
config ARM64_MODULE_PLTS
bool "Use PLTs to allow module memory to spill over into vmalloc area"
depends on MODULES
@@ -1991,7 +2002,7 @@ config ARM64_DEBUG_PRIORITY_MASKING
the validity of ICC_PMR_EL1 when calling concerned functions.
If unsure, say N
-endif
+endif # ARM64_PSEUDO_NMI
config RELOCATABLE
bool "Build a relocatable kernel image" if EXPERT
@@ -2050,7 +2061,19 @@ config STACKPROTECTOR_PER_TASK
def_bool y
depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_SYSREG
-endmenu
+# The GPIO numbers here must be sorted in descending order. In case of
+# a multiplatform kernel, we just want the highest value required by the
+# selected platforms.
+config ARCH_NR_GPIO
+ int
+ default 2048 if ARCH_APPLE
+ default 0
+ help
+ Maximum number of GPIOs in the system.
+
+ If unsure, leave the default value.
+
+endmenu # "Kernel Features"
menu "Boot options"
@@ -2114,7 +2137,7 @@ config EFI
help
This option provides support for runtime services provided
by UEFI firmware (such as non-volatile variables, realtime
- clock, and platform reset). A UEFI stub is also provided to
+ clock, and platform reset). A UEFI stub is also provided to
allow the kernel to be booted as an EFI application. This
is only useful on systems that have UEFI firmware.
@@ -2129,7 +2152,7 @@ config DMI
However, even with this option, the resultant kernel should
continue to boot on existing non-UEFI platforms.
-endmenu
+endmenu # "Boot options"
config SYSVIPC_COMPAT
def_bool y
@@ -2150,7 +2173,7 @@ config ARCH_HIBERNATION_HEADER
config ARCH_SUSPEND_POSSIBLE
def_bool y
-endmenu
+endmenu # "Power management options"
menu "CPU Power Management"
@@ -2158,7 +2181,7 @@ source "drivers/cpuidle/Kconfig"
source "drivers/cpufreq/Kconfig"
-endmenu
+endmenu # "CPU Power Management"
source "drivers/acpi/Kconfig"
@@ -2166,4 +2189,4 @@ source "arch/arm64/kvm/Kconfig"
if CRYPTO
source "arch/arm64/crypto/Kconfig"
-endif
+endif # CRYPTO
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 30b123cde02c..4e6d635a1731 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -11,12 +11,11 @@ config ARCH_ACTIONS
config ARCH_SUNXI
bool "Allwinner sunxi 64-bit SoC Family"
select ARCH_HAS_RESET_CONTROLLER
- select GENERIC_IRQ_CHIP
- select IRQ_DOMAIN_HIERARCHY
- select IRQ_FASTEOI_HIERARCHY_HANDLERS
select PINCTRL
select RESET_CONTROLLER
select SUN4I_TIMER
+ select SUN6I_R_INTC
+ select SUNXI_NMI_INTC
help
This enables support for Allwinner sunxi based SoCs like the A64.
@@ -253,6 +252,7 @@ config ARCH_INTEL_SOCFPGA
config ARCH_SYNQUACER
bool "Socionext SynQuacer SoC Family"
+ select IRQ_FASTEOI_HIERARCHY_HANDLERS
config ARCH_TEGRA
bool "NVIDIA Tegra SoC Family"
@@ -325,4 +325,4 @@ config ARCH_ZYNQMP
help
This enables support for Xilinx ZynqMP Family
-endmenu
+endmenu # "Platform selection"
diff --git a/arch/arm64/boot/dts/qcom/sm8250-mtp.dts b/arch/arm64/boot/dts/qcom/sm8250-mtp.dts
index fb99cc2827c7..7ab3627cc347 100644
--- a/arch/arm64/boot/dts/qcom/sm8250-mtp.dts
+++ b/arch/arm64/boot/dts/qcom/sm8250-mtp.dts
@@ -622,6 +622,10 @@
status = "okay";
};
+&rxmacro {
+ status = "okay";
+};
+
&slpi {
status = "okay";
firmware-name = "qcom/sm8250/slpi.mbn";
@@ -773,6 +777,8 @@
};
&swr1 {
+ status = "okay";
+
wcd_rx: wcd9380-rx@0,4 {
compatible = "sdw20217010d00";
reg = <0 4>;
@@ -781,6 +787,8 @@
};
&swr2 {
+ status = "okay";
+
wcd_tx: wcd9380-tx@0,3 {
compatible = "sdw20217010d00";
reg = <0 3>;
@@ -819,6 +827,10 @@
};
};
+&txmacro {
+ status = "okay";
+};
+
&uart12 {
status = "okay";
};
diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi
index af8f22636436..1304b86af1a0 100644
--- a/arch/arm64/boot/dts/qcom/sm8250.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi
@@ -2255,6 +2255,7 @@
pinctrl-0 = <&rx_swr_active>;
compatible = "qcom,sm8250-lpass-rx-macro";
reg = <0 0x3200000 0 0x1000>;
+ status = "disabled";
clocks = <&q6afecc LPASS_CLK_ID_TX_CORE_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
<&q6afecc LPASS_CLK_ID_TX_CORE_NPL_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
@@ -2273,6 +2274,7 @@
swr1: soundwire-controller@3210000 {
reg = <0 0x3210000 0 0x2000>;
compatible = "qcom,soundwire-v1.5.1";
+ status = "disabled";
interrupts = <GIC_SPI 298 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&rxmacro>;
clock-names = "iface";
@@ -2300,6 +2302,7 @@
pinctrl-0 = <&tx_swr_active>;
compatible = "qcom,sm8250-lpass-tx-macro";
reg = <0 0x3220000 0 0x1000>;
+ status = "disabled";
clocks = <&q6afecc LPASS_CLK_ID_TX_CORE_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
<&q6afecc LPASS_CLK_ID_TX_CORE_NPL_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
@@ -2323,6 +2326,7 @@
compatible = "qcom,soundwire-v1.5.1";
interrupts-extended = <&intc GIC_SPI 297 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "core";
+ status = "disabled";
clocks = <&txmacro>;
clock-names = "iface";
diff --git a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts
index a01886b467ed..067fe4a6b178 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts
@@ -16,6 +16,7 @@
aliases {
ethernet0 = &gmac0;
+ ethernet1 = &gmac1;
mmc0 = &sdmmc0;
mmc1 = &sdhci;
};
@@ -78,7 +79,6 @@
assigned-clocks = <&cru SCLK_GMAC0_RX_TX>, <&cru SCLK_GMAC0>;
assigned-clock-parents = <&cru SCLK_GMAC0_RGMII_SPEED>, <&cru CLK_MAC0_2TOP>;
clock_in_out = "input";
- phy-handle = <&rgmii_phy0>;
phy-mode = "rgmii";
pinctrl-names = "default";
pinctrl-0 = <&gmac0_miim
@@ -90,8 +90,38 @@
snps,reset-active-low;
/* Reset time is 20ms, 100ms for rtl8211f */
snps,reset-delays-us = <0 20000 100000>;
+ tx_delay = <0x4f>;
+ rx_delay = <0x0f>;
+ status = "okay";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ pause;
+ };
+};
+
+&gmac1 {
+ assigned-clocks = <&cru SCLK_GMAC1_RX_TX>, <&cru SCLK_GMAC1>;
+ assigned-clock-parents = <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru CLK_MAC1_2TOP>;
+ clock_in_out = "output";
+ phy-handle = <&rgmii_phy1>;
+ phy-mode = "rgmii";
+ pinctrl-names = "default";
+ pinctrl-0 = <&gmac1m1_miim
+ &gmac1m1_tx_bus2
+ &gmac1m1_rx_bus2
+ &gmac1m1_rgmii_clk
+ &gmac1m1_rgmii_bus>;
+
+ snps,reset-gpio = <&gpio3 RK_PB0 GPIO_ACTIVE_LOW>;
+ snps,reset-active-low;
+ /* Reset time is 20ms, 100ms for rtl8211f */
+ snps,reset-delays-us = <0 20000 100000>;
+
tx_delay = <0x3c>;
rx_delay = <0x2f>;
+
status = "okay";
};
@@ -315,8 +345,8 @@
status = "disabled";
};
-&mdio0 {
- rgmii_phy0: ethernet-phy@0 {
+&mdio1 {
+ rgmii_phy1: ethernet-phy@0 {
compatible = "ethernet-phy-ieee802.3-c22";
reg = <0x0>;
};
@@ -345,9 +375,9 @@
pmuio2-supply = <&vcc3v3_pmu>;
vccio1-supply = <&vccio_acodec>;
vccio3-supply = <&vccio_sd>;
- vccio4-supply = <&vcc_1v8>;
+ vccio4-supply = <&vcc_3v3>;
vccio5-supply = <&vcc_3v3>;
- vccio6-supply = <&vcc_3v3>;
+ vccio6-supply = <&vcc_1v8>;
vccio7-supply = <&vcc_3v3>;
status = "okay";
};
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 345fe98605ba..5c8ee5a541d2 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -7,3 +7,4 @@ generic-y += parport.h
generic-y += user.h
generated-y += cpucaps.h
+generated-y += sysreg-defs.h
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 8bd5afc7b692..48d4473e8eee 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -26,12 +26,6 @@
* sets the GP register's most significant bits to 0 with an explicit cast.
*/
-static inline void gic_write_eoir(u32 irq)
-{
- write_sysreg_s(irq, SYS_ICC_EOIR1_EL1);
- isb();
-}
-
static __always_inline void gic_write_dir(u32 irq)
{
write_sysreg_s(irq, SYS_ICC_DIR_EL1);
diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h
index d1bb5e71df25..3a6b6d38c5b8 100644
--- a/arch/arm64/include/asm/archrandom.h
+++ b/arch/arm64/include/asm/archrandom.h
@@ -142,7 +142,7 @@ static inline bool __init __early_cpu_has_rndr(void)
{
/* Open code as we run prior to the first call to cpufeature. */
unsigned long ftr = read_sysreg_s(SYS_ID_AA64ISAR0_EL1);
- return (ftr >> ID_AA64ISAR0_RNDR_SHIFT) & 0xf;
+ return (ftr >> ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf;
}
static inline bool __init __must_check
diff --git a/arch/arm64/include/asm/asm-bug.h b/arch/arm64/include/asm/asm-bug.h
index 03f52f84a4f3..c762038ba400 100644
--- a/arch/arm64/include/asm/asm-bug.h
+++ b/arch/arm64/include/asm/asm-bug.h
@@ -14,7 +14,7 @@
14472: .string file; \
.popsection; \
\
- .long 14472b - 14470b; \
+ .long 14472b - .; \
.short line;
#else
#define _BUGVERBOSE_LOCATION(file, line)
@@ -25,7 +25,7 @@
#define __BUG_ENTRY(flags) \
.pushsection __bug_table,"aw"; \
.align 2; \
- 14470: .long 14471f - 14470b; \
+ 14470: .long 14471f - .; \
_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \
.short flags; \
.popsection; \
diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h
index dc3ea4080e2e..6fb2e6bcc392 100644
--- a/arch/arm64/include/asm/compiler.h
+++ b/arch/arm64/include/asm/compiler.h
@@ -23,20 +23,4 @@
#define __builtin_return_address(val) \
(void *)(ptrauth_clear_pac((unsigned long)__builtin_return_address(val)))
-#ifdef CONFIG_CFI_CLANG
-/*
- * With CONFIG_CFI_CLANG, the compiler replaces function address
- * references with the address of the function's CFI jump table
- * entry. The function_nocfi macro always returns the address of the
- * actual function instead.
- */
-#define function_nocfi(x) ({ \
- void *addr; \
- asm("adrp %0, " __stringify(x) "\n\t" \
- "add %0, %0, :lo12:" __stringify(x) \
- : "=r" (addr)); \
- addr; \
-})
-#endif
-
#endif /* __ASM_COMPILER_H */
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index a58e366f0b07..115cdec1ae87 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -58,11 +58,15 @@ struct cpuinfo_arm64 {
u64 reg_id_aa64pfr0;
u64 reg_id_aa64pfr1;
u64 reg_id_aa64zfr0;
+ u64 reg_id_aa64smfr0;
struct cpuinfo_32bit aarch32;
/* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */
u64 reg_zcr;
+
+ /* pseudo-SMCR for recording maximum SMCR_EL1 LEN value: */
+ u64 reg_smcr;
};
DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index c62e7e5e2f0c..14a8f3d93add 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -622,6 +622,13 @@ static inline bool id_aa64pfr0_sve(u64 pfr0)
return val > 0;
}
+static inline bool id_aa64pfr1_sme(u64 pfr1)
+{
+ u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_SME_SHIFT);
+
+ return val > 0;
+}
+
static inline bool id_aa64pfr1_mte(u64 pfr1)
{
u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_MTE_SHIFT);
@@ -759,6 +766,23 @@ static __always_inline bool system_supports_sve(void)
cpus_have_const_cap(ARM64_SVE);
}
+static __always_inline bool system_supports_sme(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_SME) &&
+ cpus_have_const_cap(ARM64_SME);
+}
+
+static __always_inline bool system_supports_fa64(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_SME) &&
+ cpus_have_const_cap(ARM64_SME_FA64);
+}
+
+static __always_inline bool system_supports_tpidr2(void)
+{
+ return system_supports_sme();
+}
+
static __always_inline bool system_supports_cnp(void)
{
return IS_ENABLED(CONFIG_ARM64_CNP) &&
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index ff8f4511df71..92331c07c2d1 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -36,7 +36,7 @@
#define MIDR_VARIANT(midr) \
(((midr) & MIDR_VARIANT_MASK) >> MIDR_VARIANT_SHIFT)
#define MIDR_IMPLEMENTOR_SHIFT 24
-#define MIDR_IMPLEMENTOR_MASK (0xff << MIDR_IMPLEMENTOR_SHIFT)
+#define MIDR_IMPLEMENTOR_MASK (0xffU << MIDR_IMPLEMENTOR_SHIFT)
#define MIDR_IMPLEMENTOR(midr) \
(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
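
The 0xffU above matters: shifting a plain int 0xff left by 24 moves a set bit into the sign position, which is undefined behaviour in C, and the resulting negative mask sign-extends when combined with 64-bit values. A minimal userspace sketch of the fixed macros, not part of the patch (the MIDR value is invented):

    #include <stdio.h>
    #include <stdint.h>

    #define MIDR_IMPLEMENTOR_SHIFT	24
    #define MIDR_IMPLEMENTOR_MASK	(0xffU << MIDR_IMPLEMENTOR_SHIFT)
    #define MIDR_IMPLEMENTOR(midr) \
    	(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)

    int main(void)
    {
    	uint32_t midr = 0x410fd490;	/* invented example; 0x41 = Arm Ltd */

    	/* With plain 0xff the mask would be a negative int; 0xffU
    	 * keeps the whole expression unsigned. */
    	printf("implementer: 0x%02x\n", MIDR_IMPLEMENTOR(midr));
    	return 0;
    }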
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 00c291067e57..7b7e05c02691 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -64,7 +64,7 @@ struct task_struct;
struct step_hook {
struct list_head node;
- int (*fn)(struct pt_regs *regs, unsigned int esr);
+ int (*fn)(struct pt_regs *regs, unsigned long esr);
};
void register_user_step_hook(struct step_hook *hook);
@@ -75,7 +75,7 @@ void unregister_kernel_step_hook(struct step_hook *hook);
struct break_hook {
struct list_head node;
- int (*fn)(struct pt_regs *regs, unsigned int esr);
+ int (*fn)(struct pt_regs *regs, unsigned long esr);
u16 imm;
u16 mask; /* These bits are ignored when comparing with imm */
};
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index c31be7eda9df..34ceff08cac4 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -143,6 +143,50 @@
.Lskip_sve_\@:
.endm
+/* SME register access and priority mapping */
+.macro __init_el2_nvhe_sme
+ mrs x1, id_aa64pfr1_el1
+ ubfx x1, x1, #ID_AA64PFR1_SME_SHIFT, #4
+ cbz x1, .Lskip_sme_\@
+
+ bic x0, x0, #CPTR_EL2_TSM // Also disable SME traps
+ msr cptr_el2, x0 // Disable copro. traps to EL2
+ isb
+
+ mrs x1, sctlr_el2
+ orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps
+ msr sctlr_el2, x1
+ isb
+
+ mov x1, #0 // SMCR controls
+
+ mrs_s x2, SYS_ID_AA64SMFR0_EL1
+ ubfx x2, x2, #ID_AA64SMFR0_FA64_SHIFT, #1 // Full FP in SM?
+ cbz x2, .Lskip_sme_fa64_\@
+
+ orr x1, x1, SMCR_ELx_FA64_MASK
+.Lskip_sme_fa64_\@:
+
+ orr x1, x1, #SMCR_ELx_LEN_MASK // Enable full SME vector
+ msr_s SYS_SMCR_EL2, x1 // length for EL1.
+
+ mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported?
+ ubfx x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1
+ cbz x1, .Lskip_sme_\@
+
+ msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal
+
+ mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present?
+ ubfx x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4
+ cbz x1, .Lskip_sme_\@
+
+ mrs_s x1, SYS_HCRX_EL2
+ orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping
+ msr_s SYS_HCRX_EL2, x1
+
+.Lskip_sme_\@:
+.endm
+
/* Disable any fine grained traps */
.macro __init_el2_fgt
mrs x1, id_aa64mmfr0_el1
@@ -153,15 +197,26 @@
mrs x1, id_aa64dfr0_el1
ubfx x1, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
cmp x1, #3
- b.lt .Lset_fgt_\@
+ b.lt .Lset_debug_fgt_\@
/* Disable PMSNEVFR_EL1 read and write traps */
orr x0, x0, #(1 << 62)
-.Lset_fgt_\@:
+.Lset_debug_fgt_\@:
msr_s SYS_HDFGRTR_EL2, x0
msr_s SYS_HDFGWTR_EL2, x0
- msr_s SYS_HFGRTR_EL2, xzr
- msr_s SYS_HFGWTR_EL2, xzr
+
+ mov x0, xzr
+ mrs x1, id_aa64pfr1_el1
+ ubfx x1, x1, #ID_AA64PFR1_SME_SHIFT, #4
+ cbz x1, .Lset_fgt_\@
+
+ /* Disable nVHE traps of TPIDR2 and SMPRI */
+ orr x0, x0, #HFGxTR_EL2_nSMPRI_EL1_MASK
+ orr x0, x0, #HFGxTR_EL2_nTPIDR2_EL0_MASK
+
+.Lset_fgt_\@:
+ msr_s SYS_HFGRTR_EL2, x0
+ msr_s SYS_HFGWTR_EL2, x0
msr_s SYS_HFGITR_EL2, xzr
mrs x1, id_aa64pfr0_el1 // AMU traps UNDEF without AMU
@@ -196,6 +251,7 @@
__init_el2_nvhe_idregs
__init_el2_nvhe_cptr
__init_el2_nvhe_sve
+ __init_el2_nvhe_sme
__init_el2_fgt
__init_el2_nvhe_prepare_eret
.endm
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index d52a0b269ee8..8f236de7359c 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -37,7 +37,8 @@
#define ESR_ELx_EC_ERET (0x1a) /* EL2 only */
/* Unallocated EC: 0x1B */
#define ESR_ELx_EC_FPAC (0x1C) /* EL1 and above */
-/* Unallocated EC: 0x1D - 0x1E */
+#define ESR_ELx_EC_SME (0x1D)
+/* Unallocated EC: 0x1E */
#define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */
#define ESR_ELx_EC_IABT_LOW (0x20)
#define ESR_ELx_EC_IABT_CUR (0x21)
@@ -75,6 +76,7 @@
#define ESR_ELx_IL_SHIFT (25)
#define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT)
#define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1)
+#define ESR_ELx_ISS(esr) ((esr) & ESR_ELx_ISS_MASK)
/* ISS field definitions shared by different classes */
#define ESR_ELx_WNR_SHIFT (6)
@@ -136,7 +138,7 @@
#define ESR_ELx_WFx_ISS_TI (UL(1) << 0)
#define ESR_ELx_WFx_ISS_WFI (UL(0) << 0)
#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0)
-#define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1)
+#define ESR_ELx_xVC_IMM_MASK ((UL(1) << 16) - 1)
#define DISR_EL1_IDS (UL(1) << 24)
/*
@@ -327,17 +329,26 @@
#define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\
ESR_ELx_CP15_32_ISS_DIR_READ)
+/*
+ * ISS values for SME traps
+ */
+
+#define ESR_ELx_SME_ISS_SME_DISABLED 0
+#define ESR_ELx_SME_ISS_ILL 1
+#define ESR_ELx_SME_ISS_SM_DISABLED 2
+#define ESR_ELx_SME_ISS_ZA_DISABLED 3
+
#ifndef __ASSEMBLY__
#include <asm/types.h>
-static inline bool esr_is_data_abort(u32 esr)
+static inline bool esr_is_data_abort(unsigned long esr)
{
- const u32 ec = ESR_ELx_EC(esr);
+ const unsigned long ec = ESR_ELx_EC(esr);
return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR;
}
-const char *esr_get_class_string(u32 esr);
+const char *esr_get_class_string(unsigned long esr);
#endif /* __ASSEMBLY */
#endif /* __ASM_ESR_H */
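
The new ESR_ELx_ISS() accessor and the SME ISS values combine as in this self-contained sketch; the layout constants mirror the header (EC in bits [31:26], IL in bit 25, ISS in bits [24:0]) and the trap value is synthetic:

    #include <stdio.h>

    #define ESR_ELx_EC_SHIFT	26
    #define ESR_ELx_EC_MASK	(0x3FUL << ESR_ELx_EC_SHIFT)
    #define ESR_ELx_EC(esr)	(((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT)
    #define ESR_ELx_IL		(1UL << 25)
    #define ESR_ELx_ISS_MASK	(ESR_ELx_IL - 1)
    #define ESR_ELx_ISS(esr)	((esr) & ESR_ELx_ISS_MASK)

    #define ESR_ELx_EC_SME		0x1D
    #define ESR_ELx_SME_ISS_SM_DISABLED	2

    int main(void)
    {
    	/* Synthetic SME trap: EC=0x1D, IL set, ISS = SM disabled */
    	unsigned long esr = (0x1DUL << ESR_ELx_EC_SHIFT) | ESR_ELx_IL |
    			    ESR_ELx_SME_ISS_SM_DISABLED;

    	if (ESR_ELx_EC(esr) == ESR_ELx_EC_SME &&
    	    ESR_ELx_ISS(esr) == ESR_ELx_SME_ISS_SM_DISABLED)
    		printf("SME trap: streaming mode disabled\n");
    	return 0;
    }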
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index 339477dca551..d94aecff9690 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -19,9 +19,9 @@
#define __exception_irq_entry __kprobes
#endif
-static inline u32 disr_to_esr(u64 disr)
+static inline unsigned long disr_to_esr(u64 disr)
{
- unsigned int esr = ESR_ELx_EC_SERROR << ESR_ELx_EC_SHIFT;
+ unsigned long esr = ESR_ELx_EC_SERROR << ESR_ELx_EC_SHIFT;
if ((disr & DISR_EL1_IDS) == 0)
esr |= (disr & DISR_EL1_ESR_MASK);
@@ -57,23 +57,24 @@ asmlinkage void call_on_irq_stack(struct pt_regs *regs,
void (*func)(struct pt_regs *));
asmlinkage void asm_exit_to_user_mode(struct pt_regs *regs);
-void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs);
+void do_mem_abort(unsigned long far, unsigned long esr, struct pt_regs *regs);
void do_undefinstr(struct pt_regs *regs);
void do_bti(struct pt_regs *regs);
-void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
+void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr,
struct pt_regs *regs);
-void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs);
-void do_sve_acc(unsigned int esr, struct pt_regs *regs);
-void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs);
-void do_sysinstr(unsigned int esr, struct pt_regs *regs);
-void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
-void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr);
-void do_cp15instr(unsigned int esr, struct pt_regs *regs);
+void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs);
+void do_sve_acc(unsigned long esr, struct pt_regs *regs);
+void do_sme_acc(unsigned long esr, struct pt_regs *regs);
+void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs);
+void do_sysinstr(unsigned long esr, struct pt_regs *regs);
+void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs);
+void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr);
+void do_cp15instr(unsigned long esr, struct pt_regs *regs);
void do_el0_svc(struct pt_regs *regs);
void do_el0_svc_compat(struct pt_regs *regs);
-void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr);
-void do_serror(struct pt_regs *regs, unsigned int esr);
+void do_ptrauth_fault(struct pt_regs *regs, unsigned long esr);
+void do_serror(struct pt_regs *regs, unsigned long esr);
void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags);
-void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far);
+void panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far);
#endif /* __ASM_EXCEPTION_H */
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index cb24385e3632..9bb1873f5295 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -32,6 +32,18 @@
#define VFP_STATE_SIZE ((32 * 8) + 4)
#endif
+/*
+ * When we defined the maximum SVE vector length we defined the ABI so
+ * that the maximum vector length included all the bits reserved in ZCR
+ * for future expansion rather than just those currently defined by the
+ * architecture. While SME follows a similar pattern, the fact that it
+ * includes a square matrix means that any allocations attempting to
+ * cover the maximum potential vector length (such as those for the
+ * regset used for ptrace) end up being extremely large. Define the
+ * much lower actual limit for use in such situations.
+ */
+#define SME_VQ_MAX 16
+
struct task_struct;
extern void fpsimd_save_state(struct user_fpsimd_state *state);
@@ -46,11 +58,23 @@ extern void fpsimd_restore_current_state(void);
extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
- void *sve_state, unsigned int sve_vl);
+ void *sve_state, unsigned int sve_vl,
+ void *za_state, unsigned int sme_vl,
+ u64 *svcr);
extern void fpsimd_flush_task_state(struct task_struct *target);
extern void fpsimd_save_and_flush_cpu_state(void);
+static inline bool thread_sm_enabled(struct thread_struct *thread)
+{
+ return system_supports_sme() && (thread->svcr & SVCR_SM_MASK);
+}
+
+static inline bool thread_za_enabled(struct thread_struct *thread)
+{
+ return system_supports_sme() && (thread->svcr & SVCR_ZA_MASK);
+}
+
/* Maximum VL that SVE/SME VL-agnostic software can transparently support */
#define VL_ARCH_MAX 0x100
@@ -62,7 +86,14 @@ static inline size_t sve_ffr_offset(int vl)
static inline void *sve_pffr(struct thread_struct *thread)
{
- return (char *)thread->sve_state + sve_ffr_offset(thread_get_sve_vl(thread));
+ unsigned int vl;
+
+ if (system_supports_sme() && thread_sm_enabled(thread))
+ vl = thread_get_sme_vl(thread);
+ else
+ vl = thread_get_sve_vl(thread);
+
+ return (char *)thread->sve_state + sve_ffr_offset(vl);
}
extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr);
@@ -71,11 +102,17 @@ extern void sve_load_state(void const *state, u32 const *pfpsr,
extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
extern unsigned int sve_get_vl(void);
extern void sve_set_vq(unsigned long vq_minus_1);
+extern void sme_set_vq(unsigned long vq_minus_1);
+extern void za_save_state(void *state);
+extern void za_load_state(void const *state);
struct arm64_cpu_capabilities;
extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
+extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused);
+extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused);
extern u64 read_zcr_features(void);
+extern u64 read_smcr_features(void);
/*
* Helpers to translate bit indices in sve_vq_map to VQ values (and
@@ -119,6 +156,7 @@ struct vl_info {
extern void sve_alloc(struct task_struct *task);
extern void fpsimd_release_task(struct task_struct *task);
extern void fpsimd_sync_to_sve(struct task_struct *task);
+extern void fpsimd_force_sync_to_sve(struct task_struct *task);
extern void sve_sync_to_fpsimd(struct task_struct *task);
extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task);
@@ -171,6 +209,12 @@ static inline void write_vl(enum vec_type type, u64 val)
write_sysreg_s(tmp | val, SYS_ZCR_EL1);
break;
#endif
+#ifdef CONFIG_ARM64_SME
+ case ARM64_VEC_SME:
+ tmp = read_sysreg_s(SYS_SMCR_EL1) & ~SMCR_ELx_LEN_MASK;
+ write_sysreg_s(tmp | val, SYS_SMCR_EL1);
+ break;
+#endif
default:
WARN_ON_ONCE(1);
break;
@@ -208,6 +252,8 @@ static inline bool sve_vq_available(unsigned int vq)
return vq_available(ARM64_VEC_SVE, vq);
}
+size_t sve_state_size(struct task_struct const *task);
+
#else /* ! CONFIG_ARM64_SVE */
static inline void sve_alloc(struct task_struct *task) { }
@@ -247,8 +293,93 @@ static inline void vec_update_vq_map(enum vec_type t) { }
static inline int vec_verify_vq_map(enum vec_type t) { return 0; }
static inline void sve_setup(void) { }
+static inline size_t sve_state_size(struct task_struct const *task)
+{
+ return 0;
+}
+
#endif /* ! CONFIG_ARM64_SVE */
+#ifdef CONFIG_ARM64_SME
+
+static inline void sme_user_disable(void)
+{
+ sysreg_clear_set(cpacr_el1, CPACR_EL1_SMEN_EL0EN, 0);
+}
+
+static inline void sme_user_enable(void)
+{
+ sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_SMEN_EL0EN);
+}
+
+static inline void sme_smstart_sm(void)
+{
+ asm volatile(__msr_s(SYS_SVCR_SMSTART_SM_EL0, "xzr"));
+}
+
+static inline void sme_smstop_sm(void)
+{
+ asm volatile(__msr_s(SYS_SVCR_SMSTOP_SM_EL0, "xzr"));
+}
+
+static inline void sme_smstop(void)
+{
+ asm volatile(__msr_s(SYS_SVCR_SMSTOP_SMZA_EL0, "xzr"));
+}
+
+extern void __init sme_setup(void);
+
+static inline int sme_max_vl(void)
+{
+ return vec_max_vl(ARM64_VEC_SME);
+}
+
+static inline int sme_max_virtualisable_vl(void)
+{
+ return vec_max_virtualisable_vl(ARM64_VEC_SME);
+}
+
+extern void sme_alloc(struct task_struct *task);
+extern unsigned int sme_get_vl(void);
+extern int sme_set_current_vl(unsigned long arg);
+extern int sme_get_current_vl(void);
+
+/*
+ * Return how many bytes of memory are required to store the full SME
+ * specific state (currently just ZA) for task, given task's currently
+ * configured vector length.
+ */
+static inline size_t za_state_size(struct task_struct const *task)
+{
+ unsigned int vl = task_get_sme_vl(task);
+
+ return ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl));
+}
+
+#else
+
+static inline void sme_user_disable(void) { BUILD_BUG(); }
+static inline void sme_user_enable(void) { BUILD_BUG(); }
+
+static inline void sme_smstart_sm(void) { }
+static inline void sme_smstop_sm(void) { }
+static inline void sme_smstop(void) { }
+
+static inline void sme_alloc(struct task_struct *task) { }
+static inline void sme_setup(void) { }
+static inline unsigned int sme_get_vl(void) { return 0; }
+static inline int sme_max_vl(void) { return 0; }
+static inline int sme_max_virtualisable_vl(void) { return 0; }
+static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; }
+static inline int sme_get_current_vl(void) { return -EINVAL; }
+
+static inline size_t za_state_size(struct task_struct const *task)
+{
+ return 0;
+}
+
+#endif /* ! CONFIG_ARM64_SME */
+
/* For use by EFI runtime services calls only */
extern void __efi_fpsimd_begin(void);
extern void __efi_fpsimd_end(void);
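
The reason SME_VQ_MAX stops at 16 while the LEN fields reserve room up to VQ 512 is that ZA storage grows quadratically with vector length. A standalone sketch of the size calculation, mirroring ZA_SIG_REGS_SIZE() from the uapi header:

    #include <stdio.h>

    #define __SVE_VQ_BYTES	16	/* one quadword = 128 bits */
    #define sve_vq_from_vl(vl)	((vl) / __SVE_VQ_BYTES)

    /* ZA is a (VL x VL)-byte matrix, so its backing store is quadratic */
    #define ZA_SIG_REGS_SIZE(vq) \
    	(((vq) * __SVE_VQ_BYTES) * ((vq) * __SVE_VQ_BYTES))

    int main(void)
    {
    	for (int vl = 16; vl <= 256; vl *= 2)
    		printf("VL %3d bytes -> ZA %6d bytes\n",
    		       vl, ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl)));

    	/* At the architectural ceiling of VQ 512 (VL 8192) the same
    	 * formula gives 64 MiB per thread, hence the far lower
    	 * SME_VQ_MAX for ptrace-sized allocations. */
    	return 0;
    }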
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index 2509d7dde55a..5e0910cf4832 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -93,6 +93,12 @@
.endif
.endm
+.macro _sme_check_wv v
+ .if (\v) < 12 || (\v) > 15
+ .error "Bad vector select register \v."
+ .endif
+.endm
+
/* SVE instruction encodings for non-SVE-capable assemblers */
/* (pre binutils 2.28, all kernel capable clang versions support SVE) */
@@ -174,6 +180,54 @@
| (\np)
.endm
+/* SME instruction encodings for non-SME-capable assemblers */
+/* (pre binutils 2.38/LLVM 13) */
+
+/* RDSVL X\nx, #\imm */
+.macro _sme_rdsvl nx, imm
+ _check_general_reg \nx
+ _check_num (\imm), -0x20, 0x1f
+ .inst 0x04bf5800 \
+ | (\nx) \
+ | (((\imm) & 0x3f) << 5)
+.endm
+
+/*
+ * STR (vector from ZA array):
+ * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _sme_str_zav nw, nxbase, offset=0
+ _sme_check_wv \nw
+ _check_general_reg \nxbase
+ _check_num (\offset), -0x100, 0xff
+ .inst 0xe1200000 \
+ | (((\nw) & 3) << 13) \
+ | ((\nxbase) << 5) \
+ | ((\offset) & 7)
+.endm
+
+/*
+ * LDR (vector to ZA array):
+ * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _sme_ldr_zav nw, nxbase, offset=0
+ _sme_check_wv \nw
+ _check_general_reg \nxbase
+ _check_num (\offset), -0x100, 0xff
+ .inst 0xe1000000 \
+ | (((\nw) & 3) << 13) \
+ | ((\nxbase) << 5) \
+ | ((\offset) & 7)
+.endm
+
+/*
+ * Zero the entire ZA array
+ * ZERO ZA
+ */
+.macro zero_za
+ .inst 0xc00800ff
+.endm
+
.macro __for from:req, to:req
.if (\from) == (\to)
_for__body %\from
@@ -208,6 +262,17 @@
921:
.endm
+/* Update SMCR_EL1.LEN with the new VQ */
+.macro sme_load_vq xvqminus1, xtmp, xtmp2
+ mrs_s \xtmp, SYS_SMCR_EL1
+ bic \xtmp2, \xtmp, SMCR_ELx_LEN_MASK
+ orr \xtmp2, \xtmp2, \xvqminus1
+ cmp \xtmp2, \xtmp
+ b.eq 921f
+ msr_s SYS_SMCR_EL1, \xtmp2 //self-synchronising
+921:
+.endm
+
/* Preserve the first 128-bits of Znz and zero the rest. */
.macro _sve_flush_z nz
_sve_check_zreg \nz
@@ -254,3 +319,25 @@
ldr w\nxtmp, [\xpfpsr, #4]
msr fpcr, x\nxtmp
.endm
+
+.macro sme_save_za nxbase, xvl, nw
+ mov w\nw, #0
+
+423:
+ _sme_str_zav \nw, \nxbase
+ add x\nxbase, x\nxbase, \xvl
+ add x\nw, x\nw, #1
+ cmp \xvl, x\nw
+ bne 423b
+.endm
+
+.macro sme_load_za nxbase, xvl, nw
+ mov w\nw, #0
+
+423:
+ _sme_ldr_zav \nw, \nxbase
+ add x\nxbase, x\nxbase, \xvl
+ add x\nw, x\nw, #1
+ cmp \xvl, x\nw
+ bne 423b
+.endm
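
For readers not fluent in the macro-encoded assembly, the save loop above amounts to the following C model (illustration only; the names are invented): ZA is streamed out one horizontal vector at a time, each VL bytes long, advancing the base pointer by VL per row until all VL rows are done.

    #include <string.h>

    static void sme_save_za_model(unsigned char *dst,
    			      const unsigned char *za_rows,
    			      unsigned int vl)	/* vl == SVL in bytes */
    {
    	for (unsigned int w = 0; w < vl; w++) {		/* vl rows in ZA */
    		memcpy(dst, za_rows + (size_t)w * vl, vl);	/* _sme_str_zav */
    		dst += vl;				/* add base, base, vl */
    	}
    }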
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index 1494cfa8639b..dbc45a4157fa 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -80,8 +80,15 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
struct dyn_ftrace;
+struct ftrace_ops;
+struct ftrace_regs;
+
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
#define ftrace_init_nop ftrace_init_nop
+
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs);
+#define ftrace_graph_func ftrace_graph_func
#endif
#define ftrace_return_address(n) return_address(n)
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 1242f71937f8..d656822b13f1 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -44,6 +44,8 @@ extern void huge_ptep_clear_flush(struct vm_area_struct *vma,
#define __HAVE_ARCH_HUGE_PTE_CLEAR
extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long sz);
+#define __HAVE_ARCH_HUGE_PTEP_GET
+extern pte_t huge_ptep_get(pte_t *ptep);
extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, unsigned long sz);
#define set_huge_swap_pte_at set_huge_swap_pte_at
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 8db5ec0089db..9f0ce004fdbc 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -109,6 +109,14 @@
#define KERNEL_HWCAP_AFP __khwcap2_feature(AFP)
#define KERNEL_HWCAP_RPRES __khwcap2_feature(RPRES)
#define KERNEL_HWCAP_MTE3 __khwcap2_feature(MTE3)
+#define KERNEL_HWCAP_SME __khwcap2_feature(SME)
+#define KERNEL_HWCAP_SME_I16I64 __khwcap2_feature(SME_I16I64)
+#define KERNEL_HWCAP_SME_F64F64 __khwcap2_feature(SME_F64F64)
+#define KERNEL_HWCAP_SME_I8I32 __khwcap2_feature(SME_I8I32)
+#define KERNEL_HWCAP_SME_F16F32 __khwcap2_feature(SME_F16F32)
+#define KERNEL_HWCAP_SME_B16F32 __khwcap2_feature(SME_B16F32)
+#define KERNEL_HWCAP_SME_F32F32 __khwcap2_feature(SME_F32F32)
+#define KERNEL_HWCAP_SME_FA64 __khwcap2_feature(SME_FA64)
/*
* This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 1767ded83888..13ae232ec4a1 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -279,6 +279,7 @@
#define CPTR_EL2_TCPAC (1U << 31)
#define CPTR_EL2_TAM (1 << 30)
#define CPTR_EL2_TTA (1 << 20)
+#define CPTR_EL2_TSM (1 << 12)
#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
#define CPTR_EL2_TZ (1 << 8)
#define CPTR_NVHE_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index f71358271b71..08233172e7a9 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -236,14 +236,14 @@ static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
return mode != PSR_MODE_EL0t;
}
-static __always_inline u32 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu)
+static __always_inline u64 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu)
{
return vcpu->arch.fault.esr_el2;
}
static __always_inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
{
- u32 esr = kvm_vcpu_get_esr(vcpu);
+ u64 esr = kvm_vcpu_get_esr(vcpu);
if (esr & ESR_ELx_CV)
return (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT;
@@ -374,7 +374,7 @@ static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
{
- u32 esr = kvm_vcpu_get_esr(vcpu);
+ u64 esr = kvm_vcpu_get_esr(vcpu);
return ESR_ELx_SYS64_ISS_RT(esr);
}
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 94a27a7520f4..d5888dedf02a 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -153,7 +153,7 @@ struct kvm_arch {
};
struct kvm_vcpu_fault_info {
- u32 esr_el2; /* Hyp Syndrom Register */
+ u64 esr_el2; /* Hyp Syndrome Register */
u64 far_el2; /* Hyp Fault Address Register */
u64 hpfar_el2; /* Hyp IPA Fault Address Register */
u64 disr_el1; /* Deferred [SError] Status Register */
@@ -295,8 +295,11 @@ struct vcpu_reset_state {
struct kvm_vcpu_arch {
struct kvm_cpu_context ctxt;
+
+ /* Guest floating point state */
void *sve_state;
unsigned int sve_max_vl;
+ u64 svcr;
/* Stage 2 paging state used by the hardware on next switch */
struct kvm_s2_mmu *hw_mmu;
@@ -451,6 +454,7 @@ struct kvm_vcpu_arch {
#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */
#define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14)
#define KVM_ARM64_ON_UNSUPPORTED_CPU (1 << 15) /* Physical CPU not in supported_cpus */
+#define KVM_ARM64_HOST_SME_ENABLED (1 << 16) /* SME enabled for EL0 */
#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
KVM_GUESTDBG_USE_SW_BP | \
diff --git a/arch/arm64/include/asm/kvm_ras.h b/arch/arm64/include/asm/kvm_ras.h
index 8ac6ee77437c..87e10d9a635b 100644
--- a/arch/arm64/include/asm/kvm_ras.h
+++ b/arch/arm64/include/asm/kvm_ras.h
@@ -14,7 +14,7 @@
* Was this synchronous external abort a RAS notification?
* Returns '0' for errors handled by some RAS subsystem, or -ENOENT.
*/
-static inline int kvm_handle_guest_sea(phys_addr_t addr, unsigned int esr)
+static inline int kvm_handle_guest_sea(phys_addr_t addr, u64 esr)
{
/* apei_claim_sea(NULL) expects to mask interrupts itself */
lockdep_assert_irqs_enabled();
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index adcb937342f1..aa523591a44e 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -47,6 +47,7 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg);
long get_mte_ctrl(struct task_struct *task);
int mte_ptrace_copy_tags(struct task_struct *child, long request,
unsigned long addr, unsigned long data);
+size_t mte_probe_user_range(const char __user *uaddr, size_t size);
#else /* CONFIG_ARM64_MTE */
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 66671ff05183..dd3d12bce07b 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -49,7 +49,7 @@
#define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
-#define PTRS_PER_PMD PTRS_PER_PTE
+#define PTRS_PER_PMD (1 << (PAGE_SHIFT - 3))
#endif
/*
@@ -59,7 +59,7 @@
#define PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
#define PUD_MASK (~(PUD_SIZE-1))
-#define PTRS_PER_PUD PTRS_PER_PTE
+#define PTRS_PER_PUD (1 << (PAGE_SHIFT - 3))
#endif
/*
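
The open-coded replacement for PTRS_PER_PTE makes the derivation explicit: a translation table at any level is one page of 8-byte descriptors, so it holds PAGE_SIZE / 8 = 1 << (PAGE_SHIFT - 3) entries. A quick check for the supported page sizes:

    #include <stdio.h>

    int main(void)
    {
    	int page_shift[] = { 12, 14, 16 };	/* 4K, 16K, 64K pages */

    	for (int i = 0; i < 3; i++)
    		printf("PAGE_SHIFT=%d -> %d descriptors per table\n",
    		       page_shift[i], 1 << (page_shift[i] - 3));
    	return 0;
    }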
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index dff2b483ea50..45c358538f13 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1001,7 +1001,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
*/
static inline bool arch_faults_on_old_pte(void)
{
- WARN_ON(preemptible());
+ /* The register read below requires a stable CPU to make any sense */
+ cant_migrate();
return !cpu_has_hw_af();
}
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 73e38d9a540c..bf8aafee1eac 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -118,6 +118,7 @@ struct debug_info {
enum vec_type {
ARM64_VEC_SVE = 0,
+ ARM64_VEC_SME,
ARM64_VEC_MAX,
};
@@ -153,6 +154,7 @@ struct thread_struct {
unsigned int fpsimd_cpu;
void *sve_state; /* SVE registers, if any */
+ void *za_state; /* ZA register, if any */
unsigned int vl[ARM64_VEC_MAX]; /* vector length */
unsigned int vl_onexec[ARM64_VEC_MAX]; /* vl after next exec */
unsigned long fault_address; /* fault info */
@@ -168,6 +170,8 @@ struct thread_struct {
u64 mte_ctrl;
#endif
u64 sctlr_user;
+ u64 svcr;
+ u64 tpidr2_el0;
};
static inline unsigned int thread_get_vl(struct thread_struct *thread,
@@ -181,6 +185,19 @@ static inline unsigned int thread_get_sve_vl(struct thread_struct *thread)
return thread_get_vl(thread, ARM64_VEC_SVE);
}
+static inline unsigned int thread_get_sme_vl(struct thread_struct *thread)
+{
+ return thread_get_vl(thread, ARM64_VEC_SME);
+}
+
+static inline unsigned int thread_get_cur_vl(struct thread_struct *thread)
+{
+ if (system_supports_sme() && (thread->svcr & SVCR_SM_MASK))
+ return thread_get_sme_vl(thread);
+ else
+ return thread_get_sve_vl(thread);
+}
+
unsigned int task_get_vl(const struct task_struct *task, enum vec_type type);
void task_set_vl(struct task_struct *task, enum vec_type type,
unsigned long vl);
@@ -194,6 +211,11 @@ static inline unsigned int task_get_sve_vl(const struct task_struct *task)
return task_get_vl(task, ARM64_VEC_SVE);
}
+static inline unsigned int task_get_sme_vl(const struct task_struct *task)
+{
+ return task_get_vl(task, ARM64_VEC_SME);
+}
+
static inline void task_set_sve_vl(struct task_struct *task, unsigned long vl)
{
task_set_vl(task, ARM64_VEC_SVE, vl);
@@ -354,9 +376,11 @@ extern void __init minsigstksz_setup(void);
*/
#include <asm/fpsimd.h>
-/* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */
+/* Userspace interface for PR_S[MV]E_{SET,GET}_VL prctl()s: */
#define SVE_SET_VL(arg) sve_set_current_vl(arg)
#define SVE_GET_VL() sve_get_current_vl()
+#define SME_SET_VL(arg) sme_set_current_vl(arg)
+#define SME_GET_VL() sme_get_current_vl()
/* PR_PAC_RESET_KEYS prctl */
#define PAC_RESET_KEYS(tsk, arg) ptrauth_prctl_reset_keys(tsk, arg)
@@ -381,12 +405,10 @@ long get_tagged_addr_ctrl(struct task_struct *task);
* of header definitions for the use of task_stack_page.
*/
-#define current_top_of_stack() \
-({ \
- struct stack_info _info; \
- BUG_ON(!on_accessible_stack(current, current_stack_pointer, 1, &_info)); \
- _info.high; \
-})
+/*
+ * The top of the current task's task stack
+ */
+#define current_top_of_stack() ((unsigned long)current->stack + THREAD_SIZE)
#define on_thread_stack() (on_task_stack(current, current_stack_pointer, 1, NULL))
#endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index e77cdef9ca29..aec9315bf156 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -31,38 +31,6 @@ struct stack_info {
enum stack_type type;
};
-/*
- * A snapshot of a frame record or fp/lr register values, along with some
- * accounting information necessary for robust unwinding.
- *
- * @fp: The fp value in the frame record (or the real fp)
- * @pc: The lr value in the frame record (or the real lr)
- *
- * @stacks_done: Stacks which have been entirely unwound, for which it is no
- * longer valid to unwind to.
- *
- * @prev_fp: The fp that pointed to this frame record, or a synthetic value
- * of 0. This is used to ensure that within a stack, each
- * subsequent frame record is at an increasing address.
- * @prev_type: The type of stack this frame record was on, or a synthetic
- * value of STACK_TYPE_UNKNOWN. This is used to detect a
- * transition from one stack to another.
- *
- * @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
- * associated with the most recently encountered replacement lr
- * value.
- */
-struct stackframe {
- unsigned long fp;
- unsigned long pc;
- DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
- unsigned long prev_fp;
- enum stack_type prev_type;
-#ifdef CONFIG_KRETPROBES
- struct llist_node *kr_cur;
-#endif
-};
-
extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
const char *loglvl);
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index fbf5f8bb9055..55f998c3dc28 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -115,9 +115,21 @@
#define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2)
/*
+ * Automatically generated definitions for system registers; the
+ * manual encodings below are in the process of being converted to
+ * come from here. The header relies on the definition of sys_reg()
+ * earlier in this file.
+ */
+#include "asm/sysreg-defs.h"
+
+/*
* System registers, organised loosely by encoding but grouped together
* where the architected name contains an index. e.g. ID_MMFR<n>_EL1.
*/
+#define SYS_SVCR_SMSTOP_SM_EL0 sys_reg(0, 3, 4, 2, 3)
+#define SYS_SVCR_SMSTART_SM_EL0 sys_reg(0, 3, 4, 3, 3)
+#define SYS_SVCR_SMSTOP_SMZA_EL0 sys_reg(0, 3, 4, 6, 3)
+
#define SYS_OSDTRRX_EL1 sys_reg(2, 0, 0, 0, 2)
#define SYS_MDCCINT_EL1 sys_reg(2, 0, 0, 2, 0)
#define SYS_MDSCR_EL1 sys_reg(2, 0, 0, 2, 2)
@@ -181,6 +193,7 @@
#define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0)
#define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1)
#define SYS_ID_AA64ZFR0_EL1 sys_reg(3, 0, 0, 4, 4)
+#define SYS_ID_AA64SMFR0_EL1 sys_reg(3, 0, 0, 4, 5)
#define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0)
#define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1)
@@ -188,7 +201,6 @@
#define SYS_ID_AA64AFR0_EL1 sys_reg(3, 0, 0, 5, 4)
#define SYS_ID_AA64AFR1_EL1 sys_reg(3, 0, 0, 5, 5)
-#define SYS_ID_AA64ISAR0_EL1 sys_reg(3, 0, 0, 6, 0)
#define SYS_ID_AA64ISAR1_EL1 sys_reg(3, 0, 0, 6, 1)
#define SYS_ID_AA64ISAR2_EL1 sys_reg(3, 0, 0, 6, 2)
@@ -196,17 +208,12 @@
#define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1)
#define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2)
-#define SYS_SCTLR_EL1 sys_reg(3, 0, 1, 0, 0)
#define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1)
-#define SYS_CPACR_EL1 sys_reg(3, 0, 1, 0, 2)
#define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5)
#define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6)
-#define SYS_ZCR_EL1 sys_reg(3, 0, 1, 2, 0)
#define SYS_TRFCR_EL1 sys_reg(3, 0, 1, 2, 1)
-#define SYS_TTBR0_EL1 sys_reg(3, 0, 2, 0, 0)
-#define SYS_TTBR1_EL1 sys_reg(3, 0, 2, 0, 1)
#define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2)
#define SYS_APIAKEYLO_EL1 sys_reg(3, 0, 2, 1, 0)
@@ -242,7 +249,6 @@
#define SYS_TFSR_EL1 sys_reg(3, 0, 5, 6, 0)
#define SYS_TFSRE0_EL1 sys_reg(3, 0, 5, 6, 1)
-#define SYS_FAR_EL1 sys_reg(3, 0, 6, 0, 0)
#define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0)
#define SYS_PAR_EL1_F BIT(0)
@@ -441,7 +447,6 @@
#define SYS_ICC_IGRPEN0_EL1 sys_reg(3, 0, 12, 12, 6)
#define SYS_ICC_IGRPEN1_EL1 sys_reg(3, 0, 12, 12, 7)
-#define SYS_CONTEXTIDR_EL1 sys_reg(3, 0, 13, 0, 1)
#define SYS_TPIDR_EL1 sys_reg(3, 0, 13, 0, 4)
#define SYS_SCXTNUM_EL1 sys_reg(3, 0, 13, 0, 7)
@@ -449,11 +454,12 @@
#define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0)
#define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0)
-#define SYS_CLIDR_EL1 sys_reg(3, 1, 0, 0, 1)
#define SYS_GMID_EL1 sys_reg(3, 1, 0, 0, 4)
#define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7)
-#define SYS_CSSELR_EL1 sys_reg(3, 2, 0, 0, 0)
+#define SMIDR_EL1_IMPLEMENTER_SHIFT 24
+#define SMIDR_EL1_SMPS_SHIFT 15
+#define SMIDR_EL1_AFFINITY_SHIFT 0
#define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1)
#define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7)
@@ -477,6 +483,7 @@
#define SYS_TPIDR_EL0 sys_reg(3, 3, 13, 0, 2)
#define SYS_TPIDRRO_EL0 sys_reg(3, 3, 13, 0, 3)
+#define SYS_TPIDR2_EL0 sys_reg(3, 3, 13, 0, 5)
#define SYS_SCXTNUM_EL0 sys_reg(3, 3, 13, 0, 7)
@@ -544,9 +551,8 @@
#define SYS_HFGRTR_EL2 sys_reg(3, 4, 1, 1, 4)
#define SYS_HFGWTR_EL2 sys_reg(3, 4, 1, 1, 5)
#define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6)
-#define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0)
#define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1)
-#define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0)
+#define SYS_HCRX_EL2 sys_reg(3, 4, 1, 2, 2)
#define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4)
#define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5)
#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6)
@@ -557,7 +563,6 @@
#define SYS_VSESR_EL2 sys_reg(3, 4, 5, 2, 3)
#define SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0)
#define SYS_TFSR_EL2 sys_reg(3, 4, 5, 6, 0)
-#define SYS_FAR_EL2 sys_reg(3, 4, 6, 0, 0)
#define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1)
#define __SYS__AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x)
@@ -603,8 +608,6 @@
/* VHE encodings for architectural EL0/1 system registers */
#define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0)
-#define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2)
-#define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0)
#define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0)
#define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1)
#define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2)
@@ -614,11 +617,9 @@
#define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1)
#define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0)
#define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0)
-#define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0)
#define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0)
#define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0)
#define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0)
-#define SYS_CONTEXTIDR_EL12 sys_reg(3, 5, 13, 0, 1)
#define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0)
#define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0)
#define SYS_CNTP_CTL_EL02 sys_reg(3, 5, 14, 2, 1)
@@ -628,31 +629,30 @@
#define SYS_CNTV_CVAL_EL02 sys_reg(3, 5, 14, 3, 2)
/* Common SCTLR_ELx flags. */
+#define SCTLR_ELx_ENTP2 (BIT(60))
#define SCTLR_ELx_DSSBS (BIT(44))
#define SCTLR_ELx_ATA (BIT(43))
-#define SCTLR_ELx_TCF_SHIFT 40
-#define SCTLR_ELx_TCF_NONE (UL(0x0) << SCTLR_ELx_TCF_SHIFT)
-#define SCTLR_ELx_TCF_SYNC (UL(0x1) << SCTLR_ELx_TCF_SHIFT)
-#define SCTLR_ELx_TCF_ASYNC (UL(0x2) << SCTLR_ELx_TCF_SHIFT)
-#define SCTLR_ELx_TCF_ASYMM (UL(0x3) << SCTLR_ELx_TCF_SHIFT)
-#define SCTLR_ELx_TCF_MASK (UL(0x3) << SCTLR_ELx_TCF_SHIFT)
-
#define SCTLR_ELx_ENIA_SHIFT 31
-#define SCTLR_ELx_ITFSB (BIT(37))
-#define SCTLR_ELx_ENIA (BIT(SCTLR_ELx_ENIA_SHIFT))
-#define SCTLR_ELx_ENIB (BIT(30))
-#define SCTLR_ELx_ENDA (BIT(27))
-#define SCTLR_ELx_EE (BIT(25))
-#define SCTLR_ELx_IESB (BIT(21))
-#define SCTLR_ELx_WXN (BIT(19))
-#define SCTLR_ELx_ENDB (BIT(13))
-#define SCTLR_ELx_I (BIT(12))
-#define SCTLR_ELx_SA (BIT(3))
-#define SCTLR_ELx_C (BIT(2))
-#define SCTLR_ELx_A (BIT(1))
-#define SCTLR_ELx_M (BIT(0))
+#define SCTLR_ELx_ITFSB (BIT(37))
+#define SCTLR_ELx_ENIA (BIT(SCTLR_ELx_ENIA_SHIFT))
+#define SCTLR_ELx_ENIB (BIT(30))
+#define SCTLR_ELx_LSMAOE (BIT(29))
+#define SCTLR_ELx_nTLSMD (BIT(28))
+#define SCTLR_ELx_ENDA (BIT(27))
+#define SCTLR_ELx_EE (BIT(25))
+#define SCTLR_ELx_EIS (BIT(22))
+#define SCTLR_ELx_IESB (BIT(21))
+#define SCTLR_ELx_TSCXT (BIT(20))
+#define SCTLR_ELx_WXN (BIT(19))
+#define SCTLR_ELx_ENDB (BIT(13))
+#define SCTLR_ELx_I (BIT(12))
+#define SCTLR_ELx_EOS (BIT(11))
+#define SCTLR_ELx_SA (BIT(3))
+#define SCTLR_ELx_C (BIT(2))
+#define SCTLR_ELx_A (BIT(1))
+#define SCTLR_ELx_M (BIT(0))
/* SCTLR_EL2 specific flags. */
#define SCTLR_EL2_RES1 ((BIT(4)) | (BIT(5)) | (BIT(11)) | (BIT(16)) | \
@@ -674,34 +674,6 @@
(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
/* SCTLR_EL1 specific flags. */
-#define SCTLR_EL1_EPAN (BIT(57))
-#define SCTLR_EL1_ATA0 (BIT(42))
-
-#define SCTLR_EL1_TCF0_SHIFT 38
-#define SCTLR_EL1_TCF0_NONE (UL(0x0) << SCTLR_EL1_TCF0_SHIFT)
-#define SCTLR_EL1_TCF0_SYNC (UL(0x1) << SCTLR_EL1_TCF0_SHIFT)
-#define SCTLR_EL1_TCF0_ASYNC (UL(0x2) << SCTLR_EL1_TCF0_SHIFT)
-#define SCTLR_EL1_TCF0_ASYMM (UL(0x3) << SCTLR_EL1_TCF0_SHIFT)
-#define SCTLR_EL1_TCF0_MASK (UL(0x3) << SCTLR_EL1_TCF0_SHIFT)
-
-#define SCTLR_EL1_BT1 (BIT(36))
-#define SCTLR_EL1_BT0 (BIT(35))
-#define SCTLR_EL1_UCI (BIT(26))
-#define SCTLR_EL1_E0E (BIT(24))
-#define SCTLR_EL1_SPAN (BIT(23))
-#define SCTLR_EL1_NTWE (BIT(18))
-#define SCTLR_EL1_NTWI (BIT(16))
-#define SCTLR_EL1_UCT (BIT(15))
-#define SCTLR_EL1_DZE (BIT(14))
-#define SCTLR_EL1_UMA (BIT(9))
-#define SCTLR_EL1_SED (BIT(8))
-#define SCTLR_EL1_ITD (BIT(7))
-#define SCTLR_EL1_CP15BEN (BIT(5))
-#define SCTLR_EL1_SA0 (BIT(4))
-
-#define SCTLR_EL1_RES1 ((BIT(11)) | (BIT(20)) | (BIT(22)) | (BIT(28)) | \
- (BIT(29)))
-
#ifdef CONFIG_CPU_BIG_ENDIAN
#define ENDIAN_SET_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE)
#else
@@ -709,13 +681,17 @@
#endif
#define INIT_SCTLR_EL1_MMU_OFF \
- (ENDIAN_SET_EL1 | SCTLR_EL1_RES1)
+ (ENDIAN_SET_EL1 | SCTLR_EL1_LSMAOE | SCTLR_EL1_nTLSMD | \
+ SCTLR_EL1_EIS | SCTLR_EL1_TSCXT | SCTLR_EL1_EOS)
#define INIT_SCTLR_EL1_MMU_ON \
- (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_EL1_SA0 | \
- SCTLR_EL1_SED | SCTLR_ELx_I | SCTLR_EL1_DZE | SCTLR_EL1_UCT | \
- SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \
- ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_EPAN | SCTLR_EL1_RES1)
+ (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | \
+ SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I | \
+ SCTLR_EL1_DZE | SCTLR_EL1_UCT | SCTLR_EL1_nTWE | \
+ SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \
+ ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_EPAN | \
+ SCTLR_EL1_LSMAOE | SCTLR_EL1_nTLSMD | SCTLR_EL1_EIS | \
+ SCTLR_EL1_TSCXT | SCTLR_EL1_EOS)
/* MAIR_ELx memory attributes (used by Linux) */
#define MAIR_ATTR_DEVICE_nGnRnE UL(0x00)
@@ -728,25 +704,6 @@
/* Position the attr at the correct index */
#define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8))
-/* id_aa64isar0 */
-#define ID_AA64ISAR0_RNDR_SHIFT 60
-#define ID_AA64ISAR0_TLB_SHIFT 56
-#define ID_AA64ISAR0_TS_SHIFT 52
-#define ID_AA64ISAR0_FHM_SHIFT 48
-#define ID_AA64ISAR0_DP_SHIFT 44
-#define ID_AA64ISAR0_SM4_SHIFT 40
-#define ID_AA64ISAR0_SM3_SHIFT 36
-#define ID_AA64ISAR0_SHA3_SHIFT 32
-#define ID_AA64ISAR0_RDM_SHIFT 28
-#define ID_AA64ISAR0_ATOMICS_SHIFT 20
-#define ID_AA64ISAR0_CRC32_SHIFT 16
-#define ID_AA64ISAR0_SHA2_SHIFT 12
-#define ID_AA64ISAR0_SHA1_SHIFT 8
-#define ID_AA64ISAR0_AES_SHIFT 4
-
-#define ID_AA64ISAR0_TLB_RANGE_NI 0x0
-#define ID_AA64ISAR0_TLB_RANGE 0x2
-
/* id_aa64isar1 */
#define ID_AA64ISAR1_I8MM_SHIFT 52
#define ID_AA64ISAR1_DGH_SHIFT 48
@@ -836,6 +793,7 @@
#define ID_AA64PFR0_ELx_32BIT_64BIT 0x2
/* id_aa64pfr1 */
+#define ID_AA64PFR1_SME_SHIFT 24
#define ID_AA64PFR1_MPAMFRAC_SHIFT 16
#define ID_AA64PFR1_RASFRAC_SHIFT 12
#define ID_AA64PFR1_MTE_SHIFT 8
@@ -846,6 +804,7 @@
#define ID_AA64PFR1_SSBS_PSTATE_ONLY 1
#define ID_AA64PFR1_SSBS_PSTATE_INSNS 2
#define ID_AA64PFR1_BT_BTI 0x1
+#define ID_AA64PFR1_SME 1
#define ID_AA64PFR1_MTE_NI 0x0
#define ID_AA64PFR1_MTE_EL0 0x1
@@ -874,6 +833,23 @@
#define ID_AA64ZFR0_AES_PMULL 0x2
#define ID_AA64ZFR0_SVEVER_SVE2 0x1
+/* id_aa64smfr0 */
+#define ID_AA64SMFR0_FA64_SHIFT 63
+#define ID_AA64SMFR0_I16I64_SHIFT 52
+#define ID_AA64SMFR0_F64F64_SHIFT 48
+#define ID_AA64SMFR0_I8I32_SHIFT 36
+#define ID_AA64SMFR0_F16F32_SHIFT 35
+#define ID_AA64SMFR0_B16F32_SHIFT 34
+#define ID_AA64SMFR0_F32F32_SHIFT 32
+
+#define ID_AA64SMFR0_FA64 0x1
+#define ID_AA64SMFR0_I16I64 0x4
+#define ID_AA64SMFR0_F64F64 0x1
+#define ID_AA64SMFR0_I8I32 0x4
+#define ID_AA64SMFR0_F16F32 0x1
+#define ID_AA64SMFR0_B16F32 0x1
+#define ID_AA64SMFR0_F32F32 0x1
+
/* id_aa64mmfr0 */
#define ID_AA64MMFR0_ECV_SHIFT 60
#define ID_AA64MMFR0_FGT_SHIFT 56
@@ -926,6 +902,7 @@
/* id_aa64mmfr1 */
#define ID_AA64MMFR1_ECBHB_SHIFT 60
#define ID_AA64MMFR1_AFP_SHIFT 44
+#define ID_AA64MMFR1_HCX_SHIFT 40
#define ID_AA64MMFR1_ETS_SHIFT 36
#define ID_AA64MMFR1_TWED_SHIFT 32
@@ -1110,18 +1087,12 @@
#define DCZID_DZP_SHIFT 4
#define DCZID_BS_SHIFT 0
-/*
- * The ZCR_ELx_LEN_* definitions intentionally include bits [8:4] which
- * are reserved by the SVE architecture for future expansion of the LEN
- * field, with compatible semantics.
- */
-#define ZCR_ELx_LEN_SHIFT 0
-#define ZCR_ELx_LEN_SIZE 9
-#define ZCR_ELx_LEN_MASK 0x1ff
-
#define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */
#define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */
+#define CPACR_EL1_SMEN_EL1EN (BIT(24)) /* enable EL1 access */
+#define CPACR_EL1_SMEN_EL0EN (BIT(25)) /* enable EL0 access, if EL1EN set */
+
#define CPACR_EL1_ZEN_EL1EN (BIT(16)) /* enable EL1 access */
#define CPACR_EL1_ZEN_EL0EN (BIT(17)) /* enable EL0 access, if EL1EN set */
@@ -1170,6 +1141,8 @@
#define TRFCR_ELx_ExTRE BIT(1)
#define TRFCR_ELx_E0TRE BIT(0)
+/* HCRX_EL2 definitions */
+#define HCRX_EL2_SMPME_MASK (1 << 5)
/* GIC Hypervisor interface registers */
/* ICH_MISR_EL2 bit definitions */
@@ -1233,6 +1206,12 @@
#define ICH_VTR_TDS_SHIFT 19
#define ICH_VTR_TDS_MASK (1 << ICH_VTR_TDS_SHIFT)
+/* HFG[WR]TR_EL2 bit definitions */
+#define HFGxTR_EL2_nTPIDR2_EL0_SHIFT 55
+#define HFGxTR_EL2_nTPIDR2_EL0_MASK BIT_MASK(HFGxTR_EL2_nTPIDR2_EL0_SHIFT)
+#define HFGxTR_EL2_nSMPRI_EL1_SHIFT 54
+#define HFGxTR_EL2_nSMPRI_EL1_MASK BIT_MASK(HFGxTR_EL2_nSMPRI_EL1_SHIFT)
+
#define ARM64_FEATURE_FIELD_BITS 4
/* Create a mask for the feature bits of the specified feature. */
@@ -1345,4 +1324,10 @@
#endif
+#define SYS_FIELD_PREP(reg, field, val) \
+ FIELD_PREP(reg##_##field##_MASK, val)
+
+#define SYS_FIELD_PREP_ENUM(reg, field, val) \
+ FIELD_PREP(reg##_##field##_MASK, reg##_##field##_##val)
+
#endif /* __ASM_SYSREG_H */
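
SYS_FIELD_PREP() and SYS_FIELD_PREP_ENUM() lean on token pasting against the generated <reg>_<field>_MASK (and <reg>_<field>_<value>) names from sysreg-defs.h. A standalone sketch with stand-ins for the kernel's GENMASK()/FIELD_PREP() and an invented register layout:

    #include <stdio.h>
    #include <stdint.h>

    /* Minimal stand-ins for the kernel's bitfield helpers */
    #define GENMASK(h, l)	(((~0ULL) << (l)) & (~0ULL >> (63 - (h))))
    #define FIELD_PREP(mask, val) \
    	(((uint64_t)(val) << __builtin_ctzll(mask)) & (mask))

    /* Hypothetical field following the generated naming convention */
    #define EXAMPLE_REG_WIDTH_MASK	GENMASK(7, 4)
    #define EXAMPLE_REG_WIDTH_WIDE	0x2

    #define SYS_FIELD_PREP(reg, field, val) \
    	FIELD_PREP(reg##_##field##_MASK, val)
    #define SYS_FIELD_PREP_ENUM(reg, field, val) \
    	FIELD_PREP(reg##_##field##_MASK, reg##_##field##_##val)

    int main(void)
    {
    	/* 0x30: raw value 3 placed in bits [7:4] */
    	printf("0x%llx\n", (unsigned long long)
    	       SYS_FIELD_PREP(EXAMPLE_REG, WIDTH, 3));
    	/* 0x20: named enum value WIDE (0x2) placed in bits [7:4] */
    	printf("0x%llx\n", (unsigned long long)
    	       SYS_FIELD_PREP_ENUM(EXAMPLE_REG, WIDTH, WIDE));
    	return 0;
    }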
diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h
index 305a7157c6a6..0eb7709422e2 100644
--- a/arch/arm64/include/asm/system_misc.h
+++ b/arch/arm64/include/asm/system_misc.h
@@ -23,9 +23,9 @@ void die(const char *msg, struct pt_regs *regs, int err);
struct siginfo;
void arm64_notify_die(const char *str, struct pt_regs *regs,
int signo, int sicode, unsigned long far,
- int err);
+ unsigned long err);
-void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned int,
+void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned long,
struct pt_regs *),
int sig, int code, const char *name);
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index e1317b7c4525..848739c15de8 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -82,6 +82,8 @@ int arch_dup_task_struct(struct task_struct *dst,
#define TIF_SVE_VL_INHERIT 24 /* Inherit SVE vl_onexec across exec */
#define TIF_SSBD 25 /* Wants SSB mitigation */
#define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */
+#define TIF_SME 27 /* SME in use */
+#define TIF_SME_VL_INHERIT 28 /* Inherit SME vl_onexec across exec */
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index 54f32a0675df..6e5826470bea 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -24,7 +24,7 @@ struct undef_hook {
void register_undef_hook(struct undef_hook *hook);
void unregister_undef_hook(struct undef_hook *hook);
-void force_signal_inject(int signal, int code, unsigned long address, unsigned int err);
+void force_signal_inject(int signal, int code, unsigned long address, unsigned long err);
void arm64_notify_segfault(unsigned long addr);
void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str);
void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str);
@@ -57,7 +57,7 @@ static inline int in_entry_text(unsigned long ptr)
* errors share the same encoding as an all-zeros encoding from a CPU that
* doesn't support RAS.
*/
-static inline bool arm64_is_ras_serror(u32 esr)
+static inline bool arm64_is_ras_serror(unsigned long esr)
{
WARN_ON(preemptible());
@@ -77,9 +77,9 @@ static inline bool arm64_is_ras_serror(u32 esr)
* We treat them as Uncontainable.
* Non-RAS SError's are reported as Uncontained/Uncategorized.
*/
-static inline u32 arm64_ras_serror_get_severity(u32 esr)
+static inline unsigned long arm64_ras_serror_get_severity(unsigned long esr)
{
- u32 aet = esr & ESR_ELx_AET;
+ unsigned long aet = esr & ESR_ELx_AET;
if (!arm64_is_ras_serror(esr)) {
/* Not a RAS error, we can't interpret the ESR. */
@@ -98,6 +98,6 @@ static inline u32 arm64_ras_serror_get_severity(u32 esr)
return aet;
}
-bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr);
-void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr);
+bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned long esr);
+void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr);
#endif
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index e8dce0cc5eaa..63f9c828f1a7 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -460,4 +460,19 @@ static inline int __copy_from_user_flushcache(void *dst, const void __user *src,
}
#endif
+#ifdef CONFIG_ARCH_HAS_SUBPAGE_FAULTS
+
+/*
+ * Return 0 on success, the number of bytes not probed otherwise.
+ */
+static inline size_t probe_subpage_writeable(const char __user *uaddr,
+ size_t size)
+{
+ if (!system_supports_mte())
+ return 0;
+ return mte_probe_user_range(uaddr, size);
+}
+
+#endif /* CONFIG_ARCH_HAS_SUBPAGE_FAULTS */
+
#endif /* __ASM_UACCESS_H */
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 99cb5d383048..b0256cec63b5 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -79,5 +79,13 @@
#define HWCAP2_AFP (1 << 20)
#define HWCAP2_RPRES (1 << 21)
#define HWCAP2_MTE3 (1 << 22)
+#define HWCAP2_SME (1 << 23)
+#define HWCAP2_SME_I16I64 (1 << 24)
+#define HWCAP2_SME_F64F64 (1 << 25)
+#define HWCAP2_SME_I8I32 (1 << 26)
+#define HWCAP2_SME_F16F32 (1 << 27)
+#define HWCAP2_SME_B16F32 (1 << 28)
+#define HWCAP2_SME_F32F32 (1 << 29)
+#define HWCAP2_SME_FA64 (1 << 30)
#endif /* _UAPI__ASM_HWCAP_H */
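
Userspace discovers these bits through the auxiliary vector; something like the following, with the HWCAP2 values copied from the hunk above:

    #include <stdio.h>
    #include <sys/auxv.h>

    #define HWCAP2_SME	(1 << 23)
    #define HWCAP2_SME_FA64	(1 << 30)

    int main(void)
    {
    	unsigned long hwcap2 = getauxval(AT_HWCAP2);

    	if (hwcap2 & HWCAP2_SME)
    		printf("SME supported%s\n",
    		       (hwcap2 & HWCAP2_SME_FA64) ? " (with FA64)" : "");
    	else
    		printf("no SME\n");
    	return 0;
    }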
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index c1b6ddc02d2f..ab585359242d 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -139,8 +139,10 @@ struct kvm_guest_debug_arch {
__u64 dbg_wvr[KVM_ARM_MAX_DBG_REGS];
};
+#define KVM_DEBUG_ARCH_HSR_HIGH_VALID (1 << 0)
struct kvm_debug_exit_arch {
__u32 hsr;
+ __u32 hsr_high; /* ESR_EL2[61:32] */
__u64 far; /* used for watchpoints */
};
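
Since ESR_EL2 is now 64 bits wide but the existing hsr field is __u32, userspace splices the halves back together when the validity flag is set, roughly as below. The struct is restated here for self-containment; in real code it comes from <linux/kvm.h> and the flag from the surrounding kvm_run debug exit:

    #include <stdint.h>
    #include <stdio.h>

    #define KVM_DEBUG_ARCH_HSR_HIGH_VALID	(1 << 0)

    struct kvm_debug_exit_arch {
    	uint32_t hsr;
    	uint32_t hsr_high;	/* ESR_EL2[61:32] */
    	uint64_t far;
    };

    static uint64_t debug_exit_esr(const struct kvm_debug_exit_arch *dbg,
    			       uint64_t flags)
    {
    	uint64_t esr = dbg->hsr;

    	if (flags & KVM_DEBUG_ARCH_HSR_HIGH_VALID)
    		esr |= (uint64_t)dbg->hsr_high << 32;
    	return esr;
    }

    int main(void)
    {
    	struct kvm_debug_exit_arch dbg = { .hsr = 0x92000021, .hsr_high = 0x1 };

    	printf("esr=0x%llx\n", (unsigned long long)
    	       debug_exit_esr(&dbg, KVM_DEBUG_ARCH_HSR_HIGH_VALID));
    	return 0;
    }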
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index 758ae984ff97..7fa2f7036aa7 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -109,7 +109,7 @@ struct user_hwdebug_state {
} dbg_regs[16];
};
-/* SVE/FP/SIMD state (NT_ARM_SVE) */
+/* SVE/FP/SIMD state (NT_ARM_SVE & NT_ARM_SSVE) */
struct user_sve_header {
__u32 size; /* total meaningful regset content in bytes */
@@ -220,6 +220,7 @@ struct user_sve_header {
(SVE_PT_SVE_PREG_OFFSET(vq, __SVE_NUM_PREGS) - \
SVE_PT_SVE_PREGS_OFFSET(vq))
+/* For streaming mode SVE (SSVE) FFR must be read and written as zero */
#define SVE_PT_SVE_FFR_OFFSET(vq) \
(SVE_PT_REGS_OFFSET + __SVE_FFR_OFFSET(vq))
@@ -240,10 +241,12 @@ struct user_sve_header {
- SVE_PT_SVE_OFFSET + (__SVE_VQ_BYTES - 1)) \
/ __SVE_VQ_BYTES * __SVE_VQ_BYTES)
-#define SVE_PT_SIZE(vq, flags) \
- (((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE ? \
- SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags) \
- : SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags))
+#define SVE_PT_SIZE(vq, flags) \
+ (((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE ? \
+ SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags) \
+ : ((((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD ? \
+ SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags) \
+ : SVE_PT_REGS_OFFSET)))
/* pointer authentication masks (NT_ARM_PAC_MASK) */
@@ -265,6 +268,62 @@ struct user_pac_generic_keys {
__uint128_t apgakey;
};
+/* ZA state (NT_ARM_ZA) */
+
+struct user_za_header {
+ __u32 size; /* total meaningful regset content in bytes */
+ __u32 max_size; /* maximum possible size for this thread */
+ __u16 vl; /* current vector length */
+ __u16 max_vl; /* maximum possible vector length */
+ __u16 flags;
+ __u16 __reserved;
+};
+
+/*
+ * Common ZA_PT_* flags:
+ * These must be kept in sync with prctl interface in <linux/prctl.h>
+ */
+#define ZA_PT_VL_INHERIT ((1 << 17) /* PR_SME_VL_INHERIT */ >> 16)
+#define ZA_PT_VL_ONEXEC ((1 << 18) /* PR_SME_SET_VL_ONEXEC */ >> 16)
+
+
+/*
+ * The remainder of the ZA state follows struct user_za_header. The
+ * total size of the ZA state (including header) depends on the
+ * metadata in the header: ZA_PT_SIZE(vq, flags) gives the total size
+ * of the state in bytes, including the header.
+ *
+ * Refer to <asm/sigcontext.h> for details of how to pass the correct
+ * "vq" argument to these macros.
+ */
+
+/* Offset from the start of struct user_za_header to the register data */
+#define ZA_PT_ZA_OFFSET \
+ ((sizeof(struct user_za_header) + (__SVE_VQ_BYTES - 1)) \
+ / __SVE_VQ_BYTES * __SVE_VQ_BYTES)
+
+/*
+ * The payload starts at offset ZA_PT_ZA_OFFSET, and is of size
+ * ZA_PT_ZA_SIZE(vq, flags).
+ *
+ * The ZA array is stored as a sequence of horizontal vectors ZAV of SVL/8
+ * bytes each, starting from vector 0.
+ *
+ * Additional data might be appended in the future.
+ *
+ * The ZA matrix is represented in memory in an endianness-invariant layout
+ * which differs from the layout used for the FPSIMD V-registers on big-endian
+ * systems: see sigcontext.h for more explanation.
+ */
+
+#define ZA_PT_ZAV_OFFSET(vq, n) \
+ (ZA_PT_ZA_OFFSET + (((vq) * __SVE_VQ_BYTES) * (n)))
+
+#define ZA_PT_ZA_SIZE(vq) (((vq) * __SVE_VQ_BYTES) * ((vq) * __SVE_VQ_BYTES))
+
+#define ZA_PT_SIZE(vq) \
+ (ZA_PT_ZA_OFFSET + ZA_PT_ZA_SIZE(vq))
+
#endif /* __ASSEMBLY__ */
#endif /* _UAPI__ASM_PTRACE_H */
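As a usage sketch for the new NT_ARM_ZA layout, a debugger could size its PTRACE_GETREGSET buffer from the header alone (the helper name here is illustrative; the vq computation assumes __SVE_VQ_BYTES == 16 as defined in the sve_context uapi header):

    #include <asm/ptrace.h>

    /* Bytes needed for the whole NT_ARM_ZA regset described by hdr. */
    static unsigned int za_regset_size(const struct user_za_header *hdr)
    {
            unsigned int vq = hdr->vl / 16;   /* __SVE_VQ_BYTES == 16 */

            /* Header plus the vl-by-vl byte ZA matrix. */
            return ZA_PT_SIZE(vq);
    }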
diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h
index 0c796c795dbe..4aaf31e3bf16 100644
--- a/arch/arm64/include/uapi/asm/sigcontext.h
+++ b/arch/arm64/include/uapi/asm/sigcontext.h
@@ -134,6 +134,17 @@ struct extra_context {
struct sve_context {
struct _aarch64_ctx head;
__u16 vl;
+ __u16 flags;
+ __u16 __reserved[2];
+};
+
+#define SVE_SIG_FLAG_SM 0x1 /* Context describes streaming mode */
+
+#define ZA_MAGIC 0x54366345
+
+struct za_context {
+ struct _aarch64_ctx head;
+ __u16 vl;
__u16 __reserved[3];
};
@@ -186,9 +197,16 @@ struct sve_context {
* sve_context.vl must equal the thread's current vector length when
* doing a sigreturn.
*
+ * On systems with support for SME the SVE register state may reflect either
+ * streaming or non-streaming mode. In streaming mode the streaming mode
+ * vector length will be used and the flag SVE_SIG_FLAG_SM will be set in
+ * the flags field. It is permitted to enter or leave streaming mode in
+ * a signal return, but applications should take care to ensure that any difference
+ * in vector length between the two modes is handled, including any resizing
+ * and movement of context blocks.
*
- * Note: for all these macros, the "vq" argument denotes the SVE
- * vector length in quadwords (i.e., units of 128 bits).
+ * Note: for all these macros, the "vq" argument denotes the vector length
+ * in quadwords (i.e., units of 128 bits).
*
* The correct way to obtain vq is to use sve_vq_from_vl(vl). The
* result is valid if and only if sve_vl_valid(vl) is true. This is
@@ -249,4 +267,37 @@ struct sve_context {
#define SVE_SIG_CONTEXT_SIZE(vq) \
(SVE_SIG_REGS_OFFSET + SVE_SIG_REGS_SIZE(vq))
+/*
+ * If the ZA register is enabled for the thread at signal delivery then,
+ * za_context.head.size >= ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za_context.vl))
+ * and the register data may be accessed using the ZA_SIG_*() macros.
+ *
+ * If za_context.head.size < ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za_context.vl))
+ * then the ZA register was not enabled for the thread and no register data
+ * was included: the ZA_SIG_*() macros should not be used except for this
+ * check.
+ *
+ * The same convention applies when returning from a signal: a caller
+ * will need to remove or resize the za_context block if it wants to
+ * enable the ZA register when it was previously non-live or vice-versa.
+ * This may require the caller to allocate fresh memory and/or move other
+ * context blocks in the signal frame.
+ *
+ * Changing the vector length during signal return is not permitted:
+ * za_context.vl must equal the thread's current SME vector length when
+ * doing a sigreturn.
+ */
+
+#define ZA_SIG_REGS_OFFSET \
+ ((sizeof(struct za_context) + (__SVE_VQ_BYTES - 1)) \
+ / __SVE_VQ_BYTES * __SVE_VQ_BYTES)
+
+#define ZA_SIG_REGS_SIZE(vq) (((vq) * __SVE_VQ_BYTES) * ((vq) * __SVE_VQ_BYTES))
+
+#define ZA_SIG_ZAV_OFFSET(vq, n) (ZA_SIG_REGS_OFFSET + \
+ (SVE_SIG_ZREG_SIZE(vq) * (n)))
+
+#define ZA_SIG_CONTEXT_SIZE(vq) \
+ (ZA_SIG_REGS_OFFSET + ZA_SIG_REGS_SIZE(vq))
+
#endif /* _UAPI__ASM_SIGCONTEXT_H */
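To make the discovery rules above concrete, a signal handler might locate and validate the ZA block roughly as follows (a minimal sketch: real code must also follow extra_context and bounds-check against the frame size):

    #include <asm/sigcontext.h>
    #include <stddef.h>

    static struct za_context *find_za(struct _aarch64_ctx *head)
    {
            for (; head->magic != 0;
                 head = (struct _aarch64_ctx *)((char *)head + head->size))
                    if (head->magic == ZA_MAGIC)
                            return (struct za_context *)head;

            return NULL;
    }

    /* Nonzero if ZA was enabled and register data follows the header. */
    static int za_is_live(const struct za_context *za)
    {
            return za->head.size >=
                    ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za->vl));
    }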
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index a0f3d0aaa3c5..c05cc3b6162e 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -217,7 +217,7 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = {
#endif
#ifdef CONFIG_CAVIUM_ERRATUM_23154
-const struct midr_range cavium_erratum_23154_cpus[] = {
+static const struct midr_range cavium_erratum_23154_cpus[] = {
MIDR_ALL_VERSIONS(MIDR_THUNDERX),
MIDR_ALL_VERSIONS(MIDR_THUNDERX_81XX),
MIDR_ALL_VERSIONS(MIDR_THUNDERX_83XX),
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 2cb9cc9e0eff..4ccddf382e5b 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -191,20 +191,20 @@ static bool __system_matches_cap(unsigned int n);
* sync with the documentation of the CPU feature register ABI.
*/
static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_RNDR_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_TLB_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_TS_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_FHM_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM4_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM3_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA3_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_RDM_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA1_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_AES_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_RNDR_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_TLB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_TS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_FHM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_DP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SM4_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SM3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SHA3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_RDM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_ATOMIC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_CRC32_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SHA2_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SHA1_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_AES_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -261,6 +261,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SME_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_RASFRAC_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_MTE),
@@ -293,6 +295,24 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
ARM64_FTR_END,
};
+static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_FA64_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I16I64_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F64F64_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I8I32_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F16F32_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_B16F32_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F32F32_SHIFT, 1, 0),
+ ARM64_FTR_END,
+};
+
static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0),
@@ -557,7 +577,13 @@ static const struct arm64_ftr_bits ftr_id_dfr1[] = {
static const struct arm64_ftr_bits ftr_zcr[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE,
- ZCR_ELx_LEN_SHIFT, ZCR_ELx_LEN_SIZE, 0), /* LEN */
+ ZCR_ELx_LEN_SHIFT, ZCR_ELx_LEN_WIDTH, 0), /* LEN */
+ ARM64_FTR_END,
+};
+
+static const struct arm64_ftr_bits ftr_smcr[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE,
+ SMCR_ELx_LEN_SHIFT, SMCR_ELx_LEN_WIDTH, 0), /* LEN */
ARM64_FTR_END,
};
@@ -645,6 +671,7 @@ static const struct __ftr_reg_entry {
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1,
&id_aa64pfr1_override),
ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0),
+ ARM64_FTR_REG(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0),
/* Op1 = 0, CRn = 0, CRm = 5 */
ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
@@ -665,6 +692,7 @@ static const struct __ftr_reg_entry {
/* Op1 = 0, CRn = 1, CRm = 2 */
ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr),
+ ARM64_FTR_REG(SYS_SMCR_EL1, ftr_smcr),
/* Op1 = 1, CRn = 0, CRm = 0 */
ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid),
@@ -959,6 +987,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0);
+ init_cpu_ftr_reg(SYS_ID_AA64SMFR0_EL1, info->reg_id_aa64smfr0);
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
init_32bit_cpu_features(&info->aarch32);
@@ -968,6 +997,12 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
vec_init_vq_map(ARM64_VEC_SVE);
}
+ if (id_aa64pfr1_sme(info->reg_id_aa64pfr1)) {
+ init_cpu_ftr_reg(SYS_SMCR_EL1, info->reg_smcr);
+ if (IS_ENABLED(CONFIG_ARM64_SME))
+ vec_init_vq_map(ARM64_VEC_SME);
+ }
+
if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid);
@@ -1194,6 +1229,9 @@ void update_cpu_features(int cpu,
taint |= check_update_ftr_reg(SYS_ID_AA64ZFR0_EL1, cpu,
info->reg_id_aa64zfr0, boot->reg_id_aa64zfr0);
+ taint |= check_update_ftr_reg(SYS_ID_AA64SMFR0_EL1, cpu,
+ info->reg_id_aa64smfr0, boot->reg_id_aa64smfr0);
+
if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) {
taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu,
info->reg_zcr, boot->reg_zcr);
@@ -1204,6 +1242,16 @@ void update_cpu_features(int cpu,
vec_update_vq_map(ARM64_VEC_SVE);
}
+ if (id_aa64pfr1_sme(info->reg_id_aa64pfr1)) {
+ taint |= check_update_ftr_reg(SYS_SMCR_EL1, cpu,
+ info->reg_smcr, boot->reg_smcr);
+
+ /* Probe vector lengths, unless we already gave up on SME */
+ if (id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1)) &&
+ !system_capabilities_finalized())
+ vec_update_vq_map(ARM64_VEC_SME);
+ }
+
/*
* The kernel uses the LDGM/STGM instructions and the number of tags
* they read/write depends on the GMID_EL1.BS field. Check that the
@@ -1287,6 +1335,7 @@ u64 __read_sysreg_by_encoding(u32 sys_id)
read_sysreg_case(SYS_ID_AA64PFR0_EL1);
read_sysreg_case(SYS_ID_AA64PFR1_EL1);
read_sysreg_case(SYS_ID_AA64ZFR0_EL1);
+ read_sysreg_case(SYS_ID_AA64SMFR0_EL1);
read_sysreg_case(SYS_ID_AA64DFR0_EL1);
read_sysreg_case(SYS_ID_AA64DFR1_EL1);
read_sysreg_case(SYS_ID_AA64MMFR0_EL1);
@@ -2012,7 +2061,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64ISAR0_EL1,
- .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT,
+ .field_pos = ID_AA64ISAR0_EL1_ATOMIC_SHIFT,
.field_width = 4,
.sign = FTR_UNSIGNED,
.min_field_value = 2,
@@ -2194,10 +2243,10 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64ISAR0_EL1,
- .field_pos = ID_AA64ISAR0_TLB_SHIFT,
+ .field_pos = ID_AA64ISAR0_EL1_TLB_SHIFT,
.field_width = 4,
.sign = FTR_UNSIGNED,
- .min_field_value = ID_AA64ISAR0_TLB_RANGE,
+ .min_field_value = ID_AA64ISAR0_EL1_TLB_RANGE,
},
#ifdef CONFIG_ARM64_HW_AFDBM
{
@@ -2226,7 +2275,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64ISAR0_EL1,
- .field_pos = ID_AA64ISAR0_CRC32_SHIFT,
+ .field_pos = ID_AA64ISAR0_EL1_CRC32_SHIFT,
.field_width = 4,
.min_field_value = 1,
},
@@ -2381,7 +2430,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64ISAR0_EL1,
- .field_pos = ID_AA64ISAR0_RNDR_SHIFT,
+ .field_pos = ID_AA64ISAR0_EL1_RNDR_SHIFT,
.field_width = 4,
.sign = FTR_UNSIGNED,
.min_field_value = 1,
@@ -2441,6 +2490,33 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.min_field_value = 1,
},
+#ifdef CONFIG_ARM64_SME
+ {
+ .desc = "Scalable Matrix Extension",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_SME,
+ .sys_reg = SYS_ID_AA64PFR1_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64PFR1_SME_SHIFT,
+ .field_width = 4,
+ .min_field_value = ID_AA64PFR1_SME,
+ .matches = has_cpuid_feature,
+ .cpu_enable = sme_kernel_enable,
+ },
+ /* FA64 should be sorted after the base SME capability */
+ {
+ .desc = "FA64",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_SME_FA64,
+ .sys_reg = SYS_ID_AA64SMFR0_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64SMFR0_FA64_SHIFT,
+ .field_width = 1,
+ .min_field_value = ID_AA64SMFR0_FA64,
+ .matches = has_cpuid_feature,
+ .cpu_enable = fa64_kernel_enable,
+ },
+#endif /* CONFIG_ARM64_SME */
{},
};
@@ -2513,22 +2589,22 @@ static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = {
#endif
static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RNDR_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_SHA2_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_ATOMIC_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_RDM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_SHA3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_SM3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_SM4_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_DP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_FHM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_TS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_TS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_RNDR_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
@@ -2574,6 +2650,16 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV),
HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP),
HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES),
+#ifdef CONFIG_ARM64_SME
+ HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SME, CAP_HWCAP, KERNEL_HWCAP_SME),
+ HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_FA64, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
+ HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I16I64, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
+ HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F64F64, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
+ HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I8I32, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
+ HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F16F32, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
+ HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_B16F32, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
+ HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F32F32, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
+#endif /* CONFIG_ARM64_SME */
{},
};
@@ -2871,6 +2957,23 @@ static void verify_sve_features(void)
/* Add checks on other ZCR bits here if necessary */
}
+static void verify_sme_features(void)
+{
+ u64 safe_smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1);
+ u64 smcr = read_smcr_features();
+
+ unsigned int safe_len = safe_smcr & SMCR_ELx_LEN_MASK;
+ unsigned int len = smcr & SMCR_ELx_LEN_MASK;
+
+ if (len < safe_len || vec_verify_vq_map(ARM64_VEC_SME)) {
+ pr_crit("CPU%d: SME: vector length support mismatch\n",
+ smp_processor_id());
+ cpu_die_early();
+ }
+
+ /* Add checks on other SMCR bits here if necessary */
+}
+
static void verify_hyp_capabilities(void)
{
u64 safe_mmfr1, mmfr0, mmfr1;
@@ -2923,6 +3026,9 @@ static void verify_local_cpu_capabilities(void)
if (system_supports_sve())
verify_sve_features();
+ if (system_supports_sme())
+ verify_sme_features();
+
if (is_hyp_mode_available())
verify_hyp_capabilities();
}
@@ -3040,6 +3146,7 @@ void __init setup_cpu_features(void)
pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
sve_setup();
+ sme_setup();
minsigstksz_setup();
/* Advertise that we have computed the system capabilities */
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 330b92ea863a..8a8136a096ac 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -98,6 +98,14 @@ static const char *const hwcap_str[] = {
[KERNEL_HWCAP_AFP] = "afp",
[KERNEL_HWCAP_RPRES] = "rpres",
[KERNEL_HWCAP_MTE3] = "mte3",
+ [KERNEL_HWCAP_SME] = "sme",
+ [KERNEL_HWCAP_SME_I16I64] = "smei16i64",
+ [KERNEL_HWCAP_SME_F64F64] = "smef64f64",
+ [KERNEL_HWCAP_SME_I8I32] = "smei8i32",
+ [KERNEL_HWCAP_SME_F16F32] = "smef16f32",
+ [KERNEL_HWCAP_SME_B16F32] = "smeb16f32",
+ [KERNEL_HWCAP_SME_F32F32] = "smef32f32",
+ [KERNEL_HWCAP_SME_FA64] = "smefa64",
};
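These strings surface in /proc/cpuinfo; programs can test the matching HWCAP2 bits directly, for example (a sketch assuming the HWCAP2_SME* constants added to <asm/hwcap.h> by this series):

    #include <stdio.h>
    #include <sys/auxv.h>
    #include <asm/hwcap.h>

    int main(void)
    {
            unsigned long hwcap2 = getauxval(AT_HWCAP2);

            if (hwcap2 & HWCAP2_SME)
                    printf("SME%s\n",
                           (hwcap2 & HWCAP2_SME_FA64) ? " with FA64" : "");

            return 0;
    }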
#ifdef CONFIG_COMPAT
@@ -401,6 +409,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1);
+ info->reg_id_aa64smfr0 = read_cpuid(ID_AA64SMFR0_EL1);
if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
info->reg_gmid = read_cpuid(GMID_EL1);
@@ -412,6 +421,10 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
id_aa64pfr0_sve(info->reg_id_aa64pfr0))
info->reg_zcr = read_zcr_features();
+ if (IS_ENABLED(CONFIG_ARM64_SME) &&
+ id_aa64pfr1_sme(info->reg_id_aa64pfr1))
+ info->reg_smcr = read_smcr_features();
+
cpuinfo_detect_icache_policy(info);
}
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 4f3661eeb7ec..bf9fe71589bc 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -202,7 +202,7 @@ void unregister_kernel_step_hook(struct step_hook *hook)
* So we call all the registered handlers, until the right handler is
* found which returns zero.
*/
-static int call_step_hook(struct pt_regs *regs, unsigned int esr)
+static int call_step_hook(struct pt_regs *regs, unsigned long esr)
{
struct step_hook *hook;
struct list_head *list;
@@ -238,7 +238,7 @@ static void send_user_sigtrap(int si_code)
"User debug trap");
}
-static int single_step_handler(unsigned long unused, unsigned int esr,
+static int single_step_handler(unsigned long unused, unsigned long esr,
struct pt_regs *regs)
{
bool handler_found = false;
@@ -299,11 +299,11 @@ void unregister_kernel_break_hook(struct break_hook *hook)
unregister_debug_hook(&hook->node);
}
-static int call_break_hook(struct pt_regs *regs, unsigned int esr)
+static int call_break_hook(struct pt_regs *regs, unsigned long esr)
{
struct break_hook *hook;
struct list_head *list;
- int (*fn)(struct pt_regs *regs, unsigned int esr) = NULL;
+ int (*fn)(struct pt_regs *regs, unsigned long esr) = NULL;
list = user_mode(regs) ? &user_break_hook : &kernel_break_hook;
@@ -312,7 +312,7 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr)
* entirely not preemptible, and we can use rcu list safely here.
*/
list_for_each_entry_rcu(hook, list, node) {
- unsigned int comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;
+ unsigned long comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;
if ((comment & ~hook->mask) == hook->imm)
fn = hook->fn;
@@ -322,7 +322,7 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr)
}
NOKPROBE_SYMBOL(call_break_hook);
-static int brk_handler(unsigned long unused, unsigned int esr,
+static int brk_handler(unsigned long unused, unsigned long esr,
struct pt_regs *regs)
{
if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED)
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 878c65aa7206..56cefd33eb8e 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -75,7 +75,7 @@ static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs)
if (interrupts_enabled(regs)) {
if (regs->exit_rcu) {
trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+ lockdep_hardirqs_on_prepare();
rcu_irq_exit();
lockdep_hardirqs_on(CALLER_ADDR0);
return;
@@ -121,7 +121,7 @@ static __always_inline void enter_from_user_mode(struct pt_regs *regs)
static __always_inline void __exit_to_user_mode(void)
{
trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+ lockdep_hardirqs_on_prepare();
user_enter_irqoff();
lockdep_hardirqs_on(CALLER_ADDR0);
}
@@ -179,7 +179,7 @@ static void noinstr arm64_exit_nmi(struct pt_regs *regs)
ftrace_nmi_exit();
if (restore) {
trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+ lockdep_hardirqs_on_prepare();
}
rcu_nmi_exit();
@@ -215,7 +215,7 @@ static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
if (restore) {
trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+ lockdep_hardirqs_on_prepare();
}
rcu_nmi_exit();
@@ -282,13 +282,13 @@ extern void (*handle_arch_irq)(struct pt_regs *);
extern void (*handle_arch_fiq)(struct pt_regs *);
static void noinstr __panic_unhandled(struct pt_regs *regs, const char *vector,
- unsigned int esr)
+ unsigned long esr)
{
arm64_enter_nmi(regs);
console_verbose();
- pr_crit("Unhandled %s exception on CPU%d, ESR 0x%08x -- %s\n",
+ pr_crit("Unhandled %s exception on CPU%d, ESR 0x%016lx -- %s\n",
vector, smp_processor_id(), esr,
esr_get_class_string(esr));
@@ -537,6 +537,14 @@ static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr)
exit_to_user_mode(regs);
}
+static void noinstr el0_sme_acc(struct pt_regs *regs, unsigned long esr)
+{
+ enter_from_user_mode(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ do_sme_acc(esr, regs);
+ exit_to_user_mode(regs);
+}
+
static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
{
enter_from_user_mode(regs);
@@ -645,6 +653,9 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
case ESR_ELx_EC_SVE:
el0_sve_acc(regs, esr);
break;
+ case ESR_ELx_EC_SME:
+ el0_sme_acc(regs, esr);
+ break;
case ESR_ELx_EC_FP_EXC64:
el0_fpsimd_exc(regs, esr);
break;
@@ -818,7 +829,7 @@ UNHANDLED(el0t, 32, error)
#ifdef CONFIG_VMAP_STACK
asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs)
{
- unsigned int esr = read_sysreg(esr_el1);
+ unsigned long esr = read_sysreg(esr_el1);
unsigned long far = read_sysreg(far_el1);
arm64_enter_nmi(regs);
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index dc242e269f9a..229436f33df5 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -86,3 +86,39 @@ SYM_FUNC_START(sve_flush_live)
SYM_FUNC_END(sve_flush_live)
#endif /* CONFIG_ARM64_SVE */
+
+#ifdef CONFIG_ARM64_SME
+
+SYM_FUNC_START(sme_get_vl)
+ _sme_rdsvl 0, 1
+ ret
+SYM_FUNC_END(sme_get_vl)
+
+SYM_FUNC_START(sme_set_vq)
+ sme_load_vq x0, x1, x2
+ ret
+SYM_FUNC_END(sme_set_vq)
+
+/*
+ * Save the SME state
+ *
+ * x0 - pointer to buffer for state
+ */
+SYM_FUNC_START(za_save_state)
+ _sme_rdsvl 1, 1 // x1 = VL/8
+ sme_save_za 0, x1, 12
+ ret
+SYM_FUNC_END(za_save_state)
+
+/*
+ * Load the SME state
+ *
+ * x0 - pointer to buffer for state
+ */
+SYM_FUNC_START(za_load_state)
+ _sme_rdsvl 1, 1 // x1 = VL/8
+ sme_load_za 0, x1, 12
+ ret
+SYM_FUNC_END(za_load_state)
+
+#endif /* CONFIG_ARM64_SME */
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index e535480a4069..d42a205ef625 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -97,12 +97,6 @@ SYM_CODE_START(ftrace_common)
SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
bl ftrace_stub
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) // ftrace_graph_caller();
- nop // If enabled, this will be replaced
- // "b ftrace_graph_caller"
-#endif
-
/*
* At the callsite x0-x8 and x19-x30 were live. Any C code will have preserved
* x19-x29 per the AAPCS, and we created frame records upon entry, so we need
@@ -127,17 +121,6 @@ ftrace_common_return:
ret x9
SYM_CODE_END(ftrace_common)
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-SYM_CODE_START(ftrace_graph_caller)
- ldr x0, [sp, #S_PC]
- sub x0, x0, #AARCH64_INSN_SIZE // ip (callsite's BL insn)
- add x1, sp, #S_LR // parent_ip (callsite's LR)
- ldr x2, [sp, #PT_REGS_SIZE] // parent fp (callsite's FP)
- bl prepare_ftrace_return
- b ftrace_common_return
-SYM_CODE_END(ftrace_graph_caller)
-#endif
-
#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
/*
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index ede028dee81b..5b82b9292400 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -596,7 +596,7 @@ SYM_CODE_START_LOCAL(ret_to_user)
ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step
enable_step_tsk x19, x2
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
- bl stackleak_erase
+ bl stackleak_erase_on_task_stack
#endif
kernel_exit 0
SYM_CODE_END(ret_to_user)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 47af76e53221..819979398127 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -121,7 +121,10 @@
struct fpsimd_last_state_struct {
struct user_fpsimd_state *st;
void *sve_state;
+ void *za_state;
+ u64 *svcr;
unsigned int sve_vl;
+ unsigned int sme_vl;
};
static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
@@ -136,6 +139,12 @@ __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = {
.max_virtualisable_vl = SVE_VL_MIN,
},
#endif
+#ifdef CONFIG_ARM64_SME
+ [ARM64_VEC_SME] = {
+ .type = ARM64_VEC_SME,
+ .name = "SME",
+ },
+#endif
};
static unsigned int vec_vl_inherit_flag(enum vec_type type)
@@ -143,6 +152,8 @@ static unsigned int vec_vl_inherit_flag(enum vec_type type)
switch (type) {
case ARM64_VEC_SVE:
return TIF_SVE_VL_INHERIT;
+ case ARM64_VEC_SME:
+ return TIF_SME_VL_INHERIT;
default:
WARN_ON_ONCE(1);
return 0;
@@ -186,6 +197,26 @@ extern void __percpu *efi_sve_state;
#endif /* ! CONFIG_ARM64_SVE */
+#ifdef CONFIG_ARM64_SME
+
+static int get_sme_default_vl(void)
+{
+ return get_default_vl(ARM64_VEC_SME);
+}
+
+static void set_sme_default_vl(int val)
+{
+ set_default_vl(ARM64_VEC_SME, val);
+}
+
+static void sme_free(struct task_struct *);
+
+#else
+
+static inline void sme_free(struct task_struct *t) { }
+
+#endif
+
DEFINE_PER_CPU(bool, fpsimd_context_busy);
EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy);
@@ -206,10 +237,19 @@ static void __get_cpu_fpsimd_context(void)
*
* The double-underscore version must only be called if you know the task
* can't be preempted.
+ *
+ * On RT kernels local_bh_disable() is not sufficient because it only
+ * serializes soft interrupt related sections via a local lock, but stays
+ * preemptible. Disabling preemption is the right choice here as bottom
+ * half processing is always in thread context on RT kernels so it
+ * implicitly prevents bottom half processing as well.
*/
static void get_cpu_fpsimd_context(void)
{
- local_bh_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_bh_disable();
+ else
+ preempt_disable();
__get_cpu_fpsimd_context();
}
@@ -230,7 +270,10 @@ static void __put_cpu_fpsimd_context(void)
static void put_cpu_fpsimd_context(void)
{
__put_cpu_fpsimd_context();
- local_bh_enable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_bh_enable();
+ else
+ preempt_enable();
}
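Callers are expected to bracket any manipulation of the current task's FP state with this pair, schematically:

    get_cpu_fpsimd_context();
    /* ...read or modify current's FPSIMD/SVE/SME state... */
    put_cpu_fpsimd_context();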
static bool have_cpu_fpsimd_context(void)
@@ -238,23 +281,6 @@ static bool have_cpu_fpsimd_context(void)
return !preemptible() && __this_cpu_read(fpsimd_context_busy);
}
-/*
- * Call __sve_free() directly only if you know task can't be scheduled
- * or preempted.
- */
-static void __sve_free(struct task_struct *task)
-{
- kfree(task->thread.sve_state);
- task->thread.sve_state = NULL;
-}
-
-static void sve_free(struct task_struct *task)
-{
- WARN_ON(test_tsk_thread_flag(task, TIF_SVE));
-
- __sve_free(task);
-}
-
unsigned int task_get_vl(const struct task_struct *task, enum vec_type type)
{
return task->thread.vl[type];
@@ -279,16 +305,27 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
}
/*
+ * TIF_SME controls whether a task can use SME without trapping while
+ * in userspace. When TIF_SME is set we must have storage
+ * allocated in sve_state and za_state to store the contents of both ZA
+ * and the SVE registers for both streaming and non-streaming modes.
+ *
+ * If both SVCR.ZA and SVCR.SM are disabled then at any point we
+ * may disable TIF_SME and reenable traps.
+ */
+
+/*
* TIF_SVE controls whether a task can use SVE without trapping while
- * in userspace, and also the way a task's FPSIMD/SVE state is stored
- * in thread_struct.
+ * in userspace, and also (together with TIF_SME) the way a task's
+ * FPSIMD/SVE state is stored in thread_struct.
*
* The kernel uses this flag to track whether a user task is actively
* using SVE, and therefore whether full SVE register state needs to
* be tracked. If not, the cheaper FPSIMD context handling code can
* be used instead of the more costly SVE equivalents.
*
- * * TIF_SVE set:
+ * * TIF_SVE or SVCR.SM set:
*
* The task can execute SVE instructions while in userspace without
* trapping to the kernel.
@@ -296,7 +333,8 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
* When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the
 * corresponding Zn), P0-P15 and FFR are encoded in
* task->thread.sve_state, formatted appropriately for vector
- * length task->thread.sve_vl.
+ * length task->thread.sve_vl or, if SVCR.SM is set,
+ * task->thread.sme_vl.
*
* task->thread.sve_state must point to a valid buffer at least
* sve_state_size(task) bytes in size.
@@ -334,16 +372,44 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
*/
static void task_fpsimd_load(void)
{
+ bool restore_sve_regs = false;
+ bool restore_ffr;
+
WARN_ON(!system_supports_fpsimd());
WARN_ON(!have_cpu_fpsimd_context());
+ /* Check if we should restore SVE first */
if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) {
sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);
+ restore_sve_regs = true;
+ restore_ffr = true;
+ }
+
+ /* Restore SME, override SVE register configuration if needed */
+ if (system_supports_sme()) {
+ unsigned long sme_vl = task_get_sme_vl(current);
+
+ /* Ensure VL is set up for restoring data */
+ if (test_thread_flag(TIF_SME))
+ sme_set_vq(sve_vq_from_vl(sme_vl) - 1);
+
+ write_sysreg_s(current->thread.svcr, SYS_SVCR);
+
+ if (thread_za_enabled(&current->thread))
+ za_load_state(current->thread.za_state);
+
+ if (thread_sm_enabled(&current->thread)) {
+ restore_sve_regs = true;
+ restore_ffr = system_supports_fa64();
+ }
+ }
+
+ if (restore_sve_regs)
sve_load_state(sve_pffr(&current->thread),
- &current->thread.uw.fpsimd_state.fpsr, true);
- } else {
+ &current->thread.uw.fpsimd_state.fpsr,
+ restore_ffr);
+ else
fpsimd_load_state(&current->thread.uw.fpsimd_state);
- }
}
/*
@@ -361,6 +427,9 @@ static void fpsimd_save(void)
struct fpsimd_last_state_struct const *last =
this_cpu_ptr(&fpsimd_last_state);
/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
+ bool save_sve_regs = false;
+ bool save_ffr;
+ unsigned int vl;
WARN_ON(!system_supports_fpsimd());
WARN_ON(!have_cpu_fpsimd_context());
@@ -368,9 +437,32 @@ static void fpsimd_save(void)
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
return;
- if (IS_ENABLED(CONFIG_ARM64_SVE) &&
- test_thread_flag(TIF_SVE)) {
- if (WARN_ON(sve_get_vl() != last->sve_vl)) {
+ if (test_thread_flag(TIF_SVE)) {
+ save_sve_regs = true;
+ save_ffr = true;
+ vl = last->sve_vl;
+ }
+
+ if (system_supports_sme()) {
+ u64 *svcr = last->svcr;
+
+ *svcr = read_sysreg_s(SYS_SVCR);
+
+ if (*svcr & SVCR_ZA_MASK)
+ za_save_state(last->za_state);
+
+ /* If we are in streaming mode override regular SVE. */
+ if (*svcr & SVCR_SM_MASK) {
+ save_sve_regs = true;
+ save_ffr = system_supports_fa64();
+ vl = last->sme_vl;
+ }
+ }
+
+ if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) {
+ /* Get the configured VL from RDVL, will account for SM */
+ if (WARN_ON(sve_get_vl() != vl)) {
/*
* Can't save the user regs, so current would
* re-enter user with corrupt state.
@@ -381,8 +473,8 @@ static void fpsimd_save(void)
}
sve_save_state((char *)last->sve_state +
- sve_ffr_offset(last->sve_vl),
- &last->st->fpsr, true);
+ sve_ffr_offset(vl),
+ &last->st->fpsr, save_ffr);
} else {
fpsimd_save_state(last->st);
}
@@ -409,6 +501,8 @@ static unsigned int find_supported_vector_length(enum vec_type type,
if (vl > max_vl)
vl = max_vl;
+ if (vl < info->min_vl)
+ vl = info->min_vl;
bit = find_next_bit(info->vq_map, SVE_VQ_MAX,
__vq_to_bit(sve_vq_from_vl(vl)));
@@ -467,6 +561,30 @@ static int __init sve_sysctl_init(void)
static int __init sve_sysctl_init(void) { return 0; }
#endif /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */
+#if defined(CONFIG_ARM64_SME) && defined(CONFIG_SYSCTL)
+static struct ctl_table sme_default_vl_table[] = {
+ {
+ .procname = "sme_default_vector_length",
+ .mode = 0644,
+ .proc_handler = vec_proc_do_default_vl,
+ .extra1 = &vl_info[ARM64_VEC_SME],
+ },
+ { }
+};
+
+static int __init sme_sysctl_init(void)
+{
+ if (system_supports_sme())
+ if (!register_sysctl("abi", sme_default_vl_table))
+ return -EINVAL;
+
+ return 0;
+}
+
+#else /* ! (CONFIG_ARM64_SME && CONFIG_SYSCTL) */
+static int __init sme_sysctl_init(void) { return 0; }
+#endif /* ! (CONFIG_ARM64_SME && CONFIG_SYSCTL) */
+
#define ZREG(sve_state, vq, n) ((char *)(sve_state) + \
(SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))
@@ -520,7 +638,7 @@ static void fpsimd_to_sve(struct task_struct *task)
if (!system_supports_sve())
return;
- vq = sve_vq_from_vl(task_get_sve_vl(task));
+ vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
__fpsimd_to_sve(sst, fst, vq);
}
@@ -537,7 +655,7 @@ static void fpsimd_to_sve(struct task_struct *task)
*/
static void sve_to_fpsimd(struct task_struct *task)
{
- unsigned int vq;
+ unsigned int vq, vl;
void const *sst = task->thread.sve_state;
struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state;
unsigned int i;
@@ -546,7 +664,8 @@ static void sve_to_fpsimd(struct task_struct *task)
if (!system_supports_sve())
return;
- vq = sve_vq_from_vl(task_get_sve_vl(task));
+ vl = thread_get_cur_vl(&task->thread);
+ vq = sve_vq_from_vl(vl);
for (i = 0; i < SVE_NUM_ZREGS; ++i) {
p = (__uint128_t const *)ZREG(sst, vq, i);
fst->vregs[i] = arm64_le128_to_cpu(*p);
@@ -554,14 +673,37 @@ static void sve_to_fpsimd(struct task_struct *task)
}
#ifdef CONFIG_ARM64_SVE
+/*
+ * Call __sve_free() directly only if you know task can't be scheduled
+ * or preempted.
+ */
+static void __sve_free(struct task_struct *task)
+{
+ kfree(task->thread.sve_state);
+ task->thread.sve_state = NULL;
+}
+
+static void sve_free(struct task_struct *task)
+{
+ WARN_ON(test_tsk_thread_flag(task, TIF_SVE));
+
+ __sve_free(task);
+}
/*
* Return how many bytes of memory are required to store the full SVE
* state for task, given task's currently configured vector length.
*/
-static size_t sve_state_size(struct task_struct const *task)
+size_t sve_state_size(struct task_struct const *task)
{
- return SVE_SIG_REGS_SIZE(sve_vq_from_vl(task_get_sve_vl(task)));
+ unsigned int vl = 0;
+
+ if (system_supports_sve())
+ vl = task_get_sve_vl(task);
+ if (system_supports_sme())
+ vl = max(vl, task_get_sme_vl(task));
+
+ return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl));
}
/*
@@ -588,6 +730,19 @@ void sve_alloc(struct task_struct *task)
/*
+ * Force the FPSIMD state shared with SVE to be updated in the SVE state
+ * even if the SVE state is the current active state.
+ *
+ * This should only be called by ptrace. task must be non-runnable.
+ * task->thread.sve_state must point to at least sve_state_size(task)
+ * bytes of allocated kernel memory.
+ */
+void fpsimd_force_sync_to_sve(struct task_struct *task)
+{
+ fpsimd_to_sve(task);
+}
+
+/*
* Ensure that task->thread.sve_state is up to date with respect to
* the user task, irrespective of when SVE is in use or not.
*
@@ -597,7 +752,8 @@ void sve_alloc(struct task_struct *task)
*/
void fpsimd_sync_to_sve(struct task_struct *task)
{
- if (!test_tsk_thread_flag(task, TIF_SVE))
+ if (!test_tsk_thread_flag(task, TIF_SVE) &&
+ !thread_sm_enabled(&task->thread))
fpsimd_to_sve(task);
}
@@ -611,7 +767,8 @@ void fpsimd_sync_to_sve(struct task_struct *task)
*/
void sve_sync_to_fpsimd(struct task_struct *task)
{
- if (test_tsk_thread_flag(task, TIF_SVE))
+ if (test_tsk_thread_flag(task, TIF_SVE) ||
+ thread_sm_enabled(&task->thread))
sve_to_fpsimd(task);
}
@@ -636,7 +793,7 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
if (!test_tsk_thread_flag(task, TIF_SVE))
return;
- vq = sve_vq_from_vl(task_get_sve_vl(task));
+ vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
__fpsimd_to_sve(sst, fst, vq);
@@ -680,8 +837,7 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
/*
* To ensure the FPSIMD bits of the SVE vector registers are preserved,
* write any live register state back to task_struct, and convert to a
- * regular FPSIMD thread. Since the vector length can only be changed
- * with a syscall we can't be in streaming mode while reconfiguring.
+ * regular FPSIMD thread.
*/
if (task == current) {
get_cpu_fpsimd_context();
@@ -690,17 +846,26 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
}
fpsimd_flush_task_state(task);
- if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
+ if (test_and_clear_tsk_thread_flag(task, TIF_SVE) ||
+ thread_sm_enabled(&task->thread))
sve_to_fpsimd(task);
+ if (system_supports_sme() && type == ARM64_VEC_SME) {
+ task->thread.svcr &= ~(SVCR_SM_MASK |
+ SVCR_ZA_MASK);
+ clear_thread_flag(TIF_SME);
+ }
+
if (task == current)
put_cpu_fpsimd_context();
/*
- * Force reallocation of task SVE state to the correct size
- * on next use:
+ * Force reallocation of task SVE and SME state to the correct
+ * size on next use:
*/
sve_free(task);
+ if (system_supports_sme() && type == ARM64_VEC_SME)
+ sme_free(task);
task_set_vl(task, type, vl);
@@ -761,6 +926,36 @@ int sve_get_current_vl(void)
return vec_prctl_status(ARM64_VEC_SVE, 0);
}
+#ifdef CONFIG_ARM64_SME
+/* PR_SME_SET_VL */
+int sme_set_current_vl(unsigned long arg)
+{
+ unsigned long vl, flags;
+ int ret;
+
+ vl = arg & PR_SME_VL_LEN_MASK;
+ flags = arg & ~vl;
+
+ if (!system_supports_sme() || is_compat_task())
+ return -EINVAL;
+
+ ret = vec_set_vector_length(current, ARM64_VEC_SME, vl, flags);
+ if (ret)
+ return ret;
+
+ return vec_prctl_status(ARM64_VEC_SME, flags);
+}
+
+/* PR_SME_GET_VL */
+int sme_get_current_vl(void)
+{
+ if (!system_supports_sme() || is_compat_task())
+ return -EINVAL;
+
+ return vec_prctl_status(ARM64_VEC_SME, 0);
+}
+#endif /* CONFIG_ARM64_SME */
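From userspace these are reached via prctl(); a task might pick its streaming vector length like so (a sketch, assuming the PR_SME_* constants that <linux/prctl.h> gains alongside this series):

    #include <stdio.h>
    #include <sys/prctl.h>

    int main(void)
    {
            /* Request a 256-bit (32-byte) streaming vector length. */
            int ret = prctl(PR_SME_SET_VL, 32);

            if (ret < 0)
                    return 1;

            /* The kernel may round the request to a supported length. */
            printf("SME VL is now %d bytes\n", ret & PR_SME_VL_LEN_MASK);

            return 0;
    }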
+
static void vec_probe_vqs(struct vl_info *info,
DECLARE_BITMAP(map, SVE_VQ_MAX))
{
@@ -770,7 +965,23 @@ static void vec_probe_vqs(struct vl_info *info,
for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {
write_vl(info->type, vq - 1); /* self-syncing */
- vl = sve_get_vl();
+
+ switch (info->type) {
+ case ARM64_VEC_SVE:
+ vl = sve_get_vl();
+ break;
+ case ARM64_VEC_SME:
+ vl = sme_get_vl();
+ break;
+ default:
+ vl = 0;
+ break;
+ }
+
+ /* Minimum VL identified? */
+ if (sve_vq_from_vl(vl) > vq)
+ break;
+
vq = sve_vq_from_vl(vl); /* skip intervening lengths */
set_bit(__vq_to_bit(vq), map);
}
@@ -856,21 +1067,25 @@ int vec_verify_vq_map(enum vec_type type)
static void __init sve_efi_setup(void)
{
- struct vl_info *info = &vl_info[ARM64_VEC_SVE];
+ int max_vl = 0;
+ int i;
if (!IS_ENABLED(CONFIG_EFI))
return;
+ for (i = 0; i < ARRAY_SIZE(vl_info); i++)
+ max_vl = max(vl_info[i].max_vl, max_vl);
+
/*
* alloc_percpu() warns and prints a backtrace if this goes wrong.
* This is evidence of a crippled system and we are returning void,
* so no attempt is made to handle this situation here.
*/
- if (!sve_vl_valid(info->max_vl))
+ if (!sve_vl_valid(max_vl))
goto fail;
efi_sve_state = __alloc_percpu(
- SVE_SIG_REGS_SIZE(sve_vq_from_vl(info->max_vl)), SVE_VQ_BYTES);
+ SVE_SIG_REGS_SIZE(sve_vq_from_vl(max_vl)), SVE_VQ_BYTES);
if (!efi_sve_state)
goto fail;
@@ -989,10 +1204,172 @@ void __init sve_setup(void)
void fpsimd_release_task(struct task_struct *dead_task)
{
__sve_free(dead_task);
+ sme_free(dead_task);
}
#endif /* CONFIG_ARM64_SVE */
+#ifdef CONFIG_ARM64_SME
+
+/*
+ * Ensure that task->thread.za_state is allocated and sufficiently large.
+ *
+ * This function should be used only in preparation for replacing
+ * task->thread.za_state with new data. The memory is always zeroed
+ * here to prevent stale data from showing through: this is done in
+ * the interest of testability and predictability; the architecture
+ * guarantees that when ZA is enabled it will be zeroed.
+ */
+void sme_alloc(struct task_struct *task)
+{
+ if (task->thread.za_state) {
+ memset(task->thread.za_state, 0, za_state_size(task));
+ return;
+ }
+
+ /* This could potentially be up to 64K. */
+ task->thread.za_state =
+ kzalloc(za_state_size(task), GFP_KERNEL);
+}
+
+static void sme_free(struct task_struct *task)
+{
+ kfree(task->thread.za_state);
+ task->thread.za_state = NULL;
+}
+
+void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
+{
+ /* Set priority for all PEs to architecturally defined minimum */
+ write_sysreg_s(read_sysreg_s(SYS_SMPRI_EL1) & ~SMPRI_EL1_PRIORITY_MASK,
+ SYS_SMPRI_EL1);
+
+ /* Allow SME in kernel */
+ write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_SMEN_EL1EN, CPACR_EL1);
+ isb();
+
+ /* Allow EL0 to access TPIDR2 */
+ write_sysreg(read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2, SCTLR_EL1);
+ isb();
+}
+
+/*
+ * This must be called after sme_kernel_enable(); we rely on the
+ * feature table being sorted to ensure this.
+ */
+void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
+{
+ /* Allow use of FA64 */
+ write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_FA64_MASK,
+ SYS_SMCR_EL1);
+}
+
+/*
+ * Read the pseudo-SMCR used by cpufeatures to identify the supported
+ * vector length.
+ *
+ * Use only if SME is present.
+ * This function clobbers the SME vector length.
+ */
+u64 read_smcr_features(void)
+{
+ u64 smcr;
+ unsigned int vq_max;
+
+ sme_kernel_enable(NULL);
+ sme_smstart_sm();
+
+ /*
+ * Set the maximum possible VL.
+ */
+ write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_LEN_MASK,
+ SYS_SMCR_EL1);
+
+ smcr = read_sysreg_s(SYS_SMCR_EL1);
+ smcr &= ~(u64)SMCR_ELx_LEN_MASK; /* Only the LEN field */
+ vq_max = sve_vq_from_vl(sve_get_vl());
+ smcr |= vq_max - 1; /* set LEN field to maximum effective value */
+
+ sme_smstop_sm();
+
+ return smcr;
+}
+
+void __init sme_setup(void)
+{
+ struct vl_info *info = &vl_info[ARM64_VEC_SME];
+ u64 smcr;
+ int min_bit;
+
+ if (!system_supports_sme())
+ return;
+
+ /*
+ * SME doesn't require any particular vector length be
+ * supported but it does require at least one. We should have
+ * disabled the feature entirely while bringing up CPUs but
+ * let's double check here.
+ */
+ WARN_ON(bitmap_empty(info->vq_map, SVE_VQ_MAX));
+
+ min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX);
+ info->min_vl = sve_vl_from_vq(__bit_to_vq(min_bit));
+
+ smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1);
+ info->max_vl = sve_vl_from_vq((smcr & SMCR_ELx_LEN_MASK) + 1);
+
+ /*
+ * Sanity-check that the max VL we determined through CPU features
+ * corresponds properly to sme_vq_map. If not, do our best:
+ */
+ if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SME,
+ info->max_vl)))
+ info->max_vl = find_supported_vector_length(ARM64_VEC_SME,
+ info->max_vl);
+
+ WARN_ON(info->min_vl > info->max_vl);
+
+ /*
+ * For the default VL, pick the maximum supported value <= 32
+ * (256 bits) if there is one since this is guaranteed not to
+ * grow the signal frame when in streaming mode, otherwise the
+ * minimum available VL will be used.
+ */
+ set_sme_default_vl(find_supported_vector_length(ARM64_VEC_SME, 32));
+
+ pr_info("SME: minimum available vector length %u bytes per vector\n",
+ info->min_vl);
+ pr_info("SME: maximum available vector length %u bytes per vector\n",
+ info->max_vl);
+ pr_info("SME: default vector length %u bytes per vector\n",
+ get_sme_default_vl());
+}
+
+#endif /* CONFIG_ARM64_SME */
+
+static void sve_init_regs(void)
+{
+ /*
+ * Convert the FPSIMD state to SVE, zeroing all the state that
+ * is not shared with FPSIMD. If (as is likely) the current
+ * state is live in the registers then do this there and
+ * update our metadata for the current task including
+ * disabling the trap, otherwise update our in-memory copy.
+ * We are guaranteed not to be in streaming mode: an SVE trap
+ * can only be taken when not in streaming mode, and we can't
+ * be in streaming mode when taking an SME trap.
+ */
+ if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
+ unsigned long vq_minus_one =
+ sve_vq_from_vl(task_get_sve_vl(current)) - 1;
+ sve_set_vq(vq_minus_one);
+ sve_flush_live(true, vq_minus_one);
+ fpsimd_bind_task_to_cpu();
+ } else {
+ fpsimd_to_sve(current);
+ }
+}
+
/*
* Trapped SVE access
*
@@ -1004,7 +1381,7 @@ void fpsimd_release_task(struct task_struct *dead_task)
* would have disabled the SVE access trap for userspace during
* ret_to_user, making an SVE access trap impossible in that case.
*/
-void do_sve_acc(unsigned int esr, struct pt_regs *regs)
+void do_sve_acc(unsigned long esr, struct pt_regs *regs)
{
/* Even if we chose not to use SVE, the hardware could still trap: */
if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) {
@@ -1024,29 +1401,84 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
WARN_ON(1); /* SVE access shouldn't have trapped */
/*
- * Convert the FPSIMD state to SVE, zeroing all the state that
- * is not shared with FPSIMD. If (as is likely) the current
- * state is live in the registers then do this there and
- * update our metadata for the current task including
- * disabling the trap, otherwise update our in-memory copy.
+ * Even if the task may have used streaming mode, we can only
+ * generate SVE access traps in normal SVE mode and
+ * transitioning out of streaming mode may discard any
+ * streaming mode state. Always clear the high bits to avoid
+ * any potential errors tracking what is properly initialised.
+ */
+ sve_init_regs();
+
+ put_cpu_fpsimd_context();
+}
+
+/*
+ * Trapped SME access
+ *
+ * Storage is allocated for the full SVE and SME state, the current
+ * FPSIMD register contents are migrated to SVE if SVE is not already
+ * active, and the access trap is disabled.
+ *
+ * TIF_SME should be clear on entry: otherwise, fpsimd_restore_current_state()
+ * would have disabled the SME access trap for userspace during
+ * ret_to_user, making an SME access trap impossible in that case.
+ */
+void do_sme_acc(unsigned long esr, struct pt_regs *regs)
+{
+ /* Even if we chose not to use SME, the hardware could still trap: */
+ if (unlikely(!system_supports_sme()) || WARN_ON(is_compat_task())) {
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
+ return;
+ }
+
+ /*
+ * If this is not a trap due to SME being disabled then something
+ * is being used in the wrong mode, report as SIGILL.
*/
+ if (ESR_ELx_ISS(esr) != ESR_ELx_SME_ISS_SME_DISABLED) {
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
+ return;
+ }
+
+ sve_alloc(current);
+ sme_alloc(current);
+ if (!current->thread.sve_state || !current->thread.za_state) {
+ force_sig(SIGKILL);
+ return;
+ }
+
+ get_cpu_fpsimd_context();
+
+ /* With TIF_SME userspace shouldn't generate any traps */
+ if (test_and_set_thread_flag(TIF_SME))
+ WARN_ON(1);
+
if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
unsigned long vq_minus_one =
- sve_vq_from_vl(task_get_sve_vl(current)) - 1;
- sve_set_vq(vq_minus_one);
- sve_flush_live(true, vq_minus_one);
+ sve_vq_from_vl(task_get_sme_vl(current)) - 1;
+ sme_set_vq(vq_minus_one);
+
fpsimd_bind_task_to_cpu();
- } else {
- fpsimd_to_sve(current);
}
+ /*
+ * If SVE was not already active, initialise the SVE registers;
+ * any non-shared state between the streaming and regular SVE
+ * registers is architecturally guaranteed to be zeroed when
+ * we enter streaming mode. We do not need to initialise ZA
+ * since ZA must be disabled at this point and enabling ZA is
+ * architecturally defined to zero ZA.
+ */
+ if (system_supports_sve() && !test_thread_flag(TIF_SVE))
+ sve_init_regs();
+
put_cpu_fpsimd_context();
}
/*
* Trapped FP/ASIMD access.
*/
-void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
+void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs)
{
/* TODO: implement lazy context saving/restoring */
WARN_ON(1);
@@ -1055,7 +1487,7 @@ void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
/*
* Raise a SIGFPE for the current process.
*/
-void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
+void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs)
{
unsigned int si_code = FPE_FLTUNK;
@@ -1141,6 +1573,9 @@ static void fpsimd_flush_thread_vl(enum vec_type type)
void fpsimd_flush_thread(void)
{
+ void *sve_state = NULL;
+ void *za_state = NULL;
+
if (!system_supports_fpsimd())
return;
@@ -1152,11 +1587,28 @@ void fpsimd_flush_thread(void)
if (system_supports_sve()) {
clear_thread_flag(TIF_SVE);
- sve_free(current);
+
+ /* Defer kfree() while in atomic context */
+ sve_state = current->thread.sve_state;
+ current->thread.sve_state = NULL;
+
fpsimd_flush_thread_vl(ARM64_VEC_SVE);
}
+ if (system_supports_sme()) {
+ clear_thread_flag(TIF_SME);
+
+ /* Defer kfree() while in atomic context */
+ za_state = current->thread.za_state;
+ current->thread.za_state = NULL;
+
+ fpsimd_flush_thread_vl(ARM64_VEC_SME);
+ current->thread.svcr = 0;
+ }
+
put_cpu_fpsimd_context();
+ kfree(sve_state);
+ kfree(za_state);
}
/*
@@ -1198,22 +1650,34 @@ static void fpsimd_bind_task_to_cpu(void)
WARN_ON(!system_supports_fpsimd());
last->st = &current->thread.uw.fpsimd_state;
last->sve_state = current->thread.sve_state;
+ last->za_state = current->thread.za_state;
last->sve_vl = task_get_sve_vl(current);
+ last->sme_vl = task_get_sme_vl(current);
+ last->svcr = &current->thread.svcr;
current->thread.fpsimd_cpu = smp_processor_id();
+ /*
+ * Toggle SVE and SME trapping for userspace if needed; these
+ * are serialised by ret_to_user().
+ */
+ if (system_supports_sme()) {
+ if (test_thread_flag(TIF_SME))
+ sme_user_enable();
+ else
+ sme_user_disable();
+ }
+
if (system_supports_sve()) {
- /* Toggle SVE trapping for userspace if needed */
if (test_thread_flag(TIF_SVE))
sve_user_enable();
else
sve_user_disable();
-
- /* Serialised by exception return to user */
}
}
void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
- unsigned int sve_vl)
+ unsigned int sve_vl, void *za_state,
+ unsigned int sme_vl, u64 *svcr)
{
struct fpsimd_last_state_struct *last =
this_cpu_ptr(&fpsimd_last_state);
@@ -1222,8 +1686,11 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
WARN_ON(!in_softirq() && !irqs_disabled());
last->st = st;
+ last->svcr = svcr;
last->sve_state = sve_state;
+ last->za_state = za_state;
last->sve_vl = sve_vl;
+ last->sme_vl = sme_vl;
}
/*
@@ -1320,6 +1787,15 @@ static void fpsimd_flush_cpu_state(void)
{
WARN_ON(!system_supports_fpsimd());
__this_cpu_write(fpsimd_last_state.st, NULL);
+
+ /*
+ * Leaving streaming mode enabled will cause issues for any kernel
+ * NEON use, and leaving streaming mode or ZA enabled may increase power
+ * consumption.
+ */
+ if (system_supports_sme())
+ sme_smstop();
+
set_thread_flag(TIF_FOREIGN_FPSTATE);
}
@@ -1397,6 +1873,7 @@ EXPORT_SYMBOL(kernel_neon_end);
static DEFINE_PER_CPU(struct user_fpsimd_state, efi_fpsimd_state);
static DEFINE_PER_CPU(bool, efi_fpsimd_state_used);
static DEFINE_PER_CPU(bool, efi_sve_state_used);
+static DEFINE_PER_CPU(bool, efi_sm_state);
/*
* EFI runtime services support functions
@@ -1431,12 +1908,28 @@ void __efi_fpsimd_begin(void)
*/
if (system_supports_sve() && likely(efi_sve_state)) {
char *sve_state = this_cpu_ptr(efi_sve_state);
+ bool ffr = true;
+ u64 svcr;
__this_cpu_write(efi_sve_state_used, true);
+ if (system_supports_sme()) {
+ svcr = read_sysreg_s(SYS_SVCR);
+
+ __this_cpu_write(efi_sm_state,
+ svcr & SVCR_SM_MASK);
+
+ /* Without FA64, FFR does not exist in streaming mode */
+ if (!system_supports_fa64())
+ ffr = !(svcr & SVCR_SM_MASK);
+ }
+
sve_save_state(sve_state + sve_ffr_offset(sve_max_vl()),
&this_cpu_ptr(&efi_fpsimd_state)->fpsr,
- true);
+ ffr);
+
+ if (system_supports_sme())
+ sysreg_clear_set_s(SYS_SVCR,
+ SVCR_SM_MASK, 0);
+
} else {
fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state));
}
@@ -1459,11 +1952,26 @@ void __efi_fpsimd_end(void)
if (system_supports_sve() &&
likely(__this_cpu_read(efi_sve_state_used))) {
char const *sve_state = this_cpu_ptr(efi_sve_state);
+ bool ffr = true;
+
+ /*
+ * Restore streaming mode; EFI calls are
+ * normal function calls so should not return in
+ * streaming mode.
+ */
+ if (system_supports_sme()) {
+ if (__this_cpu_read(efi_sm_state)) {
+ sysreg_clear_set_s(SYS_SVCR,
+ 0,
+ SVCR_SM_MASK);
+ if (!system_supports_fa64())
+ ffr = efi_sm_state;
+ }
+ }
- sve_set_vq(sve_vq_from_vl(sve_get_vl()) - 1);
sve_load_state(sve_state + sve_ffr_offset(sve_max_vl()),
&this_cpu_ptr(&efi_fpsimd_state)->fpsr,
- true);
+ ffr);
__this_cpu_write(efi_sve_state_used, false);
} else {
@@ -1538,6 +2046,13 @@ static int __init fpsimd_init(void)
if (!cpu_have_named_feature(ASIMD))
pr_notice("Advanced SIMD is not implemented\n");
- return sve_sysctl_init();
+
+ if (cpu_have_named_feature(SME) && !cpu_have_named_feature(SVE))
+ pr_notice("SME is implemented but not SVE\n");
+
+ sve_sysctl_init();
+ sme_sysctl_init();
+
+ return 0;
}
core_initcall(fpsimd_init);
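
The EFI hunks above extend an existing bracket around runtime service calls. Simplified from the arch_efi_call_virt_setup()/arch_efi_call_virt_teardown() pairing in asm/efi.h (virtmap and locking details omitted), the intended usage is:

        efi_status_t call_runtime_service(efi_status_t (*fn)(void))
        {
                efi_status_t ret;

                __efi_fpsimd_begin();   /* save FPSIMD/SVE, leave streaming mode */
                ret = fn();             /* firmware may clobber FP registers */
                __efi_fpsimd_end();     /* restore state, including SVCR.SM */

                return ret;
        }
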
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 4506c4a90ac1..f447c4a36f69 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -268,6 +268,22 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
}
#ifdef CONFIG_DYNAMIC_FTRACE
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
+{
+ /*
+ * When DYNAMIC_FTRACE_WITH_REGS is selected, `fregs` can never be NULL
+ * and arch_ftrace_get_regs(fregs) will always give a non-NULL pt_regs
+ * in which we can safely modify the LR.
+ */
+ struct pt_regs *regs = arch_ftrace_get_regs(fregs);
+ unsigned long *parent = (unsigned long *)&procedure_link_pointer(regs);
+
+ prepare_ftrace_return(ip, parent, frame_pointer(regs));
+}
+#else
/*
* Turn on/off the call to ftrace_graph_caller() in ftrace_caller()
* depending on @enable.
@@ -297,5 +313,6 @@ int ftrace_disable_ftrace_graph_caller(void)
{
return ftrace_modify_graph_caller(false);
}
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
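
The new ftrace_graph_func() relies on prepare_ftrace_return() rewriting the saved LR slot it is handed. Slightly simplified (the tracing_graph_pause check is omitted), that helper does:

        void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
                                   unsigned long frame_pointer)
        {
                unsigned long old = *parent;    /* the real return address */

                if (!function_graph_enter(old, self_addr, frame_pointer,
                                          (void *)frame_pointer))
                        *parent = (unsigned long)&return_to_handler;
        }
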
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index cd868084e724..b29a311bb055 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -617,7 +617,7 @@ NOKPROBE_SYMBOL(toggle_bp_registers);
/*
* Debug exception handlers.
*/
-static int breakpoint_handler(unsigned long unused, unsigned int esr,
+static int breakpoint_handler(unsigned long unused, unsigned long esr,
struct pt_regs *regs)
{
int i, step = 0, *kernel_step;
@@ -751,7 +751,7 @@ static int watchpoint_report(struct perf_event *wp, unsigned long addr,
return step;
}
-static int watchpoint_handler(unsigned long addr, unsigned int esr,
+static int watchpoint_handler(unsigned long addr, unsigned long esr,
struct pt_regs *regs)
{
int i, step = 0, *kernel_step, access, closest_match = 0;
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index 2aede780fb80..cda9c1e9864f 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -232,14 +232,14 @@ int kgdb_arch_handle_exception(int exception_vector, int signo,
return err;
}
-static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr)
+static int kgdb_brk_fn(struct pt_regs *regs, unsigned long esr)
{
kgdb_handle_exception(1, SIGTRAP, 0, regs);
return DBG_HOOK_HANDLED;
}
NOKPROBE_SYMBOL(kgdb_brk_fn)
-static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr)
+static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned long esr)
{
compiled_break = 1;
kgdb_handle_exception(1, SIGTRAP, 0, regs);
@@ -248,7 +248,7 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr)
}
NOKPROBE_SYMBOL(kgdb_compiled_brk_fn);
-static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr)
+static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned long esr)
{
if (!kgdb_single_step)
return DBG_HOOK_ERROR;
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index e16b248699d5..19c2d487cb08 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -329,8 +329,13 @@ bool crash_is_nosave(unsigned long pfn)
/* in reserved memory? */
addr = __pfn_to_phys(pfn);
- if ((addr < crashk_res.start) || (crashk_res.end < addr))
- return false;
+ if ((addr < crashk_res.start) || (crashk_res.end < addr)) {
+ if (!crashk_low_res.end)
+ return false;
+
+ if ((addr < crashk_low_res.start) || (crashk_low_res.end < addr))
+ return false;
+ }
if (!kexec_crash_image)
return true;
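
The nested bounds checks read more clearly as a helper; an equivalent formulation (a sketch, not part of the patch):

        static bool addr_in_res(phys_addr_t addr, const struct resource *res)
        {
                return addr >= res->start && addr <= res->end;
        }

        /* crash_is_nosave() proceeds only when the address is inside the
         * main reservation or the optional low reservation: */
        if (!addr_in_res(addr, &crashk_res) &&
            !(crashk_low_res.end && addr_in_res(addr, &crashk_low_res)))
                return false;
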
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index 59c648d51848..889951291cc0 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -65,10 +65,18 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
/* Exclude crashkernel region */
ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
+ if (ret)
+ goto out;
+
+ if (crashk_low_res.end) {
+ ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
+ if (ret)
+ goto out;
+ }
- if (!ret)
- ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
+ ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
+out:
kfree(cmem);
return ret;
}
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 78b3e0f8e997..57b30bcf9f21 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -15,6 +15,7 @@
#include <linux/swapops.h>
#include <linux/thread_info.h>
#include <linux/types.h>
+#include <linux/uaccess.h>
#include <linux/uio.h>
#include <asm/barrier.h>
@@ -76,6 +77,9 @@ void mte_sync_tags(pte_t old_pte, pte_t pte)
mte_sync_page_tags(page, old_pte, check_swap,
pte_is_tagged);
}
+
+ /* ensure the tags are visible before the PTE is set */
+ smp_wmb();
}
int memcmp_pages(struct page *page1, struct page *page2)
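
The smp_wmb() added to mte_sync_tags() pairs with the reader's dependency on the PTE value: anyone who observes the new PTE must also observe the tags written before it. Schematically (an illustrative fragment; write_tags() is a hypothetical stand-in for the tag initialisation done in mte_sync_page_tags()):

        write_tags(page);                       /* 1: tags initialised */
        smp_wmb();                              /* 2: orders (1) before (3) */
        set_pte_at(mm, addr, ptep, pte);        /* 3: mapping published */

        /*
         * A CPU that sees the PTE from (3) and then loads through it is
         * guaranteed to see the tags from (1).
         */
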
@@ -106,7 +110,8 @@ int memcmp_pages(struct page *page1, struct page *page2)
static inline void __mte_enable_kernel(const char *mode, unsigned long tcf)
{
/* Enable MTE Sync Mode for EL1. */
- sysreg_clear_set(sctlr_el1, SCTLR_ELx_TCF_MASK, tcf);
+ sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF_MASK,
+ SYS_FIELD_PREP(SCTLR_EL1, TCF, tcf));
isb();
pr_info_once("MTE: enabled in %s mode at EL1\n", mode);
@@ -122,12 +127,12 @@ void mte_enable_kernel_sync(void)
WARN_ONCE(system_uses_mte_async_or_asymm_mode(),
"MTE async mode enabled system wide!");
- __mte_enable_kernel("synchronous", SCTLR_ELx_TCF_SYNC);
+ __mte_enable_kernel("synchronous", SCTLR_EL1_TCF_SYNC);
}
void mte_enable_kernel_async(void)
{
- __mte_enable_kernel("asynchronous", SCTLR_ELx_TCF_ASYNC);
+ __mte_enable_kernel("asynchronous", SCTLR_EL1_TCF_ASYNC);
/*
* MTE async mode is set system wide by the first PE that
@@ -144,7 +149,7 @@ void mte_enable_kernel_async(void)
void mte_enable_kernel_asymm(void)
{
if (cpus_have_cap(ARM64_MTE_ASYMM)) {
- __mte_enable_kernel("asymmetric", SCTLR_ELx_TCF_ASYMM);
+ __mte_enable_kernel("asymmetric", SCTLR_EL1_TCF_ASYMM);
/*
* MTE asymm mode behaves as async mode for store
@@ -216,11 +221,11 @@ static void mte_update_sctlr_user(struct task_struct *task)
* default order.
*/
if (resolved_mte_tcf & MTE_CTRL_TCF_ASYMM)
- sctlr |= SCTLR_EL1_TCF0_ASYMM;
+ sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, ASYMM);
else if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC)
- sctlr |= SCTLR_EL1_TCF0_ASYNC;
+ sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, ASYNC);
else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC)
- sctlr |= SCTLR_EL1_TCF0_SYNC;
+ sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, SYNC);
task->thread.sctlr_user = sctlr;
}
@@ -543,3 +548,32 @@ static int register_mte_tcf_preferred_sysctl(void)
return 0;
}
subsys_initcall(register_mte_tcf_preferred_sysctl);
+
+/*
+ * Return 0 on success, the number of bytes not probed otherwise.
+ */
+size_t mte_probe_user_range(const char __user *uaddr, size_t size)
+{
+ const char __user *end = uaddr + size;
+ int err = 0;
+ char val;
+
+ __raw_get_user(val, uaddr, err);
+ if (err)
+ return size;
+
+ uaddr = PTR_ALIGN(uaddr, MTE_GRANULE_SIZE);
+ while (uaddr < end) {
+ /*
+ * A read is sufficient for MTE; the caller should have probed
+ * for the PTE write permission if required.
+ */
+ __raw_get_user(val, uaddr, err);
+ if (err)
+ return end - uaddr;
+ uaddr += MTE_GRANULE_SIZE;
+ }
+ (void)val;
+
+ return 0;
+}
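
A usage sketch for the new helper (the caller and its names are hypothetical, modelled on sub-page fault probing): a copy routine can shrink its request by the reported remainder and transfer only the leading accessible bytes.

        static unsigned long copy_tag_checked(void *dst, const char __user *src,
                                              size_t len)
        {
                size_t not_probed = mte_probe_user_range(src, len);

                if (not_probed)
                        len -= not_probed;      /* trailing granules faulted */

                return copy_from_user(dst, src, len);
        }
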
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index 75fed4460407..57c7c211f8c7 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -35,7 +35,7 @@ static u64 native_steal_clock(int cpu)
DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
struct pv_time_stolen_time_region {
- struct pvclock_vcpu_stolen_time *kaddr;
+ struct pvclock_vcpu_stolen_time __rcu *kaddr;
};
static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region);
@@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_stealacc);
/* return stolen time in ns by asking the hypervisor */
static u64 para_steal_clock(int cpu)
{
+ struct pvclock_vcpu_stolen_time *kaddr = NULL;
struct pv_time_stolen_time_region *reg;
+ u64 ret = 0;
reg = per_cpu_ptr(&stolen_time_region, cpu);
@@ -61,28 +63,37 @@ static u64 para_steal_clock(int cpu)
* online notification callback runs. Until the callback
* has run we just return zero.
*/
- if (!reg->kaddr)
+ rcu_read_lock();
+ kaddr = rcu_dereference(reg->kaddr);
+ if (!kaddr) {
+ rcu_read_unlock();
return 0;
+ }
- return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
+ ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));
+ rcu_read_unlock();
+ return ret;
}
static int stolen_time_cpu_down_prepare(unsigned int cpu)
{
+ struct pvclock_vcpu_stolen_time *kaddr = NULL;
struct pv_time_stolen_time_region *reg;
reg = this_cpu_ptr(&stolen_time_region);
if (!reg->kaddr)
return 0;
- memunmap(reg->kaddr);
- memset(reg, 0, sizeof(*reg));
+ kaddr = rcu_replace_pointer(reg->kaddr, NULL, true);
+ synchronize_rcu();
+ memunmap(kaddr);
return 0;
}
static int stolen_time_cpu_online(unsigned int cpu)
{
+ struct pvclock_vcpu_stolen_time *kaddr = NULL;
struct pv_time_stolen_time_region *reg;
struct arm_smccc_res res;
@@ -93,17 +104,19 @@ static int stolen_time_cpu_online(unsigned int cpu)
if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
return -EINVAL;
- reg->kaddr = memremap(res.a0,
+ kaddr = memremap(res.a0,
sizeof(struct pvclock_vcpu_stolen_time),
MEMREMAP_WB);
+ rcu_assign_pointer(reg->kaddr, kaddr);
+
if (!reg->kaddr) {
pr_warn("Failed to map stolen time data structure\n");
return -ENOMEM;
}
- if (le32_to_cpu(reg->kaddr->revision) != 0 ||
- le32_to_cpu(reg->kaddr->attributes) != 0) {
+ if (le32_to_cpu(kaddr->revision) != 0 ||
+ le32_to_cpu(kaddr->attributes) != 0) {
pr_warn_once("Unexpected revision or attributes in stolen time data\n");
return -ENXIO;
}
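
The paravirt conversion above is a textbook RCU publish/retire sequence. In isolation, with a hypothetical slot structure:

        struct data { u64 field; };
        struct slot { struct data __rcu *ptr; };

        static u64 slot_read(struct slot *s)
        {
                struct data *p;
                u64 val = 0;

                rcu_read_lock();
                p = rcu_dereference(s->ptr);
                if (p)
                        val = READ_ONCE(p->field);
                rcu_read_unlock();      /* p must not be used past here */

                return val;
        }

        static void slot_retire(struct slot *s)
        {
                struct data *p = rcu_replace_pointer(s->ptr, NULL, true);

                synchronize_rcu();      /* wait for in-flight readers */
                kfree(p);
        }
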
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index d9dfa82c1f18..d1d182320245 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -335,7 +335,7 @@ static void __kprobes kprobe_handler(struct pt_regs *regs)
}
static int __kprobes
-kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned int esr)
+kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned long esr)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
unsigned long addr = instruction_pointer(regs);
@@ -359,7 +359,7 @@ static struct break_hook kprobes_break_ss_hook = {
};
static int __kprobes
-kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr)
+kprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr)
{
kprobe_handler(regs);
return DBG_HOOK_HANDLED;
diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c
index 9be668f3f034..d49aef2657cd 100644
--- a/arch/arm64/kernel/probes/uprobes.c
+++ b/arch/arm64/kernel/probes/uprobes.c
@@ -166,7 +166,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self,
}
static int uprobe_breakpoint_handler(struct pt_regs *regs,
- unsigned int esr)
+ unsigned long esr)
{
if (uprobe_pre_sstep_notifier(regs))
return DBG_HOOK_HANDLED;
@@ -175,7 +175,7 @@ static int uprobe_breakpoint_handler(struct pt_regs *regs,
}
static int uprobe_single_step_handler(struct pt_regs *regs,
- unsigned int esr)
+ unsigned long esr)
{
struct uprobe_task *utask = current->utask;
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 7fa97df55e3a..9734c9fb1a32 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -250,6 +250,8 @@ void show_regs(struct pt_regs *regs)
static void tls_thread_flush(void)
{
write_sysreg(0, tpidr_el0);
+ if (system_supports_tpidr2())
+ write_sysreg_s(0, SYS_TPIDR2_EL0);
if (is_compat_task()) {
current->thread.uw.tp_value = 0;
@@ -298,16 +300,42 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
/*
* Detach src's sve_state (if any) from dst so that it does not
- * get erroneously used or freed prematurely. dst's sve_state
+ * get erroneously used or freed prematurely. dst's copies
* will be allocated on demand later on if dst uses SVE.
* For consistency, also clear TIF_SVE here: this could be done
* later in copy_process(), but to avoid tripping up future
- * maintainers it is best not to leave TIF_SVE and sve_state in
+ * maintainers it is best not to leave TIF flags and buffers in
* an inconsistent state, even temporarily.
*/
dst->thread.sve_state = NULL;
clear_tsk_thread_flag(dst, TIF_SVE);
+ /*
+ * In the unlikely event that we create a new thread with ZA
+ * enabled we should retain the ZA state, so duplicate it here.
+ * It may shortly be freed again if we exec() or if CLONE_SETTLS
+ * is set, but it's simpler to do it here. To avoid confusing the
+ * rest of the code, ensure that we have a sve_state allocated
+ * whenever za_state is allocated.
+ */
+ if (thread_za_enabled(&src->thread)) {
+ dst->thread.sve_state = kzalloc(sve_state_size(src),
+ GFP_KERNEL);
+ if (!dst->thread.sve_state)
+ return -ENOMEM;
+ dst->thread.za_state = kmemdup(src->thread.za_state,
+ za_state_size(src),
+ GFP_KERNEL);
+ if (!dst->thread.za_state) {
+ kfree(dst->thread.sve_state);
+ dst->thread.sve_state = NULL;
+ return -ENOMEM;
+ }
+ } else {
+ dst->thread.za_state = NULL;
+ clear_tsk_thread_flag(dst, TIF_SME);
+ }
+
/* clear any pending asynchronous tag fault raised by the parent */
clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT);
@@ -343,6 +371,8 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
* out-of-sync with the saved value.
*/
*task_user_tls(p) = read_sysreg(tpidr_el0);
+ if (system_supports_tpidr2())
+ p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
if (stack_start) {
if (is_compat_thread(task_thread_info(p)))
@@ -353,10 +383,12 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
/*
* If a TLS pointer was passed to clone, use it for the new
- * thread.
+ * thread. We also reset TPIDR2 if it's in use.
*/
- if (clone_flags & CLONE_SETTLS)
+ if (clone_flags & CLONE_SETTLS) {
p->thread.uw.tp_value = tls;
+ p->thread.tpidr2_el0 = 0;
+ }
} else {
/*
* A kthread has no context to ERET to, so ensure any buggy
@@ -387,6 +419,8 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
void tls_preserve_current_state(void)
{
*task_user_tls(current) = read_sysreg(tpidr_el0);
+ if (system_supports_tpidr2() && !is_compat_task())
+ current->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
}
static void tls_thread_switch(struct task_struct *next)
@@ -399,6 +433,8 @@ static void tls_thread_switch(struct task_struct *next)
write_sysreg(0, tpidrro_el0);
write_sysreg(*task_user_tls(next), tpidr_el0);
+ if (system_supports_tpidr2())
+ write_sysreg_s(next->thread.tpidr2_el0, SYS_TPIDR2_EL0);
}
/*
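
The two-buffer duplication in arch_dup_task_struct() follows the usual rollback shape; reduced to a skeleton (hypothetical helper, not patch code):

        static int dup_pair(void **a, void **b, const void *src_b,
                            size_t a_size, size_t b_size)
        {
                *a = kzalloc(a_size, GFP_KERNEL);
                if (!*a)
                        return -ENOMEM;

                *b = kmemdup(src_b, b_size, GFP_KERNEL);
                if (!*b) {
                        kfree(*a);      /* undo the first allocation */
                        *a = NULL;
                        return -ENOMEM;
                }

                return 0;
        }
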
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 230a47b9189e..21da83187a60 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -713,21 +713,51 @@ static int system_call_set(struct task_struct *target,
#ifdef CONFIG_ARM64_SVE
static void sve_init_header_from_task(struct user_sve_header *header,
- struct task_struct *target)
+ struct task_struct *target,
+ enum vec_type type)
{
unsigned int vq;
+ bool active;
+ bool fpsimd_only;
+ enum vec_type task_type;
memset(header, 0, sizeof(*header));
- header->flags = test_tsk_thread_flag(target, TIF_SVE) ?
- SVE_PT_REGS_SVE : SVE_PT_REGS_FPSIMD;
- if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT))
- header->flags |= SVE_PT_VL_INHERIT;
+ /* Check if the requested registers are active for the task */
+ if (thread_sm_enabled(&target->thread))
+ task_type = ARM64_VEC_SME;
+ else
+ task_type = ARM64_VEC_SVE;
+ active = (task_type == type);
+
+ switch (type) {
+ case ARM64_VEC_SVE:
+ if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT))
+ header->flags |= SVE_PT_VL_INHERIT;
+ fpsimd_only = !test_tsk_thread_flag(target, TIF_SVE);
+ break;
+ case ARM64_VEC_SME:
+ if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT))
+ header->flags |= SVE_PT_VL_INHERIT;
+ fpsimd_only = false;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return;
+ }
- header->vl = task_get_sve_vl(target);
+ if (active) {
+ if (fpsimd_only) {
+ header->flags |= SVE_PT_REGS_FPSIMD;
+ } else {
+ header->flags |= SVE_PT_REGS_SVE;
+ }
+ }
+
+ header->vl = task_get_vl(target, type);
vq = sve_vq_from_vl(header->vl);
- header->max_vl = sve_max_vl();
+ header->max_vl = vec_max_vl(type);
header->size = SVE_PT_SIZE(vq, header->flags);
header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl),
SVE_PT_REGS_SVE);
@@ -738,19 +768,17 @@ static unsigned int sve_size_from_header(struct user_sve_header const *header)
return ALIGN(header->size, SVE_VQ_BYTES);
}
-static int sve_get(struct task_struct *target,
- const struct user_regset *regset,
- struct membuf to)
+static int sve_get_common(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to,
+ enum vec_type type)
{
struct user_sve_header header;
unsigned int vq;
unsigned long start, end;
- if (!system_supports_sve())
- return -EINVAL;
-
/* Header */
- sve_init_header_from_task(&header, target);
+ sve_init_header_from_task(&header, target, type);
vq = sve_vq_from_vl(header.vl);
membuf_write(&to, &header, sizeof(header));
@@ -758,49 +786,61 @@ static int sve_get(struct task_struct *target,
if (target == current)
fpsimd_preserve_current_state();
- /* Registers: FPSIMD-only case */
-
BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header));
- if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD)
+ BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
+
+ switch ((header.flags & SVE_PT_REGS_MASK)) {
+ case SVE_PT_REGS_FPSIMD:
return __fpr_get(target, regset, to);
- /* Otherwise: full SVE case */
+ case SVE_PT_REGS_SVE:
+ start = SVE_PT_SVE_OFFSET;
+ end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
+ membuf_write(&to, target->thread.sve_state, end - start);
- BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
- start = SVE_PT_SVE_OFFSET;
- end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
- membuf_write(&to, target->thread.sve_state, end - start);
+ start = end;
+ end = SVE_PT_SVE_FPSR_OFFSET(vq);
+ membuf_zero(&to, end - start);
- start = end;
- end = SVE_PT_SVE_FPSR_OFFSET(vq);
- membuf_zero(&to, end - start);
+ /*
+ * Copy fpsr, and fpcr which must follow contiguously in
+ * struct fpsimd_state:
+ */
+ start = end;
+ end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
+ membuf_write(&to, &target->thread.uw.fpsimd_state.fpsr,
+ end - start);
- /*
- * Copy fpsr, and fpcr which must follow contiguously in
- * struct fpsimd_state:
- */
- start = end;
- end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
- membuf_write(&to, &target->thread.uw.fpsimd_state.fpsr, end - start);
+ start = end;
+ end = sve_size_from_header(&header);
+ return membuf_zero(&to, end - start);
- start = end;
- end = sve_size_from_header(&header);
- return membuf_zero(&to, end - start);
+ default:
+ return 0;
+ }
}
-static int sve_set(struct task_struct *target,
+static int sve_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
+ struct membuf to)
+{
+ if (!system_supports_sve())
+ return -EINVAL;
+
+ return sve_get_common(target, regset, to, ARM64_VEC_SVE);
+}
+
+static int sve_set_common(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf,
+ enum vec_type type)
{
int ret;
struct user_sve_header header;
unsigned int vq;
unsigned long start, end;
- if (!system_supports_sve())
- return -EINVAL;
-
/* Header */
if (count < sizeof(header))
return -EINVAL;
@@ -813,13 +853,37 @@ static int sve_set(struct task_struct *target,
* Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are consumed by
* vec_set_vector_length(), which will also validate them for us:
*/
- ret = vec_set_vector_length(target, ARM64_VEC_SVE, header.vl,
+ ret = vec_set_vector_length(target, type, header.vl,
((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16);
if (ret)
goto out;
/* Actual VL set may be less than the user asked for: */
- vq = sve_vq_from_vl(task_get_sve_vl(target));
+ vq = sve_vq_from_vl(task_get_vl(target, type));
+
+ /* Enter/exit streaming mode */
+ if (system_supports_sme()) {
+ u64 old_svcr = target->thread.svcr;
+
+ switch (type) {
+ case ARM64_VEC_SVE:
+ target->thread.svcr &= ~SVCR_SM_MASK;
+ break;
+ case ARM64_VEC_SME:
+ target->thread.svcr |= SVCR_SM_MASK;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
+
+ /*
+ * If we switched then invalidate any existing SVE
+ * state and ensure there's storage.
+ */
+ if (target->thread.svcr != old_svcr)
+ sve_alloc(target);
+ }
/* Registers: FPSIMD-only case */
@@ -828,10 +892,15 @@ static int sve_set(struct task_struct *target,
ret = __fpr_set(target, regset, pos, count, kbuf, ubuf,
SVE_PT_FPSIMD_OFFSET);
clear_tsk_thread_flag(target, TIF_SVE);
+ if (type == ARM64_VEC_SME)
+ fpsimd_force_sync_to_sve(target);
goto out;
}
- /* Otherwise: full SVE case */
+ /*
+ * Otherwise: no registers or full SVE case. For backwards
+ * compatibility reasons we treat empty flags as SVE registers.
+ */
/*
* If setting a different VL from the requested VL and there is
@@ -852,8 +921,9 @@ static int sve_set(struct task_struct *target,
/*
* Ensure target->thread.sve_state is up to date with target's
- * FPSIMD regs, so that a short copyin leaves trailing registers
- * unmodified.
+ * FPSIMD regs, so that a short copyin leaves trailing
+ * registers unmodified. Always enable SVE even if going into
+ * streaming mode.
*/
fpsimd_sync_to_sve(target);
set_tsk_thread_flag(target, TIF_SVE);
@@ -889,8 +959,181 @@ out:
return ret;
}
+static int sve_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ if (!system_supports_sve())
+ return -EINVAL;
+
+ return sve_set_common(target, regset, pos, count, kbuf, ubuf,
+ ARM64_VEC_SVE);
+}
+
#endif /* CONFIG_ARM64_SVE */
+#ifdef CONFIG_ARM64_SME
+
+static int ssve_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ if (!system_supports_sme())
+ return -EINVAL;
+
+ return sve_get_common(target, regset, to, ARM64_VEC_SME);
+}
+
+static int ssve_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ if (!system_supports_sme())
+ return -EINVAL;
+
+ return sve_set_common(target, regset, pos, count, kbuf, ubuf,
+ ARM64_VEC_SME);
+}
+
+static int za_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ struct user_za_header header;
+ unsigned int vq;
+ unsigned long start, end;
+
+ if (!system_supports_sme())
+ return -EINVAL;
+
+ /* Header */
+ memset(&header, 0, sizeof(header));
+
+ if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT))
+ header.flags |= ZA_PT_VL_INHERIT;
+
+ header.vl = task_get_sme_vl(target);
+ vq = sve_vq_from_vl(header.vl);
+ header.max_vl = sme_max_vl();
+ header.max_size = ZA_PT_SIZE(vq);
+
+ /* If ZA is not active there is only the header */
+ if (thread_za_enabled(&target->thread))
+ header.size = ZA_PT_SIZE(vq);
+ else
+ header.size = ZA_PT_ZA_OFFSET;
+
+ membuf_write(&to, &header, sizeof(header));
+
+ BUILD_BUG_ON(ZA_PT_ZA_OFFSET != sizeof(header));
+ end = ZA_PT_ZA_OFFSET;
+
+ if (target == current)
+ fpsimd_preserve_current_state();
+
+ /* Any register data to include? */
+ if (thread_za_enabled(&target->thread)) {
+ start = end;
+ end = ZA_PT_SIZE(vq);
+ membuf_write(&to, target->thread.za_state, end - start);
+ }
+
+ /* Zero any trailing padding */
+ start = end;
+ end = ALIGN(header.size, SVE_VQ_BYTES);
+ return membuf_zero(&to, end - start);
+}
+
+static int za_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+ struct user_za_header header;
+ unsigned int vq;
+ unsigned long start, end;
+
+ if (!system_supports_sme())
+ return -EINVAL;
+
+ /* Header */
+ if (count < sizeof(header))
+ return -EINVAL;
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &header,
+ 0, sizeof(header));
+ if (ret)
+ goto out;
+
+ /*
+ * All current ZA_PT_* flags are consumed by
+ * vec_set_vector_length(), which will also validate them for
+ * us:
+ */
+ ret = vec_set_vector_length(target, ARM64_VEC_SME, header.vl,
+ ((unsigned long)header.flags) << 16);
+ if (ret)
+ goto out;
+
+ /* Actual VL set may be less than the user asked for: */
+ vq = sve_vq_from_vl(task_get_sme_vl(target));
+
+ /* Ensure there is some SVE storage for streaming mode */
+ if (!target->thread.sve_state) {
+ sve_alloc(target);
+ if (!target->thread.sve_state) {
+ clear_tsk_thread_flag(target, TIF_SME);
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ /* Allocate/reinit ZA storage */
+ sme_alloc(target);
+ if (!target->thread.za_state) {
+ ret = -ENOMEM;
+ clear_tsk_thread_flag(target, TIF_SME);
+ goto out;
+ }
+
+ /* If there is no data then disable ZA */
+ if (!count) {
+ target->thread.svcr &= ~SVCR_ZA_MASK;
+ goto out;
+ }
+
+ /*
+ * If setting a different VL from the requested VL and there is
+ * register data, the data layout will be wrong: don't even
+ * try to set the registers in this case.
+ */
+ if (vq != sve_vq_from_vl(header.vl)) {
+ ret = -EIO;
+ goto out;
+ }
+
+ BUILD_BUG_ON(ZA_PT_ZA_OFFSET != sizeof(header));
+ start = ZA_PT_ZA_OFFSET;
+ end = ZA_PT_SIZE(vq);
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ target->thread.za_state,
+ start, end);
+ if (ret)
+ goto out;
+
+ /* Mark ZA as active and let userspace use it */
+ set_tsk_thread_flag(target, TIF_SME);
+ target->thread.svcr |= SVCR_ZA_MASK;
+
+out:
+ fpsimd_flush_task_state(target);
+ return ret;
+}
+
+#endif /* CONFIG_ARM64_SME */
+
#ifdef CONFIG_ARM64_PTR_AUTH
static int pac_mask_get(struct task_struct *target,
const struct user_regset *regset,
@@ -1108,6 +1351,10 @@ enum aarch64_regset {
#ifdef CONFIG_ARM64_SVE
REGSET_SVE,
#endif
+#ifdef CONFIG_ARM64_SME
+ REGSET_SSVE,
+ REGSET_ZA,
+#endif
#ifdef CONFIG_ARM64_PTR_AUTH
REGSET_PAC_MASK,
REGSET_PAC_ENABLED_KEYS,
@@ -1188,6 +1435,33 @@ static const struct user_regset aarch64_regsets[] = {
.set = sve_set,
},
#endif
+#ifdef CONFIG_ARM64_SME
+ [REGSET_SSVE] = { /* Streaming mode SVE */
+ .core_note_type = NT_ARM_SSVE,
+ .n = DIV_ROUND_UP(SVE_PT_SIZE(SME_VQ_MAX, SVE_PT_REGS_SVE),
+ SVE_VQ_BYTES),
+ .size = SVE_VQ_BYTES,
+ .align = SVE_VQ_BYTES,
+ .regset_get = ssve_get,
+ .set = ssve_set,
+ },
+ [REGSET_ZA] = { /* SME ZA */
+ .core_note_type = NT_ARM_ZA,
+ /*
+ * ZA is a single register but it's variably sized and
+ * the ptrace core requires that the size of any data
+ * be an exact multiple of the configured register
+ * size so report as though we had SVE_VQ_BYTES
+ * registers. These values aren't exposed to
+ * userspace.
+ */
+ .n = DIV_ROUND_UP(ZA_PT_SIZE(SME_VQ_MAX), SVE_VQ_BYTES),
+ .size = SVE_VQ_BYTES,
+ .align = SVE_VQ_BYTES,
+ .regset_get = za_get,
+ .set = za_set,
+ },
+#endif
#ifdef CONFIG_ARM64_PTR_AUTH
[REGSET_PAC_MASK] = {
.core_note_type = NT_ARM_PAC_MASK,
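
From user space the new regsets are reached via PTRACE_GETREGSET. A minimal sketch of reading the ZA header (assumes uapi headers from a kernel carrying this patch; error handling elided):

        #include <sys/ptrace.h>
        #include <sys/types.h>
        #include <sys/uio.h>
        #include <linux/elf.h>          /* NT_ARM_ZA */
        #include <asm/ptrace.h>         /* struct user_za_header */

        long read_za_header(pid_t pid, struct user_za_header *hdr)
        {
                struct iovec iov = {
                        .iov_base = hdr,
                        .iov_len  = sizeof(*hdr),       /* header only */
                };

                return ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZA, &iov);
        }
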
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
index f0a3df9e18a3..413f899e4ac6 100644
--- a/arch/arm64/kernel/relocate_kernel.S
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -37,6 +37,15 @@
* safe memory that has been set up to be preserved during the copy operation.
*/
SYM_CODE_START(arm64_relocate_new_kernel)
+ /*
+ * The kimage structure isn't allocated specially and may be clobbered
+ * during relocation. We must load any values we need from it prior to
+ * any relocation occurring.
+ */
+ ldr x28, [x0, #KIMAGE_START]
+ ldr x27, [x0, #KIMAGE_ARCH_EL2_VECTORS]
+ ldr x26, [x0, #KIMAGE_ARCH_DTB_MEM]
+
/* Setup the list loop variables. */
ldr x18, [x0, #KIMAGE_ARCH_ZERO_PAGE] /* x18 = zero page for BBM */
ldr x17, [x0, #KIMAGE_ARCH_TTBR1] /* x17 = linear map copy */
@@ -72,21 +81,20 @@ SYM_CODE_START(arm64_relocate_new_kernel)
ic iallu
dsb nsh
isb
- ldr x4, [x0, #KIMAGE_START] /* relocation start */
- ldr x1, [x0, #KIMAGE_ARCH_EL2_VECTORS] /* relocation start */
- ldr x0, [x0, #KIMAGE_ARCH_DTB_MEM] /* dtb address */
turn_off_mmu x12, x13
/* Start new image. */
- cbz x1, .Lel1
- mov x1, x4 /* relocation start */
- mov x2, x0 /* dtb address */
+ cbz x27, .Lel1
+ mov x1, x28 /* kernel entry point */
+ mov x2, x26 /* dtb address */
mov x3, xzr
mov x4, xzr
mov x0, #HVC_SOFT_RESTART
hvc #0 /* Jumps from el2 */
.Lel1:
+ mov x0, x26 /* dtb address */
+ mov x1, xzr
mov x2, xzr
mov x3, xzr
- br x4 /* Jumps from el1 */
+ br x28 /* Jumps from el1 */
SYM_CODE_END(arm64_relocate_new_kernel)
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 3505789cf4bd..fea3223704b6 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -225,6 +225,8 @@ static void __init request_standard_resources(void)
kernel_code.end = __pa_symbol(__init_begin - 1);
kernel_data.start = __pa_symbol(_sdata);
kernel_data.end = __pa_symbol(_end - 1);
+ insert_resource(&iomem_resource, &kernel_code);
+ insert_resource(&iomem_resource, &kernel_data);
num_standard_resources = memblock.memory.cnt;
res_size = num_standard_resources * sizeof(*standard_resources);
@@ -246,20 +248,7 @@ static void __init request_standard_resources(void)
res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
}
- request_resource(&iomem_resource, res);
-
- if (kernel_code.start >= res->start &&
- kernel_code.end <= res->end)
- request_resource(res, &kernel_code);
- if (kernel_data.start >= res->start &&
- kernel_data.end <= res->end)
- request_resource(res, &kernel_data);
-#ifdef CONFIG_KEXEC_CORE
- /* Userspace will find "Crash kernel" region in /proc/iomem. */
- if (crashk_res.end && crashk_res.start >= res->start &&
- crashk_res.end <= res->end)
- request_resource(res, &crashk_res);
-#endif
+ insert_resource(&iomem_resource, res);
}
}
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 4a4122ef6f39..edb2d9206a78 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -56,6 +56,7 @@ struct rt_sigframe_user_layout {
unsigned long fpsimd_offset;
unsigned long esr_offset;
unsigned long sve_offset;
+ unsigned long za_offset;
unsigned long extra_offset;
unsigned long end_offset;
};
@@ -218,6 +219,7 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
struct user_ctxs {
struct fpsimd_context __user *fpsimd;
struct sve_context __user *sve;
+ struct za_context __user *za;
};
#ifdef CONFIG_ARM64_SVE
@@ -226,11 +228,17 @@ static int preserve_sve_context(struct sve_context __user *ctx)
{
int err = 0;
u16 reserved[ARRAY_SIZE(ctx->__reserved)];
+ u16 flags = 0;
unsigned int vl = task_get_sve_vl(current);
unsigned int vq = 0;
- if (test_thread_flag(TIF_SVE))
+ if (thread_sm_enabled(&current->thread)) {
+ vl = task_get_sme_vl(current);
vq = sve_vq_from_vl(vl);
+ flags |= SVE_SIG_FLAG_SM;
+ } else if (test_thread_flag(TIF_SVE)) {
+ vq = sve_vq_from_vl(vl);
+ }
memset(reserved, 0, sizeof(reserved));
@@ -238,6 +246,7 @@ static int preserve_sve_context(struct sve_context __user *ctx)
__put_user_error(round_up(SVE_SIG_CONTEXT_SIZE(vq), 16),
&ctx->head.size, err);
__put_user_error(vl, &ctx->vl, err);
+ __put_user_error(flags, &ctx->flags, err);
BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved));
err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
@@ -258,18 +267,28 @@ static int preserve_sve_context(struct sve_context __user *ctx)
static int restore_sve_fpsimd_context(struct user_ctxs *user)
{
int err;
- unsigned int vq;
+ unsigned int vl, vq;
struct user_fpsimd_state fpsimd;
struct sve_context sve;
if (__copy_from_user(&sve, user->sve, sizeof(sve)))
return -EFAULT;
- if (sve.vl != task_get_sve_vl(current))
+ if (sve.flags & SVE_SIG_FLAG_SM) {
+ if (!system_supports_sme())
+ return -EINVAL;
+
+ vl = task_get_sme_vl(current);
+ } else {
+ vl = task_get_sve_vl(current);
+ }
+
+ if (sve.vl != vl)
return -EINVAL;
if (sve.head.size <= sizeof(*user->sve)) {
clear_thread_flag(TIF_SVE);
+ current->thread.svcr &= ~SVCR_SM_MASK;
goto fpsimd_only;
}
@@ -301,7 +320,10 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
if (err)
return -EFAULT;
- set_thread_flag(TIF_SVE);
+ if (sve.flags & SVE_SIG_FLAG_SM)
+ current->thread.svcr |= SVCR_SM_MASK;
+ else
+ set_thread_flag(TIF_SVE);
fpsimd_only:
/* copy the FP and status/control registers */
@@ -326,6 +348,101 @@ extern int restore_sve_fpsimd_context(struct user_ctxs *user);
#endif /* ! CONFIG_ARM64_SVE */
+#ifdef CONFIG_ARM64_SME
+
+static int preserve_za_context(struct za_context __user *ctx)
+{
+ int err = 0;
+ u16 reserved[ARRAY_SIZE(ctx->__reserved)];
+ unsigned int vl = task_get_sme_vl(current);
+ unsigned int vq;
+
+ if (thread_za_enabled(&current->thread))
+ vq = sve_vq_from_vl(vl);
+ else
+ vq = 0;
+
+ memset(reserved, 0, sizeof(reserved));
+
+ __put_user_error(ZA_MAGIC, &ctx->head.magic, err);
+ __put_user_error(round_up(ZA_SIG_CONTEXT_SIZE(vq), 16),
+ &ctx->head.size, err);
+ __put_user_error(vl, &ctx->vl, err);
+ BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved));
+ err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
+
+ if (vq) {
+ /*
+ * This assumes that the ZA state has already been saved to
+ * the task struct by calling the function
+ * fpsimd_signal_preserve_current_state().
+ */
+ err |= __copy_to_user((char __user *)ctx + ZA_SIG_REGS_OFFSET,
+ current->thread.za_state,
+ ZA_SIG_REGS_SIZE(vq));
+ }
+
+ return err ? -EFAULT : 0;
+}
+
+static int restore_za_context(struct user_ctxs *user)
+{
+ int err;
+ unsigned int vq;
+ struct za_context za;
+
+ if (__copy_from_user(&za, user->za, sizeof(za)))
+ return -EFAULT;
+
+ if (za.vl != task_get_sme_vl(current))
+ return -EINVAL;
+
+ if (za.head.size <= sizeof(*user->za)) {
+ current->thread.svcr &= ~SVCR_ZA_MASK;
+ return 0;
+ }
+
+ vq = sve_vq_from_vl(za.vl);
+
+ if (za.head.size < ZA_SIG_CONTEXT_SIZE(vq))
+ return -EINVAL;
+
+ /*
+ * Careful: we are about to __copy_from_user() directly into
+ * thread.za_state with preemption enabled, so protection is
+ * needed to prevent a racing context switch from writing stale
+ * registers back over the new data.
+ */
+
+ fpsimd_flush_task_state(current);
+ /* From now, fpsimd_thread_switch() won't touch thread.za_state */
+
+ sme_alloc(current);
+ if (!current->thread.za_state) {
+ current->thread.svcr &= ~SVCR_ZA_MASK;
+ clear_thread_flag(TIF_SME);
+ return -ENOMEM;
+ }
+
+ err = __copy_from_user(current->thread.za_state,
+ (char __user const *)user->za +
+ ZA_SIG_REGS_OFFSET,
+ ZA_SIG_REGS_SIZE(vq));
+ if (err)
+ return -EFAULT;
+
+ set_thread_flag(TIF_SME);
+ current->thread.svcr |= SVCR_ZA_MASK;
+
+ return 0;
+}
+#else /* ! CONFIG_ARM64_SME */
+
+/* Turn any non-optimised out attempts to use these into a link error: */
+extern int preserve_za_context(void __user *ctx);
+extern int restore_za_context(struct user_ctxs *user);
+
+#endif /* ! CONFIG_ARM64_SME */
static int parse_user_sigframe(struct user_ctxs *user,
struct rt_sigframe __user *sf)
@@ -340,6 +457,7 @@ static int parse_user_sigframe(struct user_ctxs *user,
user->fpsimd = NULL;
user->sve = NULL;
+ user->za = NULL;
if (!IS_ALIGNED((unsigned long)base, 16))
goto invalid;
@@ -393,7 +511,7 @@ static int parse_user_sigframe(struct user_ctxs *user,
break;
case SVE_MAGIC:
- if (!system_supports_sve())
+ if (!system_supports_sve() && !system_supports_sme())
goto invalid;
if (user->sve)
@@ -405,6 +523,19 @@ static int parse_user_sigframe(struct user_ctxs *user,
user->sve = (struct sve_context __user *)head;
break;
+ case ZA_MAGIC:
+ if (!system_supports_sme())
+ goto invalid;
+
+ if (user->za)
+ goto invalid;
+
+ if (size < sizeof(*user->za))
+ goto invalid;
+
+ user->za = (struct za_context __user *)head;
+ break;
+
case EXTRA_MAGIC:
if (have_extra_context)
goto invalid;
@@ -528,6 +659,9 @@ static int restore_sigframe(struct pt_regs *regs,
}
}
+ if (err == 0 && system_supports_sme() && user.za)
+ err = restore_za_context(&user);
+
return err;
}
@@ -594,11 +728,12 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
if (system_supports_sve()) {
unsigned int vq = 0;
- if (add_all || test_thread_flag(TIF_SVE)) {
- int vl = sve_max_vl();
+ if (add_all || test_thread_flag(TIF_SVE) ||
+ thread_sm_enabled(&current->thread)) {
+ int vl = max(sve_max_vl(), sme_max_vl());
if (!add_all)
- vl = task_get_sve_vl(current);
+ vl = thread_get_cur_vl(&current->thread);
vq = sve_vq_from_vl(vl);
}
@@ -609,6 +744,24 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
return err;
}
+ if (system_supports_sme()) {
+ unsigned int vl;
+ unsigned int vq = 0;
+
+ if (add_all)
+ vl = sme_max_vl();
+ else
+ vl = task_get_sme_vl(current);
+
+ if (thread_za_enabled(&current->thread))
+ vq = sve_vq_from_vl(vl);
+
+ err = sigframe_alloc(user, &user->za_offset,
+ ZA_SIG_CONTEXT_SIZE(vq));
+ if (err)
+ return err;
+ }
+
return sigframe_alloc_end(user);
}
@@ -649,13 +802,21 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
__put_user_error(current->thread.fault_code, &esr_ctx->esr, err);
}
- /* Scalable Vector Extension state, if present */
- if (system_supports_sve() && err == 0 && user->sve_offset) {
+ /* Scalable Vector Extension state (including streaming), if present */
+ if ((system_supports_sve() || system_supports_sme()) &&
+ err == 0 && user->sve_offset) {
struct sve_context __user *sve_ctx =
apply_user_offset(user, user->sve_offset);
err |= preserve_sve_context(sve_ctx);
}
+ /* ZA state if present */
+ if (system_supports_sme() && err == 0 && user->za_offset) {
+ struct za_context __user *za_ctx =
+ apply_user_offset(user, user->za_offset);
+ err |= preserve_za_context(za_ctx);
+ }
+
if (err == 0 && user->extra_offset) {
char __user *sfp = (char __user *)user->sigframe;
char __user *userp =
@@ -759,6 +920,13 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
/* TCO (Tag Check Override) always cleared for signal handlers */
regs->pstate &= ~PSR_TCO_BIT;
+ /* Signal handlers are invoked with ZA and streaming mode disabled */
+ if (system_supports_sme()) {
+ current->thread.svcr &= ~(SVCR_ZA_MASK |
+ SVCR_SM_MASK);
+ sme_smstop();
+ }
+
if (ka->sa.sa_flags & SA_RESTORER)
sigtramp = ka->sa.sa_restorer;
else
@@ -1011,6 +1179,7 @@ static_assert(offsetof(siginfo_t, si_upper) == 0x28);
static_assert(offsetof(siginfo_t, si_pkey) == 0x20);
static_assert(offsetof(siginfo_t, si_perf_data) == 0x18);
static_assert(offsetof(siginfo_t, si_perf_type) == 0x20);
+static_assert(offsetof(siginfo_t, si_perf_flags) == 0x24);
static_assert(offsetof(siginfo_t, si_band) == 0x10);
static_assert(offsetof(siginfo_t, si_fd) == 0x18);
static_assert(offsetof(siginfo_t, si_call_addr) == 0x10);
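
On the receiving side, the records that setup_sigframe() lays out can be located by walking the __reserved area of the sigcontext. A sketch (the EXTRA_MAGIC indirection is ignored for brevity; assumes <asm/sigcontext.h> from a patched kernel):

        #include <asm/sigcontext.h>

        static struct za_context *find_za(struct sigcontext *sc)
        {
                struct _aarch64_ctx *head = (struct _aarch64_ctx *)sc->__reserved;

                while (head->magic) {   /* a zero magic terminates the list */
                        if (head->magic == ZA_MAGIC)
                                return (struct za_context *)head;
                        head = (struct _aarch64_ctx *)((char *)head + head->size);
                }

                return NULL;
        }
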
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index d984282b979f..4700f8522d27 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -487,6 +487,7 @@ static_assert(offsetof(compat_siginfo_t, si_upper) == 0x18);
static_assert(offsetof(compat_siginfo_t, si_pkey) == 0x14);
static_assert(offsetof(compat_siginfo_t, si_perf_data) == 0x10);
static_assert(offsetof(compat_siginfo_t, si_perf_type) == 0x14);
+static_assert(offsetof(compat_siginfo_t, si_perf_flags) == 0x18);
static_assert(offsetof(compat_siginfo_t, si_band) == 0x0c);
static_assert(offsetof(compat_siginfo_t, si_fd) == 0x10);
static_assert(offsetof(compat_siginfo_t, si_call_addr) == 0x0c);
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 3b46041f2b97..62ed361a4376 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -512,6 +512,7 @@ struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu)
{
return &cpu_madt_gicc[cpu];
}
+EXPORT_SYMBOL_GPL(acpi_cpu_get_madt_gicc);
/*
* acpi_map_gic_cpu_interface - parse processor MADT entry
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index e4103e085681..0467cb79f080 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -19,43 +19,60 @@
#include <asm/stacktrace.h>
/*
- * AArch64 PCS assigns the frame pointer to x29.
+ * A snapshot of a frame record or fp/lr register values, along with some
+ * accounting information necessary for robust unwinding.
*
- * A simple function prologue looks like this:
- * sub sp, sp, #0x10
- * stp x29, x30, [sp]
- * mov x29, sp
+ * @fp: The fp value in the frame record (or the real fp)
+ * @pc: The lr value in the frame record (or the real lr)
*
- * A simple function epilogue looks like this:
- * mov sp, x29
- * ldp x29, x30, [sp]
- * add sp, sp, #0x10
+ * @stacks_done: Stacks which have been entirely unwound and to which it is
+ * no longer valid to unwind.
+ *
+ * @prev_fp: The fp that pointed to this frame record, or a synthetic value
+ * of 0. This is used to ensure that within a stack, each
+ * subsequent frame record is at an increasing address.
+ * @prev_type: The type of stack this frame record was on, or a synthetic
+ * value of STACK_TYPE_UNKNOWN. This is used to detect a
+ * transition from one stack to another.
+ *
+ * @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
+ * associated with the most recently encountered replacement lr
+ * value.
*/
+struct unwind_state {
+ unsigned long fp;
+ unsigned long pc;
+ DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
+ unsigned long prev_fp;
+ enum stack_type prev_type;
+#ifdef CONFIG_KRETPROBES
+ struct llist_node *kr_cur;
+#endif
+};
-
-static notrace void start_backtrace(struct stackframe *frame, unsigned long fp,
- unsigned long pc)
+static notrace void unwind_init(struct unwind_state *state, unsigned long fp,
+ unsigned long pc)
{
- frame->fp = fp;
- frame->pc = pc;
+ state->fp = fp;
+ state->pc = pc;
#ifdef CONFIG_KRETPROBES
- frame->kr_cur = NULL;
+ state->kr_cur = NULL;
#endif
/*
* Prime the first unwind.
*
- * In unwind_frame() we'll check that the FP points to a valid stack,
+ * In unwind_next() we'll check that the FP points to a valid stack,
* which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
* treated as a transition to whichever stack that happens to be. The
* prev_fp value won't be used, but we set it to 0 such that it is
* definitely not an accessible stack address.
*/
- bitmap_zero(frame->stacks_done, __NR_STACK_TYPES);
- frame->prev_fp = 0;
- frame->prev_type = STACK_TYPE_UNKNOWN;
+ bitmap_zero(state->stacks_done, __NR_STACK_TYPES);
+ state->prev_fp = 0;
+ state->prev_type = STACK_TYPE_UNKNOWN;
}
-NOKPROBE_SYMBOL(start_backtrace);
+NOKPROBE_SYMBOL(unwind_init);
/*
* Unwind from one frame record (A) to the next frame record (B).
@@ -64,15 +81,12 @@ NOKPROBE_SYMBOL(start_backtrace);
* records (e.g. a cycle), determined based on the location and fp value of A
* and the location (but not the fp value) of B.
*/
-static int notrace unwind_frame(struct task_struct *tsk,
- struct stackframe *frame)
+static int notrace unwind_next(struct task_struct *tsk,
+ struct unwind_state *state)
{
- unsigned long fp = frame->fp;
+ unsigned long fp = state->fp;
struct stack_info info;
- if (!tsk)
- tsk = current;
-
/* Final frame; nothing to unwind */
if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
return -ENOENT;
@@ -83,7 +97,7 @@ static int notrace unwind_frame(struct task_struct *tsk,
if (!on_accessible_stack(tsk, fp, 16, &info))
return -EINVAL;
- if (test_bit(info.type, frame->stacks_done))
+ if (test_bit(info.type, state->stacks_done))
return -EINVAL;
/*
@@ -99,27 +113,27 @@ static int notrace unwind_frame(struct task_struct *tsk,
* stack to another, it's never valid to unwind back to that first
* stack.
*/
- if (info.type == frame->prev_type) {
- if (fp <= frame->prev_fp)
+ if (info.type == state->prev_type) {
+ if (fp <= state->prev_fp)
return -EINVAL;
} else {
- set_bit(frame->prev_type, frame->stacks_done);
+ set_bit(state->prev_type, state->stacks_done);
}
/*
* Record this frame record's values and location. The prev_fp and
- * prev_type are only meaningful to the next unwind_frame() invocation.
+ * prev_type are only meaningful to the next unwind_next() invocation.
*/
- frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
- frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
- frame->prev_fp = fp;
- frame->prev_type = info.type;
+ state->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
+ state->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
+ state->prev_fp = fp;
+ state->prev_type = info.type;
- frame->pc = ptrauth_strip_insn_pac(frame->pc);
+ state->pc = ptrauth_strip_insn_pac(state->pc);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
if (tsk->ret_stack &&
- (frame->pc == (unsigned long)return_to_handler)) {
+ (state->pc == (unsigned long)return_to_handler)) {
unsigned long orig_pc;
/*
* This is a case where function graph tracer has
@@ -127,37 +141,37 @@ static int notrace unwind_frame(struct task_struct *tsk,
* to hook a function return.
* So replace it to an original value.
*/
- orig_pc = ftrace_graph_ret_addr(tsk, NULL, frame->pc,
- (void *)frame->fp);
- if (WARN_ON_ONCE(frame->pc == orig_pc))
+ orig_pc = ftrace_graph_ret_addr(tsk, NULL, state->pc,
+ (void *)state->fp);
+ if (WARN_ON_ONCE(state->pc == orig_pc))
return -EINVAL;
- frame->pc = orig_pc;
+ state->pc = orig_pc;
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
#ifdef CONFIG_KRETPROBES
- if (is_kretprobe_trampoline(frame->pc))
- frame->pc = kretprobe_find_ret_addr(tsk, (void *)frame->fp, &frame->kr_cur);
+ if (is_kretprobe_trampoline(state->pc))
+ state->pc = kretprobe_find_ret_addr(tsk, (void *)state->fp, &state->kr_cur);
#endif
return 0;
}
-NOKPROBE_SYMBOL(unwind_frame);
+NOKPROBE_SYMBOL(unwind_next);
-static void notrace walk_stackframe(struct task_struct *tsk,
- struct stackframe *frame,
- bool (*fn)(void *, unsigned long), void *data)
+static void notrace unwind(struct task_struct *tsk,
+ struct unwind_state *state,
+ stack_trace_consume_fn consume_entry, void *cookie)
{
while (1) {
int ret;
- if (!fn(data, frame->pc))
+ if (!consume_entry(cookie, state->pc))
break;
- ret = unwind_frame(tsk, frame);
+ ret = unwind_next(tsk, state);
if (ret < 0)
break;
}
}
-NOKPROBE_SYMBOL(walk_stackframe);
+NOKPROBE_SYMBOL(unwind);
static bool dump_backtrace_entry(void *arg, unsigned long where)
{
@@ -196,17 +210,17 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
void *cookie, struct task_struct *task,
struct pt_regs *regs)
{
- struct stackframe frame;
+ struct unwind_state state;
if (regs)
- start_backtrace(&frame, regs->regs[29], regs->pc);
+ unwind_init(&state, regs->regs[29], regs->pc);
else if (task == current)
- start_backtrace(&frame,
+ unwind_init(&state,
(unsigned long)__builtin_frame_address(1),
(unsigned long)__builtin_return_address(0));
else
- start_backtrace(&frame, thread_saved_fp(task),
+ unwind_init(&state, thread_saved_fp(task),
thread_saved_pc(task));
- walk_stackframe(task, &frame, consume_entry, cookie);
+ unwind(task, &state, consume_entry, cookie);
}
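
The renames above do not change the underlying algorithm: an AArch64 frame record is a {fp, lr} pair pointed to by x29. Stripped of the validation and stack-transition tracking that unwind_next() performs, the walk reduces to:

        static void unwind_sketch(unsigned long fp,
                                  bool (*consume)(void *cookie, unsigned long pc),
                                  void *cookie)
        {
                while (fp) {
                        unsigned long next_fp = *(unsigned long *)fp;
                        unsigned long pc = *(unsigned long *)(fp + 8);

                        if (!consume(cookie, pc))
                                break;

                        fp = next_fp;   /* must strictly increase on one stack */
                }
        }
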
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 12c6864e51e1..df14336c3a29 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -113,6 +113,6 @@ long compat_arm_syscall(struct pt_regs *regs, int scno)
addr = instruction_pointer(regs) - (compat_thumb_mode(regs) ? 2 : 4);
arm64_notify_die("Oops - bad compat syscall(2)", regs,
- SIGILL, ILL_ILLTRP, addr, scno);
+ SIGILL, ILL_ILLTRP, addr, 0);
return 0;
}
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index c938603b3ba0..733451fe7e41 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -158,11 +158,36 @@ trace_exit:
syscall_trace_exit(regs);
}
-static inline void sve_user_discard(void)
+/*
+ * As per the ABI, exit SME streaming mode and clear the SVE state not
+ * shared with FPSIMD on syscall entry.
+ */
+static inline void fp_user_discard(void)
{
+ /*
+ * If SME is active then exit streaming mode. If ZA is active
+ * then flush the SVE registers but leave userspace access to
+ * both SVE and SME enabled, otherwise disable SME for the
+ * task and fall through to disabling SVE too. This means
+ * that after a syscall we never have any streaming mode
+ * register state to track; if this changes, the KVM code will
+ * need updating.
+ */
+ if (system_supports_sme() && test_thread_flag(TIF_SME)) {
+ u64 svcr = read_sysreg_s(SYS_SVCR);
+
+ if (svcr & SVCR_SM_MASK)
+ sme_smstop_sm();
+ }
+
if (!system_supports_sve())
return;
+ /*
+ * If SME is not active then disable SVE; the registers will
+ * be cleared when userspace next attempts to access them and
+ * we do not need to track the SVE register state until then.
+ */
clear_thread_flag(TIF_SVE);
/*
@@ -177,7 +202,7 @@ static inline void sve_user_discard(void)
void do_el0_svc(struct pt_regs *regs)
{
- sve_user_discard();
+ fp_user_discard();
el0_svc_common(regs, regs->regs[8], __NR_syscalls, sys_call_table);
}
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 0529fd57567e..9ac7a81b79be 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -242,7 +242,7 @@ static void arm64_show_signal(int signo, const char *str)
static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
struct task_struct *tsk = current;
- unsigned int esr = tsk->thread.fault_code;
+ unsigned long esr = tsk->thread.fault_code;
struct pt_regs *regs = task_pt_regs(tsk);
/* Leave if the signal won't be shown */
@@ -253,7 +253,7 @@ static void arm64_show_signal(int signo, const char *str)
pr_info("%s[%d]: unhandled exception: ", tsk->comm, task_pid_nr(tsk));
if (esr)
- pr_cont("%s, ESR 0x%08x, ", esr_get_class_string(esr), esr);
+ pr_cont("%s, ESR 0x%016lx, ", esr_get_class_string(esr), esr);
pr_cont("%s", str);
print_vma_addr(KERN_CONT " in ", regs->pc);
@@ -287,7 +287,7 @@ void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far,
void arm64_notify_die(const char *str, struct pt_regs *regs,
int signo, int sicode, unsigned long far,
- int err)
+ unsigned long err)
{
if (user_mode(regs)) {
WARN_ON(regs != current_pt_regs());
@@ -439,7 +439,7 @@ exit:
return fn ? fn(regs, instr) : 1;
}
-void force_signal_inject(int signal, int code, unsigned long address, unsigned int err)
+void force_signal_inject(int signal, int code, unsigned long address, unsigned long err)
{
const char *desc;
struct pt_regs *regs = current_pt_regs();
@@ -506,7 +506,7 @@ void do_bti(struct pt_regs *regs)
}
NOKPROBE_SYMBOL(do_bti);
-void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr)
+void do_ptrauth_fault(struct pt_regs *regs, unsigned long esr)
{
/*
* Unexpected FPAC exception or pointer authentication failure in
@@ -532,7 +532,7 @@ NOKPROBE_SYMBOL(do_ptrauth_fault);
uaccess_ttbr0_disable(); \
}
-static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
+static void user_cache_maint_handler(unsigned long esr, struct pt_regs *regs)
{
unsigned long tagged_address, address;
int rt = ESR_ELx_SYS64_ISS_RT(esr);
@@ -572,7 +572,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
}
-static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
+static void ctr_read_handler(unsigned long esr, struct pt_regs *regs)
{
int rt = ESR_ELx_SYS64_ISS_RT(esr);
unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0);
@@ -591,7 +591,7 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
}
-static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
+static void cntvct_read_handler(unsigned long esr, struct pt_regs *regs)
{
int rt = ESR_ELx_SYS64_ISS_RT(esr);
@@ -599,7 +599,7 @@ static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
}
-static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
+static void cntfrq_read_handler(unsigned long esr, struct pt_regs *regs)
{
int rt = ESR_ELx_SYS64_ISS_RT(esr);
@@ -607,7 +607,7 @@ static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
}
-static void mrs_handler(unsigned int esr, struct pt_regs *regs)
+static void mrs_handler(unsigned long esr, struct pt_regs *regs)
{
u32 sysreg, rt;
@@ -618,15 +618,15 @@ static void mrs_handler(unsigned int esr, struct pt_regs *regs)
force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
}
-static void wfi_handler(unsigned int esr, struct pt_regs *regs)
+static void wfi_handler(unsigned long esr, struct pt_regs *regs)
{
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
}
struct sys64_hook {
- unsigned int esr_mask;
- unsigned int esr_val;
- void (*handler)(unsigned int esr, struct pt_regs *regs);
+ unsigned long esr_mask;
+ unsigned long esr_val;
+ void (*handler)(unsigned long esr, struct pt_regs *regs);
};
static const struct sys64_hook sys64_hooks[] = {
@@ -675,7 +675,7 @@ static const struct sys64_hook sys64_hooks[] = {
};
#ifdef CONFIG_COMPAT
-static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs)
+static bool cp15_cond_valid(unsigned long esr, struct pt_regs *regs)
{
int cond;
@@ -695,7 +695,7 @@ static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs)
return aarch32_opcode_cond_checks[cond](regs->pstate);
}
-static void compat_cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
+static void compat_cntfrq_read_handler(unsigned long esr, struct pt_regs *regs)
{
int reg = (esr & ESR_ELx_CP15_32_ISS_RT_MASK) >> ESR_ELx_CP15_32_ISS_RT_SHIFT;
@@ -712,7 +712,7 @@ static const struct sys64_hook cp15_32_hooks[] = {
{},
};
-static void compat_cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
+static void compat_cntvct_read_handler(unsigned long esr, struct pt_regs *regs)
{
int rt = (esr & ESR_ELx_CP15_64_ISS_RT_MASK) >> ESR_ELx_CP15_64_ISS_RT_SHIFT;
int rt2 = (esr & ESR_ELx_CP15_64_ISS_RT2_MASK) >> ESR_ELx_CP15_64_ISS_RT2_SHIFT;
@@ -737,7 +737,7 @@ static const struct sys64_hook cp15_64_hooks[] = {
{},
};
-void do_cp15instr(unsigned int esr, struct pt_regs *regs)
+void do_cp15instr(unsigned long esr, struct pt_regs *regs)
{
const struct sys64_hook *hook, *hook_base;
@@ -778,7 +778,7 @@ void do_cp15instr(unsigned int esr, struct pt_regs *regs)
NOKPROBE_SYMBOL(do_cp15instr);
#endif
-void do_sysinstr(unsigned int esr, struct pt_regs *regs)
+void do_sysinstr(unsigned long esr, struct pt_regs *regs)
{
const struct sys64_hook *hook;
@@ -821,6 +821,7 @@ static const char *esr_class_str[] = {
[ESR_ELx_EC_SVE] = "SVE",
[ESR_ELx_EC_ERET] = "ERET/ERETAA/ERETAB",
[ESR_ELx_EC_FPAC] = "FPAC",
+ [ESR_ELx_EC_SME] = "SME",
[ESR_ELx_EC_IMP_DEF] = "EL3 IMP DEF",
[ESR_ELx_EC_IABT_LOW] = "IABT (lower EL)",
[ESR_ELx_EC_IABT_CUR] = "IABT (current EL)",
@@ -842,7 +843,7 @@ static const char *esr_class_str[] = {
[ESR_ELx_EC_BRK64] = "BRK (AArch64)",
};
-const char *esr_get_class_string(u32 esr)
+const char *esr_get_class_string(unsigned long esr)
{
return esr_class_str[ESR_ELx_EC(esr)];
}
@@ -851,7 +852,7 @@ const char *esr_get_class_string(u32 esr)
* bad_el0_sync handles unexpected, but potentially recoverable synchronous
* exceptions taken from EL0.
*/
-void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
+void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr)
{
unsigned long pc = instruction_pointer(regs);
@@ -867,7 +868,7 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
__aligned(16);
-void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far)
+void panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far)
{
unsigned long tsk_stk = (unsigned long)current->stack;
unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr);
@@ -876,7 +877,7 @@ void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far)
console_verbose();
pr_emerg("Insufficient stack space to handle exception!");
- pr_emerg("ESR: 0x%08x -- %s\n", esr, esr_get_class_string(esr));
+ pr_emerg("ESR: 0x%016lx -- %s\n", esr, esr_get_class_string(esr));
pr_emerg("FAR: 0x%016lx\n", far);
pr_emerg("Task stack: [0x%016lx..0x%016lx]\n",
@@ -897,11 +898,11 @@ void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far)
}
#endif
-void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr)
+void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr)
{
console_verbose();
- pr_crit("SError Interrupt on CPU%d, code 0x%08x -- %s\n",
+ pr_crit("SError Interrupt on CPU%d, code 0x%016lx -- %s\n",
smp_processor_id(), esr, esr_get_class_string(esr));
if (regs)
__show_regs(regs);
@@ -912,9 +913,9 @@ void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr)
unreachable();
}
-bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr)
+bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned long esr)
{
- u32 aet = arm64_ras_serror_get_severity(esr);
+ unsigned long aet = arm64_ras_serror_get_severity(esr);
switch (aet) {
case ESR_ELx_AET_CE: /* corrected error */
@@ -944,7 +945,7 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr)
}
}
-void do_serror(struct pt_regs *regs, unsigned int esr)
+void do_serror(struct pt_regs *regs, unsigned long esr)
{
/* non-RAS errors are not containable */
if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr))
@@ -965,7 +966,7 @@ int is_valid_bugaddr(unsigned long addr)
return 1;
}
-static int bug_handler(struct pt_regs *regs, unsigned int esr)
+static int bug_handler(struct pt_regs *regs, unsigned long esr)
{
switch (report_bug(regs->pc, regs)) {
case BUG_TRAP_TYPE_BUG:
@@ -990,7 +991,7 @@ static struct break_hook bug_break_hook = {
.imm = BUG_BRK_IMM,
};
-static int reserved_fault_handler(struct pt_regs *regs, unsigned int esr)
+static int reserved_fault_handler(struct pt_regs *regs, unsigned long esr)
{
pr_err("%s generated an invalid instruction at %pS!\n",
"Kernel text patching",
@@ -1012,7 +1013,7 @@ static struct break_hook fault_break_hook = {
#define KASAN_ESR_SIZE_MASK 0x0f
#define KASAN_ESR_SIZE(esr) (1 << ((esr) & KASAN_ESR_SIZE_MASK))
-static int kasan_handler(struct pt_regs *regs, unsigned int esr)
+static int kasan_handler(struct pt_regs *regs, unsigned long esr)
{
bool recover = esr & KASAN_ESR_RECOVER;
bool write = esr & KASAN_ESR_WRITE;
@@ -1055,11 +1056,11 @@ static struct break_hook kasan_break_hook = {
* Initial handler for AArch64 BRK exceptions
* This handler only used until debug_traps_init().
*/
-int __init early_brk64(unsigned long addr, unsigned int esr,
+int __init early_brk64(unsigned long addr, unsigned long esr,
struct pt_regs *regs)
{
#ifdef CONFIG_KASAN_SW_TAGS
- unsigned int comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;
+ unsigned long comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;
if ((comment & ~KASAN_BRK_MASK) == KASAN_BRK_IMM)
return kasan_handler(regs, esr) != DBG_HOOK_HANDLED;
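(Worked decode of the KASAN brk ESR fields defined earlier — illustrative
values, not from a real report: for a comment whose low nibble is 3 and with
RECOVER set,)

    recover = esr & KASAN_ESR_RECOVER;  /* nonzero: report, then continue */
    write   = esr & KASAN_ESR_WRITE;    /* zero here: a read access */
    size    = KASAN_ESR_SIZE(esr);      /* 1 << 3 == 8-byte access */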
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index ac1964ebed1e..f6e25d7c346a 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -32,7 +32,8 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
# -Wmissing-prototypes and -Wmissing-declarations are removed from
# the CFLAGS of vgettimeofday.c to make possible to build the
# kernel with CONFIG_WERROR enabled.
-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS) \
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
+ $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \
$(CC_FLAGS_LTO) -Wmissing-prototypes -Wmissing-declarations
KASAN_SANITIZE := n
KCSAN_SANITIZE := n
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index edaf0faf766f..2d4a8f995175 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -93,7 +93,6 @@ jiffies = jiffies_64;
#ifdef CONFIG_HIBERNATION
#define HIBERNATE_TEXT \
- . = ALIGN(SZ_4K); \
__hibernate_exit_text_start = .; \
*(.hibernate_exit.text) \
__hibernate_exit_text_end = .;
@@ -103,7 +102,6 @@ jiffies = jiffies_64;
#ifdef CONFIG_KEXEC_CORE
#define KEXEC_TEXT \
- . = ALIGN(SZ_4K); \
__relocate_new_kernel_start = .; \
*(.kexec_relocate.text) \
__relocate_new_kernel_end = .;
@@ -170,9 +168,6 @@ SECTIONS
KPROBES_TEXT
HYPERVISOR_TEXT
IDMAP_TEXT
- HIBERNATE_TEXT
- KEXEC_TEXT
- TRAMP_TEXT
*(.gnu.warning)
. = ALIGN(16);
*(.got) /* Global offset table */
@@ -194,6 +189,14 @@ SECTIONS
HYPERVISOR_DATA_SECTIONS
+ /* code sections that are never executed via the kernel mapping */
+ .rodata.text : {
+ TRAMP_TEXT
+ HIBERNATE_TEXT
+ KEXEC_TEXT
+ . = ALIGN(PAGE_SIZE);
+ }
+
idmap_pg_dir = .;
. += IDMAP_DIR_SIZE;
idmap_pg_end = .;
@@ -337,8 +340,8 @@ ASSERT(__hyp_idmap_text_end - __hyp_idmap_text_start <= PAGE_SIZE,
ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
"ID map text too big or misaligned")
#ifdef CONFIG_HIBERNATION
-ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1))
- <= SZ_4K, "Hibernate exit text too big or misaligned")
+ASSERT(__hibernate_exit_text_end - __hibernate_exit_text_start <= SZ_4K,
+ "Hibernate exit text is bigger than 4 KiB")
#endif
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) <= 3*PAGE_SIZE,
@@ -362,7 +365,7 @@ ASSERT(swapper_pg_dir - tramp_pg_dir == TRAMP_SWAPPER_OFFSET,
#ifdef CONFIG_KEXEC_CORE
/* kexec relocation code should fit into one KEXEC_CONTROL_PAGE_SIZE */
-ASSERT(__relocate_new_kernel_end - (__relocate_new_kernel_start & ~(SZ_4K - 1))
- <= SZ_4K, "kexec relocation code is too big or misaligned")
+ASSERT(__relocate_new_kernel_end - __relocate_new_kernel_start <= SZ_4K,
+ "kexec relocation code is bigger than 4 KiB")
ASSERT(KEXEC_CONTROL_PAGE_SIZE >= SZ_4K, "KEXEC_CONTROL_PAGE_SIZE is broken")
#endif
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 523bc934fe2f..cedc3ba2c098 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -783,6 +783,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
ret = 1;
run->exit_reason = KVM_EXIT_UNKNOWN;
+ run->flags = 0;
while (ret > 0) {
/*
* Check conditions before entering the guest
@@ -1436,7 +1437,8 @@ static int kvm_init_vector_slots(void)
base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs));
kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT);
- if (kvm_system_needs_idmapped_vectors() && !has_vhe()) {
+ if (kvm_system_needs_idmapped_vectors() &&
+ !is_protected_kvm_enabled()) {
err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs),
__BP_HARDEN_HYP_VECS_SZ, &base);
if (err)
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 397fdac75cb1..3d251a4d2cf7 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -82,6 +82,26 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
+
+ /*
+ * We don't currently support SME guests, but if we leave things in
+ * streaming mode then FPSIMD or SVE code run by the guest may
+ * generate SME traps. As a special case, if we are in streaming mode
+ * we force the host state to be saved now and exit streaming mode, so
+ * that we don't have to handle any SME traps for valid guest
+ * operations. Do this for ZA as well for now, for simplicity.
+ */
+ if (system_supports_sme()) {
+ if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
+ vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED;
+
+ if (read_sysreg_s(SYS_SVCR) &
+ (SVCR_SM_MASK | SVCR_ZA_MASK)) {
+ vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
+ fpsimd_save_and_flush_cpu_state();
+ }
+ }
}
/*
@@ -109,9 +129,14 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
WARN_ON_ONCE(!irqs_disabled());
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
+ /*
+ * Currently we do not support SME guests so SVCR is
+ * always 0 and we just need a variable to point to.
+ */
fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs,
vcpu->arch.sve_state,
- vcpu->arch.sve_max_vl);
+ vcpu->arch.sve_max_vl,
+ NULL, 0, &vcpu->arch.svcr);
clear_thread_flag(TIF_FOREIGN_FPSTATE);
update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu));
@@ -130,6 +155,22 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
local_irq_save(flags);
+ /*
+ * If we have VHE then the Hyp code will reset CPACR_EL1 to
+ * CPACR_EL1_DEFAULT and we need to reenable SME.
+ */
+ if (has_vhe() && system_supports_sme()) {
+ /* Also restore EL0 state seen on entry */
+ if (vcpu->arch.flags & KVM_ARM64_HOST_SME_ENABLED)
+ sysreg_clear_set(CPACR_EL1, 0,
+ CPACR_EL1_SMEN_EL0EN |
+ CPACR_EL1_SMEN_EL1EN);
+ else
+ sysreg_clear_set(CPACR_EL1,
+ CPACR_EL1_SMEN_EL0EN,
+ CPACR_EL1_SMEN_EL1EN);
+ }
+
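(Sketch of the sysreg_clear_set() helper used above — a read-modify-write on
the named register, assuming the usual write-back-only-on-change behaviour:)

    u64 old = read_sysreg(cpacr_el1);
    u64 new = (old & ~clear) | set;

    if (new != old)
        write_sysreg(new, cpacr_el1);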
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
if (vcpu_has_sve(vcpu)) {
__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 97fe14aab1a3..0b829292dc54 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -26,7 +26,7 @@
typedef int (*exit_handle_fn)(struct kvm_vcpu *);
-static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u32 esr)
+static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
{
if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(NULL, esr))
kvm_inject_vabt(vcpu);
@@ -117,10 +117,12 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
- u32 esr = kvm_vcpu_get_esr(vcpu);
+ u64 esr = kvm_vcpu_get_esr(vcpu);
run->exit_reason = KVM_EXIT_DEBUG;
- run->debug.arch.hsr = esr;
+ run->debug.arch.hsr = lower_32_bits(esr);
+ run->debug.arch.hsr_high = upper_32_bits(esr);
+ run->flags = KVM_DEBUG_ARCH_HSR_HIGH_VALID;
if (ESR_ELx_EC(esr) == ESR_ELx_EC_WATCHPT_LOW)
run->debug.arch.far = vcpu->arch.fault.far_el2;
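(With the ESR widened to 64 bits, a VMM consuming this exit would reassemble
it roughly as follows — hypothetical userspace snippet using the UAPI fields
set above:)

    __u64 esr = run->debug.arch.hsr;

    if (run->flags & KVM_DEBUG_ARCH_HSR_HIGH_VALID)
        esr |= (__u64)run->debug.arch.hsr_high << 32;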
@@ -130,9 +132,9 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu)
{
- u32 esr = kvm_vcpu_get_esr(vcpu);
+ u64 esr = kvm_vcpu_get_esr(vcpu);
- kvm_pr_unimpl("Unknown exception class: esr: %#08x -- %s\n",
+ kvm_pr_unimpl("Unknown exception class: esr: %#016llx -- %s\n",
esr, esr_get_class_string(esr));
kvm_inject_undefined(vcpu);
@@ -187,7 +189,7 @@ static exit_handle_fn arm_exit_handlers[] = {
static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
{
- u32 esr = kvm_vcpu_get_esr(vcpu);
+ u64 esr = kvm_vcpu_get_esr(vcpu);
u8 esr_ec = ESR_ELx_EC(esr);
return arm_exit_handlers[esr_ec];
@@ -334,6 +336,6 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
*/
kvm_err("Hyp Offset: 0x%llx\n", hyp_offset);
- panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%016lx\n",
+ panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%016llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%016lx\n",
spsr, elr_virt, esr, far, hpfar, par, vcpu);
}
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 5d31f6c64c8c..37d9f211c200 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -266,7 +266,7 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
return true;
}
-static inline bool esr_is_ptrauth_trap(u32 esr)
+static inline bool esr_is_ptrauth_trap(u64 esr)
{
switch (esr_sys64_to_sysreg(esr)) {
case SYS_APIAKEYLO_EL1:
diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
index 5ad626527d41..fd55014b3497 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
@@ -159,20 +159,20 @@
* No restrictions on instructions implemented in AArch64.
*/
#define PVM_ID_AA64ISAR0_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64ISAR0_AES) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA1) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA2) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_CRC32) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_RDM) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA3) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_SM3) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_SM4) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_DP) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_FHM) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_TS) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_TLB) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_RNDR) \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_AES) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA1) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA2) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_CRC32) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RDM) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA3) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM3) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM4) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_DP) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_FHM) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TS) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TLB) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RNDR) \
)
#define PVM_ID_AA64ISAR1_ALLOW (\
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 6410d21d8695..caace61ea459 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -47,10 +47,24 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
__activate_traps_fpsimd32(vcpu);
}
+ if (cpus_have_final_cap(ARM64_SME))
+ val |= CPTR_EL2_TSM;
write_sysreg(val, cptr_el2);
write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2);
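+ /*
+ * The FEAT_FGT bits named with an 'n' prefix are negative trap
+ * enables: clearing nTPIDR2_EL0 and nSMPRI_EL1 below traps guest
+ * accesses to TPIDR2_EL0 and SMPRI_EL1 to EL2, since SME is not
+ * exposed to guests.
+ */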
+ if (cpus_have_final_cap(ARM64_SME)) {
+ val = read_sysreg_s(SYS_HFGRTR_EL2);
+ val &= ~(HFGxTR_EL2_nTPIDR2_EL0_MASK |
+ HFGxTR_EL2_nSMPRI_EL1_MASK);
+ write_sysreg_s(val, SYS_HFGRTR_EL2);
+
+ val = read_sysreg_s(SYS_HFGWTR_EL2);
+ val &= ~(HFGxTR_EL2_nTPIDR2_EL0_MASK |
+ HFGxTR_EL2_nSMPRI_EL1_MASK);
+ write_sysreg_s(val, SYS_HFGWTR_EL2);
+ }
+
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
@@ -94,9 +108,25 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2);
+ if (cpus_have_final_cap(ARM64_SME)) {
+ u64 val;
+
+ val = read_sysreg_s(SYS_HFGRTR_EL2);
+ val |= HFGxTR_EL2_nTPIDR2_EL0_MASK |
+ HFGxTR_EL2_nSMPRI_EL1_MASK;
+ write_sysreg_s(val, SYS_HFGRTR_EL2);
+
+ val = read_sysreg_s(SYS_HFGWTR_EL2);
+ val |= HFGxTR_EL2_nTPIDR2_EL0_MASK |
+ HFGxTR_EL2_nSMPRI_EL1_MASK;
+ write_sysreg_s(val, SYS_HFGWTR_EL2);
+ }
+
cptr = CPTR_EL2_DEFAULT;
if (vcpu_has_sve(vcpu) && (vcpu->arch.flags & KVM_ARM64_FP_ENABLED))
cptr |= CPTR_EL2_TZ;
+ if (cpus_have_final_cap(ARM64_SME))
+ cptr &= ~CPTR_EL2_TSM;
write_sysreg(cptr, cptr_el2);
write_sysreg(__kvm_hyp_host_vector, vbar_el2);
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index 33f5181af330..619f94fc95fa 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -33,7 +33,7 @@ u64 id_aa64mmfr2_el1_sys_val;
*/
static void inject_undef64(struct kvm_vcpu *vcpu)
{
- u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
+ u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
*vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
*vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR);
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 4fb419f7b8b6..6cb638b184b1 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -473,7 +473,7 @@ static int __vgic_v3_bpr_min(void)
static int __vgic_v3_get_group(struct kvm_vcpu *vcpu)
{
- u32 esr = kvm_vcpu_get_esr(vcpu);
+ u64 esr = kvm_vcpu_get_esr(vcpu);
u8 crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT;
return crm != 8;
@@ -1016,7 +1016,7 @@ static void __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
{
int rt;
- u32 esr;
+ u64 esr;
u32 vmcr;
void (*fn)(struct kvm_vcpu *, u32, int);
bool is_read;
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 262dfe03134d..969f20daf97a 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -41,7 +41,8 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val = read_sysreg(cpacr_el1);
val |= CPACR_EL1_TTA;
- val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN);
+ val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN |
+ CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN);
/*
* With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
@@ -62,6 +63,10 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
__activate_traps_fpsimd32(vcpu);
}
+ if (cpus_have_final_cap(ARM64_SME))
+ write_sysreg(read_sysreg(sctlr_el2) & ~SCTLR_ELx_ENTP2,
+ sctlr_el2);
+
write_sysreg(val, cpacr_el1);
write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el1);
@@ -83,6 +88,10 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
*/
asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
+ if (cpus_have_final_cap(ARM64_SME))
+ write_sysreg(read_sysreg(sctlr_el2) | SCTLR_ELx_ENTP2,
+ sctlr_el2);
+
write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
if (!arm64_kernel_unmapped_at_el0())
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index ba20405d2dc2..55a5dbe957e0 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -18,7 +18,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
{
unsigned long cpsr = *vcpu_cpsr(vcpu);
bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
- u32 esr = 0;
+ u64 esr = 0;
vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 |
KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
@@ -50,7 +50,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
static void inject_undef64(struct kvm_vcpu *vcpu)
{
- u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
+ u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 |
KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 7b45c040cc27..18b403b58b53 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1123,8 +1123,7 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3);
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3);
- if (irqchip_in_kernel(vcpu->kvm) &&
- vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
+ if (kvm_vgic_global_state.type == VGIC_V3) {
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_GIC);
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_GIC), 1);
}
@@ -1132,6 +1131,8 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
case SYS_ID_AA64PFR1_EL1:
if (!kvm_has_mte(vcpu->kvm))
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE);
+
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_SME);
break;
case SYS_ID_AA64ISAR1_EL1:
if (!vcpu_has_ptrauth(vcpu))
@@ -1553,7 +1554,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_UNALLOCATED(4,2),
ID_UNALLOCATED(4,3),
ID_SANITISED(ID_AA64ZFR0_EL1),
- ID_UNALLOCATED(4,5),
+ ID_HIDDEN(ID_AA64SMFR0_EL1),
ID_UNALLOCATED(4,6),
ID_UNALLOCATED(4,7),
@@ -1596,6 +1597,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility },
{ SYS_DESC(SYS_TRFCR_EL1), undef_access },
+ { SYS_DESC(SYS_SMPRI_EL1), undef_access },
+ { SYS_DESC(SYS_SMCR_EL1), undef_access },
{ SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 },
{ SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 },
{ SYS_DESC(SYS_TCR_EL1), access_vm_reg, reset_val, TCR_EL1, 0 },
@@ -1678,8 +1681,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr },
{ SYS_DESC(SYS_CLIDR_EL1), access_clidr },
+ { SYS_DESC(SYS_SMIDR_EL1), undef_access },
{ SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 },
{ SYS_DESC(SYS_CTR_EL0), access_ctr },
+ { SYS_DESC(SYS_SVCR), undef_access },
{ PMU_SYS_REG(SYS_PMCR_EL0), .access = access_pmcr,
.reset = reset_pmcr, .reg = PMCR_EL0 },
@@ -1719,6 +1724,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 },
{ SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 },
+ { SYS_DESC(SYS_TPIDR2_EL0), undef_access },
{ SYS_DESC(SYS_SCXTNUM_EL0), undef_access },
@@ -2304,7 +2310,7 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
size_t nr_global)
{
struct sys_reg_params params;
- u32 esr = kvm_vcpu_get_esr(vcpu);
+ u64 esr = kvm_vcpu_get_esr(vcpu);
int Rt = kvm_vcpu_sys_get_rt(vcpu);
int Rt2 = (esr >> 10) & 0x1f;
@@ -2354,7 +2360,7 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
size_t nr_global)
{
struct sys_reg_params params;
- u32 esr = kvm_vcpu_get_esr(vcpu);
+ u64 esr = kvm_vcpu_get_esr(vcpu);
int Rt = kvm_vcpu_sys_get_rt(vcpu);
params.CRm = (esr >> 1) & 0xf;
diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
index 8590af3c98c0..eeb9e45bcce8 100644
--- a/arch/arm64/lib/mte.S
+++ b/arch/arm64/lib/mte.S
@@ -93,7 +93,7 @@ SYM_FUNC_START(mte_copy_tags_from_user)
mov x3, x1
cbz x2, 2f
1:
- user_ldst 2f, ldtrb, w4, x1, 0
+USER(2f, ldtrb w4, [x1])
lsl x4, x4, #MTE_TAG_SHIFT
stg x4, [x0], #MTE_GRANULE_SIZE
add x1, x1, #1
@@ -120,7 +120,7 @@ SYM_FUNC_START(mte_copy_tags_to_user)
1:
ldg x4, [x1]
ubfx x4, x4, #MTE_TAG_SHIFT, #MTE_TAG_SIZE
- user_ldst 2f, sttrb, w4, x0, 0
+USER(2f, sttrb w4, [x0])
add x0, x0, #1
add x1, x1, #MTE_GRANULE_SIZE
subs x2, x2, #1
diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c
index b5447e53cd73..0dea80bf6de4 100644
--- a/arch/arm64/mm/copypage.c
+++ b/arch/arm64/mm/copypage.c
@@ -16,8 +16,8 @@
void copy_highpage(struct page *to, struct page *from)
{
- struct page *kto = page_address(to);
- struct page *kfrom = page_address(from);
+ void *kto = page_address(to);
+ void *kfrom = page_address(from);
copy_page(kto, kfrom);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 77341b160aca..c5e11768e5c1 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -43,7 +43,7 @@
#include <asm/traps.h>
struct fault_info {
- int (*fn)(unsigned long far, unsigned int esr,
+ int (*fn)(unsigned long far, unsigned long esr,
struct pt_regs *regs);
int sig;
int code;
@@ -53,17 +53,17 @@ struct fault_info {
static const struct fault_info fault_info[];
static struct fault_info debug_fault_info[];
-static inline const struct fault_info *esr_to_fault_info(unsigned int esr)
+static inline const struct fault_info *esr_to_fault_info(unsigned long esr)
{
return fault_info + (esr & ESR_ELx_FSC);
}
-static inline const struct fault_info *esr_to_debug_fault_info(unsigned int esr)
+static inline const struct fault_info *esr_to_debug_fault_info(unsigned long esr)
{
return debug_fault_info + DBG_ESR_EVT(esr);
}
-static void data_abort_decode(unsigned int esr)
+static void data_abort_decode(unsigned long esr)
{
pr_alert("Data abort info:\n");
@@ -85,11 +85,11 @@ static void data_abort_decode(unsigned int esr)
(esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT);
}
-static void mem_abort_decode(unsigned int esr)
+static void mem_abort_decode(unsigned long esr)
{
pr_alert("Mem abort info:\n");
- pr_alert(" ESR = 0x%08x\n", esr);
+ pr_alert(" ESR = 0x%016lx\n", esr);
pr_alert(" EC = 0x%02lx: %s, IL = %u bits\n",
ESR_ELx_EC(esr), esr_get_class_string(esr),
(esr & ESR_ELx_IL) ? 32 : 16);
@@ -99,7 +99,7 @@ static void mem_abort_decode(unsigned int esr)
pr_alert(" EA = %lu, S1PTW = %lu\n",
(esr & ESR_ELx_EA) >> ESR_ELx_EA_SHIFT,
(esr & ESR_ELx_S1PTW) >> ESR_ELx_S1PTW_SHIFT);
- pr_alert(" FSC = 0x%02x: %s\n", (esr & ESR_ELx_FSC),
+ pr_alert(" FSC = 0x%02lx: %s\n", (esr & ESR_ELx_FSC),
esr_to_fault_info(esr)->name);
if (esr_is_data_abort(esr))
@@ -229,20 +229,20 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
return 1;
}
-static bool is_el1_instruction_abort(unsigned int esr)
+static bool is_el1_instruction_abort(unsigned long esr)
{
return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
}
-static bool is_el1_data_abort(unsigned int esr)
+static bool is_el1_data_abort(unsigned long esr)
{
return ESR_ELx_EC(esr) == ESR_ELx_EC_DABT_CUR;
}
-static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr,
+static inline bool is_el1_permission_fault(unsigned long addr, unsigned long esr,
struct pt_regs *regs)
{
- unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
+ unsigned long fsc_type = esr & ESR_ELx_FSC_TYPE;
if (!is_el1_data_abort(esr) && !is_el1_instruction_abort(esr))
return false;
@@ -258,7 +258,7 @@ static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr,
}
static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
- unsigned int esr,
+ unsigned long esr,
struct pt_regs *regs)
{
unsigned long flags;
@@ -290,7 +290,7 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
}
static void die_kernel_fault(const char *msg, unsigned long addr,
- unsigned int esr, struct pt_regs *regs)
+ unsigned long esr, struct pt_regs *regs)
{
bust_spinlocks(1);
@@ -308,7 +308,7 @@ static void die_kernel_fault(const char *msg, unsigned long addr,
}
#ifdef CONFIG_KASAN_HW_TAGS
-static void report_tag_fault(unsigned long addr, unsigned int esr,
+static void report_tag_fault(unsigned long addr, unsigned long esr,
struct pt_regs *regs)
{
/*
@@ -320,11 +320,11 @@ static void report_tag_fault(unsigned long addr, unsigned int esr,
}
#else
/* Tag faults aren't enabled without CONFIG_KASAN_HW_TAGS. */
-static inline void report_tag_fault(unsigned long addr, unsigned int esr,
+static inline void report_tag_fault(unsigned long addr, unsigned long esr,
struct pt_regs *regs) { }
#endif
-static void do_tag_recovery(unsigned long addr, unsigned int esr,
+static void do_tag_recovery(unsigned long addr, unsigned long esr,
struct pt_regs *regs)
{
@@ -335,13 +335,14 @@ static void do_tag_recovery(unsigned long addr, unsigned int esr,
* It will be done lazily on the other CPUs when they will hit a
* tag fault.
*/
- sysreg_clear_set(sctlr_el1, SCTLR_ELx_TCF_MASK, SCTLR_ELx_TCF_NONE);
+ sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF_MASK,
+ SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF, NONE));
isb();
}
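(SYS_FIELD_PREP_ENUM() builds on the generated <reg>_<field>_MASK and
<reg>_<field>_<val> defines; assuming that scheme, the call above is roughly
equivalent to:)

    sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF_MASK,
                     FIELD_PREP(SCTLR_EL1_TCF_MASK, SCTLR_EL1_TCF_NONE));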
-static bool is_el1_mte_sync_tag_check_fault(unsigned int esr)
+static bool is_el1_mte_sync_tag_check_fault(unsigned long esr)
{
- unsigned int fsc = esr & ESR_ELx_FSC;
+ unsigned long fsc = esr & ESR_ELx_FSC;
if (!is_el1_data_abort(esr))
return false;
@@ -352,7 +353,7 @@ static bool is_el1_mte_sync_tag_check_fault(unsigned int esr)
return false;
}
-static void __do_kernel_fault(unsigned long addr, unsigned int esr,
+static void __do_kernel_fault(unsigned long addr, unsigned long esr,
struct pt_regs *regs)
{
const char *msg;
@@ -393,7 +394,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
die_kernel_fault(msg, addr, esr, regs);
}
-static void set_thread_esr(unsigned long address, unsigned int esr)
+static void set_thread_esr(unsigned long address, unsigned long esr)
{
current->thread.fault_address = address;
@@ -441,7 +442,7 @@ static void set_thread_esr(unsigned long address, unsigned int esr)
* exception level). Fail safe by not providing an ESR
* context record at all.
*/
- WARN(1, "ESR 0x%x is not DABT or IABT from EL0\n", esr);
+ WARN(1, "ESR 0x%lx is not DABT or IABT from EL0\n", esr);
esr = 0;
break;
}
@@ -450,7 +451,7 @@ static void set_thread_esr(unsigned long address, unsigned int esr)
current->thread.fault_code = esr;
}
-static void do_bad_area(unsigned long far, unsigned int esr,
+static void do_bad_area(unsigned long far, unsigned long esr,
struct pt_regs *regs)
{
unsigned long addr = untagged_addr(far);
@@ -501,7 +502,7 @@ static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
return handle_mm_fault(vma, addr, mm_flags, regs);
}
-static bool is_el0_instruction_abort(unsigned int esr)
+static bool is_el0_instruction_abort(unsigned long esr)
{
return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
}
@@ -510,12 +511,12 @@ static bool is_el0_instruction_abort(unsigned int esr)
* Note: not valid for EL1 DC IVAC, but we never use that such that it
* should fault. EL0 cannot issue DC IVAC (undef).
*/
-static bool is_write_abort(unsigned int esr)
+static bool is_write_abort(unsigned long esr)
{
return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
}
-static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
+static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
struct pt_regs *regs)
{
const struct fault_info *inf;
@@ -671,7 +672,7 @@ no_context:
}
static int __kprobes do_translation_fault(unsigned long far,
- unsigned int esr,
+ unsigned long esr,
struct pt_regs *regs)
{
unsigned long addr = untagged_addr(far);
@@ -683,19 +684,19 @@ static int __kprobes do_translation_fault(unsigned long far,
return 0;
}
-static int do_alignment_fault(unsigned long far, unsigned int esr,
+static int do_alignment_fault(unsigned long far, unsigned long esr,
struct pt_regs *regs)
{
do_bad_area(far, esr, regs);
return 0;
}
-static int do_bad(unsigned long far, unsigned int esr, struct pt_regs *regs)
+static int do_bad(unsigned long far, unsigned long esr, struct pt_regs *regs)
{
return 1; /* "fault" */
}
-static int do_sea(unsigned long far, unsigned int esr, struct pt_regs *regs)
+static int do_sea(unsigned long far, unsigned long esr, struct pt_regs *regs)
{
const struct fault_info *inf;
unsigned long siaddr;
@@ -725,7 +726,7 @@ static int do_sea(unsigned long far, unsigned int esr, struct pt_regs *regs)
return 0;
}
-static int do_tag_check_fault(unsigned long far, unsigned int esr,
+static int do_tag_check_fault(unsigned long far, unsigned long esr,
struct pt_regs *regs)
{
/*
@@ -805,7 +806,7 @@ static const struct fault_info fault_info[] = {
{ do_bad, SIGKILL, SI_KERNEL, "unknown 63" },
};
-void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs)
+void do_mem_abort(unsigned long far, unsigned long esr, struct pt_regs *regs)
{
const struct fault_info *inf = esr_to_fault_info(esr);
unsigned long addr = untagged_addr(far);
@@ -825,14 +826,14 @@ void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs)
}
NOKPROBE_SYMBOL(do_mem_abort);
-void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs)
{
arm64_notify_die("SP/PC alignment exception", regs, SIGBUS, BUS_ADRALN,
addr, esr);
}
NOKPROBE_SYMBOL(do_sp_pc_abort);
-int __init early_brk64(unsigned long addr, unsigned int esr,
+int __init early_brk64(unsigned long addr, unsigned long esr,
struct pt_regs *regs);
/*
@@ -852,7 +853,7 @@ static struct fault_info __refdata debug_fault_info[] = {
};
void __init hook_debug_fault_code(int nr,
- int (*fn)(unsigned long, unsigned int, struct pt_regs *),
+ int (*fn)(unsigned long, unsigned long, struct pt_regs *),
int sig, int code, const char *name)
{
BUG_ON(nr < 0 || nr >= ARRAY_SIZE(debug_fault_info));
@@ -885,7 +886,7 @@ static void debug_exception_exit(struct pt_regs *regs)
}
NOKPROBE_SYMBOL(debug_exception_exit);
-void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
+void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr,
struct pt_regs *regs)
{
const struct fault_info *inf = esr_to_debug_fault_info(esr);
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index cbace1c9e137..64bb078e2e7b 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -158,6 +158,28 @@ static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
return contig_ptes;
}
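+/*
+ * For a contiguous hugepage the hardware may have set the dirty or young
+ * bit on any entry in the range, so fold those bits from every
+ * constituent PTE into the value returned for the head entry.
+ */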
+pte_t huge_ptep_get(pte_t *ptep)
+{
+ int ncontig, i;
+ size_t pgsize;
+ pte_t orig_pte = ptep_get(ptep);
+
+ if (!pte_present(orig_pte) || !pte_cont(orig_pte))
+ return orig_pte;
+
+ ncontig = num_contig_ptes(page_size(pte_page(orig_pte)), &pgsize);
+ for (i = 0; i < ncontig; i++, ptep++) {
+ pte_t pte = ptep_get(ptep);
+
+ if (pte_dirty(pte))
+ orig_pte = pte_mkdirty(orig_pte);
+
+ if (pte_young(pte))
+ orig_pte = pte_mkyoung(orig_pte);
+ }
+ return orig_pte;
+}
+
/*
* Changing some bits of contiguous entries requires us to follow a
* Break-Before-Make approach, breaking the whole contiguous set
@@ -166,15 +188,14 @@ static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
*
* This helper performs the break step.
*/
-static pte_t get_clear_flush(struct mm_struct *mm,
+static pte_t get_clear_contig(struct mm_struct *mm,
unsigned long addr,
pte_t *ptep,
unsigned long pgsize,
unsigned long ncontig)
{
- pte_t orig_pte = huge_ptep_get(ptep);
- bool valid = pte_valid(orig_pte);
- unsigned long i, saddr = addr;
+ pte_t orig_pte = ptep_get(ptep);
+ unsigned long i;
for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
pte_t pte = ptep_get_and_clear(mm, addr, ptep);
@@ -190,11 +211,6 @@ static pte_t get_clear_flush(struct mm_struct *mm,
if (pte_young(pte))
orig_pte = pte_mkyoung(orig_pte);
}
-
- if (valid) {
- struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
- flush_tlb_range(&vma, saddr, addr);
- }
return orig_pte;
}
@@ -385,14 +401,14 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
{
int ncontig;
size_t pgsize;
- pte_t orig_pte = huge_ptep_get(ptep);
+ pte_t orig_pte = ptep_get(ptep);
if (!pte_cont(orig_pte))
return ptep_get_and_clear(mm, addr, ptep);
ncontig = find_num_contig(mm, addr, ptep, &pgsize);
- return get_clear_flush(mm, addr, ptep, pgsize, ncontig);
+ return get_clear_contig(mm, addr, ptep, pgsize, ncontig);
}
/*
@@ -408,11 +424,11 @@ static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig)
{
int i;
- if (pte_write(pte) != pte_write(huge_ptep_get(ptep)))
+ if (pte_write(pte) != pte_write(ptep_get(ptep)))
return 1;
for (i = 0; i < ncontig; i++) {
- pte_t orig_pte = huge_ptep_get(ptep + i);
+ pte_t orig_pte = ptep_get(ptep + i);
if (pte_dirty(pte) != pte_dirty(orig_pte))
return 1;
@@ -443,7 +459,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
if (!__cont_access_flags_changed(ptep, pte, ncontig))
return 0;
- orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
+ orig_pte = get_clear_contig(vma->vm_mm, addr, ptep, pgsize, ncontig);
/* Make sure we don't lose the dirty or young state */
if (pte_dirty(orig_pte))
@@ -476,7 +492,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
ncontig = find_num_contig(mm, addr, ptep, &pgsize);
dpfn = pgsize >> PAGE_SHIFT;
- pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig);
+ pte = get_clear_contig(mm, addr, ptep, pgsize, ncontig);
pte = pte_wrprotect(pte);
hugeprot = pte_pgprot(pte);
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 1e7b1550e2fc..a1410143ea62 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -90,6 +90,32 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit;
phys_addr_t __ro_after_init arm64_dma_phys_limit = PHYS_MASK + 1;
#endif
+/* Current arm64 boot protocol requires 2MB alignment */
+#define CRASH_ALIGN SZ_2M
+
+#define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit
+#define CRASH_ADDR_HIGH_MAX (PHYS_MASK + 1)
+
+static int __init reserve_crashkernel_low(unsigned long long low_size)
+{
+ unsigned long long low_base;
+
+ low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
+ if (!low_base) {
+ pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
+ return -ENOMEM;
+ }
+
+ pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n",
+ low_base, low_base + low_size, low_size >> 20);
+
+ crashk_low_res.start = low_base;
+ crashk_low_res.end = low_base + low_size - 1;
+ insert_resource(&iomem_resource, &crashk_low_res);
+
+ return 0;
+}
+
/*
* reserve_crashkernel() - reserves memory for crash kernel
*
@@ -100,17 +126,35 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit = PHYS_MASK + 1;
static void __init reserve_crashkernel(void)
{
unsigned long long crash_base, crash_size;
- unsigned long long crash_max = arm64_dma_phys_limit;
+ unsigned long long crash_low_size = 0;
+ unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
+ char *cmdline = boot_command_line;
int ret;
if (!IS_ENABLED(CONFIG_KEXEC_CORE))
return;
- ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+ /* crashkernel=X[@offset] */
+ ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
&crash_size, &crash_base);
- /* no crashkernel= or invalid value specified */
- if (ret || !crash_size)
+ if (ret == -ENOENT) {
+ ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
+ if (ret || !crash_size)
+ return;
+
+ /*
+ * crashkernel=Y,low is optional, but an invalid value is not
+ * allowed.
+ */
+ ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base);
+ if (ret && (ret != -ENOENT))
+ return;
+
+ crash_max = CRASH_ADDR_HIGH_MAX;
+ } else if (ret || !crash_size) {
+ /* The specified value is invalid */
return;
+ }
crash_size = PAGE_ALIGN(crash_size);
@@ -118,8 +162,7 @@ static void __init reserve_crashkernel(void)
if (crash_base)
crash_max = crash_base + crash_size;
- /* Current arm64 boot protocol requires 2MB alignment */
- crash_base = memblock_phys_alloc_range(crash_size, SZ_2M,
+ crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
crash_base, crash_max);
if (!crash_base) {
pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
@@ -127,6 +170,12 @@ static void __init reserve_crashkernel(void)
return;
}
+ if ((crash_base >= CRASH_ADDR_LOW_MAX) &&
+ crash_low_size && reserve_crashkernel_low(crash_low_size)) {
+ memblock_phys_free(crash_base, crash_size);
+ return;
+ }
+
pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
crash_base, crash_base + crash_size, crash_size >> 20);
@@ -135,8 +184,12 @@ static void __init reserve_crashkernel(void)
* map. Inform kmemleak so that it won't try to access it.
*/
kmemleak_ignore_phys(crash_base);
+ if (crashk_low_res.end)
+ kmemleak_ignore_phys(crashk_low_res.start);
+
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
+ insert_resource(&iomem_resource, &crashk_res);
}
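(Illustrative command lines accepted by the parsing above — example sizes
only:)

    crashkernel=256M                            # single low reservation
    crashkernel=2G@4G                           # fixed base, 2MB aligned
    crashkernel=4G,high crashkernel=256M,low    # high region plus explicit low memory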
/*
@@ -157,7 +210,7 @@ static phys_addr_t __init max_zone_phys(unsigned int zone_bits)
return min(zone_mask, memblock_end_of_DRAM() - 1) + 1;
}
-static void __init zone_sizes_init(unsigned long min, unsigned long max)
+static void __init zone_sizes_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES] = {0};
unsigned int __maybe_unused acpi_zone_dma_bits;
@@ -176,7 +229,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
if (!arm64_dma_phys_limit)
arm64_dma_phys_limit = dma32_phys_limit;
#endif
- max_zone_pfns[ZONE_NORMAL] = max;
+ max_zone_pfns[ZONE_NORMAL] = max_pfn;
free_area_init(max_zone_pfns);
}
@@ -374,7 +427,7 @@ void __init bootmem_init(void)
* done after the fixed reservations
*/
sparse_init();
- zone_sizes_init(min, max);
+ zone_sizes_init();
/*
* Reserve the CMA area after arm64_dma_phys_limit was initialised.
diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c
index d7da8ca40d2e..4ea2eefbc053 100644
--- a/arch/arm64/mm/trans_pgd.c
+++ b/arch/arm64/mm/trans_pgd.c
@@ -238,7 +238,7 @@ int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0,
int this_level, index, level_lsb, level_msb;
dst_addr &= PAGE_MASK;
- prev_level_entry = pte_val(pfn_pte(pfn, PAGE_KERNEL_EXEC));
+ prev_level_entry = pte_val(pfn_pte(pfn, PAGE_KERNEL_ROX));
for (this_level = 3; this_level >= 0; this_level--) {
levels[this_level] = trans_alloc(info);
diff --git a/arch/arm64/tools/Makefile b/arch/arm64/tools/Makefile
index cf1307188150..07a93ab21a62 100644
--- a/arch/arm64/tools/Makefile
+++ b/arch/arm64/tools/Makefile
@@ -3,7 +3,7 @@
gen := arch/$(ARCH)/include/generated
kapi := $(gen)/asm
-kapi-hdrs-y := $(kapi)/cpucaps.h
+kapi-hdrs-y := $(kapi)/cpucaps.h $(kapi)/sysreg-defs.h
targets += $(addprefix ../../../, $(kapi-hdrs-y))
@@ -14,5 +14,11 @@ kapi: $(kapi-hdrs-y)
quiet_cmd_gen_cpucaps = GEN $@
cmd_gen_cpucaps = mkdir -p $(dir $@); $(AWK) -f $(real-prereqs) > $@
+quiet_cmd_gen_sysreg = GEN $@
+ cmd_gen_sysreg = mkdir -p $(dir $@); $(AWK) -f $(real-prereqs) > $@
+
$(kapi)/cpucaps.h: $(src)/gen-cpucaps.awk $(src)/cpucaps FORCE
$(call if_changed,gen_cpucaps)
+
+$(kapi)/sysreg-defs.h: $(src)/gen-sysreg.awk $(src)/sysreg FORCE
+ $(call if_changed,gen_sysreg)
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 3ed418f70e3b..e52b289a27c2 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -43,6 +43,8 @@ KVM_PROTECTED_MODE
MISMATCHED_CACHE_TYPE
MTE
MTE_ASYMM
+SME
+SME_FA64
SPECTRE_V2
SPECTRE_V3A
SPECTRE_V4
diff --git a/arch/arm64/tools/gen-sysreg.awk b/arch/arm64/tools/gen-sysreg.awk
new file mode 100755
index 000000000000..89bfb74e28de
--- /dev/null
+++ b/arch/arm64/tools/gen-sysreg.awk
@@ -0,0 +1,268 @@
+#!/bin/awk -f
+# SPDX-License-Identifier: GPL-2.0
+# gen-sysreg.awk: arm64 sysreg header generator
+#
+# Usage: awk -f gen-sysreg.awk sysregs.txt
+
+# Log an error and terminate
+function fatal(msg) {
+ print "Error at " NR ": " msg > "/dev/stderr"
+ exit 1
+}
+
+# Sanity check that the start or end of a block makes sense at this point in
+# the file. If not, produce an error and terminate.
+#
+# @this - the $Block or $EndBlock
+# @prev - the only valid block to already be in (value of @block)
+# @new - the new value of @block
+function change_block(this, prev, new) {
+ if (block != prev)
+ fatal("unexpected " this " (inside " block ")")
+
+ block = new
+}
+
+# Sanity check that the number of records for a field makes sense. If not,
+# produce an error and terminate.
+function expect_fields(nf) {
+ if (NF != nf)
+ fatal(NF " fields found where " nf " expected")
+}
+
+# Print a CPP macro definition, padded with spaces so that the macro bodies
+# line up in a column
+function define(name, val) {
+ printf "%-48s%s\n", "#define " name, val
+}
+
+# Print standard BITMASK/SHIFT/WIDTH CPP definitions for a field
+function define_field(reg, field, msb, lsb) {
+ define(reg "_" field, "GENMASK(" msb ", " lsb ")")
+ define(reg "_" field "_MASK", "GENMASK(" msb ", " lsb ")")
+ define(reg "_" field "_SHIFT", lsb)
+ define(reg "_" field "_WIDTH", msb - lsb + 1)
+}
+
+# Parse a "<msb>[:<lsb>]" string into the global variables @msb and @lsb
+function parse_bitdef(reg, field, bitdef, _bits)
+{
+ if (bitdef ~ /^[0-9]+$/) {
+ msb = bitdef
+ lsb = bitdef
+ } else if (split(bitdef, _bits, ":") == 2) {
+ msb = _bits[1]
+ lsb = _bits[2]
+ } else {
+ fatal("invalid bit-range definition '" bitdef "'")
+ }
+
+ if (msb != next_bit)
+ fatal(reg "." field " starts at " msb " not " next_bit)
+ if (63 < msb || msb < 0)
+ fatal(reg "." field " invalid high bit in '" bitdef "'")
+ if (63 < lsb || lsb < 0)
+ fatal(reg "." field " invalid low bit in '" bitdef "'")
+ if (msb < lsb)
+ fatal(reg "." field " invalid bit-range '" bitdef "'")
+
+ next_bit = lsb - 1
+}
+
+BEGIN {
+ print "#ifndef __ASM_SYSREG_DEFS_H"
+ print "#define __ASM_SYSREG_DEFS_H"
+ print ""
+ print "/* Generated file - do not edit */"
+ print ""
+
+ block = "None"
+}
+
+END {
+ print "#endif /* __ASM_SYSREG_DEFS_H */"
+}
+
+# skip blank lines and comment lines
+/^$/ { next }
+/^#/ { next }
+
+/^SysregFields/ {
+ change_block("SysregFields", "None", "SysregFields")
+ expect_fields(2)
+
+ reg = $2
+
+ res0 = "UL(0)"
+ res1 = "UL(0)"
+
+ next_bit = 63
+
+ next
+}
+
+/^EndSysregFields/ {
+ if (next_bit > 0)
+ fatal("Unspecified bits in " reg)
+
+ change_block("EndSysregFields", "SysregFields", "None")
+
+ define(reg "_RES0", "(" res0 ")")
+ define(reg "_RES1", "(" res1 ")")
+ print ""
+
+ reg = null
+ res0 = null
+ res1 = null
+
+ next
+}
+
+/^Sysreg/ {
+ change_block("Sysreg", "None", "Sysreg")
+ expect_fields(7)
+
+ reg = $2
+ op0 = $3
+ op1 = $4
+ crn = $5
+ crm = $6
+ op2 = $7
+
+ res0 = "UL(0)"
+ res1 = "UL(0)"
+
+ define("REG_" reg, "S" op0 "_" op1 "_C" crn "_C" crm "_" op2)
+ define("SYS_" reg, "sys_reg(" op0 ", " op1 ", " crn ", " crm ", " op2 ")")
+
+ define("SYS_" reg "_Op0", op0)
+ define("SYS_" reg "_Op1", op1)
+ define("SYS_" reg "_CRn", crn)
+ define("SYS_" reg "_CRm", crm)
+ define("SYS_" reg "_Op2", op2)
+
+ print ""
+
+ next_bit = 63
+
+ next
+}
+
+/^EndSysreg/ {
+ if (next_bit > 0)
+ fatal("Unspecified bits in " reg)
+
+ change_block("EndSysreg", "Sysreg", "None")
+
+ if (res0 != null)
+ define(reg "_RES0", "(" res0 ")")
+ if (res1 != null)
+ define(reg "_RES1", "(" res1 ")")
+ if (res0 != null || res1 != null)
+ print ""
+
+ reg = null
+ op0 = null
+ op1 = null
+ crn = null
+ crm = null
+ op2 = null
+ res0 = null
+ res1 = null
+
+ next
+}
+
+# Currently this is effectively a comment; in future we may want to emit
+# defines for the fields.
+/^Fields/ && (block == "Sysreg") {
+ expect_fields(2)
+
+ if (next_bit != 63)
+ fatal("Some fields already defined for " reg)
+
+ print "/* For " reg " fields see " $2 " */"
+ print ""
+
+ next_bit = 0
+ res0 = null
+ res1 = null
+
+ next
+}
+
+
+/^Res0/ && (block == "Sysreg" || block == "SysregFields") {
+ expect_fields(2)
+ parse_bitdef(reg, "RES0", $2)
+ field = "RES0_" msb "_" lsb
+
+ res0 = res0 " | GENMASK_ULL(" msb ", " lsb ")"
+
+ next
+}
+
+/^Res1/ && (block == "Sysreg" || block == "SysregFields") {
+ expect_fields(2)
+ parse_bitdef(reg, "RES1", $2)
+ field = "RES1_" msb "_" lsb
+
+ res1 = res1 " | GENMASK_ULL(" msb ", " lsb ")"
+
+ next
+}
+
+/^Field/ && (block == "Sysreg" || block == "SysregFields") {
+ expect_fields(3)
+ field = $3
+ parse_bitdef(reg, field, $2)
+
+ define_field(reg, field, msb, lsb)
+ print ""
+
+ next
+}
+
+/^Raz/ && (block == "Sysreg" || block == "SysregFields") {
+ expect_fields(2)
+ parse_bitdef(reg, field, $2)
+
+ next
+}
+
+/^Enum/ {
+ change_block("Enum", "Sysreg", "Enum")
+ expect_fields(3)
+ field = $3
+ parse_bitdef(reg, field, $2)
+
+ define_field(reg, field, msb, lsb)
+
+ next
+}
+
+/^EndEnum/ {
+ change_block("EndEnum", "Enum", "Sysreg")
+ field = null
+ msb = null
+ lsb = null
+ print ""
+ next
+}
+
+/0b[01]+/ && block == "Enum" {
+ expect_fields(2)
+ val = $1
+ name = $2
+
+ define(reg "_" field "_" name, "UL(" val ")")
+ next
+}
+
+# Any lines not handled by previous rules are unexpected
+{
+ fatal("unhandled statement")
+}
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
new file mode 100644
index 000000000000..ff5e552f7420
--- /dev/null
+++ b/arch/arm64/tools/sysreg
@@ -0,0 +1,369 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# System register metadata
+
+# Each System register is described by a Sysreg block:
+
+# Sysreg <name> <op0> <op1> <crn> <crm> <op2>
+# <field>
+# ...
+# EndSysreg
+
+# Within a Sysreg block, each field can be described as one of:
+
+# Res0 <msb>[:<lsb>]
+
+# Res1 <msb>[:<lsb>]
+
+# Field <msb>[:<lsb>] <name>
+
+# Enum <msb>[:<lsb>] <name>
+# <enumval> <enumname>
+# ...
+# EndEnum
+
+# Alternatively if multiple registers share the same layout then
+# a SysregFields block can be used to describe the shared layout
+
+# SysregFields <fieldsname>
+# <field>
+# ...
+# EndSysregFields
+
+# and referenced from within the Sysreg:
+
+# Sysreg <name> <op0> <op1> <crn> <crm> <op2>
+# Fields <fieldsname>
+# EndSysreg
+
+# For ID registers we adopt a few conventions for translating the
+# language in the ARM into defines:
+#
+# NI - Not implemented
+# IMP - Implemented
+#
+# In general it is recommended that new enumeration items be named for the
+# feature that introduces them (eg, FEAT_LS64_ACCDATA introduces enumeration
+# item ACCDATA), though it may be more tasteful to do something else.
+
+Sysreg ID_AA64ISAR0_EL1 3 0 0 6 0
+Enum 63:60 RNDR
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 59:56 TLB
+ 0b0000 NI
+ 0b0001 OS
+ 0b0010 RANGE
+EndEnum
+Enum 55:52 TS
+ 0b0000 NI
+ 0b0001 FLAGM
+ 0b0010 FLAGM2
+EndEnum
+Enum 51:48 FHM
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 47:44 DP
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 43:40 SM4
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 39:36 SM3
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 35:32 SHA3
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 31:28 RDM
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 27:24 TME
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 23:20 ATOMIC
+ 0b0000 NI
+ 0b0010 IMP
+EndEnum
+Enum 19:16 CRC32
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 15:12 SHA2
+ 0b0000 NI
+ 0b0001 SHA256
+ 0b0010 SHA512
+EndEnum
+Enum 11:8 SHA1
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 7:4 AES
+ 0b0000 NI
+ 0b0001 AES
+ 0b0010 PMULL
+EndEnum
+Res0 3:0
+EndSysreg
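(For illustration, the gen-sysreg.awk rules above expand the RNDR enum at the
top of this register into C defines along these lines:)

    #define ID_AA64ISAR0_EL1_RNDR           GENMASK(63, 60)
    #define ID_AA64ISAR0_EL1_RNDR_MASK      GENMASK(63, 60)
    #define ID_AA64ISAR0_EL1_RNDR_SHIFT     60
    #define ID_AA64ISAR0_EL1_RNDR_WIDTH     4
    #define ID_AA64ISAR0_EL1_RNDR_NI        UL(0b0000)
    #define ID_AA64ISAR0_EL1_RNDR_IMP       UL(0b0001)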
+
+Sysreg SCTLR_EL1 3 0 1 0 0
+Field 63 TIDCP
+Field 62 SPINMASK
+Field 61 NMI
+Field 60 EnTP2
+Res0 59:58
+Field 57 EPAN
+Field 56 EnALS
+Field 55 EnAS0
+Field 54 EnASR
+Field 53 TME
+Field 52 TME0
+Field 51 TMT
+Field 50 TMT0
+Field 49:46 TWEDEL
+Field 45 TWEDEn
+Field 44 DSSBS
+Field 43 ATA
+Field 42 ATA0
+Enum 41:40 TCF
+ 0b00 NONE
+ 0b01 SYNC
+ 0b10 ASYNC
+ 0b11 ASYMM
+EndEnum
+Enum 39:38 TCF0
+ 0b00 NONE
+ 0b01 SYNC
+ 0b10 ASYNC
+ 0b11 ASYMM
+EndEnum
+Field 37 ITFSB
+Field 36 BT1
+Field 35 BT0
+Res0 34
+Field 33 MSCEn
+Field 32 CMOW
+Field 31 EnIA
+Field 30 EnIB
+Field 29 LSMAOE
+Field 28 nTLSMD
+Field 27 EnDA
+Field 26 UCI
+Field 25 EE
+Field 24 E0E
+Field 23 SPAN
+Field 22 EIS
+Field 21 IESB
+Field 20 TSCXT
+Field 19 WXN
+Field 18 nTWE
+Res0 17
+Field 16 nTWI
+Field 15 UCT
+Field 14 DZE
+Field 13 EnDB
+Field 12 I
+Field 11 EOS
+Field 10 EnRCTX
+Field 9 UMA
+Field 8 SED
+Field 7 ITD
+Field 6 nAA
+Field 5 CP15BEN
+Field 4 SA0
+Field 3 SA
+Field 2 C
+Field 1 A
+Field 0 M
+EndSysreg
+
+SysregFields CPACR_ELx
+Res0 63:29
+Field 28 TTA
+Res0 27:26
+Field 25:24 SMEN
+Res0 23:22
+Field 21:20 FPEN
+Res0 19:18
+Field 17:16 ZEN
+Res0 15:0
+EndSysregFields
+
+Sysreg CPACR_EL1 3 0 1 0 2
+Fields CPACR_ELx
+EndSysreg
+
+Sysreg SMPRI_EL1 3 0 1 2 4
+Res0 63:4
+Field 3:0 PRIORITY
+EndSysreg
+
+SysregFields ZCR_ELx
+Res0 63:9
+Raz 8:4
+Field 3:0 LEN
+EndSysregFields
+
+Sysreg ZCR_EL1 3 0 1 2 0
+Fields ZCR_ELx
+EndSysreg
+
+SysregFields SMCR_ELx
+Res0 63:32
+Field 31 FA64
+Res0 30:9
+Raz 8:4
+Field 3:0 LEN
+EndSysregFields
+
+Sysreg SMCR_EL1 3 0 1 2 6
+Fields SMCR_ELx
+EndSysreg
+
+Sysreg FAR_EL1 3 0 6 0 0
+Field 63:0 ADDR
+EndSysreg
+
+SysregFields CONTEXTIDR_ELx
+Res0 63:32
+Field 31:0 PROCID
+EndSysregFields
+
+Sysreg CONTEXTIDR_EL1 3 0 13 0 1
+Fields CONTEXTIDR_ELx
+EndSysreg
+
+Sysreg CLIDR_EL1 3 1 0 0 1
+Res0 63:47
+Field 46:33 Ttypen
+Field 32:30 ICB
+Field 29:27 LoUU
+Field 26:24 LoC
+Field 23:21 LoUIS
+Field 20:18 Ctype7
+Field 17:15 Ctype6
+Field 14:12 Ctype5
+Field 11:9 Ctype4
+Field 8:6 Ctype3
+Field 5:3 Ctype2
+Field 2:0 Ctype1
+EndSysreg
+
+Sysreg SMIDR_EL1 3 1 0 0 6
+Res0 63:32
+Field 31:24 IMPLEMENTER
+Field 23:16 REVISION
+Field 15 SMPS
+Res0 14:12
+Field 11:0 AFFINITY
+EndSysreg
+
+Sysreg CSSELR_EL1 3 2 0 0 0
+Res0 63:5
+Field 4 TnD
+Field 3:1 Level
+Field 0 InD
+EndSysreg
+
+Sysreg SVCR 3 3 4 2 2
+Res0 63:2
+Field 1 ZA
+Field 0 SM
+EndSysreg
+
+Sysreg ZCR_EL2 3 4 1 2 0
+Fields ZCR_ELx
+EndSysreg
+
+Sysreg SMPRIMAP_EL2 3 4 1 2 5
+Field 63:60 P15
+Field 59:56 P14
+Field 55:52 P13
+Field 51:48 P12
+Field 47:44 P11
+Field 43:40 P10
+Field 39:36 P9
+Field 35:32 P8
+Field 31:28 P7
+Field 27:24 P6
+Field 23:20 P5
+Field 19:16 P4
+Field 15:12 P3
+Field 11:8 P2
+Field 7:4 P1
+Field 3:0 P0
+EndSysreg
+
+Sysreg SMCR_EL2 3 4 1 2 6
+Fields SMCR_ELx
+EndSysreg
+
+Sysreg DACR32_EL2 3 4 3 0 0
+Res0 63:32
+Field 31:30 D15
+Field 29:28 D14
+Field 27:26 D13
+Field 25:24 D12
+Field 23:22 D11
+Field 21:20 D10
+Field 19:18 D9
+Field 17:16 D8
+Field 15:14 D7
+Field 13:12 D6
+Field 11:10 D5
+Field 9:8 D4
+Field 7:6 D3
+Field 5:4 D2
+Field 3:2 D1
+Field 1:0 D0
+EndSysreg
+
+Sysreg FAR_EL2 3 4 6 0 0
+Field 63:0 ADDR
+EndSysreg
+
+Sysreg CONTEXTIDR_EL2 3 4 13 0 1
+Fields CONTEXTIDR_ELx
+EndSysreg
+
+Sysreg CPACR_EL12 3 5 1 0 2
+Fields CPACR_ELx
+EndSysreg
+
+Sysreg ZCR_EL12 3 5 1 2 0
+Fields ZCR_ELx
+EndSysreg
+
+Sysreg SMCR_EL12 3 5 1 2 6
+Fields SMCR_ELx
+EndSysreg
+
+Sysreg FAR_EL12 3 5 6 0 0
+Field 63:0 ADDR
+EndSysreg
+
+Sysreg CONTEXTIDR_EL12 3 5 13 0 1
+Fields CONTEXTIDR_ELx
+EndSysreg
+
+SysregFields TTBRx_EL1
+Field 63:48 ASID
+Field 47:1 BADDR
+Field 0 CnP
+EndSysregFields
+
+Sysreg TTBR0_EL1 3 0 2 0 0
+Fields TTBRx_EL1
+EndSysreg
+
+Sysreg TTBR1_EL1 3 0 2 0 1
+Fields TTBRx_EL1
+EndSysreg
diff --git a/arch/csky/Kbuild b/arch/csky/Kbuild
index 4e39f7abdeb6..0621eaea4196 100644
--- a/arch/csky/Kbuild
+++ b/arch/csky/Kbuild
@@ -1,4 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
+obj-y += kernel/ mm/
+
# for cleaning
subdir- += boot
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 75ef86605d69..21d72b078eef 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -320,6 +320,14 @@ config HOTPLUG_CPU
controlled through /sys/devices/system/cpu/cpu1/hotplug/target.
Say N if you want to disable CPU hotplug.
+
+config HAVE_EFFICIENT_UNALIGNED_STRING_OPS
+ bool "Enable EFFICIENT_UNALIGNED_STRING_OPS for abiv2"
+ depends on CPU_CK807 || CPU_CK810 || CPU_CK860
+ help
+ Say Y here to enable EFFICIENT_UNALIGNED_STRING_OPS. Some CPU models can
+ handle unaligned accesses in hardware.
+
endmenu
source "arch/csky/Kconfig.platforms"
diff --git a/arch/csky/Makefile b/arch/csky/Makefile
index 866805077636..4e1d619fd5c6 100644
--- a/arch/csky/Makefile
+++ b/arch/csky/Makefile
@@ -61,15 +61,12 @@ KBUILD_AFLAGS += $(KBUILD_CFLAGS)
head-y := arch/csky/kernel/head.o
-core-y += arch/csky/kernel/
-core-y += arch/csky/mm/
core-y += arch/csky/$(CSKYABI)/
libs-y += arch/csky/lib/ \
$(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name)
boot := arch/csky/boot
-core-y += $(boot)/dts/
all: zImage
diff --git a/arch/csky/abiv1/Makefile b/arch/csky/abiv1/Makefile
index 601ce3b2fb85..a4b2ade0fc67 100644
--- a/arch/csky/abiv1/Makefile
+++ b/arch/csky/abiv1/Makefile
@@ -4,5 +4,3 @@ obj-y += bswapdi.o
obj-y += bswapsi.o
obj-y += cacheflush.o
obj-y += mmap.o
-obj-y += memcpy.o
-obj-y += strksyms.o
diff --git a/arch/csky/abiv1/memcpy.S b/arch/csky/abiv1/memcpy.S
deleted file mode 100644
index 5078eb5169fa..000000000000
--- a/arch/csky/abiv1/memcpy.S
+++ /dev/null
@@ -1,347 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
-
-#include <linux/linkage.h>
-
-.macro GET_FRONT_BITS rx y
-#ifdef __cskyLE__
- lsri \rx, \y
-#else
- lsli \rx, \y
-#endif
-.endm
-
-.macro GET_AFTER_BITS rx y
-#ifdef __cskyLE__
- lsli \rx, \y
-#else
- lsri \rx, \y
-#endif
-.endm
-
-/* void *memcpy(void *dest, const void *src, size_t n); */
-ENTRY(memcpy)
- mov r7, r2
- cmplti r4, 4
- bt .L_copy_by_byte
- mov r6, r2
- andi r6, 3
- cmpnei r6, 0
- jbt .L_dest_not_aligned
- mov r6, r3
- andi r6, 3
- cmpnei r6, 0
- jbt .L_dest_aligned_but_src_not_aligned
-.L0:
- cmplti r4, 16
- jbt .L_aligned_and_len_less_16bytes
- subi sp, 8
- stw r8, (sp, 0)
-.L_aligned_and_len_larger_16bytes:
- ldw r1, (r3, 0)
- ldw r5, (r3, 4)
- ldw r8, (r3, 8)
- stw r1, (r7, 0)
- ldw r1, (r3, 12)
- stw r5, (r7, 4)
- stw r8, (r7, 8)
- stw r1, (r7, 12)
- subi r4, 16
- addi r3, 16
- addi r7, 16
- cmplti r4, 16
- jbf .L_aligned_and_len_larger_16bytes
- ldw r8, (sp, 0)
- addi sp, 8
- cmpnei r4, 0
- jbf .L_return
-
-.L_aligned_and_len_less_16bytes:
- cmplti r4, 4
- bt .L_copy_by_byte
-.L1:
- ldw r1, (r3, 0)
- stw r1, (r7, 0)
- subi r4, 4
- addi r3, 4
- addi r7, 4
- cmplti r4, 4
- jbf .L1
- br .L_copy_by_byte
-
-.L_return:
- rts
-
-.L_copy_by_byte: /* len less than 4 bytes */
- cmpnei r4, 0
- jbf .L_return
-.L4:
- ldb r1, (r3, 0)
- stb r1, (r7, 0)
- addi r3, 1
- addi r7, 1
- decne r4
- jbt .L4
- rts
-
-/*
- * If dest is not aligned, just copying some bytes makes the dest align.
- * Afther that, we judge whether the src is aligned.
- */
-.L_dest_not_aligned:
- mov r5, r3
- rsub r5, r5, r7
- abs r5, r5
- cmplt r5, r4
- bt .L_copy_by_byte
- mov r5, r7
- sub r5, r3
- cmphs r5, r4
- bf .L_copy_by_byte
- mov r5, r6
-.L5:
- ldb r1, (r3, 0) /* makes the dest align. */
- stb r1, (r7, 0)
- addi r5, 1
- subi r4, 1
- addi r3, 1
- addi r7, 1
- cmpnei r5, 4
- jbt .L5
- cmplti r4, 4
- jbt .L_copy_by_byte
- mov r6, r3 /* judge whether the src is aligned. */
- andi r6, 3
- cmpnei r6, 0
- jbf .L0
-
-/* Judge the number of misaligned, 1, 2, 3? */
-.L_dest_aligned_but_src_not_aligned:
- mov r5, r3
- rsub r5, r5, r7
- abs r5, r5
- cmplt r5, r4
- bt .L_copy_by_byte
- bclri r3, 0
- bclri r3, 1
- ldw r1, (r3, 0)
- addi r3, 4
- cmpnei r6, 2
- bf .L_dest_aligned_but_src_not_aligned_2bytes
- cmpnei r6, 3
- bf .L_dest_aligned_but_src_not_aligned_3bytes
-
-.L_dest_aligned_but_src_not_aligned_1byte:
- mov r5, r7
- sub r5, r3
- cmphs r5, r4
- bf .L_copy_by_byte
- cmplti r4, 16
- bf .L11
-.L10: /* If the len is less than 16 bytes */
- GET_FRONT_BITS r1 8
- mov r5, r1
- ldw r6, (r3, 0)
- mov r1, r6
- GET_AFTER_BITS r6 24
- or r5, r6
- stw r5, (r7, 0)
- subi r4, 4
- addi r3, 4
- addi r7, 4
- cmplti r4, 4
- bf .L10
- subi r3, 3
- br .L_copy_by_byte
-.L11:
- subi sp, 16
- stw r8, (sp, 0)
- stw r9, (sp, 4)
- stw r10, (sp, 8)
- stw r11, (sp, 12)
-.L12:
- ldw r5, (r3, 0)
- ldw r11, (r3, 4)
- ldw r8, (r3, 8)
- ldw r9, (r3, 12)
-
- GET_FRONT_BITS r1 8 /* little or big endian? */
- mov r10, r5
- GET_AFTER_BITS r5 24
- or r5, r1
-
- GET_FRONT_BITS r10 8
- mov r1, r11
- GET_AFTER_BITS r11 24
- or r11, r10
-
- GET_FRONT_BITS r1 8
- mov r10, r8
- GET_AFTER_BITS r8 24
- or r8, r1
-
- GET_FRONT_BITS r10 8
- mov r1, r9
- GET_AFTER_BITS r9 24
- or r9, r10
-
- stw r5, (r7, 0)
- stw r11, (r7, 4)
- stw r8, (r7, 8)
- stw r9, (r7, 12)
- subi r4, 16
- addi r3, 16
- addi r7, 16
- cmplti r4, 16
- jbf .L12
- ldw r8, (sp, 0)
- ldw r9, (sp, 4)
- ldw r10, (sp, 8)
- ldw r11, (sp, 12)
- addi sp , 16
- cmplti r4, 4
- bf .L10
- subi r3, 3
- br .L_copy_by_byte
-
-.L_dest_aligned_but_src_not_aligned_2bytes:
- cmplti r4, 16
- bf .L21
-.L20:
- GET_FRONT_BITS r1 16
- mov r5, r1
- ldw r6, (r3, 0)
- mov r1, r6
- GET_AFTER_BITS r6 16
- or r5, r6
- stw r5, (r7, 0)
- subi r4, 4
- addi r3, 4
- addi r7, 4
- cmplti r4, 4
- bf .L20
- subi r3, 2
- br .L_copy_by_byte
- rts
-
-.L21: /* n > 16 */
- subi sp, 16
- stw r8, (sp, 0)
- stw r9, (sp, 4)
- stw r10, (sp, 8)
- stw r11, (sp, 12)
-
-.L22:
- ldw r5, (r3, 0)
- ldw r11, (r3, 4)
- ldw r8, (r3, 8)
- ldw r9, (r3, 12)
-
- GET_FRONT_BITS r1 16
- mov r10, r5
- GET_AFTER_BITS r5 16
- or r5, r1
-
- GET_FRONT_BITS r10 16
- mov r1, r11
- GET_AFTER_BITS r11 16
- or r11, r10
-
- GET_FRONT_BITS r1 16
- mov r10, r8
- GET_AFTER_BITS r8 16
- or r8, r1
-
- GET_FRONT_BITS r10 16
- mov r1, r9
- GET_AFTER_BITS r9 16
- or r9, r10
-
- stw r5, (r7, 0)
- stw r11, (r7, 4)
- stw r8, (r7, 8)
- stw r9, (r7, 12)
- subi r4, 16
- addi r3, 16
- addi r7, 16
- cmplti r4, 16
- jbf .L22
- ldw r8, (sp, 0)
- ldw r9, (sp, 4)
- ldw r10, (sp, 8)
- ldw r11, (sp, 12)
- addi sp, 16
- cmplti r4, 4
- bf .L20
- subi r3, 2
- br .L_copy_by_byte
-
-
-.L_dest_aligned_but_src_not_aligned_3bytes:
- cmplti r4, 16
- bf .L31
-.L30:
- GET_FRONT_BITS r1 24
- mov r5, r1
- ldw r6, (r3, 0)
- mov r1, r6
- GET_AFTER_BITS r6 8
- or r5, r6
- stw r5, (r7, 0)
- subi r4, 4
- addi r3, 4
- addi r7, 4
- cmplti r4, 4
- bf .L30
- subi r3, 1
- br .L_copy_by_byte
-.L31:
- subi sp, 16
- stw r8, (sp, 0)
- stw r9, (sp, 4)
- stw r10, (sp, 8)
- stw r11, (sp, 12)
-.L32:
- ldw r5, (r3, 0)
- ldw r11, (r3, 4)
- ldw r8, (r3, 8)
- ldw r9, (r3, 12)
-
- GET_FRONT_BITS r1 24
- mov r10, r5
- GET_AFTER_BITS r5 8
- or r5, r1
-
- GET_FRONT_BITS r10 24
- mov r1, r11
- GET_AFTER_BITS r11 8
- or r11, r10
-
- GET_FRONT_BITS r1 24
- mov r10, r8
- GET_AFTER_BITS r8 8
- or r8, r1
-
- GET_FRONT_BITS r10 24
- mov r1, r9
- GET_AFTER_BITS r9 8
- or r9, r10
-
- stw r5, (r7, 0)
- stw r11, (r7, 4)
- stw r8, (r7, 8)
- stw r9, (r7, 12)
- subi r4, 16
- addi r3, 16
- addi r7, 16
- cmplti r4, 16
- jbf .L32
- ldw r8, (sp, 0)
- ldw r9, (sp, 4)
- ldw r10, (sp, 8)
- ldw r11, (sp, 12)
- addi sp, 16
- cmplti r4, 4
- bf .L30
- subi r3, 1
- br .L_copy_by_byte
diff --git a/arch/csky/abiv1/strksyms.c b/arch/csky/abiv1/strksyms.c
deleted file mode 100644
index c7ccbb27e8d7..000000000000
--- a/arch/csky/abiv1/strksyms.c
+++ /dev/null
@@ -1,6 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
-
-#include <linux/module.h>
-
-EXPORT_SYMBOL(memcpy);
diff --git a/arch/csky/abiv2/Makefile b/arch/csky/abiv2/Makefile
index c561efa5533c..ea8005fe01a8 100644
--- a/arch/csky/abiv2/Makefile
+++ b/arch/csky/abiv2/Makefile
@@ -2,9 +2,11 @@
obj-y += cacheflush.o
obj-$(CONFIG_CPU_HAS_FPU) += fpu.o
obj-y += memcmp.o
+ifeq ($(CONFIG_HAVE_EFFICIENT_UNALIGNED_STRING_OPS), y)
obj-y += memcpy.o
obj-y += memmove.o
obj-y += memset.o
+endif
obj-y += strcmp.o
obj-y += strcpy.o
obj-y += strlen.o
diff --git a/arch/csky/abiv2/strksyms.c b/arch/csky/abiv2/strksyms.c
index 06da723d8202..8d1fd28c6cf9 100644
--- a/arch/csky/abiv2/strksyms.c
+++ b/arch/csky/abiv2/strksyms.c
@@ -3,10 +3,12 @@
#include <linux/module.h>
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_STRING_OPS
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(memcmp);
EXPORT_SYMBOL(memmove);
+#endif
+EXPORT_SYMBOL(memcmp);
EXPORT_SYMBOL(strcmp);
EXPORT_SYMBOL(strcpy);
EXPORT_SYMBOL(strlen);
diff --git a/arch/csky/boot/Makefile b/arch/csky/boot/Makefile
index dbc9b1bd72f0..c3cfde28f8e6 100644
--- a/arch/csky/boot/Makefile
+++ b/arch/csky/boot/Makefile
@@ -1,6 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
targets := Image zImage uImage
-targets += $(dtb-y)
$(obj)/Image: vmlinux FORCE
$(call if_changed,objcopy)
diff --git a/arch/csky/include/asm/atomic.h b/arch/csky/include/asm/atomic.h
new file mode 100644
index 000000000000..60406ef9c2bb
--- /dev/null
+++ b/arch/csky/include/asm/atomic.h
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_CSKY_ATOMIC_H
+#define __ASM_CSKY_ATOMIC_H
+
+#ifdef CONFIG_SMP
+#include <asm-generic/atomic64.h>
+
+#include <asm/cmpxchg.h>
+#include <asm/barrier.h>
+
+#define __atomic_acquire_fence() __bar_brarw()
+
+#define __atomic_release_fence() __bar_brwaw()
+
+static __always_inline int arch_atomic_read(const atomic_t *v)
+{
+ return READ_ONCE(v->counter);
+}
+static __always_inline void arch_atomic_set(atomic_t *v, int i)
+{
+ WRITE_ONCE(v->counter, i);
+}
+
+#define ATOMIC_OP(op) \
+static __always_inline \
+void arch_atomic_##op(int i, atomic_t *v) \
+{ \
+ unsigned long tmp; \
+ __asm__ __volatile__ ( \
+ "1: ldex.w %0, (%2) \n" \
+ " " #op " %0, %1 \n" \
+ " stex.w %0, (%2) \n" \
+ " bez %0, 1b \n" \
+ : "=&r" (tmp) \
+ : "r" (i), "r" (&v->counter) \
+ : "memory"); \
+}
+
+ATOMIC_OP(add)
+ATOMIC_OP(sub)
+ATOMIC_OP(and)
+ATOMIC_OP( or)
+ATOMIC_OP(xor)
+
+#undef ATOMIC_OP
+
+#define ATOMIC_FETCH_OP(op) \
+static __always_inline \
+int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
+{ \
+ register int ret, tmp; \
+ __asm__ __volatile__ ( \
+ "1: ldex.w %0, (%3) \n" \
+ " mov %1, %0 \n" \
+ " " #op " %0, %2 \n" \
+ " stex.w %0, (%3) \n" \
+ " bez %0, 1b \n" \
+ : "=&r" (tmp), "=&r" (ret) \
+ : "r" (i), "r"(&v->counter) \
+ : "memory"); \
+ return ret; \
+}
+
+#define ATOMIC_OP_RETURN(op, c_op) \
+static __always_inline \
+int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \
+{ \
+ return arch_atomic_fetch_##op##_relaxed(i, v) c_op i; \
+}
+
+#define ATOMIC_OPS(op, c_op) \
+ ATOMIC_FETCH_OP(op) \
+ ATOMIC_OP_RETURN(op, c_op)
+
+ATOMIC_OPS(add, +)
+ATOMIC_OPS(sub, -)
+
+#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
+
+#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
+#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
+
+#undef ATOMIC_OPS
+#undef ATOMIC_OP_RETURN
+
+#define ATOMIC_OPS(op) \
+ ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS( or)
+ATOMIC_OPS(xor)
+
+#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
+
+#undef ATOMIC_OPS
+
+#undef ATOMIC_FETCH_OP
+
+static __always_inline int
+arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
+{
+ int prev, tmp;
+
+ __asm__ __volatile__ (
+ RELEASE_FENCE
+ "1: ldex.w %0, (%3) \n"
+ " cmpne %0, %4 \n"
+ " bf 2f \n"
+ " mov %1, %0 \n"
+ " add %1, %2 \n"
+ " stex.w %1, (%3) \n"
+ " bez %1, 1b \n"
+ FULL_FENCE
+ "2:\n"
+ : "=&r" (prev), "=&r" (tmp)
+ : "r" (a), "r" (&v->counter), "r" (u)
+ : "memory");
+
+ return prev;
+}
+#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
+
+static __always_inline bool
+arch_atomic_inc_unless_negative(atomic_t *v)
+{
+ int rc, tmp;
+
+ __asm__ __volatile__ (
+ RELEASE_FENCE
+ "1: ldex.w %0, (%2) \n"
+ " movi %1, 0 \n"
+ " blz %0, 2f \n"
+ " movi %1, 1 \n"
+ " addi %0, 1 \n"
+ " stex.w %0, (%2) \n"
+ " bez %0, 1b \n"
+ FULL_FENCE
+ "2:\n"
+ : "=&r" (tmp), "=&r" (rc)
+ : "r" (&v->counter)
+ : "memory");
+
+ return tmp ? true : false;
+
+}
+#define arch_atomic_inc_unless_negative arch_atomic_inc_unless_negative
+
+static __always_inline bool
+arch_atomic_dec_unless_positive(atomic_t *v)
+{
+ int rc, tmp;
+
+ __asm__ __volatile__ (
+ RELEASE_FENCE
+ "1: ldex.w %0, (%2) \n"
+ " movi %1, 0 \n"
+ " bhz %0, 2f \n"
+ " movi %1, 1 \n"
+ " subi %0, 1 \n"
+ " stex.w %0, (%2) \n"
+ " bez %0, 1b \n"
+ FULL_FENCE
+ "2:\n"
+ : "=&r" (tmp), "=&r" (rc)
+ : "r" (&v->counter)
+ : "memory");
+
+ return tmp ? true : false;
+}
+#define arch_atomic_dec_unless_positive arch_atomic_dec_unless_positive
+
+static __always_inline int
+arch_atomic_dec_if_positive(atomic_t *v)
+{
+ int dec, tmp;
+
+ __asm__ __volatile__ (
+ RELEASE_FENCE
+ "1: ldex.w %0, (%2) \n"
+ " subi %1, %0, 1 \n"
+ " blz %1, 2f \n"
+ " stex.w %1, (%2) \n"
+ " bez %1, 1b \n"
+ FULL_FENCE
+ "2:\n"
+ : "=&r" (dec), "=&r" (tmp)
+ : "r" (&v->counter)
+ : "memory");
+
+ return dec - 1;
+}
+#define arch_atomic_dec_if_positive arch_atomic_dec_if_positive
+
+#define ATOMIC_OP() \
+static __always_inline \
+int arch_atomic_xchg_relaxed(atomic_t *v, int n) \
+{ \
+ return __xchg_relaxed(n, &(v->counter), 4); \
+} \
+static __always_inline \
+int arch_atomic_cmpxchg_relaxed(atomic_t *v, int o, int n) \
+{ \
+ return __cmpxchg_relaxed(&(v->counter), o, n, 4); \
+} \
+static __always_inline \
+int arch_atomic_cmpxchg_acquire(atomic_t *v, int o, int n) \
+{ \
+ return __cmpxchg_acquire(&(v->counter), o, n, 4); \
+} \
+static __always_inline \
+int arch_atomic_cmpxchg(atomic_t *v, int o, int n) \
+{ \
+ return __cmpxchg(&(v->counter), o, n, 4); \
+}
+
+#define ATOMIC_OPS() \
+ ATOMIC_OP()
+
+ATOMIC_OPS()
+
+#define arch_atomic_xchg_relaxed arch_atomic_xchg_relaxed
+#define arch_atomic_cmpxchg_relaxed arch_atomic_cmpxchg_relaxed
+#define arch_atomic_cmpxchg_acquire arch_atomic_cmpxchg_acquire
+#define arch_atomic_cmpxchg arch_atomic_cmpxchg
+
+#undef ATOMIC_OPS
+#undef ATOMIC_OP
+
+#else
+#include <asm-generic/atomic.h>
+#endif
+
+#endif /* __ASM_CSKY_ATOMIC_H */
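For clarity, ATOMIC_OP(add) above expands to roughly the following (an illustrative expansion, not an extra change). ldex.w/stex.w form a load-exclusive/store-exclusive pair: stex.w leaves 0 in its register when the store loses exclusivity, so bez branches back and retries the whole sequence:

static __always_inline void arch_atomic_add(int i, atomic_t *v)
{
	unsigned long tmp;

	__asm__ __volatile__ (
	"1:	ldex.w		%0, (%2)	\n"	/* exclusive load of v->counter */
	"	add		%0, %1		\n"	/* tmp += i */
	"	stex.w		%0, (%2)	\n"	/* exclusive store; %0 = 0 on failure */
	"	bez		%0, 1b		\n"	/* retry if another CPU intervened */
	: "=&r" (tmp)
	: "r" (i), "r" (&v->counter)
	: "memory");
}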
diff --git a/arch/csky/include/asm/barrier.h b/arch/csky/include/asm/barrier.h
index f4045dd53e17..15de58b10aec 100644
--- a/arch/csky/include/asm/barrier.h
+++ b/arch/csky/include/asm/barrier.h
@@ -37,17 +37,21 @@
* bar.brar
* bar.bwaw
*/
+#define FULL_FENCE ".long 0x842fc000\n"
+#define ACQUIRE_FENCE ".long 0x8427c000\n"
+#define RELEASE_FENCE ".long 0x842ec000\n"
+
#define __bar_brw() asm volatile (".long 0x842cc000\n":::"memory")
#define __bar_br() asm volatile (".long 0x8424c000\n":::"memory")
#define __bar_bw() asm volatile (".long 0x8428c000\n":::"memory")
#define __bar_arw() asm volatile (".long 0x8423c000\n":::"memory")
#define __bar_ar() asm volatile (".long 0x8421c000\n":::"memory")
#define __bar_aw() asm volatile (".long 0x8422c000\n":::"memory")
-#define __bar_brwarw() asm volatile (".long 0x842fc000\n":::"memory")
-#define __bar_brarw() asm volatile (".long 0x8427c000\n":::"memory")
+#define __bar_brwarw() asm volatile (FULL_FENCE:::"memory")
+#define __bar_brarw() asm volatile (ACQUIRE_FENCE:::"memory")
#define __bar_bwarw() asm volatile (".long 0x842bc000\n":::"memory")
#define __bar_brwar() asm volatile (".long 0x842dc000\n":::"memory")
-#define __bar_brwaw() asm volatile (".long 0x842ec000\n":::"memory")
+#define __bar_brwaw() asm volatile (RELEASE_FENCE:::"memory")
#define __bar_brar() asm volatile (".long 0x8425c000\n":::"memory")
#define __bar_brar() asm volatile (".long 0x8425c000\n":::"memory")
#define __bar_bwaw() asm volatile (".long 0x842ac000\n":::"memory")
@@ -56,7 +60,6 @@
#define __smp_rmb() __bar_brar()
#define __smp_wmb() __bar_bwaw()
-#define ACQUIRE_FENCE ".long 0x8427c000\n"
#define __smp_acquire_fence() __bar_brarw()
#define __smp_release_fence() __bar_brwaw()
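Naming the fence opcodes lets later patches splice them directly into larger asm blocks (as the cmpxchg rework below does) while keeping the standalone wrappers. Used alone, a fence is just (illustrative):

/* full read/write barrier, equivalent to __bar_brwarw() */
asm volatile(FULL_FENCE ::: "memory");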
diff --git a/arch/csky/include/asm/cmpxchg.h b/arch/csky/include/asm/cmpxchg.h
index d1bef11f8dc9..5b8faccd65e4 100644
--- a/arch/csky/include/asm/cmpxchg.h
+++ b/arch/csky/include/asm/cmpxchg.h
@@ -64,15 +64,71 @@ extern void __bad_xchg(void);
#define arch_cmpxchg_relaxed(ptr, o, n) \
(__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
-#define arch_cmpxchg(ptr, o, n) \
+#define __cmpxchg_acquire(ptr, old, new, size) \
({ \
+ __typeof__(ptr) __ptr = (ptr); \
+ __typeof__(new) __new = (new); \
+ __typeof__(new) __tmp; \
+ __typeof__(old) __old = (old); \
+ __typeof__(*(ptr)) __ret; \
+ switch (size) { \
+ case 4: \
+ asm volatile ( \
+ "1: ldex.w %0, (%3) \n" \
+ " cmpne %0, %4 \n" \
+ " bt 2f \n" \
+ " mov %1, %2 \n" \
+ " stex.w %1, (%3) \n" \
+ " bez %1, 1b \n" \
+ ACQUIRE_FENCE \
+ "2: \n" \
+ : "=&r" (__ret), "=&r" (__tmp) \
+ : "r" (__new), "r"(__ptr), "r"(__old) \
+ :); \
+ break; \
+ default: \
+ __bad_xchg(); \
+ } \
+ __ret; \
+})
+
+#define arch_cmpxchg_acquire(ptr, o, n) \
+ (__cmpxchg_acquire((ptr), (o), (n), sizeof(*(ptr))))
+
+#define __cmpxchg(ptr, old, new, size) \
+({ \
+ __typeof__(ptr) __ptr = (ptr); \
+ __typeof__(new) __new = (new); \
+ __typeof__(new) __tmp; \
+ __typeof__(old) __old = (old); \
__typeof__(*(ptr)) __ret; \
- __smp_release_fence(); \
- __ret = arch_cmpxchg_relaxed(ptr, o, n); \
- __smp_acquire_fence(); \
+ switch (size) { \
+ case 4: \
+ asm volatile ( \
+ RELEASE_FENCE \
+ "1: ldex.w %0, (%3) \n" \
+ " cmpne %0, %4 \n" \
+ " bt 2f \n" \
+ " mov %1, %2 \n" \
+ " stex.w %1, (%3) \n" \
+ " bez %1, 1b \n" \
+ FULL_FENCE \
+ "2: \n" \
+ : "=&r" (__ret), "=&r" (__tmp) \
+ : "r" (__new), "r"(__ptr), "r"(__old) \
+ :); \
+ break; \
+ default: \
+ __bad_xchg(); \
+ } \
__ret; \
})
+#define arch_cmpxchg(ptr, o, n) \
+ (__cmpxchg((ptr), (o), (n), sizeof(*(ptr))))
+
+#define arch_cmpxchg_local(ptr, o, n) \
+ (__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
#else
#include <asm-generic/cmpxchg.h>
#endif
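As a usage sketch only (inc_saturating() is a hypothetical helper, not part of the patch): the fully-ordered arch_cmpxchg() returns the value actually found at the address, so the classic compare-and-swap retry loop looks like this; INT_MAX comes from <linux/limits.h>:

static inline int inc_saturating(atomic_t *v)
{
	int old = arch_atomic_read(v);

	for (;;) {
		int cur;

		if (old == INT_MAX)
			return old;		/* already saturated, no write */
		cur = arch_cmpxchg(&v->counter, old, old + 1);
		if (cur == old)			/* the swap happened */
			return old + 1;
		old = cur;			/* lost a race; retry with fresh value */
	}
}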
diff --git a/arch/csky/include/asm/io.h b/arch/csky/include/asm/io.h
index f82654053dc0..4725bb977b0f 100644
--- a/arch/csky/include/asm/io.h
+++ b/arch/csky/include/asm/io.h
@@ -5,7 +5,6 @@
#include <linux/pgtable.h>
#include <linux/types.h>
-#include <linux/version.h>
/*
* I/O memory access primitives. Reads are ordered relative to any
@@ -33,6 +32,17 @@
#endif
/*
+ * String version of I/O memory access operations.
+ */
+extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t);
+extern void __memcpy_toio(volatile void __iomem *, const void *, size_t);
+extern void __memset_io(volatile void __iomem *, int, size_t);
+
+#define memset_io(c,v,l) __memset_io((c),(v),(l))
+#define memcpy_fromio(a,c,l) __memcpy_fromio((a),(c),(l))
+#define memcpy_toio(c,a,l) __memcpy_toio((c),(a),(l))
+
+/*
* I/O memory mapping functions.
*/
#define ioremap_wc(addr, size) \
diff --git a/arch/csky/kernel/Makefile b/arch/csky/kernel/Makefile
index 6c0f36010ed0..4eb41421ca5b 100644
--- a/arch/csky/kernel/Makefile
+++ b/arch/csky/kernel/Makefile
@@ -2,7 +2,7 @@
extra-y := head.o vmlinux.lds
obj-y += entry.o atomic.o signal.o traps.o irq.o time.o vdso.o vdso/
-obj-y += power.o syscall.o syscall_table.o setup.o
+obj-y += power.o syscall.o syscall_table.o setup.o io.o
obj-y += process.o cpu-probe.o ptrace.o stacktrace.o
obj-y += probes/
diff --git a/arch/csky/kernel/io.c b/arch/csky/kernel/io.c
new file mode 100644
index 000000000000..5883f13fa2b1
--- /dev/null
+++ b/arch/csky/kernel/io.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/io.h>
+
+/*
+ * Copy data from IO memory space to "real" memory space.
+ */
+void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
+{
+ while (count && !IS_ALIGNED((unsigned long)from, 4)) {
+ *(u8 *)to = __raw_readb(from);
+ from++;
+ to++;
+ count--;
+ }
+
+ while (count >= 4) {
+ *(u32 *)to = __raw_readl(from);
+ from += 4;
+ to += 4;
+ count -= 4;
+ }
+
+ while (count) {
+ *(u8 *)to = __raw_readb(from);
+ from++;
+ to++;
+ count--;
+ }
+}
+EXPORT_SYMBOL(__memcpy_fromio);
+
+/*
+ * Copy data from "real" memory space to IO memory space.
+ */
+void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
+{
+ while (count && !IS_ALIGNED((unsigned long)to, 4)) {
+ __raw_writeb(*(u8 *)from, to);
+ from++;
+ to++;
+ count--;
+ }
+
+ while (count >= 4) {
+ __raw_writel(*(u32 *)from, to);
+ from += 4;
+ to += 4;
+ count -= 4;
+ }
+
+ while (count) {
+ __raw_writeb(*(u8 *)from, to);
+ from++;
+ to++;
+ count--;
+ }
+}
+EXPORT_SYMBOL(__memcpy_toio);
+
+/*
+ * "memset" on IO memory space.
+ */
+void __memset_io(volatile void __iomem *dst, int c, size_t count)
+{
+ u32 qc = (u8)c;
+
+ qc |= qc << 8;
+ qc |= qc << 16;
+
+ while (count && !IS_ALIGNED((unsigned long)dst, 4)) {
+ __raw_writeb(c, dst);
+ dst++;
+ count--;
+ }
+
+ while (count >= 4) {
+ __raw_writel(qc, dst);
+ dst += 4;
+ count -= 4;
+ }
+
+ while (count) {
+ __raw_writeb(c, dst);
+ dst++;
+ count--;
+ }
+}
+EXPORT_SYMBOL(__memset_io);
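A typical caller, for illustration only (DEMO_REGS_PHYS and demo_copy() are made up; the helpers themselves are the ones added above): map a region with ioremap() and let the string accessors keep the MMIO side word-aligned:

#include <linux/errno.h>
#include <linux/io.h>

#define DEMO_REGS_PHYS	0xf0000000UL	/* hypothetical device window */

static int demo_copy(void *buf, size_t len)
{
	void __iomem *regs = ioremap(DEMO_REGS_PHYS, len);

	if (!regs)
		return -ENOMEM;

	memset_io(regs, 0, len);	/* resolves to __memset_io() */
	memcpy_fromio(buf, regs, len);	/* 32-bit accesses on the IO side */

	iounmap(regs);
	return 0;
}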
diff --git a/arch/csky/kernel/module.c b/arch/csky/kernel/module.c
index 6cd82d69c655..f11b3e573344 100644
--- a/arch/csky/kernel/module.c
+++ b/arch/csky/kernel/module.c
@@ -68,7 +68,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
*location = rel[i].r_addend + sym->st_value;
break;
case R_CSKY_PC32:
- /* Add the value, subtract its postition */
+ /* Add the value, subtract its position */
*location = rel[i].r_addend + sym->st_value
- (uint32_t)location;
break;
diff --git a/arch/csky/kernel/probes/kprobes.c b/arch/csky/kernel/probes/kprobes.c
index 42920f25e73c..34ba684d5962 100644
--- a/arch/csky/kernel/probes/kprobes.c
+++ b/arch/csky/kernel/probes/kprobes.c
@@ -30,7 +30,7 @@ static int __kprobes patch_text_cb(void *priv)
struct csky_insn_patch *param = priv;
unsigned int addr = (unsigned int)param->addr;
- if (atomic_inc_return(&param->cpu_count) == 1) {
+ if (atomic_inc_return(&param->cpu_count) == num_online_cpus()) {
*(u16 *) addr = cpu_to_le16(param->opcode);
dcache_wb_range(addr, addr + 2);
atomic_inc(&param->cpu_count);
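The one-line fix above changes which CPU performs the patching inside the stop_machine() rendezvous: with == 1 the first CPU to arrive wrote the instruction while the remaining CPUs might not yet be parked; with == num_online_cpus() the last CPU to arrive patches while every other CPU is known to be spinning. The full pattern, sketched in isolation (do_the_patch() stands in for the instruction write and dcache_wb_range() call shown in the hunk):

static int patch_text_cb(void *priv)
{
	struct csky_insn_patch *param = priv;

	if (atomic_inc_return(&param->cpu_count) == num_online_cpus()) {
		/* last CPU in: all others are spinning below */
		do_the_patch(param);
		atomic_inc(&param->cpu_count);	/* count > nr_cpus: release them */
	} else {
		while (atomic_read(&param->cpu_count) <= num_online_cpus())
			cpu_relax();
	}
	return 0;
}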
diff --git a/arch/csky/kernel/probes/uprobes.c b/arch/csky/kernel/probes/uprobes.c
index 1a9e0961b2b5..2d31a12e46cf 100644
--- a/arch/csky/kernel/probes/uprobes.c
+++ b/arch/csky/kernel/probes/uprobes.c
@@ -102,7 +102,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
struct uprobe_task *utask = current->utask;
/*
- * Task has received a fatal signal, so reset back to probbed
+ * Task has received a fatal signal, so reset back to probed
* address.
*/
instruction_pointer_set(regs, utask->vaddr);
diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c
index 3d0ca22cd0e2..5de04707aa07 100644
--- a/arch/csky/kernel/process.c
+++ b/arch/csky/kernel/process.c
@@ -2,7 +2,6 @@
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#include <linux/module.h>
-#include <linux/version.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/debug.h>
diff --git a/arch/csky/lib/Makefile b/arch/csky/lib/Makefile
index 7fbdbb2c4d12..d0ce6e2d7ab2 100644
--- a/arch/csky/lib/Makefile
+++ b/arch/csky/lib/Makefile
@@ -1,3 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
lib-y := usercopy.o delay.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+ifneq ($(CONFIG_HAVE_EFFICIENT_UNALIGNED_STRING_OPS), y)
+lib-y += string.o
+endif
diff --git a/arch/csky/lib/string.c b/arch/csky/lib/string.c
new file mode 100644
index 000000000000..d65626fcaeac
--- /dev/null
+++ b/arch/csky/lib/string.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * String functions optimized for hardware which doesn't
+ * handle unaligned memory accesses efficiently.
+ *
+ * Copyright (C) 2021 Matteo Croce
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+
+/* Minimum size for a word copy to be convenient */
+#define BYTES_LONG sizeof(long)
+#define WORD_MASK (BYTES_LONG - 1)
+#define MIN_THRESHOLD (BYTES_LONG * 2)
+
+/* convenience union to avoid casts between different pointer types */
+union types {
+ u8 *as_u8;
+ unsigned long *as_ulong;
+ uintptr_t as_uptr;
+};
+
+union const_types {
+ const u8 *as_u8;
+ unsigned long *as_ulong;
+ uintptr_t as_uptr;
+};
+
+void *memcpy(void *dest, const void *src, size_t count)
+{
+ union const_types s = { .as_u8 = src };
+ union types d = { .as_u8 = dest };
+ int distance = 0;
+
+ if (count < MIN_THRESHOLD)
+ goto copy_remainder;
+
+ /* Copy a byte at a time until the destination is aligned. */
+ for (; d.as_uptr & WORD_MASK; count--)
+ *d.as_u8++ = *s.as_u8++;
+
+ distance = s.as_uptr & WORD_MASK;
+
+ if (distance) {
+ unsigned long last, next;
+
+ /*
+ * s is distance bytes ahead of d, and d just reached
+ * the alignment boundary. Move s backward to word align it
+ * and shift data to compensate for distance, in order to do
+ * word-by-word copy.
+ */
+ s.as_u8 -= distance;
+
+ next = s.as_ulong[0];
+ for (; count >= BYTES_LONG; count -= BYTES_LONG) {
+ last = next;
+ next = s.as_ulong[1];
+
+ d.as_ulong[0] = last >> (distance * 8) |
+ next << ((BYTES_LONG - distance) * 8);
+
+ d.as_ulong++;
+ s.as_ulong++;
+ }
+
+ /* Restore s with the original offset. */
+ s.as_u8 += distance;
+ } else {
+ /*
+ * If the source and dest lower bits are the same, do a simple
+ * 32/64 bit wide copy.
+ */
+ for (; count >= BYTES_LONG; count -= BYTES_LONG)
+ *d.as_ulong++ = *s.as_ulong++;
+ }
+
+copy_remainder:
+ while (count--)
+ *d.as_u8++ = *s.as_u8++;
+
+ return dest;
+}
+EXPORT_SYMBOL(memcpy);
+
+/*
+ * Simply check if the buffers overlap and, in that case, call memcpy(),
+ * otherwise do a simple one-byte-at-a-time backward copy.
+ */
+void *memmove(void *dest, const void *src, size_t count)
+{
+ if (dest < src || src + count <= dest)
+ return memcpy(dest, src, count);
+
+ if (dest > src) {
+ const char *s = src + count;
+ char *tmp = dest + count;
+
+ while (count--)
+ *--tmp = *--s;
+ }
+ return dest;
+}
+EXPORT_SYMBOL(memmove);
+
+void *memset(void *s, int c, size_t count)
+{
+ union types dest = { .as_u8 = s };
+
+ if (count >= MIN_THRESHOLD) {
+ unsigned long cu = (unsigned long)c;
+
+ /* Compose a ulong with 'c' repeated 4/8 times */
+ cu |= cu << 8;
+ cu |= cu << 16;
+ /* Suppress warning on 32 bit machines */
+ cu |= (cu << 16) << 16;
+
+ for (; count && dest.as_uptr & WORD_MASK; count--)
+ *dest.as_u8++ = c;
+
+ /* Copy using the largest size allowed */
+ for (; count >= BYTES_LONG; count -= BYTES_LONG)
+ *dest.as_ulong++ = cu;
+ }
+
+ /* copy the remainder */
+ while (count--)
+ *dest.as_u8++ = c;
+
+ return s;
+}
+EXPORT_SYMBOL(memset);
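To see the misaligned-source shift in memcpy() above with concrete numbers (assuming 32-bit little-endian, so BYTES_LONG == 4, and distance == 1):

/*
 * After s is rewound by one byte, s.as_ulong[0] holds bytes s[-1..2]
 * and s.as_ulong[1] holds bytes s[3..6].
 */
unsigned long last = 0x03020100UL;	/* s[-1]=0x00, s[0]=0x01, s[1]=0x02, s[2]=0x03 */
unsigned long next = 0x07060504UL;	/* s[3]=0x04 .. s[6]=0x07 */
unsigned int distance = 1;

unsigned long out = last >> (distance * 8) |
		    next << ((4 - distance) * 8);
/* out == 0x04030201: exactly s[0..3], emitted as one aligned word store */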
diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c
index c3a775a7e8f9..82447029feb4 100644
--- a/arch/csky/mm/dma-mapping.c
+++ b/arch/csky/mm/dma-mapping.c
@@ -9,7 +9,6 @@
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/types.h>
-#include <linux/version.h>
#include <asm/cache.h>
static inline void cache_op(phys_addr_t paddr, size_t size,
diff --git a/arch/ia64/include/asm/timex.h b/arch/ia64/include/asm/timex.h
index 869a3ac6bf23..7ccc077a60be 100644
--- a/arch/ia64/include/asm/timex.h
+++ b/arch/ia64/include/asm/timex.h
@@ -39,6 +39,7 @@ get_cycles (void)
ret = ia64_getreg(_IA64_REG_AR_ITC);
return ret;
}
+#define get_cycles get_cycles
extern void ia64_cpu_local_tick (void);
extern unsigned long long ia64_native_sched_clock (void);
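The self-referential define is the usual arch-override marker: it makes the symbol visible to the preprocessor so that generic code can tell whether the architecture supplied its own get_cycles(). Schematically (an illustration of the idiom, not a quote of the generic header):

/* generic code, roughly: */
#ifdef get_cycles
#define random_get_entropy()	((unsigned long)get_cycles())
#else
#define random_get_entropy()	random_get_entropy_fallback()
#endif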
diff --git a/arch/m68k/Kbuild b/arch/m68k/Kbuild
index 18abb35c26a1..7762af9f6def 100644
--- a/arch/m68k/Kbuild
+++ b/arch/m68k/Kbuild
@@ -17,3 +17,4 @@ obj-$(CONFIG_M68060) += ifpsp060/
obj-$(CONFIG_M68KFPU_EMU) += math-emu/
obj-$(CONFIG_M68000) += 68000/
obj-$(CONFIG_COLDFIRE) += coldfire/
+obj-$(CONFIG_VIRT) += virt/
diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
index 16ea9a67723c..3d5da25c73b5 100644
--- a/arch/m68k/Kconfig.cpu
+++ b/arch/m68k/Kconfig.cpu
@@ -327,7 +327,7 @@ comment "Processor Specific Options"
config M68KFPU_EMU
bool "Math emulation support"
- depends on MMU
+ depends on M68KCLASSIC && FPU
help
At some point in the future, this will cause floating-point math
instructions to be emulated by the kernel on machines that lack a
diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine
index eeab4f3e6c19..188a8f8a0104 100644
--- a/arch/m68k/Kconfig.machine
+++ b/arch/m68k/Kconfig.machine
@@ -149,6 +149,23 @@ config SUN3
If you don't want to compile a kernel exclusively for a Sun 3, say N.
+config VIRT
+ bool "Virtual M68k Machine support"
+ depends on MMU
+ select GENERIC_CLOCKEVENTS
+ select GOLDFISH
+ select GOLDFISH_TIMER
+ select GOLDFISH_TTY
+ select M68040
+ select MMU_MOTOROLA if MMU
+ select RTC_CLASS
+ select RTC_DRV_GOLDFISH
+ select TTY
+ select VIRTIO_MMIO
+ help
+ This option enables a pure virtual machine based on m68k,
+ VIRTIO MMIO devices and GOLDFISH interfaces (TTY, RTC, PIC).
+
config PILOT
bool
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 114aaa3f955a..c181030218bf 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -42,7 +42,6 @@ CONFIG_MQ_IOSCHED_DEADLINE=m
CONFIG_MQ_IOSCHED_KYBER=m
CONFIG_IOSCHED_BFQ=m
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
CONFIG_ZPOOL=m
@@ -581,6 +580,7 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -613,7 +613,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
# CONFIG_CRYPTO_HW is not set
CONFIG_PRIME_NUMBERS=m
CONFIG_CRC32_SELFTEST=m
-CONFIG_CRC64=m
CONFIG_XZ_DEC_TEST=m
CONFIG_GLOB_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
@@ -638,7 +637,6 @@ CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
-CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
@@ -659,6 +657,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
-CONFIG_TEST_STACKINIT=m
CONFIG_TEST_MEMINIT=m
CONFIG_TEST_FREE_PAGES=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 30b9d932b930..40755648fb6c 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -38,7 +38,6 @@ CONFIG_MQ_IOSCHED_DEADLINE=m
CONFIG_MQ_IOSCHED_KYBER=m
CONFIG_IOSCHED_BFQ=m
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
CONFIG_ZPOOL=m
@@ -538,6 +537,7 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -570,7 +570,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
# CONFIG_CRYPTO_HW is not set
CONFIG_PRIME_NUMBERS=m
CONFIG_CRC32_SELFTEST=m
-CONFIG_CRC64=m
CONFIG_XZ_DEC_TEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
@@ -594,7 +593,6 @@ CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
-CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
@@ -615,6 +613,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
-CONFIG_TEST_STACKINIT=m
CONFIG_TEST_MEMINIT=m
CONFIG_TEST_FREE_PAGES=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 51ff3180e69d..be0d9155fc5b 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -45,7 +45,6 @@ CONFIG_MQ_IOSCHED_DEADLINE=m
CONFIG_MQ_IOSCHED_KYBER=m
CONFIG_IOSCHED_BFQ=m
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
CONFIG_ZPOOL=m
@@ -558,6 +557,7 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -590,7 +590,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
# CONFIG_CRYPTO_HW is not set
CONFIG_PRIME_NUMBERS=m
CONFIG_CRC32_SELFTEST=m
-CONFIG_CRC64=m
CONFIG_XZ_DEC_TEST=m
CONFIG_GLOB_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
@@ -615,7 +614,6 @@ CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
-CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
@@ -636,6 +634,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
-CONFIG_TEST_STACKINIT=m
CONFIG_TEST_MEMINIT=m
CONFIG_TEST_FREE_PAGES=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 7d95ca4366e4..9af0e2d0d153 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -35,7 +35,6 @@ CONFIG_MQ_IOSCHED_DEADLINE=m
CONFIG_MQ_IOSCHED_KYBER=m
CONFIG_IOSCHED_BFQ=m
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
CONFIG_ZPOOL=m
@@ -530,6 +529,7 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -562,7 +562,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
# CONFIG_CRYPTO_HW is not set
CONFIG_PRIME_NUMBERS=m
CONFIG_CRC32_SELFTEST=m
-CONFIG_CRC64=m
CONFIG_XZ_DEC_TEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
@@ -586,7 +585,6 @@ CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
-CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
@@ -607,6 +605,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
-CONFIG_TEST_STACKINIT=m
CONFIG_TEST_MEMINIT=m
CONFIG_TEST_FREE_PAGES=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index e306e3813607..49341d66feb6 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -37,7 +37,6 @@ CONFIG_MQ_IOSCHED_DEADLINE=m
CONFIG_MQ_IOSCHED_KYBER=m
CONFIG_IOSCHED_BFQ=m
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
CONFIG_ZPOOL=m
@@ -540,6 +539,7 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -572,7 +572,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
# CONFIG_CRYPTO_HW is not set
CONFIG_PRIME_NUMBERS=m
CONFIG_CRC32_SELFTEST=m
-CONFIG_CRC64=m
CONFIG_XZ_DEC_TEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
@@ -596,7 +595,6 @@ CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
-CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
@@ -617,6 +615,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
-CONFIG_TEST_STACKINIT=m
CONFIG_TEST_MEMINIT=m
CONFIG_TEST_FREE_PAGES=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 41316cf02441..92b33d5ffab1 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -36,7 +36,6 @@ CONFIG_MQ_IOSCHED_DEADLINE=m
CONFIG_MQ_IOSCHED_KYBER=m
CONFIG_IOSCHED_BFQ=m
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
CONFIG_ZPOOL=m
@@ -560,6 +559,7 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -592,7 +592,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
# CONFIG_CRYPTO_HW is not set
CONFIG_PRIME_NUMBERS=m
CONFIG_CRC32_SELFTEST=m
-CONFIG_CRC64=m
CONFIG_XZ_DEC_TEST=m
CONFIG_GLOB_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
@@ -617,7 +616,6 @@ CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
-CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
@@ -638,6 +636,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
-CONFIG_TEST_STACKINIT=m
CONFIG_TEST_MEMINIT=m
CONFIG_TEST_FREE_PAGES=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 2fc3f0df6d43..6aaa947bc849 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -56,7 +56,6 @@ CONFIG_MQ_IOSCHED_DEADLINE=m
CONFIG_MQ_IOSCHED_KYBER=m
CONFIG_IOSCHED_BFQ=m
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
CONFIG_ZPOOL=m
@@ -646,6 +645,7 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -678,7 +678,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
# CONFIG_CRYPTO_HW is not set
CONFIG_PRIME_NUMBERS=m
CONFIG_CRC32_SELFTEST=m
-CONFIG_CRC64=m
CONFIG_XZ_DEC_TEST=m
CONFIG_GLOB_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
@@ -703,7 +702,6 @@ CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
-CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
@@ -724,6 +722,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
-CONFIG_TEST_STACKINIT=m
CONFIG_TEST_MEMINIT=m
CONFIG_TEST_FREE_PAGES=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 9603f4396469..b62d65e59938 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -34,7 +34,6 @@ CONFIG_MQ_IOSCHED_DEADLINE=m
CONFIG_MQ_IOSCHED_KYBER=m
CONFIG_IOSCHED_BFQ=m
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
CONFIG_ZPOOL=m
@@ -529,6 +528,7 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -561,7 +561,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
# CONFIG_CRYPTO_HW is not set
CONFIG_PRIME_NUMBERS=m
CONFIG_CRC32_SELFTEST=m
-CONFIG_CRC64=m
CONFIG_XZ_DEC_TEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
@@ -585,7 +584,6 @@ CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
-CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
@@ -606,6 +604,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
-CONFIG_TEST_STACKINIT=m
CONFIG_TEST_MEMINIT=m
CONFIG_TEST_FREE_PAGES=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index c9cabd3344df..8ecf261487d4 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -35,7 +35,6 @@ CONFIG_MQ_IOSCHED_DEADLINE=m
CONFIG_MQ_IOSCHED_KYBER=m
CONFIG_IOSCHED_BFQ=m
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
CONFIG_ZPOOL=m
@@ -530,6 +529,7 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -562,7 +562,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
# CONFIG_CRYPTO_HW is not set
CONFIG_PRIME_NUMBERS=m
CONFIG_CRC32_SELFTEST=m
-CONFIG_CRC64=m
CONFIG_XZ_DEC_TEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
@@ -586,7 +585,6 @@ CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
-CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
@@ -607,6 +605,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
-CONFIG_TEST_STACKINIT=m
CONFIG_TEST_MEMINIT=m
CONFIG_TEST_FREE_PAGES=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 5f994bf44fb8..7540d908897b 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -36,7 +36,6 @@ CONFIG_MQ_IOSCHED_DEADLINE=m
CONFIG_MQ_IOSCHED_KYBER=m
CONFIG_IOSCHED_BFQ=m
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
CONFIG_ZPOOL=m
@@ -547,6 +546,7 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -579,7 +579,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
# CONFIG_CRYPTO_HW is not set
CONFIG_PRIME_NUMBERS=m
CONFIG_CRC32_SELFTEST=m
-CONFIG_CRC64=m
CONFIG_XZ_DEC_TEST=m
CONFIG_GLOB_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
@@ -604,7 +603,6 @@ CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
-CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
@@ -625,6 +623,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
-CONFIG_TEST_STACKINIT=m
CONFIG_TEST_MEMINIT=m
CONFIG_TEST_FREE_PAGES=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 183e33f7d4a0..832b45944617 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -32,7 +32,6 @@ CONFIG_MQ_IOSCHED_DEADLINE=m
CONFIG_MQ_IOSCHED_KYBER=m
CONFIG_IOSCHED_BFQ=m
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
CONFIG_ZPOOL=m
@@ -529,6 +528,7 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -561,7 +561,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
# CONFIG_CRYPTO_HW is not set
CONFIG_PRIME_NUMBERS=m
CONFIG_CRC32_SELFTEST=m
-CONFIG_CRC64=m
CONFIG_XZ_DEC_TEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
@@ -584,7 +583,6 @@ CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
-CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
@@ -605,6 +603,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
-CONFIG_TEST_STACKINIT=m
CONFIG_TEST_MEMINIT=m
CONFIG_TEST_FREE_PAGES=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index 8214263b9ab8..9171b687e565 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -32,7 +32,6 @@ CONFIG_MQ_IOSCHED_DEADLINE=m
CONFIG_MQ_IOSCHED_KYBER=m
CONFIG_IOSCHED_BFQ=m
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_AOUT=m
CONFIG_BINFMT_MISC=m
# CONFIG_COMPACTION is not set
CONFIG_ZPOOL=m
@@ -528,6 +527,7 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -560,7 +560,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
# CONFIG_CRYPTO_HW is not set
CONFIG_PRIME_NUMBERS=m
CONFIG_CRC32_SELFTEST=m
-CONFIG_CRC64=m
CONFIG_XZ_DEC_TEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
@@ -584,7 +583,6 @@ CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
-CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
@@ -605,6 +603,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
-CONFIG_TEST_STACKINIT=m
CONFIG_TEST_MEMINIT=m
CONFIG_TEST_FREE_PAGES=m
diff --git a/arch/m68k/configs/virt_defconfig b/arch/m68k/configs/virt_defconfig
new file mode 100644
index 000000000000..8059bd618370
--- /dev/null
+++ b/arch/m68k/configs/virt_defconfig
@@ -0,0 +1,68 @@
+CONFIG_LOCALVERSION="-virt"
+CONFIG_SYSVIPC=y
+CONFIG_CGROUPS=y
+CONFIG_BLK_CGROUP=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_RDMA=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_VIRT=y
+CONFIG_PROC_HARDWARE=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_AMIGA_PARTITION=y
+CONFIG_ATARI_PARTITION=y
+CONFIG_MAC_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_LDM_PARTITION=y
+CONFIG_LDM_DEBUG=y
+CONFIG_SUN_PARTITION=y
+CONFIG_SYSV68_PARTITION=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_CGROUP_NET_PRIO=y
+CONFIG_CGROUP_NET_CLASSID=y
+CONFIG_NET_9P=y
+CONFIG_NET_9P_VIRTIO=y
+CONFIG_DEVTMPFS=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_SCSI_VIRTIO=y
+CONFIG_NETDEVICES=y
+CONFIG_VIRTIO_NET=y
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_EVDEV=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_HW_RANDOM_VIRTIO=y
+CONFIG_DRM=y
+CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_FB=y
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_SND_VIRTIO=y
+CONFIG_VIRT_DRIVERS=y
+CONFIG_VIRTIO_INPUT=y
+CONFIG_EXT4_FS=y
+CONFIG_AUTOFS_FS=y
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_9P_FS=y
+CONFIG_9P_FS_POSIX_ACL=y
+CONFIG_9P_FS_SECURITY=y
+CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/include/asm/config.h b/arch/m68k/include/asm/config.h
index e73ffa23c4f5..9bb888ab5009 100644
--- a/arch/m68k/include/asm/config.h
+++ b/arch/m68k/include/asm/config.h
@@ -17,6 +17,7 @@ extern int mac_parse_bootinfo(const struct bi_record *record);
extern int mvme147_parse_bootinfo(const struct bi_record *record);
extern int mvme16x_parse_bootinfo(const struct bi_record *record);
extern int q40_parse_bootinfo(const struct bi_record *record);
+extern int virt_parse_bootinfo(const struct bi_record *record);
extern void config_amiga(void);
extern void config_apollo(void);
@@ -29,5 +30,6 @@ extern void config_mvme16x(void);
extern void config_q40(void);
extern void config_sun3(void);
extern void config_sun3x(void);
+extern void config_virt(void);
#endif /* _M68K_CONFIG_H */
diff --git a/arch/m68k/include/asm/io.h b/arch/m68k/include/asm/io.h
index aabe6420ead2..aaeabc65e63c 100644
--- a/arch/m68k/include/asm/io.h
+++ b/arch/m68k/include/asm/io.h
@@ -8,6 +8,9 @@
#include <asm/io_mm.h>
#endif
+#define gf_ioread32 ioread32be
+#define gf_iowrite32 iowrite32be
+
#include <asm-generic/io.h>
#endif /* _M68K_IO_H */
diff --git a/arch/m68k/include/asm/irq.h b/arch/m68k/include/asm/irq.h
index 91dd493791d7..7829e955ca04 100644
--- a/arch/m68k/include/asm/irq.h
+++ b/arch/m68k/include/asm/irq.h
@@ -12,7 +12,8 @@
*/
#if defined(CONFIG_COLDFIRE)
#define NR_IRQS 256
-#elif defined(CONFIG_VME) || defined(CONFIG_SUN3) || defined(CONFIG_SUN3X)
+#elif defined(CONFIG_VME) || defined(CONFIG_SUN3) || \
+ defined(CONFIG_SUN3X) || defined(CONFIG_VIRT)
#define NR_IRQS 200
#elif defined(CONFIG_ATARI)
#define NR_IRQS 141
diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h
index 143ba7de9bda..9b4e2fe2ac82 100644
--- a/arch/m68k/include/asm/pgtable_mm.h
+++ b/arch/m68k/include/asm/pgtable_mm.h
@@ -80,6 +80,9 @@
#elif defined(CONFIG_COLDFIRE)
#define KMAP_START 0xe0000000
#define KMAP_END 0xf0000000
+#elif defined(CONFIG_VIRT)
+#define KMAP_START 0xdf000000
+#define KMAP_END 0xff000000
#else
#define KMAP_START 0xd0000000
#define KMAP_END 0xf0000000
@@ -92,6 +95,10 @@ extern unsigned long m68k_vmalloc_end;
#elif defined(CONFIG_COLDFIRE)
#define VMALLOC_START 0xd0000000
#define VMALLOC_END 0xe0000000
+#elif defined(CONFIG_VIRT)
+#define VMALLOC_OFFSET PAGE_SIZE
+#define VMALLOC_START (((unsigned long) high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))
+#define VMALLOC_END KMAP_START
#else
/* Just any arbitrary offset to the start of the vmalloc VM area: the
* current 8MB value just means that there will be a 8MB "hole" after the
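The VIRT VMALLOC_START expression rounds high_memory up past the next VMALLOC_OFFSET boundary, leaving a guard gap of up to one page. Worked out (off must be a power of two; the addresses are illustrative):

#define ROUND_PAST(addr, off)	(((addr) + (off)) & ~((off) - 1))
/* ROUND_PAST(0x00ffc234, 0x1000) == 0x00ffd000
 * ROUND_PAST(0x01000000, 0x1000) == 0x01001000 (full-page gap when aligned) */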
diff --git a/arch/m68k/include/asm/raw_io.h b/arch/m68k/include/asm/raw_io.h
index 80eb2396d01e..3ba40bc1dfaa 100644
--- a/arch/m68k/include/asm/raw_io.h
+++ b/arch/m68k/include/asm/raw_io.h
@@ -80,14 +80,14 @@
({ u16 __v = le16_to_cpu(*(__force volatile u16 *) (addr)); __v; })
#define rom_out_8(addr, b) \
- ({u8 __maybe_unused __w, __v = (b); u32 _addr = ((u32) (addr)); \
+ (void)({u8 __maybe_unused __w, __v = (b); u32 _addr = ((u32) (addr)); \
__w = ((*(__force volatile u8 *) ((_addr | 0x10000) + (__v<<1)))); })
#define rom_out_be16(addr, w) \
- ({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \
+ (void)({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \
__w = ((*(__force volatile u16 *) ((_addr & 0xFFFF0000UL) + ((__v & 0xFF)<<1)))); \
__w = ((*(__force volatile u16 *) ((_addr | 0x10000) + ((__v >> 8)<<1)))); })
#define rom_out_le16(addr, w) \
- ({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \
+ (void)({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \
__w = ((*(__force volatile u16 *) ((_addr & 0xFFFF0000UL) + ((__v >> 8)<<1)))); \
__w = ((*(__force volatile u16 *) ((_addr | 0x10000) + ((__v & 0xFF)<<1)))); })
diff --git a/arch/m68k/include/asm/setup.h b/arch/m68k/include/asm/setup.h
index 8f2023f8c1c4..2c99477aaf89 100644
--- a/arch/m68k/include/asm/setup.h
+++ b/arch/m68k/include/asm/setup.h
@@ -37,7 +37,8 @@ extern unsigned long m68k_machtype;
#elif defined(CONFIG_ATARI) || defined(CONFIG_MAC) || defined(CONFIG_APOLLO) \
|| defined(CONFIG_MVME16x) || defined(CONFIG_BVME6000) \
|| defined(CONFIG_HP300) || defined(CONFIG_Q40) \
- || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147)
+ || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147) \
+ || defined(CONFIG_VIRT)
# define MACH_IS_AMIGA (m68k_machtype == MACH_AMIGA)
#else
# define MACH_AMIGA_ONLY
@@ -50,7 +51,8 @@ extern unsigned long m68k_machtype;
#elif defined(CONFIG_AMIGA) || defined(CONFIG_MAC) || defined(CONFIG_APOLLO) \
|| defined(CONFIG_MVME16x) || defined(CONFIG_BVME6000) \
|| defined(CONFIG_HP300) || defined(CONFIG_Q40) \
- || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147)
+ || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147) \
+ || defined(CONFIG_VIRT)
# define MACH_IS_ATARI (m68k_machtype == MACH_ATARI)
#else
# define MACH_ATARI_ONLY
@@ -63,7 +65,8 @@ extern unsigned long m68k_machtype;
#elif defined(CONFIG_AMIGA) || defined(CONFIG_ATARI) || defined(CONFIG_APOLLO) \
|| defined(CONFIG_MVME16x) || defined(CONFIG_BVME6000) \
|| defined(CONFIG_HP300) || defined(CONFIG_Q40) \
- || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147)
+ || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147) \
+ || defined(CONFIG_VIRT)
# define MACH_IS_MAC (m68k_machtype == MACH_MAC)
#else
# define MACH_MAC_ONLY
@@ -84,7 +87,8 @@ extern unsigned long m68k_machtype;
#elif defined(CONFIG_AMIGA) || defined(CONFIG_MAC) || defined(CONFIG_ATARI) \
|| defined(CONFIG_MVME16x) || defined(CONFIG_BVME6000) \
|| defined(CONFIG_HP300) || defined(CONFIG_Q40) \
- || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147)
+ || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147) \
+ || defined(CONFIG_VIRT)
# define MACH_IS_APOLLO (m68k_machtype == MACH_APOLLO)
#else
# define MACH_APOLLO_ONLY
@@ -97,7 +101,8 @@ extern unsigned long m68k_machtype;
#elif defined(CONFIG_AMIGA) || defined(CONFIG_MAC) || defined(CONFIG_ATARI) \
|| defined(CONFIG_APOLLO) || defined(CONFIG_BVME6000) \
|| defined(CONFIG_HP300) || defined(CONFIG_Q40) \
- || defined(CONFIG_SUN3X) || defined(CONFIG_MVME16x)
+ || defined(CONFIG_SUN3X) || defined(CONFIG_MVME16x) \
+ || defined(CONFIG_VIRT)
# define MACH_IS_MVME147 (m68k_machtype == MACH_MVME147)
#else
# define MACH_MVME147_ONLY
@@ -110,7 +115,8 @@ extern unsigned long m68k_machtype;
#elif defined(CONFIG_AMIGA) || defined(CONFIG_MAC) || defined(CONFIG_ATARI) \
|| defined(CONFIG_APOLLO) || defined(CONFIG_BVME6000) \
|| defined(CONFIG_HP300) || defined(CONFIG_Q40) \
- || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147)
+ || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147) \
+ || defined(CONFIG_VIRT)
# define MACH_IS_MVME16x (m68k_machtype == MACH_MVME16x)
#else
# define MACH_MVME16x_ONLY
@@ -123,7 +129,8 @@ extern unsigned long m68k_machtype;
#elif defined(CONFIG_AMIGA) || defined(CONFIG_MAC) || defined(CONFIG_ATARI) \
|| defined(CONFIG_APOLLO) || defined(CONFIG_MVME16x) \
|| defined(CONFIG_HP300) || defined(CONFIG_Q40) \
- || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147)
+ || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147) \
+ || defined(CONFIG_VIRT)
# define MACH_IS_BVME6000 (m68k_machtype == MACH_BVME6000)
#else
# define MACH_BVME6000_ONLY
@@ -136,7 +143,8 @@ extern unsigned long m68k_machtype;
#elif defined(CONFIG_AMIGA) || defined(CONFIG_MAC) || defined(CONFIG_ATARI) \
|| defined(CONFIG_APOLLO) || defined(CONFIG_MVME16x) \
|| defined(CONFIG_BVME6000) || defined(CONFIG_Q40) \
- || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147)
+ || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147) \
+ || defined(CONFIG_VIRT)
# define MACH_IS_HP300 (m68k_machtype == MACH_HP300)
#else
# define MACH_HP300_ONLY
@@ -149,7 +157,8 @@ extern unsigned long m68k_machtype;
#elif defined(CONFIG_AMIGA) || defined(CONFIG_MAC) || defined(CONFIG_ATARI) \
|| defined(CONFIG_APOLLO) || defined(CONFIG_MVME16x) \
|| defined(CONFIG_BVME6000) || defined(CONFIG_HP300) \
- || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147)
+ || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147) \
+ || defined(CONFIG_VIRT)
# define MACH_IS_Q40 (m68k_machtype == MACH_Q40)
#else
# define MACH_Q40_ONLY
@@ -162,7 +171,8 @@ extern unsigned long m68k_machtype;
#elif defined(CONFIG_AMIGA) || defined(CONFIG_MAC) || defined(CONFIG_ATARI) \
|| defined(CONFIG_APOLLO) || defined(CONFIG_MVME16x) \
|| defined(CONFIG_BVME6000) || defined(CONFIG_HP300) \
- || defined(CONFIG_Q40) || defined(CONFIG_MVME147)
+ || defined(CONFIG_Q40) || defined(CONFIG_MVME147) \
+ || defined(CONFIG_VIRT)
# define MACH_IS_SUN3X (m68k_machtype == MACH_SUN3X)
#else
# define CONFIG_SUN3X_ONLY
@@ -170,6 +180,20 @@ extern unsigned long m68k_machtype;
# define MACH_TYPE (MACH_SUN3X)
#endif
+#if !defined(CONFIG_VIRT)
+# define MACH_IS_VIRT (0)
+#elif defined(CONFIG_AMIGA) || defined(CONFIG_MAC) || defined(CONFIG_ATARI) \
+ || defined(CONFIG_APOLLO) || defined(CONFIG_MVME16x) \
+ || defined(CONFIG_BVME6000) || defined(CONFIG_HP300) \
+ || defined(CONFIG_Q40) || defined(CONFIG_SUN3X) \
+ || defined(CONFIG_MVME147)
+# define MACH_IS_VIRT (m68k_machtype == MACH_VIRT)
+#else
+# define MACH_VIRT_ONLY
+# define MACH_IS_VIRT (1)
+# define MACH_TYPE (MACH_VIRT)
+#endif
+
#ifndef MACH_TYPE
# define MACH_TYPE (m68k_machtype)
#endif
diff --git a/arch/m68k/include/asm/timex.h b/arch/m68k/include/asm/timex.h
index 6a21d9358280..f4a7a340f4ca 100644
--- a/arch/m68k/include/asm/timex.h
+++ b/arch/m68k/include/asm/timex.h
@@ -35,7 +35,7 @@ static inline unsigned long random_get_entropy(void)
{
if (mach_random_get_entropy)
return mach_random_get_entropy();
- return 0;
+ return random_get_entropy_fallback();
}
#define random_get_entropy random_get_entropy
diff --git a/arch/m68k/include/asm/virt.h b/arch/m68k/include/asm/virt.h
new file mode 100644
index 000000000000..d3320c954796
--- /dev/null
+++ b/arch/m68k/include/asm/virt.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VIRT_H
+#define __ASM_VIRT_H
+
+#define NUM_VIRT_SOURCES 200
+
+struct virt_booter_device_data {
+ u32 mmio;
+ u32 irq;
+};
+
+struct virt_booter_data {
+ u32 qemu_version;
+ struct virt_booter_device_data pic;
+ struct virt_booter_device_data rtc;
+ struct virt_booter_device_data tty;
+ struct virt_booter_device_data ctrl;
+ struct virt_booter_device_data virtio;
+};
+
+extern struct virt_booter_data virt_bi_data;
+
+extern void __init virt_init_IRQ(void);
+
+#endif
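A plausible shape for the virt_parse_bootinfo() declared in asm/config.h earlier in this series (the body below is illustrative, not quoted from the patch): bootinfo records carry big-endian tag/data pairs, and the m68k convention is to return 1 for records the machine does not recognize, matching the setup_mm.c hunk below:

static int __init parse_one_virt_record(const struct bi_record *record)
{
	const void *data = record->data;

	switch (be16_to_cpu(record->tag)) {
	case BI_VIRT_GF_TTY_BASE:
		virt_bi_data.tty.mmio = be32_to_cpup(data);
		virt_bi_data.tty.irq  = be32_to_cpup(data + 4);
		return 0;			/* record understood */
	default:
		return 1;			/* unknown to this machine */
	}
}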
diff --git a/arch/m68k/include/uapi/asm/bootinfo-virt.h b/arch/m68k/include/uapi/asm/bootinfo-virt.h
new file mode 100644
index 000000000000..e4db7e2213ab
--- /dev/null
+++ b/arch/m68k/include/uapi/asm/bootinfo-virt.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * asm/bootinfo-virt.h -- Virtual-m68k-specific boot information definitions
+ */
+
+#ifndef _UAPI_ASM_M68K_BOOTINFO_VIRT_H
+#define _UAPI_ASM_M68K_BOOTINFO_VIRT_H
+
+#define BI_VIRT_QEMU_VERSION 0x8000
+#define BI_VIRT_GF_PIC_BASE 0x8001
+#define BI_VIRT_GF_RTC_BASE 0x8002
+#define BI_VIRT_GF_TTY_BASE 0x8003
+#define BI_VIRT_VIRTIO_BASE 0x8004
+#define BI_VIRT_CTRL_BASE 0x8005
+
+#define VIRT_BOOTI_VERSION MK_BI_VERSION(2, 0)
+
+#endif /* _UAPI_ASM_M68K_BOOTINFO_VIRT_H */
diff --git a/arch/m68k/include/uapi/asm/bootinfo.h b/arch/m68k/include/uapi/asm/bootinfo.h
index 38d3140381fa..203d9cbf9630 100644
--- a/arch/m68k/include/uapi/asm/bootinfo.h
+++ b/arch/m68k/include/uapi/asm/bootinfo.h
@@ -83,6 +83,7 @@ struct mem_info {
#define MACH_SUN3X 11
#define MACH_M54XX 12
#define MACH_M5441X 13
+#define MACH_VIRT 14
/*
diff --git a/arch/m68k/kernel/Makefile b/arch/m68k/kernel/Makefile
index dbac7f8743fc..c0833da6a2ca 100644
--- a/arch/m68k/kernel/Makefile
+++ b/arch/m68k/kernel/Makefile
@@ -11,6 +11,7 @@ extra-$(CONFIG_VME) := head.o
extra-$(CONFIG_HP300) := head.o
extra-$(CONFIG_Q40) := head.o
extra-$(CONFIG_SUN3X) := head.o
+extra-$(CONFIG_VIRT) := head.o
extra-$(CONFIG_SUN3) := sun3-head.o
extra-y += vmlinux.lds
diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S
index 9434fca68de5..18f278bdbd21 100644
--- a/arch/m68k/kernel/entry.S
+++ b/arch/m68k/kernel/entry.S
@@ -181,7 +181,7 @@ do_trace_entry:
movel #-ENOSYS,%sp@(PT_OFF_D0)| needed for strace
subql #4,%sp
SAVE_SWITCH_STACK
- jbsr syscall_trace
+ jbsr syscall_trace_enter
RESTORE_SWITCH_STACK
addql #4,%sp
movel %sp@(PT_OFF_ORIG_D0),%d0
@@ -194,7 +194,7 @@ badsys:
do_trace_exit:
subql #4,%sp
SAVE_SWITCH_STACK
- jbsr syscall_trace
+ jbsr syscall_trace_leave
RESTORE_SWITCH_STACK
addql #4,%sp
jra .Lret_from_exception
diff --git a/arch/m68k/kernel/head.S b/arch/m68k/kernel/head.S
index 493c95db0e51..9e812d8606be 100644
--- a/arch/m68k/kernel/head.S
+++ b/arch/m68k/kernel/head.S
@@ -262,6 +262,7 @@
#include <asm/bootinfo-hp300.h>
#include <asm/bootinfo-mac.h>
#include <asm/bootinfo-q40.h>
+#include <asm/bootinfo-virt.h>
#include <asm/bootinfo-vme.h>
#include <asm/setup.h>
#include <asm/entry.h>
@@ -534,6 +535,7 @@ func_define putn,1
#define is_not_apollo(lab) cmpl &MACH_APOLLO,%pc@(m68k_machtype); jne lab
#define is_not_q40(lab) cmpl &MACH_Q40,%pc@(m68k_machtype); jne lab
#define is_not_sun3x(lab) cmpl &MACH_SUN3X,%pc@(m68k_machtype); jne lab
+#define is_not_virt(lab) cmpl &MACH_VIRT,%pc@(m68k_machtype); jne lab
#define hasnt_leds(lab) cmpl &MACH_HP300,%pc@(m68k_machtype); \
jeq 42f; \
@@ -647,6 +649,14 @@ ENTRY(__start)
L(test_notmac):
#endif /* CONFIG_MAC */
+#ifdef CONFIG_VIRT
+ is_not_virt(L(test_notvirt))
+
+ get_bi_record BI_VIRT_GF_TTY_BASE
+ lea %pc@(L(virt_gf_tty_base)),%a1
+ movel %a0@,%a1@
+L(test_notvirt):
+#endif /* CONFIG_VIRT */
/*
* There are ultimately two pieces of information we want for all kinds of
@@ -1237,6 +1247,13 @@ L(mmu_init_not_mac):
L(notsun3x):
#endif
+#ifdef CONFIG_VIRT
+ is_not_virt(L(novirt))
+ mmu_map_tt #1,#0xFF000000,#0x01000000,#_PAGE_NOCACHE_S
+ jbra L(mmu_init_done)
+L(novirt):
+#endif
+
#ifdef CONFIG_APOLLO
is_not_apollo(L(notapollo))
@@ -3186,6 +3203,14 @@ func_start serial_putc,%d0/%d1/%a0/%a1
3:
#endif
+#ifdef CONFIG_VIRT
+ is_not_virt(1f)
+
+ movel L(virt_gf_tty_base),%a1
+ movel %d0,%a1@(GF_PUT_CHAR)
+1:
+#endif
+
L(serial_putc_done):
func_return serial_putc
@@ -3865,3 +3890,9 @@ q40_mem_cptr:
L(q40_do_debug):
.long 0
#endif
+
+#if defined(CONFIG_VIRT)
+GF_PUT_CHAR = 0x00
+L(virt_gf_tty_base):
+ .long 0
+#endif /* CONFIG_VIRT */
diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c
index 6342ff4d2073..daebccdd2c09 100644
--- a/arch/m68k/kernel/ptrace.c
+++ b/arch/m68k/kernel/ptrace.c
@@ -270,12 +270,6 @@ out_eio:
return -EIO;
}
-asmlinkage void syscall_trace(void)
-{
- ptrace_report_syscall(0);
-}
-
-#if defined(CONFIG_COLDFIRE) || !defined(CONFIG_MMU)
asmlinkage int syscall_trace_enter(void)
{
int ret = 0;
@@ -290,4 +284,3 @@ asmlinkage void syscall_trace_leave(void)
if (test_thread_flag(TIF_SYSCALL_TRACE))
ptrace_report_syscall_exit(task_pt_regs(current), 0);
}
-#endif /* CONFIG_COLDFIRE */
diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c
index 8f94feed969c..78ab562beb31 100644
--- a/arch/m68k/kernel/setup_mm.c
+++ b/arch/m68k/kernel/setup_mm.c
@@ -181,6 +181,8 @@ static void __init m68k_parse_bootinfo(const struct bi_record *record)
unknown = hp300_parse_bootinfo(record);
else if (MACH_IS_APOLLO)
unknown = apollo_parse_bootinfo(record);
+ else if (MACH_IS_VIRT)
+ unknown = virt_parse_bootinfo(record);
else
unknown = 1;
}
@@ -312,6 +314,11 @@ void __init setup_arch(char **cmdline_p)
config_BSP(NULL, 0);
break;
#endif
+#ifdef CONFIG_VIRT
+ case MACH_VIRT:
+ config_virt();
+ break;
+#endif
default:
panic("No configuration setup");
}
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index 49533f65958a..b9f6908a31bc 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -625,6 +625,7 @@ static inline void siginfo_build_tests(void)
/* _sigfault._perf */
BUILD_BUG_ON(offsetof(siginfo_t, si_perf_data) != 0x10);
BUILD_BUG_ON(offsetof(siginfo_t, si_perf_type) != 0x14);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_perf_flags) != 0x18);
/* _sigpoll */
BUILD_BUG_ON(offsetof(siginfo_t, si_band) != 0x0c);
diff --git a/arch/m68k/math-emu/fp_arith.c b/arch/m68k/math-emu/fp_arith.c
index d9033238d097..f4a06492cd7a 100644
--- a/arch/m68k/math-emu/fp_arith.c
+++ b/arch/m68k/math-emu/fp_arith.c
@@ -243,7 +243,7 @@ fp_fdiv(struct fp_ext *dest, struct fp_ext *src)
/* infinity / infinity = NaN (quiet, as always) */
if (IS_INF(src))
fp_set_nan(dest);
- /* infinity / anything else = infinity (with approprate sign) */
+ /* infinity / anything else = infinity (with appropriate sign) */
return dest;
}
if (IS_INF(src)) {
diff --git a/arch/m68k/mm/kmap.c b/arch/m68k/mm/kmap.c
index 20ddf71b43d0..7594a945732b 100644
--- a/arch/m68k/mm/kmap.c
+++ b/arch/m68k/mm/kmap.c
@@ -179,6 +179,12 @@ void __iomem *__ioremap(unsigned long physaddr, unsigned long size, int cachefla
return (void __iomem *)physaddr;
}
#endif
+#ifdef CONFIG_VIRT
+ if (MACH_IS_VIRT) {
+ if (physaddr >= 0xff000000 && cacheflag == IOMAP_NOCACHE_SER)
+ return (void __iomem *)physaddr;
+ }
+#endif
#ifdef CONFIG_COLDFIRE
if (__cf_internalio(physaddr))
return (void __iomem *) physaddr;
@@ -293,17 +299,20 @@ EXPORT_SYMBOL(__ioremap);
void iounmap(void __iomem *addr)
{
#ifdef CONFIG_AMIGA
- if ((!MACH_IS_AMIGA) ||
- (((unsigned long)addr < 0x40000000) ||
- ((unsigned long)addr > 0x60000000)))
- free_io_area((__force void *)addr);
-#else
+ if (MACH_IS_AMIGA &&
+ ((unsigned long)addr >= 0x40000000) &&
+ ((unsigned long)addr < 0x60000000))
+ return;
+#endif
+#ifdef CONFIG_VIRT
+ if (MACH_IS_VIRT && (unsigned long)addr >= 0xff000000)
+ return;
+#endif
#ifdef CONFIG_COLDFIRE
if (cf_internalio(addr))
return;
#endif
free_io_area((__force void *)addr);
-#endif
}
EXPORT_SYMBOL(iounmap);
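
With the early returns above, iounmap() only reaches free_io_area() for mappings that __ioremap() actually created; statically translated windows are filtered out first. A sketch of the resulting ladder (config #ifdefs omitted and the machine checks passed in as flags, purely for illustration):

/* Sketch: post-patch iounmap() decision ladder. */
static int iounmap_is_static(unsigned long a, int is_amiga, int is_virt)
{
	if (is_amiga && a >= 0x40000000 && a < 0x60000000)
		return 1;	/* ZorroII I/O: never dynamically mapped */
	if (is_virt && a >= 0xff000000)
		return 1;	/* virt MMIO: returned as-is by __ioremap() */
	return 0;		/* everything else came from the vmalloc pool */
}
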
diff --git a/arch/m68k/virt/Makefile b/arch/m68k/virt/Makefile
new file mode 100644
index 000000000000..54b9b2866654
--- /dev/null
+++ b/arch/m68k/virt/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for Linux arch/m68k/virt source directory
+#
+
+obj-y := config.o ints.o platform.o
diff --git a/arch/m68k/virt/config.c b/arch/m68k/virt/config.c
new file mode 100644
index 000000000000..68d29c8b87e1
--- /dev/null
+++ b/arch/m68k/virt/config.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/serial_core.h>
+#include <clocksource/timer-goldfish.h>
+
+#include <asm/bootinfo.h>
+#include <asm/bootinfo-virt.h>
+#include <asm/byteorder.h>
+#include <asm/machdep.h>
+#include <asm/virt.h>
+#include <asm/config.h>
+
+struct virt_booter_data virt_bi_data;
+
+#define VIRT_CTRL_REG_FEATURES 0x00
+#define VIRT_CTRL_REG_CMD 0x04
+
+static struct resource ctrlres;
+
+enum {
+ CMD_NOOP,
+ CMD_RESET,
+ CMD_HALT,
+ CMD_PANIC,
+};
+
+static void virt_get_model(char *str)
+{
+ /* str is 80 characters long */
+ sprintf(str, "QEMU Virtual M68K Machine (%u.%u.%u)",
+ (u8)(virt_bi_data.qemu_version >> 24),
+ (u8)(virt_bi_data.qemu_version >> 16),
+ (u8)(virt_bi_data.qemu_version >> 8));
+}
+
+static void virt_halt(void)
+{
+ void __iomem *base = (void __iomem *)virt_bi_data.ctrl.mmio;
+
+ iowrite32be(CMD_HALT, base + VIRT_CTRL_REG_CMD);
+ local_irq_disable();
+ while (1)
+ ;
+}
+
+static void virt_reset(void)
+{
+ void __iomem *base = (void __iomem *)virt_bi_data.ctrl.mmio;
+
+ iowrite32be(CMD_RESET, base + VIRT_CTRL_REG_CMD);
+ local_irq_disable();
+ while (1)
+ ;
+}
+
+/*
+ * Parse a virtual-m68k-specific record in the bootinfo
+ */
+
+int __init virt_parse_bootinfo(const struct bi_record *record)
+{
+ int unknown = 0;
+ const void *data = record->data;
+
+ switch (be16_to_cpu(record->tag)) {
+ case BI_VIRT_QEMU_VERSION:
+ virt_bi_data.qemu_version = be32_to_cpup(data);
+ break;
+ case BI_VIRT_GF_PIC_BASE:
+ virt_bi_data.pic.mmio = be32_to_cpup(data);
+ data += 4;
+ virt_bi_data.pic.irq = be32_to_cpup(data);
+ break;
+ case BI_VIRT_GF_RTC_BASE:
+ virt_bi_data.rtc.mmio = be32_to_cpup(data);
+ data += 4;
+ virt_bi_data.rtc.irq = be32_to_cpup(data);
+ break;
+ case BI_VIRT_GF_TTY_BASE:
+ virt_bi_data.tty.mmio = be32_to_cpup(data);
+ data += 4;
+ virt_bi_data.tty.irq = be32_to_cpup(data);
+ break;
+ case BI_VIRT_CTRL_BASE:
+ virt_bi_data.ctrl.mmio = be32_to_cpup(data);
+ data += 4;
+ virt_bi_data.ctrl.irq = be32_to_cpup(data);
+ break;
+ case BI_VIRT_VIRTIO_BASE:
+ virt_bi_data.virtio.mmio = be32_to_cpup(data);
+ data += 4;
+ virt_bi_data.virtio.irq = be32_to_cpup(data);
+ break;
+ default:
+ unknown = 1;
+ break;
+ }
+ return unknown;
+}
+
+static void __init virt_sched_init(void)
+{
+ goldfish_timer_init(virt_bi_data.rtc.irq,
+ (void __iomem *)virt_bi_data.rtc.mmio);
+}
+
+void __init config_virt(void)
+{
+ char earlycon[24];
+
+ snprintf(earlycon, sizeof(earlycon), "early_gf_tty,0x%08x",
+ virt_bi_data.tty.mmio);
+ setup_earlycon(earlycon);
+
+ ctrlres = (struct resource)
+ DEFINE_RES_MEM_NAMED(virt_bi_data.ctrl.mmio, 0x100,
+ "virtctrl");
+
+ if (request_resource(&iomem_resource, &ctrlres)) {
+ pr_err("Cannot allocate virt controller resource\n");
+ return;
+ }
+
+ mach_init_IRQ = virt_init_IRQ;
+ mach_sched_init = virt_sched_init;
+ mach_get_model = virt_get_model;
+ mach_reset = virt_reset;
+ mach_halt = virt_halt;
+ mach_power_off = virt_halt;
+}
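
Every BI_VIRT_* record handled above carries the same 8-byte payload, a big-endian MMIO base followed by a big-endian IRQ number, which is why each case reads one word, advances data by 4, and reads the next. A hypothetical C view of that payload (the struct name is illustrative, not from the source):

#include <linux/types.h>

/* Hypothetical layout of the payload behind BI_VIRT_*_BASE records. */
struct virt_bi_dev_record {
	__be32 mmio;	/* physical MMIO base of the device */
	__be32 irq;	/* first interrupt line used by it  */
};

virt_parse_bootinfo() above is then equivalent to copying these two fields through be32_to_cpu() into the matching virt_bi_data slot.
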
diff --git a/arch/m68k/virt/ints.c b/arch/m68k/virt/ints.c
new file mode 100644
index 000000000000..95818f901ebe
--- /dev/null
+++ b/arch/m68k/virt/ints.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+
+#include <asm/hwtest.h>
+#include <asm/irq.h>
+#include <asm/irq_regs.h>
+#include <asm/virt.h>
+
+#define GFPIC_REG_IRQ_PENDING 0x04
+#define GFPIC_REG_IRQ_DISABLE_ALL 0x08
+#define GFPIC_REG_IRQ_DISABLE 0x0c
+#define GFPIC_REG_IRQ_ENABLE 0x10
+
+extern void show_registers(struct pt_regs *regs);
+
+static struct resource picres[6];
+static const char *picname[6] = {
+ "goldfish_pic.0",
+ "goldfish_pic.1",
+ "goldfish_pic.2",
+ "goldfish_pic.3",
+ "goldfish_pic.4",
+ "goldfish_pic.5"
+};
+
+/*
+ * 6 goldfish-pic for CPU IRQ #1 to IRQ #6
+ * CPU IRQ #1 -> PIC #1
+ * IRQ #1 to IRQ #31 -> unused
+ * IRQ #32 -> goldfish-tty
+ * CPU IRQ #2 -> PIC #2
+ * IRQ #1 to IRQ #32 -> virtio-mmio from 1 to 32
+ * CPU IRQ #3 -> PIC #3
+ * IRQ #1 to IRQ #32 -> virtio-mmio from 33 to 64
+ * CPU IRQ #4 -> PIC #4
+ * IRQ #1 to IRQ #32 -> virtio-mmio from 65 to 96
+ * CPU IRQ #5 -> PIC #5
+ * IRQ #1 to IRQ #32 -> virtio-mmio from 97 to 128
+ * CPU IRQ #6 -> PIC #6
+ * IRQ #1 -> goldfish-timer
+ * IRQ #2 -> goldfish-rtc
+ * IRQ #3 to IRQ #32 -> unused
+ * CPU IRQ #7 -> NMI
+ */
+
+static u32 gfpic_read(int pic, int reg)
+{
+ void __iomem *base = (void __iomem *)(virt_bi_data.pic.mmio +
+ pic * 0x1000);
+
+ return ioread32be(base + reg);
+}
+
+static void gfpic_write(u32 value, int pic, int reg)
+{
+ void __iomem *base = (void __iomem *)(virt_bi_data.pic.mmio +
+ pic * 0x1000);
+
+ iowrite32be(value, base + reg);
+}
+
+#define GF_PIC(irq) ((irq - IRQ_USER) / 32)
+#define GF_IRQ(irq) ((irq - IRQ_USER) % 32)
+
+static void virt_irq_enable(struct irq_data *data)
+{
+ gfpic_write(BIT(GF_IRQ(data->irq)), GF_PIC(data->irq),
+ GFPIC_REG_IRQ_ENABLE);
+}
+
+static void virt_irq_disable(struct irq_data *data)
+{
+ gfpic_write(BIT(GF_IRQ(data->irq)), GF_PIC(data->irq),
+ GFPIC_REG_IRQ_DISABLE);
+}
+
+static unsigned int virt_irq_startup(struct irq_data *data)
+{
+ virt_irq_enable(data);
+ return 0;
+}
+
+static irqreturn_t virt_nmi_handler(int irq, void *dev_id)
+{
+ static int in_nmi;
+
+ if (READ_ONCE(in_nmi))
+ return IRQ_HANDLED;
+ WRITE_ONCE(in_nmi, 1);
+
+ pr_warn("Non-Maskable Interrupt\n");
+ show_registers(get_irq_regs());
+
+ WRITE_ONCE(in_nmi, 0);
+ return IRQ_HANDLED;
+}
+
+static struct irq_chip virt_irq_chip = {
+ .name = "virt",
+ .irq_enable = virt_irq_enable,
+ .irq_disable = virt_irq_disable,
+ .irq_startup = virt_irq_startup,
+ .irq_shutdown = virt_irq_disable,
+};
+
+static void goldfish_pic_irq(struct irq_desc *desc)
+{
+ u32 irq_pending;
+ unsigned int irq_num;
+ unsigned int pic = desc->irq_data.irq - 1;
+
+ irq_pending = gfpic_read(pic, GFPIC_REG_IRQ_PENDING);
+ irq_num = IRQ_USER + pic * 32;
+
+ do {
+ if (irq_pending & 1)
+ generic_handle_irq(irq_num);
+ ++irq_num;
+ irq_pending >>= 1;
+ } while (irq_pending);
+}
+
+void __init virt_init_IRQ(void)
+{
+ unsigned int i;
+
+ m68k_setup_irq_controller(&virt_irq_chip, handle_simple_irq, IRQ_USER,
+ NUM_VIRT_SOURCES - IRQ_USER);
+
+ for (i = 0; i < 6; i++) {
+
+ picres[i] = (struct resource)
+ DEFINE_RES_MEM_NAMED(virt_bi_data.pic.mmio + i * 0x1000,
+ 0x1000, picname[i]);
+ if (request_resource(&iomem_resource, &picres[i])) {
+ pr_err("Cannot allocate %s resource\n", picname[i]);
+ return;
+ }
+
+ irq_set_chained_handler(virt_bi_data.pic.irq + i,
+ goldfish_pic_irq);
+ }
+
+ if (request_irq(IRQ_AUTO_7, virt_nmi_handler, 0, "NMI",
+ virt_nmi_handler))
+ pr_err("Couldn't register NMI\n");
+}
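
The GF_PIC()/GF_IRQ() macros split a Linux IRQ into a controller index and a bit in that controller's 32-bit mask, and goldfish_pic_irq() performs the inverse while walking the pending word. A small sketch of the round trip (the IRQ_USER value here is an assumption; the real constant comes from asm/irq.h):

#define IRQ_USER 8				/* assumed value, see asm/irq.h */

static unsigned int gf_pic(unsigned int irq) { return (irq - IRQ_USER) / 32; }
static unsigned int gf_bit(unsigned int irq) { return (irq - IRQ_USER) % 32; }

/* Dispatch direction: pending bit n of PIC p is Linux IRQ
 * IRQ_USER + p * 32 + n, exactly what the do/while loop above walks.
 */
static unsigned int gf_linux_irq(unsigned int pic, unsigned int bit)
{
	return IRQ_USER + pic * 32 + bit;
}
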
diff --git a/arch/m68k/virt/platform.c b/arch/m68k/virt/platform.c
new file mode 100644
index 000000000000..cb820f19a221
--- /dev/null
+++ b/arch/m68k/virt/platform.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/memblock.h>
+#include <asm/virt.h>
+#include <asm/irq.h>
+
+#define VIRTIO_BUS_NB 128
+
+static int __init virt_virtio_init(unsigned int id)
+{
+ const struct resource res[] = {
+ DEFINE_RES_MEM(virt_bi_data.virtio.mmio + id * 0x200, 0x200),
+ DEFINE_RES_IRQ(virt_bi_data.virtio.irq + id),
+ };
+ struct platform_device *pdev;
+
+ pdev = platform_device_register_simple("virtio-mmio", id,
+ res, ARRAY_SIZE(res));
+ if (IS_ERR(pdev))
+ return PTR_ERR(pdev);
+
+ return 0;
+}
+
+static int __init virt_platform_init(void)
+{
+ const struct resource goldfish_tty_res[] = {
+ DEFINE_RES_MEM(virt_bi_data.tty.mmio, 1),
+ DEFINE_RES_IRQ(virt_bi_data.tty.irq),
+ };
+ /* this is the second gf-rtc, the first one is used by the scheduler */
+ const struct resource goldfish_rtc_res[] = {
+ DEFINE_RES_MEM(virt_bi_data.rtc.mmio + 0x1000, 0x1000),
+ DEFINE_RES_IRQ(virt_bi_data.rtc.irq + 1),
+ };
+ struct platform_device *pdev;
+ unsigned int i;
+
+ if (!MACH_IS_VIRT)
+ return -ENODEV;
+
+ /* We need this to have DMA'able memory provided to goldfish-tty */
+ min_low_pfn = 0;
+
+ pdev = platform_device_register_simple("goldfish_tty",
+ PLATFORM_DEVID_NONE,
+ goldfish_tty_res,
+ ARRAY_SIZE(goldfish_tty_res));
+ if (IS_ERR(pdev))
+ return PTR_ERR(pdev);
+
+ pdev = platform_device_register_simple("goldfish_rtc",
+ PLATFORM_DEVID_NONE,
+ goldfish_rtc_res,
+ ARRAY_SIZE(goldfish_rtc_res));
+ if (IS_ERR(pdev))
+ return PTR_ERR(pdev);
+
+ for (i = 0; i < VIRTIO_BUS_NB; i++) {
+ int err;
+
+ err = virt_virtio_init(i);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+arch_initcall(virt_platform_init);
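
virt_virtio_init() above hands each of the 128 virtio-mmio transports a 0x200-byte register window and one interrupt, both derived linearly from the bases recorded in the bootinfo. The arithmetic, as a standalone sketch:

/* Sketch: resources for virtio-mmio device "id" (0..127). */
struct virtio_slot {
	unsigned long mmio;
	unsigned int irq;
};

static struct virtio_slot virt_virtio_slot(unsigned long mmio_base,
					   unsigned int irq_base,
					   unsigned int id)
{
	struct virtio_slot s = {
		.mmio = mmio_base + id * 0x200,	/* 512-byte window per device */
		.irq  = irq_base + id,		/* one PIC source per device  */
	};
	return s;
}
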
diff --git a/arch/mips/include/asm/timex.h b/arch/mips/include/asm/timex.h
index 8026baf46e72..2e107886f97a 100644
--- a/arch/mips/include/asm/timex.h
+++ b/arch/mips/include/asm/timex.h
@@ -76,25 +76,24 @@ static inline cycles_t get_cycles(void)
else
return 0; /* no usable counter */
}
+#define get_cycles get_cycles
/*
* Like get_cycles - but where c0_count is not available we desperately
* use c0_random in an attempt to get at least a little bit of entropy.
- *
- * R6000 and R6000A neither have a count register nor a random register.
- * That leaves no entropy source in the CPU itself.
*/
static inline unsigned long random_get_entropy(void)
{
- unsigned int prid = read_c0_prid();
- unsigned int imp = prid & PRID_IMP_MASK;
+ unsigned int c0_random;
- if (can_use_mips_counter(prid))
+ if (can_use_mips_counter(read_c0_prid()))
return read_c0_count();
- else if (likely(imp != PRID_IMP_R6000 && imp != PRID_IMP_R6000A))
- return read_c0_random();
+
+ if (cpu_has_3kex)
+ c0_random = (read_c0_random() >> 8) & 0x3f;
else
- return 0; /* no usable register */
+ c0_random = read_c0_random() & 0x3f;
+ return (random_get_entropy_fallback() << 6) | (0x3f - c0_random);
}
#define random_get_entropy random_get_entropy
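
random_get_entropy() now keeps the generic fallback counter in the high bits and folds the 6-bit c0_random value into the low bits, so the result differs even between closely spaced calls; the cpu_has_3kex test accounts for R3000-style TLBs keeping the Random field at bits 8..13. A sketch of the packing (the 0x3f - r term assumes, as on these cores, that Random counts down, so the subtraction makes it count up):

/* Sketch of the composition done above. */
static unsigned long compose_entropy(unsigned long fallback,
				     unsigned int c0_random)
{
	unsigned int r = c0_random & 0x3f;	/* at most 6 usable bits    */

	return (fallback << 6) | (0x3f - r);	/* counter high, jitter low */
}
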
diff --git a/arch/nios2/include/asm/timex.h b/arch/nios2/include/asm/timex.h
index a769f871b28d..40a1adc9bd03 100644
--- a/arch/nios2/include/asm/timex.h
+++ b/arch/nios2/include/asm/timex.h
@@ -8,5 +8,8 @@
typedef unsigned long cycles_t;
extern cycles_t get_cycles(void);
+#define get_cycles get_cycles
+
+#define random_get_entropy() (((unsigned long)get_cycles()) ?: random_get_entropy_fallback())
#endif
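
The ?: with an omitted middle operand is GCC's conditional-expression extension: it evaluates get_cycles() once and substitutes the fallback only when the result is zero. Longhand equivalent, as a sketch:

extern unsigned long get_cycles(void);
extern unsigned long random_get_entropy_fallback(void);

static unsigned long nios2_entropy(void)
{
	unsigned long c = get_cycles();

	return c ? c : random_get_entropy_fallback();
}
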
diff --git a/arch/openrisc/include/asm/timex.h b/arch/openrisc/include/asm/timex.h
index d52b4e536e3f..5487fa93dd9b 100644
--- a/arch/openrisc/include/asm/timex.h
+++ b/arch/openrisc/include/asm/timex.h
@@ -23,6 +23,7 @@ static inline cycles_t get_cycles(void)
{
return mfspr(SPR_TTCR);
}
+#define get_cycles get_cycles
/* This isn't really used any more */
#define CLOCK_TICK_RATE 1000
diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S
index 15f1b38dfe03..2fa6cefa62ca 100644
--- a/arch/openrisc/kernel/head.S
+++ b/arch/openrisc/kernel/head.S
@@ -521,6 +521,15 @@ _start:
l.ori r3,r0,0x1
l.mtspr r0,r3,SPR_SR
+ /*
+ * Start the TTCR as early as possible, so that the RNG can make use of
+ * measurements of boot time from the earliest opportunity. Especially
+ * important is that the TTCR does not return zero by the time we reach
+ * random_init().
+ */
+ l.movhi r3,hi(SPR_TTMR_CR)
+ l.mtspr r0,r3,SPR_TTMR
+
CLEAR_GPR(r1)
CLEAR_GPR(r2)
CLEAR_GPR(r3)
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index e8b4a03343d3..8d03b3b26229 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -59,20 +59,12 @@ void flush_dcache_page(struct page *page);
flush_kernel_icache_range_asm(s,e); \
} while (0)
-#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
-do { \
- flush_cache_page(vma, vaddr, page_to_pfn(page)); \
- memcpy(dst, src, len); \
- flush_kernel_dcache_range_asm((unsigned long)dst, (unsigned long)dst + len); \
-} while (0)
-
-#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
-do { \
- flush_cache_page(vma, vaddr, page_to_pfn(page)); \
- memcpy(dst, src, len); \
-} while (0)
-
-void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn);
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+ unsigned long user_vaddr, void *dst, void *src, int len);
+void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
+ unsigned long user_vaddr, void *dst, void *src, int len);
+void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
+ unsigned long pfn);
void flush_cache_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
@@ -80,16 +72,7 @@ void flush_cache_range(struct vm_area_struct *vma,
void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
#define ARCH_HAS_FLUSH_ANON_PAGE
-static inline void
-flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
-{
- if (PageAnon(page)) {
- flush_tlb_page(vma, vmaddr);
- preempt_disable();
- flush_dcache_page_asm(page_to_phys(page), vmaddr);
- preempt_enable();
- }
-}
+void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr);
#define ARCH_HAS_FLUSH_ON_KUNMAP
static inline void kunmap_flush_on_unmap(void *addr)
diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h
index 0561568f7b48..6faaaa3ebe9b 100644
--- a/arch/parisc/include/asm/page.h
+++ b/arch/parisc/include/asm/page.h
@@ -26,12 +26,14 @@
#define copy_page(to, from) copy_page_asm((void *)(to), (void *)(from))
struct page;
+struct vm_area_struct;
void clear_page_asm(void *page);
void copy_page_asm(void *to, void *from);
#define clear_user_page(vto, vaddr, page) clear_page_asm(vto)
-void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
- struct page *pg);
+void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr,
+ struct vm_area_struct *vma);
+#define __HAVE_ARCH_COPY_USER_HIGHPAGE
/*
* These are used to make use of C type-checking..
diff --git a/arch/parisc/include/asm/timex.h b/arch/parisc/include/asm/timex.h
index 06b510f8172e..b4622cb06a75 100644
--- a/arch/parisc/include/asm/timex.h
+++ b/arch/parisc/include/asm/timex.h
@@ -13,9 +13,10 @@
typedef unsigned long cycles_t;
-static inline cycles_t get_cycles (void)
+static inline cycles_t get_cycles(void)
{
return mfctl(16);
}
+#define get_cycles get_cycles
#endif
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index e7911225a4f8..0fd04073d4b6 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -27,6 +27,7 @@
#include <asm/processor.h>
#include <asm/sections.h>
#include <asm/shmparam.h>
+#include <asm/mmu_context.h>
int split_tlb __ro_after_init;
int dcache_stride __ro_after_init;
@@ -91,7 +92,7 @@ static inline void flush_data_cache(void)
}
-/* Virtual address of pfn. */
+/* Kernel virtual address of pfn. */
#define pfn_va(pfn) __va(PFN_PHYS(pfn))
void
@@ -124,11 +125,13 @@ show_cache_info(struct seq_file *m)
cache_info.ic_size/1024 );
if (cache_info.dc_loop != 1)
snprintf(buf, 32, "%lu-way associative", cache_info.dc_loop);
- seq_printf(m, "D-cache\t\t: %ld KB (%s%s, %s)\n",
+ seq_printf(m, "D-cache\t\t: %ld KB (%s%s, %s, alias=%d)\n",
cache_info.dc_size/1024,
(cache_info.dc_conf.cc_wt ? "WT":"WB"),
(cache_info.dc_conf.cc_sh ? ", shared I/D":""),
- ((cache_info.dc_loop == 1) ? "direct mapped" : buf));
+ ((cache_info.dc_loop == 1) ? "direct mapped" : buf),
+ cache_info.dc_conf.cc_alias
+ );
seq_printf(m, "ITLB entries\t: %ld\n" "DTLB entries\t: %ld%s\n",
cache_info.it_size,
cache_info.dt_size,
@@ -324,25 +327,81 @@ __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
preempt_enable();
}
-static inline void
-__purge_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
- unsigned long physaddr)
+static void flush_user_cache_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
- if (!static_branch_likely(&parisc_has_cache))
- return;
+ unsigned long flags, space, pgd, prot;
+#ifdef CONFIG_TLB_PTLOCK
+ unsigned long pgd_lock;
+#endif
+
+ vmaddr &= PAGE_MASK;
+
preempt_disable();
- purge_dcache_page_asm(physaddr, vmaddr);
+
+ /* Set context for flush */
+ local_irq_save(flags);
+ prot = mfctl(8);
+ space = mfsp(SR_USER);
+ pgd = mfctl(25);
+#ifdef CONFIG_TLB_PTLOCK
+ pgd_lock = mfctl(28);
+#endif
+ switch_mm_irqs_off(NULL, vma->vm_mm, NULL);
+ local_irq_restore(flags);
+
+ flush_user_dcache_range_asm(vmaddr, vmaddr + PAGE_SIZE);
if (vma->vm_flags & VM_EXEC)
- flush_icache_page_asm(physaddr, vmaddr);
+ flush_user_icache_range_asm(vmaddr, vmaddr + PAGE_SIZE);
+ flush_tlb_page(vma, vmaddr);
+
+ /* Restore previous context */
+ local_irq_save(flags);
+#ifdef CONFIG_TLB_PTLOCK
+ mtctl(pgd_lock, 28);
+#endif
+ mtctl(pgd, 25);
+ mtsp(space, SR_USER);
+ mtctl(prot, 8);
+ local_irq_restore(flags);
+
preempt_enable();
}
+static inline pte_t *get_ptep(struct mm_struct *mm, unsigned long addr)
+{
+ pte_t *ptep = NULL;
+ pgd_t *pgd = mm->pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ if (!pgd_none(*pgd)) {
+ p4d = p4d_offset(pgd, addr);
+ if (!p4d_none(*p4d)) {
+ pud = pud_offset(p4d, addr);
+ if (!pud_none(*pud)) {
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_none(*pmd))
+ ptep = pte_offset_map(pmd, addr);
+ }
+ }
+ }
+ return ptep;
+}
+
+static inline bool pte_needs_flush(pte_t pte)
+{
+ return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_NO_CACHE))
+ == (_PAGE_PRESENT | _PAGE_ACCESSED);
+}
+
void flush_dcache_page(struct page *page)
{
struct address_space *mapping = page_mapping_file(page);
struct vm_area_struct *mpnt;
unsigned long offset;
unsigned long addr, old_addr = 0;
+ unsigned long count = 0;
pgoff_t pgoff;
if (mapping && !mapping_mapped(mapping)) {
@@ -357,33 +416,52 @@ void flush_dcache_page(struct page *page)
pgoff = page->index;
- /* We have carefully arranged in arch_get_unmapped_area() that
+ /*
+ * We have carefully arranged in arch_get_unmapped_area() that
* *any* mappings of a file are always congruently mapped (whether
* declared as MAP_PRIVATE or MAP_SHARED), so we only need
- * to flush one address here for them all to become coherent */
-
+ * to flush one address here for them all to become coherent
+ * on machines that support equivalent aliasing
+ */
flush_dcache_mmap_lock(mapping);
vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
addr = mpnt->vm_start + offset;
+ if (parisc_requires_coherency()) {
+ pte_t *ptep;
- /* The TLB is the engine of coherence on parisc: The
- * CPU is entitled to speculate any page with a TLB
- * mapping, so here we kill the mapping then flush the
- * page along a special flush only alias mapping.
- * This guarantees that the page is no-longer in the
- * cache for any process and nor may it be
- * speculatively read in (until the user or kernel
- * specifically accesses it, of course) */
-
- flush_tlb_page(mpnt, addr);
- if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1))
- != (addr & (SHM_COLOUR - 1))) {
- __flush_cache_page(mpnt, addr, page_to_phys(page));
- if (parisc_requires_coherency() && old_addr)
- printk(KERN_ERR "INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n", old_addr, addr, mpnt->vm_file);
- old_addr = addr;
+ ptep = get_ptep(mpnt->vm_mm, addr);
+ if (ptep && pte_needs_flush(*ptep))
+ flush_user_cache_page(mpnt, addr);
+ } else {
+ /*
+ * The TLB is the engine of coherence on parisc:
+ * The CPU is entitled to speculate any page
+ * with a TLB mapping, so here we kill the
+ * mapping then flush the page along a special
+ * flush only alias mapping. This guarantees that
+ * the page is no longer in the cache for any
+ * process, nor may it be speculatively read
+ * in (until the user or kernel specifically
+ * accesses it, of course)
+ */
+ flush_tlb_page(mpnt, addr);
+ if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1))
+ != (addr & (SHM_COLOUR - 1))) {
+ __flush_cache_page(mpnt, addr, page_to_phys(page));
+ /*
+ * Software is allowed to have any number
+ * of private mappings to a page.
+ */
+ if (!(mpnt->vm_flags & VM_SHARED))
+ continue;
+ if (old_addr)
+ pr_err("INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n",
+ old_addr, addr, mpnt->vm_file);
+ old_addr = addr;
+ }
}
+ WARN_ON(++count == 4096);
}
flush_dcache_mmap_unlock(mapping);
}
@@ -403,7 +481,7 @@ void __init parisc_setup_cache_timing(void)
{
unsigned long rangetime, alltime;
unsigned long size;
- unsigned long threshold;
+ unsigned long threshold, threshold2;
alltime = mfctl(16);
flush_data_cache();
@@ -417,11 +495,16 @@ void __init parisc_setup_cache_timing(void)
printk(KERN_DEBUG "Whole cache flush %lu cycles, flushing %lu bytes %lu cycles\n",
alltime, size, rangetime);
- threshold = L1_CACHE_ALIGN(size * alltime / rangetime);
- if (threshold > cache_info.dc_size)
- threshold = cache_info.dc_size;
- if (threshold)
- parisc_cache_flush_threshold = threshold;
+ threshold = L1_CACHE_ALIGN((unsigned long)((uint64_t)size * alltime / rangetime));
+ pr_info("Calculated flush threshold is %lu KiB\n",
+ threshold/1024);
+
+ /*
+ * The threshold computed above isn't very reliable. The following
+ * heuristic works reasonably well on c8000/rp3440.
+ */
+ threshold2 = cache_info.dc_size * num_online_cpus();
+ parisc_cache_flush_threshold = threshold2;
printk(KERN_INFO "Cache flush threshold set to %lu KiB\n",
parisc_cache_flush_threshold/1024);
@@ -477,19 +560,47 @@ void flush_kernel_dcache_page_addr(void *addr)
}
EXPORT_SYMBOL(flush_kernel_dcache_page_addr);
-void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
- struct page *pg)
+static void flush_cache_page_if_present(struct vm_area_struct *vma,
+ unsigned long vmaddr, unsigned long pfn)
{
- /* Copy using kernel mapping. No coherency is needed (all in
- kunmap) for the `to' page. However, the `from' page needs to
- be flushed through a mapping equivalent to the user mapping
- before it can be accessed through the kernel mapping. */
- preempt_disable();
- flush_dcache_page_asm(__pa(vfrom), vaddr);
- copy_page_asm(vto, vfrom);
- preempt_enable();
+ pte_t *ptep = get_ptep(vma->vm_mm, vmaddr);
+
+ /*
+ * The pte check is racy and sometimes the flush will trigger
+ * a non-access TLB miss. Hopefully, the page has already been
+ * flushed.
+ */
+ if (ptep && pte_needs_flush(*ptep))
+ flush_cache_page(vma, vmaddr, pfn);
+}
+
+void copy_user_highpage(struct page *to, struct page *from,
+ unsigned long vaddr, struct vm_area_struct *vma)
+{
+ void *kto, *kfrom;
+
+ kfrom = kmap_local_page(from);
+ kto = kmap_local_page(to);
+ flush_cache_page_if_present(vma, vaddr, page_to_pfn(from));
+ copy_page_asm(kto, kfrom);
+ kunmap_local(kto);
+ kunmap_local(kfrom);
+}
+
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+ unsigned long user_vaddr, void *dst, void *src, int len)
+{
+ flush_cache_page_if_present(vma, user_vaddr, page_to_pfn(page));
+ memcpy(dst, src, len);
+ flush_kernel_dcache_range_asm((unsigned long)dst, (unsigned long)dst + len);
+}
+
+void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
+ unsigned long user_vaddr, void *dst, void *src, int len)
+{
+ flush_cache_page_if_present(vma, user_vaddr, page_to_pfn(page));
+ memcpy(dst, src, len);
}
-EXPORT_SYMBOL(copy_user_page);
/* __flush_tlb_range()
*
@@ -520,92 +631,105 @@ int __flush_tlb_range(unsigned long sid, unsigned long start,
return 0;
}
-static inline unsigned long mm_total_size(struct mm_struct *mm)
+static void flush_cache_pages(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
- struct vm_area_struct *vma;
- unsigned long usize = 0;
-
- for (vma = mm->mmap; vma; vma = vma->vm_next)
- usize += vma->vm_end - vma->vm_start;
- return usize;
-}
-
-static inline pte_t *get_ptep(pgd_t *pgd, unsigned long addr)
-{
- pte_t *ptep = NULL;
+ unsigned long addr, pfn;
+ pte_t *ptep;
- if (!pgd_none(*pgd)) {
- p4d_t *p4d = p4d_offset(pgd, addr);
- if (!p4d_none(*p4d)) {
- pud_t *pud = pud_offset(p4d, addr);
- if (!pud_none(*pud)) {
- pmd_t *pmd = pmd_offset(pud, addr);
- if (!pmd_none(*pmd))
- ptep = pte_offset_map(pmd, addr);
+ for (addr = start; addr < end; addr += PAGE_SIZE) {
+ /*
+ * The vma can contain pages that aren't present. Although
+ * the pte search is expensive, we need the pte to find the
+ * page pfn and to check whether the page should be flushed.
+ */
+ ptep = get_ptep(vma->vm_mm, addr);
+ if (ptep && pte_needs_flush(*ptep)) {
+ if (parisc_requires_coherency()) {
+ flush_user_cache_page(vma, addr);
+ } else {
+ pfn = pte_pfn(*ptep);
+ if (WARN_ON(!pfn_valid(pfn)))
+ return;
+ __flush_cache_page(vma, addr, PFN_PHYS(pfn));
}
}
}
- return ptep;
}
-static void flush_cache_pages(struct vm_area_struct *vma, struct mm_struct *mm,
- unsigned long start, unsigned long end)
+static inline unsigned long mm_total_size(struct mm_struct *mm)
{
- unsigned long addr, pfn;
- pte_t *ptep;
+ struct vm_area_struct *vma;
+ unsigned long usize = 0;
- for (addr = start; addr < end; addr += PAGE_SIZE) {
- ptep = get_ptep(mm->pgd, addr);
- if (ptep) {
- pfn = pte_pfn(*ptep);
- flush_cache_page(vma, addr, pfn);
- }
- }
+ for (vma = mm->mmap; vma && usize < parisc_cache_flush_threshold; vma = vma->vm_next)
+ usize += vma->vm_end - vma->vm_start;
+ return usize;
}
void flush_cache_mm(struct mm_struct *mm)
{
struct vm_area_struct *vma;
- /* Flushing the whole cache on each cpu takes forever on
- rp3440, etc. So, avoid it if the mm isn't too big. */
- if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
- mm_total_size(mm) >= parisc_cache_flush_threshold) {
- if (mm->context.space_id)
- flush_tlb_all();
+ /*
+ * Flushing the whole cache on each cpu takes forever on
+ * rp3440, etc. So, avoid it if the mm isn't too big.
+ *
+ * Note that we must flush the entire cache on machines
+ * with aliasing caches to prevent random segmentation
+ * faults.
+ */
+ if (!parisc_requires_coherency()
+ || mm_total_size(mm) >= parisc_cache_flush_threshold) {
+ if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled()))
+ return;
+ flush_tlb_all();
flush_cache_all();
return;
}
+ /* Flush mm */
for (vma = mm->mmap; vma; vma = vma->vm_next)
- flush_cache_pages(vma, mm, vma->vm_start, vma->vm_end);
+ flush_cache_pages(vma, vma->vm_start, vma->vm_end);
}
-void flush_cache_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
+void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
- if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
- end - start >= parisc_cache_flush_threshold) {
- if (vma->vm_mm->context.space_id)
- flush_tlb_range(vma, start, end);
+ if (!parisc_requires_coherency()
+ || end - start >= parisc_cache_flush_threshold) {
+ if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled()))
+ return;
+ flush_tlb_range(vma, start, end);
flush_cache_all();
return;
}
- flush_cache_pages(vma, vma->vm_mm, start, end);
+ flush_cache_pages(vma, start, end);
}
-void
-flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn)
+void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn)
{
- if (pfn_valid(pfn)) {
- if (likely(vma->vm_mm->context.space_id)) {
- flush_tlb_page(vma, vmaddr);
- __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
- } else {
- __purge_cache_page(vma, vmaddr, PFN_PHYS(pfn));
- }
+ if (WARN_ON(!pfn_valid(pfn)))
+ return;
+ if (parisc_requires_coherency())
+ flush_user_cache_page(vma, vmaddr);
+ else
+ __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
+}
+
+void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
+{
+ if (!PageAnon(page))
+ return;
+
+ if (parisc_requires_coherency()) {
+ flush_user_cache_page(vma, vmaddr);
+ return;
}
+
+ flush_tlb_page(vma, vmaddr);
+ preempt_disable();
+ flush_dcache_page_asm(page_to_phys(page), vmaddr);
+ preempt_enable();
}
void flush_kernel_vmap_range(void *vaddr, int size)
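
On PA-RISC, mappings of a page whose virtual addresses agree modulo SHM_COLOUR occupy the same cache lines, so one flush covers them all; flush_dcache_page() above therefore flushes a shared mapping only when its colour differs from the last one flushed. The alias test, as a sketch:

/* Sketch: two virtual addresses share a cache colour iff this holds. */
static int same_cache_colour(unsigned long a, unsigned long b,
			     unsigned long shm_colour)
{
	return (a & (shm_colour - 1)) == (b & (shm_colour - 1));
}

In the loop above, a second shared mapping with the same colour is already coherent after the first flush; two shared mappings with differing colours are the INEQUIVALENT ALIASES case the pr_err() warns about.
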
diff --git a/arch/parisc/kernel/patch.c b/arch/parisc/kernel/patch.c
index 80a0ab372802..e59574f65e64 100644
--- a/arch/parisc/kernel/patch.c
+++ b/arch/parisc/kernel/patch.c
@@ -40,10 +40,7 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags,
*need_unmap = 1;
set_fixmap(fixmap, page_to_phys(page));
- if (flags)
- raw_spin_lock_irqsave(&patch_lock, *flags);
- else
- __acquire(&patch_lock);
+ raw_spin_lock_irqsave(&patch_lock, *flags);
return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK));
}
@@ -52,10 +49,7 @@ static void __kprobes patch_unmap(int fixmap, unsigned long *flags)
{
clear_fixmap(fixmap);
- if (flags)
- raw_spin_unlock_irqrestore(&patch_lock, *flags);
- else
- __release(&patch_lock);
+ raw_spin_unlock_irqrestore(&patch_lock, *flags);
}
void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len)
@@ -67,8 +61,9 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len)
int mapped;
/* Make sure we don't have any aliases in cache */
- flush_kernel_vmap_range(addr, len);
- flush_icache_range(start, end);
+ flush_kernel_dcache_range_asm(start, end);
+ flush_kernel_icache_range_asm(start, end);
+ flush_tlb_kernel_range(start, end);
p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags, &mapped);
@@ -81,8 +76,10 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len)
* We're crossing a page boundary, so
* need to remap
*/
- flush_kernel_vmap_range((void *)fixmap,
- (p-fixmap) * sizeof(*p));
+ flush_kernel_dcache_range_asm((unsigned long)fixmap,
+ (unsigned long)p);
+ flush_tlb_kernel_range((unsigned long)fixmap,
+ (unsigned long)p);
if (mapped)
patch_unmap(FIX_TEXT_POKE0, &flags);
p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags,
@@ -90,10 +87,10 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len)
}
}
- flush_kernel_vmap_range((void *)fixmap, (p-fixmap) * sizeof(*p));
+ flush_kernel_dcache_range_asm((unsigned long)fixmap, (unsigned long)p);
+ flush_tlb_kernel_range((unsigned long)fixmap, (unsigned long)p);
if (mapped)
patch_unmap(FIX_TEXT_POKE0, &flags);
- flush_icache_range(start, end);
}
void __kprobes __patch_text(void *addr, u32 insn)
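
With the lock now taken unconditionally, every patch site follows one sequence: map the target page into the FIX_TEXT_POKE0 fixmap, write through the alias, flush the d-cache and TLB for the alias, then unmap. A condensed sketch for one word (reusing the helpers named in the hunk; page-crossing and multi-word handling omitted):

/* Sketch: patching a single instruction word, mirroring __patch_text(). */
static void patch_one_word(void *addr, u32 insn)
{
	unsigned long flags;
	int mapped;
	u32 *p = patch_map(addr, FIX_TEXT_POKE0, &flags, &mapped);

	*p = insn;				/* store via the alias */
	flush_kernel_dcache_range_asm((unsigned long)p,
				      (unsigned long)(p + 1));
	flush_tlb_kernel_range((unsigned long)p,
			       (unsigned long)(p + 1));
	if (mapped)
		patch_unmap(FIX_TEXT_POKE0, &flags);
}
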
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index f114e102aaf2..84bc437be5cd 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -22,6 +22,8 @@
#include <asm/traps.h>
+#define DEBUG_NATLB 0
+
/* Various important other fields */
#define bit22set(x) (x & 0x00000200)
#define bits23_25set(x) (x & 0x000001c0)
@@ -450,8 +452,8 @@ handle_nadtlb_fault(struct pt_regs *regs)
fallthrough;
case 0x380:
/* PDC and FIC instructions */
- if (printk_ratelimit()) {
- pr_warn("BUG: nullifying cache flush/purge instruction\n");
+ if (DEBUG_NATLB && printk_ratelimit()) {
+ pr_warn("WARNING: nullifying cache flush/purge instruction\n");
show_regs(regs);
}
if (insn & 0x20) {
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index ecbae1832de3..61a4736355c2 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -13,7 +13,8 @@
#ifdef CONFIG_DEBUG_BUGVERBOSE
.macro __EMIT_BUG_ENTRY addr,file,line,flags
.section __bug_table,"aw"
-5001: .4byte \addr - 5001b, 5002f - 5001b
+5001: .4byte \addr - .
+ .4byte 5002f - .
.short \line, \flags
.org 5001b+BUG_ENTRY_SIZE
.previous
@@ -24,7 +25,7 @@
#else
.macro __EMIT_BUG_ENTRY addr,file,line,flags
.section __bug_table,"aw"
-5001: .4byte \addr - 5001b
+5001: .4byte \addr - .
.short \flags
.org 5001b+BUG_ENTRY_SIZE
.previous
@@ -49,15 +50,16 @@
#ifdef CONFIG_DEBUG_BUGVERBOSE
#define _EMIT_BUG_ENTRY \
".section __bug_table,\"aw\"\n" \
- "2:\t.4byte 1b - 2b, %0 - 2b\n" \
- "\t.short %1, %2\n" \
+ "2: .4byte 1b - .\n" \
+ " .4byte %0 - .\n" \
+ " .short %1, %2\n" \
".org 2b+%3\n" \
".previous\n"
#else
#define _EMIT_BUG_ENTRY \
".section __bug_table,\"aw\"\n" \
- "2:\t.4byte 1b - 2b\n" \
- "\t.short %2\n" \
+ "2: .4byte 1b - .\n" \
+ " .short %2\n" \
".org 2b+%3\n" \
".previous\n"
#endif
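
Emitting \addr - . stores a signed displacement relative to the table entry itself rather than an absolute address, so the __bug_table needs no relocations and stays valid wherever the kernel is loaded. Recovering the address is one addition; a sketch under the field layout implied above (names are illustrative):

/* Sketch: resolving a relative bug-table entry back to an address. */
struct rel_bug_entry {
	signed int bug_addr_disp;	/* .4byte \addr - .           */
	signed int file_disp;		/* only with DEBUG_BUGVERBOSE */
	unsigned short line, flags;
};

static unsigned long bug_addr(const struct rel_bug_entry *e)
{
	return (unsigned long)&e->bug_addr_disp + e->bug_addr_disp;
}
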
diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h
index fa2e76e4093a..14b4489de52c 100644
--- a/arch/powerpc/include/asm/timex.h
+++ b/arch/powerpc/include/asm/timex.h
@@ -19,6 +19,7 @@ static inline cycles_t get_cycles(void)
{
return mftb();
}
+#define get_cycles get_cycles
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_TIMEX_H */
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 65562c4a0a69..4c09c6688ac6 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -752,7 +752,7 @@ u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
* FIXME: How do i get PID? Do I really need it?
* prstatus.pr_pid = ????
*/
- elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
+ elf_core_copy_regs(&prstatus.pr_reg, regs);
buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
&prstatus, sizeof(prstatus));
return buf;
diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c
index b97bc179f65a..adcb1a1a2bfe 100644
--- a/arch/powerpc/platforms/powernv/opal-core.c
+++ b/arch/powerpc/platforms/powernv/opal-core.c
@@ -112,7 +112,7 @@ static void __init fill_prstatus(struct elf_prstatus *prstatus, int pir,
struct pt_regs *regs)
{
memset(prstatus, 0, sizeof(struct elf_prstatus));
- elf_core_copy_kernel_regs(&(prstatus->pr_reg), regs);
+ elf_core_copy_regs(&(prstatus->pr_reg), regs);
/*
* Overload PID with PIR value.
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 00fd9c548f26..3ac2a81a55eb 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -468,7 +468,7 @@ config CC_HAVE_STACKPROTECTOR_TLS
config STACKPROTECTOR_PER_TASK
def_bool y
- depends on !GCC_PLUGIN_RANDSTRUCT
+ depends on !RANDSTRUCT
depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_TLS
config PHYS_RAM_BASE_FIXED
diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
index 746c4d4e7686..cf2f55e1dcb6 100644
--- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
+++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
@@ -366,7 +366,7 @@
gpio1: gpio@20121000 {
compatible = "microchip,mpfs-gpio";
- reg = <000 0x20121000 0x0 0x1000>;
+ reg = <0x0 0x20121000 0x0 0x1000>;
interrupt-parent = <&plic>;
interrupt-controller;
#interrupt-cells = <1>;
diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
index aad45d7f498f..5c638fd5b35c 100644
--- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
+++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
@@ -167,7 +167,7 @@
clocks = <&prci FU540_PRCI_CLK_TLCLK>;
status = "disabled";
};
- dma: dma@3000000 {
+ dma: dma-controller@3000000 {
compatible = "sifive,fu540-c000-pdma";
reg = <0x0 0x3000000 0x0 0x8000>;
interrupt-parent = <&plic0>;
diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h
index d3804a2f9aad..1aaea81fb141 100644
--- a/arch/riscv/include/asm/bug.h
+++ b/arch/riscv/include/asm/bug.h
@@ -30,8 +30,8 @@
typedef u32 bug_insn_t;
#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
-#define __BUG_ENTRY_ADDR RISCV_INT " 1b - 2b"
-#define __BUG_ENTRY_FILE RISCV_INT " %0 - 2b"
+#define __BUG_ENTRY_ADDR RISCV_INT " 1b - ."
+#define __BUG_ENTRY_FILE RISCV_INT " %0 - ."
#else
#define __BUG_ENTRY_ADDR RISCV_PTR " 1b"
#define __BUG_ENTRY_FILE RISCV_PTR " %0"
diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h
index 507cae273bc6..d6a7428f6248 100644
--- a/arch/riscv/include/asm/timex.h
+++ b/arch/riscv/include/asm/timex.h
@@ -41,7 +41,7 @@ static inline u32 get_cycles_hi(void)
static inline unsigned long random_get_entropy(void)
{
if (unlikely(clint_time_val == NULL))
- return 0;
+ return random_get_entropy_fallback();
return get_cycles();
}
#define random_get_entropy() random_get_entropy()
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index df325eacf62d..80eb3ee84ff1 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -20,7 +20,9 @@ LDFLAGS_vmlinux := -pie
endif
aflags_dwarf := -Wa,-gdwarf-2
KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__
+ifndef CONFIG_AS_IS_LLVM
KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf))
+endif
KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack
KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY
KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain
diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore
index b265bfede188..f56591bc0897 100644
--- a/arch/s390/boot/.gitignore
+++ b/arch/s390/boot/.gitignore
@@ -2,3 +2,6 @@
image
bzImage
section_cmp.*
+vmlinux
+vmlinux.lds
+vmlinux.syms
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index 0ba646899131..883357a211a3 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -37,14 +37,21 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
-obj-y += version.o pgm_check_info.o ctype.o
+obj-y += version.o pgm_check_info.o ctype.o ipl_data.o
obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
obj-$(CONFIG_RELOCATABLE) += machine_kexec_reloc.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
-targets := bzImage startup.a section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y)
-subdir- := compressed
+obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
+obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o
+obj-all := $(obj-y) piggy.o syms.o
+
+targets := bzImage section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y)
+targets += vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
+targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
+targets += vmlinux.bin.zst info.bin syms.bin vmlinux.syms $(obj-all)
OBJECTS := $(addprefix $(obj)/,$(obj-y))
+OBJECTS_ALL := $(addprefix $(obj)/,$(obj-all))
quiet_cmd_section_cmp = SECTCMP $*
define cmd_section_cmp
@@ -59,14 +66,67 @@ define cmd_section_cmp
touch $@
endef
-$(obj)/bzImage: $(obj)/compressed/vmlinux $(obj)/section_cmp.boot.data $(obj)/section_cmp.boot.preserved.data FORCE
+$(obj)/bzImage: $(obj)/vmlinux $(obj)/section_cmp.boot.data $(obj)/section_cmp.boot.preserved.data FORCE
$(call if_changed,objcopy)
-$(obj)/section_cmp%: vmlinux $(obj)/compressed/vmlinux FORCE
+$(obj)/section_cmp%: vmlinux $(obj)/vmlinux FORCE
$(call if_changed,section_cmp)
-$(obj)/compressed/vmlinux: $(obj)/startup.a FORCE
- $(Q)$(MAKE) $(build)=$(obj)/compressed $@
+LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup --build-id=sha1 -T
+$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS_ALL) FORCE
+ $(call if_changed,ld)
+
+LDFLAGS_vmlinux.syms := --oformat $(LD_BFD) -e startup -T
+$(obj)/vmlinux.syms: $(obj)/vmlinux.lds $(OBJECTS) FORCE
+ $(call if_changed,ld)
+
+quiet_cmd_dumpsyms = DUMPSYMS $<
+define cmd_dumpsyms
+ $(NM) -n -S --format=bsd "$<" | sed -nE 's/^0*([0-9a-fA-F]+) 0*([0-9a-fA-F]+) [tT] ([^ ]*)$$/\1 \2 \3/p' | tr '\n' '\0' > "$@"
+endef
+
+$(obj)/syms.bin: $(obj)/vmlinux.syms FORCE
+ $(call if_changed,dumpsyms)
+
+OBJCOPYFLAGS_syms.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.decompressor.syms
+$(obj)/syms.o: $(obj)/syms.bin FORCE
+ $(call if_changed,objcopy)
+
+OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info --set-section-flags .vmlinux.info=load
+$(obj)/info.bin: vmlinux FORCE
+ $(call if_changed,objcopy)
+
+OBJCOPYFLAGS_info.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.info
+$(obj)/info.o: $(obj)/info.bin FORCE
+ $(call if_changed,objcopy)
+
+OBJCOPYFLAGS_vmlinux.bin := -O binary --remove-section=.comment --remove-section=.vmlinux.info -S
+$(obj)/vmlinux.bin: vmlinux FORCE
+ $(call if_changed,objcopy)
+
+suffix-$(CONFIG_KERNEL_GZIP) := .gz
+suffix-$(CONFIG_KERNEL_BZIP2) := .bz2
+suffix-$(CONFIG_KERNEL_LZ4) := .lz4
+suffix-$(CONFIG_KERNEL_LZMA) := .lzma
+suffix-$(CONFIG_KERNEL_LZO) := .lzo
+suffix-$(CONFIG_KERNEL_XZ) := .xz
+suffix-$(CONFIG_KERNEL_ZSTD) := .zst
-$(obj)/startup.a: $(OBJECTS) FORCE
- $(call if_changed,ar)
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,gzip)
+$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,bzip2_with_size)
+$(obj)/vmlinux.bin.lz4: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,lz4_with_size)
+$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,lzma_with_size)
+$(obj)/vmlinux.bin.lzo: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,lzo_with_size)
+$(obj)/vmlinux.bin.xz: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,xzkern_with_size)
+$(obj)/vmlinux.bin.zst: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,zstd22_with_size)
+
+OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.bin.compressed
+$(obj)/piggy.o: $(obj)/vmlinux.bin$(suffix-y) FORCE
+ $(call if_changed,objcopy)
diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h
index 641ce0fc5c3e..70418389414d 100644
--- a/arch/s390/boot/boot.h
+++ b/arch/s390/boot/boot.h
@@ -2,9 +2,12 @@
#ifndef BOOT_BOOT_H
#define BOOT_BOOT_H
-#include <asm/extable.h>
#include <linux/types.h>
+#define IPL_START 0x200
+
+#ifndef __ASSEMBLY__
+
void startup_kernel(void);
unsigned long detect_memory(void);
bool is_ipl_block_dump(void);
@@ -31,4 +34,5 @@ extern char _stack_start[], _stack_end[];
unsigned long read_ipl_report(unsigned long safe_offset);
+#endif /* __ASSEMBLY__ */
#endif /* BOOT_BOOT_H */
diff --git a/arch/s390/boot/compressed/clz_ctz.c b/arch/s390/boot/clz_ctz.c
index c3ebf248596b..c3ebf248596b 100644
--- a/arch/s390/boot/compressed/clz_ctz.c
+++ b/arch/s390/boot/clz_ctz.c
diff --git a/arch/s390/boot/compressed/.gitignore b/arch/s390/boot/compressed/.gitignore
deleted file mode 100644
index 01d93832cf4a..000000000000
--- a/arch/s390/boot/compressed/.gitignore
+++ /dev/null
@@ -1,4 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-vmlinux
-vmlinux.lds
-vmlinux.syms
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
deleted file mode 100644
index d04e0e7de0b3..000000000000
--- a/arch/s390/boot/compressed/Makefile
+++ /dev/null
@@ -1,86 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# linux/arch/s390/boot/compressed/Makefile
-#
-# create a compressed vmlinux image from the original vmlinux
-#
-
-KCOV_INSTRUMENT := n
-GCOV_PROFILE := n
-UBSAN_SANITIZE := n
-KASAN_SANITIZE := n
-KCSAN_SANITIZE := n
-
-obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
-obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o
-obj-all := $(obj-y) piggy.o syms.o
-targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
-targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
-targets += vmlinux.bin.zst
-targets += info.bin syms.bin vmlinux.syms $(obj-all)
-
-KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR)
-KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR)
-OBJCOPYFLAGS :=
-
-OBJECTS := $(addprefix $(obj)/,$(obj-y))
-OBJECTS_ALL := $(addprefix $(obj)/,$(obj-all))
-
-LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup --build-id=sha1 -T
-$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS_ALL) FORCE
- $(call if_changed,ld)
-
-LDFLAGS_vmlinux.syms := --oformat $(LD_BFD) -e startup -T
-$(obj)/vmlinux.syms: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) FORCE
- $(call if_changed,ld)
-
-quiet_cmd_dumpsyms = DUMPSYMS $<
-define cmd_dumpsyms
- $(NM) -n -S --format=bsd "$<" | sed -nE 's/^0*([0-9a-fA-F]+) 0*([0-9a-fA-F]+) [tT] ([^ ]*)$$/\1 \2 \3/p' | tr '\n' '\0' > "$@"
-endef
-
-$(obj)/syms.bin: $(obj)/vmlinux.syms FORCE
- $(call if_changed,dumpsyms)
-
-OBJCOPYFLAGS_syms.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.decompressor.syms
-$(obj)/syms.o: $(obj)/syms.bin FORCE
- $(call if_changed,objcopy)
-
-OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info --set-section-flags .vmlinux.info=load
-$(obj)/info.bin: vmlinux FORCE
- $(call if_changed,objcopy)
-
-OBJCOPYFLAGS_info.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.info
-$(obj)/info.o: $(obj)/info.bin FORCE
- $(call if_changed,objcopy)
-
-OBJCOPYFLAGS_vmlinux.bin := -O binary --remove-section=.comment --remove-section=.vmlinux.info -S
-$(obj)/vmlinux.bin: vmlinux FORCE
- $(call if_changed,objcopy)
-
-suffix-$(CONFIG_KERNEL_GZIP) := .gz
-suffix-$(CONFIG_KERNEL_BZIP2) := .bz2
-suffix-$(CONFIG_KERNEL_LZ4) := .lz4
-suffix-$(CONFIG_KERNEL_LZMA) := .lzma
-suffix-$(CONFIG_KERNEL_LZO) := .lzo
-suffix-$(CONFIG_KERNEL_XZ) := .xz
-suffix-$(CONFIG_KERNEL_ZSTD) := .zst
-
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
- $(call if_changed,gzip)
-$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
- $(call if_changed,bzip2_with_size)
-$(obj)/vmlinux.bin.lz4: $(obj)/vmlinux.bin FORCE
- $(call if_changed,lz4_with_size)
-$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
- $(call if_changed,lzma_with_size)
-$(obj)/vmlinux.bin.lzo: $(obj)/vmlinux.bin FORCE
- $(call if_changed,lzo_with_size)
-$(obj)/vmlinux.bin.xz: $(obj)/vmlinux.bin FORCE
- $(call if_changed,xzkern_with_size)
-$(obj)/vmlinux.bin.zst: $(obj)/vmlinux.bin FORCE
- $(call if_changed,zstd22_with_size)
-
-OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.bin.compressed
-$(obj)/piggy.o: $(obj)/vmlinux.bin$(suffix-y) FORCE
- $(call if_changed,objcopy)
diff --git a/arch/s390/boot/compressed/decompressor.c b/arch/s390/boot/decompressor.c
index e27c2140d620..e27c2140d620 100644
--- a/arch/s390/boot/compressed/decompressor.c
+++ b/arch/s390/boot/decompressor.c
diff --git a/arch/s390/boot/compressed/decompressor.h b/arch/s390/boot/decompressor.h
index f75cc31a77dd..f75cc31a77dd 100644
--- a/arch/s390/boot/compressed/decompressor.h
+++ b/arch/s390/boot/decompressor.h
diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S
index 666692429db0..3f79b9efb803 100644
--- a/arch/s390/boot/head.S
+++ b/arch/s390/boot/head.S
@@ -27,234 +27,181 @@
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/sclp.h>
-
-#define ARCH_OFFSET 4
+#include "boot.h"
#define EP_OFFSET 0x10008
#define EP_STRING "S390EP"
+#define IPL_BS 0x730
__HEAD
-
-#define IPL_BS 0x730
- .org 0
- .long 0x00080000,0x80000000+iplstart # The first 24 bytes are loaded
- .long 0x02000018,0x60000050 # by ipl to addresses 0-23.
- .long 0x02000068,0x60000050 # (a PSW and two CCWs).
- .fill 80-24,1,0x40 # bytes 24-79 are discarded !!
- .long 0x020000f0,0x60000050 # The next 160 byte are loaded
- .long 0x02000140,0x60000050 # to addresses 0x18-0xb7
- .long 0x02000190,0x60000050 # They form the continuation
- .long 0x020001e0,0x60000050 # of the CCW program started
- .long 0x02000230,0x60000050 # by ipl and load the range
- .long 0x02000280,0x60000050 # 0x0f0-0x730 from the image
- .long 0x020002d0,0x60000050 # to the range 0x0f0-0x730
- .long 0x02000320,0x60000050 # in memory. At the end of
- .long 0x02000370,0x60000050 # the channel program the PSW
- .long 0x020003c0,0x60000050 # at location 0 is loaded.
- .long 0x02000410,0x60000050 # Initial processing starts
- .long 0x02000460,0x60000050 # at 0x200 = iplstart.
- .long 0x020004b0,0x60000050
- .long 0x02000500,0x60000050
- .long 0x02000550,0x60000050
- .long 0x020005a0,0x60000050
- .long 0x020005f0,0x60000050
- .long 0x02000640,0x60000050
- .long 0x02000690,0x60000050
- .long 0x020006e0,0x20000050
-
- .org __LC_RST_NEW_PSW # 0x1a0
- .quad 0,iplstart
- .org __LC_EXT_NEW_PSW # 0x1b0
- .quad 0x0002000180000000,0x1b0 # disabled wait
- .org __LC_PGM_NEW_PSW # 0x1d0
- .quad 0x0000000180000000,startup_pgm_check_handler
- .org __LC_IO_NEW_PSW # 0x1f0
- .quad 0x0002000180000000,0x1f0 # disabled wait
-
- .org 0x200
-
+ipl_start:
+ mvi __LC_AR_MODE_ID,1 # set esame flag
+ slr %r0,%r0 # set cpuid to zero
+ lhi %r1,2 # mode 2 = esame (dump)
+ sigp %r1,%r0,0x12 # switch to esame mode
+ sam64 # switch to 64 bit addressing mode
+ lgh %r1,__LC_SUBCHANNEL_ID # test if subchannel number
+ brctg %r1,.Lnoload # is valid
+ llgf %r1,__LC_SUBCHANNEL_ID # load ipl subchannel number
+ lghi %r2,IPL_BS # load start address
+ bras %r14,.Lloader # load rest of ipl image
+ larl %r12,parmarea # pointer to parameter area
+ stg %r1,IPL_DEVICE-PARMAREA(%r12) # save ipl device number
+#
+# load parameter file from ipl device
+#
+.Lagain1:
+ larl %r2,_end # ramdisk loc. is temp
+ bras %r14,.Lloader # load parameter file
+ ltgr %r2,%r2 # got anything ?
+ jz .Lnopf
+ lg %r3,MAX_COMMAND_LINE_SIZE-PARMAREA(%r12)
+ aghi %r3,-1
+ clgr %r2,%r3
+ jl .Lnotrunc
+ lgr %r2,%r3
+.Lnotrunc:
+ larl %r4,_end
+ larl %r13,.L_hdr
+ clc 0(3,%r4),0(%r13) # if it is HDRx
+ jz .Lagain1 # skip dataset header
+ larl %r13,.L_eof
+ clc 0(3,%r4),0(%r13) # if it is EOFx
+ jz .Lagain1 # skip dataset trailer
+ lgr %r5,%r2
+ la %r6,COMMAND_LINE-PARMAREA(%r12)
+ lgr %r7,%r2
+ aghi %r7,1
+ mvcl %r6,%r4
+.Lnopf:
+#
+# load ramdisk from ipl device
+#
+.Lagain2:
+ larl %r2,_end # addr of ramdisk
+ stg %r2,INITRD_START-PARMAREA(%r12)
+ bras %r14,.Lloader # load ramdisk
+ stg %r2,INITRD_SIZE-PARMAREA(%r12) # store size of rd
+ ltgr %r2,%r2
+ jnz .Lrdcont
+ stg %r2,INITRD_START-PARMAREA(%r12) # no ramdisk found
+.Lrdcont:
+ larl %r2,_end
+ larl %r13,.L_hdr # skip HDRx and EOFx
+ clc 0(3,%r2),0(%r13)
+ jz .Lagain2
+ larl %r13,.L_eof
+ clc 0(3,%r2),0(%r13)
+ jz .Lagain2
+#
+# reset files in VM reader
+#
+ larl %r13,.Lcpuid
+ stidp 0(%r13) # store cpuid
+ tm 0(%r13),0xff # running VM ?
+ jno .Lnoreset
+ larl %r2,.Lreset
+ lghi %r3,26
+ diag %r2,%r3,8
+ larl %r5,.Lirb
+ stsch 0(%r5) # check if irq is pending
+ tm 30(%r5),0x0f # by verifying if any of the
+ jnz .Lwaitforirq # activity or status control
+ tm 31(%r5),0xff # bits is set in the schib
+ jz .Lnoreset
+.Lwaitforirq:
+ bras %r14,.Lirqwait # wait for IO interrupt
+ c %r1,__LC_SUBCHANNEL_ID # compare subchannel number
+ jne .Lwaitforirq
+ larl %r5,.Lirb
+ tsch 0(%r5)
+.Lnoreset:
+ j .Lnoload
+#
+# everything loaded, go for it
+#
+.Lnoload:
+ jg startup
#
# subroutine to wait for end I/O
#
.Lirqwait:
- mvc __LC_IO_NEW_PSW(16),.Lnewpsw # set up IO interrupt psw
- lpsw .Lwaitpsw
+ larl %r13,.Lnewpswmask # set up IO interrupt psw
+ mvc __LC_IO_NEW_PSW(8),0(%r13)
+ stg %r14,__LC_IO_NEW_PSW+8
+ larl %r13,.Lwaitpsw
+ lpswe 0(%r13)
.Lioint:
- br %r14
- .align 8
-.Lnewpsw:
- .quad 0x0000000080000000,.Lioint
-.Lwaitpsw:
- .long 0x020a0000,0x80000000+.Lioint
-
#
# subroutine for loading cards from the reader
#
.Lloader:
- la %r4,0(%r14)
- la %r3,.Lorb # r2 = address of orb into r2
- la %r5,.Lirb # r4 = address of irb
- la %r6,.Lccws
- la %r7,20
+ lgr %r4,%r14
+ larl %r3,.Lorb # r2 = address of orb into r2
+ larl %r5,.Lirb # r4 = address of irb
+ larl %r6,.Lccws
+ lghi %r7,20
.Linit:
st %r2,4(%r6) # initialize CCW data addresses
la %r2,0x50(%r2)
la %r6,8(%r6)
- bct 7,.Linit
-
- lctl %c6,%c6,.Lcr6 # set IO subclass mask
- slr %r2,%r2
+ brctg %r7,.Linit
+ larl %r13,.Lcr6
+ lctlg %c6,%c6,0(%r13)
+ xgr %r2,%r2
.Lldlp:
ssch 0(%r3) # load chunk of 1600 bytes
- bnz .Llderr
+ jnz .Llderr
.Lwait4irq:
- bas %r14,.Lirqwait
+ bras %r14,.Lirqwait
c %r1,__LC_SUBCHANNEL_ID # compare subchannel number
- bne .Lwait4irq
+ jne .Lwait4irq
tsch 0(%r5)
-
- slr %r0,%r0
+ xgr %r0,%r0
ic %r0,8(%r5) # get device status
- chi %r0,8 # channel end ?
- be .Lcont
- chi %r0,12 # channel end + device end ?
- be .Lcont
-
- l %r0,4(%r5)
- s %r0,8(%r3) # r0/8 = number of ccws executed
- mhi %r0,10 # *10 = number of bytes in ccws
- lh %r3,10(%r5) # get residual count
- sr %r0,%r3 # #ccws*80-residual=#bytes read
- ar %r2,%r0
-
+ cghi %r0,8 # channel end ?
+ je .Lcont
+ cghi %r0,12 # channel end + device end ?
+ je .Lcont
+ llgf %r0,4(%r5)
+ sgf %r0,8(%r3) # r0/8 = number of ccws executed
+ mghi %r0,10 # *10 = number of bytes in ccws
+ llgh %r3,10(%r5) # get residual count
+ sgr %r0,%r3 # #ccws*80-residual=#bytes read
+ agr %r2,%r0
br %r4 # r2 contains the total size
-
.Lcont:
- ahi %r2,0x640 # add 0x640 to total size
- la %r6,.Lccws
- la %r7,20
+ aghi %r2,0x640 # add 0x640 to total size
+ larl %r6,.Lccws
+ lghi %r7,20
.Lincr:
l %r0,4(%r6) # update CCW data addresses
- ahi %r0,0x640
+ aghi %r0,0x640
st %r0,4(%r6)
- ahi %r6,8
- bct 7,.Lincr
-
- b .Lldlp
+ aghi %r6,8
+ brctg %r7,.Lincr
+ j .Lldlp
.Llderr:
- lpsw .Lcrash
+ larl %r13,.Lcrash
+ lpsw 0(%r13)
.align 8
+.Lwaitpsw:
+ .quad 0x0202000180000000,.Lioint
+.Lnewpswmask:
+ .quad 0x0000000180000000
+ .align 8
.Lorb: .long 0x00000000,0x0080ff00,.Lccws
.Lirb: .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-.Lcr6: .long 0xff000000
-.Lloadp:.long 0,0
+ .align 8
+.Lcr6: .quad 0x00000000ff000000
.align 8
.Lcrash:.long 0x000a0000,0x00000000
-
.align 8
.Lccws: .rept 19
.long 0x02600050,0x00000000
.endr
.long 0x02200050,0x00000000
-
-iplstart:
- mvi __LC_AR_MODE_ID,1 # set esame flag
- slr %r0,%r0 # set cpuid to zero
- lhi %r1,2 # mode 2 = esame (dump)
- sigp %r1,%r0,0x12 # switch to esame mode
- bras %r13,0f
- .fill 16,4,0x0
-0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs
- sam31 # switch to 31 bit addressing mode
- lh %r1,__LC_SUBCHANNEL_ID # test if subchannel number
- bct %r1,.Lnoload # is valid
- l %r1,__LC_SUBCHANNEL_ID # load ipl subchannel number
- la %r2,IPL_BS # load start address
- bas %r14,.Lloader # load rest of ipl image
- l %r12,.Lparm # pointer to parameter area
- st %r1,IPL_DEVICE+ARCH_OFFSET-PARMAREA(%r12) # save ipl device number
-
-#
-# load parameter file from ipl device
-#
-.Lagain1:
- l %r2,.Linitrd # ramdisk loc. is temp
- bas %r14,.Lloader # load parameter file
- ltr %r2,%r2 # got anything ?
- bz .Lnopf
- l %r3,MAX_COMMAND_LINE_SIZE+ARCH_OFFSET-PARMAREA(%r12)
- ahi %r3,-1
- clr %r2,%r3
- bl .Lnotrunc
- lr %r2,%r3
-.Lnotrunc:
- l %r4,.Linitrd
- clc 0(3,%r4),.L_hdr # if it is HDRx
- bz .Lagain1 # skip dataset header
- clc 0(3,%r4),.L_eof # if it is EOFx
- bz .Lagain1 # skip dateset trailer
-
- lr %r5,%r2
- la %r6,COMMAND_LINE-PARMAREA(%r12)
- lr %r7,%r2
- ahi %r7,1
- mvcl %r6,%r4
-.Lnopf:
-
-#
-# load ramdisk from ipl device
-#
-.Lagain2:
- l %r2,.Linitrd # addr of ramdisk
- st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12)
- bas %r14,.Lloader # load ramdisk
- st %r2,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r12) # store size of rd
- ltr %r2,%r2
- bnz .Lrdcont
- st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # no ramdisk found
-.Lrdcont:
- l %r2,.Linitrd
-
- clc 0(3,%r2),.L_hdr # skip HDRx and EOFx
- bz .Lagain2
- clc 0(3,%r2),.L_eof
- bz .Lagain2
-
-#
-# reset files in VM reader
-#
- stidp .Lcpuid # store cpuid
- tm .Lcpuid,0xff # running VM ?
- bno .Lnoreset
- la %r2,.Lreset
- lhi %r3,26
- diag %r2,%r3,8
- la %r5,.Lirb
- stsch 0(%r5) # check if irq is pending
- tm 30(%r5),0x0f # by verifying if any of the
- bnz .Lwaitforirq # activity or status control
- tm 31(%r5),0xff # bits is set in the schib
- bz .Lnoreset
-.Lwaitforirq:
- bas %r14,.Lirqwait # wait for IO interrupt
- c %r1,__LC_SUBCHANNEL_ID # compare subchannel number
- bne .Lwaitforirq
- la %r5,.Lirb
- tsch 0(%r5)
-.Lnoreset:
- b .Lnoload
-
-#
-# everything loaded, go for it
-#
-.Lnoload:
- l %r1,.Lstartup
- br %r1
-
-.Linitrd:.long _end # default address of initrd
-.Lparm: .long PARMAREA
-.Lstartup: .long startup
.Lreset:.byte 0xc3,0xc8,0xc1,0xd5,0xc7,0xc5,0x40,0xd9,0xc4,0xd9,0x40
.byte 0xc1,0xd3,0xd3,0x40,0xd2,0xc5,0xc5,0xd7,0x40,0xd5,0xd6
.byte 0xc8,0xd6,0xd3,0xc4 # "change rdr all keep nohold"
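The hunk above mechanically converts the boot loader from base-relative to PC-relative addressing: short base-register forms (la, lhi, bct, bas, slr) give way to PC-relative and 64-bit forms (larl, lghi, brctg, bras, xgr), so the code can run in 64-bit mode without first establishing a base register. As a generic illustration of the two addressing forms (a sketch, not taken from the patch):

	la	%r1,8(%r2)	# r1 = %r2 + 8: 12-bit displacement off a base register
	larl	%r1,.Ldata	# r1 = address of .Ldata: PC-relative, no base needed
.Ldata:
	.quad	0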
@@ -268,10 +215,10 @@ iplstart:
# this is called either by the ipl loader or directly by PSW restart
# or linload or SALIPL
#
- .org STARTUP_NORMAL_OFFSET
+ .org STARTUP_NORMAL_OFFSET - IPL_START
SYM_CODE_START(startup)
j startup_normal
- .org EP_OFFSET
+ .org EP_OFFSET - IPL_START
#
# This is a list of s390 kernel entry points. At address 0x1000f the number of
# valid entry points is stored.
@@ -283,7 +230,7 @@ SYM_CODE_START(startup)
#
# kdump startup-code, running in 64 bit absolute addressing mode
#
- .org STARTUP_KDUMP_OFFSET
+ .org STARTUP_KDUMP_OFFSET - IPL_START
j startup_kdump
SYM_CODE_END(startup)
SYM_CODE_START_LOCAL(startup_normal)
@@ -295,20 +242,23 @@ SYM_CODE_START_LOCAL(startup_normal)
.fill 16,4,0x0
0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs
sam64 # switch to 64 bit addressing mode
- basr %r13,0 # get base
-.LPG0:
- mvc __LC_EXT_NEW_PSW(16),.Lext_new_psw-.LPG0(%r13)
- mvc __LC_PGM_NEW_PSW(16),.Lpgm_new_psw-.LPG0(%r13)
- mvc __LC_IO_NEW_PSW(16),.Lio_new_psw-.LPG0(%r13)
+ larl %r13,.Lext_new_psw
+ mvc __LC_EXT_NEW_PSW(16),0(%r13)
+ larl %r13,.Lpgm_new_psw
+ mvc __LC_PGM_NEW_PSW(16),0(%r13)
+ larl %r13,.Lio_new_psw
+ mvc __LC_IO_NEW_PSW(16),0(%r13)
xc 0x200(256),0x200 # partially clear lowcore
xc 0x300(256),0x300
xc 0xe00(256),0xe00
xc 0xf00(256),0xf00
- lctlg %c0,%c15,.Lctl-.LPG0(%r13) # load control registers
+ larl %r13,.Lctl
+ lctlg %c0,%c15,0(%r13) # load control registers
stcke __LC_BOOT_CLOCK
mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1
- spt 6f-.LPG0(%r13)
- mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13)
+ larl %r13,6f
+ spt 0(%r13)
+ mvc __LC_LAST_UPDATE_TIMER(8),0(%r13)
larl %r15,_stack_end-STACK_FRAME_OVERHEAD
brasl %r14,sclp_early_setup_buffer
brasl %r14,verify_facilities
@@ -368,23 +318,3 @@ SYM_CODE_START_LOCAL(startup_pgm_check_handler)
lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r8)
lpswe __LC_RETURN_PSW # disabled wait
SYM_CODE_END(startup_pgm_check_handler)
-
-#
-# params at 10400 (setup.h)
-# Must be kept in sync with struct parmarea in setup.h
-#
- .org PARMAREA
-SYM_DATA_START(parmarea)
- .quad 0 # IPL_DEVICE
- .quad 0 # INITRD_START
- .quad 0 # INITRD_SIZE
- .quad 0 # OLDMEM_BASE
- .quad 0 # OLDMEM_SIZE
- .quad kernel_version # points to kernel version string
- .quad COMMAND_LINE_SIZE
-
- .org COMMAND_LINE
- .byte "root=/dev/ram0 ro"
- .byte 0
- .org PARMAREA+__PARMAREA_SIZE
-SYM_DATA_END(parmarea)
diff --git a/arch/s390/boot/ipl_data.c b/arch/s390/boot/ipl_data.c
new file mode 100644
index 000000000000..0846e2b249c6
--- /dev/null
+++ b/arch/s390/boot/ipl_data.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/compat.h>
+#include <linux/ptrace.h>
+#include <asm/cio.h>
+#include <asm/asm-offsets.h>
+#include "boot.h"
+
+#define CCW0(cmd, addr, cnt, flg) \
+ { .cmd_code = cmd, .cda = addr, .count = cnt, .flags = flg, }
+
+#define PSW_MASK_DISABLED (PSW_MASK_WAIT | PSW_MASK_EA | PSW_MASK_BA)
+
+struct ipl_lowcore {
+ psw_t32 ipl_psw; /* 0x0000 */
+ struct ccw0 ccwpgm[2]; /* 0x0008 */
+ u8 fill[56]; /* 0x0018 */
+ struct ccw0 ccwpgmcc[20]; /* 0x0050 */
+ u8 pad_0xf0[0x01a0-0x00f0]; /* 0x00f0 */
+ psw_t restart_psw; /* 0x01a0 */
+ psw_t external_new_psw; /* 0x01b0 */
+ psw_t svc_new_psw; /* 0x01c0 */
+ psw_t program_new_psw; /* 0x01d0 */
+ psw_t mcck_new_psw; /* 0x01e0 */
+ psw_t io_new_psw; /* 0x01f0 */
+};
+
+/*
+ * Initial lowcore for IPL: the first 24 bytes are loaded by IPL to
+ * addresses 0-23 (a PSW and two CCWs). Bytes 24-79 are discarded.
+ * The next 160 bytes are loaded to addresses 0x18-0xb7. They form
+ * the continuation of the CCW program started by IPL and load the
+ * range 0x0f0-0x730 from the image to the range 0x0f0-0x730 in
+ * memory. At the end of the channel program the PSW at location 0 is
+ * loaded.
+ * Initial processing starts at 0x200 = iplstart.
+ *
+ * The restart psw points to iplstart, which allows loading a kernel
+ * image into memory and starting it by a psw restart on any cpu. All
+ * other psw new locations default to a disabled wait psw whose
+ * address indicates which psw was loaded.
+ *
+ * Note that the 'file' utility can detect s390 kernel images. For
+ * that to succeed, the two initial CCWs and the 0x40 fill bytes must
+ * be present.
+ */
+static struct ipl_lowcore ipl_lowcore __used __section(".ipldata") = {
+ .ipl_psw = { .mask = PSW32_MASK_BASE, .addr = PSW32_ADDR_AMODE | IPL_START },
+ .ccwpgm = {
+ [ 0] = CCW0(CCW_CMD_READ_IPL, 0x018, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [ 1] = CCW0(CCW_CMD_READ_IPL, 0x068, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ },
+ .fill = {
+ [ 0 ... 55] = 0x40,
+ },
+ .ccwpgmcc = {
+ [ 0] = CCW0(CCW_CMD_READ_IPL, 0x0f0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [ 1] = CCW0(CCW_CMD_READ_IPL, 0x140, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [ 2] = CCW0(CCW_CMD_READ_IPL, 0x190, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [ 3] = CCW0(CCW_CMD_READ_IPL, 0x1e0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [ 4] = CCW0(CCW_CMD_READ_IPL, 0x230, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [ 5] = CCW0(CCW_CMD_READ_IPL, 0x280, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [ 6] = CCW0(CCW_CMD_READ_IPL, 0x2d0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [ 7] = CCW0(CCW_CMD_READ_IPL, 0x320, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [ 8] = CCW0(CCW_CMD_READ_IPL, 0x370, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [ 9] = CCW0(CCW_CMD_READ_IPL, 0x3c0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [10] = CCW0(CCW_CMD_READ_IPL, 0x410, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [11] = CCW0(CCW_CMD_READ_IPL, 0x460, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [12] = CCW0(CCW_CMD_READ_IPL, 0x4b0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [13] = CCW0(CCW_CMD_READ_IPL, 0x500, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [14] = CCW0(CCW_CMD_READ_IPL, 0x550, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [15] = CCW0(CCW_CMD_READ_IPL, 0x5a0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [16] = CCW0(CCW_CMD_READ_IPL, 0x5f0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [17] = CCW0(CCW_CMD_READ_IPL, 0x640, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [18] = CCW0(CCW_CMD_READ_IPL, 0x690, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [19] = CCW0(CCW_CMD_READ_IPL, 0x6e0, 0x50, CCW_FLAG_SLI),
+ },
+ .restart_psw = { .mask = 0, .addr = IPL_START, },
+ .external_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_EXT_NEW_PSW, },
+ .svc_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_SVC_NEW_PSW, },
+ .program_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_PGM_NEW_PSW, },
+ .mcck_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_MCK_NEW_PSW, },
+ .io_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_IO_NEW_PSW, },
+};
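A small consistency check for the table above (a sketch only; check_ipl_ccw_range is a hypothetical helper, but the constants follow directly from the CCW0 entries): the 20 ccwpgmcc reads of 0x50 bytes each start at 0x0f0 and therefore cover exactly the 0x0f0-0x730 range described in the comment.

	#include <assert.h>

	static void check_ipl_ccw_range(void)
	{
		unsigned int i;

		for (i = 0; i < 20; i++)
			assert(0x0f0 + i * 0x50 <= 0x6e0);	/* ccwpgmcc[i].cda */
		assert(0x0f0 + 20 * 0x50 == 0x730);		/* end of the copied range */
	}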
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
index 9ed7e29c81d9..ca78d6162245 100644
--- a/arch/s390/boot/ipl_parm.c
+++ b/arch/s390/boot/ipl_parm.c
@@ -8,9 +8,16 @@
#include <asm/sections.h>
#include <asm/boot_data.h>
#include <asm/facility.h>
+#include <asm/setup.h>
#include <asm/uv.h>
#include "boot.h"
+struct parmarea parmarea __section(".parmarea") = {
+ .kernel_version = (unsigned long)kernel_version,
+ .max_command_line_size = COMMAND_LINE_SIZE,
+ .command_line = "root=/dev/ram0 ro",
+};
+
char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
int __bootdata(noexec_disabled);
diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c
index d8984462071f..e8d74d4f62aa 100644
--- a/arch/s390/boot/kaslr.c
+++ b/arch/s390/boot/kaslr.c
@@ -8,7 +8,7 @@
#include <asm/timex.h>
#include <asm/sclp.h>
#include <asm/kasan.h>
-#include "compressed/decompressor.h"
+#include "decompressor.h"
#include "boot.h"
#define PRNG_MODE_TDES 1
diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c
index 2f949cd9076b..7fa1a32ea0f3 100644
--- a/arch/s390/boot/mem_detect.c
+++ b/arch/s390/boot/mem_detect.c
@@ -7,7 +7,7 @@
#include <asm/sections.h>
#include <asm/mem_detect.h>
#include <asm/sparsemem.h>
-#include "compressed/decompressor.h"
+#include "decompressor.h"
#include "boot.h"
struct mem_detect_info __bootdata(mem_detect);
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 1aa11a8f57dd..863e6bcaa5a1 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -10,7 +10,7 @@
#include <asm/sclp.h>
#include <asm/diag.h>
#include <asm/uv.h>
-#include "compressed/decompressor.h"
+#include "decompressor.h"
#include "boot.h"
#include "uv.h"
diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S
index 918e05137d4c..af5c6860e0a1 100644
--- a/arch/s390/boot/compressed/vmlinux.lds.S
+++ b/arch/s390/boot/vmlinux.lds.S
@@ -4,6 +4,7 @@
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/sclp.h>
+#include "boot.h"
OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
OUTPUT_ARCH(s390:64-bit)
@@ -13,11 +14,19 @@ ENTRY(startup)
SECTIONS
{
. = 0;
+ .ipldata : {
+ *(.ipldata)
+ }
+ . = IPL_START;
.head.text : {
_head = . ;
HEAD_TEXT
_ehead = . ;
}
+ . = PARMAREA;
+ .parmarea : {
+ *(.parmarea)
+ }
.text : {
_text = .; /* Text */
*(.text)
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index bfbafd35bcbd..e013088b5115 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -194,7 +194,7 @@ static struct skcipher_alg cbc_des_alg = {
* same as DES. Implementers MUST reject keys that exhibit this
* property.
*
- * In fips mode additinally check for all 3 keys are unique.
+ * In fips mode additionally check that all 3 keys are unique.
*
*/
static int des3_setkey(struct crypto_tfm *tfm, const u8 *key,
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 234d791ca59d..ae382bafc772 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -528,7 +528,7 @@ static ssize_t prng_tdes_read(struct file *file, char __user *ubuf,
/* give mutex free before calling schedule() */
mutex_unlock(&prng_data->mutex);
schedule();
- /* occopy mutex again */
+ /* occupy mutex again */
if (mutex_lock_interruptible(&prng_data->mutex)) {
if (ret == 0)
ret = -ERESTARTSYS;
diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c
index 3765c2d81df5..a3d881ca0a98 100644
--- a/arch/s390/hypfs/hypfs_vm.c
+++ b/arch/s390/hypfs/hypfs_vm.c
@@ -190,7 +190,7 @@ int hypfs_vm_create_files(struct dentry *root)
if (IS_ERR(data))
return PTR_ERR(data);
- /* Hpervisor Info */
+ /* Hypervisor Info */
dir = hypfs_mkdir(root, "hyp");
if (IS_ERR(dir)) {
rc = PTR_ERR(dir);
diff --git a/arch/s390/include/asm/alternative-asm.h b/arch/s390/include/asm/alternative-asm.h
index bb3837d7387c..7db046596b93 100644
--- a/arch/s390/include/asm/alternative-asm.h
+++ b/arch/s390/include/asm/alternative-asm.h
@@ -5,19 +5,6 @@
#ifdef __ASSEMBLY__
/*
- * Check the length of an instruction sequence. The length may not be larger
- * than 254 bytes and it has to be divisible by 2.
- */
-.macro alt_len_check start,end
- .if ( \end - \start ) > 254
- .error "cpu alternatives does not support instructions blocks > 254 bytes\n"
- .endif
- .if ( \end - \start ) % 2
- .error "cpu alternatives instructions length is odd\n"
- .endif
-.endm
-
-/*
* Issue one struct alt_instr descriptor entry (need to put it into
* the section .altinstructions, see below). This entry contains
* enough information for the alternatives patching code to patch an
@@ -28,66 +15,29 @@
.long \alt_start - .
.word \feature
.byte \orig_end - \orig_start
- .byte \alt_end - \alt_start
-.endm
-
-/*
- * Fill up @bytes with nops. The macro emits 6-byte nop instructions
- * for the bulk of the area, possibly followed by a 4-byte and/or
- * a 2-byte nop if the size of the area is not divisible by 6.
- */
-.macro alt_pad_fill bytes
- .rept ( \bytes ) / 6
- brcl 0,0
- .endr
- .rept ( \bytes ) % 6 / 4
- nop
- .endr
- .rept ( \bytes ) % 6 % 4 / 2
- nopr
- .endr
-.endm
-
-/*
- * Fill up @bytes with nops. If the number of bytes is larger
- * than 6, emit a jg instruction to branch over all nops, then
- * fill an area of size (@bytes - 6) with nop instructions.
- */
-.macro alt_pad bytes
- .if ( \bytes > 0 )
- .if ( \bytes > 6 )
- jg . + \bytes
- alt_pad_fill \bytes - 6
- .else
- alt_pad_fill \bytes
- .endif
- .endif
+ .org . - ( \orig_end - \orig_start ) + ( \alt_end - \alt_start )
+ .org . - ( \alt_end - \alt_start ) + ( \orig_end - \orig_start )
.endm
/*
* Define an alternative between two instructions. If @feature is
* present, early code in apply_alternatives() replaces @oldinstr with
- * @newinstr. ".skip" directive takes care of proper instruction padding
- * in case @newinstr is longer than @oldinstr.
+ * @newinstr.
*/
.macro ALTERNATIVE oldinstr, newinstr, feature
.pushsection .altinstr_replacement,"ax"
770: \newinstr
771: .popsection
772: \oldinstr
-773: alt_len_check 770b, 771b
- alt_len_check 772b, 773b
- alt_pad ( ( 771b - 770b ) - ( 773b - 772b ) )
-774: .pushsection .altinstructions,"a"
- alt_entry 772b, 774b, 770b, 771b, \feature
+773: .pushsection .altinstructions,"a"
+ alt_entry 772b, 773b, 770b, 771b, \feature
.popsection
.endm
/*
* Define an alternative between two instructions. If @feature is
* present, early code in apply_alternatives() replaces @oldinstr with
- * @newinstr. ".skip" directive takes care of proper instruction padding
- * in case @newinstr is longer than @oldinstr.
+ * @newinstr.
*/
.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
.pushsection .altinstr_replacement,"ax"
@@ -95,17 +45,9 @@
771: \newinstr2
772: .popsection
773: \oldinstr
-774: alt_len_check 770b, 771b
- alt_len_check 771b, 772b
- alt_len_check 773b, 774b
- .if ( 771b - 770b > 772b - 771b )
- alt_pad ( ( 771b - 770b ) - ( 774b - 773b ) )
- .else
- alt_pad ( ( 772b - 771b ) - ( 774b - 773b ) )
- .endif
-775: .pushsection .altinstructions,"a"
- alt_entry 773b, 775b, 770b, 771b,\feature1
- alt_entry 773b, 775b, 771b, 772b,\feature2
+774: .pushsection .altinstructions,"a"
+ alt_entry 773b, 774b, 770b, 771b,\feature1
+ alt_entry 773b, 774b, 771b, 772b,\feature2
.popsection
.endm
diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h
index 3f2856ed6808..904dd049f954 100644
--- a/arch/s390/include/asm/alternative.h
+++ b/arch/s390/include/asm/alternative.h
@@ -13,32 +13,25 @@ struct alt_instr {
s32 repl_offset; /* offset to replacement instruction */
u16 facility; /* facility bit set for replacement */
u8 instrlen; /* length of original instruction */
- u8 replacementlen; /* length of new instruction */
} __packed;
void apply_alternative_instructions(void);
void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
/*
- * |661: |662: |6620 |663:
- * +-----------+---------------------+
- * | oldinstr | oldinstr_padding |
- * | +----------+----------+
- * | | | |
- * | | >6 bytes |6/4/2 nops|
- * | |6 bytes jg----------->
- * +-----------+---------------------+
- * ^^ static padding ^^
+ * +---------------------------------+
+ * |661: |662:
+ * | oldinstr |
+ * +---------------------------------+
*
* .altinstr_replacement section
- * +---------------------+-----------+
+ * +---------------------------------+
* |6641: |6651:
* | alternative instr 1 |
- * +-----------+---------+- - - - - -+
- * |6642: |6652: |
- * | alternative instr 2 | padding
- * +---------------------+- - - - - -+
- * ^ runtime ^
+ * +---------------------------------+
+ * |6642: |6652:
+ * | alternative instr 2 |
+ * +---------------------------------+
*
* .altinstructions section
* +---------------------------------+
@@ -47,77 +40,31 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
* +---------------------------------+
*/
-#define b_altinstr(num) "664"#num
-#define e_altinstr(num) "665"#num
-
-#define e_oldinstr_pad_end "663"
+#define b_altinstr(num) "664"#num
+#define e_altinstr(num) "665"#num
#define oldinstr_len "662b-661b"
-#define oldinstr_total_len e_oldinstr_pad_end"b-661b"
#define altinstr_len(num) e_altinstr(num)"b-"b_altinstr(num)"b"
-#define oldinstr_pad_len(num) \
- "-(((" altinstr_len(num) ")-(" oldinstr_len ")) > 0) * " \
- "((" altinstr_len(num) ")-(" oldinstr_len "))"
-
-#define INSTR_LEN_SANITY_CHECK(len) \
- ".if " len " > 254\n" \
- "\t.error \"cpu alternatives does not support instructions " \
- "blocks > 254 bytes\"\n" \
- ".endif\n" \
- ".if (" len ") %% 2\n" \
- "\t.error \"cpu alternatives instructions length is odd\"\n" \
- ".endif\n"
-
-#define OLDINSTR_PADDING(oldinstr, num) \
- ".if " oldinstr_pad_len(num) " > 6\n" \
- "\tjg " e_oldinstr_pad_end "f\n" \
- "6620:\n" \
- "\t.rept (" oldinstr_pad_len(num) " - (6620b-662b)) / 2\n" \
- "\tnopr\n" \
- ".else\n" \
- "\t.rept " oldinstr_pad_len(num) " / 6\n" \
- "\t.brcl 0,0\n" \
- "\t.endr\n" \
- "\t.rept " oldinstr_pad_len(num) " %% 6 / 4\n" \
- "\tnop\n" \
- "\t.endr\n" \
- "\t.rept " oldinstr_pad_len(num) " %% 6 %% 4 / 2\n" \
- "\tnopr\n" \
- ".endr\n" \
- ".endif\n"
-
-#define OLDINSTR(oldinstr, num) \
- "661:\n\t" oldinstr "\n662:\n" \
- OLDINSTR_PADDING(oldinstr, num) \
- e_oldinstr_pad_end ":\n" \
- INSTR_LEN_SANITY_CHECK(oldinstr_len)
-
-#define OLDINSTR_2(oldinstr, num1, num2) \
- "661:\n\t" oldinstr "\n662:\n" \
- ".if " altinstr_len(num1) " < " altinstr_len(num2) "\n" \
- OLDINSTR_PADDING(oldinstr, num2) \
- ".else\n" \
- OLDINSTR_PADDING(oldinstr, num1) \
- ".endif\n" \
- e_oldinstr_pad_end ":\n" \
- INSTR_LEN_SANITY_CHECK(oldinstr_len)
+
+#define OLDINSTR(oldinstr) \
+ "661:\n\t" oldinstr "\n662:\n"
#define ALTINSTR_ENTRY(facility, num) \
"\t.long 661b - .\n" /* old instruction */ \
"\t.long " b_altinstr(num)"b - .\n" /* alt instruction */ \
"\t.word " __stringify(facility) "\n" /* facility bit */ \
- "\t.byte " oldinstr_total_len "\n" /* source len */ \
- "\t.byte " altinstr_len(num) "\n" /* alt instruction len */
+ "\t.byte " oldinstr_len "\n" /* instruction len */ \
+ "\t.org . - (" oldinstr_len ") + (" altinstr_len(num) ")\n" \
+ "\t.org . - (" altinstr_len(num) ") + (" oldinstr_len ")\n"
#define ALTINSTR_REPLACEMENT(altinstr, num) /* replacement */ \
- b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n" \
- INSTR_LEN_SANITY_CHECK(altinstr_len(num))
+ b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n"
/* alternative assembly primitive: */
#define ALTERNATIVE(oldinstr, altinstr, facility) \
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(altinstr, 1) \
".popsection\n" \
- OLDINSTR(oldinstr, 1) \
+ OLDINSTR(oldinstr) \
".pushsection .altinstructions,\"a\"\n" \
ALTINSTR_ENTRY(facility, 1) \
".popsection\n"
@@ -127,7 +74,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
ALTINSTR_REPLACEMENT(altinstr1, 1) \
ALTINSTR_REPLACEMENT(altinstr2, 2) \
".popsection\n" \
- OLDINSTR_2(oldinstr, 1, 2) \
+ OLDINSTR(oldinstr) \
".pushsection .altinstructions,\"a\"\n" \
ALTINSTR_ENTRY(facility1, 1) \
ALTINSTR_ENTRY(facility2, 2) \
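Both the assembler macros in alternative-asm.h and the inline-asm string variants above now rely on the same paired .org trick in place of the removed length checks and padding: each .org moves the location counter back by one sequence length and forward by the other, so the pair is a no-op when the old and replacement instructions are equally long, and makes the assembler fail with "attempt to move .org backwards" when they are not. A minimal stand-alone sketch (hypothetical labels, not kernel code):

.Lold_s:
	nopr	%r7		# 2-byte original
.Lold_e:
.Lnew_s:
	nopr	%r7		# 2-byte replacement: lengths match, assembles cleanly
.Lnew_e:
	.org	. - (.Lold_e - .Lold_s) + (.Lnew_e - .Lnew_s)
	.org	. - (.Lnew_e - .Lnew_s) + (.Lold_e - .Lold_s)

Because equal lengths are now guaranteed at build time, the runtime patcher no longer has to construct a padded buffer; __apply_alternatives() further down simply copies the replacement 1:1 over the original.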
diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h
index fb62df5e16a2..f24d9591aaed 100644
--- a/arch/s390/include/asm/asm-extable.h
+++ b/arch/s390/include/asm/asm-extable.h
@@ -26,16 +26,16 @@
stringify_in_c(.long (_target) - .;) \
stringify_in_c(.short (_type);) \
stringify_in_c(.macro extable_reg reg;) \
- stringify_in_c(.set found, 0;) \
- stringify_in_c(.set regnr, 0;) \
+ stringify_in_c(.set .Lfound, 0;) \
+ stringify_in_c(.set .Lregnr, 0;) \
stringify_in_c(.irp rs,r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,r13,r14,r15;) \
stringify_in_c(.ifc "\reg", "%%\rs";) \
- stringify_in_c(.set found, 1;) \
- stringify_in_c(.short regnr;) \
+ stringify_in_c(.set .Lfound, 1;) \
+ stringify_in_c(.short .Lregnr;) \
stringify_in_c(.endif;) \
- stringify_in_c(.set regnr, regnr+1;) \
+ stringify_in_c(.set .Lregnr, .Lregnr+1;) \
stringify_in_c(.endr;) \
- stringify_in_c(.ifne (found != 1);) \
+ stringify_in_c(.ifne (.Lfound != 1);) \
stringify_in_c(.error "extable_reg: bad register argument";) \
stringify_in_c(.endif;) \
stringify_in_c(.endm;) \
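The .Lfound/.Lregnr renames above (and the matching .L__decode_fail rename in nospec-insn.h further down) exploit a general assembler convention rather than anything specific to this patch: names beginning with .L are local symbols that the assembler resolves and then drops from the object file's symbol table, so these .set scratch variables can no longer leak out of the macro expansion or clash between expansions and real symbols:

	.set	.Lscratch,1	# local: resolved at assembly time, never emitted
	.set	scratch,1	# would end up in the object's symbol table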
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 2c057e1f3200..82de2a7c4160 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -26,14 +26,14 @@ static __always_inline void bcr_serialize(void)
asm volatile(__ASM_BCR_SERIALIZE : : : "memory");
}
-#define mb() bcr_serialize()
-#define rmb() barrier()
-#define wmb() barrier()
-#define dma_rmb() mb()
-#define dma_wmb() mb()
-#define __smp_mb() mb()
-#define __smp_rmb() rmb()
-#define __smp_wmb() wmb()
+#define __mb() bcr_serialize()
+#define __rmb() barrier()
+#define __wmb() barrier()
+#define __dma_rmb() __mb()
+#define __dma_wmb() __mb()
+#define __smp_mb() __mb()
+#define __smp_rmb() __rmb()
+#define __smp_wmb() __wmb()
#define __smp_store_release(p, v) \
do { \
diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h
index 0b25f28351ed..aebe1e22c7be 100644
--- a/arch/s390/include/asm/bug.h
+++ b/arch/s390/include/asm/bug.h
@@ -15,7 +15,8 @@
"1: .asciz \""__FILE__"\"\n" \
".previous\n" \
".section __bug_table,\"awM\",@progbits,%2\n" \
- "2: .long 0b-2b,1b-2b\n" \
+ "2: .long 0b-.\n" \
+ " .long 1b-.\n" \
" .short %0,%1\n" \
" .org 2b+%2\n" \
".previous\n" \
@@ -30,7 +31,7 @@
asm_inline volatile( \
"0: mc 0,0\n" \
".section __bug_table,\"awM\",@progbits,%1\n" \
- "1: .long 0b-1b\n" \
+ "1: .long 0b-.\n" \
" .short %0\n" \
" .org 1b+%1\n" \
".previous\n" \
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index 1effac6a0152..1c4f585dd39b 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -369,7 +369,7 @@ void cio_gp_dma_destroy(struct gen_pool *gp_dma, struct device *dma_dev);
struct gen_pool *cio_gp_dma_create(struct device *dma_dev, int nr_pages);
/* Function from drivers/s390/cio/chsc.c */
-int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta);
+int chsc_sstpc(void *page, unsigned int op, u16 ctrl, long *clock_delta);
int chsc_sstpi(void *page, void *result, size_t size);
int chsc_stzi(void *page, void *result, size_t size);
int chsc_sgib(u32 origin);
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index cdc7ae72529d..7d6fe813ac39 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -8,6 +8,7 @@
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/thread_info.h>
+#include <asm/ptrace.h>
#define compat_mode_t compat_mode_t
typedef u16 compat_mode_t;
@@ -22,32 +23,8 @@ typedef u16 compat_mode_t;
(__force t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v)); \
})
-#define PSW32_MASK_PER 0x40000000UL
-#define PSW32_MASK_DAT 0x04000000UL
-#define PSW32_MASK_IO 0x02000000UL
-#define PSW32_MASK_EXT 0x01000000UL
-#define PSW32_MASK_KEY 0x00F00000UL
-#define PSW32_MASK_BASE 0x00080000UL /* Always one */
-#define PSW32_MASK_MCHECK 0x00040000UL
-#define PSW32_MASK_WAIT 0x00020000UL
-#define PSW32_MASK_PSTATE 0x00010000UL
-#define PSW32_MASK_ASC 0x0000C000UL
-#define PSW32_MASK_CC 0x00003000UL
-#define PSW32_MASK_PM 0x00000f00UL
-#define PSW32_MASK_RI 0x00000080UL
-
#define PSW32_MASK_USER 0x0000FF00UL
-#define PSW32_ADDR_AMODE 0x80000000UL
-#define PSW32_ADDR_INSN 0x7FFFFFFFUL
-
-#define PSW32_DEFAULT_KEY (((u32) PAGE_DEFAULT_ACC) << 20)
-
-#define PSW32_ASC_PRIMARY 0x00000000UL
-#define PSW32_ASC_ACCREG 0x00004000UL
-#define PSW32_ASC_SECONDARY 0x00008000UL
-#define PSW32_ASC_HOME 0x0000C000UL
-
#define PSW32_USER_BITS (PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | \
PSW32_DEFAULT_KEY | PSW32_MASK_BASE | \
PSW32_MASK_MCHECK | PSW32_MASK_PSTATE | \
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index 82388da3f95f..267a8f88e143 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -93,7 +93,9 @@ union ctlreg0 {
unsigned long tcx : 1; /* Transactional-Execution control */
unsigned long pifo : 1; /* Transactional-Execution Program-
Interruption-Filtering Override */
- unsigned long : 22;
+ unsigned long : 3;
+ unsigned long ccc : 1; /* Cryptography counter control */
+ unsigned long : 18;
unsigned long : 3;
unsigned long lap : 1; /* Low-address-protection control */
unsigned long : 4;
diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h
index 2f0a1cacdf85..000de2b1e67a 100644
--- a/arch/s390/include/asm/entry-common.h
+++ b/arch/s390/include/asm/entry-common.h
@@ -9,19 +9,21 @@
#include <linux/uaccess.h>
#include <asm/timex.h>
#include <asm/fpu/api.h>
+#include <asm/pai.h>
#define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_GUARDED_STORAGE | _TIF_PER_TRAP)
void do_per_trap(struct pt_regs *regs);
-#ifdef CONFIG_DEBUG_ENTRY
-static __always_inline void arch_check_user_regs(struct pt_regs *regs)
+static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
{
- debug_user_asce(0);
+ if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
+ debug_user_asce(0);
+
+ pai_kernel_enter(regs);
}
-#define arch_check_user_regs arch_check_user_regs
-#endif /* CONFIG_DEBUG_ENTRY */
+#define arch_enter_from_user_mode arch_enter_from_user_mode
static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
unsigned long ti_work)
@@ -44,6 +46,8 @@ static __always_inline void arch_exit_to_user_mode(void)
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
debug_user_asce(1);
+
+ pai_kernel_exit(current_pt_regs());
}
#define arch_exit_to_user_mode arch_exit_to_user_mode
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index 3f8ee257f9aa..a405b6bb89fb 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -133,6 +133,8 @@ int ipl_report_add_certificate(struct ipl_report *report, void *key,
* DIAG 308 support
*/
enum diag308_subcode {
+ DIAG308_CLEAR_RESET = 0,
+ DIAG308_LOAD_NORMAL_RESET = 1,
DIAG308_REL_HSA = 2,
DIAG308_LOAD_CLEAR = 3,
DIAG308_LOAD_NORMAL_DUMP = 4,
@@ -141,6 +143,10 @@ enum diag308_subcode {
DIAG308_LOAD_NORMAL = 7,
};
+enum diag308_subcode_flags {
+ DIAG308_FLAG_EI = 1UL << 16,
+};
+
enum diag308_rc {
DIAG308_RC_OK = 0x0001,
DIAG308_RC_NOCONFIG = 0x0102,
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 56002aeacabf..26fe5e535728 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -200,7 +200,10 @@ struct lowcore {
__u64 last_break_save_area; /* 0x1338 */
__u32 access_regs_save_area[16]; /* 0x1340 */
__u64 cregs_save_area[16]; /* 0x1380 */
- __u8 pad_0x1400[0x1800-0x1400]; /* 0x1400 */
+ __u8 pad_0x1400[0x1500-0x1400]; /* 0x1400 */
+ /* Cryptography-counter designation */
+ __u64 ccd; /* 0x1500 */
+ __u8 pad_0x1508[0x1800-0x1508]; /* 0x1508 */
/* Transaction abort diagnostic block */
struct pgm_tdb pgm_tdb; /* 0x1800 */
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index 292083083830..af1cd3a6f406 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -101,7 +101,7 @@ void nmi_alloc_mcesa_early(u64 *mcesad);
int nmi_alloc_mcesa(u64 *mcesad);
void nmi_free_mcesa(u64 *mcesad);
-void s390_handle_mcck(void);
+void s390_handle_mcck(struct pt_regs *regs);
void __s390_handle_mcck(void);
int s390_do_machine_check(struct pt_regs *regs);
diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h
index 2cfcd5ac3a8b..d910d71b5bb5 100644
--- a/arch/s390/include/asm/nospec-insn.h
+++ b/arch/s390/include/asm/nospec-insn.h
@@ -54,31 +54,31 @@
.endm
.macro __DECODE_R expand,reg
- .set __decode_fail,1
+ .set .L__decode_fail,1
.irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
.ifc \reg,%r\r1
\expand \r1
- .set __decode_fail,0
+ .set .L__decode_fail,0
.endif
.endr
- .if __decode_fail == 1
+ .if .L__decode_fail == 1
.error "__DECODE_R failed"
.endif
.endm
.macro __DECODE_RR expand,rsave,rtarget
- .set __decode_fail,1
+ .set .L__decode_fail,1
.irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
.ifc \rsave,%r\r1
.irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
.ifc \rtarget,%r\r2
\expand \r1,\r2
- .set __decode_fail,0
+ .set .L__decode_fail,0
.endif
.endr
.endif
.endr
- .if __decode_fail == 1
+ .if .L__decode_fail == 1
.error "__DECODE_RR failed"
.endif
.endm
diff --git a/arch/s390/include/asm/pai.h b/arch/s390/include/asm/pai.h
new file mode 100644
index 000000000000..5b7e33ac6f0b
--- /dev/null
+++ b/arch/s390/include/asm/pai.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Processor Activity Instrumentation support for cryptography counters
+ *
+ * Copyright IBM Corp. 2022
+ * Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ */
+#ifndef _ASM_S390_PAI_H
+#define _ASM_S390_PAI_H
+
+#include <linux/jump_label.h>
+#include <asm/lowcore.h>
+#include <asm/ptrace.h>
+
+struct qpaci_info_block {
+ u64 header;
+ struct {
+ u64 : 8;
+ u64 num_cc : 8; /* # of supported crypto counters */
+ u64 : 48;
+ };
+};
+
+static inline int qpaci(struct qpaci_info_block *info)
+{
+ /* Size of info (in double words minus one) */
+ size_t size = sizeof(*info) / sizeof(u64) - 1;
+ int cc;
+
+ asm volatile(
+ " lgr 0,%[size]\n"
+ " .insn s,0xb28f0000,%[info]\n"
+ " lgr %[size],0\n"
+ " ipm %[cc]\n"
+ " srl %[cc],28\n"
+ : [cc] "=d" (cc), [info] "=Q" (*info), [size] "+&d" (size)
+ :
+ : "0", "cc", "memory");
+ return cc ? (size + 1) * sizeof(u64) : 0;
+}
+
+#define PAI_CRYPTO_BASE 0x1000 /* First event number */
+#define PAI_CRYPTO_MAXCTR 256 /* Max # of event counters */
+#define PAI_CRYPTO_KERNEL_OFFSET 2048
+
+DECLARE_STATIC_KEY_FALSE(pai_key);
+
+static __always_inline void pai_kernel_enter(struct pt_regs *regs)
+{
+ if (!IS_ENABLED(CONFIG_PERF_EVENTS))
+ return;
+ if (!static_branch_unlikely(&pai_key))
+ return;
+ if (!S390_lowcore.ccd)
+ return;
+ if (!user_mode(regs))
+ return;
+ WRITE_ONCE(S390_lowcore.ccd, S390_lowcore.ccd | PAI_CRYPTO_KERNEL_OFFSET);
+}
+
+static __always_inline void pai_kernel_exit(struct pt_regs *regs)
+{
+ if (!IS_ENABLED(CONFIG_PERF_EVENTS))
+ return;
+ if (!static_branch_unlikely(&pai_key))
+ return;
+ if (!S390_lowcore.ccd)
+ return;
+ if (!user_mode(regs))
+ return;
+ WRITE_ONCE(S390_lowcore.ccd, S390_lowcore.ccd & ~PAI_CRYPTO_KERNEL_OFFSET);
+}
+
+#endif
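The 2048-byte kernel offset toggled by pai_kernel_enter()/pai_kernel_exit() lines up with the other constants in this header: PAI_CRYPTO_MAXCTR is 256 and each counter is a u64, so 256 * 8 = 2048 is plausibly the byte offset of a second, kernel-only counter set inside the area designated by the lowcore ccd field. That reading is an inference from the constants, not something the patch states; as a standalone compile-time note:

	#include <stdint.h>

	#define PAI_CRYPTO_MAXCTR		256
	#define PAI_CRYPTO_KERNEL_OFFSET	2048

	/* kernel counter set assumed to start right after the user set */
	_Static_assert(PAI_CRYPTO_MAXCTR * sizeof(uint64_t) == PAI_CRYPTO_KERNEL_OFFSET,
		       "kernel offset == size of one counter set");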
diff --git a/arch/s390/include/asm/pci_debug.h b/arch/s390/include/asm/pci_debug.h
index 5dfe47588277..3bb4e7e33a0e 100644
--- a/arch/s390/include/asm/pci_debug.h
+++ b/arch/s390/include/asm/pci_debug.h
@@ -17,9 +17,14 @@ extern debug_info_t *pci_debug_err_id;
debug_text_event(pci_debug_err_id, 0, debug_buffer); \
} while (0)
+static inline void zpci_err_hex_level(int level, void *addr, int len)
+{
+ debug_event(pci_debug_err_id, level, addr, len);
+}
+
static inline void zpci_err_hex(void *addr, int len)
{
- debug_event(pci_debug_err_id, 0, addr, len);
+ zpci_err_hex_level(0, addr, len);
}
#endif
diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
index d9d5350cc3ec..bf15da0fedbc 100644
--- a/arch/s390/include/asm/preempt.h
+++ b/arch/s390/include/asm/preempt.h
@@ -46,10 +46,17 @@ static inline bool test_preempt_need_resched(void)
static inline void __preempt_count_add(int val)
{
- if (__builtin_constant_p(val) && (val >= -128) && (val <= 127))
- __atomic_add_const(val, &S390_lowcore.preempt_count);
- else
- __atomic_add(val, &S390_lowcore.preempt_count);
+ /*
+ * With some obscure config options and CONFIG_PROFILE_ALL_BRANCHES
+ * enabled, gcc 12 fails to handle __builtin_constant_p().
+ */
+ if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES)) {
+ if (__builtin_constant_p(val) && (val >= -128) && (val <= 127)) {
+ __atomic_add_const(val, &S390_lowcore.preempt_count);
+ return;
+ }
+ }
+ __atomic_add(val, &S390_lowcore.preempt_count);
}
static inline void __preempt_count_sub(int val)
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index ff1e25d515a8..add764a2be8c 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -83,6 +83,7 @@ void cpu_detect_mhz_feature(void);
extern const struct seq_operations cpuinfo_op;
extern void execve_tail(void);
extern void __bpon(void);
+unsigned long vdso_size(void);
/*
* User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit.
@@ -94,9 +95,10 @@ extern void __bpon(void);
(_REGION3_SIZE >> 1) : (_REGION2_SIZE >> 1))
#define TASK_SIZE_MAX (-PAGE_SIZE)
-#define STACK_TOP (test_thread_flag(TIF_31BIT) ? \
- _REGION3_SIZE : _REGION2_SIZE)
-#define STACK_TOP_MAX _REGION2_SIZE
+#define VDSO_BASE (STACK_TOP + PAGE_SIZE)
+#define VDSO_LIMIT (test_thread_flag(TIF_31BIT) ? _REGION3_SIZE : _REGION2_SIZE)
+#define STACK_TOP (VDSO_LIMIT - vdso_size() - PAGE_SIZE)
+#define STACK_TOP_MAX (_REGION2_SIZE - vdso_size() - PAGE_SIZE)
#define HAVE_ARCH_PICK_MMAP_LAYOUT
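Substituting STACK_TOP into VDSO_BASE makes the new layout explicit:

	VDSO_BASE = STACK_TOP + PAGE_SIZE
	          = (VDSO_LIMIT - vdso_size() - PAGE_SIZE) + PAGE_SIZE
	          = VDSO_LIMIT - vdso_size()

so the vdso occupies the top vdso_size() bytes below VDSO_LIMIT, and a single guard page separates the highest stack address from the vdso mapping.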
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index ddb70fb13fbc..8bae33ab320a 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -71,6 +71,35 @@ enum {
&(*(struct psw_bits *)(&(__psw))); \
}))
+#define PSW32_MASK_PER 0x40000000UL
+#define PSW32_MASK_DAT 0x04000000UL
+#define PSW32_MASK_IO 0x02000000UL
+#define PSW32_MASK_EXT 0x01000000UL
+#define PSW32_MASK_KEY 0x00F00000UL
+#define PSW32_MASK_BASE 0x00080000UL /* Always one */
+#define PSW32_MASK_MCHECK 0x00040000UL
+#define PSW32_MASK_WAIT 0x00020000UL
+#define PSW32_MASK_PSTATE 0x00010000UL
+#define PSW32_MASK_ASC 0x0000C000UL
+#define PSW32_MASK_CC 0x00003000UL
+#define PSW32_MASK_PM 0x00000f00UL
+#define PSW32_MASK_RI 0x00000080UL
+
+#define PSW32_ADDR_AMODE 0x80000000UL
+#define PSW32_ADDR_INSN 0x7FFFFFFFUL
+
+#define PSW32_DEFAULT_KEY (((u32)PAGE_DEFAULT_ACC) << 20)
+
+#define PSW32_ASC_PRIMARY 0x00000000UL
+#define PSW32_ASC_ACCREG 0x00004000UL
+#define PSW32_ASC_SECONDARY 0x00008000UL
+#define PSW32_ASC_HOME 0x0000C000UL
+
+typedef struct {
+ unsigned int mask;
+ unsigned int addr;
+} psw_t32 __aligned(8);
+
#define PGM_INT_CODE_MASK 0x7f
#define PGM_INT_CODE_PER 0x80
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 04cb1e7582a6..236b34b75ddb 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -87,6 +87,7 @@ struct sclp_info {
unsigned char has_diag318 : 1;
unsigned char has_sipl : 1;
unsigned char has_dirq : 1;
+ unsigned char has_iplcc : 1;
unsigned int ibc;
unsigned int mtid;
unsigned int mtid_cp;
diff --git a/arch/s390/include/asm/scsw.h b/arch/s390/include/asm/scsw.h
index a7c3ccf681da..7ce584aff5bb 100644
--- a/arch/s390/include/asm/scsw.h
+++ b/arch/s390/include/asm/scsw.h
@@ -508,9 +508,21 @@ static inline int scsw_cmd_is_valid_zcc(union scsw *scsw)
*/
static inline int scsw_cmd_is_valid_ectl(union scsw *scsw)
{
- return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
- !(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) &&
- (scsw->cmd.stctl & SCSW_STCTL_ALERT_STATUS);
+ /* Must be status pending. */
+ if (!(scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND))
+ return 0;
+
+ /* Must have alert status. */
+ if (!(scsw->cmd.stctl & SCSW_STCTL_ALERT_STATUS))
+ return 0;
+
+ /* Must be alone or together with primary, secondary or both,
+ * => no intermediate status.
+ */
+ if (scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS)
+ return 0;
+
+ return 1;
}
/**
@@ -522,10 +534,25 @@ static inline int scsw_cmd_is_valid_ectl(union scsw *scsw)
*/
static inline int scsw_cmd_is_valid_pno(union scsw *scsw)
{
- return (scsw->cmd.fctl != 0) &&
- (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
- (!(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) ||
- (scsw->cmd.actl & SCSW_ACTL_SUSPENDED));
+ /* Must indicate at least one I/O function. */
+ if (!scsw->cmd.fctl)
+ return 0;
+
+ /* Must be status pending. */
+ if (!(scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND))
+ return 0;
+
+ /* Can be status pending alone, or with any combination of primary,
+ * secondary and alert => no intermediate status.
+ */
+ if (!(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS))
+ return 1;
+
+ /* If intermediate, must be suspended. */
+ if (scsw->cmd.actl & SCSW_ACTL_SUSPENDED)
+ return 1;
+
+ return 0;
}
/**
@@ -675,9 +702,21 @@ static inline int scsw_tm_is_valid_q(union scsw *scsw)
*/
static inline int scsw_tm_is_valid_ectl(union scsw *scsw)
{
- return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
- !(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) &&
- (scsw->tm.stctl & SCSW_STCTL_ALERT_STATUS);
+ /* Must be status pending. */
+ if (!(scsw->tm.stctl & SCSW_STCTL_STATUS_PEND))
+ return 0;
+
+ /* Must have alert status. */
+ if (!(scsw->tm.stctl & SCSW_STCTL_ALERT_STATUS))
+ return 0;
+
+ /* Must be alone or together with primary, secondary or both,
+ * => no intermediate status.
+ */
+ if (scsw->tm.stctl & SCSW_STCTL_INTER_STATUS)
+ return 0;
+
+ return 1;
}
/**
@@ -689,11 +728,25 @@ static inline int scsw_tm_is_valid_ectl(union scsw *scsw)
*/
static inline int scsw_tm_is_valid_pno(union scsw *scsw)
{
- return (scsw->tm.fctl != 0) &&
- (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
- (!(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) ||
- ((scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) &&
- (scsw->tm.actl & SCSW_ACTL_SUSPENDED)));
+ /* Must indicate at least one I/O function. */
+ if (!scsw->tm.fctl)
+ return 0;
+
+ /* Must be status pending. */
+ if (!(scsw->tm.stctl & SCSW_STCTL_STATUS_PEND))
+ return 0;
+
+ /* Can be status pending alone, or with any combination of primary,
+ * secondary and alert => no intermediate status.
+ */
+ if (!(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS))
+ return 1;
+
+ /* If intermediate, must be suspended. */
+ if (scsw->tm.actl & SCSW_ACTL_SUSPENDED)
+ return 1;
+
+ return 0;
}
/**
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 24a54443c865..37127cd7749e 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -77,8 +77,9 @@ static inline int arch_spin_trylock(arch_spinlock_t *lp)
static inline void arch_spin_unlock(arch_spinlock_t *lp)
{
typecheck(int, lp->lock);
+ kcsan_release();
asm_inline volatile(
- ALTERNATIVE("", ".insn rre,0xb2fa0000,7,0", 49) /* NIAI 7 */
+ ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", 49) /* NIAI 7 */
" sth %1,%0\n"
: "=R" (((unsigned short *) &lp->lock)[1])
: "d" (0) : "cc", "memory");
diff --git a/arch/s390/include/asm/stp.h b/arch/s390/include/asm/stp.h
index ba07463897c1..4d74d7e33340 100644
--- a/arch/s390/include/asm/stp.h
+++ b/arch/s390/include/asm/stp.h
@@ -44,8 +44,8 @@ struct stp_sstpi {
u32 : 32;
u32 ctnid[3];
u32 : 32;
- u32 todoff[4];
- u32 rsvd[48];
+ u64 todoff;
+ u32 rsvd[50];
} __packed;
struct stp_tzib {
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 2cfce42aa7fc..ce878e85b6e4 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -197,6 +197,7 @@ static inline cycles_t get_cycles(void)
{
return (cycles_t) get_tod_clock() >> 2;
}
+#define get_cycles get_cycles
int get_phys_clock(unsigned long *clock);
void init_cpu_timer(void);
diff --git a/arch/s390/include/asm/vx-insn.h b/arch/s390/include/asm/vx-insn.h
index 87e6cc2aeba4..95480ed9149e 100644
--- a/arch/s390/include/asm/vx-insn.h
+++ b/arch/s390/include/asm/vx-insn.h
@@ -366,7 +366,7 @@
.macro VLM vfrom, vto, disp, base, hint=3
VX_NUM v1, \vfrom
VX_NUM v3, \vto
- GR_NUM b2, \base /* Base register */
+ GR_NUM b2, \base
.word 0xE700 | ((v1&15) << 4) | (v3&15)
.word (b2 << 12) | (\disp)
MRXBOPC \hint, 0x36, v1, v3
@@ -376,7 +376,7 @@
.macro VST vr1, disp, index="%r0", base
VX_NUM v1, \vr1
GR_NUM x2, \index
- GR_NUM b2, \base /* Base register */
+ GR_NUM b2, \base
.word 0xE700 | ((v1&15) << 4) | (x2&15)
.word (b2 << 12) | (\disp)
MRXBOPC 0, 0x0E, v1
@@ -386,7 +386,7 @@
.macro VSTM vfrom, vto, disp, base, hint=3
VX_NUM v1, \vfrom
VX_NUM v3, \vto
- GR_NUM b2, \base /* Base register */
+ GR_NUM b2, \base
.word 0xE700 | ((v1&15) << 4) | (v3&15)
.word (b2 << 12) | (\disp)
MRXBOPC \hint, 0x3E, v1, v3
diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h
index 7349e96d28a0..924b876f992c 100644
--- a/arch/s390/include/uapi/asm/pkey.h
+++ b/arch/s390/include/uapi/asm/pkey.h
@@ -171,7 +171,7 @@ struct pkey_skey2pkey {
#define PKEY_SKEY2PKEY _IOWR(PKEY_IOCTL_MAGIC, 0x06, struct pkey_skey2pkey)
/*
- * Verify the given CCA AES secure key for being able to be useable with
+ * Verify that the given CCA AES secure key is usable with
* the pkey module. Check for correct key type and check for having at
* least one crypto card being able to handle this key (master key
* or old master key verification pattern matches).
diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h
index 2f04a5499d74..d83713f67530 100644
--- a/arch/s390/include/uapi/asm/zcrypt.h
+++ b/arch/s390/include/uapi/asm/zcrypt.h
@@ -4,7 +4,7 @@
*
* zcrypt 2.2.1 (user-visible header)
*
- * Copyright IBM Corp. 2001, 2019
+ * Copyright IBM Corp. 2001, 2022
* Author(s): Robert Burroughs
* Eric Rossman (edrossma@us.ibm.com)
*
@@ -85,7 +85,7 @@ struct ica_rsa_modexpo_crt {
struct CPRBX {
__u16 cprb_len; /* CPRB length 220 */
__u8 cprb_ver_id; /* CPRB version id. 0x02 */
- __u8 pad_000[3]; /* Alignment pad bytes */
+ __u8 _pad_000[3]; /* Alignment pad bytes */
__u8 func_id[2]; /* function id 0x5432 */
__u8 cprb_flags[4]; /* Flags */
__u32 req_parml; /* request parameter buffer len */
@@ -95,19 +95,19 @@ struct CPRBX {
__u32 rpl_datal; /* reply data block len */
__u32 rpld_datal; /* replied data block len */
__u32 req_extbl; /* request extension block len */
- __u8 pad_001[4]; /* reserved */
+ __u8 _pad_001[4]; /* reserved */
__u32 rpld_extbl; /* replied extension block len */
- __u8 padx000[16 - sizeof(__u8 *)];
+ __u8 _pad_002[16 - sizeof(__u8 *)];
__u8 __user *req_parmb; /* request parm block 'address' */
- __u8 padx001[16 - sizeof(__u8 *)];
+ __u8 _pad_003[16 - sizeof(__u8 *)];
__u8 __user *req_datab; /* request data block 'address' */
- __u8 padx002[16 - sizeof(__u8 *)];
+ __u8 _pad_004[16 - sizeof(__u8 *)];
__u8 __user *rpl_parmb; /* reply parm block 'address' */
- __u8 padx003[16 - sizeof(__u8 *)];
+ __u8 _pad_005[16 - sizeof(__u8 *)];
__u8 __user *rpl_datab; /* reply data block 'address' */
- __u8 padx004[16 - sizeof(__u8 *)];
+ __u8 _pad_006[16 - sizeof(__u8 *)];
__u8 __user *req_extb; /* request extension block 'addr'*/
- __u8 padx005[16 - sizeof(__u8 *)];
+ __u8 _pad_007[16 - sizeof(__u8 *)];
__u8 __user *rpl_extb; /* reply extension block 'address'*/
__u16 ccp_rtcode; /* server return code */
__u16 ccp_rscode; /* server reason code */
@@ -115,12 +115,10 @@ struct CPRBX {
__u8 logon_id[8]; /* Logon Identifier */
__u8 mac_value[8]; /* Mac Value */
__u8 mac_content_flgs; /* Mac content flag byte */
- __u8 pad_002; /* Alignment */
+ __u8 _pad_008; /* Alignment */
__u16 domain; /* Domain */
- __u8 usage_domain[4]; /* Usage domain */
- __u8 cntrl_domain[4]; /* Control domain */
- __u8 S390enf_mask[4]; /* S/390 enforcement mask */
- __u8 pad_004[36]; /* reserved */
+ __u8 _pad_009[12]; /* reserved, checked for zeros */
+ __u8 _pad_010[36]; /* reserved */
} __attribute__((packed));
/**
@@ -238,8 +236,8 @@ struct zcrypt_device_matrix_ext {
};
#define AUTOSELECT 0xFFFFFFFF
-#define AUTOSEL_AP ((__u16) 0xFFFF)
-#define AUTOSEL_DOM ((__u16) 0xFFFF)
+#define AUTOSEL_AP ((__u16)0xFFFF)
+#define AUTOSEL_DOM ((__u16)0xFFFF)
#define ZCRYPT_IOCTL_MAGIC 'z'
@@ -305,12 +303,12 @@ struct zcrypt_device_matrix_ext {
/**
* Supported ioctl calls
*/
-#define ICARSAMODEXPO _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0)
-#define ICARSACRT _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0)
-#define ZSECSENDCPRB _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0)
-#define ZSENDEP11CPRB _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x04, 0)
+#define ICARSAMODEXPO _IOC(_IOC_READ | _IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0)
+#define ICARSACRT _IOC(_IOC_READ | _IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0)
+#define ZSECSENDCPRB _IOC(_IOC_READ | _IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0)
+#define ZSENDEP11CPRB _IOC(_IOC_READ | _IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x04, 0)
-#define ZCRYPT_DEVICE_STATUS _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x5f, 0)
+#define ZCRYPT_DEVICE_STATUS _IOC(_IOC_READ | _IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x5f, 0)
#define ZCRYPT_STATUS_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x58, char[MAX_ZDEV_CARDIDS_EXT])
#define ZCRYPT_QDEPTH_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x59, char[MAX_ZDEV_CARDIDS_EXT])
#define ZCRYPT_PERDEV_REQCNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x5a, int[MAX_ZDEV_CARDIDS_EXT])
@@ -352,7 +350,7 @@ struct zcrypt_device_matrix {
};
/* Deprecated: use ZCRYPT_DEVICE_STATUS */
-#define ZDEVICESTATUS _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x4f, 0)
+#define ZDEVICESTATUS _IOC(_IOC_READ | _IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x4f, 0)
/* Deprecated: use ZCRYPT_STATUS_MASK */
#define Z90STAT_STATUS_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x48, char[64])
/* Deprecated: use ZCRYPT_QDEPTH_MASK */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index c8d1b6aa823e..5851041bb214 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -72,6 +72,7 @@ obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf_common.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o
+obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o
obj-$(CONFIG_TRACEPOINTS) += trace.o
obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c
index cce0ddee2d02..e7bca29f9c34 100644
--- a/arch/s390/kernel/alternative.c
+++ b/arch/s390/kernel/alternative.c
@@ -7,8 +7,6 @@
#include <asm/facility.h>
#include <asm/nospec-branch.h>
-#define MAX_PATCH_LEN (255 - 1)
-
static int __initdata_or_module alt_instr_disabled;
static int __init disable_alternative_instructions(char *str)
@@ -19,85 +17,30 @@ static int __init disable_alternative_instructions(char *str)
early_param("noaltinstr", disable_alternative_instructions);
-struct brcl_insn {
- u16 opc;
- s32 disp;
-} __packed;
-
-static u16 __initdata_or_module nop16 = 0x0700;
-static u32 __initdata_or_module nop32 = 0x47000000;
-static struct brcl_insn __initdata_or_module nop48 = {
- 0xc004, 0
-};
-
-static const void *nops[] __initdata_or_module = {
- &nop16,
- &nop32,
- &nop48
-};
-
-static void __init_or_module add_jump_padding(void *insns, unsigned int len)
-{
- struct brcl_insn brcl = {
- 0xc0f4,
- len / 2
- };
-
- memcpy(insns, &brcl, sizeof(brcl));
- insns += sizeof(brcl);
- len -= sizeof(brcl);
-
- while (len > 0) {
- memcpy(insns, &nop16, 2);
- insns += 2;
- len -= 2;
- }
-}
-
-static void __init_or_module add_padding(void *insns, unsigned int len)
-{
- if (len > 6)
- add_jump_padding(insns, len);
- else if (len >= 2)
- memcpy(insns, nops[len / 2 - 1], len);
-}
-
static void __init_or_module __apply_alternatives(struct alt_instr *start,
struct alt_instr *end)
{
struct alt_instr *a;
u8 *instr, *replacement;
- u8 insnbuf[MAX_PATCH_LEN];
/*
* The scan order should be from start to end. A later scanned
* alternative code can overwrite previously scanned alternative code.
*/
for (a = start; a < end; a++) {
- int insnbuf_sz = 0;
-
instr = (u8 *)&a->instr_offset + a->instr_offset;
replacement = (u8 *)&a->repl_offset + a->repl_offset;
if (!__test_facility(a->facility, alt_stfle_fac_list))
continue;
- if (unlikely(a->instrlen % 2 || a->replacementlen % 2)) {
+ if (unlikely(a->instrlen % 2)) {
WARN_ONCE(1, "cpu alternatives instructions length is "
"odd, skipping patching\n");
continue;
}
- memcpy(insnbuf, replacement, a->replacementlen);
- insnbuf_sz = a->replacementlen;
-
- if (a->instrlen > a->replacementlen) {
- add_padding(insnbuf + a->replacementlen,
- a->instrlen - a->replacementlen);
- insnbuf_sz += a->instrlen - a->replacementlen;
- }
-
- s390_kernel_write(instr, insnbuf, insnbuf_sz);
+ s390_kernel_write(instr, replacement, a->instrlen);
}
}
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
index 64509e7dbd3b..ef23739b277c 100644
--- a/arch/s390/kernel/compat_linux.h
+++ b/arch/s390/kernel/compat_linux.h
@@ -5,69 +5,59 @@
#include <linux/compat.h>
#include <linux/socket.h>
#include <linux/syscalls.h>
+#include <asm/ptrace.h>
-/* Macro that masks the high order bit of an 32 bit pointer and converts it*/
-/* to a 64 bit pointer */
-#define A(__x) ((unsigned long)((__x) & 0x7FFFFFFFUL))
-#define AA(__x) \
- ((unsigned long)(__x))
+/*
+ * Macro that masks the high order bit of a 32 bit pointer and
+ * converts it to a 64 bit pointer.
+ */
+#define A(__x) ((unsigned long)((__x) & 0x7FFFFFFFUL))
+#define AA(__x) ((unsigned long)(__x))
/* Now 32bit compatibility types */
struct ipc_kludge_32 {
- __u32 msgp; /* pointer */
- __s32 msgtyp;
+ __u32 msgp; /* pointer */
+ __s32 msgtyp;
};
/* asm/sigcontext.h */
-typedef union
-{
- __u64 d;
- __u32 f;
+typedef union {
+ __u64 d;
+ __u32 f;
} freg_t32;
-typedef struct
-{
+typedef struct {
unsigned int fpc;
unsigned int pad;
- freg_t32 fprs[__NUM_FPRS];
+ freg_t32 fprs[__NUM_FPRS];
} _s390_fp_regs32;
-typedef struct
-{
- __u32 mask;
- __u32 addr;
-} _psw_t32 __attribute__ ((aligned(8)));
-
-typedef struct
-{
- _psw_t32 psw;
+typedef struct {
+ psw_t32 psw;
__u32 gprs[__NUM_GPRS];
__u32 acrs[__NUM_ACRS];
} _s390_regs_common32;
-typedef struct
-{
+typedef struct {
_s390_regs_common32 regs;
- _s390_fp_regs32 fpregs;
+ _s390_fp_regs32 fpregs;
} _sigregs32;
-typedef struct
-{
- __u32 gprs_high[__NUM_GPRS];
- __u64 vxrs_low[__NUM_VXRS_LOW];
- __vector128 vxrs_high[__NUM_VXRS_HIGH];
- __u8 __reserved[128];
+typedef struct {
+ __u32 gprs_high[__NUM_GPRS];
+ __u64 vxrs_low[__NUM_VXRS_LOW];
+ __vector128 vxrs_high[__NUM_VXRS_HIGH];
+ __u8 __reserved[128];
} _sigregs_ext32;
#define _SIGCONTEXT_NSIG32 64
#define _SIGCONTEXT_NSIG_BPW32 32
#define __SIGNAL_FRAMESIZE32 96
-#define _SIGMASK_COPY_SIZE32 (sizeof(u32)*2)
+#define _SIGMASK_COPY_SIZE32 (sizeof(u32) * 2)
-struct sigcontext32
-{
+struct sigcontext32 {
__u32 oldmask[_COMPAT_NSIG_WORDS];
- __u32 sregs; /* pointer */
+ __u32 sregs; /* pointer */
};
/* asm/signal.h */
@@ -75,11 +65,11 @@ struct sigcontext32
/* asm/ucontext.h */
struct ucontext32 {
__u32 uc_flags;
- __u32 uc_link; /* pointer */
+ __u32 uc_link; /* pointer */
compat_stack_t uc_stack;
_sigregs32 uc_mcontext;
compat_sigset_t uc_sigmask;
- /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */
+ /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */
unsigned char __unused[128 - sizeof(compat_sigset_t)];
_sigregs_ext32 uc_mcontext_ext;
};
@@ -88,25 +78,6 @@ struct stat64_emu31;
struct mmap_arg_struct_emu31;
struct fadvise64_64_args;
-long compat_sys_s390_chown16(const char __user *filename, u16 user, u16 group);
-long compat_sys_s390_lchown16(const char __user *filename, u16 user, u16 group);
-long compat_sys_s390_fchown16(unsigned int fd, u16 user, u16 group);
-long compat_sys_s390_setregid16(u16 rgid, u16 egid);
-long compat_sys_s390_setgid16(u16 gid);
-long compat_sys_s390_setreuid16(u16 ruid, u16 euid);
-long compat_sys_s390_setuid16(u16 uid);
-long compat_sys_s390_setresuid16(u16 ruid, u16 euid, u16 suid);
-long compat_sys_s390_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid);
-long compat_sys_s390_setresgid16(u16 rgid, u16 egid, u16 sgid);
-long compat_sys_s390_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid);
-long compat_sys_s390_setfsuid16(u16 uid);
-long compat_sys_s390_setfsgid16(u16 gid);
-long compat_sys_s390_getgroups16(int gidsetsize, u16 __user *grouplist);
-long compat_sys_s390_setgroups16(int gidsetsize, u16 __user *grouplist);
-long compat_sys_s390_getuid16(void);
-long compat_sys_s390_geteuid16(void);
-long compat_sys_s390_getgid16(void);
-long compat_sys_s390_getegid16(void);
long compat_sys_s390_truncate64(const char __user *path, u32 high, u32 low);
long compat_sys_s390_ftruncate64(unsigned int fd, u32 high, u32 low);
long compat_sys_s390_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, u32 high, u32 low);
@@ -118,8 +89,8 @@ long compat_sys_s390_fstat64(unsigned int fd, struct stat64_emu31 __user *statbu
long compat_sys_s390_fstatat64(unsigned int dfd, const char __user *filename, struct stat64_emu31 __user *statbuf, int flag);
long compat_sys_s390_old_mmap(struct mmap_arg_struct_emu31 __user *arg);
long compat_sys_s390_mmap2(struct mmap_arg_struct_emu31 __user *arg);
-long compat_sys_s390_read(unsigned int fd, char __user * buf, compat_size_t count);
-long compat_sys_s390_write(unsigned int fd, const char __user * buf, compat_size_t count);
+long compat_sys_s390_read(unsigned int fd, char __user *buf, compat_size_t count);
+long compat_sys_s390_write(unsigned int fd, const char __user *buf, compat_size_t count);
long compat_sys_s390_fadvise64(int fd, u32 high, u32 low, compat_size_t len, int advise);
long compat_sys_s390_fadvise64_64(struct fadvise64_64_args __user *args);
long compat_sys_s390_sync_file_range(int fd, u32 offhigh, u32 offlow, u32 nhigh, u32 nlow, unsigned int flags);
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 59b69c8ab5e1..df41132ccd06 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -53,19 +53,19 @@ STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
_LPP_OFFSET = __LC_LPP
.macro STBEAR address
- ALTERNATIVE "", ".insn s,0xb2010000,\address", 193
+ ALTERNATIVE "nop", ".insn s,0xb2010000,\address", 193
.endm
.macro LBEAR address
- ALTERNATIVE "", ".insn s,0xb2000000,\address", 193
+ ALTERNATIVE "nop", ".insn s,0xb2000000,\address", 193
.endm
.macro LPSWEY address,lpswe
- ALTERNATIVE "b \lpswe", ".insn siy,0xeb0000000071,\address,0", 193
+ ALTERNATIVE "b \lpswe; nopr", ".insn siy,0xeb0000000071,\address,0", 193
.endm
.macro MBEAR reg
- ALTERNATIVE "", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), 193
+ ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), 193
.endm
.macro CHECK_STACK savearea
@@ -121,16 +121,16 @@ _LPP_OFFSET = __LC_LPP
.endm
.macro BPOFF
- ALTERNATIVE "", ".insn rrf,0xb2e80000,0,0,12,0", 82
+ ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", 82
.endm
.macro BPON
- ALTERNATIVE "", ".insn rrf,0xb2e80000,0,0,13,0", 82
+ ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", 82
.endm
.macro BPENTER tif_ptr,tif_mask
ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .insn rrf,0xb2e80000,0,0,13,0", \
- "", 82
+ "j .+12; nop; nop", 82
.endm
.macro BPEXIT tif_ptr,tif_mask
@@ -172,9 +172,19 @@ _LPP_OFFSET = __LC_LPP
lgr %r14,\reg
larl %r13,\start
slgr %r14,%r13
- lghi %r13,\end - \start
- clgr %r14,%r13
+#ifdef CONFIG_AS_IS_LLVM
+ clgfrl %r14,.Lrange_size\@
+#else
+ clgfi %r14,\end - \start
+#endif
jhe \outside_label
+#ifdef CONFIG_AS_IS_LLVM
+ .section .rodata, "a"
+ .align 4
+.Lrange_size\@:
+ .long \end - \start
+ .previous
+#endif
.endm
.macro SIEEXIT
@@ -226,7 +236,7 @@ ENTRY(__switch_to)
aghi %r3,__TASK_pid
mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next
lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
- ALTERNATIVE "", "lpp _LPP_OFFSET", 40
+ ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40
BR_EX %r14
ENDPROC(__switch_to)
@@ -473,10 +483,7 @@ ENTRY(\name)
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
MBEAR %r11
stmg %r8,%r9,__PT_PSW(%r11)
- tm %r8,0x0001 # coming from user space?
- jno 1f
- lctlg %c1,%c1,__LC_KERNEL_ASCE
-1: lgr %r2,%r11 # pass pointer to pt_regs
+ lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,\handler
mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
tmhh %r8,0x0001 # returning to user ?
@@ -602,6 +609,7 @@ ENTRY(mcck_int_handler)
mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
la %r11,STACK_FRAME_OVERHEAD(%r1)
+ lgr %r2,%r11
lgr %r15,%r1
brasl %r14,s390_handle_mcck
.Lmcck_return:
@@ -612,7 +620,7 @@ ENTRY(mcck_int_handler)
jno 0f
BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
stpt __LC_EXIT_TIMER
-0: ALTERNATIVE "", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA),193
+0: ALTERNATIVE "nop", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA),193
LBEAR 0(%r12)
lmg %r11,%r15,__PT_R11(%r11)
LPSWEY __LC_RETURN_MCCK_PSW,__LC_RETURN_MCCK_LPSWE
@@ -648,7 +656,7 @@ ENTRY(mcck_int_handler)
ENDPROC(mcck_int_handler)
ENTRY(restart_int_handler)
- ALTERNATIVE "", "lpp _LPP_OFFSET", 40
+ ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40
stg %r15,__LC_SAVE_AREA_RESTART
TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4
jz 0f
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 3033f616e256..45393919fe61 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -205,7 +205,7 @@ static void show_msi_interrupt(struct seq_file *p, int irq)
unsigned long flags;
int cpu;
- irq_lock_sparse();
+ rcu_read_lock();
desc = irq_to_desc(irq);
if (!desc)
goto out;
@@ -224,7 +224,7 @@ static void show_msi_interrupt(struct seq_file *p, int irq)
seq_putc(p, '\n');
raw_spin_unlock_irqrestore(&desc->lock, flags);
out:
- irq_unlock_sparse();
+ rcu_read_unlock();
}
/*
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 6ebf02e15c85..ab761c008f98 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -26,8 +26,10 @@
#include <asm/stacktrace.h>
#include <asm/switch_to.h>
#include <asm/nmi.h>
+#include <asm/sclp.h>
-typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long);
+typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long,
+ unsigned long);
extern const unsigned char relocate_kernel[];
extern const unsigned long long relocate_kernel_len;
@@ -243,6 +245,7 @@ void machine_crash_shutdown(struct pt_regs *regs)
*/
static void __do_machine_kexec(void *data)
{
+ unsigned long diag308_subcode;
relocate_kernel_t data_mover;
struct kimage *image = data;
@@ -251,7 +254,10 @@ static void __do_machine_kexec(void *data)
__arch_local_irq_stnsm(0xfb); /* disable DAT - avoid no-execute */
/* Call the moving routine */
- (*data_mover)(&image->head, image->start);
+ diag308_subcode = DIAG308_CLEAR_RESET;
+ if (sclp.has_iplcc)
+ diag308_subcode |= DIAG308_FLAG_EI;
+ (*data_mover)(&image->head, image->start, diag308_subcode);
/* Die if kexec returns */
disabled_wait();
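
A minimal sketch of the subcode selection added above: start from a clear reset and request extended information only when SCLP reports the has_iplcc capability. The helper below is hypothetical; DIAG308_CLEAR_RESET, DIAG308_FLAG_EI and sclp.has_iplcc are the patch's own names.

/* Hypothetical helper distilling the subcode selection in
 * __do_machine_kexec(); the result is passed to relocate_kernel in %r4.
 */
static unsigned long kexec_diag308_subcode(void)
{
	unsigned long subcode = DIAG308_CLEAR_RESET;

	if (sclp.has_iplcc)	/* firmware supports extended information */
		subcode |= DIAG308_FLAG_EI;
	return subcode;
}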
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index fc60e29b8690..53ed3884fe64 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -29,6 +29,8 @@
#include <asm/switch_to.h>
#include <asm/ctl_reg.h>
#include <asm/asm-offsets.h>
+#include <asm/pai.h>
+
#include <linux/kvm_host.h>
struct mcck_struct {
@@ -169,10 +171,12 @@ void __s390_handle_mcck(void)
}
}
-void noinstr s390_handle_mcck(void)
+void noinstr s390_handle_mcck(struct pt_regs *regs)
{
trace_hardirqs_off();
+ pai_kernel_enter(regs);
__s390_handle_mcck();
+ pai_kernel_exit(regs);
trace_hardirqs_on();
}
/*
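
pai_kernel_enter()/pai_kernel_exit() bracket the machine check work so that crypto instructions executed by the handler are attributed to the kernel half of the PAI counter page instead of to the interrupted user context. A rough, explicitly hypothetical sketch of the enter side (the real asm/pai.h may differ; pai_key, S390_lowcore.ccd and PAI_CRYPTO_KERNEL_OFFSET are names from this series, and the exit helper would mirror the operation):

/* Hypothetical sketch: redirect PAI counting to the kernel counter half
 * while a machine check that interrupted user mode is being handled.
 */
static inline void pai_kernel_enter(struct pt_regs *regs)
{
	if (!static_branch_unlikely(&pai_key) || !S390_lowcore.ccd)
		return;
	if (user_mode(regs))
		WRITE_ONCE(S390_lowcore.ccd,
			   S390_lowcore.ccd | PAI_CRYPTO_KERNEL_OFFSET);
}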
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
index 52c1fe23b823..0d64aafd158f 100644
--- a/arch/s390/kernel/perf_cpum_cf_events.c
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -295,6 +295,76 @@ CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108);
CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x00109);
CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
+CPUMF_EVENT_ATTR(cf_z16, L1D_RO_EXCL_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z16, DTLB2_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z16, DTLB2_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z16, CRSTE_1MB_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z16, DTLB2_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z16, ITLB2_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z16, ITLB2_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z16, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z16, TLB2_CRSTE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z16, TLB2_ENGINES_BUSY, 0x008b);
+CPUMF_EVENT_ATTR(cf_z16, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z16, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z16, L1C_TLB2_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z16, DCW_REQ, 0x0091);
+CPUMF_EVENT_ATTR(cf_z16, DCW_REQ_IV, 0x0092);
+CPUMF_EVENT_ATTR(cf_z16, DCW_REQ_CHIP_HIT, 0x0093);
+CPUMF_EVENT_ATTR(cf_z16, DCW_REQ_DRAWER_HIT, 0x0094);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP, 0x0095);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_CHIP_HIT, 0x0097);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_DRAWER_HIT, 0x0098);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_MODULE, 0x0099);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_DRAWER, 0x009a);
+CPUMF_EVENT_ATTR(cf_z16, DCW_OFF_DRAWER, 0x009b);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_MEMORY, 0x009c);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_MODULE_MEMORY, 0x009d);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_DRAWER_MEMORY, 0x009e);
+CPUMF_EVENT_ATTR(cf_z16, DCW_OFF_DRAWER_MEMORY, 0x009f);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_MODULE_IV, 0x00a0);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_MODULE_CHIP_HIT, 0x00a1);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_MODULE_DRAWER_HIT, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_DRAWER_IV, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_DRAWER_CHIP_HIT, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_DRAWER_DRAWER_HIT, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_OFF_DRAWER_IV, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_OFF_DRAWER_CHIP_HIT, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_OFF_DRAWER_DRAWER_HIT, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z16, ICW_REQ, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z16, ICW_REQ_IV, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z16, ICW_REQ_CHIP_HIT, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z16, ICW_REQ_DRAWER_HIT, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_IV, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_CHIP_HIT, 0x00af);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_DRAWER_HIT, 0x00b0);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_MODULE, 0x00b1);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_DRAWER, 0x00b2);
+CPUMF_EVENT_ATTR(cf_z16, ICW_OFF_DRAWER, 0x00b3);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_MEMORY, 0x00b4);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_MODULE_MEMORY, 0x00b5);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_DRAWER_MEMORY, 0x00b6);
+CPUMF_EVENT_ATTR(cf_z16, ICW_OFF_DRAWER_MEMORY, 0x00b7);
+CPUMF_EVENT_ATTR(cf_z16, BCD_DFP_EXECUTION_SLOTS, 0x00e0);
+CPUMF_EVENT_ATTR(cf_z16, VX_BCD_EXECUTION_SLOTS, 0x00e1);
+CPUMF_EVENT_ATTR(cf_z16, DECIMAL_INSTRUCTIONS, 0x00e2);
+CPUMF_EVENT_ATTR(cf_z16, LAST_HOST_TRANSLATIONS, 0x00e8);
+CPUMF_EVENT_ATTR(cf_z16, TX_NC_TABORT, 0x00f4);
+CPUMF_EVENT_ATTR(cf_z16, TX_C_TABORT_NO_SPECIAL, 0x00f5);
+CPUMF_EVENT_ATTR(cf_z16, TX_C_TABORT_SPECIAL, 0x00f6);
+CPUMF_EVENT_ATTR(cf_z16, DFLT_ACCESS, 0x00f8);
+CPUMF_EVENT_ATTR(cf_z16, DFLT_CYCLES, 0x00fd);
+CPUMF_EVENT_ATTR(cf_z16, SORTL, 0x0100);
+CPUMF_EVENT_ATTR(cf_z16, DFLT_CC, 0x0109);
+CPUMF_EVENT_ATTR(cf_z16, DFLT_CCFINISH, 0x010a);
+CPUMF_EVENT_ATTR(cf_z16, NNPA_INVOCATIONS, 0x010b);
+CPUMF_EVENT_ATTR(cf_z16, NNPA_COMPLETIONS, 0x010c);
+CPUMF_EVENT_ATTR(cf_z16, NNPA_WAIT_LOCK, 0x010d);
+CPUMF_EVENT_ATTR(cf_z16, NNPA_HOLD_LOCK, 0x010e);
+CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = {
CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES),
@@ -635,6 +705,80 @@ static struct attribute *cpumcf_z15_pmu_event_attr[] __initdata = {
NULL,
};
+static struct attribute *cpumcf_z16_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z16, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z16, DTLB2_WRITES),
+ CPUMF_EVENT_PTR(cf_z16, DTLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z16, CRSTE_1MB_WRITES),
+ CPUMF_EVENT_PTR(cf_z16, DTLB2_GPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z16, ITLB2_WRITES),
+ CPUMF_EVENT_PTR(cf_z16, ITLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z16, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z16, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z16, TLB2_ENGINES_BUSY),
+ CPUMF_EVENT_PTR(cf_z16, TX_C_TEND),
+ CPUMF_EVENT_PTR(cf_z16, TX_NC_TEND),
+ CPUMF_EVENT_PTR(cf_z16, L1C_TLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z16, DCW_REQ),
+ CPUMF_EVENT_PTR(cf_z16, DCW_REQ_IV),
+ CPUMF_EVENT_PTR(cf_z16, DCW_REQ_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z16, DCW_REQ_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_IV),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_MODULE),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_DRAWER),
+ CPUMF_EVENT_PTR(cf_z16, DCW_OFF_DRAWER),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_MEMORY),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_MODULE_MEMORY),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_DRAWER_MEMORY),
+ CPUMF_EVENT_PTR(cf_z16, DCW_OFF_DRAWER_MEMORY),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_ON_MODULE_IV),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_ON_MODULE_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_ON_MODULE_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_ON_DRAWER_IV),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_ON_DRAWER_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_ON_DRAWER_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_OFF_DRAWER_IV),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_OFF_DRAWER_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_OFF_DRAWER_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z16, ICW_REQ),
+ CPUMF_EVENT_PTR(cf_z16, ICW_REQ_IV),
+ CPUMF_EVENT_PTR(cf_z16, ICW_REQ_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z16, ICW_REQ_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_IV),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_MODULE),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_DRAWER),
+ CPUMF_EVENT_PTR(cf_z16, ICW_OFF_DRAWER),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_MEMORY),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_MODULE_MEMORY),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_DRAWER_MEMORY),
+ CPUMF_EVENT_PTR(cf_z16, ICW_OFF_DRAWER_MEMORY),
+ CPUMF_EVENT_PTR(cf_z16, BCD_DFP_EXECUTION_SLOTS),
+ CPUMF_EVENT_PTR(cf_z16, VX_BCD_EXECUTION_SLOTS),
+ CPUMF_EVENT_PTR(cf_z16, DECIMAL_INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf_z16, LAST_HOST_TRANSLATIONS),
+ CPUMF_EVENT_PTR(cf_z16, TX_NC_TABORT),
+ CPUMF_EVENT_PTR(cf_z16, TX_C_TABORT_NO_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z16, TX_C_TABORT_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z16, DFLT_ACCESS),
+ CPUMF_EVENT_PTR(cf_z16, DFLT_CYCLES),
+ CPUMF_EVENT_PTR(cf_z16, SORTL),
+ CPUMF_EVENT_PTR(cf_z16, DFLT_CC),
+ CPUMF_EVENT_PTR(cf_z16, DFLT_CCFINISH),
+ CPUMF_EVENT_PTR(cf_z16, NNPA_INVOCATIONS),
+ CPUMF_EVENT_PTR(cf_z16, NNPA_COMPLETIONS),
+ CPUMF_EVENT_PTR(cf_z16, NNPA_WAIT_LOCK),
+ CPUMF_EVENT_PTR(cf_z16, NNPA_HOLD_LOCK),
+ CPUMF_EVENT_PTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+ CPUMF_EVENT_PTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+ NULL,
+};
+
/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
static struct attribute_group cpumcf_pmu_events_group = {
@@ -749,6 +893,10 @@ __init const struct attribute_group **cpumf_cf_event_group(void)
case 0x8562:
model = cpumcf_z15_pmu_event_attr;
break;
+ case 0x3931:
+ case 0x3932:
+ model = cpumcf_z16_pmu_event_attr;
+ break;
default:
model = none;
break;
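
The z16 rows above follow the file's existing CPUMF_EVENT_ATTR/CPUMF_EVENT_PTR idiom. Roughly, the pair expands along these lines (a sketch of the pattern, not the literal macros):

/* Sketch: _ATTR generates a perf_pmu_events_attr whose show routine
 * renders "event=0x<id>"; _PTR references it in an attrs[] table.
 */
#define CPUMF_EVENT_ATTR_SKETCH(cat, name, id)				\
	PMU_EVENT_ATTR(name, cpumcf_event_attr_##cat##_##name, id,	\
		       cpumf_events_sysfs_show)
#define CPUMF_EVENT_PTR_SKETCH(cat, name) \
	(&cpumcf_event_attr_##cat##_##name.attr.attr)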
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
new file mode 100644
index 000000000000..8c1545946d85
--- /dev/null
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -0,0 +1,688 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Performance event support - Processor Activity Instrumentation Facility
+ *
+ * Copyright IBM Corp. 2022
+ * Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ */
+#define KMSG_COMPONENT "pai_crypto"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/io.h>
+#include <linux/perf_event.h>
+
+#include <asm/ctl_reg.h>
+#include <asm/pai.h>
+#include <asm/debug.h>
+
+static debug_info_t *cfm_dbg;
+static unsigned int paicrypt_cnt; /* Size of the mapped counter sets */
+ /* extracted with QPACI instruction */
+
+DEFINE_STATIC_KEY_FALSE(pai_key);
+
+struct pai_userdata {
+ u16 num;
+ u64 value;
+} __packed;
+
+struct paicrypt_map {
+ unsigned long *page; /* Page for CPU to store counters */
+ struct pai_userdata *save; /* Page to store non-zero counters */
+ unsigned int users; /* # of PAI crypto users */
+ unsigned int sampler; /* # of PAI crypto samplers */
+ unsigned int counter; /* # of PAI crypto counters */
+ struct perf_event *event; /* Perf event for sampling */
+};
+
+static DEFINE_PER_CPU(struct paicrypt_map, paicrypt_map);
+
+/* Release the PMU if event is the last perf event */
+static DEFINE_MUTEX(pai_reserve_mutex);
+
+/* Adjust usage counters and remove allocated memory when all users are
+ * gone.
+ */
+static void paicrypt_event_destroy(struct perf_event *event)
+{
+ struct paicrypt_map *cpump = per_cpu_ptr(&paicrypt_map, event->cpu);
+
+ cpump->event = NULL;
+ static_branch_dec(&pai_key);
+ mutex_lock(&pai_reserve_mutex);
+ if (event->attr.sample_period)
+ cpump->sampler -= 1;
+ else
+ cpump->counter -= 1;
+ debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d"
+ " sampler %d counter %d\n", __func__,
+ event->attr.config, event->cpu, cpump->sampler,
+ cpump->counter);
+ if (!cpump->counter && !cpump->sampler) {
+ debug_sprintf_event(cfm_dbg, 4, "%s page %#lx save %p\n",
+ __func__, (unsigned long)cpump->page,
+ cpump->save);
+ free_page((unsigned long)cpump->page);
+ cpump->page = NULL;
+ kvfree(cpump->save);
+ cpump->save = NULL;
+ }
+ mutex_unlock(&pai_reserve_mutex);
+}
+
+static u64 paicrypt_getctr(struct paicrypt_map *cpump, int nr, bool kernel)
+{
+ if (kernel)
+ nr += PAI_CRYPTO_MAXCTR;
+ return cpump->page[nr];
+}
+
+/* Read the counter values. Return value from location in CMP. For event
+ * CRYPTO_ALL sum up all events.
+ */
+static u64 paicrypt_getdata(struct perf_event *event, bool kernel)
+{
+ struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
+ u64 sum = 0;
+ int i;
+
+ if (event->attr.config != PAI_CRYPTO_BASE) {
+ return paicrypt_getctr(cpump,
+ event->attr.config - PAI_CRYPTO_BASE,
+ kernel);
+ }
+
+ for (i = 1; i <= paicrypt_cnt; i++) {
+ u64 val = paicrypt_getctr(cpump, i, kernel);
+
+ if (!val)
+ continue;
+ sum += val;
+ }
+ return sum;
+}
+
+static u64 paicrypt_getall(struct perf_event *event)
+{
+ u64 sum = 0;
+
+ if (!event->attr.exclude_kernel)
+ sum += paicrypt_getdata(event, true);
+ if (!event->attr.exclude_user)
+ sum += paicrypt_getdata(event, false);
+
+ return sum;
+}
+
+/* Used to avoid races when checking for concurrent access by counting
+ * and sampling crypto events.
+ *
+ * Only one instance of the event pai_crypto/CRYPTO_ALL/ for sampling is
+ * allowed and when this event is running, no counting event is allowed.
+ * Several counting events are allowed in parallel, but no sampling event
+ * is allowed while one (or more) counting events are running.
+ *
+ * This function is called in process context and it is safe to block.
+ * When the event initialization function fails, no other callback will
+ * be invoked.
+ *
+ * Allocate the memory for the event.
+ */
+static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump)
+{
+ unsigned int *use_ptr;
+ int rc = 0;
+
+ mutex_lock(&pai_reserve_mutex);
+ if (a->sample_period) { /* Sampling requested */
+ use_ptr = &cpump->sampler;
+ if (cpump->counter || cpump->sampler)
+ rc = -EBUSY; /* ... sampling/counting active */
+ } else { /* Counting requested */
+ use_ptr = &cpump->counter;
+ if (cpump->sampler)
+ rc = -EBUSY; /* ... and sampling active */
+ }
+ if (rc)
+ goto unlock;
+
+ /* Allocate memory for counter page and counter extraction.
+ * Only the first counting event has to allocate a page.
+ */
+ if (cpump->page)
+ goto unlock;
+
+ rc = -ENOMEM;
+ cpump->page = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+ if (!cpump->page)
+ goto unlock;
+ cpump->save = kvmalloc_array(paicrypt_cnt + 1,
+ sizeof(struct pai_userdata), GFP_KERNEL);
+ if (!cpump->save) {
+ free_page((unsigned long)cpump->page);
+ cpump->page = NULL;
+ goto unlock;
+ }
+ rc = 0;
+
+unlock:
+ /* If rc is non-zero, do not increment counter/sampler. */
+ if (!rc)
+ *use_ptr += 1;
+ debug_sprintf_event(cfm_dbg, 5, "%s sample_period %#llx sampler %d"
+ " counter %d page %#lx save %p rc %d\n", __func__,
+ a->sample_period, cpump->sampler, cpump->counter,
+ (unsigned long)cpump->page, cpump->save, rc);
+ mutex_unlock(&pai_reserve_mutex);
+ return rc;
+}
+
+/* Might be called on a different CPU than the one the event is intended for. */
+static int paicrypt_event_init(struct perf_event *event)
+{
+ struct perf_event_attr *a = &event->attr;
+ struct paicrypt_map *cpump;
+ int rc;
+
+ /* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */
+ if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
+ return -ENOENT;
+ /* PAI crypto event must be in valid range */
+ if (a->config < PAI_CRYPTO_BASE ||
+     a->config > PAI_CRYPTO_BASE + paicrypt_cnt)
+ return -EINVAL;
+ /* Allow only CPU wide operation, no process context for now. */
+ if (event->hw.target || event->cpu == -1)
+ return -ENOENT;
+ /* Allow only CRYPTO_ALL for sampling. */
+ if (a->sample_period && a->config != PAI_CRYPTO_BASE)
+ return -EINVAL;
+
+ cpump = per_cpu_ptr(&paicrypt_map, event->cpu);
+ rc = paicrypt_busy(a, cpump);
+ if (rc)
+ return rc;
+
+ cpump->event = event;
+ event->destroy = paicrypt_event_destroy;
+
+ if (a->sample_period) {
+ a->sample_period = 1;
+ a->freq = 0;
+ /* Register for paicrypt_sched_task() to be called */
+ event->attach_state |= PERF_ATTACH_SCHED_CB;
+ /* Add raw data which contain the memory mapped counters */
+ a->sample_type |= PERF_SAMPLE_RAW;
+ /* Turn off inheritance */
+ a->inherit = 0;
+ }
+
+ static_branch_inc(&pai_key);
+ return 0;
+}
+
+static void paicrypt_read(struct perf_event *event)
+{
+ u64 prev, new, delta;
+
+ prev = local64_read(&event->hw.prev_count);
+ new = paicrypt_getall(event);
+ local64_set(&event->hw.prev_count, new);
+ delta = (prev <= new) ? new - prev
+ : (-1ULL - prev) + new + 1; /* overflow */
+ local64_add(delta, &event->count);
+}
+
+static void paicrypt_start(struct perf_event *event, int flags)
+{
+ u64 sum;
+
+ sum = paicrypt_getall(event); /* Get current value */
+ local64_set(&event->hw.prev_count, sum);
+ local64_set(&event->count, 0);
+}
+
+static int paicrypt_add(struct perf_event *event, int flags)
+{
+ struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
+ unsigned long ccd;
+
+ if (cpump->users++ == 0) {
+ ccd = virt_to_phys(cpump->page) | PAI_CRYPTO_KERNEL_OFFSET;
+ WRITE_ONCE(S390_lowcore.ccd, ccd);
+ __ctl_set_bit(0, 50);
+ }
+ cpump->event = event;
+ if (flags & PERF_EF_START && !event->attr.sample_period) {
+ /* Only counting needs initial counter value */
+ paicrypt_start(event, PERF_EF_RELOAD);
+ }
+ event->hw.state = 0;
+ if (event->attr.sample_period)
+ perf_sched_cb_inc(event->pmu);
+ return 0;
+}
+
+static void paicrypt_stop(struct perf_event *event, int flags)
+{
+ paicrypt_read(event);
+ event->hw.state = PERF_HES_STOPPED;
+}
+
+static void paicrypt_del(struct perf_event *event, int flags)
+{
+ struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
+
+ if (event->attr.sample_period)
+ perf_sched_cb_dec(event->pmu);
+ if (!event->attr.sample_period)
+ /* Only counting needs to read counter */
+ paicrypt_stop(event, PERF_EF_UPDATE);
+ if (cpump->users-- == 1) {
+ __ctl_clear_bit(0, 50);
+ WRITE_ONCE(S390_lowcore.ccd, 0);
+ }
+}
+
+/* Create raw data and save it in the buffer. Returns the number of bytes
+ * copied. Saves only non-zero counter entries of the form
+ * 2 bytes: Counter number
+ * 8 bytes: Counter value
+ */
+static size_t paicrypt_copy(struct pai_userdata *userdata,
+ struct paicrypt_map *cpump,
+ bool exclude_user, bool exclude_kernel)
+{
+ int i, outidx = 0;
+
+ for (i = 1; i <= paicrypt_cnt; i++) {
+ u64 val = 0;
+
+ if (!exclude_kernel)
+ val += paicrypt_getctr(cpump, i, true);
+ if (!exclude_user)
+ val += paicrypt_getctr(cpump, i, false);
+ if (val) {
+ userdata[outidx].num = i;
+ userdata[outidx].value = val;
+ outidx++;
+ }
+ }
+ return outidx * sizeof(struct pai_userdata);
+}
+
+static int paicrypt_push_sample(void)
+{
+ struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
+ struct perf_event *event = cpump->event;
+ struct perf_sample_data data;
+ struct perf_raw_record raw;
+ struct pt_regs regs;
+ size_t rawsize;
+ int overflow;
+
+ if (!cpump->event) /* No event active */
+ return 0;
+ rawsize = paicrypt_copy(cpump->save, cpump,
+ cpump->event->attr.exclude_user,
+ cpump->event->attr.exclude_kernel);
+ if (!rawsize) /* No incremented counters */
+ return 0;
+
+ /* Setup perf sample */
+ memset(&regs, 0, sizeof(regs));
+ memset(&raw, 0, sizeof(raw));
+ memset(&data, 0, sizeof(data));
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+ if (event->attr.sample_type & PERF_SAMPLE_TID) {
+ data.tid_entry.pid = task_tgid_nr(current);
+ data.tid_entry.tid = task_pid_nr(current);
+ }
+ if (event->attr.sample_type & PERF_SAMPLE_TIME)
+ data.time = event->clock();
+ if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
+ data.id = event->id;
+ if (event->attr.sample_type & PERF_SAMPLE_CPU) {
+ data.cpu_entry.cpu = smp_processor_id();
+ data.cpu_entry.reserved = 0;
+ }
+ if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+ raw.frag.size = rawsize;
+ raw.frag.data = cpump->save;
+ raw.size = raw.frag.size;
+ data.raw = &raw;
+ }
+
+ overflow = perf_event_overflow(event, &data, &regs);
+ perf_event_update_userpage(event);
+ /* Clear lowcore page after read */
+ memset(cpump->page, 0, PAGE_SIZE);
+ return overflow;
+}
+
+/* Called on schedule-in and schedule-out. No access to event structure,
+ * but for sampling only event CRYPTO_ALL is allowed.
+ */
+static void paicrypt_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+ /* We started with a clean page on event installation. So read out
+ * the results on schedule_out and, if the page was dirty, clear the
+ * values.
+ */
+ if (!sched_in)
+ paicrypt_push_sample();
+}
+
+/* Attribute definitions for paicrypt interface. As with other CPU
+ * Measurement Facilities, there is one attribute per mapped counter.
+ * The number of mapped counters may vary per machine generation. Use
+ * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction
+ * to determine the number of mapped counters. The instruction returns
+ * a positive number, which is the highest number of supported counters.
+ * All counters less than this number are also supported, there are no
+ * holes. A returned number of zero means no support for mapped counters.
+ *
+ * The identification of the counter is a unique number. The chosen range
+ * is 0x1000 + offset in mapped kernel page.
+ * All CPU Measurement Facility counter identifiers must be unique and
+ * the numbers from 0 to 496 are already used for the CPU Measurement
+ * Counter facility. Numbers 0xb0000, 0xbc000 and 0xbd000 are already
+ * used for the CPU Measurement Sampling facility.
+ */
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *paicrypt_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group paicrypt_events_group = {
+ .name = "events",
+ .attrs = NULL /* Filled in attr_event_init() */
+};
+
+static struct attribute_group paicrypt_format_group = {
+ .name = "format",
+ .attrs = paicrypt_format_attr,
+};
+
+static const struct attribute_group *paicrypt_attr_groups[] = {
+ &paicrypt_events_group,
+ &paicrypt_format_group,
+ NULL,
+};
+
+/* Performance monitoring unit for mapped counters */
+static struct pmu paicrypt = {
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = paicrypt_event_init,
+ .add = paicrypt_add,
+ .del = paicrypt_del,
+ .start = paicrypt_start,
+ .stop = paicrypt_stop,
+ .read = paicrypt_read,
+ .sched_task = paicrypt_sched_task,
+ .attr_groups = paicrypt_attr_groups
+};
+
+/* List of symbolic PAI counter names. */
+static const char * const paicrypt_ctrnames[] = {
+ [0] = "CRYPTO_ALL",
+ [1] = "KM_DEA",
+ [2] = "KM_TDEA_128",
+ [3] = "KM_TDEA_192",
+ [4] = "KM_ENCRYPTED_DEA",
+ [5] = "KM_ENCRYPTED_TDEA_128",
+ [6] = "KM_ENCRYPTED_TDEA_192",
+ [7] = "KM_AES_128",
+ [8] = "KM_AES_192",
+ [9] = "KM_AES_256",
+ [10] = "KM_ENCRYPTED_AES_128",
+ [11] = "KM_ENCRYPTED_AES_192",
+ [12] = "KM_ENCRYPTED_AES_256",
+ [13] = "KM_XTS_AES_128",
+ [14] = "KM_XTS_AES_256",
+ [15] = "KM_XTS_ENCRYPTED_AES_128",
+ [16] = "KM_XTS_ENCRYPTED_AES_256",
+ [17] = "KMC_DEA",
+ [18] = "KMC_TDEA_128",
+ [19] = "KMC_TDEA_192",
+ [20] = "KMC_ENCRYPTED_DEA",
+ [21] = "KMC_ENCRYPTED_TDEA_128",
+ [22] = "KMC_ENCRYPTED_TDEA_192",
+ [23] = "KMC_AES_128",
+ [24] = "KMC_AES_192",
+ [25] = "KMC_AES_256",
+ [26] = "KMC_ENCRYPTED_AES_128",
+ [27] = "KMC_ENCRYPTED_AES_192",
+ [28] = "KMC_ENCRYPTED_AES_256",
+ [29] = "KMC_PRNG",
+ [30] = "KMA_GCM_AES_128",
+ [31] = "KMA_GCM_AES_192",
+ [32] = "KMA_GCM_AES_256",
+ [33] = "KMA_GCM_ENCRYPTED_AES_128",
+ [34] = "KMA_GCM_ENCRYPTED_AES_192",
+ [35] = "KMA_GCM_ENCRYPTED_AES_256",
+ [36] = "KMF_DEA",
+ [37] = "KMF_TDEA_128",
+ [38] = "KMF_TDEA_192",
+ [39] = "KMF_ENCRYPTED_DEA",
+ [40] = "KMF_ENCRYPTED_TDEA_128",
+ [41] = "KMF_ENCRYPTED_TDEA_192",
+ [42] = "KMF_AES_128",
+ [43] = "KMF_AES_192",
+ [44] = "KMF_AES_256",
+ [45] = "KMF_ENCRYPTED_AES_128",
+ [46] = "KMF_ENCRYPTED_AES_192",
+ [47] = "KMF_ENCRYPTED_AES_256",
+ [48] = "KMCTR_DEA",
+ [49] = "KMCTR_TDEA_128",
+ [50] = "KMCTR_TDEA_192",
+ [51] = "KMCTR_ENCRYPTED_DEA",
+ [52] = "KMCTR_ENCRYPTED_TDEA_128",
+ [53] = "KMCTR_ENCRYPTED_TDEA_192",
+ [54] = "KMCTR_AES_128",
+ [55] = "KMCTR_AES_192",
+ [56] = "KMCTR_AES_256",
+ [57] = "KMCTR_ENCRYPTED_AES_128",
+ [58] = "KMCTR_ENCRYPTED_AES_192",
+ [59] = "KMCTR_ENCRYPTED_AES_256",
+ [60] = "KMO_DEA",
+ [61] = "KMO_TDEA_128",
+ [62] = "KMO_TDEA_192",
+ [63] = "KMO_ENCRYPTED_DEA",
+ [64] = "KMO_ENCRYPTED_TDEA_128",
+ [65] = "KMO_ENCRYPTED_TDEA_192",
+ [66] = "KMO_AES_128",
+ [67] = "KMO_AES_192",
+ [68] = "KMO_AES_256",
+ [69] = "KMO_ENCRYPTED_AES_128",
+ [70] = "KMO_ENCRYPTED_AES_192",
+ [71] = "KMO_ENCRYPTED_AES_256",
+ [72] = "KIMD_SHA_1",
+ [73] = "KIMD_SHA_256",
+ [74] = "KIMD_SHA_512",
+ [75] = "KIMD_SHA3_224",
+ [76] = "KIMD_SHA3_256",
+ [77] = "KIMD_SHA3_384",
+ [78] = "KIMD_SHA3_512",
+ [79] = "KIMD_SHAKE_128",
+ [80] = "KIMD_SHAKE_256",
+ [81] = "KIMD_GHASH",
+ [82] = "KLMD_SHA_1",
+ [83] = "KLMD_SHA_256",
+ [84] = "KLMD_SHA_512",
+ [85] = "KLMD_SHA3_224",
+ [86] = "KLMD_SHA3_256",
+ [87] = "KLMD_SHA3_384",
+ [88] = "KLMD_SHA3_512",
+ [89] = "KLMD_SHAKE_128",
+ [90] = "KLMD_SHAKE_256",
+ [91] = "KMAC_DEA",
+ [92] = "KMAC_TDEA_128",
+ [93] = "KMAC_TDEA_192",
+ [94] = "KMAC_ENCRYPTED_DEA",
+ [95] = "KMAC_ENCRYPTED_TDEA_128",
+ [96] = "KMAC_ENCRYPTED_TDEA_192",
+ [97] = "KMAC_AES_128",
+ [98] = "KMAC_AES_192",
+ [99] = "KMAC_AES_256",
+ [100] = "KMAC_ENCRYPTED_AES_128",
+ [101] = "KMAC_ENCRYPTED_AES_192",
+ [102] = "KMAC_ENCRYPTED_AES_256",
+ [103] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_DEA",
+ [104] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_128",
+ [105] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_192",
+ [106] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_DEA",
+ [107] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_128",
+ [108] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_192",
+ [109] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_128",
+ [110] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_192",
+ [111] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_256",
+ [112] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_128",
+ [113] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_192",
+ [114] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256A",
+ [115] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_128",
+ [116] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_256",
+ [117] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_128",
+ [118] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_256",
+ [119] = "PCC_SCALAR_MULTIPLY_P256",
+ [120] = "PCC_SCALAR_MULTIPLY_P384",
+ [121] = "PCC_SCALAR_MULTIPLY_P521",
+ [122] = "PCC_SCALAR_MULTIPLY_ED25519",
+ [123] = "PCC_SCALAR_MULTIPLY_ED448",
+ [124] = "PCC_SCALAR_MULTIPLY_X25519",
+ [125] = "PCC_SCALAR_MULTIPLY_X448",
+ [126] = "PRNO_SHA_512_DRNG",
+ [127] = "PRNO_TRNG_QUERY_RAW_TO_CONDITIONED_RATIO",
+ [128] = "PRNO_TRNG",
+ [129] = "KDSA_ECDSA_VERIFY_P256",
+ [130] = "KDSA_ECDSA_VERIFY_P384",
+ [131] = "KDSA_ECDSA_VERIFY_P521",
+ [132] = "KDSA_ECDSA_SIGN_P256",
+ [133] = "KDSA_ECDSA_SIGN_P384",
+ [134] = "KDSA_ECDSA_SIGN_P521",
+ [135] = "KDSA_ENCRYPTED_ECDSA_SIGN_P256",
+ [136] = "KDSA_ENCRYPTED_ECDSA_SIGN_P384",
+ [137] = "KDSA_ENCRYPTED_ECDSA_SIGN_P521",
+ [138] = "KDSA_EDDSA_VERIFY_ED25519",
+ [139] = "KDSA_EDDSA_VERIFY_ED448",
+ [140] = "KDSA_EDDSA_SIGN_ED25519",
+ [141] = "KDSA_EDDSA_SIGN_ED448",
+ [142] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED25519",
+ [143] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED448",
+ [144] = "PCKMO_ENCRYPT_DEA_KEY",
+ [145] = "PCKMO_ENCRYPT_TDEA_128_KEY",
+ [146] = "PCKMO_ENCRYPT_TDEA_192_KEY",
+ [147] = "PCKMO_ENCRYPT_AES_128_KEY",
+ [148] = "PCKMO_ENCRYPT_AES_192_KEY",
+ [149] = "PCKMO_ENCRYPT_AES_256_KEY",
+ [150] = "PCKMO_ENCRYPT_ECC_P256_KEY",
+ [151] = "PCKMO_ENCRYPT_ECC_P384_KEY",
+ [152] = "PCKMO_ENCRYPT_ECC_P521_KEY",
+ [153] = "PCKMO_ENCRYPT_ECC_ED25519_KEY",
+ [154] = "PCKMO_ENCRYPT_ECC_ED448_KEY",
+ [155] = "IBM_RESERVED_155",
+ [156] = "IBM_RESERVED_156",
+};
+
+static void __init attr_event_free(struct attribute **attrs, int num)
+{
+ struct perf_pmu_events_attr *pa;
+ int i;
+
+ for (i = 0; i < num; i++) {
+ struct device_attribute *dap;
+
+ dap = container_of(attrs[i], struct device_attribute, attr);
+ pa = container_of(dap, struct perf_pmu_events_attr, attr);
+ kfree(pa);
+ }
+ kfree(attrs);
+}
+
+static int __init attr_event_init_one(struct attribute **attrs, int num)
+{
+ struct perf_pmu_events_attr *pa;
+
+ pa = kzalloc(sizeof(*pa), GFP_KERNEL);
+ if (!pa)
+ return -ENOMEM;
+
+ sysfs_attr_init(&pa->attr.attr);
+ pa->id = PAI_CRYPTO_BASE + num;
+ pa->attr.attr.name = paicrypt_ctrnames[num];
+ pa->attr.attr.mode = 0444;
+ pa->attr.show = cpumf_events_sysfs_show;
+ pa->attr.store = NULL;
+ attrs[num] = &pa->attr.attr;
+ return 0;
+}
+
+/* Create PMU sysfs event attributes on the fly. */
+static int __init attr_event_init(void)
+{
+ struct attribute **attrs;
+ int ret, i;
+
+ attrs = kmalloc_array(ARRAY_SIZE(paicrypt_ctrnames) + 1, sizeof(*attrs),
+ GFP_KERNEL);
+ if (!attrs)
+ return -ENOMEM;
+ for (i = 0; i < ARRAY_SIZE(paicrypt_ctrnames); i++) {
+ ret = attr_event_init_one(attrs, i);
+ if (ret) {
+ attr_event_free(attrs, i); /* frees entries 0..i-1 */
+ return ret;
+ }
+ }
+ attrs[i] = NULL;
+ paicrypt_events_group.attrs = attrs;
+ return 0;
+}
+
+static int __init paicrypt_init(void)
+{
+ struct qpaci_info_block ib;
+ int rc;
+
+ if (!test_facility(196))
+ return 0;
+
+ qpaci(&ib);
+ paicrypt_cnt = ib.num_cc;
+ if (paicrypt_cnt == 0)
+ return 0;
+ if (paicrypt_cnt >= PAI_CRYPTO_MAXCTR)
+ paicrypt_cnt = PAI_CRYPTO_MAXCTR - 1;
+
+ rc = attr_event_init(); /* Export known PAI crypto events */
+ if (rc) {
+ pr_err("Creation of PMU pai_crypto /sysfs failed\n");
+ return rc;
+ }
+
+ /* Setup s390dbf facility */
+ cfm_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128);
+ if (!cfm_dbg) {
+ pr_err("Registration of s390dbf pai_crypto failed\n");
+ return -ENOMEM;
+ }
+ debug_register_view(cfm_dbg, &debug_sprintf_view);
+
+ rc = perf_pmu_register(&paicrypt, "pai_crypto", -1);
+ if (rc) {
+ pr_err("Registering the pai_crypto PMU failed with rc=%i\n",
+ rc);
+ debug_unregister_view(cfm_dbg, &debug_sprintf_view);
+ debug_unregister(cfm_dbg);
+ return rc;
+ }
+ return 0;
+}
+
+device_initcall(paicrypt_init);
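
Once registered, the PMU appears under /sys/bus/event_source/devices/pai_crypto and can be driven with the usual perf_event_open() flow. A user-space sketch under the constraints paicrypt_event_init() enforces (CPU-wide only; CRYPTO_ALL is config 0x1000, assuming PAI_CRYPTO_BASE is the 0x1000 base named in the comment above); needs root or CAP_PERF_MON:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	unsigned int type;
	FILE *f;
	int fd;

	/* The dynamic PMU type id must be read from sysfs at runtime. */
	f = fopen("/sys/bus/event_source/devices/pai_crypto/type", "r");
	if (!f)
		return 1;
	if (fscanf(f, "%u", &type) != 1) {
		fclose(f);
		return 1;
	}
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = 0x1000;	/* CRYPTO_ALL */

	/* CPU-wide event: pid == -1, cpu == 0; task events are rejected. */
	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0)
		return 1;
	sleep(1);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("CRYPTO_ALL: %lld\n", count);
	close(fd);
	return 0;
}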
diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S
index 9438368c3632..a9a1a6f45375 100644
--- a/arch/s390/kernel/relocate_kernel.S
+++ b/arch/s390/kernel/relocate_kernel.S
@@ -14,6 +14,7 @@
* moves the new kernel to its destination...
* %r2 = pointer to first kimage_entry_t
* %r3 = start address - where to jump to after the job is done...
+ * %r4 = subcode
*
* %r5 will be used as temp. storage
* %r6 holds the destination address
@@ -56,7 +57,7 @@ ENTRY(relocate_kernel)
jo 0b
j .base
.done:
- sgr %r0,%r0 # clear register r0
+ lgr %r0,%r4 # subcode
cghi %r3,0
je .diag
la %r4,load_psw-.base(%r13) # load psw-address into the register
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index d860ac300919..8d91eccc0963 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -494,7 +494,7 @@ static void __init setup_lowcore_dat_off(void)
lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
lc->preempt_count = PREEMPT_DISABLED;
- set_prefix((u32)(unsigned long) lc);
+ set_prefix(__pa(lc));
lowcore_ptr[0] = lc;
}
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 326cb8f75f58..6b7b6d5e3632 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -364,7 +364,7 @@ static inline int check_sync_clock(void)
* Apply clock delta to the global data structures.
* This is called once on the CPU that performed the clock sync.
*/
-static void clock_sync_global(unsigned long delta)
+static void clock_sync_global(long delta)
{
unsigned long now, adj;
struct ptff_qto qto;
@@ -400,7 +400,7 @@ static void clock_sync_global(unsigned long delta)
* Apply clock delta to the per-CPU data structures of this CPU.
* This is called for each online CPU after the call to clock_sync_global.
*/
-static void clock_sync_local(unsigned long delta)
+static void clock_sync_local(long delta)
{
/* Add the delta to the clock comparator. */
if (S390_lowcore.clock_comparator != clock_comparator_max) {
@@ -424,7 +424,7 @@ static void __init time_init_wq(void)
struct clock_sync_data {
atomic_t cpus;
int in_sync;
- unsigned long clock_delta;
+ long clock_delta;
};
/*
@@ -544,7 +544,7 @@ static int stpinfo_valid(void)
static int stp_sync_clock(void *data)
{
struct clock_sync_data *sync = data;
- u64 clock_delta, flags;
+ long clock_delta, flags;
static int first;
int rc;
@@ -554,9 +554,7 @@ static int stp_sync_clock(void *data)
while (atomic_read(&sync->cpus) != 0)
cpu_relax();
rc = 0;
- if (stp_info.todoff[0] || stp_info.todoff[1] ||
- stp_info.todoff[2] || stp_info.todoff[3] ||
- stp_info.tmd != 2) {
+ if (stp_info.todoff || stp_info.tmd != 2) {
flags = vdso_update_begin();
rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0,
&clock_delta);
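
The switch from unsigned long to long matters because STP can step the TOD clock backwards; the delta is applied with wrapping additions either way, but the sync logic also compares and scales the delta, which needs the signed view. A standalone illustration:

#include <stdio.h>

int main(void)
{
	unsigned long now = 1000;
	long delta = -16;	/* clock stepped 16 units back */

	/* Wrapping addition gives the right answer in both views... */
	printf("adjusted: %lu\n", now + delta);		/* 984 */
	/* ...but only the signed type can answer this: */
	printf("negative step: %s\n", delta < 0 ? "yes" : "no");
	return 0;
}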
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 99694260cac9..5075cde77b29 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -16,6 +16,7 @@
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/time_namespace.h>
+#include <linux/random.h>
#include <vdso/datapage.h>
#include <asm/vdso.h>
@@ -160,10 +161,9 @@ int vdso_getcpu_init(void)
}
early_initcall(vdso_getcpu_init); /* Must be called before SMP init */
-int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len)
{
- unsigned long vdso_text_len, vdso_mapping_len;
- unsigned long vvar_start, vdso_text_start;
+ unsigned long vvar_start, vdso_text_start, vdso_text_len;
struct vm_special_mapping *vdso_mapping;
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
@@ -180,8 +180,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
vdso_text_len = vdso64_end - vdso64_start;
vdso_mapping = &vdso64_mapping;
}
- vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE;
- vvar_start = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
+ vvar_start = get_unmapped_area(NULL, addr, vdso_mapping_len, 0, 0);
rc = vvar_start;
if (IS_ERR_VALUE(vvar_start))
goto out;
@@ -210,6 +209,52 @@ out:
return rc;
}
+static unsigned long vdso_addr(unsigned long start, unsigned long len)
+{
+ unsigned long addr, end, offset;
+
+ /*
+ * Round up the start address. It can start out unaligned as a result
+ * of stack start randomization.
+ */
+ start = PAGE_ALIGN(start);
+
+ /* Round the lowest possible end address up to a PMD boundary. */
+ end = (start + len + PMD_SIZE - 1) & PMD_MASK;
+ if (end >= VDSO_BASE)
+ end = VDSO_BASE;
+ end -= len;
+
+ if (end > start) {
+ offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
+ addr = start + (offset << PAGE_SHIFT);
+ } else {
+ addr = start;
+ }
+ return addr;
+}
+
+unsigned long vdso_size(void)
+{
+ unsigned long size = VVAR_NR_PAGES * PAGE_SIZE;
+
+ if (is_compat_task())
+ size += vdso32_end - vdso32_start;
+ else
+ size += vdso64_end - vdso64_start;
+ return PAGE_ALIGN(size);
+}
+
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+ unsigned long addr = VDSO_BASE;
+ unsigned long size = vdso_size();
+
+ if (current->flags & PF_RANDOMIZE)
+ addr = vdso_addr(current->mm->start_stack + PAGE_SIZE, size);
+ return map_vdso(addr, size);
+}
+
static struct page ** __init vdso_setup_pages(void *start, void *end)
{
int pages = (end - start) >> PAGE_SHIFT;
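
The randomization arithmetic in vdso_addr() can be modeled standalone: the candidate window is [PAGE_ALIGN(start), min(PMD-rounded end, VDSO_BASE) - len], and a page-granular offset is drawn from it. A sketch with stand-in constants (rand() replaces get_random_int()):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PMD_SIZE	(1UL << 20)		/* stand-in */
#define VDSO_BASE	(1UL << 40)		/* stand-in */

static uint64_t model_vdso_addr(uint64_t start, uint64_t len)
{
	uint64_t end, pages;

	start = (start + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
	end = (start + len + PMD_SIZE - 1) & ~(PMD_SIZE - 1);
	if (end >= VDSO_BASE)
		end = VDSO_BASE;
	end -= len;
	if (end <= start)
		return start;
	pages = ((end - start) >> PAGE_SHIFT) + 1;
	return start + (((uint64_t)rand() % pages) << PAGE_SHIFT);
}

int main(void)
{
	/* e.g. stack base plus one page, 1 MiB of vdso + vvar pages */
	printf("0x%llx\n", (unsigned long long)
	       model_vdso_addr(0x3ff80000123ULL + PAGE_SIZE, 1UL << 20));
	return 0;
}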
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 5beb7a4a11b3..83bb5cf97282 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -11,7 +11,6 @@
#include <linux/kvm.h>
#include <linux/gfp.h>
#include <linux/errno.h>
-#include <linux/compat.h>
#include <linux/mm_types.h>
#include <linux/pgtable.h>
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index 5e7ea8b111e8..04d4c6cf898e 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -75,7 +75,7 @@ static inline int arch_load_niai4(int *lock)
int owner;
asm_inline volatile(
- ALTERNATIVE("", ".insn rre,0xb2fa0000,4,0", 49) /* NIAI 4 */
+ ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", 49) /* NIAI 4 */
" l %0,%1\n"
: "=d" (owner) : "Q" (*lock) : "memory");
return owner;
@@ -86,7 +86,7 @@ static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
int expected = old;
asm_inline volatile(
- ALTERNATIVE("", ".insn rre,0xb2fa0000,8,0", 49) /* NIAI 8 */
+ ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", 49) /* NIAI 8 */
" cs %0,%3,%1\n"
: "=d" (old), "=Q" (*lock)
: "0" (old), "d" (new), "Q" (*lock)
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index e54f928503c5..d545f5c39f7e 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -58,9 +58,9 @@ static inline unsigned long mmap_base(unsigned long rnd,
/*
* Top of mmap area (just below the process stack).
- * Leave at least a ~32 MB hole.
+ * Leave at least a ~128 MB hole.
*/
- gap_min = 32 * 1024 * 1024UL;
+ gap_min = SZ_128M;
gap_max = (STACK_TOP / 6) * 5;
if (gap < gap_min)
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index e563cb65c0c4..bc980fd313d5 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -799,7 +799,7 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
struct zpci_dev *zdev;
int rc;
- zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, state);
+ zpci_dbg(1, "add fid:%x, fh:%x, c:%d\n", fid, fh, state);
zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
if (!zdev)
return ERR_PTR(-ENOMEM);
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 1057d7af4a55..375e0a5120bc 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -30,7 +30,7 @@ bool zpci_unique_uid;
void update_uid_checking(bool new)
{
if (zpci_unique_uid != new)
- zpci_dbg(1, "uid checking:%d\n", new);
+ zpci_dbg(3, "uid checking:%d\n", new);
zpci_unique_uid = new;
}
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index 3408c0df3ebf..ca6bd98eec13 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -196,7 +196,7 @@ int __init zpci_debug_init(void)
if (!pci_debug_err_id)
return -EINVAL;
debug_register_view(pci_debug_err_id, &debug_hex_ascii_view);
- debug_set_level(pci_debug_err_id, 6);
+ debug_set_level(pci_debug_err_id, 3);
debugfs_root = debugfs_create_dir("pci", NULL);
return 0;
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index ea9db5cea64e..b9324ca2eb94 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -321,9 +321,6 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n",
ccdf->fid, ccdf->fh, ccdf->pec);
- zpci_err("avail CCDF:\n");
- zpci_err_hex(ccdf, sizeof(*ccdf));
-
switch (ccdf->pec) {
case 0x0301: /* Reserved|Standby -> Configured */
if (!zdev) {
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 1710d006ee93..1a822b7799f8 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -18,16 +18,40 @@
#define ZPCI_INSN_BUSY_DELAY 1 /* 1 microsecond */
-static inline void zpci_err_insn(u8 cc, u8 status, u64 req, u64 offset)
+struct zpci_err_insn_data {
+ u8 insn;
+ u8 cc;
+ u8 status;
+ union {
+ struct {
+ u64 req;
+ u64 offset;
+ };
+ struct {
+ u64 addr;
+ u64 len;
+ };
+ };
+} __packed;
+
+static inline void zpci_err_insn_req(int lvl, u8 insn, u8 cc, u8 status,
+ u64 req, u64 offset)
{
- struct {
- u64 req;
- u64 offset;
- u8 cc;
- u8 status;
- } __packed data = {req, offset, cc, status};
-
- zpci_err_hex(&data, sizeof(data));
+ struct zpci_err_insn_data data = {
+ .insn = insn, .cc = cc, .status = status,
+ .req = req, .offset = offset};
+
+ zpci_err_hex_level(lvl, &data, sizeof(data));
+}
+
+static inline void zpci_err_insn_addr(int lvl, u8 insn, u8 cc, u8 status,
+ u64 addr, u64 len)
+{
+ struct zpci_err_insn_data data = {
+ .insn = insn, .cc = cc, .status = status,
+ .addr = addr, .len = len};
+
+ zpci_err_hex_level(lvl, &data, sizeof(data));
}
/* Modify PCI Function Controls */
@@ -47,16 +71,24 @@ static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status)
{
+ bool retried = false;
u8 cc;
do {
cc = __mpcifc(req, fib, status);
- if (cc == 2)
+ if (cc == 2) {
msleep(ZPCI_INSN_BUSY_DELAY);
+ if (!retried) {
+ zpci_err_insn_req(1, 'M', cc, *status, req, 0);
+ retried = true;
+ }
+ }
} while (cc == 2);
if (cc)
- zpci_err_insn(cc, *status, req, 0);
+ zpci_err_insn_req(0, 'M', cc, *status, req, 0);
+ else if (retried)
+ zpci_err_insn_req(1, 'M', cc, *status, req, 0);
return cc;
}
@@ -80,16 +112,24 @@ static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
{
+ bool retried = false;
u8 cc, status;
do {
cc = __rpcit(fn, addr, range, &status);
- if (cc == 2)
+ if (cc == 2) {
udelay(ZPCI_INSN_BUSY_DELAY);
+ if (!retried) {
+ zpci_err_insn_addr(1, 'R', cc, status, addr, range);
+ retried = true;
+ }
+ }
} while (cc == 2);
if (cc)
- zpci_err_insn(cc, status, addr, range);
+ zpci_err_insn_addr(0, 'R', cc, status, addr, range);
+ else if (retried)
+ zpci_err_insn_addr(1, 'R', cc, status, addr, range);
if (cc == 1 && (status == 4 || status == 16))
return -ENOMEM;
@@ -144,17 +184,25 @@ static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status)
int __zpci_load(u64 *data, u64 req, u64 offset)
{
+ bool retried = false;
u8 status;
int cc;
do {
cc = __pcilg(data, req, offset, &status);
- if (cc == 2)
+ if (cc == 2) {
udelay(ZPCI_INSN_BUSY_DELAY);
+ if (!retried) {
+ zpci_err_insn_req(1, 'l', cc, status, req, offset);
+ retried = true;
+ }
+ }
} while (cc == 2);
if (cc)
- zpci_err_insn(cc, status, req, offset);
+ zpci_err_insn_req(0, 'l', cc, status, req, offset);
+ else if (retried)
+ zpci_err_insn_req(1, 'l', cc, status, req, offset);
return (cc > 0) ? -EIO : cc;
}
@@ -198,7 +246,7 @@ int zpci_load(u64 *data, const volatile void __iomem *addr, unsigned long len)
cc = __pcilg_mio(data, (__force u64) addr, len, &status);
if (cc)
- zpci_err_insn(cc, status, 0, (__force u64) addr);
+ zpci_err_insn_addr(0, 'L', cc, status, (__force u64) addr, len);
return (cc > 0) ? -EIO : cc;
}
@@ -225,17 +273,25 @@ static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
int __zpci_store(u64 data, u64 req, u64 offset)
{
+ bool retried = false;
u8 status;
int cc;
do {
cc = __pcistg(data, req, offset, &status);
- if (cc == 2)
+ if (cc == 2) {
udelay(ZPCI_INSN_BUSY_DELAY);
+ if (!retried) {
+ zpci_err_insn_req(1, 's', cc, status, req, offset);
+ retried = true;
+ }
+ }
} while (cc == 2);
if (cc)
- zpci_err_insn(cc, status, req, offset);
+ zpci_err_insn_req(0, 's', cc, status, req, offset);
+ else if (retried)
+ zpci_err_insn_req(1, 's', cc, status, req, offset);
return (cc > 0) ? -EIO : cc;
}
@@ -278,7 +334,7 @@ int zpci_store(const volatile void __iomem *addr, u64 data, unsigned long len)
cc = __pcistg_mio(data, (__force u64) addr, len, &status);
if (cc)
- zpci_err_insn(cc, status, 0, (__force u64) addr);
+ zpci_err_insn_addr(0, 'S', cc, status, (__force u64) addr, len);
return (cc > 0) ? -EIO : cc;
}
@@ -304,17 +360,25 @@ static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
int __zpci_store_block(const u64 *data, u64 req, u64 offset)
{
+ bool retried = false;
u8 status;
int cc;
do {
cc = __pcistb(data, req, offset, &status);
- if (cc == 2)
+ if (cc == 2) {
udelay(ZPCI_INSN_BUSY_DELAY);
+ if (!retried) {
+ zpci_err_insn_req(1, 'b', cc, status, req, offset);
+ retried = true;
+ }
+ }
} while (cc == 2);
if (cc)
- zpci_err_insn(cc, status, req, offset);
+ zpci_err_insn_req(0, 'b', cc, status, req, offset);
+ else if (retried)
+ zpci_err_insn_req(1, 'b', cc, status, req, offset);
return (cc > 0) ? -EIO : cc;
}
@@ -358,7 +422,7 @@ int zpci_write_block(volatile void __iomem *dst,
cc = __pcistb_mio(src, (__force u64) dst, len, &status);
if (cc)
- zpci_err_insn(cc, status, 0, (__force u64) dst);
+ zpci_err_insn_addr(0, 'B', cc, status, (__force u64) dst, len);
return (cc > 0) ? -EIO : cc;
}
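
All five retry paths above share one shape: log once at debug level 1 on the first busy retry, at level 0 on a hard failure, and once more at level 1 if a retried operation eventually succeeded. Distilled (do_insn() stands in for the individual __mpcifc/__rpcit/__pcilg/__pcistg/__pcistb wrappers, '?' for the per-insn tag):

/* Distilled shape of the retry/trace pattern used above. */
static int traced_retry(u64 req, u64 offset, u8 *status)
{
	bool retried = false;
	int cc;

	do {
		cc = do_insn(req, offset, status);
		if (cc == 2) {		/* busy: wait, log first retry */
			udelay(ZPCI_INSN_BUSY_DELAY);
			if (!retried) {
				zpci_err_insn_req(1, '?', cc, *status, req, offset);
				retried = true;
			}
		}
	} while (cc == 2);

	if (cc)				/* hard error: full level */
		zpci_err_insn_req(0, '?', cc, *status, req, offset);
	else if (retried)		/* record the eventual success */
		zpci_err_insn_req(1, '?', cc, *status, req, offset);
	return cc;
}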
diff --git a/arch/s390/purgatory/head.S b/arch/s390/purgatory/head.S
index 3d1c31e0cf3d..6f835124ee82 100644
--- a/arch/s390/purgatory/head.S
+++ b/arch/s390/purgatory/head.S
@@ -44,11 +44,14 @@
.endm
.macro MEMSWAP dst,src,buf,len
-10: cghi \len,bufsz
+10: larl %r0,purgatory_end
+ larl %r1,stack
+ slgr %r0,%r1
+ cgr \len,%r0
jh 11f
lgr %r4,\len
j 12f
-11: lghi %r4,bufsz
+11: lgr %r4,%r0
12: MEMCPY \buf,\dst,%r4
MEMCPY \dst,\src,%r4
@@ -135,12 +138,18 @@ ENTRY(purgatory_start)
.start_crash_kernel:
/* Location of purgatory_start in crash memory */
+ larl %r0,.base_crash
+ larl %r1,purgatory_start
+ slgr %r0,%r1
lgr %r8,%r13
- aghi %r8,-(.base_crash-purgatory_start)
+ sgr %r8,%r0
/* Destination for this code i.e. end of memory to be swapped. */
+ larl %r0,purgatory_end
+ larl %r1,purgatory_start
+ slgr %r0,%r1
lg %r9,crash_size-.base_crash(%r13)
- aghi %r9,-(purgatory_end-purgatory_start)
+ sgr %r9,%r0
/* Destination in crash memory, i.e. same as r9 but in crash memory. */
lg %r10,crash_start-.base_crash(%r13)
@@ -149,15 +158,19 @@ ENTRY(purgatory_start)
/* Buffer location (in crash memory) and size. As the purgatory is
* behind the point of no return it can re-use the stack as buffer.
*/
- lghi %r11,bufsz
+ larl %r11,purgatory_end
larl %r12,stack
+ slgr %r11,%r12
MEMCPY %r12,%r9,%r11 /* dst -> (crash) buf */
MEMCPY %r9,%r8,%r11 /* self -> dst */
/* Jump to new location. */
lgr %r7,%r9
- aghi %r7,.jump_to_dst-purgatory_start
+ larl %r0,.jump_to_dst
+ larl %r1,purgatory_start
+ slgr %r0,%r1
+ agr %r7,%r0
br %r7
.jump_to_dst:
@@ -169,7 +182,10 @@ ENTRY(purgatory_start)
/* Load new buffer location after jump */
larl %r7,stack
- aghi %r10,stack-purgatory_start
+ lgr %r0,%r7
+ larl %r1,purgatory_start
+ slgr %r0,%r1
+ agr %r10,%r0
MEMCPY %r10,%r7,%r11 /* (new) buf -> (crash) buf */
/* Now the code is set up to run from its designated location. Start
diff --git a/arch/sh/boards/board-sh7757lcr.c b/arch/sh/boards/board-sh7757lcr.c
index c32b4c6229d3..f39c8196efdf 100644
--- a/arch/sh/boards/board-sh7757lcr.c
+++ b/arch/sh/boards/board-sh7757lcr.c
@@ -16,7 +16,7 @@
#include <linux/io.h>
#include <linux/mfd/tmio.h>
#include <linux/mmc/host.h>
-#include <linux/mmc/sh_mmcif.h>
+#include <linux/platform_data/sh_mmcif.h>
#include <linux/sh_eth.h>
#include <linux/sh_intc.h>
#include <linux/usb/renesas_usbhs.h>
diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c
index 4c9522dd351f..674da7ebd8b7 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -19,7 +19,7 @@
#include <linux/memblock.h>
#include <linux/mfd/tmio.h>
#include <linux/mmc/host.h>
-#include <linux/mmc/sh_mmcif.h>
+#include <linux/platform_data/sh_mmcif.h>
#include <linux/mtd/physmap.h>
#include <linux/gpio.h>
#include <linux/gpio/machine.h>
diff --git a/arch/sh/boot/romimage/mmcif-sh7724.c b/arch/sh/boot/romimage/mmcif-sh7724.c
index 6595b6b45bf1..d30123d859e0 100644
--- a/arch/sh/boot/romimage/mmcif-sh7724.c
+++ b/arch/sh/boot/romimage/mmcif-sh7724.c
@@ -8,7 +8,7 @@
* for more details.
*/
-#include <linux/mmc/sh_mmcif.h>
+#include <linux/platform_data/sh_mmcif.h>
#include <mach/romimage.h>
#define MMCIF_BASE (void __iomem *)0xa4ca0000
diff --git a/arch/sh/configs/rsk7201_defconfig b/arch/sh/configs/rsk7201_defconfig
index e41526120be1..619c18699459 100644
--- a/arch/sh/configs/rsk7201_defconfig
+++ b/arch/sh/configs/rsk7201_defconfig
@@ -25,7 +25,6 @@ CONFIG_CMDLINE_OVERWRITE=y
CONFIG_CMDLINE="console=ttySC0,115200 earlyprintk=serial ignore_loglevel"
CONFIG_BINFMT_FLAT=y
CONFIG_BINFMT_ZFLAT=y
-CONFIG_BINFMT_SHARED_FLAT=y
CONFIG_PM=y
CONFIG_CPU_IDLE=y
# CONFIG_STANDALONE is not set
diff --git a/arch/sh/configs/rsk7203_defconfig b/arch/sh/configs/rsk7203_defconfig
index 6af08fa1ddf8..5a54e2b883f0 100644
--- a/arch/sh/configs/rsk7203_defconfig
+++ b/arch/sh/configs/rsk7203_defconfig
@@ -30,7 +30,6 @@ CONFIG_CMDLINE_OVERWRITE=y
CONFIG_CMDLINE="console=ttySC0,115200 earlyprintk=serial ignore_loglevel"
CONFIG_BINFMT_FLAT=y
CONFIG_BINFMT_ZFLAT=y
-CONFIG_BINFMT_SHARED_FLAT=y
CONFIG_PM=y
CONFIG_CPU_IDLE=y
CONFIG_NET=y
diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig
index 601d062250d1..122216123e63 100644
--- a/arch/sh/configs/se7206_defconfig
+++ b/arch/sh/configs/se7206_defconfig
@@ -40,7 +40,6 @@ CONFIG_CMDLINE_OVERWRITE=y
CONFIG_CMDLINE="console=ttySC3,115200 ignore_loglevel earlyprintk=serial"
CONFIG_BINFMT_FLAT=y
CONFIG_BINFMT_ZFLAT=y
-CONFIG_BINFMT_SHARED_FLAT=y
CONFIG_BINFMT_MISC=y
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/sparc/include/asm/timex_32.h b/arch/sparc/include/asm/timex_32.h
index 542915b46209..f86326a6f89e 100644
--- a/arch/sparc/include/asm/timex_32.h
+++ b/arch/sparc/include/asm/timex_32.h
@@ -9,8 +9,6 @@
#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */
-/* XXX Maybe do something better at some point... -DaveM */
-typedef unsigned long cycles_t;
-#define get_cycles() (0)
+#include <asm-generic/timex.h>
#endif
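
This conversion (and the um one below) leans on the generic fallback header. Its approximate shape, from memory and possibly differing in detail from the real <asm-generic/timex.h>:

/* Approximate sketch of <asm-generic/timex.h>: no usable cycle counter,
 * so get_cycles() is 0 and entropy gathering falls back to timekeeping.
 */
typedef unsigned long cycles_t;

#ifndef get_cycles
static inline cycles_t get_cycles(void)
{
	return 0;
}
#endif

#ifndef random_get_entropy
#define random_get_entropy()	random_get_entropy_fallback()
#endif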
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index f9fe502b81c6..dad38960d1a8 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -779,5 +779,6 @@ static_assert(offsetof(compat_siginfo_t, si_upper) == 0x18);
static_assert(offsetof(compat_siginfo_t, si_pkey) == 0x14);
static_assert(offsetof(compat_siginfo_t, si_perf_data) == 0x10);
static_assert(offsetof(compat_siginfo_t, si_perf_type) == 0x14);
+static_assert(offsetof(compat_siginfo_t, si_perf_flags) == 0x18);
static_assert(offsetof(compat_siginfo_t, si_band) == 0x0c);
static_assert(offsetof(compat_siginfo_t, si_fd) == 0x10);
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index 8b9fc76cd3e0..570e43e6fda5 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -590,5 +590,6 @@ static_assert(offsetof(siginfo_t, si_upper) == 0x28);
static_assert(offsetof(siginfo_t, si_pkey) == 0x20);
static_assert(offsetof(siginfo_t, si_perf_data) == 0x18);
static_assert(offsetof(siginfo_t, si_perf_type) == 0x20);
+static_assert(offsetof(siginfo_t, si_perf_flags) == 0x24);
static_assert(offsetof(siginfo_t, si_band) == 0x10);
static_assert(offsetof(siginfo_t, si_fd) == 0x14);
diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile
index c5e1545bc5cf..77d7b9032158 100644
--- a/arch/sparc/vdso/Makefile
+++ b/arch/sparc/vdso/Makefile
@@ -58,7 +58,7 @@ CFL := $(PROFILING) -mcmodel=medlow -fPIC -O2 -fasynchronous-unwind-tables -m64
SPARC_REG_CFLAGS = -ffixed-g4 -ffixed-g5 -fcall-used-g5 -fcall-used-g7
-$(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(SPARC_REG_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
+$(vobjs): KBUILD_CFLAGS := $(filter-out $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(SPARC_REG_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
#
# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
@@ -88,6 +88,7 @@ $(obj)/vdso32.so.dbg: asflags-$(CONFIG_SPARC64) += -m32
KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_32 := $(filter-out -mcmodel=medlow,$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out $(RANDSTRUCT_CFLAGS),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out $(SPARC_REG_CFLAGS),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 += -m32 -msoft-float -fpic
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index b03269faef71..c4344b67628d 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -483,7 +483,6 @@ static void ubd_handler(void)
if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
blk_queue_max_discard_sectors(io_req->req->q, 0);
blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
}
blk_mq_end_request(io_req->req, io_req->error);
kfree(io_req);
@@ -800,10 +799,8 @@ static int ubd_open_dev(struct ubd *ubd_dev)
}
if (ubd_dev->no_trim == 0) {
ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
- ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
}
blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
return 0;
diff --git a/arch/um/include/asm/timex.h b/arch/um/include/asm/timex.h
index e392a9a5bc9b..9f27176adb26 100644
--- a/arch/um/include/asm/timex.h
+++ b/arch/um/include/asm/timex.h
@@ -2,13 +2,8 @@
#ifndef __UM_TIMEX_H
#define __UM_TIMEX_H
-typedef unsigned long cycles_t;
-
-static inline cycles_t get_cycles (void)
-{
- return 0;
-}
-
#define CLOCK_TICK_RATE (HZ)
+#include <asm-generic/timex.h>
+
#endif
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4bed3abf444d..762a0b6ab8b6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -188,7 +188,7 @@ config X86
select HAVE_CONTEXT_TRACKING if X86_64
select HAVE_CONTEXT_TRACKING_OFFSTACK if HAVE_CONTEXT_TRACKING
select HAVE_C_RECORDMCOUNT
- select HAVE_OBJTOOL_MCOUNT if STACK_VALIDATION
+ select HAVE_OBJTOOL_MCOUNT if HAVE_OBJTOOL
select HAVE_BUILDTIME_MCOUNT_SORT
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS
@@ -212,6 +212,7 @@ config X86
select HAVE_IOREMAP_PROT
select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64
select HAVE_IRQ_TIME_ACCOUNTING
+ select HAVE_JUMP_LABEL_HACK if HAVE_OBJTOOL
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZ4
@@ -230,7 +231,10 @@ config X86
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_MOVE_PMD
select HAVE_MOVE_PUD
+ select HAVE_NOINSTR_HACK if HAVE_OBJTOOL
select HAVE_NMI
+ select HAVE_NOINSTR_VALIDATION if HAVE_OBJTOOL
+ select HAVE_OBJTOOL if X86_64
select HAVE_OPTPROBES
select HAVE_PCSPKR_PLATFORM
select HAVE_PERF_EVENTS
@@ -239,17 +243,17 @@ config X86
select HAVE_PCI
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
- select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT
+ select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT
select HAVE_POSIX_CPU_TIMERS_TASK_WORK
select HAVE_REGS_AND_STACK_ACCESS_API
- select HAVE_RELIABLE_STACKTRACE if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
+ select HAVE_RELIABLE_STACKTRACE if UNWINDER_ORC || STACK_VALIDATION
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_SETUP_PER_CPU_AREA
select HAVE_SOFTIRQ_ON_OWN_STACK
select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR
- select HAVE_STACK_VALIDATION if X86_64
+ select HAVE_STACK_VALIDATION if HAVE_OBJTOOL
select HAVE_STATIC_CALL
- select HAVE_STATIC_CALL_INLINE if HAVE_STACK_VALIDATION
+ select HAVE_STATIC_CALL_INLINE if HAVE_OBJTOOL
select HAVE_PREEMPT_DYNAMIC_CALL
select HAVE_RSEQ
select HAVE_SYSCALL_TRACEPOINTS
@@ -268,7 +272,6 @@ config X86
select RTC_MC146818_LIB
select SPARSE_IRQ
select SRCU
- select STACK_VALIDATION if HAVE_STACK_VALIDATION && (HAVE_STATIC_CALL_INLINE || RETPOLINE)
select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
select TRACE_IRQFLAGS_SUPPORT
@@ -459,6 +462,7 @@ config GOLDFISH
config RETPOLINE
bool "Avoid speculative indirect branches in kernel"
+ select OBJTOOL if HAVE_OBJTOOL
default y
help
Compile kernel with the retpoline compiler options to guard against
@@ -472,6 +476,7 @@ config CC_HAS_SLS
config SLS
bool "Mitigate Straight-Line-Speculation"
depends on CC_HAS_SLS && X86_64
+ select OBJTOOL if HAVE_OBJTOOL
default n
help
Compile the kernel with straight-line-speculation options to guard
@@ -878,6 +883,21 @@ config ACRN_GUEST
IOT with small footprint and real-time features. More details can be
found in https://projectacrn.org/.
+config INTEL_TDX_GUEST
+ bool "Intel TDX (Trust Domain Extensions) - Guest Support"
+ depends on X86_64 && CPU_SUP_INTEL
+ depends on X86_X2APIC
+ select ARCH_HAS_CC_PLATFORM
+ select X86_MEM_ENCRYPT
+ select X86_MCE
+ help
+ Support running as a guest under Intel TDX. Without this support,
+	  the guest kernel cannot boot or run under TDX.
+ TDX includes memory encryption and integrity capabilities
+ which protect the confidentiality and integrity of guest
+ memory contents and CPU state. TDX guests are protected from
+ some attacks from the VMM.
+
endif #HYPERVISOR_GUEST
source "arch/x86/Kconfig.cpu"
@@ -1313,7 +1333,7 @@ config MICROCODE
config MICROCODE_INTEL
bool "Intel microcode loading support"
- depends on MICROCODE
+ depends on CPU_SUP_INTEL && MICROCODE
default MICROCODE
help
This option enables microcode patch loading support for Intel
@@ -1325,7 +1345,7 @@ config MICROCODE_INTEL
config MICROCODE_AMD
bool "AMD microcode loading support"
- depends on MICROCODE
+ depends on CPU_SUP_AMD && MICROCODE
help
If you select this option, microcode patch loading support for AMD
processors will be enabled.
@@ -1816,17 +1836,6 @@ config ARCH_RANDOM
If supported, this is a high bandwidth, cryptographically
secure hardware random number generator.
-config X86_SMAP
- def_bool y
- prompt "Supervisor Mode Access Prevention" if EXPERT
- help
- Supervisor Mode Access Prevention (SMAP) is a security
- feature in newer Intel processors. There is a small
- performance cost if this enabled and turned on; there is
- also a small increase in the kernel size if this is enabled.
-
- If unsure, say Y.
-
config X86_UMIP
def_bool y
prompt "User Mode Instruction Prevention" if EXPERT
@@ -1855,9 +1864,10 @@ config CC_HAS_IBT
config X86_KERNEL_IBT
prompt "Indirect Branch Tracking"
bool
- depends on X86_64 && CC_HAS_IBT && STACK_VALIDATION
+ depends on X86_64 && CC_HAS_IBT && HAVE_OBJTOOL
# https://github.com/llvm/llvm-project/commit/9d7001eba9c4cb311e03cd8cdc231f9e579f2d0f
depends on !LD_IS_LLD || LLD_VERSION >= 140000
+ select OBJTOOL
help
Build the kernel with support for Indirect Branch Tracking, a
hardware-supported coarse-grained forward-edge Control Flow Integrity
@@ -2326,7 +2336,9 @@ choice
it can be used to assist security vulnerability exploitation.
This setting can be changed at boot time via the kernel command
- line parameter vsyscall=[emulate|xonly|none].
+ line parameter vsyscall=[emulate|xonly|none]. Emulate mode
+ is deprecated and can only be enabled using the kernel command
+ line.
On a system with recent enough glibc (2.14 or newer) and no
static binaries, you can say None without a performance penalty
@@ -2334,20 +2346,6 @@ choice
If unsure, select "Emulate execution only".
- config LEGACY_VSYSCALL_EMULATE
- bool "Full emulation"
- help
- The kernel traps and emulates calls into the fixed vsyscall
- address mapping. This makes the mapping non-executable, but
- it still contains readable known contents, which could be
- used in certain rare security vulnerability exploits. This
- configuration is recommended when using legacy userspace
- that still uses vsyscalls along with legacy binary
- instrumentation tools that require code to be readable.
-
- An example of this type of legacy userspace is running
- Pin on an old binary that still uses vsyscalls.
-
config LEGACY_VSYSCALL_XONLY
bool "Emulate execution only"
help
@@ -2838,13 +2836,6 @@ config IA32_EMULATION
64-bit kernel. You should likely turn this on, unless you're
100% sure that you don't have any 32-bit programs left.
-config IA32_AOUT
- tristate "IA32 a.out support"
- depends on IA32_EMULATION
- depends on BROKEN
- help
- Support old a.out binaries in the 32bit emulation.
-
config X86_X32_ABI
bool "x32 ABI for 64-bit mode"
depends on X86_64
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index d3a6f74a94bd..d872a7522e55 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -237,7 +237,7 @@ choice
config UNWINDER_ORC
bool "ORC unwinder"
depends on X86_64
- select STACK_VALIDATION
+ select OBJTOOL
help
This option enables the ORC (Oops Rewind Capability) unwinder for
unwinding kernel stack traces. It uses a custom data format which is
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 63d50f65b828..1abd7cc9d6cd 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -313,5 +313,6 @@ define archhelp
echo ''
echo ' kvm_guest.config - Enable Kconfig items for running this kernel as a KVM guest'
echo ' xen.config - Enable Kconfig items for running this kernel as a Xen guest'
+ echo ' x86_debug.config - Enable tip tree debugging options for testing'
endef
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 34c9dbb6a47d..148ba5c5106e 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -26,6 +26,7 @@
#include "bitops.h"
#include "ctype.h"
#include "cpuflags.h"
+#include "io.h"
/* Useful macros */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
@@ -35,44 +36,10 @@ extern struct boot_params boot_params;
#define cpu_relax() asm volatile("rep; nop")
-/* Basic port I/O */
-static inline void outb(u8 v, u16 port)
-{
- asm volatile("outb %0,%1" : : "a" (v), "dN" (port));
-}
-static inline u8 inb(u16 port)
-{
- u8 v;
- asm volatile("inb %1,%0" : "=a" (v) : "dN" (port));
- return v;
-}
-
-static inline void outw(u16 v, u16 port)
-{
- asm volatile("outw %0,%1" : : "a" (v), "dN" (port));
-}
-static inline u16 inw(u16 port)
-{
- u16 v;
- asm volatile("inw %1,%0" : "=a" (v) : "dN" (port));
- return v;
-}
-
-static inline void outl(u32 v, u16 port)
-{
- asm volatile("outl %0,%1" : : "a" (v), "dN" (port));
-}
-static inline u32 inl(u16 port)
-{
- u32 v;
- asm volatile("inl %1,%0" : "=a" (v) : "dN" (port));
- return v;
-}
-
static inline void io_delay(void)
{
const u16 DELAY_PORT = 0x80;
- asm volatile("outb %%al,%0" : : "dN" (DELAY_PORT));
+ outb(0, DELAY_PORT);
}
/* These functions are used to reference data in other segments. */
@@ -110,66 +77,78 @@ typedef unsigned int addr_t;
static inline u8 rdfs8(addr_t addr)
{
+ u8 *ptr = (u8 *)absolute_pointer(addr);
u8 v;
- asm volatile("movb %%fs:%1,%0" : "=q" (v) : "m" (*(u8 *)addr));
+ asm volatile("movb %%fs:%1,%0" : "=q" (v) : "m" (*ptr));
return v;
}
static inline u16 rdfs16(addr_t addr)
{
+ u16 *ptr = (u16 *)absolute_pointer(addr);
u16 v;
- asm volatile("movw %%fs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr));
+ asm volatile("movw %%fs:%1,%0" : "=r" (v) : "m" (*ptr));
return v;
}
static inline u32 rdfs32(addr_t addr)
{
+ u32 *ptr = (u32 *)absolute_pointer(addr);
u32 v;
- asm volatile("movl %%fs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr));
+ asm volatile("movl %%fs:%1,%0" : "=r" (v) : "m" (*ptr));
return v;
}
static inline void wrfs8(u8 v, addr_t addr)
{
- asm volatile("movb %1,%%fs:%0" : "+m" (*(u8 *)addr) : "qi" (v));
+ u8 *ptr = (u8 *)absolute_pointer(addr);
+ asm volatile("movb %1,%%fs:%0" : "+m" (*ptr) : "qi" (v));
}
static inline void wrfs16(u16 v, addr_t addr)
{
- asm volatile("movw %1,%%fs:%0" : "+m" (*(u16 *)addr) : "ri" (v));
+ u16 *ptr = (u16 *)absolute_pointer(addr);
+ asm volatile("movw %1,%%fs:%0" : "+m" (*ptr) : "ri" (v));
}
static inline void wrfs32(u32 v, addr_t addr)
{
- asm volatile("movl %1,%%fs:%0" : "+m" (*(u32 *)addr) : "ri" (v));
+ u32 *ptr = (u32 *)absolute_pointer(addr);
+ asm volatile("movl %1,%%fs:%0" : "+m" (*ptr) : "ri" (v));
}
static inline u8 rdgs8(addr_t addr)
{
+ u8 *ptr = (u8 *)absolute_pointer(addr);
u8 v;
- asm volatile("movb %%gs:%1,%0" : "=q" (v) : "m" (*(u8 *)addr));
+ asm volatile("movb %%gs:%1,%0" : "=q" (v) : "m" (*ptr));
return v;
}
static inline u16 rdgs16(addr_t addr)
{
+ u16 *ptr = (u16 *)absolute_pointer(addr);
u16 v;
- asm volatile("movw %%gs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr));
+ asm volatile("movw %%gs:%1,%0" : "=r" (v) : "m" (*ptr));
return v;
}
static inline u32 rdgs32(addr_t addr)
{
+ u32 *ptr = (u32 *)absolute_pointer(addr);
u32 v;
- asm volatile("movl %%gs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr));
+ asm volatile("movl %%gs:%1,%0" : "=r" (v) : "m" (*ptr));
return v;
}
static inline void wrgs8(u8 v, addr_t addr)
{
- asm volatile("movb %1,%%gs:%0" : "+m" (*(u8 *)addr) : "qi" (v));
+ u8 *ptr = (u8 *)absolute_pointer(addr);
+ asm volatile("movb %1,%%gs:%0" : "+m" (*ptr) : "qi" (v));
}
static inline void wrgs16(u16 v, addr_t addr)
{
- asm volatile("movw %1,%%gs:%0" : "+m" (*(u16 *)addr) : "ri" (v));
+ u16 *ptr = (u16 *)absolute_pointer(addr);
+ asm volatile("movw %1,%%gs:%0" : "+m" (*ptr) : "ri" (v));
}
static inline void wrgs32(u32 v, addr_t addr)
{
- asm volatile("movl %1,%%gs:%0" : "+m" (*(u32 *)addr) : "ri" (v));
+ u32 *ptr = (u32 *)absolute_pointer(addr);
+ asm volatile("movl %1,%%gs:%0" : "+m" (*ptr) : "ri" (v));
}
/* Note: these only return true/false, not a signed return value! */
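The rdfs*/wrfs*/rdgs*/wrgs* helpers above now launder the fixed address through
absolute_pointer() before it becomes an "m" operand. A minimal sketch of why,
assuming absolute_pointer() keeps its RELOC_HIDE()-based definition from
include/linux/compiler.h (the macro hides the constant address from the
optimizer, so GCC cannot treat the access as a known out-of-bounds object and
warn):

        /* Sketch only -- mirrors include/linux/compiler.h, not this patch. */
        #define absolute_pointer(val)   RELOC_HIDE((void *)(val), 0)

        /* 0x1fe is an illustrative fixed offset, not taken from this patch: */
        u16 sig = rdfs16(0x1fe);        /* compiles without object-size warnings */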
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 6115274fe10f..19e1905dcbf6 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -101,8 +101,10 @@ ifdef CONFIG_X86_64
endif
vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
+vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o
vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
+vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o
efi-obj-$(CONFIG_EFI_STUB) = $(objtree)/drivers/firmware/efi/libstub/lib.a
$(obj)/vmlinux: $(vmlinux-objs-y) $(efi-obj-y) FORCE
diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index 8bcbcee54aa1..9caf89063e77 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -3,10 +3,9 @@
#include "misc.h"
#include "error.h"
#include "../string.h"
+#include "efi.h"
#include <linux/numa.h>
-#include <linux/efi.h>
-#include <asm/efi.h>
/*
* Longest parameter of 'acpi=' is 'copy_dsdt', plus an extra '\0'
@@ -20,153 +19,56 @@
*/
struct mem_vector immovable_mem[MAX_NUMNODES*2];
-/*
- * Search EFI system tables for RSDP. If both ACPI_20_TABLE_GUID and
- * ACPI_TABLE_GUID are found, take the former, which has more features.
- */
static acpi_physical_address
-__efi_get_rsdp_addr(unsigned long config_tables, unsigned int nr_tables,
- bool efi_64)
+__efi_get_rsdp_addr(unsigned long cfg_tbl_pa, unsigned int cfg_tbl_len)
{
- acpi_physical_address rsdp_addr = 0;
-
#ifdef CONFIG_EFI
- int i;
-
- /* Get EFI tables from systab. */
- for (i = 0; i < nr_tables; i++) {
- acpi_physical_address table;
- efi_guid_t guid;
-
- if (efi_64) {
- efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables + i;
-
- guid = tbl->guid;
- table = tbl->table;
-
- if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
- debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
- return 0;
- }
- } else {
- efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables + i;
-
- guid = tbl->guid;
- table = tbl->table;
- }
+ unsigned long rsdp_addr;
+ int ret;
- if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
- rsdp_addr = table;
- else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
- return table;
- }
+ /*
+	 * Search EFI system tables for RSDP. ACPI_20_TABLE_GUID is preferred
+	 * over ACPI_TABLE_GUID because it has more features.
+ */
+ rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len,
+ ACPI_20_TABLE_GUID);
+ if (rsdp_addr)
+ return (acpi_physical_address)rsdp_addr;
+
+	/* No ACPI_20_TABLE_GUID found, fall back to ACPI_TABLE_GUID. */
+ rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len,
+ ACPI_TABLE_GUID);
+ if (rsdp_addr)
+ return (acpi_physical_address)rsdp_addr;
+
+ debug_putstr("Error getting RSDP address.\n");
#endif
- return rsdp_addr;
-}
-
-/* EFI/kexec support is 64-bit only. */
-#ifdef CONFIG_X86_64
-static struct efi_setup_data *get_kexec_setup_data_addr(void)
-{
- struct setup_data *data;
- u64 pa_data;
-
- pa_data = boot_params->hdr.setup_data;
- while (pa_data) {
- data = (struct setup_data *)pa_data;
- if (data->type == SETUP_EFI)
- return (struct efi_setup_data *)(pa_data + sizeof(struct setup_data));
-
- pa_data = data->next;
- }
- return NULL;
-}
-
-static acpi_physical_address kexec_get_rsdp_addr(void)
-{
- efi_system_table_64_t *systab;
- struct efi_setup_data *esd;
- struct efi_info *ei;
- char *sig;
-
- esd = (struct efi_setup_data *)get_kexec_setup_data_addr();
- if (!esd)
- return 0;
-
- if (!esd->tables) {
- debug_putstr("Wrong kexec SETUP_EFI data.\n");
- return 0;
- }
-
- ei = &boot_params->efi_info;
- sig = (char *)&ei->efi_loader_signature;
- if (strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
- debug_putstr("Wrong kexec EFI loader signature.\n");
- return 0;
- }
-
- /* Get systab from boot params. */
- systab = (efi_system_table_64_t *) (ei->efi_systab | ((__u64)ei->efi_systab_hi << 32));
- if (!systab)
- error("EFI system table not found in kexec boot_params.");
-
- return __efi_get_rsdp_addr((unsigned long)esd->tables, systab->nr_tables, true);
+ return 0;
}
-#else
-static acpi_physical_address kexec_get_rsdp_addr(void) { return 0; }
-#endif /* CONFIG_X86_64 */
static acpi_physical_address efi_get_rsdp_addr(void)
{
#ifdef CONFIG_EFI
- unsigned long systab, config_tables;
+ unsigned long cfg_tbl_pa = 0;
+ unsigned int cfg_tbl_len;
+ unsigned long systab_pa;
unsigned int nr_tables;
- struct efi_info *ei;
- bool efi_64;
- char *sig;
-
- ei = &boot_params->efi_info;
- sig = (char *)&ei->efi_loader_signature;
-
- if (!strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
- efi_64 = true;
- } else if (!strncmp(sig, EFI32_LOADER_SIGNATURE, 4)) {
- efi_64 = false;
- } else {
- debug_putstr("Wrong EFI loader signature.\n");
- return 0;
- }
+ enum efi_type et;
+ int ret;
- /* Get systab from boot params. */
-#ifdef CONFIG_X86_64
- systab = ei->efi_systab | ((__u64)ei->efi_systab_hi << 32);
-#else
- if (ei->efi_systab_hi || ei->efi_memmap_hi) {
- debug_putstr("Error getting RSDP address: EFI system table located above 4GB.\n");
+ et = efi_get_type(boot_params);
+ if (et == EFI_TYPE_NONE)
return 0;
- }
- systab = ei->efi_systab;
-#endif
- if (!systab)
- error("EFI system table not found.");
- /* Handle EFI bitness properly */
- if (efi_64) {
- efi_system_table_64_t *stbl = (efi_system_table_64_t *)systab;
+ systab_pa = efi_get_system_table(boot_params);
+ if (!systab_pa)
+ error("EFI support advertised, but unable to locate system table.");
- config_tables = stbl->tables;
- nr_tables = stbl->nr_tables;
- } else {
- efi_system_table_32_t *stbl = (efi_system_table_32_t *)systab;
+ ret = efi_get_conf_table(boot_params, &cfg_tbl_pa, &cfg_tbl_len);
+ if (ret || !cfg_tbl_pa)
+ error("EFI config table not found.");
- config_tables = stbl->tables;
- nr_tables = stbl->nr_tables;
- }
-
- if (!config_tables)
- error("EFI config tables not found.");
-
- return __efi_get_rsdp_addr(config_tables, nr_tables, efi_64);
+ return __efi_get_rsdp_addr(cfg_tbl_pa, cfg_tbl_len);
#else
return 0;
#endif
@@ -256,14 +158,6 @@ acpi_physical_address get_rsdp_addr(void)
pa = boot_params->acpi_rsdp_addr;
- /*
- * Try to get EFI data from setup_data. This can happen when we're a
- * kexec'ed kernel and kexec(1) has passed all the required EFI info to
- * us.
- */
- if (!pa)
- pa = kexec_get_rsdp_addr();
-
if (!pa)
pa = efi_get_rsdp_addr();
diff --git a/arch/x86/boot/compressed/early_serial_console.c b/arch/x86/boot/compressed/early_serial_console.c
index 261e81fb9582..70a8d1706d0f 100644
--- a/arch/x86/boot/compressed/early_serial_console.c
+++ b/arch/x86/boot/compressed/early_serial_console.c
@@ -1,5 +1,6 @@
#include "misc.h"
-int early_serial_base;
+/* This might be accessed before .bss is cleared, so use .data instead. */
+int early_serial_base __section(".data");
#include "../early_serial_console.c"
diff --git a/arch/x86/boot/compressed/efi.c b/arch/x86/boot/compressed/efi.c
new file mode 100644
index 000000000000..6edd034b0b30
--- /dev/null
+++ b/arch/x86/boot/compressed/efi.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Helpers for early access to EFI configuration table.
+ *
+ * Originally derived from arch/x86/boot/compressed/acpi.c
+ */
+
+#include "misc.h"
+
+/**
+ * efi_get_type - Given a pointer to boot_params, determine the type of EFI environment.
+ *
+ * @bp: pointer to boot_params
+ *
+ * Return: EFI_TYPE_{32,64} for valid EFI environments, EFI_TYPE_NONE otherwise.
+ */
+enum efi_type efi_get_type(struct boot_params *bp)
+{
+ struct efi_info *ei;
+ enum efi_type et;
+ const char *sig;
+
+ ei = &bp->efi_info;
+ sig = (char *)&ei->efi_loader_signature;
+
+ if (!strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
+ et = EFI_TYPE_64;
+ } else if (!strncmp(sig, EFI32_LOADER_SIGNATURE, 4)) {
+ et = EFI_TYPE_32;
+ } else {
+ debug_putstr("No EFI environment detected.\n");
+ et = EFI_TYPE_NONE;
+ }
+
+#ifndef CONFIG_X86_64
+ /*
+ * Existing callers like acpi.c treat this case as an indicator to
+ * fall-through to non-EFI, rather than an error, so maintain that
+ * functionality here as well.
+ */
+ if (ei->efi_systab_hi || ei->efi_memmap_hi) {
+ debug_putstr("EFI system table is located above 4GB and cannot be accessed.\n");
+ et = EFI_TYPE_NONE;
+ }
+#endif
+
+ return et;
+}
+
+/**
+ * efi_get_system_table - Given a pointer to boot_params, retrieve the physical address
+ * of the EFI system table.
+ *
+ * @bp: pointer to boot_params
+ *
+ * Return: EFI system table address on success. On error, return 0.
+ */
+unsigned long efi_get_system_table(struct boot_params *bp)
+{
+ unsigned long sys_tbl_pa;
+ struct efi_info *ei;
+ enum efi_type et;
+
+ /* Get systab from boot params. */
+ ei = &bp->efi_info;
+#ifdef CONFIG_X86_64
+ sys_tbl_pa = ei->efi_systab | ((__u64)ei->efi_systab_hi << 32);
+#else
+ sys_tbl_pa = ei->efi_systab;
+#endif
+ if (!sys_tbl_pa) {
+ debug_putstr("EFI system table not found.");
+ return 0;
+ }
+
+ return sys_tbl_pa;
+}
+
+/*
+ * EFI config table address changes to virtual address after boot, which may
+ * not be accessible for the kexec'd kernel. To address this, kexec provides
+ * the initial physical address via a struct setup_data entry, which is
+ * checked for here, along with some sanity checks.
+ */
+static struct efi_setup_data *get_kexec_setup_data(struct boot_params *bp,
+ enum efi_type et)
+{
+#ifdef CONFIG_X86_64
+ struct efi_setup_data *esd = NULL;
+ struct setup_data *data;
+ u64 pa_data;
+
+ pa_data = bp->hdr.setup_data;
+ while (pa_data) {
+ data = (struct setup_data *)pa_data;
+ if (data->type == SETUP_EFI) {
+ esd = (struct efi_setup_data *)(pa_data + sizeof(struct setup_data));
+ break;
+ }
+
+ pa_data = data->next;
+ }
+
+ /*
+ * Original ACPI code falls back to attempting normal EFI boot in these
+ * cases, so maintain existing behavior by indicating non-kexec
+ * environment to the caller, but print them for debugging.
+ */
+ if (esd && !esd->tables) {
+ debug_putstr("kexec EFI environment missing valid configuration table.\n");
+ return NULL;
+ }
+
+ return esd;
+#endif
+ return NULL;
+}
+
+/**
+ * efi_get_conf_table - Given a pointer to boot_params, locate and return the physical
+ * address of the EFI configuration table.
+ *
+ * @bp: pointer to boot_params
+ * @cfg_tbl_pa: location to store physical address of config table
+ * @cfg_tbl_len: location to store number of config table entries
+ *
+ * Return: 0 on success. On error, return params are left unchanged.
+ */
+int efi_get_conf_table(struct boot_params *bp, unsigned long *cfg_tbl_pa,
+ unsigned int *cfg_tbl_len)
+{
+ unsigned long sys_tbl_pa;
+ enum efi_type et;
+ int ret;
+
+ if (!cfg_tbl_pa || !cfg_tbl_len)
+ return -EINVAL;
+
+ sys_tbl_pa = efi_get_system_table(bp);
+ if (!sys_tbl_pa)
+ return -EINVAL;
+
+ /* Handle EFI bitness properly */
+ et = efi_get_type(bp);
+ if (et == EFI_TYPE_64) {
+ efi_system_table_64_t *stbl = (efi_system_table_64_t *)sys_tbl_pa;
+ struct efi_setup_data *esd;
+
+ /* kexec provides an alternative EFI conf table, check for it. */
+ esd = get_kexec_setup_data(bp, et);
+
+ *cfg_tbl_pa = esd ? esd->tables : stbl->tables;
+ *cfg_tbl_len = stbl->nr_tables;
+ } else if (et == EFI_TYPE_32) {
+ efi_system_table_32_t *stbl = (efi_system_table_32_t *)sys_tbl_pa;
+
+ *cfg_tbl_pa = stbl->tables;
+ *cfg_tbl_len = stbl->nr_tables;
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Get vendor table address/guid from EFI config table at the given index */
+static int get_vendor_table(void *cfg_tbl, unsigned int idx,
+ unsigned long *vendor_tbl_pa,
+ efi_guid_t *vendor_tbl_guid,
+ enum efi_type et)
+{
+ if (et == EFI_TYPE_64) {
+ efi_config_table_64_t *tbl_entry = (efi_config_table_64_t *)cfg_tbl + idx;
+
+ if (!IS_ENABLED(CONFIG_X86_64) && tbl_entry->table >> 32) {
+ debug_putstr("Error: EFI config table entry located above 4GB.\n");
+ return -EINVAL;
+ }
+
+ *vendor_tbl_pa = tbl_entry->table;
+ *vendor_tbl_guid = tbl_entry->guid;
+
+ } else if (et == EFI_TYPE_32) {
+ efi_config_table_32_t *tbl_entry = (efi_config_table_32_t *)cfg_tbl + idx;
+
+ *vendor_tbl_pa = tbl_entry->table;
+ *vendor_tbl_guid = tbl_entry->guid;
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * efi_find_vendor_table - Given the EFI config table, search it for the physical
+ * address of the vendor table associated with @guid.
+ *
+ * @bp: pointer to boot_params
+ * @cfg_tbl_pa: pointer to EFI configuration table
+ * @cfg_tbl_len: number of entries in EFI configuration table
+ * @guid: GUID of vendor table
+ *
+ * Return: vendor table address on success. On error, return 0.
+ */
+unsigned long efi_find_vendor_table(struct boot_params *bp,
+ unsigned long cfg_tbl_pa,
+ unsigned int cfg_tbl_len,
+ efi_guid_t guid)
+{
+ enum efi_type et;
+ unsigned int i;
+
+ et = efi_get_type(bp);
+ if (et == EFI_TYPE_NONE)
+ return 0;
+
+ for (i = 0; i < cfg_tbl_len; i++) {
+ unsigned long vendor_tbl_pa;
+ efi_guid_t vendor_tbl_guid;
+ int ret;
+
+ ret = get_vendor_table((void *)cfg_tbl_pa, i,
+ &vendor_tbl_pa,
+ &vendor_tbl_guid, et);
+ if (ret)
+ return 0;
+
+ if (!efi_guidcmp(guid, vendor_tbl_guid))
+ return vendor_tbl_pa;
+ }
+
+ return 0;
+}
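Taken together, these helpers replace the open-coded config-table walk that
acpi.c used to carry. A minimal usage sketch mirroring the rewritten
efi_get_rsdp_addr()/__efi_get_rsdp_addr() above (error reporting trimmed):

        unsigned long cfg_tbl_pa = 0, rsdp;
        unsigned int cfg_tbl_len;

        if (efi_get_type(boot_params) == EFI_TYPE_NONE)
                return 0;                       /* not an EFI boot */

        if (efi_get_conf_table(boot_params, &cfg_tbl_pa, &cfg_tbl_len))
                return 0;                       /* no config table */

        /* Prefer the ACPI 2.0 table; fall back to the 1.0 one. */
        rsdp = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len,
                                     ACPI_20_TABLE_GUID);
        if (!rsdp)
                rsdp = efi_find_vendor_table(boot_params, cfg_tbl_pa,
                                             cfg_tbl_len, ACPI_TABLE_GUID);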
diff --git a/arch/x86/boot/compressed/efi.h b/arch/x86/boot/compressed/efi.h
new file mode 100644
index 000000000000..7db2f41b54cd
--- /dev/null
+++ b/arch/x86/boot/compressed/efi.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_COMPRESSED_EFI_H
+#define BOOT_COMPRESSED_EFI_H
+
+#if defined(_LINUX_EFI_H) || defined(_ASM_X86_EFI_H)
+#error Please do not include kernel proper namespace headers
+#endif
+
+typedef guid_t efi_guid_t __aligned(__alignof__(u32));
+
+#define EFI_GUID(a, b, c, d...) (efi_guid_t){ { \
+ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
+ (b) & 0xff, ((b) >> 8) & 0xff, \
+ (c) & 0xff, ((c) >> 8) & 0xff, d } }
+
+#define ACPI_TABLE_GUID EFI_GUID(0xeb9d2d30, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d)
+#define ACPI_20_TABLE_GUID EFI_GUID(0x8868e871, 0xe4f1, 0x11d3, 0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81)
+#define EFI_CC_BLOB_GUID EFI_GUID(0x067b1f5f, 0xcf26, 0x44c5, 0x85, 0x54, 0x93, 0xd7, 0x77, 0x91, 0x2d, 0x42)
+
+#define EFI32_LOADER_SIGNATURE "EL32"
+#define EFI64_LOADER_SIGNATURE "EL64"
+
+/*
+ * Generic EFI table header
+ */
+typedef struct {
+ u64 signature;
+ u32 revision;
+ u32 headersize;
+ u32 crc32;
+ u32 reserved;
+} efi_table_hdr_t;
+
+#define EFI_CONVENTIONAL_MEMORY 7
+
+#define EFI_MEMORY_MORE_RELIABLE \
+ ((u64)0x0000000000010000ULL) /* higher reliability */
+#define EFI_MEMORY_SP ((u64)0x0000000000040000ULL) /* soft reserved */
+
+#define EFI_PAGE_SHIFT 12
+
+typedef struct {
+ u32 type;
+ u32 pad;
+ u64 phys_addr;
+ u64 virt_addr;
+ u64 num_pages;
+ u64 attribute;
+} efi_memory_desc_t;
+
+#define efi_early_memdesc_ptr(map, desc_size, n) \
+ (efi_memory_desc_t *)((void *)(map) + ((n) * (desc_size)))
+
+typedef struct {
+ efi_guid_t guid;
+ u64 table;
+} efi_config_table_64_t;
+
+typedef struct {
+ efi_guid_t guid;
+ u32 table;
+} efi_config_table_32_t;
+
+typedef struct {
+ efi_table_hdr_t hdr;
+ u64 fw_vendor; /* physical addr of CHAR16 vendor string */
+ u32 fw_revision;
+ u32 __pad1;
+ u64 con_in_handle;
+ u64 con_in;
+ u64 con_out_handle;
+ u64 con_out;
+ u64 stderr_handle;
+ u64 stderr;
+ u64 runtime;
+ u64 boottime;
+ u32 nr_tables;
+ u32 __pad2;
+ u64 tables;
+} efi_system_table_64_t;
+
+typedef struct {
+ efi_table_hdr_t hdr;
+ u32 fw_vendor; /* physical addr of CHAR16 vendor string */
+ u32 fw_revision;
+ u32 con_in_handle;
+ u32 con_in;
+ u32 con_out_handle;
+ u32 con_out;
+ u32 stderr_handle;
+ u32 stderr;
+ u32 runtime;
+ u32 boottime;
+ u32 nr_tables;
+ u32 tables;
+} efi_system_table_32_t;
+
+/* kexec external ABI */
+struct efi_setup_data {
+ u64 fw_vendor;
+ u64 __unused;
+ u64 tables;
+ u64 smbios;
+ u64 reserved[8];
+};
+
+static inline int efi_guidcmp (efi_guid_t left, efi_guid_t right)
+{
+ return memcmp(&left, &right, sizeof (efi_guid_t));
+}
+
+#ifdef CONFIG_EFI
+bool __pure __efi_soft_reserve_enabled(void);
+
+static inline bool __pure efi_soft_reserve_enabled(void)
+{
+ return IS_ENABLED(CONFIG_EFI_SOFT_RESERVE)
+ && __efi_soft_reserve_enabled();
+}
+#else
+static inline bool efi_soft_reserve_enabled(void)
+{
+ return false;
+}
+#endif /* CONFIG_EFI */
+#endif /* BOOT_COMPRESSED_EFI_H */
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index dea95301196b..d33f060900d2 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -189,11 +189,11 @@ SYM_FUNC_START(startup_32)
subl $32, %eax /* Encryption bit is always above bit 31 */
bts %eax, %edx /* Set encryption mask for page tables */
/*
- * Mark SEV as active in sev_status so that startup32_check_sev_cbit()
- * will do a check. The sev_status memory will be fully initialized
- * with the contents of MSR_AMD_SEV_STATUS later in
- * set_sev_encryption_mask(). For now it is sufficient to know that SEV
- * is active.
+ * Set MSR_AMD64_SEV_ENABLED_BIT in sev_status so that
+ * startup32_check_sev_cbit() will do a check. sev_enable() will
+ * initialize sev_status with all the bits reported by
+ * MSR_AMD_SEV_STATUS later, but only MSR_AMD64_SEV_ENABLED_BIT
+ * needs to be set for now.
*/
movl $1, rva(sev_status)(%ebp)
1:
@@ -289,7 +289,7 @@ SYM_FUNC_START(startup_32)
pushl %eax
/* Enter paged protected Mode, activating Long Mode */
- movl $(X86_CR0_PG | X86_CR0_PE), %eax /* Enable Paging and Protected mode */
+ movl $CR0_STATE, %eax
movl %eax, %cr0
/* Jump from 32bit compatibility mode into 64bit mode. */
@@ -447,6 +447,23 @@ SYM_CODE_START(startup_64)
call load_stage1_idt
popq %rsi
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ /*
+ * Now that the stage1 interrupt handlers are set up, #VC exceptions from
+ * CPUID instructions can be properly handled for SEV-ES guests.
+ *
+ * For SEV-SNP, the CPUID table also needs to be set up in advance of any
+ * CPUID instructions being issued, so go ahead and do that now via
+ * sev_enable(), which will also handle the rest of the SEV-related
+ * detection/setup to ensure that has been done in advance of any dependent
+ * code.
+ */
+ pushq %rsi
+ movq %rsi, %rdi /* real mode address */
+ call sev_enable
+ popq %rsi
+#endif
+
/*
* paging_prepare() sets up the trampoline and checks if we need to
* enable 5-level paging.
@@ -558,17 +575,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
shrq $3, %rcx
rep stosq
-/*
- * If running as an SEV guest, the encryption mask is required in the
- * page-table setup code below. When the guest also has SEV-ES enabled
- * set_sev_encryption_mask() will cause #VC exceptions, but the stage2
- * handler can't map its GHCB because the page-table is not set up yet.
- * So set up the encryption mask here while still on the stage1 #VC
- * handler. Then load stage2 IDT and switch to the kernel's own
- * page-table.
- */
pushq %rsi
- call set_sev_encryption_mask
call load_stage2_idt
/* Pass boot_params to initialize_identity_maps() */
@@ -642,12 +649,28 @@ SYM_CODE_START(trampoline_32bit_src)
movl $MSR_EFER, %ecx
rdmsr
btsl $_EFER_LME, %eax
+ /* Avoid writing EFER if no change was made (for TDX guest) */
+ jc 1f
wrmsr
- popl %edx
+1: popl %edx
popl %ecx
+#ifdef CONFIG_X86_MCE
+ /*
+ * Preserve CR4.MCE if the kernel will enable #MC support.
+ * Clearing MCE may fault in some environments (that also force #MC
+ * support). Any machine check that occurs before #MC support is fully
+ * configured will crash the system regardless of the CR4.MCE value set
+ * here.
+ */
+ movl %cr4, %eax
+ andl $X86_CR4_MCE, %eax
+#else
+ movl $0, %eax
+#endif
+
/* Enable PAE and LA57 (if required) paging modes */
- movl $X86_CR4_PAE, %eax
+ orl $X86_CR4_PAE, %eax
testl %edx, %edx
jz 1f
orl $X86_CR4_LA57, %eax
@@ -661,8 +684,9 @@ SYM_CODE_START(trampoline_32bit_src)
pushl $__KERNEL_CS
pushl %eax
- /* Enable paging again */
- movl $(X86_CR0_PG | X86_CR0_PE), %eax
+ /* Enable paging again. */
+ movl %cr0, %eax
+ btsl $X86_CR0_PG_BIT, %eax
movl %eax, %cr0
lret
diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c
index f7213d0943b8..44c350d627c7 100644
--- a/arch/x86/boot/compressed/ident_map_64.c
+++ b/arch/x86/boot/compressed/ident_map_64.c
@@ -90,7 +90,7 @@ static struct x86_mapping_info mapping_info;
/*
* Adds the specified range to the identity mappings.
*/
-static void add_identity_map(unsigned long start, unsigned long end)
+void kernel_add_identity_map(unsigned long start, unsigned long end)
{
int ret;
@@ -157,14 +157,15 @@ void initialize_identity_maps(void *rmode)
* explicitly here in case the compressed kernel does not touch them,
* or does not touch all the pages covering them.
*/
- add_identity_map((unsigned long)_head, (unsigned long)_end);
+ kernel_add_identity_map((unsigned long)_head, (unsigned long)_end);
boot_params = rmode;
- add_identity_map((unsigned long)boot_params, (unsigned long)(boot_params + 1));
+ kernel_add_identity_map((unsigned long)boot_params, (unsigned long)(boot_params + 1));
cmdline = get_cmd_line_ptr();
- add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE);
+ kernel_add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE);
+
+ sev_prep_identity_maps(top_level_pgt);
/* Load the new page-table. */
- sev_verify_cbit(top_level_pgt);
write_cr3(top_level_pgt);
}
@@ -246,10 +247,10 @@ static int set_clr_page_flags(struct x86_mapping_info *info,
* It should already exist, but keep things generic.
*
* To map the page just read from it and fault it in if there is no
- * mapping yet. add_identity_map() can't be called here because that
- * would unconditionally map the address on PMD level, destroying any
- * PTE-level mappings that might already exist. Use assembly here so
- * the access won't be optimized away.
+ * mapping yet. kernel_add_identity_map() can't be called here because
+ * that would unconditionally map the address on PMD level, destroying
+ * any PTE-level mappings that might already exist. Use assembly here
+ * so the access won't be optimized away.
*/
asm volatile("mov %[address], %%r9"
:: [address] "g" (*(unsigned long *)address)
@@ -275,15 +276,31 @@ static int set_clr_page_flags(struct x86_mapping_info *info,
* Changing encryption attributes of a page requires to flush it from
* the caches.
*/
- if ((set | clr) & _PAGE_ENC)
+ if ((set | clr) & _PAGE_ENC) {
clflush_page(address);
+ /*
+ * If the encryption attribute is being cleared, change the page state
+ * to shared in the RMP table.
+ */
+ if (clr)
+ snp_set_page_shared(__pa(address & PAGE_MASK));
+ }
+
/* Update PTE */
pte = *ptep;
pte = pte_set_flags(pte, set);
pte = pte_clear_flags(pte, clr);
set_pte(ptep, pte);
+ /*
+ * If the encryption attribute is being set, then change the page state to
+ * private in the RMP entry. The page state change must be done after the PTE
+ * is updated.
+ */
+ if (set & _PAGE_ENC)
+ snp_set_page_private(__pa(address & PAGE_MASK));
+
/* Flush TLB after changing encryption attribute */
write_cr3(top_level_pgt);
@@ -347,5 +364,5 @@ void do_boot_page_fault(struct pt_regs *regs, unsigned long error_code)
* Error code is sane - now identity map the 2M region around
* the faulting address.
*/
- add_identity_map(address, end);
+ kernel_add_identity_map(address, end);
}
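The set_clr_page_flags() changes above encode a strict ordering for SNP
page-state transitions: the RMP entry must become shared before the PTE drops
_PAGE_ENC, and become private only after the PTE gains it. A condensed sketch
of that sequence (details exactly as in the hunk above):

        if ((set | clr) & _PAGE_ENC) {
                clflush_page(address);          /* flush cached encrypted data */
                if (clr)                        /* RMP: shared, *before* PTE change */
                        snp_set_page_shared(__pa(address & PAGE_MASK));
        }
        set_pte(ptep, pte);                     /* update the mapping itself */
        if (set & _PAGE_ENC)                    /* RMP: private, *after* PTE change */
                snp_set_page_private(__pa(address & PAGE_MASK));
        write_cr3(top_level_pgt);               /* flush TLB */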
diff --git a/arch/x86/boot/compressed/idt_64.c b/arch/x86/boot/compressed/idt_64.c
index 9b93567d663a..6debb816e83d 100644
--- a/arch/x86/boot/compressed/idt_64.c
+++ b/arch/x86/boot/compressed/idt_64.c
@@ -39,7 +39,23 @@ void load_stage1_idt(void)
load_boot_idt(&boot_idt_desc);
}
-/* Setup IDT after kernel jumping to .Lrelocated */
+/*
+ * Set up the IDT after the kernel has jumped to .Lrelocated.
+ *
+ * initialize_identity_maps() needs a #PF handler to be set up
+ * in order to be able to fault-in identity mapping ranges; see
+ * do_boot_page_fault().
+ *
+ * This #PF handler setup needs to happen in load_stage2_idt(), where the
+ * IDT is loaded and the #VC IDT entry gets set up too.
+ *
+ * In order to be able to handle #VCs, one needs a GHCB which
+ * gets set up with an already established pagetable, which is done in
+ * initialize_identity_maps(). And there's the catch-22: the boot #VC
+ * handler do_boot_stage2_vc() needs to call early_setup_ghcb() itself
+ * (and, especially, set_page_decrypted()) because the SEV-ES setup code
+ * cannot initialize a GHCB as there's no #PF handler yet...
+ */
void load_stage2_idt(void)
{
boot_idt_desc.address = (unsigned long)boot_idt;
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 411b268bc0a2..4a3f223973f4 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -22,15 +22,14 @@
#include "misc.h"
#include "error.h"
#include "../string.h"
+#include "efi.h"
#include <generated/compile.h>
#include <linux/module.h>
#include <linux/uts.h>
#include <linux/utsname.h>
#include <linux/ctype.h>
-#include <linux/efi.h>
#include <generated/utsrelease.h>
-#include <asm/efi.h>
#define _SETUP
#include <asm/setup.h> /* For COMMAND_LINE_SIZE */
diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S
index a63424d13627..a73e4d783cae 100644
--- a/arch/x86/boot/compressed/mem_encrypt.S
+++ b/arch/x86/boot/compressed/mem_encrypt.S
@@ -187,42 +187,6 @@ SYM_CODE_END(startup32_vc_handler)
.code64
#include "../../kernel/sev_verify_cbit.S"
-SYM_FUNC_START(set_sev_encryption_mask)
-#ifdef CONFIG_AMD_MEM_ENCRYPT
- push %rbp
- push %rdx
-
- movq %rsp, %rbp /* Save current stack pointer */
-
- call get_sev_encryption_bit /* Get the encryption bit position */
- testl %eax, %eax
- jz .Lno_sev_mask
-
- bts %rax, sme_me_mask(%rip) /* Create the encryption mask */
-
- /*
- * Read MSR_AMD64_SEV again and store it to sev_status. Can't do this in
- * get_sev_encryption_bit() because this function is 32-bit code and
- * shared between 64-bit and 32-bit boot path.
- */
- movl $MSR_AMD64_SEV, %ecx /* Read the SEV MSR */
- rdmsr
-
- /* Store MSR value in sev_status */
- shlq $32, %rdx
- orq %rdx, %rax
- movq %rax, sev_status(%rip)
-
-.Lno_sev_mask:
- movq %rbp, %rsp /* Restore original stack pointer */
-
- pop %rdx
- pop %rbp
-#endif
-
- xor %rax, %rax
- RET
-SYM_FUNC_END(set_sev_encryption_mask)
.data
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 1cdcaf34ee36..cf690d8712f4 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -48,12 +48,17 @@ void *memmove(void *dest, const void *src, size_t n);
*/
struct boot_params *boot_params;
+struct port_io_ops pio_ops;
+
memptr free_mem_ptr;
memptr free_mem_end_ptr;
static char *vidmem;
static int vidport;
-static int lines, cols;
+
+/* These might be accessed before .bss is cleared, so use .data instead. */
+static int lines __section(".data");
+static int cols __section(".data");
#ifdef CONFIG_KERNEL_GZIP
#include "../../../../lib/decompress_inflate.c"
@@ -371,6 +376,16 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
lines = boot_params->screen_info.orig_video_lines;
cols = boot_params->screen_info.orig_video_cols;
+ init_default_io_ops();
+
+ /*
+ * Detect TDX guest environment.
+ *
+ * It has to be done before console_init() in order to use
+ * paravirtualized port I/O operations if needed.
+ */
+ early_tdx_detect();
+
console_init();
/*
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 16ed360b6692..4910bf230d7b 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -22,17 +22,21 @@
#include <linux/linkage.h>
#include <linux/screen_info.h>
#include <linux/elf.h>
-#include <linux/io.h>
#include <asm/page.h>
#include <asm/boot.h>
#include <asm/bootparam.h>
#include <asm/desc_defs.h>
+#include "tdx.h"
+
#define BOOT_CTYPE_H
#include <linux/acpi.h>
#define BOOT_BOOT_H
#include "../ctype.h"
+#include "../io.h"
+
+#include "efi.h"
#ifdef CONFIG_X86_64
#define memptr long
@@ -120,17 +124,23 @@ static inline void console_init(void)
{ }
#endif
-void set_sev_encryption_mask(void);
-
#ifdef CONFIG_AMD_MEM_ENCRYPT
+void sev_enable(struct boot_params *bp);
void sev_es_shutdown_ghcb(void);
extern bool sev_es_check_ghcb_fault(unsigned long address);
+void snp_set_page_private(unsigned long paddr);
+void snp_set_page_shared(unsigned long paddr);
+void sev_prep_identity_maps(unsigned long top_level_pgt);
#else
+static inline void sev_enable(struct boot_params *bp) { }
static inline void sev_es_shutdown_ghcb(void) { }
static inline bool sev_es_check_ghcb_fault(unsigned long address)
{
return false;
}
+static inline void snp_set_page_private(unsigned long paddr) { }
+static inline void snp_set_page_shared(unsigned long paddr) { }
+static inline void sev_prep_identity_maps(unsigned long top_level_pgt) { }
#endif
/* acpi.c */
@@ -151,6 +161,7 @@ static inline int count_immovable_mem_regions(void) { return 0; }
#ifdef CONFIG_X86_5LEVEL
extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d;
#endif
+extern void kernel_add_identity_map(unsigned long start, unsigned long end);
/* Used by PAGE_KERN* macros: */
extern pteval_t __default_kernel_pte_mask;
@@ -172,4 +183,47 @@ void boot_stage2_vc(void);
unsigned long sev_verify_cbit(unsigned long cr3);
+enum efi_type {
+ EFI_TYPE_64,
+ EFI_TYPE_32,
+ EFI_TYPE_NONE,
+};
+
+#ifdef CONFIG_EFI
+/* helpers for early EFI config table access */
+enum efi_type efi_get_type(struct boot_params *bp);
+unsigned long efi_get_system_table(struct boot_params *bp);
+int efi_get_conf_table(struct boot_params *bp, unsigned long *cfg_tbl_pa,
+ unsigned int *cfg_tbl_len);
+unsigned long efi_find_vendor_table(struct boot_params *bp,
+ unsigned long cfg_tbl_pa,
+ unsigned int cfg_tbl_len,
+ efi_guid_t guid);
+#else
+static inline enum efi_type efi_get_type(struct boot_params *bp)
+{
+ return EFI_TYPE_NONE;
+}
+
+static inline unsigned long efi_get_system_table(struct boot_params *bp)
+{
+ return 0;
+}
+
+static inline int efi_get_conf_table(struct boot_params *bp,
+ unsigned long *cfg_tbl_pa,
+ unsigned int *cfg_tbl_len)
+{
+ return -ENOENT;
+}
+
+static inline unsigned long efi_find_vendor_table(struct boot_params *bp,
+ unsigned long cfg_tbl_pa,
+ unsigned int cfg_tbl_len,
+ efi_guid_t guid)
+{
+ return 0;
+}
+#endif /* CONFIG_EFI */
+
#endif /* BOOT_COMPRESSED_MISC_H */
diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
index 6ff7e81b5628..cc9b2529a086 100644
--- a/arch/x86/boot/compressed/pgtable.h
+++ b/arch/x86/boot/compressed/pgtable.h
@@ -6,7 +6,7 @@
#define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0
#define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE
-#define TRAMPOLINE_32BIT_CODE_SIZE 0x70
+#define TRAMPOLINE_32BIT_CODE_SIZE 0x80
#define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index a1733319a22a..2ac12ff4111b 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -1,11 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
#include "misc.h"
-#include <linux/efi.h>
#include <asm/e820/types.h>
#include <asm/processor.h>
-#include <asm/efi.h>
#include "pgtable.h"
#include "../string.h"
+#include "efi.h"
#define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */
#define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index 28bcf04c022e..52f989f6acc2 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -20,8 +20,10 @@
#include <asm/fpu/xcr.h>
#include <asm/ptrace.h>
#include <asm/svm.h>
+#include <asm/cpuid.h>
#include "error.h"
+#include "../msr.h"
struct ghcb boot_ghcb_page __aligned(PAGE_SIZE);
struct ghcb *boot_ghcb;
@@ -56,23 +58,19 @@ static unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx)
static inline u64 sev_es_rd_ghcb_msr(void)
{
- unsigned long low, high;
+ struct msr m;
- asm volatile("rdmsr" : "=a" (low), "=d" (high) :
- "c" (MSR_AMD64_SEV_ES_GHCB));
+ boot_rdmsr(MSR_AMD64_SEV_ES_GHCB, &m);
- return ((high << 32) | low);
+ return m.q;
}
static inline void sev_es_wr_ghcb_msr(u64 val)
{
- u32 low, high;
+ struct msr m;
- low = val & 0xffffffffUL;
- high = val >> 32;
-
- asm volatile("wrmsr" : : "c" (MSR_AMD64_SEV_ES_GHCB),
- "a"(low), "d" (high) : "memory");
+ m.q = val;
+ boot_wrmsr(MSR_AMD64_SEV_ES_GHCB, &m);
}
static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
@@ -119,11 +117,54 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
/* Include code for early handlers */
#include "../../kernel/sev-shared.c"
-static bool early_setup_sev_es(void)
+static inline bool sev_snp_enabled(void)
+{
+ return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
+}
+
+static void __page_state_change(unsigned long paddr, enum psc_op op)
+{
+ u64 val;
+
+ if (!sev_snp_enabled())
+ return;
+
+ /*
+ * If private -> shared then invalidate the page before requesting the
+ * state change in the RMP table.
+ */
+ if (op == SNP_PAGE_STATE_SHARED && pvalidate(paddr, RMP_PG_SIZE_4K, 0))
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+
+ /* Issue VMGEXIT to change the page state in RMP table. */
+ sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
+ VMGEXIT();
+
+ /* Read the response of the VMGEXIT. */
+ val = sev_es_rd_ghcb_msr();
+ if ((GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP) || GHCB_MSR_PSC_RESP_VAL(val))
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
+
+ /*
+ * Now that page state is changed in the RMP table, validate it so that it is
+ * consistent with the RMP entry.
+ */
+ if (op == SNP_PAGE_STATE_PRIVATE && pvalidate(paddr, RMP_PG_SIZE_4K, 1))
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+}
+
+void snp_set_page_private(unsigned long paddr)
+{
+ __page_state_change(paddr, SNP_PAGE_STATE_PRIVATE);
+}
+
+void snp_set_page_shared(unsigned long paddr)
{
- if (!sev_es_negotiate_protocol())
- sev_es_terminate(GHCB_SEV_ES_PROT_UNSUPPORTED);
+ __page_state_change(paddr, SNP_PAGE_STATE_SHARED);
+}
+static bool early_setup_ghcb(void)
+{
if (set_page_decrypted((unsigned long)&boot_ghcb_page))
return false;
@@ -135,6 +176,10 @@ static bool early_setup_sev_es(void)
/* Initialize lookup tables for the instruction decoder */
inat_init_tables();
+	/* SNP guests require the GHCB GPA to be registered */
+ if (sev_snp_enabled())
+ snp_register_ghcb_early(__pa(&boot_ghcb_page));
+
return true;
}
@@ -174,8 +219,8 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code)
struct es_em_ctxt ctxt;
enum es_result result;
- if (!boot_ghcb && !early_setup_sev_es())
- sev_es_terminate(GHCB_SEV_ES_GEN_REQ);
+ if (!boot_ghcb && !early_setup_ghcb())
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
vc_ghcb_invalidate(boot_ghcb);
result = vc_init_em_ctxt(&ctxt, regs, exit_code);
@@ -202,5 +247,191 @@ finish:
if (result == ES_OK)
vc_finish_insn(&ctxt);
else if (result != ES_RETRY)
- sev_es_terminate(GHCB_SEV_ES_GEN_REQ);
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
+}
+
+static void enforce_vmpl0(void)
+{
+ u64 attrs;
+ int err;
+
+ /*
+ * RMPADJUST modifies RMP permissions of a lesser-privileged (numerically
+ * higher) privilege level. Here, clear the VMPL1 permission mask of the
+ * GHCB page. If the guest is not running at VMPL0, this will fail.
+ *
+	 * If the guest is running at VMPL0, it will succeed. Even though that
+	 * operation modifies permission bits, it is currently fine: Linux SNP
+	 * guests are supported only at VMPL0, so changes to the VMPL1-or-higher
+	 * permission masks are a don't-care.
+ */
+ attrs = 1;
+ if (rmpadjust((unsigned long)&boot_ghcb_page, RMP_PG_SIZE_4K, attrs))
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NOT_VMPL0);
+}
+
+void sev_enable(struct boot_params *bp)
+{
+ unsigned int eax, ebx, ecx, edx;
+ struct msr m;
+ bool snp;
+
+ /*
+ * Setup/preliminary detection of SNP. This will be sanity-checked
+ * against CPUID/MSR values later.
+ */
+ snp = snp_init(bp);
+
+ /* Check for the SME/SEV support leaf */
+ eax = 0x80000000;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+ if (eax < 0x8000001f)
+ return;
+
+ /*
+ * Check for the SME/SEV feature:
+ * CPUID Fn8000_001F[EAX]
+ * - Bit 0 - Secure Memory Encryption support
+ * - Bit 1 - Secure Encrypted Virtualization support
+ * CPUID Fn8000_001F[EBX]
+ * - Bits 5:0 - Pagetable bit position used to indicate encryption
+ */
+ eax = 0x8000001f;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+ /* Check whether SEV is supported */
+ if (!(eax & BIT(1))) {
+ if (snp)
+ error("SEV-SNP support indicated by CC blob, but not CPUID.");
+ return;
+ }
+
+ /* Set the SME mask if this is an SEV guest. */
+ boot_rdmsr(MSR_AMD64_SEV, &m);
+ sev_status = m.q;
+ if (!(sev_status & MSR_AMD64_SEV_ENABLED))
+ return;
+
+ /* Negotiate the GHCB protocol version. */
+ if (sev_status & MSR_AMD64_SEV_ES_ENABLED) {
+ if (!sev_es_negotiate_protocol())
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_PROT_UNSUPPORTED);
+ }
+
+ /*
+ * SNP is supported in v2 of the GHCB spec which mandates support for HV
+ * features.
+ */
+ if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) {
+ if (!(get_hv_features() & GHCB_HV_FT_SNP))
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
+
+ enforce_vmpl0();
+ }
+
+ if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+ error("SEV-SNP supported indicated by CC blob, but not SEV status MSR.");
+
+ sme_me_mask = BIT_ULL(ebx & 0x3f);
+}
+
+/* Search for Confidential Computing blob in the EFI config table. */
+static struct cc_blob_sev_info *find_cc_blob_efi(struct boot_params *bp)
+{
+ unsigned long cfg_table_pa;
+ unsigned int cfg_table_len;
+ int ret;
+
+ ret = efi_get_conf_table(bp, &cfg_table_pa, &cfg_table_len);
+ if (ret)
+ return NULL;
+
+ return (struct cc_blob_sev_info *)efi_find_vendor_table(bp, cfg_table_pa,
+ cfg_table_len,
+ EFI_CC_BLOB_GUID);
+}
+
+/*
+ * Initial set up of SNP relies on information provided by the
+ * Confidential Computing blob, which can be passed to the boot kernel
+ * by firmware/bootloader in the following ways:
+ *
+ * - via an entry in the EFI config table
+ * - via a setup_data structure, as defined by the Linux Boot Protocol
+ *
+ * Scan for the blob in that order.
+ */
+static struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
+{
+ struct cc_blob_sev_info *cc_info;
+
+ cc_info = find_cc_blob_efi(bp);
+ if (cc_info)
+ goto found_cc_info;
+
+ cc_info = find_cc_blob_setup_data(bp);
+ if (!cc_info)
+ return NULL;
+
+found_cc_info:
+ if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC)
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
+
+ return cc_info;
+}
+
+/*
+ * Indicate SNP based on presence of SNP-specific CC blob. Subsequent checks
+ * will verify the SNP CPUID/MSR bits.
+ */
+bool snp_init(struct boot_params *bp)
+{
+ struct cc_blob_sev_info *cc_info;
+
+ if (!bp)
+ return false;
+
+ cc_info = find_cc_blob(bp);
+ if (!cc_info)
+ return false;
+
+ /*
+ * If a SNP-specific Confidential Computing blob is present, then
+	 * the firmware/bootloader has indicated SNP support. Verifying this
+ * involves CPUID checks which will be more reliable if the SNP
+ * CPUID table is used. See comments over snp_setup_cpuid_table() for
+ * more details.
+ */
+ setup_cpuid_table(cc_info);
+
+ /*
+ * Pass run-time kernel a pointer to CC info via boot_params so EFI
+ * config table doesn't need to be searched again during early startup
+ * phase.
+ */
+ bp->cc_blob_address = (u32)(unsigned long)cc_info;
+
+ return true;
+}
+
+void sev_prep_identity_maps(unsigned long top_level_pgt)
+{
+ /*
+ * The Confidential Computing blob is used very early in uncompressed
+ * kernel to find the in-memory CPUID table to handle CPUID
+ * instructions. Make sure an identity-mapping exists so it can be
+ * accessed after switchover.
+ */
+ if (sev_snp_enabled()) {
+ unsigned long cc_info_pa = boot_params->cc_blob_address;
+ struct cc_blob_sev_info *cc_info;
+
+ kernel_add_identity_map(cc_info_pa, cc_info_pa + sizeof(*cc_info));
+
+ cc_info = (struct cc_blob_sev_info *)cc_info_pa;
+ kernel_add_identity_map(cc_info->cpuid_phys, cc_info->cpuid_phys + cc_info->cpuid_len);
+ }
+
+ sev_verify_cbit(top_level_pgt);
}
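For concreteness, the mask set at the end of sev_enable() above is a single
page-table bit whose position comes from CPUID Fn8000_001F[EBX] bits 5:0. A
worked example, assuming the C-bit position 51 commonly reported by
SEV-capable parts (the actual position is CPU-specific):

        /* If CPUID Fn8000_001F[EBX] bits 5:0 == 51: */
        sme_me_mask = BIT_ULL(ebx & 0x3f);      /* == 0x0008000000000000 */

        /* Private (encrypted) mappings then carry that bit; cf. cc_mkenc(). */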
diff --git a/arch/x86/boot/compressed/tdcall.S b/arch/x86/boot/compressed/tdcall.S
new file mode 100644
index 000000000000..46d0495e0d3a
--- /dev/null
+++ b/arch/x86/boot/compressed/tdcall.S
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "../../coco/tdx/tdcall.S"
diff --git a/arch/x86/boot/compressed/tdx.c b/arch/x86/boot/compressed/tdx.c
new file mode 100644
index 000000000000..918a7606f53c
--- /dev/null
+++ b/arch/x86/boot/compressed/tdx.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "../cpuflags.h"
+#include "../string.h"
+#include "../io.h"
+#include "error.h"
+
+#include <vdso/limits.h>
+#include <uapi/asm/vmx.h>
+
+#include <asm/shared/tdx.h>
+
+/* Called from __tdx_hypercall() for unrecoverable failure */
+void __tdx_hypercall_failed(void)
+{
+ error("TDVMCALL failed. TDX module bug?");
+}
+
+static inline unsigned int tdx_io_in(int size, u16 port)
+{
+ struct tdx_hypercall_args args = {
+ .r10 = TDX_HYPERCALL_STANDARD,
+ .r11 = EXIT_REASON_IO_INSTRUCTION,
+ .r12 = size,
+ .r13 = 0,
+ .r14 = port,
+ };
+
+ if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
+ return UINT_MAX;
+
+ return args.r11;
+}
+
+static inline void tdx_io_out(int size, u16 port, u32 value)
+{
+ struct tdx_hypercall_args args = {
+ .r10 = TDX_HYPERCALL_STANDARD,
+ .r11 = EXIT_REASON_IO_INSTRUCTION,
+ .r12 = size,
+ .r13 = 1,
+ .r14 = port,
+ .r15 = value,
+ };
+
+ __tdx_hypercall(&args, 0);
+}
+
+static inline u8 tdx_inb(u16 port)
+{
+ return tdx_io_in(1, port);
+}
+
+static inline void tdx_outb(u8 value, u16 port)
+{
+ tdx_io_out(1, port, value);
+}
+
+static inline void tdx_outw(u16 value, u16 port)
+{
+ tdx_io_out(2, port, value);
+}
+
+void early_tdx_detect(void)
+{
+ u32 eax, sig[3];
+
+ cpuid_count(TDX_CPUID_LEAF_ID, 0, &eax, &sig[0], &sig[2], &sig[1]);
+
+ if (memcmp(TDX_IDENT, sig, sizeof(sig)))
+ return;
+
+ /* Use hypercalls instead of I/O instructions */
+ pio_ops.f_inb = tdx_inb;
+ pio_ops.f_outb = tdx_outb;
+ pio_ops.f_outw = tdx_outw;
+}
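Once early_tdx_detect() has swapped the hooks, callers are untouched at the
source level; the inb()/outb() macros in arch/x86/boot/io.h (added below)
simply dispatch through pio_ops. A sketch of the resulting call chain on a
TDX guest (port 0x3f8 is just an illustrative serial port):

        outb('A', 0x3f8);       /* -> pio_ops.f_outb('A', 0x3f8)
                                 * -> tdx_outb('A', 0x3f8)
                                 * -> __tdx_hypercall() with
                                 *    r11 = EXIT_REASON_IO_INSTRUCTION,
                                 *    r12 = 1 (size), r13 = 1 (write),
                                 *    r14 = 0x3f8, r15 = 'A' */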
diff --git a/arch/x86/boot/compressed/tdx.h b/arch/x86/boot/compressed/tdx.h
new file mode 100644
index 000000000000..9055482cd35c
--- /dev/null
+++ b/arch/x86/boot/compressed/tdx.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_COMPRESSED_TDX_H
+#define BOOT_COMPRESSED_TDX_H
+
+#include <linux/types.h>
+
+#ifdef CONFIG_INTEL_TDX_GUEST
+void early_tdx_detect(void);
+#else
+static inline void early_tdx_detect(void) { }
+#endif
+
+#endif /* BOOT_COMPRESSED_TDX_H */
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
index e1478d32de1a..fed8d13ce252 100644
--- a/arch/x86/boot/cpucheck.c
+++ b/arch/x86/boot/cpucheck.c
@@ -27,6 +27,7 @@
#include <asm/required-features.h>
#include <asm/msr-index.h>
#include "string.h"
+#include "msr.h"
static u32 err_flags[NCAPINTS];
@@ -130,12 +131,11 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
/* If this is an AMD and we're only missing SSE+SSE2, try to
turn them on */
- u32 ecx = MSR_K7_HWCR;
- u32 eax, edx;
+ struct msr m;
- asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
- eax &= ~(1 << 15);
- asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
+ boot_rdmsr(MSR_K7_HWCR, &m);
+ m.l &= ~(1 << 15);
+ boot_wrmsr(MSR_K7_HWCR, &m);
get_cpuflags(); /* Make sure it really did something */
err = check_cpuflags();
@@ -145,28 +145,28 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
/* If this is a VIA C3, we might have to enable CX8
explicitly */
- u32 ecx = MSR_VIA_FCR;
- u32 eax, edx;
+ struct msr m;
- asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
- eax |= (1<<1)|(1<<7);
- asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
+ boot_rdmsr(MSR_VIA_FCR, &m);
+ m.l |= (1 << 1) | (1 << 7);
+ boot_wrmsr(MSR_VIA_FCR, &m);
set_bit(X86_FEATURE_CX8, cpu.flags);
err = check_cpuflags();
} else if (err == 0x01 && is_transmeta()) {
/* Transmeta might have masked feature bits in word 0 */
- u32 ecx = 0x80860004;
- u32 eax, edx;
+ struct msr m, m_tmp;
u32 level = 1;
- asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
- asm("wrmsr" : : "a" (~0), "d" (edx), "c" (ecx));
+ boot_rdmsr(0x80860004, &m);
+ m_tmp = m;
+ m_tmp.l = ~0;
+ boot_wrmsr(0x80860004, &m_tmp);
asm("cpuid"
: "+a" (level), "=d" (cpu.flags[0])
: : "ecx", "ebx");
- asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
+ boot_wrmsr(0x80860004, &m);
err = check_cpuflags();
} else if (err == 0x01 &&
diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c
index a0b75f73dc63..a83d67ec627d 100644
--- a/arch/x86/boot/cpuflags.c
+++ b/arch/x86/boot/cpuflags.c
@@ -71,8 +71,7 @@ int has_eflag(unsigned long mask)
# define EBX_REG "=b"
#endif
-static inline void cpuid_count(u32 id, u32 count,
- u32 *a, u32 *b, u32 *c, u32 *d)
+void cpuid_count(u32 id, u32 count, u32 *a, u32 *b, u32 *c, u32 *d)
{
asm volatile(".ifnc %%ebx,%3 ; movl %%ebx,%3 ; .endif \n\t"
"cpuid \n\t"
diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h
index 2e20814d3ce3..475b8fde90f7 100644
--- a/arch/x86/boot/cpuflags.h
+++ b/arch/x86/boot/cpuflags.h
@@ -17,5 +17,6 @@ extern u32 cpu_vendor[3];
int has_eflag(unsigned long mask);
void get_cpuflags(void);
+void cpuid_count(u32 id, u32 count, u32 *a, u32 *b, u32 *c, u32 *d);
#endif
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 6dbd7e9f74c9..0352e4589efa 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -163,7 +163,11 @@ extra_header_fields:
.long 0x200 # SizeOfHeaders
.long 0 # CheckSum
.word IMAGE_SUBSYSTEM_EFI_APPLICATION # Subsystem (EFI application)
+#ifdef CONFIG_DXE_MEM_ATTRIBUTES
+ .word IMAGE_DLL_CHARACTERISTICS_NX_COMPAT # DllCharacteristics
+#else
.word 0 # DllCharacteristics
+#endif
#ifdef CONFIG_X86_32
.long 0 # SizeOfStackReserve
.long 0 # SizeOfStackCommit
diff --git a/arch/x86/boot/io.h b/arch/x86/boot/io.h
new file mode 100644
index 000000000000..110880907f87
--- /dev/null
+++ b/arch/x86/boot/io.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_IO_H
+#define BOOT_IO_H
+
+#include <asm/shared/io.h>
+
+#undef inb
+#undef inw
+#undef inl
+#undef outb
+#undef outw
+#undef outl
+
+struct port_io_ops {
+ u8 (*f_inb)(u16 port);
+ void (*f_outb)(u8 v, u16 port);
+ void (*f_outw)(u16 v, u16 port);
+};
+
+extern struct port_io_ops pio_ops;
+
+/*
+ * Use the normal I/O instructions by default.
+ * TDX guests override these to use hypercalls.
+ */
+static inline void init_default_io_ops(void)
+{
+ pio_ops.f_inb = __inb;
+ pio_ops.f_outb = __outb;
+ pio_ops.f_outw = __outw;
+}
+
+/*
+ * Redirect port I/O operations via pio_ops callbacks.
+ * TDX guests override these callbacks with TDX-specific helpers.
+ */
+#define inb pio_ops.f_inb
+#define outb pio_ops.f_outb
+#define outw pio_ops.f_outw
+
+#endif
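With the defaults installed, the io_delay() rewrite in boot.h above compiles
down to the same OUT instruction as before, just reached through one function
pointer. Sketch:

        init_default_io_ops();  /* pio_ops.f_outb = __outb (real OUT insn) */
        outb(0, 0x80);          /* macro -> pio_ops.f_outb(0, 0x80) */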
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index e3add857c2c9..c4ea5258ab55 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -17,6 +17,8 @@
struct boot_params boot_params __attribute__((aligned(16)));
+struct port_io_ops pio_ops;
+
char *HEAP = _end;
char *heap_end = _end; /* Default end of heap = no heap */
@@ -33,7 +35,7 @@ static void copy_boot_params(void)
u16 cl_offset;
};
const struct old_cmdline * const oldcmd =
- (const struct old_cmdline *)OLD_CL_ADDRESS;
+ absolute_pointer(OLD_CL_ADDRESS);
BUILD_BUG_ON(sizeof(boot_params) != 4096);
memcpy(&boot_params.hdr, &hdr, sizeof(hdr));
@@ -133,6 +135,8 @@ static void init_heap(void)
void main(void)
{
+ init_default_io_ops();
+
/* First, copy the boot header into the "zeropage" */
copy_boot_params();
diff --git a/arch/x86/boot/msr.h b/arch/x86/boot/msr.h
new file mode 100644
index 000000000000..aed66f7ae199
--- /dev/null
+++ b/arch/x86/boot/msr.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Helpers/definitions related to MSR access.
+ */
+
+#ifndef BOOT_MSR_H
+#define BOOT_MSR_H
+
+#include <asm/shared/msr.h>
+
+/*
+ * The kernel proper already defines rdmsr()/wrmsr(), but they are not for the
+ * boot kernel since they rely on tracepoint/exception handling infrastructure
+ * that's not available here.
+ */
+static inline void boot_rdmsr(unsigned int reg, struct msr *m)
+{
+ asm volatile("rdmsr" : "=a" (m->l), "=d" (m->h) : "c" (reg));
+}
+
+static inline void boot_wrmsr(unsigned int reg, const struct msr *m)
+{
+ asm volatile("wrmsr" : : "c" (reg), "a"(m->l), "d" (m->h) : "memory");
+}
+
+#endif /* BOOT_MSR_H */
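
A minimal usage sketch for the helpers above, assuming struct msr exposes the l/h halves as in asm/shared/msr.h; the MSR index and bit are arbitrary examples, not taken from this patch:

#define EXAMPLE_MSR	0xc0000080	/* EFER, used here only for illustration */

static void example_set_low_bit(void)
{
	struct msr m;

	boot_rdmsr(EXAMPLE_MSR, &m);
	m.l |= 1;			/* modify bit 0 of the low half */
	boot_wrmsr(EXAMPLE_MSR, &m);
}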
diff --git a/arch/x86/coco/Makefile b/arch/x86/coco/Makefile
index c1ead00017a7..c816acf78b6a 100644
--- a/arch/x86/coco/Makefile
+++ b/arch/x86/coco/Makefile
@@ -4,3 +4,5 @@ KASAN_SANITIZE_core.o := n
CFLAGS_core.o += -fno-stack-protector
obj-y += core.o
+
+obj-$(CONFIG_INTEL_TDX_GUEST) += tdx/
diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c
index fc1365dd927e..49b44f881484 100644
--- a/arch/x86/coco/core.c
+++ b/arch/x86/coco/core.c
@@ -18,7 +18,15 @@ static u64 cc_mask __ro_after_init;
static bool intel_cc_platform_has(enum cc_attr attr)
{
- return false;
+ switch (attr) {
+ case CC_ATTR_GUEST_UNROLL_STRING_IO:
+ case CC_ATTR_HOTPLUG_DISABLED:
+ case CC_ATTR_GUEST_MEM_ENCRYPT:
+ case CC_ATTR_MEM_ENCRYPT:
+ return true;
+ default:
+ return false;
+ }
}
/*
@@ -57,6 +65,9 @@ static bool amd_cc_platform_has(enum cc_attr attr)
return (sev_status & MSR_AMD64_SEV_ENABLED) &&
!(sev_status & MSR_AMD64_SEV_ES_ENABLED);
+ case CC_ATTR_GUEST_SEV_SNP:
+ return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
+
default:
return false;
}
@@ -87,9 +98,18 @@ EXPORT_SYMBOL_GPL(cc_platform_has);
u64 cc_mkenc(u64 val)
{
+ /*
+ * Both AMD and Intel use a bit in the page table to indicate
+ * encryption status of the page.
+ *
+ * - for AMD, bit *set* means the page is encrypted
+ * - for Intel, bit *clear* means the page is encrypted.
+ */
switch (vendor) {
case CC_VENDOR_AMD:
return val | cc_mask;
+ case CC_VENDOR_INTEL:
+ return val & ~cc_mask;
default:
return val;
}
@@ -97,9 +117,12 @@ u64 cc_mkenc(u64 val)
u64 cc_mkdec(u64 val)
{
+ /* See comment in cc_mkenc() */
switch (vendor) {
case CC_VENDOR_AMD:
return val & ~cc_mask;
+ case CC_VENDOR_INTEL:
+ return val | cc_mask;
default:
return val;
}
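
A standalone illustration of the encode/decode convention above (plain C, runnable outside the kernel; bit 51 is assumed as the Intel shared bit, matching a gpa_width of 52):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t cc_mask = 1ULL << 51;		/* assumed shared bit */
	uint64_t paddr   = 0x1234000;

	uint64_t enc = paddr & ~cc_mask;	/* cc_mkenc() on Intel: bit cleared */
	uint64_t dec = paddr |  cc_mask;	/* cc_mkdec() on Intel: bit set */

	printf("enc=%#llx dec=%#llx\n",
	       (unsigned long long)enc, (unsigned long long)dec);
	return 0;
}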
diff --git a/arch/x86/coco/tdx/Makefile b/arch/x86/coco/tdx/Makefile
new file mode 100644
index 000000000000..46c55998557d
--- /dev/null
+++ b/arch/x86/coco/tdx/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y += tdx.o tdcall.o
diff --git a/arch/x86/coco/tdx/tdcall.S b/arch/x86/coco/tdx/tdcall.S
new file mode 100644
index 000000000000..f9eb1134f22d
--- /dev/null
+++ b/arch/x86/coco/tdx/tdcall.S
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/asm-offsets.h>
+#include <asm/asm.h>
+#include <asm/frame.h>
+#include <asm/unwind_hints.h>
+
+#include <linux/linkage.h>
+#include <linux/bits.h>
+#include <linux/errno.h>
+
+#include "../../virt/vmx/tdx/tdxcall.S"
+
+/*
+ * Bitmasks of the registers exposed (shared) with the VMM.
+ */
+#define TDX_R10 BIT(10)
+#define TDX_R11 BIT(11)
+#define TDX_R12 BIT(12)
+#define TDX_R13 BIT(13)
+#define TDX_R14 BIT(14)
+#define TDX_R15 BIT(15)
+
+/*
+ * These registers are clobbered to hold arguments for each
+ * TDVMCALL. They are safe to expose to the VMM.
+ * Each bit in this mask represents a register ID. Bit field
+ * details can be found in the TDX GHCI specification, section
+ * titled "TDCALL [TDG.VP.VMCALL] leaf".
+ */
+#define TDVMCALL_EXPOSE_REGS_MASK ( TDX_R10 | TDX_R11 | \
+ TDX_R12 | TDX_R13 | \
+ TDX_R14 | TDX_R15 )
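/*
 * Worked example (illustrative): with BIT(n) expanding to (1 << n), the
 * mask above evaluates to 0xFC00, i.e. bits 10-15 set, exposing exactly
 * R10-R15 to the VMM.
 */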
+
+/*
+ * __tdx_module_call() - Used by TDX guests to request services from
+ * the TDX module (does not include VMM services) using TDCALL instruction.
+ *
+ * Transforms function call register arguments into the TDCALL register ABI.
+ * After TDCALL operation, TDX module output is saved in @out (if it is
+ * provided by the user).
+ *
+ *-------------------------------------------------------------------------
+ * TDCALL ABI:
+ *-------------------------------------------------------------------------
+ * Input Registers:
+ *
+ * RAX - TDCALL Leaf number.
+ * RCX,RDX,R8-R9 - TDCALL Leaf specific input registers.
+ *
+ * Output Registers:
+ *
+ * RAX - TDCALL instruction error code.
+ * RCX,RDX,R8-R11 - TDCALL Leaf specific output registers.
+ *
+ *-------------------------------------------------------------------------
+ *
+ * __tdx_module_call() function ABI:
+ *
+ * @fn (RDI) - TDCALL Leaf ID, moved to RAX
+ * @rcx (RSI) - Input parameter 1, moved to RCX
+ * @rdx (RDX) - Input parameter 2, moved to RDX
+ * @r8 (RCX) - Input parameter 3, moved to R8
+ * @r9 (R8) - Input parameter 4, moved to R9
+ *
+ * @out (R9) - struct tdx_module_output pointer
+ * stored temporarily in R12 (not
+ * shared with the TDX module). It
+ * can be NULL.
+ *
+ * Return status of TDCALL via RAX.
+ */
+SYM_FUNC_START(__tdx_module_call)
+ FRAME_BEGIN
+ TDX_MODULE_CALL host=0
+ FRAME_END
+ RET
+SYM_FUNC_END(__tdx_module_call)
+
+/*
+ * __tdx_hypercall() - Make hypercalls to a TDX VMM using TDVMCALL leaf
+ * of TDCALL instruction
+ *
+ * Transforms values in function call argument struct tdx_hypercall_args @args
+ * into the TDCALL register ABI. After TDCALL operation, VMM output is saved
+ * back in @args.
+ *
+ *-------------------------------------------------------------------------
+ * TD VMCALL ABI:
+ *-------------------------------------------------------------------------
+ *
+ * Input Registers:
+ *
+ * RAX - TDCALL instruction leaf number (0 - TDG.VP.VMCALL)
+ * RCX - BITMAP which controls which part of TD Guest GPR
+ * is passed as-is to the VMM and back.
 * R10 - Set to 0 to indicate the TDCALL follows the standard
 * TDX ABI specification. A non-zero value indicates a
 * vendor-specific ABI.
+ * R11 - VMCALL sub function number
+ * RBX, RBP, RDI, RSI - Used to pass VMCALL sub function specific arguments.
+ * R8-R9, R12-R15 - Same as above.
+ *
+ * Output Registers:
+ *
+ * RAX - TDCALL instruction status (Not related to hypercall
+ * output).
+ * R10 - Hypercall output error code.
+ * R11-R15 - Hypercall sub function specific output values.
+ *
+ *-------------------------------------------------------------------------
+ *
+ * __tdx_hypercall() function ABI:
+ *
+ * @args (RDI) - struct tdx_hypercall_args for input and output
+ * @flags (RSI) - TDX_HCALL_* flags
+ *
+ * On successful completion, return the hypercall error code.
+ */
+SYM_FUNC_START(__tdx_hypercall)
+ FRAME_BEGIN
+
+ /* Save callee-saved GPRs as mandated by the x86_64 ABI */
+ push %r15
+ push %r14
+ push %r13
+ push %r12
+
+ /* Mangle function call ABI into TDCALL ABI: */
+ /* Set TDCALL leaf ID (TDVMCALL (0)) in RAX */
+ xor %eax, %eax
+
+ /* Copy hypercall registers from arg struct: */
+ movq TDX_HYPERCALL_r10(%rdi), %r10
+ movq TDX_HYPERCALL_r11(%rdi), %r11
+ movq TDX_HYPERCALL_r12(%rdi), %r12
+ movq TDX_HYPERCALL_r13(%rdi), %r13
+ movq TDX_HYPERCALL_r14(%rdi), %r14
+ movq TDX_HYPERCALL_r15(%rdi), %r15
+
+ movl $TDVMCALL_EXPOSE_REGS_MASK, %ecx
+
+ /*
+ * For the idle loop STI needs to be called directly before the TDCALL
+ * that enters idle (EXIT_REASON_HLT case). STI instruction enables
+ * interrupts only one instruction later. If there is a window between
+ * STI and the instruction that emulates the HALT state, there is a
+ * chance for interrupts to happen in this window, which can delay the
+ * HLT operation indefinitely. Since this is the not the desired
+ * HLT operation indefinitely. Since this is not the desired result,
+ * conditionally call STI before TDCALL.
+ testq $TDX_HCALL_ISSUE_STI, %rsi
+ jz .Lskip_sti
+ sti
+.Lskip_sti:
+ tdcall
+
+ /*
+ * RAX==0 indicates a failure of the TDVMCALL mechanism itself and that
+ * something has gone horribly wrong with the TDX module.
+ *
+ * The return status of the hypercall operation is in a separate
+ * register (in R10). Hypercall errors are a part of normal operation
+ * and are handled by callers.
+ */
+ testq %rax, %rax
+ jne .Lpanic
+
+ /* TDVMCALL leaf return code is in R10 */
+ movq %r10, %rax
+
+ /* Copy hypercall result registers to arg struct if needed */
+ testq $TDX_HCALL_HAS_OUTPUT, %rsi
+ jz .Lout
+
+ movq %r10, TDX_HYPERCALL_r10(%rdi)
+ movq %r11, TDX_HYPERCALL_r11(%rdi)
+ movq %r12, TDX_HYPERCALL_r12(%rdi)
+ movq %r13, TDX_HYPERCALL_r13(%rdi)
+ movq %r14, TDX_HYPERCALL_r14(%rdi)
+ movq %r15, TDX_HYPERCALL_r15(%rdi)
+.Lout:
+ /*
+ * Zero out registers exposed to the VMM to avoid speculative execution
+ * with VMM-controlled values. This needs to include all registers
+ * present in TDVMCALL_EXPOSE_REGS_MASK (except R12-R15). R12-R15
+ * context will be restored.
+ */
+ xor %r10d, %r10d
+ xor %r11d, %r11d
+
+ /* Restore callee-saved GPRs as mandated by the x86_64 ABI */
+ pop %r12
+ pop %r13
+ pop %r14
+ pop %r15
+
+ FRAME_END
+
+ RET
+.Lpanic:
+ call __tdx_hypercall_failed
+ /* __tdx_hypercall_failed never returns */
+ REACHABLE
+ jmp .Lpanic
+SYM_FUNC_END(__tdx_hypercall)
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
new file mode 100644
index 000000000000..03deb4d6920d
--- /dev/null
+++ b/arch/x86/coco/tdx/tdx.c
@@ -0,0 +1,692 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021-2022 Intel Corporation */
+
+#undef pr_fmt
+#define pr_fmt(fmt) "tdx: " fmt
+
+#include <linux/cpufeature.h>
+#include <asm/coco.h>
+#include <asm/tdx.h>
+#include <asm/vmx.h>
+#include <asm/insn.h>
+#include <asm/insn-eval.h>
+#include <asm/pgtable.h>
+
+/* TDX module Call Leaf IDs */
+#define TDX_GET_INFO 1
+#define TDX_GET_VEINFO 3
+#define TDX_ACCEPT_PAGE 6
+
+/* TDX hypercall Leaf IDs */
+#define TDVMCALL_MAP_GPA 0x10001
+
+/* MMIO direction */
+#define EPT_READ 0
+#define EPT_WRITE 1
+
+/* Port I/O direction */
+#define PORT_READ 0
+#define PORT_WRITE 1
+
+/* See Exit Qualification for I/O Instructions in VMX documentation */
+#define VE_IS_IO_IN(e) ((e) & BIT(3))
+#define VE_GET_IO_SIZE(e) (((e) & GENMASK(2, 0)) + 1)
+#define VE_GET_PORT_NUM(e) ((e) >> 16)
+#define VE_IS_IO_STRING(e) ((e) & BIT(4))
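/*
 * Worked example (illustrative): an exit qualification of 0x00700009
 * decodes as a 2-byte ((9 & 7) + 1) IN (bit 3 set), non-string (bit 4
 * clear) access to port 0x70 (bits 31:16).
 */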
+
+/*
+ * Wrapper for standard use of __tdx_hypercall with no output aside from
+ * return code.
+ */
+static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15)
+{
+ struct tdx_hypercall_args args = {
+ .r10 = TDX_HYPERCALL_STANDARD,
+ .r11 = fn,
+ .r12 = r12,
+ .r13 = r13,
+ .r14 = r14,
+ .r15 = r15,
+ };
+
+ return __tdx_hypercall(&args, 0);
+}
+
+/* Called from __tdx_hypercall() for unrecoverable failure */
+void __tdx_hypercall_failed(void)
+{
+ panic("TDVMCALL failed. TDX module bug?");
+}
+
+/*
+ * The TDG.VP.VMCALL-Instruction-execution sub-functions are defined
+ * independently from but are currently matched 1:1 with VMX EXIT_REASONs.
+ * Reusing the KVM EXIT_REASON macros makes it easier to connect the host and
+ * guest sides of these calls.
+ */
+static u64 hcall_func(u64 exit_reason)
+{
+ return exit_reason;
+}
+
+#ifdef CONFIG_KVM_GUEST
+long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, unsigned long p2,
+ unsigned long p3, unsigned long p4)
+{
+ struct tdx_hypercall_args args = {
+ .r10 = nr,
+ .r11 = p1,
+ .r12 = p2,
+ .r13 = p3,
+ .r14 = p4,
+ };
+
+ return __tdx_hypercall(&args, 0);
+}
+EXPORT_SYMBOL_GPL(tdx_kvm_hypercall);
+#endif
+
+/*
+ * Used by TDX guests to make calls directly to the TDX module. This
+ * should only be used for calls that have no legitimate reason to fail
+ * or where the kernel cannot survive the call failing.
+ */
+static inline void tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
+ struct tdx_module_output *out)
+{
+ if (__tdx_module_call(fn, rcx, rdx, r8, r9, out))
+ panic("TDCALL %lld failed (Buggy TDX module!)\n", fn);
+}
+
+static u64 get_cc_mask(void)
+{
+ struct tdx_module_output out;
+ unsigned int gpa_width;
+
+ /*
+ * TDINFO TDX module call is used to get the TD execution environment
+ * information like GPA width, number of available vcpus, debug mode
+ * information, etc. More details about the ABI can be found in TDX
+ * Guest-Host-Communication Interface (GHCI), section 2.4.2 TDCALL
+ * [TDG.VP.INFO].
+ *
+ * The GPA width that comes out of this call is critical. TDX guests
+ * can not meaningfully run without it.
+ */
+ tdx_module_call(TDX_GET_INFO, 0, 0, 0, 0, &out);
+
+ gpa_width = out.rcx & GENMASK(5, 0);
+
+ /*
+ * The highest bit of a guest physical address is the "sharing" bit.
+ * Set it for shared pages and clear it for private pages.
+ */
+ return BIT_ULL(gpa_width - 1);
+}
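/*
 * Worked example (illustrative): a reported gpa_width of 52 yields
 * BIT_ULL(51), so bit 51 of every guest physical address selects
 * between shared (set) and private (clear).
 */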
+
+static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
+{
+ struct tdx_hypercall_args args = {
+ .r10 = TDX_HYPERCALL_STANDARD,
+ .r11 = hcall_func(EXIT_REASON_HLT),
+ .r12 = irq_disabled,
+ };
+
+ /*
+ * Emulate HLT operation via hypercall. More info about ABI
+ * can be found in TDX Guest-Host-Communication Interface
+ * (GHCI), section 3.8 TDG.VP.VMCALL<Instruction.HLT>.
+ *
+ * The VMM uses the "IRQ disabled" param to understand IRQ
+ * enabled status (RFLAGS.IF) of the TD guest and to determine
+ * whether or not it should schedule the halted vCPU if an
+ * IRQ becomes pending. E.g. if IRQs are disabled, the VMM
+ * can keep the vCPU in virtual HLT, even if an IRQ is
+ * pending, without hanging/breaking the guest.
+ */
+ return __tdx_hypercall(&args, do_sti ? TDX_HCALL_ISSUE_STI : 0);
+}
+
+static bool handle_halt(void)
+{
+ /*
+ * Since the non-safe halt is mainly used for CPU offlining and
+ * the guest will always stay in the halt state, don't call the
+ * STI instruction (set do_sti to false).
+ */
+ const bool irq_disabled = irqs_disabled();
+ const bool do_sti = false;
+
+ if (__halt(irq_disabled, do_sti))
+ return false;
+
+ return true;
+}
+
+void __cpuidle tdx_safe_halt(void)
+{
+ /*
+ * In the do_sti=true case, __tdx_hypercall() enables interrupts
+ * using the STI instruction before the TDCALL, so set
+ * irq_disabled to false.
+ */
+ const bool irq_disabled = false;
+ const bool do_sti = true;
+
+ /*
+ * Use WARN_ONCE() to report the failure.
+ */
+ if (__halt(irq_disabled, do_sti))
+ WARN_ONCE(1, "HLT instruction emulation failed\n");
+}
+
+static bool read_msr(struct pt_regs *regs)
+{
+ struct tdx_hypercall_args args = {
+ .r10 = TDX_HYPERCALL_STANDARD,
+ .r11 = hcall_func(EXIT_REASON_MSR_READ),
+ .r12 = regs->cx,
+ };
+
+ /*
+ * Emulate the MSR read via hypercall. More info about ABI
+ * can be found in TDX Guest-Host-Communication Interface
+ * (GHCI), section titled "TDG.VP.VMCALL<Instruction.RDMSR>".
+ */
+ if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
+ return false;
+
+ regs->ax = lower_32_bits(args.r11);
+ regs->dx = upper_32_bits(args.r11);
+ return true;
+}
+
+static bool write_msr(struct pt_regs *regs)
+{
+ struct tdx_hypercall_args args = {
+ .r10 = TDX_HYPERCALL_STANDARD,
+ .r11 = hcall_func(EXIT_REASON_MSR_WRITE),
+ .r12 = regs->cx,
+ .r13 = (u64)regs->dx << 32 | regs->ax,
+ };
+
+ /*
+ * Emulate the MSR write via hypercall. More info about ABI
+ * can be found in TDX Guest-Host-Communication Interface
+ * (GHCI) section titled "TDG.VP.VMCALL<Instruction.WRMSR>".
+ */
+ return !__tdx_hypercall(&args, 0);
+}
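/*
 * Worked example (illustrative): a WRMSR with EDX:EAX ==
 * 0x00000001:0x00000800 is forwarded as r13 == 0x0000000100000800.
 */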
+
+static bool handle_cpuid(struct pt_regs *regs)
+{
+ struct tdx_hypercall_args args = {
+ .r10 = TDX_HYPERCALL_STANDARD,
+ .r11 = hcall_func(EXIT_REASON_CPUID),
+ .r12 = regs->ax,
+ .r13 = regs->cx,
+ };
+
+ /*
+ * Only allow the VMM to control the range reserved for hypervisor
+ * communication.
+ *
+ * Return all-zeros for any CPUID outside the range. This matches CPU
+ * behaviour for an unsupported leaf.
+ */
+ if (regs->ax < 0x40000000 || regs->ax > 0x4FFFFFFF) {
+ regs->ax = regs->bx = regs->cx = regs->dx = 0;
+ return true;
+ }
+
+ /*
+ * Emulate the CPUID instruction via a hypercall. More info about
+ * ABI can be found in TDX Guest-Host-Communication Interface
+ * (GHCI), section titled "VP.VMCALL<Instruction.CPUID>".
+ */
+ if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
+ return false;
+
+ /*
+ * As per TDX GHCI CPUID ABI, r12-r15 registers contain contents of
+ * EAX, EBX, ECX, EDX registers after the CPUID instruction execution.
+ * So copy the register contents back to pt_regs.
+ */
+ regs->ax = args.r12;
+ regs->bx = args.r13;
+ regs->cx = args.r14;
+ regs->dx = args.r15;
+
+ return true;
+}
+
+static bool mmio_read(int size, unsigned long addr, unsigned long *val)
+{
+ struct tdx_hypercall_args args = {
+ .r10 = TDX_HYPERCALL_STANDARD,
+ .r11 = hcall_func(EXIT_REASON_EPT_VIOLATION),
+ .r12 = size,
+ .r13 = EPT_READ,
+ .r14 = addr,
+ .r15 = *val,
+ };
+
+ if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
+ return false;
+ *val = args.r11;
+ return true;
+}
+
+static bool mmio_write(int size, unsigned long addr, unsigned long val)
+{
+ return !_tdx_hypercall(hcall_func(EXIT_REASON_EPT_VIOLATION), size,
+ EPT_WRITE, addr, val);
+}
+
+static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
+{
+ char buffer[MAX_INSN_SIZE];
+ unsigned long *reg, val;
+ struct insn insn = {};
+ enum mmio_type mmio;
+ int size, extend_size;
+ u8 extend_val = 0;
+
+ /* Only in-kernel MMIO is supported */
+ if (WARN_ON_ONCE(user_mode(regs)))
+ return false;
+
+ if (copy_from_kernel_nofault(buffer, (void *)regs->ip, MAX_INSN_SIZE))
+ return false;
+
+ if (insn_decode(&insn, buffer, MAX_INSN_SIZE, INSN_MODE_64))
+ return false;
+
+ mmio = insn_decode_mmio(&insn, &size);
+ if (WARN_ON_ONCE(mmio == MMIO_DECODE_FAILED))
+ return false;
+
+ if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) {
+ reg = insn_get_modrm_reg_ptr(&insn, regs);
+ if (!reg)
+ return false;
+ }
+
+ ve->instr_len = insn.length;
+
+ /* Handle writes first */
+ switch (mmio) {
+ case MMIO_WRITE:
+ memcpy(&val, reg, size);
+ return mmio_write(size, ve->gpa, val);
+ case MMIO_WRITE_IMM:
+ val = insn.immediate.value;
+ return mmio_write(size, ve->gpa, val);
+ case MMIO_READ:
+ case MMIO_READ_ZERO_EXTEND:
+ case MMIO_READ_SIGN_EXTEND:
+ /* Reads are handled below */
+ break;
+ case MMIO_MOVS:
+ case MMIO_DECODE_FAILED:
+ /*
+ * MMIO was accessed with an instruction that could not be
+ * decoded or handled properly. It was likely not using io.h
+ * helpers or accessed MMIO accidentally.
+ */
+ return false;
+ default:
+ WARN_ONCE(1, "Unknown insn_decode_mmio() decode value?");
+ return false;
+ }
+
+ /* Handle reads */
+ if (!mmio_read(size, ve->gpa, &val))
+ return false;
+
+ switch (mmio) {
+ case MMIO_READ:
+ /* Zero-extend for 32-bit operation */
+ extend_size = size == 4 ? sizeof(*reg) : 0;
+ break;
+ case MMIO_READ_ZERO_EXTEND:
+ /* Zero extend based on operand size */
+ extend_size = insn.opnd_bytes;
+ break;
+ case MMIO_READ_SIGN_EXTEND:
+ /* Sign extend based on operand size */
+ extend_size = insn.opnd_bytes;
+ if (size == 1 && val & BIT(7))
+ extend_val = 0xFF;
+ else if (size > 1 && val & BIT(15))
+ extend_val = 0xFF;
+ break;
+ default:
+ /* All other cases have to be covered by the first switch() */
+ WARN_ON_ONCE(1);
+ return false;
+ }
+
+ if (extend_size)
+ memset(reg, extend_val, extend_size);
+ memcpy(reg, &val, size);
+ return true;
+}
+
+static bool handle_in(struct pt_regs *regs, int size, int port)
+{
+ struct tdx_hypercall_args args = {
+ .r10 = TDX_HYPERCALL_STANDARD,
+ .r11 = hcall_func(EXIT_REASON_IO_INSTRUCTION),
+ .r12 = size,
+ .r13 = PORT_READ,
+ .r14 = port,
+ };
+ u64 mask = GENMASK(BITS_PER_BYTE * size, 0);
+ bool success;
+
+ /*
+ * Emulate the I/O read via hypercall. More info about ABI can be found
+ * in TDX Guest-Host-Communication Interface (GHCI) section titled
+ * "TDG.VP.VMCALL<Instruction.IO>".
+ */
+ success = !__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
+
+ /* Update part of the register affected by the emulated instruction */
+ regs->ax &= ~mask;
+ if (success)
+ regs->ax |= args.r11 & mask;
+
+ return success;
+}
+
+static bool handle_out(struct pt_regs *regs, int size, int port)
+{
+ u64 mask = GENMASK(BITS_PER_BYTE * size, 0);
+
+ /*
+ * Emulate the I/O write via hypercall. More info about ABI can be found
+ * in TDX Guest-Host-Communication Interface (GHCI) section titled
+ * "TDG.VP.VMCALL<Instruction.IO>".
+ */
+ return !_tdx_hypercall(hcall_func(EXIT_REASON_IO_INSTRUCTION), size,
+ PORT_WRITE, port, regs->ax & mask);
+}
+
+/*
+ * Emulate I/O using a hypercall.
+ *
+ * Assumes the I/O instruction was using ax, which is enforced
+ * by the standard io.h macros.
+ *
+ * Return true on success or false on failure.
+ */
+static bool handle_io(struct pt_regs *regs, u32 exit_qual)
+{
+ int size, port;
+ bool in;
+
+ if (VE_IS_IO_STRING(exit_qual))
+ return false;
+
+ in = VE_IS_IO_IN(exit_qual);
+ size = VE_GET_IO_SIZE(exit_qual);
+ port = VE_GET_PORT_NUM(exit_qual);
+
+ if (in)
+ return handle_in(regs, size, port);
+ else
+ return handle_out(regs, size, port);
+}
+
+/*
+ * Early #VE exception handler. Only handles a subset of port I/O.
+ * Intended only for earlyprintk. If it fails, return false.
+ */
+__init bool tdx_early_handle_ve(struct pt_regs *regs)
+{
+ struct ve_info ve;
+
+ tdx_get_ve_info(&ve);
+
+ if (ve.exit_reason != EXIT_REASON_IO_INSTRUCTION)
+ return false;
+
+ return handle_io(regs, ve.exit_qual);
+}
+
+void tdx_get_ve_info(struct ve_info *ve)
+{
+ struct tdx_module_output out;
+
+ /*
+ * Called during #VE handling to retrieve the #VE info from the
+ * TDX module.
+ *
+ * This has to be called early in #VE handling. A "nested" #VE which
+ * occurs before this will raise a #DF and is not recoverable.
+ *
+ * The call retrieves the #VE info from the TDX module, which also
+ * clears the "#VE valid" flag. This must be done before anything else
+ * because any #VE that occurs while the valid flag is set will lead to
+ * #DF.
+ *
+ * Note, the TDX module treats virtual NMIs as inhibited if the #VE
+ * valid flag is set. It means that NMI=>#VE will not result in a #DF.
+ */
+ tdx_module_call(TDX_GET_VEINFO, 0, 0, 0, 0, &out);
+
+ /* Transfer the output parameters */
+ ve->exit_reason = out.rcx;
+ ve->exit_qual = out.rdx;
+ ve->gla = out.r8;
+ ve->gpa = out.r9;
+ ve->instr_len = lower_32_bits(out.r10);
+ ve->instr_info = upper_32_bits(out.r10);
+}
+
+/* Handle a user-initiated #VE */
+static bool virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
+{
+ switch (ve->exit_reason) {
+ case EXIT_REASON_CPUID:
+ return handle_cpuid(regs);
+ default:
+ pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
+ return false;
+ }
+}
+
+/* Handle the kernel #VE */
+static bool virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
+{
+ switch (ve->exit_reason) {
+ case EXIT_REASON_HLT:
+ return handle_halt();
+ case EXIT_REASON_MSR_READ:
+ return read_msr(regs);
+ case EXIT_REASON_MSR_WRITE:
+ return write_msr(regs);
+ case EXIT_REASON_CPUID:
+ return handle_cpuid(regs);
+ case EXIT_REASON_EPT_VIOLATION:
+ return handle_mmio(regs, ve);
+ case EXIT_REASON_IO_INSTRUCTION:
+ return handle_io(regs, ve->exit_qual);
+ default:
+ pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
+ return false;
+ }
+}
+
+bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve)
+{
+ bool ret;
+
+ if (user_mode(regs))
+ ret = virt_exception_user(regs, ve);
+ else
+ ret = virt_exception_kernel(regs, ve);
+
+ /* After successful #VE handling, move the IP */
+ if (ret)
+ regs->ip += ve->instr_len;
+
+ return ret;
+}
+
+static bool tdx_tlb_flush_required(bool private)
+{
+ /*
+ * TDX guest is responsible for flushing TLB on private->shared
+ * transition. VMM is responsible for flushing on shared->private.
+ *
+ * The VMM _can't_ flush private addresses as it can't generate PAs
+ * with the guest's HKID. Shared memory isn't subject to integrity
+ * checking, i.e. the VMM doesn't need to flush for its own protection.
+ *
+ * There's no need to flush when converting from shared to private,
+ * as flushing is the VMM's responsibility in this case, e.g. it must
+ * flush to avoid integrity failures in the face of a buggy or
+ * malicious guest.
+ */
+ return !private;
+}
+
+static bool tdx_cache_flush_required(void)
+{
+ /*
+ * AMD SME/SEV can avoid cache flushing if HW enforces cache coherence.
+ * TDX doesn't have such capability.
+ *
+ * Flush cache unconditionally.
+ */
+ return true;
+}
+
+static bool try_accept_one(phys_addr_t *start, unsigned long len,
+ enum pg_level pg_level)
+{
+ unsigned long accept_size = page_level_size(pg_level);
+ u64 tdcall_rcx;
+ u8 page_size;
+
+ if (!IS_ALIGNED(*start, accept_size))
+ return false;
+
+ if (len < accept_size)
+ return false;
+
+ /*
+ * Pass the page physical address to the TDX module to accept the
+ * pending, private page.
+ *
+ * Bits 2:0 of RCX encode page size: 0 - 4K, 1 - 2M, 2 - 1G.
+ */
+ switch (pg_level) {
+ case PG_LEVEL_4K:
+ page_size = 0;
+ break;
+ case PG_LEVEL_2M:
+ page_size = 1;
+ break;
+ case PG_LEVEL_1G:
+ page_size = 2;
+ break;
+ default:
+ return false;
+ }
+
+ tdcall_rcx = *start | page_size;
+ if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL))
+ return false;
+
+ *start += accept_size;
+ return true;
+}
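/*
 * Worked example (illustrative): accepting the 2M page at GPA
 * 0x40000000 passes tdcall_rcx == 0x40000001 (address | page_size 1);
 * the alignment check above guarantees the low bits are free to carry
 * the size code.
 */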
+
+/*
+ * Inform the VMM of the guest's intent for this physical page: shared with
+ * the VMM or private to the guest. The VMM is expected to change its mapping
+ * of the page in response.
+ */
+static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
+{
+ phys_addr_t start = __pa(vaddr);
+ phys_addr_t end = __pa(vaddr + numpages * PAGE_SIZE);
+
+ if (!enc) {
+ /* Set the shared (decrypted) bits: */
+ start |= cc_mkdec(0);
+ end |= cc_mkdec(0);
+ }
+
+ /*
+ * Notify the VMM about page mapping conversion. More info about ABI
+ * can be found in TDX Guest-Host-Communication Interface (GHCI),
+ * section "TDG.VP.VMCALL<MapGPA>"
+ */
+ if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0))
+ return false;
+
+ /* private->shared conversion requires only MapGPA call */
+ if (!enc)
+ return true;
+
+ /*
+ * For shared->private conversion, accept the page using
+ * TDX_ACCEPT_PAGE TDX module call.
+ */
+ while (start < end) {
+ unsigned long len = end - start;
+
+ /*
+ * Try larger accepts first. It gives the VMM a chance to keep
+ * 1G/2M SEPT entries where possible and speeds up the process by
+ * cutting the number of hypercalls (if successful).
+ */
+
+ if (try_accept_one(&start, len, PG_LEVEL_1G))
+ continue;
+
+ if (try_accept_one(&start, len, PG_LEVEL_2M))
+ continue;
+
+ if (!try_accept_one(&start, len, PG_LEVEL_4K))
+ return false;
+ }
+
+ return true;
+}
+
+void __init tdx_early_init(void)
+{
+ u64 cc_mask;
+ u32 eax, sig[3];
+
+ cpuid_count(TDX_CPUID_LEAF_ID, 0, &eax, &sig[0], &sig[2], &sig[1]);
+
+ if (memcmp(TDX_IDENT, sig, sizeof(sig)))
+ return;
+
+ setup_force_cpu_cap(X86_FEATURE_TDX_GUEST);
+
+ cc_set_vendor(CC_VENDOR_INTEL);
+ cc_mask = get_cc_mask();
+ cc_set_mask(cc_mask);
+
+ /*
+ * All bits above the GPA width are reserved and the kernel treats the
+ * shared bit as a flag, not as part of the physical address.
+ *
+ * Adjust physical mask to only cover valid GPA bits.
+ */
+ physical_mask &= cc_mask - 1;
+
+ x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
+ x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
+ x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed;
+
+ pr_info("Guest detected\n");
+}
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index a4c061fb7c6e..29b36e9e4e74 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -63,7 +63,7 @@ For 32-bit we have the following conventions - kernel is built with
* for assembly code:
*/
-.macro PUSH_REGS rdx=%rdx rax=%rax save_ret=0
+.macro PUSH_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0
.if \save_ret
pushq %rsi /* pt_regs->si */
movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */
@@ -73,7 +73,7 @@ For 32-bit we have the following conventions - kernel is built with
pushq %rsi /* pt_regs->si */
.endif
pushq \rdx /* pt_regs->dx */
- pushq %rcx /* pt_regs->cx */
+ pushq \rcx /* pt_regs->cx */
pushq \rax /* pt_regs->ax */
pushq %r8 /* pt_regs->r8 */
pushq %r9 /* pt_regs->r9 */
@@ -99,6 +99,7 @@ For 32-bit we have the following conventions - kernel is built with
* well before they could be put to use in a speculative execution
* gadget.
*/
+ xorl %esi, %esi /* nospec si */
xorl %edx, %edx /* nospec dx */
xorl %ecx, %ecx /* nospec cx */
xorl %r8d, %r8d /* nospec r8 */
@@ -114,32 +115,24 @@ For 32-bit we have the following conventions - kernel is built with
.endm
-.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
- PUSH_REGS rdx=\rdx, rax=\rax, save_ret=\save_ret
+.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0
+ PUSH_REGS rdx=\rdx, rcx=\rcx, rax=\rax, save_ret=\save_ret
CLEAR_REGS
.endm
-.macro POP_REGS pop_rdi=1 skip_r11rcx=0
+.macro POP_REGS pop_rdi=1
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbp
popq %rbx
- .if \skip_r11rcx
- popq %rsi
- .else
popq %r11
- .endif
popq %r10
popq %r9
popq %r8
popq %rax
- .if \skip_r11rcx
- popq %rsi
- .else
popq %rcx
- .endif
popq %rdx
popq %rsi
.if \pop_rdi
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 73d958522b6a..4300ba49b5ee 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -191,8 +191,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
* perf profiles. Nothing jumps here.
*/
syscall_return_via_sysret:
- /* rcx and r11 are already restored (see code above) */
- POP_REGS pop_rdi=0 skip_r11rcx=1
+ POP_REGS pop_rdi=0
/*
* Now all regs are restored except RSP and RDI.
@@ -215,8 +214,13 @@ syscall_return_via_sysret:
popq %rdi
popq %rsp
+SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL)
+ ANNOTATE_NOENDBR
swapgs
sysretq
+SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL)
+ ANNOTATE_NOENDBR
+ int3
SYM_CODE_END(entry_SYSCALL_64)
/*
@@ -318,6 +322,14 @@ SYM_CODE_END(ret_from_fork)
#endif
.endm
+/* Save all registers in pt_regs */
+SYM_CODE_START_LOCAL(push_and_clear_regs)
+ UNWIND_HINT_FUNC
+ PUSH_AND_CLEAR_REGS save_ret=1
+ ENCODE_FRAME_POINTER 8
+ RET
+SYM_CODE_END(push_and_clear_regs)
+
/**
* idtentry_body - Macro to emit code calling the C function
* @cfunc: C function to be called
@@ -325,7 +337,21 @@ SYM_CODE_END(ret_from_fork)
*/
.macro idtentry_body cfunc has_error_code:req
- call error_entry
+ call push_and_clear_regs
+ UNWIND_HINT_REGS
+
+ /*
+ * Call error_entry() and switch to the task stack if from userspace.
+ *
+ * When in XENPV, it is already in the task stack, and it can't fault
+ * for native_iret() nor native_load_gs_index() since XENPV uses its
+ * own pvops for IRET and load_gs_index(). And it doesn't need to
+ * switch the CR3. So it can skip invoking error_entry().
+ */
+ ALTERNATIVE "call error_entry; movq %rax, %rsp", \
+ "", X86_FEATURE_XENPV
+
+ ENCODE_FRAME_POINTER
UNWIND_HINT_REGS
movq %rsp, %rdi /* pt_regs pointer into 1st argument*/
@@ -358,6 +384,7 @@ SYM_CODE_START(\asmsym)
UNWIND_HINT_IRET_REGS offset=\has_error_code*8
ENDBR
ASM_CLAC
+ cld
.if \has_error_code == 0
pushq $-1 /* ORIG_RAX: no syscall to restart */
@@ -426,6 +453,7 @@ SYM_CODE_START(\asmsym)
UNWIND_HINT_IRET_REGS
ENDBR
ASM_CLAC
+ cld
pushq $-1 /* ORIG_RAX: no syscall to restart */
@@ -482,6 +510,7 @@ SYM_CODE_START(\asmsym)
UNWIND_HINT_IRET_REGS
ENDBR
ASM_CLAC
+ cld
/*
* If the entry is from userspace, switch stacks and treat it as
@@ -508,6 +537,7 @@ SYM_CODE_START(\asmsym)
call vc_switch_off_ist
movq %rax, %rsp /* Switch to new stack */
+ ENCODE_FRAME_POINTER
UNWIND_HINT_REGS
/* Update pt_regs */
@@ -544,6 +574,7 @@ SYM_CODE_START(\asmsym)
UNWIND_HINT_IRET_REGS offset=8
ENDBR
ASM_CLAC
+ cld
/* paranoid_entry returns GS information for paranoid_exit in EBX. */
call paranoid_entry
@@ -869,7 +900,6 @@ SYM_CODE_END(xen_failsafe_callback)
*/
SYM_CODE_START_LOCAL(paranoid_entry)
UNWIND_HINT_FUNC
- cld
PUSH_AND_CLEAR_REGS save_ret=1
ENCODE_FRAME_POINTER 8
@@ -983,13 +1013,10 @@ SYM_CODE_START_LOCAL(paranoid_exit)
SYM_CODE_END(paranoid_exit)
/*
- * Save all registers in pt_regs, and switch GS if needed.
+ * Switch GS and CR3 if needed.
*/
SYM_CODE_START_LOCAL(error_entry)
UNWIND_HINT_FUNC
- cld
- PUSH_AND_CLEAR_REGS save_ret=1
- ENCODE_FRAME_POINTER 8
testb $3, CS+8(%rsp)
jz .Lerror_kernelspace
@@ -997,19 +1024,15 @@ SYM_CODE_START_LOCAL(error_entry)
* We entered from user mode or we're pretending to have entered
* from user mode due to an IRET fault.
*/
- SWAPGS
+ swapgs
FENCE_SWAPGS_USER_ENTRY
/* We have user CR3. Change to kernel CR3. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */
.Lerror_entry_from_usermode_after_swapgs:
/* Put us onto the real thread stack. */
- popq %r12 /* save return addr in %12 */
- movq %rsp, %rdi /* arg0 = pt_regs pointer */
call sync_regs
- movq %rax, %rsp /* switch stack */
- ENCODE_FRAME_POINTER
- pushq %r12
RET
/*
@@ -1033,7 +1056,7 @@ SYM_CODE_START_LOCAL(error_entry)
* gsbase and proceed. We'll fix up the exception and land in
* .Lgs_change's error handler with kernel gsbase.
*/
- SWAPGS
+ swapgs
/*
* Issue an LFENCE to prevent GS speculation, regardless of whether it is a
@@ -1041,6 +1064,7 @@ SYM_CODE_START_LOCAL(error_entry)
*/
.Lerror_entry_done_lfence:
FENCE_SWAPGS_KERNEL_ENTRY
+ leaq 8(%rsp), %rax /* return pt_regs pointer */
RET
.Lbstep_iret:
@@ -1053,7 +1077,7 @@ SYM_CODE_START_LOCAL(error_entry)
* We came from an IRET to user mode, so we have user
* gsbase and CR3. Switch to kernel gsbase and CR3:
*/
- SWAPGS
+ swapgs
FENCE_SWAPGS_USER_ENTRY
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
@@ -1061,9 +1085,9 @@ SYM_CODE_START_LOCAL(error_entry)
* Pretend that the exception came from user mode: set up pt_regs
* as if we faulted immediately after IRET.
*/
- mov %rsp, %rdi
+ leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */
call fixup_bad_iret
- mov %rax, %rsp
+ mov %rax, %rdi
jmp .Lerror_entry_from_usermode_after_swapgs
SYM_CODE_END(error_entry)
@@ -1126,6 +1150,7 @@ SYM_CODE_START(asm_exc_nmi)
*/
ASM_CLAC
+ cld
/* Use %rdx as our temp variable throughout */
pushq %rdx
@@ -1145,7 +1170,6 @@ SYM_CODE_START(asm_exc_nmi)
*/
swapgs
- cld
FENCE_SWAPGS_USER_ENTRY
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
movq %rsp, %rdx
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 4fdb007cddbd..d1052742ad0c 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -50,7 +50,7 @@ SYM_CODE_START(entry_SYSENTER_compat)
UNWIND_HINT_EMPTY
ENDBR
/* Interrupts are off on entry. */
- SWAPGS
+ swapgs
pushq %rax
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
@@ -83,32 +83,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
movl %eax, %eax
pushq %rax /* pt_regs->orig_ax */
- pushq %rdi /* pt_regs->di */
- pushq %rsi /* pt_regs->si */
- pushq %rdx /* pt_regs->dx */
- pushq %rcx /* pt_regs->cx */
- pushq $-ENOSYS /* pt_regs->ax */
- pushq $0 /* pt_regs->r8 = 0 */
- xorl %r8d, %r8d /* nospec r8 */
- pushq $0 /* pt_regs->r9 = 0 */
- xorl %r9d, %r9d /* nospec r9 */
- pushq $0 /* pt_regs->r10 = 0 */
- xorl %r10d, %r10d /* nospec r10 */
- pushq $0 /* pt_regs->r11 = 0 */
- xorl %r11d, %r11d /* nospec r11 */
- pushq %rbx /* pt_regs->rbx */
- xorl %ebx, %ebx /* nospec rbx */
- pushq %rbp /* pt_regs->rbp (will be overwritten) */
- xorl %ebp, %ebp /* nospec rbp */
- pushq $0 /* pt_regs->r12 = 0 */
- xorl %r12d, %r12d /* nospec r12 */
- pushq $0 /* pt_regs->r13 = 0 */
- xorl %r13d, %r13d /* nospec r13 */
- pushq $0 /* pt_regs->r14 = 0 */
- xorl %r14d, %r14d /* nospec r14 */
- pushq $0 /* pt_regs->r15 = 0 */
- xorl %r15d, %r15d /* nospec r15 */
-
+ PUSH_AND_CLEAR_REGS rax=$-ENOSYS
UNWIND_HINT_REGS
cld
@@ -225,35 +200,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
movl %eax, %eax /* discard orig_ax high bits */
pushq %rax /* pt_regs->orig_ax */
- pushq %rdi /* pt_regs->di */
- pushq %rsi /* pt_regs->si */
- xorl %esi, %esi /* nospec si */
- pushq %rdx /* pt_regs->dx */
- xorl %edx, %edx /* nospec dx */
- pushq %rbp /* pt_regs->cx (stashed in bp) */
- xorl %ecx, %ecx /* nospec cx */
- pushq $-ENOSYS /* pt_regs->ax */
- pushq $0 /* pt_regs->r8 = 0 */
- xorl %r8d, %r8d /* nospec r8 */
- pushq $0 /* pt_regs->r9 = 0 */
- xorl %r9d, %r9d /* nospec r9 */
- pushq $0 /* pt_regs->r10 = 0 */
- xorl %r10d, %r10d /* nospec r10 */
- pushq $0 /* pt_regs->r11 = 0 */
- xorl %r11d, %r11d /* nospec r11 */
- pushq %rbx /* pt_regs->rbx */
- xorl %ebx, %ebx /* nospec rbx */
- pushq %rbp /* pt_regs->rbp (will be overwritten) */
- xorl %ebp, %ebp /* nospec rbp */
- pushq $0 /* pt_regs->r12 = 0 */
- xorl %r12d, %r12d /* nospec r12 */
- pushq $0 /* pt_regs->r13 = 0 */
- xorl %r13d, %r13d /* nospec r13 */
- pushq $0 /* pt_regs->r14 = 0 */
- xorl %r14d, %r14d /* nospec r14 */
- pushq $0 /* pt_regs->r15 = 0 */
- xorl %r15d, %r15d /* nospec r15 */
-
+ PUSH_AND_CLEAR_REGS rcx=%rbp rax=$-ENOSYS
UNWIND_HINT_REGS
movq %rsp, %rdi
@@ -297,6 +244,8 @@ sysret32_from_system_call:
* code. We zero R8-R10 to avoid info leaks.
*/
movq RSP-ORIG_RAX(%rsp), %rsp
+SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL)
+ ANNOTATE_NOENDBR
/*
* The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored
@@ -314,6 +263,9 @@ sysret32_from_system_call:
xorl %r10d, %r10d
swapgs
sysretl
+SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
+ ANNOTATE_NOENDBR
+ int3
SYM_CODE_END(entry_SYSCALL_compat)
/*
@@ -362,54 +314,25 @@ SYM_CODE_START(entry_INT80_compat)
/* switch to thread stack expects orig_ax and rdi to be pushed */
pushq %rax /* pt_regs->orig_ax */
- pushq %rdi /* pt_regs->di */
/* Need to switch before accessing the thread stack. */
- SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
/* In the Xen PV case we already run on the thread stack. */
ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV
- movq %rsp, %rdi
+ movq %rsp, %rax
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- pushq 6*8(%rdi) /* regs->ss */
- pushq 5*8(%rdi) /* regs->rsp */
- pushq 4*8(%rdi) /* regs->eflags */
- pushq 3*8(%rdi) /* regs->cs */
- pushq 2*8(%rdi) /* regs->ip */
- pushq 1*8(%rdi) /* regs->orig_ax */
- pushq (%rdi) /* pt_regs->di */
+ pushq 5*8(%rax) /* regs->ss */
+ pushq 4*8(%rax) /* regs->rsp */
+ pushq 3*8(%rax) /* regs->eflags */
+ pushq 2*8(%rax) /* regs->cs */
+ pushq 1*8(%rax) /* regs->ip */
+ pushq 0*8(%rax) /* regs->orig_ax */
.Lint80_keep_stack:
- pushq %rsi /* pt_regs->si */
- xorl %esi, %esi /* nospec si */
- pushq %rdx /* pt_regs->dx */
- xorl %edx, %edx /* nospec dx */
- pushq %rcx /* pt_regs->cx */
- xorl %ecx, %ecx /* nospec cx */
- pushq $-ENOSYS /* pt_regs->ax */
- pushq %r8 /* pt_regs->r8 */
- xorl %r8d, %r8d /* nospec r8 */
- pushq %r9 /* pt_regs->r9 */
- xorl %r9d, %r9d /* nospec r9 */
- pushq %r10 /* pt_regs->r10*/
- xorl %r10d, %r10d /* nospec r10 */
- pushq %r11 /* pt_regs->r11 */
- xorl %r11d, %r11d /* nospec r11 */
- pushq %rbx /* pt_regs->rbx */
- xorl %ebx, %ebx /* nospec rbx */
- pushq %rbp /* pt_regs->rbp */
- xorl %ebp, %ebp /* nospec rbp */
- pushq %r12 /* pt_regs->r12 */
- xorl %r12d, %r12d /* nospec r12 */
- pushq %r13 /* pt_regs->r13 */
- xorl %r13d, %r13d /* nospec r13 */
- pushq %r14 /* pt_regs->r14 */
- xorl %r14d, %r14d /* nospec r14 */
- pushq %r15 /* pt_regs->r15 */
- xorl %r15d, %r15d /* nospec r15 */
-
+ PUSH_AND_CLEAR_REGS rax=$-ENOSYS
UNWIND_HINT_REGS
cld
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 693f8b9031fb..c2a8b76ae0bc 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -91,7 +91,7 @@ ifneq ($(RETPOLINE_VDSO_CFLAGS),)
endif
endif
-$(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
+$(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
#
# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
@@ -148,6 +148,7 @@ KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out $(RANDSTRUCT_CFLAGS),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_LTO),$(KBUILD_CFLAGS_32))
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 235a5794296a..1000d457c332 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -438,7 +438,7 @@ bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
static __init int vdso_setup(char *s)
{
vdso64_enabled = simple_strtoul(s, NULL, 0);
- return 0;
+ return 1;
}
__setup("vdso=", vdso_setup);
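
For context on the one-line fix above: a __setup() handler returns 1 once it has consumed the parameter; returning 0 makes the core treat "vdso=" as unknown and pass it along to init. A minimal sketch of the convention (example_setup is hypothetical):

static int __init example_setup(char *s)
{
	/* parse s here */
	return 1;	/* handled; returning 0 would pass "example=" to init */
}
__setup("example=", example_setup);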
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index fd2ee9408e91..4af81df133ee 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -48,7 +48,7 @@ static enum { EMULATE, XONLY, NONE } vsyscall_mode __ro_after_init =
#elif defined(CONFIG_LEGACY_VSYSCALL_XONLY)
XONLY;
#else
- EMULATE;
+ #error VSYSCALL config is broken
#endif
static int __init vsyscall_setup(char *str)
diff --git a/arch/x86/events/Kconfig b/arch/x86/events/Kconfig
index d6cdfe631674..09c56965750a 100644
--- a/arch/x86/events/Kconfig
+++ b/arch/x86/events/Kconfig
@@ -44,4 +44,12 @@ config PERF_EVENTS_AMD_UNCORE
To compile this driver as a module, choose M here: the
module will be called 'amd-uncore'.
+
+config PERF_EVENTS_AMD_BRS
+ depends on PERF_EVENTS && CPU_SUP_AMD
+ bool "AMD Zen3 Branch Sampling support"
+ help
+ Enable AMD Zen3 branch sampling support (BRS) which samples up to
+ 16 consecutive taken branches in registers.
+
endmenu
diff --git a/arch/x86/events/amd/Makefile b/arch/x86/events/amd/Makefile
index 6cbe38d5fd9d..b9f5d4610256 100644
--- a/arch/x86/events/amd/Makefile
+++ b/arch/x86/events/amd/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_CPU_SUP_AMD) += core.o
+obj-$(CONFIG_PERF_EVENTS_AMD_BRS) += brs.o
obj-$(CONFIG_PERF_EVENTS_AMD_POWER) += power.o
obj-$(CONFIG_X86_LOCAL_APIC) += ibs.o
obj-$(CONFIG_PERF_EVENTS_AMD_UNCORE) += amd-uncore.o
diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c
new file mode 100644
index 000000000000..bee8765a1e9b
--- /dev/null
+++ b/arch/x86/events/amd/brs.c
@@ -0,0 +1,367 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Implement support for AMD Fam19h Branch Sampling feature
+ * Based on specifications published in AMD PPR Fam19 Model 01
+ *
+ * Copyright 2021 Google LLC
+ * Contributed by Stephane Eranian <eranian@google.com>
+ */
+#include <linux/kernel.h>
+#include <linux/jump_label.h>
+#include <asm/msr.h>
+#include <asm/cpufeature.h>
+
+#include "../perf_event.h"
+
+#define BRS_POISON 0xFFFFFFFFFFFFFFFEULL /* mark limit of valid entries */
+
+/* Debug Extension Configuration register layout */
+union amd_debug_extn_cfg {
+ __u64 val;
+ struct {
+ __u64 rsvd0:2, /* reserved */
+ brsmen:1, /* branch sample enable */
+ rsvd4_3:2,/* reserved - must be 0x3 */
+ vb:1, /* valid branches recorded */
+ rsvd2:10, /* reserved */
+ msroff:4, /* index of next entry to write */
+ rsvd3:4, /* reserved */
+ pmc:3, /* #PMC holding the sampling event */
+ rsvd4:37; /* reserved */
+ };
+};
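/*
 * Usage sketch (illustrative): the union lets the raw MSR value be
 * inspected field-wise, e.g.:
 *
 *	union amd_debug_extn_cfg cfg;
 *
 *	cfg.val = get_debug_extn_cfg();
 *	if (cfg.brsmen)
 *		pr_debug("BRS on, next slot %llu\n",
 *			 (unsigned long long)cfg.msroff);
 */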
+
+static inline unsigned int brs_from(int idx)
+{
+ return MSR_AMD_SAMP_BR_FROM + 2 * idx;
+}
+
+static inline unsigned int brs_to(int idx)
+{
+ return MSR_AMD_SAMP_BR_FROM + 2 * idx + 1;
+}
+
+static inline void set_debug_extn_cfg(u64 val)
+{
+ /* bits[4:3] must always be set to 11b */
+ wrmsrl(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3);
+}
+
+static inline u64 get_debug_extn_cfg(void)
+{
+ u64 val;
+
+ rdmsrl(MSR_AMD_DBG_EXTN_CFG, val);
+ return val;
+}
+
+static bool __init amd_brs_detect(void)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_BRS))
+ return false;
+
+ switch (boot_cpu_data.x86) {
+ case 0x19: /* AMD Fam19h (Zen3) */
+ x86_pmu.lbr_nr = 16;
+
+ /* No hardware filtering supported */
+ x86_pmu.lbr_sel_map = NULL;
+ x86_pmu.lbr_sel_mask = 0;
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * The current BRS implementation does not support branch type or
+ * privilege level filtering, so this function simply enforces these
+ * limitations; no br_sel_map is needed. Software filtering is not
+ * supported because it would not correlate well with a sampling period.
+ */
+int amd_brs_setup_filter(struct perf_event *event)
+{
+ u64 type = event->attr.branch_sample_type;
+
+ /* No BRS support */
+ if (!x86_pmu.lbr_nr)
+ return -EOPNOTSUPP;
+
+ /* Can only capture all branches, i.e., no filtering */
+ if ((type & ~PERF_SAMPLE_BRANCH_PLM_ALL) != PERF_SAMPLE_BRANCH_ANY)
+ return -EINVAL;
+
+ return 0;
+}
+
+/* tos = top of stack, i.e., last valid entry written */
+static inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg)
+{
+ /*
+ * msroff: index of next entry to write so top-of-stack is one off
+ * msroff is the index of the next entry to write, so top-of-stack
+ * is one off; if BRS is full, msroff wraps back to 0.
+ return (cfg->msroff ? cfg->msroff : x86_pmu.lbr_nr) - 1;
+}
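/*
 * Worked example (illustrative): with lbr_nr == 16, msroff == 5 gives
 * tos == 4 (the last written entry), while msroff == 0 (buffer full,
 * wrapped) gives tos == 15.
 */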
+
+/*
+ * make sure we have a sane BRS offset to begin with
+ * especially with kexec
+ */
+void amd_brs_reset(void)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_BRS))
+ return;
+
+ /*
+ * Reset config
+ */
+ set_debug_extn_cfg(0);
+
+ /*
+ * Mark first entry as poisoned
+ */
+ wrmsrl(brs_to(0), BRS_POISON);
+}
+
+int __init amd_brs_init(void)
+{
+ if (!amd_brs_detect())
+ return -EOPNOTSUPP;
+
+ pr_cont("%d-deep BRS, ", x86_pmu.lbr_nr);
+
+ return 0;
+}
+
+void amd_brs_enable(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ union amd_debug_extn_cfg cfg;
+
+ /* Activate only on first user */
+ if (++cpuc->brs_active > 1)
+ return;
+
+ cfg.val = 0; /* reset all fields */
+ cfg.brsmen = 1; /* enable branch sampling */
+
+ /* Set enable bit */
+ set_debug_extn_cfg(cfg.val);
+}
+
+void amd_brs_enable_all(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ if (cpuc->lbr_users)
+ amd_brs_enable();
+}
+
+void amd_brs_disable(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ union amd_debug_extn_cfg cfg;
+
+ /* Check if active (could be disabled via x86_pmu_disable_all()) */
+ if (!cpuc->brs_active)
+ return;
+
+ /* Only disable for last user */
+ if (--cpuc->brs_active)
+ return;
+
+ /*
+ * Clear the brsmen bit but preserve the others as they contain
+ * useful state such as vb and msroff
+ */
+ cfg.val = get_debug_extn_cfg();
+
+ /*
+ * When coming in on an interrupt with BRS full, the hardware will
+ * have already stopped BRS; no need to issue the wrmsr again.
+ */
+ if (cfg.brsmen) {
+ cfg.brsmen = 0;
+ set_debug_extn_cfg(cfg.val);
+ }
+}
+
+void amd_brs_disable_all(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ if (cpuc->lbr_users)
+ amd_brs_disable();
+}
+
+static bool amd_brs_match_plm(struct perf_event *event, u64 to)
+{
+ int type = event->attr.branch_sample_type;
+ int plm_k = PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_HV;
+ int plm_u = PERF_SAMPLE_BRANCH_USER;
+
+ if (!(type & plm_k) && kernel_ip(to))
+ return 0;
+
+ if (!(type & plm_u) && !kernel_ip(to))
+ return 0;
+
+ return 1;
+}
+
+/*
+ * Caller must ensure amd_brs_inuse() is true before calling.
+ */
+void amd_brs_drain(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct perf_event *event = cpuc->events[0];
+ struct perf_branch_entry *br = cpuc->lbr_entries;
+ union amd_debug_extn_cfg cfg;
+ u32 i, nr = 0, num, tos, start;
+ u32 shift = 64 - boot_cpu_data.x86_virt_bits;
+
+ /*
+ * The BRS event is forced on PMC0, so check if there is an event.
+ * It is possible to have lbr_users > 0 but the event not yet
+ * scheduled due to a long-latency PMU irq.
+ */
+ if (!event)
+ goto empty;
+
+ cfg.val = get_debug_extn_cfg();
+
+ /* Sanity check [0-x86_pmu.lbr_nr] */
+ if (WARN_ON_ONCE(cfg.msroff >= x86_pmu.lbr_nr))
+ goto empty;
+
+ /* No valid branch */
+ if (cfg.vb == 0)
+ goto empty;
+
+ /*
+ * msroff points to the next entry to be written, so
+ * tos = most recent entry index = msroff - 1.
+ * The BRS register buffer saturates, so we know we have
+ * start < tos and that we have to read from start to tos.
+ */
+ start = 0;
+ tos = amd_brs_get_tos(&cfg);
+
+ num = tos - start + 1;
+
+ /*
+ * BRS is only one pass (saturation) from MSROFF to depth-1
+ * MSROFF wraps to zero when buffer is full
+ */
+ for (i = 0; i < num; i++) {
+ u32 brs_idx = tos - i;
+ u64 from, to;
+
+ rdmsrl(brs_to(brs_idx), to);
+
+ /* Entry does not belong to us (as marked by kernel) */
+ if (to == BRS_POISON)
+ break;
+
+ /*
+ * Sign-extend SAMP_BR_TO to 64 bits, bits 61-63 are reserved.
+ * Necessary to generate proper virtual addresses suitable for
+ * symbolization
+ */
+ to = (u64)(((s64)to << shift) >> shift);
+
+ if (!amd_brs_match_plm(event, to))
+ continue;
+
+ rdmsrl(brs_from(brs_idx), from);
+
+ perf_clear_branch_entry_bitfields(br+nr);
+
+ br[nr].from = from;
+ br[nr].to = to;
+
+ nr++;
+ }
+empty:
+ /* Record number of sampled branches */
+ cpuc->lbr_stack.nr = nr;
+}
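/*
 * Worked example for the sign extension above (illustrative): with
 * x86_virt_bits == 48, shift == 16, a raw target of 0x0000ffffabcd0123
 * becomes the canonical kernel address 0xffffffffabcd0123.
 */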
+
+/*
+ * Poison the most recent entry to prevent reuse by the next task.
+ * This is required because BRS entries are not tagged by PID.
+ */
+static void amd_brs_poison_buffer(void)
+{
+ union amd_debug_extn_cfg cfg;
+ unsigned int idx;
+
+ /* Get current state */
+ cfg.val = get_debug_extn_cfg();
+
+ /* idx is most recently written entry */
+ idx = amd_brs_get_tos(&cfg);
+
+ /* Poison target of entry */
+ wrmsrl(brs_to(idx), BRS_POISON);
+}
+
+/*
+ * On context switch in, we need to make sure no samples from previous user
+ * are left in the BRS.
+ *
+ * On ctxswin, sched_in = true, called after the PMU has started
+ * On ctxswout, sched_in = false, called before the PMU is stopped
+ */
+void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ /* no active users */
+ if (!cpuc->lbr_users)
+ return;
+
+ /*
+ * On context switch in, we need to ensure we do not use entries
+ * from the previous BRS user on that CPU; poisoning the buffer
+ * is faster than resetting all entries.
+ */
+ if (sched_in)
+ amd_brs_poison_buffer();
+}
+
+/*
+ * called from ACPI processor_idle.c or acpi_pad.c
+ * with interrupts disabled
+ */
+void perf_amd_brs_lopwr_cb(bool lopwr_in)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ union amd_debug_extn_cfg cfg;
+
+ /*
+ * On mwait in, we may end up in a non-C0 state. We must disable
+ * branch sampling to avoid holding the NMI for too long. We disable
+ * it in hardware but keep the state in cpuc, so we can re-enable it.
+ *
+ * The hardware will deliver the NMI if needed when brsmen is cleared.
+ */
+ if (cpuc->brs_active) {
+ cfg.val = get_debug_extn_cfg();
+ cfg.brsmen = !lopwr_in;
+ set_debug_extn_cfg(cfg.val);
+ }
+}
+
+DEFINE_STATIC_CALL_NULL(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
+EXPORT_STATIC_CALL_TRAMP_GPL(perf_lopwr_cb);
+
+void __init amd_brs_lopwr_init(void)
+{
+ static_call_update(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
+}
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 9687a8aef01c..9ac3718410ce 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/perf_event.h>
+#include <linux/jump_label.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/init.h>
@@ -7,6 +8,7 @@
#include <linux/delay.h>
#include <linux/jiffies.h>
#include <asm/apicdef.h>
+#include <asm/apic.h>
#include <asm/nmi.h>
#include "../perf_event.h"
@@ -18,6 +20,9 @@ static unsigned long perf_nmi_window;
#define AMD_MERGE_EVENT ((0xFULL << 32) | 0xFFULL)
#define AMD_MERGE_EVENT_ENABLE (AMD_MERGE_EVENT | ARCH_PERFMON_EVENTSEL_ENABLE)
+/* PMC Enable and Overflow bits for PerfCntrGlobal* registers */
+static u64 amd_pmu_global_cntr_mask __read_mostly;
+
static __initconst const u64 amd_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -325,8 +330,16 @@ static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc)
}
}
+#define AMD_FAM19H_BRS_EVENT 0xc4 /* RETIRED_TAKEN_BRANCH_INSTRUCTIONS */
+static inline int amd_is_brs_event(struct perf_event *e)
+{
+ return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT;
+}
+
static int amd_core_hw_config(struct perf_event *event)
{
+ int ret = 0;
+
if (event->attr.exclude_host && event->attr.exclude_guest)
/*
* When HO == GO == 1 the hardware treats that as GO == HO == 0
@@ -343,7 +356,66 @@ static int amd_core_hw_config(struct perf_event *event)
if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw))
event->hw.flags |= PERF_X86_EVENT_PAIR;
- return 0;
+ /*
+ * if branch stack is requested
+ */
+ if (has_branch_stack(event)) {
+ /*
+ * Due to interrupt holding, BRS is not recommended in
+ * counting mode.
+ */
+ if (!is_sampling_event(event))
+ return -EINVAL;
+
+ /*
+ * Due to the way BRS operates by holding the interrupt until
+ * lbr_nr entries have been captured, it does not make sense
+ * to allow sampling on BRS with an event that does not match
+ * what BRS is capturing, i.e., retired taken branches.
+ * Otherwise the correlation with the event's period is even
+ * more loose:
+ *
+ * With retired taken branch:
+ * Effective P = P + 16 + X
+ * With any other event:
+ * Effective P = P + Y + X
+ *
+ * Where X is the number of taken branches due to interrupt
+ * skid. Skid is large.
+ *
+ * Where Y is the number of occurrences of the event while BRS is
+ * capturing the lbr_nr entries.
+ *
+ * By using retired taken branches, we limit the impact on the
+ * Y variable. We know it cannot be more than the depth of
+ * BRS.
+ */
+ if (!amd_is_brs_event(event))
+ return -EINVAL;
+
+ /*
+ * BRS implementation does not work with frequency mode
+ * reprogramming of the period.
+ */
+ if (event->attr.freq)
+ return -EINVAL;
+ /*
+ * The kernel subtracts BRS depth from period, so it must
+ * be big enough.
+ */
+ if (event->attr.sample_period <= x86_pmu.lbr_nr)
+ return -EINVAL;
+
+ /*
+ * Check if we can allow PERF_SAMPLE_BRANCH_STACK
+ */
+ ret = amd_brs_setup_filter(event);
+
+ /* only set in case of success */
+ if (!ret)
+ event->hw.flags |= PERF_X86_EVENT_AMD_BRS;
+ }
+ return ret;
}
static inline int amd_is_nb_event(struct hw_perf_event *hwc)
@@ -366,7 +438,7 @@ static int amd_pmu_hw_config(struct perf_event *event)
if (event->attr.precise_ip && get_ibs_caps())
return -ENOENT;
- if (has_branch_stack(event))
+ if (has_branch_stack(event) && !x86_pmu.lbr_nr)
return -EOPNOTSUPP;
ret = x86_pmu_hw_config(event);
@@ -510,6 +582,18 @@ static struct amd_nb *amd_alloc_nb(int cpu)
return nb;
}
+static void amd_pmu_cpu_reset(int cpu)
+{
+ if (x86_pmu.version < 2)
+ return;
+
+ /* Clear enable bits i.e. PerfCntrGlobalCtl.PerfCntrEn */
+ wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
+
+ /* Clear overflow bits i.e. PerfCntrGlobalStatus.PerfCntrOvfl */
+ wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, amd_pmu_global_cntr_mask);
+}
+
static int amd_pmu_cpu_prepare(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
@@ -555,6 +639,9 @@ static void amd_pmu_cpu_starting(int cpu)
cpuc->amd_nb->nb_id = nb_id;
cpuc->amd_nb->refcnt++;
+
+ amd_brs_reset();
+ amd_pmu_cpu_reset(cpu);
}
static void amd_pmu_cpu_dead(int cpu)
@@ -574,8 +661,54 @@ static void amd_pmu_cpu_dead(int cpu)
cpuhw->amd_nb = NULL;
}
+
+ amd_pmu_cpu_reset(cpu);
+}
+
+static inline void amd_pmu_set_global_ctl(u64 ctl)
+{
+ wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl);
}
+static inline u64 amd_pmu_get_global_status(void)
+{
+ u64 status;
+
+ /* PerfCntrGlobalStatus is read-only */
+ rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status);
+
+ return status & amd_pmu_global_cntr_mask;
+}
+
+static inline void amd_pmu_ack_global_status(u64 status)
+{
+ /*
+ * PerfCntrGlobalStatus is read-only but an overflow acknowledgment
+ * mechanism exists; writing 1 to a bit in PerfCntrGlobalStatusClr
+ * clears the same bit in PerfCntrGlobalStatus
+ */
+
+ /* Only allow modifications to PerfCntrGlobalStatus.PerfCntrOvfl */
+ status &= amd_pmu_global_cntr_mask;
+ wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status);
+}
+
+static bool amd_pmu_test_overflow_topbit(int idx)
+{
+ u64 counter;
+
+ rdmsrl(x86_pmu_event_addr(idx), counter);
+
+ return !(counter & BIT_ULL(x86_pmu.cntval_bits - 1));
+}
+
+static bool amd_pmu_test_overflow_status(int idx)
+{
+ return amd_pmu_get_global_status() & BIT_ULL(idx);
+}
+
+DEFINE_STATIC_CALL(amd_pmu_test_overflow, amd_pmu_test_overflow_topbit);
+
/*
* When a PMC counter overflows, an NMI is used to process the event and
* reset the counter. NMI latency can result in the counter being updated
@@ -588,7 +721,6 @@ static void amd_pmu_cpu_dead(int cpu)
static void amd_pmu_wait_on_overflow(int idx)
{
unsigned int i;
- u64 counter;
/*
* Wait for the counter to be reset if it has overflowed. This loop
@@ -596,8 +728,7 @@ static void amd_pmu_wait_on_overflow(int idx)
* forever...
*/
for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) {
- rdmsrl(x86_pmu_event_addr(idx), counter);
- if (counter & (1ULL << (x86_pmu.cntval_bits - 1)))
+ if (!static_call(amd_pmu_test_overflow)(idx))
break;
/* Might be in IRQ context, so can't sleep */
@@ -605,13 +736,11 @@ static void amd_pmu_wait_on_overflow(int idx)
}
}
-static void amd_pmu_disable_all(void)
+static void amd_pmu_check_overflow(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx;
- x86_pmu_disable_all();
-
/*
* This shouldn't be called from NMI context, but add a safeguard here
* to return, since if we're in NMI context we can't wait for an NMI
@@ -634,6 +763,47 @@ static void amd_pmu_disable_all(void)
}
}
+static void amd_pmu_enable_event(struct perf_event *event)
+{
+ x86_pmu_enable_event(event);
+}
+
+static void amd_pmu_enable_all(int added)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int idx;
+
+ amd_brs_enable_all();
+
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ /* only activate events which are marked as active */
+ if (!test_bit(idx, cpuc->active_mask))
+ continue;
+
+ amd_pmu_enable_event(cpuc->events[idx]);
+ }
+}
+
+static void amd_pmu_v2_enable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ /*
+ * Testing cpu_hw_events.enabled should be skipped in this case unlike
+ * in x86_pmu_enable_event().
+ *
+ * Since cpu_hw_events.enabled is set only after returning from
+ * x86_pmu_start(), the PMCs must be programmed and kept ready.
+ * Counting starts only after x86_pmu_enable_all() is called.
+ */
+ __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+}
+
+static void amd_pmu_v2_enable_all(int added)
+{
+ amd_pmu_set_global_ctl(amd_pmu_global_cntr_mask);
+}
+
static void amd_pmu_disable_event(struct perf_event *event)
{
x86_pmu_disable_event(event);
@@ -651,6 +821,32 @@ static void amd_pmu_disable_event(struct perf_event *event)
amd_pmu_wait_on_overflow(event->hw.idx);
}
+static void amd_pmu_disable_all(void)
+{
+ amd_brs_disable_all();
+ x86_pmu_disable_all();
+ amd_pmu_check_overflow();
+}
+
+static void amd_pmu_v2_disable_all(void)
+{
+ /* Disable all PMCs */
+ amd_pmu_set_global_ctl(0);
+ amd_pmu_check_overflow();
+}
+
+static void amd_pmu_add_event(struct perf_event *event)
+{
+ if (needs_branch_stack(event))
+ amd_pmu_brs_add(event);
+}
+
+static void amd_pmu_del_event(struct perf_event *event)
+{
+ if (needs_branch_stack(event))
+ amd_pmu_brs_del(event);
+}
+
/*
* Because of NMI latency, if multiple PMC counters are active or other sources
* of NMIs are received, the perf NMI handler can handle one or more overflowed
@@ -669,13 +865,8 @@ static void amd_pmu_disable_event(struct perf_event *event)
* handled a counter. When an un-handled NMI is received, it will be claimed
* only if arriving within that window.
*/
-static int amd_pmu_handle_irq(struct pt_regs *regs)
+static inline int amd_pmu_adjust_nmi_window(int handled)
{
- int handled;
-
- /* Process any counter overflows */
- handled = x86_pmu_handle_irq(regs);
-
/*
* If a counter was handled, record a timestamp such that un-handled
* NMIs will be claimed if arriving within that window.
@@ -692,6 +883,113 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
return NMI_HANDLED;
}
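A condensed user-space sketch of the claim-window idea described above (wall-clock time stands in for jiffies and the window length is made up; the kernel logic is more involved):

#include <stdio.h>
#include <time.h>

static time_t last_handled;
#define WINDOW_SECS 1   /* made-up length; the kernel uses a jiffies window */

static int claim_nmi(time_t now, int handled)
{
    if (handled) {
        last_handled = now;     /* open/refresh the claim window */
        return 1;
    }
    /* Unhandled NMI: claim it only if it arrives within the window */
    return (now - last_handled) <= WINDOW_SECS;
}

int main(void)
{
    time_t t = time(NULL);

    printf("%d\n", claim_nmi(t, 1));        /* handled: claimed */
    printf("%d\n", claim_nmi(t, 0));        /* within window: claimed */
    printf("%d\n", claim_nmi(t + 10, 0));   /* stale: not claimed */
    return 0;
}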
+static int amd_pmu_handle_irq(struct pt_regs *regs)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int handled;
+ int pmu_enabled;
+
+ /*
+ * Save the PMU state.
+ * It needs to be restored when leaving the handler.
+ */
+ pmu_enabled = cpuc->enabled;
+ cpuc->enabled = 0;
+
+ /* stop everything (includes BRS) */
+ amd_pmu_disable_all();
+
+ /* Drain BRS if in use (could be inactive) */
+ if (cpuc->lbr_users)
+ amd_brs_drain();
+
+ /* Process any counter overflows */
+ handled = x86_pmu_handle_irq(regs);
+
+ cpuc->enabled = pmu_enabled;
+ if (pmu_enabled)
+ amd_pmu_enable_all(0);
+
+ return amd_pmu_adjust_nmi_window(handled);
+}
+
+static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct perf_sample_data data;
+ struct hw_perf_event *hwc;
+ struct perf_event *event;
+ int handled = 0, idx;
+ u64 status, mask;
+ bool pmu_enabled;
+
+ /*
+ * Save the PMU state as it needs to be restored when leaving the
+ * handler
+ */
+ pmu_enabled = cpuc->enabled;
+ cpuc->enabled = 0;
+
+ /* Stop counting */
+ amd_pmu_v2_disable_all();
+
+ status = amd_pmu_get_global_status();
+
+ /* Check if any overflows are pending */
+ if (!status)
+ goto done;
+
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ if (!test_bit(idx, cpuc->active_mask))
+ continue;
+
+ event = cpuc->events[idx];
+ hwc = &event->hw;
+ x86_perf_event_update(event);
+ mask = BIT_ULL(idx);
+
+ if (!(status & mask))
+ continue;
+
+ /* Event overflow */
+ handled++;
+ perf_sample_data_init(&data, 0, hwc->last_period);
+
+ if (!x86_perf_event_set_period(event))
+ continue;
+
+ if (perf_event_overflow(event, &data, regs))
+ x86_pmu_stop(event, 0);
+
+ status &= ~mask;
+ }
+
+ /*
+ * It should never be the case that some overflows are not handled as
+ * the corresponding PMCs are expected to be inactive according to the
+ * active_mask
+ */
+ WARN_ON(status > 0);
+
+ /* Clear overflow bits */
+ amd_pmu_ack_global_status(~status);
+
+ /*
+ * Unmasking the LVTPC is not required as the Mask (M) bit of the LVT
+ * PMI entry is not set by the local APIC when a PMC overflow occurs
+ */
+ inc_irq_stat(apic_perf_irqs);
+
+done:
+ cpuc->enabled = pmu_enabled;
+
+ /* Resume counting only if PMU is active */
+ if (pmu_enabled)
+ amd_pmu_v2_enable_all(0);
+
+ return amd_pmu_adjust_nmi_window(handled);
+}
+
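The per-counter loop above is essentially a walk over the status bitmask; a condensed sketch of the same pattern (hypothetical counter count and masks):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    int num_counters = 6;           /* hypothetical PMC count */
    uint64_t active_mask = 0x2b;    /* counters 0, 1, 3, 5 in use */
    uint64_t status = 0x09;         /* counters 0 and 3 overflowed */
    int handled = 0, idx;

    for (idx = 0; idx < num_counters; idx++) {
        if (!(active_mask & (1ULL << idx)))
            continue;
        if (!(status & (1ULL << idx)))
            continue;
        handled++;                  /* process the overflow for idx */
        status &= ~(1ULL << idx);
    }
    /* prints handled=2 leftover=0: nothing unexpected left over */
    printf("handled=%d leftover=%#llx\n",
           handled, (unsigned long long)status);
    return 0;
}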
static struct event_constraint *
amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
@@ -897,6 +1195,51 @@ static void amd_put_event_constraints_f17h(struct cpu_hw_events *cpuc,
--cpuc->n_pair;
}
+/*
+ * Because of the way BRS operates with inactive and active phases, and
+ * its link to a single counter, it is not possible to have two events
+ * using BRS scheduled at the same time: the period of each could not be
+ * enforced and, given that BRS saturates, correlated content could not
+ * be guaranteed for all events. Therefore, in situations where multiple
+ * events want to use BRS, the kernel enforces mutual exclusion by
+ * choosing only one counter for events using BRS. The event scheduling
+ * logic will then automatically multiplex the events and ensure that at
+ * most one event is actively using BRS.
+ *
+ * The BRS counter could be any counter, but there is no constraint on
+ * Fam19h, therefore all counters are equal and we pick the first one:
+ * PMC0.
+ */
+static struct event_constraint amd_fam19h_brs_cntr0_constraint =
+ EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK);
+
+static struct event_constraint amd_fam19h_brs_pair_cntr0_constraint =
+ __EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK, 1, 0, PERF_X86_EVENT_PAIR);
+
+static struct event_constraint *
+amd_get_event_constraints_f19h(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ bool has_brs = has_amd_brs(hwc);
+
+ /*
+ * If BRS is used with an event that requires a counter pair, the
+ * kernel allows it, but only on counters 0 & 1, so that the
+ * multiplexing needed to protect BRS from multiple BRS users is
+ * still enforced.
+ */
+ if (amd_is_pair_event_code(hwc)) {
+ return has_brs ? &amd_fam19h_brs_pair_cntr0_constraint
+ : &pair_constraint;
+ }
+
+ if (has_brs)
+ return &amd_fam19h_brs_cntr0_constraint;
+
+ return &unconstrained;
+}
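The counter mask in the constraint (0x1 above, i.e. PMC0 only) is what the scheduler consults when placing events; a simplified sketch of that selection (toy scheduler, hypothetical masks):

#include <stdint.h>
#include <stdio.h>

/* Pick the first free counter permitted by the constraint mask */
static int pick_counter(uint64_t idxmsk, uint64_t used)
{
    int idx;

    for (idx = 0; idx < 6; idx++)
        if ((idxmsk & (1ULL << idx)) && !(used & (1ULL << idx)))
            return idx;
    return -1;      /* nothing available: the event gets multiplexed */
}

int main(void)
{
    uint64_t used = 0;
    int first = pick_counter(0x1, used);    /* BRS-style mask -> PMC0 */

    used |= 1ULL << first;
    /* A second event with the same mask cannot be co-scheduled */
    printf("first=%d second=%d\n", first, pick_counter(0x1, used));
    return 0;
}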
+
static ssize_t amd_event_sysfs_show(char *page, u64 config)
{
u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
@@ -905,12 +1248,31 @@ static ssize_t amd_event_sysfs_show(char *page, u64 config)
return x86_event_sysfs_show(page, config, event);
}
+static void amd_pmu_sched_task(struct perf_event_context *ctx,
+ bool sched_in)
+{
+ if (sched_in && x86_pmu.lbr_nr)
+ amd_pmu_brs_sched_task(ctx, sched_in);
+}
+
+static u64 amd_pmu_limit_period(struct perf_event *event, u64 left)
+{
+ /*
+ * Decrease period by the depth of the BRS feature to get the last N
+ * taken branches and approximate the desired period
+ */
+ if (has_branch_stack(event) && left > x86_pmu.lbr_nr)
+ left -= x86_pmu.lbr_nr;
+
+ return left;
+}
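For example, with a hypothetical 16-entry BRS and a requested period of 100000, the adjusted period is 99984; a tiny sketch of the same arithmetic:

#include <stdint.h>
#include <stdio.h>

static uint64_t limit_period(uint64_t left, unsigned int lbr_nr)
{
    if (left > lbr_nr)      /* mirrors the check above */
        left -= lbr_nr;
    return left;
}

int main(void)
{
    /* hypothetical: 16-entry BRS, requested period 100000 */
    printf("%llu\n", (unsigned long long)limit_period(100000, 16));
    return 0;               /* prints 99984 */
}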
+
static __initconst const struct x86_pmu amd_pmu = {
.name = "AMD",
.handle_irq = amd_pmu_handle_irq,
.disable_all = amd_pmu_disable_all,
- .enable_all = x86_pmu_enable_all,
- .enable = x86_pmu_enable_event,
+ .enable_all = amd_pmu_enable_all,
+ .enable = amd_pmu_enable_event,
.disable = amd_pmu_disable_event,
.hw_config = amd_pmu_hw_config,
.schedule_events = x86_schedule_events,
@@ -920,6 +1282,8 @@ static __initconst const struct x86_pmu amd_pmu = {
.event_map = amd_pmu_event_map,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
.num_counters = AMD64_NUM_COUNTERS,
+ .add = amd_pmu_add_event,
+ .del = amd_pmu_del_event,
.cntval_bits = 48,
.cntval_mask = (1ULL << 48) - 1,
.apic = 1,
@@ -938,8 +1302,55 @@ static __initconst const struct x86_pmu amd_pmu = {
.amd_nb_constraints = 1,
};
+static ssize_t branches_show(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr_nr);
+}
+
+static DEVICE_ATTR_RO(branches);
+
+static struct attribute *amd_pmu_brs_attrs[] = {
+ &dev_attr_branches.attr,
+ NULL,
+};
+
+static umode_t
+amd_brs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return x86_pmu.lbr_nr ? attr->mode : 0;
+}
+
+static struct attribute_group group_caps_amd_brs = {
+ .name = "caps",
+ .attrs = amd_pmu_brs_attrs,
+ .is_visible = amd_brs_is_visible,
+};
+
+EVENT_ATTR_STR(branch-brs, amd_branch_brs,
+ "event=" __stringify(AMD_FAM19H_BRS_EVENT)"\n");
+
+static struct attribute *amd_brs_events_attrs[] = {
+ EVENT_PTR(amd_branch_brs),
+ NULL,
+};
+
+static struct attribute_group group_events_amd_brs = {
+ .name = "events",
+ .attrs = amd_brs_events_attrs,
+ .is_visible = amd_brs_is_visible,
+};
+
+static const struct attribute_group *amd_attr_update[] = {
+ &group_caps_amd_brs,
+ &group_events_amd_brs,
+ NULL,
+};
+
static int __init amd_core_pmu_init(void)
{
+ union cpuid_0x80000022_ebx ebx;
u64 even_ctr_mask = 0ULL;
int i;
@@ -957,6 +1368,27 @@ static int __init amd_core_pmu_init(void)
x86_pmu.eventsel = MSR_F15H_PERF_CTL;
x86_pmu.perfctr = MSR_F15H_PERF_CTR;
x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE;
+
+ /* Check for Performance Monitoring v2 support */
+ if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) {
+ ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
+
+ /* Update PMU version for later usage */
+ x86_pmu.version = 2;
+
+ /* Find the number of available Core PMCs */
+ x86_pmu.num_counters = ebx.split.num_core_pmc;
+
+ amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1;
+
+ /* Update PMC handling functions */
+ x86_pmu.enable_all = amd_pmu_v2_enable_all;
+ x86_pmu.disable_all = amd_pmu_v2_disable_all;
+ x86_pmu.enable = amd_pmu_v2_enable_event;
+ x86_pmu.handle_irq = amd_pmu_v2_handle_irq;
+ static_call_update(amd_pmu_test_overflow, amd_pmu_test_overflow_status);
+ }
+
/*
* AMD Core perfctr has separate MSRs for the NB events, see
* the amd/uncore.c driver.
@@ -989,6 +1421,23 @@ static int __init amd_core_pmu_init(void)
x86_pmu.flags |= PMU_FL_PAIR;
}
+ /*
+ * BRS requires special event constraints and flushing on ctxsw.
+ */
+ if (boot_cpu_data.x86 >= 0x19 && !amd_brs_init()) {
+ x86_pmu.get_event_constraints = amd_get_event_constraints_f19h;
+ x86_pmu.sched_task = amd_pmu_sched_task;
+ x86_pmu.limit_period = amd_pmu_limit_period;
+ /*
+ * put_event_constraints callback same as Fam17h, set above
+ */
+
+ /* branch sampling must be stopped when entering low power */
+ amd_brs_lopwr_init();
+ }
+
+ x86_pmu.attr_update = amd_attr_update;
+
pr_cont("core perfctr, ");
return 0;
}
@@ -1023,6 +1472,24 @@ __init int amd_pmu_init(void)
return 0;
}
+static inline void amd_pmu_reload_virt(void)
+{
+ if (x86_pmu.version >= 2) {
+ /*
+ * Clear global enable bits, reprogram the PERF_CTL
+ * registers with updated perf_ctr_virt_mask and then
+ * set global enable bits once again
+ */
+ amd_pmu_v2_disable_all();
+ amd_pmu_enable_all(0);
+ amd_pmu_v2_enable_all(0);
+ return;
+ }
+
+ amd_pmu_disable_all();
+ amd_pmu_enable_all(0);
+}
+
void amd_pmu_enable_virt(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1030,8 +1497,7 @@ void amd_pmu_enable_virt(void)
cpuc->perf_ctr_virt_mask = 0;
/* Reload all events */
- amd_pmu_disable_all();
- x86_pmu_enable_all(0);
+ amd_pmu_reload_virt();
}
EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
@@ -1048,7 +1514,6 @@ void amd_pmu_disable_virt(void)
cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
/* Reload all events */
- amd_pmu_disable_all();
- x86_pmu_enable_all(0);
+ amd_pmu_reload_virt();
}
EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 9739019d4b67..c251bc44c088 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -94,10 +94,6 @@ struct perf_ibs {
unsigned int fetch_ignore_if_zero_rip : 1;
struct cpu_perf_ibs __percpu *pcpu;
- struct attribute **format_attrs;
- struct attribute_group format_group;
- const struct attribute_group *attr_groups[2];
-
u64 (*get_count)(u64 config);
};
@@ -304,6 +300,16 @@ static int perf_ibs_init(struct perf_event *event)
hwc->config_base = perf_ibs->msr;
hwc->config = config;
+ /*
+ * rip recorded by IbsOpRip will not be consistent with rsp and rbp
+ * recorded as part of interrupt regs. Thus we need to use rip from
+ * interrupt regs while unwinding call stack. Setting _EARLY flag
+ * makes sure we unwind call-stack before perf sample rip is set to
+ * IbsOpRip.
+ */
+ if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+ event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY;
+
return 0;
}
@@ -518,16 +524,118 @@ static void perf_ibs_del(struct perf_event *event, int flags)
static void perf_ibs_read(struct perf_event *event) { }
+/*
+ * We need to initialize with empty group if all attributes in the
+ * group are dynamic.
+ */
+static struct attribute *attrs_empty[] = {
+ NULL,
+};
+
+static struct attribute_group empty_format_group = {
+ .name = "format",
+ .attrs = attrs_empty,
+};
+
+static struct attribute_group empty_caps_group = {
+ .name = "caps",
+ .attrs = attrs_empty,
+};
+
+static const struct attribute_group *empty_attr_groups[] = {
+ &empty_format_group,
+ &empty_caps_group,
+ NULL,
+};
+
PMU_FORMAT_ATTR(rand_en, "config:57");
PMU_FORMAT_ATTR(cnt_ctl, "config:19");
+PMU_EVENT_ATTR_STRING(l3missonly, fetch_l3missonly, "config:59");
+PMU_EVENT_ATTR_STRING(l3missonly, op_l3missonly, "config:16");
+PMU_EVENT_ATTR_STRING(zen4_ibs_extensions, zen4_ibs_extensions, "1");
-static struct attribute *ibs_fetch_format_attrs[] = {
+static umode_t
+zen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return ibs_caps & IBS_CAPS_ZEN4 ? attr->mode : 0;
+}
+
+static struct attribute *rand_en_attrs[] = {
&format_attr_rand_en.attr,
NULL,
};
-static struct attribute *ibs_op_format_attrs[] = {
- NULL, /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
+static struct attribute *fetch_l3missonly_attrs[] = {
+ &fetch_l3missonly.attr.attr,
+ NULL,
+};
+
+static struct attribute *zen4_ibs_extensions_attrs[] = {
+ &zen4_ibs_extensions.attr.attr,
+ NULL,
+};
+
+static struct attribute_group group_rand_en = {
+ .name = "format",
+ .attrs = rand_en_attrs,
+};
+
+static struct attribute_group group_fetch_l3missonly = {
+ .name = "format",
+ .attrs = fetch_l3missonly_attrs,
+ .is_visible = zen4_ibs_extensions_is_visible,
+};
+
+static struct attribute_group group_zen4_ibs_extensions = {
+ .name = "caps",
+ .attrs = zen4_ibs_extensions_attrs,
+ .is_visible = zen4_ibs_extensions_is_visible,
+};
+
+static const struct attribute_group *fetch_attr_groups[] = {
+ &group_rand_en,
+ &empty_caps_group,
+ NULL,
+};
+
+static const struct attribute_group *fetch_attr_update[] = {
+ &group_fetch_l3missonly,
+ &group_zen4_ibs_extensions,
+ NULL,
+};
+
+static umode_t
+cnt_ctl_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return ibs_caps & IBS_CAPS_OPCNT ? attr->mode : 0;
+}
+
+static struct attribute *cnt_ctl_attrs[] = {
+ &format_attr_cnt_ctl.attr,
+ NULL,
+};
+
+static struct attribute *op_l3missonly_attrs[] = {
+ &op_l3missonly.attr.attr,
+ NULL,
+};
+
+static struct attribute_group group_cnt_ctl = {
+ .name = "format",
+ .attrs = cnt_ctl_attrs,
+ .is_visible = cnt_ctl_is_visible,
+};
+
+static struct attribute_group group_op_l3missonly = {
+ .name = "format",
+ .attrs = op_l3missonly_attrs,
+ .is_visible = zen4_ibs_extensions_is_visible,
+};
+
+static const struct attribute_group *op_attr_update[] = {
+ &group_cnt_ctl,
+ &group_op_l3missonly,
+ &group_zen4_ibs_extensions,
NULL,
};
@@ -551,7 +659,6 @@ static struct perf_ibs perf_ibs_fetch = {
.max_period = IBS_FETCH_MAX_CNT << 4,
.offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK },
.offset_max = MSR_AMD64_IBSFETCH_REG_COUNT,
- .format_attrs = ibs_fetch_format_attrs,
.get_count = get_ibs_fetch_count,
};
@@ -577,7 +684,6 @@ static struct perf_ibs perf_ibs_op = {
.max_period = IBS_OP_MAX_CNT << 4,
.offset_mask = { MSR_AMD64_IBSOP_REG_MASK },
.offset_max = MSR_AMD64_IBSOP_REG_COUNT,
- .format_attrs = ibs_op_format_attrs,
.get_count = get_ibs_op_count,
};
@@ -687,6 +793,14 @@ fail:
data.raw = &raw;
}
+ /*
+ * rip recorded by IbsOpRip will not be consistent with rsp and rbp
+ * recorded as part of interrupt regs. Thus we need to use rip from
+ * interrupt regs while unwinding call stack.
+ */
+ if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+ data.callchain = perf_callchain(event, iregs);
+
throttle = perf_event_overflow(event, &data, &regs);
out:
if (throttle) {
@@ -739,17 +853,6 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
perf_ibs->pcpu = pcpu;
- /* register attributes */
- if (perf_ibs->format_attrs[0]) {
- memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
- perf_ibs->format_group.name = "format";
- perf_ibs->format_group.attrs = perf_ibs->format_attrs;
-
- memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
- perf_ibs->attr_groups[0] = &perf_ibs->format_group;
- perf_ibs->pmu.attr_groups = perf_ibs->attr_groups;
- }
-
ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
if (ret) {
perf_ibs->pcpu = NULL;
@@ -759,10 +862,8 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
return ret;
}
-static __init void perf_event_ibs_init(void)
+static __init int perf_ibs_fetch_init(void)
{
- struct attribute **attr = ibs_op_format_attrs;
-
/*
* Some chips fail to reset the fetch count when it is written; instead
* they need a 0-1 transition of IbsFetchEn.
@@ -773,12 +874,19 @@ static __init void perf_event_ibs_init(void)
if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model < 0x10)
perf_ibs_fetch.fetch_ignore_if_zero_rip = 1;
- perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
+ if (ibs_caps & IBS_CAPS_ZEN4)
+ perf_ibs_fetch.config_mask |= IBS_FETCH_L3MISSONLY;
+
+ perf_ibs_fetch.pmu.attr_groups = fetch_attr_groups;
+ perf_ibs_fetch.pmu.attr_update = fetch_attr_update;
- if (ibs_caps & IBS_CAPS_OPCNT) {
+ return perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
+}
+
+static __init int perf_ibs_op_init(void)
+{
+ if (ibs_caps & IBS_CAPS_OPCNT)
perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
- *attr++ = &format_attr_cnt_ctl.attr;
- }
if (ibs_caps & IBS_CAPS_OPCNTEXT) {
perf_ibs_op.max_period |= IBS_OP_MAX_CNT_EXT_MASK;
@@ -786,15 +894,52 @@ static __init void perf_event_ibs_init(void)
perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK;
}
- perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
+ if (ibs_caps & IBS_CAPS_ZEN4)
+ perf_ibs_op.config_mask |= IBS_OP_L3MISSONLY;
+
+ perf_ibs_op.pmu.attr_groups = empty_attr_groups;
+ perf_ibs_op.pmu.attr_update = op_attr_update;
+
+ return perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
+}
+
+static __init int perf_event_ibs_init(void)
+{
+ int ret;
+
+ ret = perf_ibs_fetch_init();
+ if (ret)
+ return ret;
+
+ ret = perf_ibs_op_init();
+ if (ret)
+ goto err_op;
+
+ ret = register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
+ if (ret)
+ goto err_nmi;
- register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps);
+ return 0;
+
+err_nmi:
+ perf_pmu_unregister(&perf_ibs_op.pmu);
+ free_percpu(perf_ibs_op.pcpu);
+ perf_ibs_op.pcpu = NULL;
+err_op:
+ perf_pmu_unregister(&perf_ibs_fetch.pmu);
+ free_percpu(perf_ibs_fetch.pcpu);
+ perf_ibs_fetch.pcpu = NULL;
+
+ return ret;
}
#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
-static __init void perf_event_ibs_init(void) { }
+static __init int perf_event_ibs_init(void)
+{
+ return 0;
+}
#endif
@@ -1064,9 +1209,7 @@ static __init int amd_ibs_init(void)
x86_pmu_amd_ibs_starting_cpu,
x86_pmu_amd_ibs_dying_cpu);
- perf_event_ibs_init();
-
- return 0;
+ return perf_event_ibs_init();
}
/* Since we need the pci subsystem to init ibs we can't do this earlier: */
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index eef816fc216d..30788894124f 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1338,6 +1338,10 @@ static void x86_pmu_enable(struct pmu *pmu)
if (hwc->state & PERF_HES_ARCH)
continue;
+ /*
+ * If cpuc->enabled == 0, no wrmsr is issued, as per
+ * x86_pmu_enable_event()
+ */
x86_pmu_start(event, PERF_EF_RELOAD);
}
cpuc->n_added = 0;
@@ -1704,11 +1708,15 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
* event overflow
*/
handled++;
- perf_sample_data_init(&data, 0, event->hw.last_period);
if (!x86_perf_event_set_period(event))
continue;
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+
+ if (has_branch_stack(event))
+ data.br_stack = &cpuc->lbr_stack;
+
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
}
@@ -1837,7 +1845,7 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, cha
/* string trumps id */
if (pmu_attr->event_str)
- return sprintf(page, "%s", pmu_attr->event_str);
+ return sprintf(page, "%s\n", pmu_attr->event_str);
return x86_pmu.events_sysfs_show(page, config);
}
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index fc7f458eb3de..955ae91c56dc 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6216,7 +6216,9 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_ALDERLAKE:
case INTEL_FAM6_ALDERLAKE_L:
+ case INTEL_FAM6_ALDERLAKE_N:
case INTEL_FAM6_RAPTORLAKE:
+ case INTEL_FAM6_RAPTORLAKE_P:
/*
* Alder Lake has 2 types of CPU, core and atom.
*
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 48e5db21142c..8ec23f47fee9 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -682,7 +682,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &icl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &adl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index fe1742c4ca49..13179f31fe10 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -769,6 +769,7 @@ void intel_pmu_lbr_disable_all(void)
void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
{
unsigned long mask = x86_pmu.lbr_nr - 1;
+ struct perf_branch_entry *br = cpuc->lbr_entries;
u64 tos = intel_pmu_lbr_tos();
int i;
@@ -784,15 +785,11 @@ void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
- cpuc->lbr_entries[i].from = msr_lastbranch.from;
- cpuc->lbr_entries[i].to = msr_lastbranch.to;
- cpuc->lbr_entries[i].mispred = 0;
- cpuc->lbr_entries[i].predicted = 0;
- cpuc->lbr_entries[i].in_tx = 0;
- cpuc->lbr_entries[i].abort = 0;
- cpuc->lbr_entries[i].cycles = 0;
- cpuc->lbr_entries[i].type = 0;
- cpuc->lbr_entries[i].reserved = 0;
+ perf_clear_branch_entry_bitfields(br);
+
+ br->from = msr_lastbranch.from;
+ br->to = msr_lastbranch.to;
+ br++;
}
cpuc->lbr_stack.nr = i;
cpuc->lbr_stack.hw_idx = tos;
@@ -807,6 +804,7 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
bool need_info = false, call_stack = false;
unsigned long mask = x86_pmu.lbr_nr - 1;
+ struct perf_branch_entry *br = cpuc->lbr_entries;
u64 tos = intel_pmu_lbr_tos();
int i;
int out = 0;
@@ -878,15 +876,14 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
if (abort && x86_pmu.lbr_double_abort && out > 0)
out--;
- cpuc->lbr_entries[out].from = from;
- cpuc->lbr_entries[out].to = to;
- cpuc->lbr_entries[out].mispred = mis;
- cpuc->lbr_entries[out].predicted = pred;
- cpuc->lbr_entries[out].in_tx = in_tx;
- cpuc->lbr_entries[out].abort = abort;
- cpuc->lbr_entries[out].cycles = cycles;
- cpuc->lbr_entries[out].type = 0;
- cpuc->lbr_entries[out].reserved = 0;
+ perf_clear_branch_entry_bitfields(br+out);
+ br[out].from = from;
+ br[out].to = to;
+ br[out].mispred = mis;
+ br[out].predicted = pred;
+ br[out].in_tx = in_tx;
+ br[out].abort = abort;
+ br[out].cycles = cycles;
out++;
}
cpuc->lbr_stack.nr = out;
@@ -951,6 +948,8 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
to = rdlbr_to(i, lbr);
info = rdlbr_info(i, lbr);
+ perf_clear_branch_entry_bitfields(e);
+
e->from = from;
e->to = to;
e->mispred = get_lbr_mispred(info);
@@ -959,7 +958,6 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
e->abort = !!(info & LBR_INFO_ABORT);
e->cycles = get_lbr_cycles(info);
e->type = get_lbr_br_type(info);
- e->reserved = 0;
}
cpuc->lbr_stack.nr = i;
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 7695dcae280e..db6c31bca809 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1828,7 +1828,9 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rkl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init),
{},
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 4262351f52b6..ce440011cc4e 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -79,10 +79,43 @@
#define PCI_DEVICE_ID_INTEL_ADL_14_IMC 0x4650
#define PCI_DEVICE_ID_INTEL_ADL_15_IMC 0x4668
#define PCI_DEVICE_ID_INTEL_ADL_16_IMC 0x4670
+#define PCI_DEVICE_ID_INTEL_ADL_17_IMC 0x4614
+#define PCI_DEVICE_ID_INTEL_ADL_18_IMC 0x4617
+#define PCI_DEVICE_ID_INTEL_ADL_19_IMC 0x4618
+#define PCI_DEVICE_ID_INTEL_ADL_20_IMC 0x461B
+#define PCI_DEVICE_ID_INTEL_ADL_21_IMC 0x461C
#define PCI_DEVICE_ID_INTEL_RPL_1_IMC 0xA700
#define PCI_DEVICE_ID_INTEL_RPL_2_IMC 0xA702
#define PCI_DEVICE_ID_INTEL_RPL_3_IMC 0xA706
#define PCI_DEVICE_ID_INTEL_RPL_4_IMC 0xA709
+#define PCI_DEVICE_ID_INTEL_RPL_5_IMC 0xA701
+#define PCI_DEVICE_ID_INTEL_RPL_6_IMC 0xA703
+#define PCI_DEVICE_ID_INTEL_RPL_7_IMC 0xA704
+#define PCI_DEVICE_ID_INTEL_RPL_8_IMC 0xA705
+#define PCI_DEVICE_ID_INTEL_RPL_9_IMC 0xA706
+#define PCI_DEVICE_ID_INTEL_RPL_10_IMC 0xA707
+#define PCI_DEVICE_ID_INTEL_RPL_11_IMC 0xA708
+#define PCI_DEVICE_ID_INTEL_RPL_12_IMC 0xA709
+#define PCI_DEVICE_ID_INTEL_RPL_13_IMC 0xA70a
+#define PCI_DEVICE_ID_INTEL_RPL_14_IMC 0xA70b
+#define PCI_DEVICE_ID_INTEL_RPL_15_IMC 0xA715
+#define PCI_DEVICE_ID_INTEL_RPL_16_IMC 0xA716
+#define PCI_DEVICE_ID_INTEL_RPL_17_IMC 0xA717
+#define PCI_DEVICE_ID_INTEL_RPL_18_IMC 0xA718
+#define PCI_DEVICE_ID_INTEL_RPL_19_IMC 0xA719
+#define PCI_DEVICE_ID_INTEL_RPL_20_IMC 0xA71A
+#define PCI_DEVICE_ID_INTEL_RPL_21_IMC 0xA71B
+#define PCI_DEVICE_ID_INTEL_RPL_22_IMC 0xA71C
+#define PCI_DEVICE_ID_INTEL_RPL_23_IMC 0xA728
+#define PCI_DEVICE_ID_INTEL_RPL_24_IMC 0xA729
+#define PCI_DEVICE_ID_INTEL_RPL_25_IMC 0xA72A
+
+#define IMC_UNCORE_DEV(a) \
+{ \
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_##a##_IMC), \
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), \
+}
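IMC_UNCORE_DEV() relies on token pasting to build the full device-ID macro name from the short one; a stand-alone sketch of the same trick (stub struct and made-up IDs):

#include <stdio.h>

/* Made-up device IDs; only the naming pattern matters here */
#define DEVID_INTEL_SNB_IMC 0x0100
#define DEVID_INTEL_IVB_IMC 0x0154

struct dev_entry { unsigned int id; };

/* Token pasting turns the short name into the full ID macro */
#define IMC_DEV(a) { .id = DEVID_INTEL_##a##_IMC }

static const struct dev_entry table[] = {
    IMC_DEV(SNB),   /* expands to { .id = DEVID_INTEL_SNB_IMC } */
    IMC_DEV(IVB),
    { 0 },          /* end: all zeroes */
};

int main(void)
{
    printf("%#x %#x\n", table[0].id, table[1].id);
    return 0;
}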
/* SNB event control */
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
@@ -849,242 +882,80 @@ static struct intel_uncore_type *snb_pci_uncores[] = {
};
static const struct pci_device_id snb_uncore_pci_ids[] = {
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
+ IMC_UNCORE_DEV(SNB),
{ /* end: all zeroes */ },
};
static const struct pci_device_id ivb_uncore_pci_ids[] = {
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_E3_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
+ IMC_UNCORE_DEV(IVB),
+ IMC_UNCORE_DEV(IVB_E3),
{ /* end: all zeroes */ },
};
static const struct pci_device_id hsw_uncore_pci_ids[] = {
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
+ IMC_UNCORE_DEV(HSW),
+ IMC_UNCORE_DEV(HSW_U),
{ /* end: all zeroes */ },
};
static const struct pci_device_id bdw_uncore_pci_ids[] = {
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
+ IMC_UNCORE_DEV(BDW),
{ /* end: all zeroes */ },
};
static const struct pci_device_id skl_uncore_pci_ids[] = {
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_Y_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_U_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HD_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HQ_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SD_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_E3_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_Y_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_U_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_UQ_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SD_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SQ_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_HQ_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_WQ_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2U_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4U_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4H_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6H_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AML_YD_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AML_YQ_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_UQ_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_4_UQ_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_UD_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_H1_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_H2_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_H3_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_U1_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_U2_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_U3_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S1_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S2_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S3_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S4_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S5_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
+ IMC_UNCORE_DEV(SKL_Y),
+ IMC_UNCORE_DEV(SKL_U),
+ IMC_UNCORE_DEV(SKL_HD),
+ IMC_UNCORE_DEV(SKL_HQ),
+ IMC_UNCORE_DEV(SKL_SD),
+ IMC_UNCORE_DEV(SKL_SQ),
+ IMC_UNCORE_DEV(SKL_E3),
+ IMC_UNCORE_DEV(KBL_Y),
+ IMC_UNCORE_DEV(KBL_U),
+ IMC_UNCORE_DEV(KBL_UQ),
+ IMC_UNCORE_DEV(KBL_SD),
+ IMC_UNCORE_DEV(KBL_SQ),
+ IMC_UNCORE_DEV(KBL_HQ),
+ IMC_UNCORE_DEV(KBL_WQ),
+ IMC_UNCORE_DEV(CFL_2U),
+ IMC_UNCORE_DEV(CFL_4U),
+ IMC_UNCORE_DEV(CFL_4H),
+ IMC_UNCORE_DEV(CFL_6H),
+ IMC_UNCORE_DEV(CFL_2S_D),
+ IMC_UNCORE_DEV(CFL_4S_D),
+ IMC_UNCORE_DEV(CFL_6S_D),
+ IMC_UNCORE_DEV(CFL_8S_D),
+ IMC_UNCORE_DEV(CFL_4S_W),
+ IMC_UNCORE_DEV(CFL_6S_W),
+ IMC_UNCORE_DEV(CFL_8S_W),
+ IMC_UNCORE_DEV(CFL_4S_S),
+ IMC_UNCORE_DEV(CFL_6S_S),
+ IMC_UNCORE_DEV(CFL_8S_S),
+ IMC_UNCORE_DEV(AML_YD),
+ IMC_UNCORE_DEV(AML_YQ),
+ IMC_UNCORE_DEV(WHL_UQ),
+ IMC_UNCORE_DEV(WHL_4_UQ),
+ IMC_UNCORE_DEV(WHL_UD),
+ IMC_UNCORE_DEV(CML_H1),
+ IMC_UNCORE_DEV(CML_H2),
+ IMC_UNCORE_DEV(CML_H3),
+ IMC_UNCORE_DEV(CML_U1),
+ IMC_UNCORE_DEV(CML_U2),
+ IMC_UNCORE_DEV(CML_U3),
+ IMC_UNCORE_DEV(CML_S1),
+ IMC_UNCORE_DEV(CML_S2),
+ IMC_UNCORE_DEV(CML_S3),
+ IMC_UNCORE_DEV(CML_S4),
+ IMC_UNCORE_DEV(CML_S5),
{ /* end: all zeroes */ },
};
static const struct pci_device_id icl_uncore_pci_ids[] = {
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICL_U_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICL_U2_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RKL_1_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RKL_2_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
+ IMC_UNCORE_DEV(ICL_U),
+ IMC_UNCORE_DEV(ICL_U2),
+ IMC_UNCORE_DEV(RKL_1),
+ IMC_UNCORE_DEV(RKL_2),
{ /* end: all zeroes */ },
};
@@ -1326,106 +1197,57 @@ void nhm_uncore_cpu_init(void)
/* Tiger Lake MMIO uncore support */
static const struct pci_device_id tgl_uncore_pci_ids[] = {
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U1_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U2_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U3_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U4_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_H_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_1_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_2_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_3_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_4_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_5_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_6_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_7_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_8_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_9_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_10_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_11_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_12_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_13_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_14_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_15_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_16_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_1_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_2_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_3_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_4_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
+ IMC_UNCORE_DEV(TGL_U1),
+ IMC_UNCORE_DEV(TGL_U2),
+ IMC_UNCORE_DEV(TGL_U3),
+ IMC_UNCORE_DEV(TGL_U4),
+ IMC_UNCORE_DEV(TGL_H),
+ IMC_UNCORE_DEV(ADL_1),
+ IMC_UNCORE_DEV(ADL_2),
+ IMC_UNCORE_DEV(ADL_3),
+ IMC_UNCORE_DEV(ADL_4),
+ IMC_UNCORE_DEV(ADL_5),
+ IMC_UNCORE_DEV(ADL_6),
+ IMC_UNCORE_DEV(ADL_7),
+ IMC_UNCORE_DEV(ADL_8),
+ IMC_UNCORE_DEV(ADL_9),
+ IMC_UNCORE_DEV(ADL_10),
+ IMC_UNCORE_DEV(ADL_11),
+ IMC_UNCORE_DEV(ADL_12),
+ IMC_UNCORE_DEV(ADL_13),
+ IMC_UNCORE_DEV(ADL_14),
+ IMC_UNCORE_DEV(ADL_15),
+ IMC_UNCORE_DEV(ADL_16),
+ IMC_UNCORE_DEV(ADL_17),
+ IMC_UNCORE_DEV(ADL_18),
+ IMC_UNCORE_DEV(ADL_19),
+ IMC_UNCORE_DEV(ADL_20),
+ IMC_UNCORE_DEV(ADL_21),
+ IMC_UNCORE_DEV(RPL_1),
+ IMC_UNCORE_DEV(RPL_2),
+ IMC_UNCORE_DEV(RPL_3),
+ IMC_UNCORE_DEV(RPL_4),
+ IMC_UNCORE_DEV(RPL_5),
+ IMC_UNCORE_DEV(RPL_6),
+ IMC_UNCORE_DEV(RPL_7),
+ IMC_UNCORE_DEV(RPL_8),
+ IMC_UNCORE_DEV(RPL_9),
+ IMC_UNCORE_DEV(RPL_10),
+ IMC_UNCORE_DEV(RPL_11),
+ IMC_UNCORE_DEV(RPL_12),
+ IMC_UNCORE_DEV(RPL_13),
+ IMC_UNCORE_DEV(RPL_14),
+ IMC_UNCORE_DEV(RPL_15),
+ IMC_UNCORE_DEV(RPL_16),
+ IMC_UNCORE_DEV(RPL_17),
+ IMC_UNCORE_DEV(RPL_18),
+ IMC_UNCORE_DEV(RPL_19),
+ IMC_UNCORE_DEV(RPL_20),
+ IMC_UNCORE_DEV(RPL_21),
+ IMC_UNCORE_DEV(RPL_22),
+ IMC_UNCORE_DEV(RPL_23),
+ IMC_UNCORE_DEV(RPL_24),
+ IMC_UNCORE_DEV(RPL_25),
{ /* end: all zeroes */ }
};
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 6d759f88315c..ac542f98c070 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -103,7 +103,9 @@ static bool test_intel(int idx, void *data)
case INTEL_FAM6_ROCKETLAKE:
case INTEL_FAM6_ALDERLAKE:
case INTEL_FAM6_ALDERLAKE_L:
+ case INTEL_FAM6_ALDERLAKE_N:
case INTEL_FAM6_RAPTORLAKE:
+ case INTEL_FAM6_RAPTORLAKE_P:
if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
return true;
break;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 150261d929b9..21a5482bcf84 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -67,22 +67,23 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
/*
* struct hw_perf_event.flags flags
*/
-#define PERF_X86_EVENT_PEBS_LDLAT 0x0001 /* ld+ldlat data address sampling */
-#define PERF_X86_EVENT_PEBS_ST 0x0002 /* st data address sampling */
-#define PERF_X86_EVENT_PEBS_ST_HSW 0x0004 /* haswell style datala, store */
-#define PERF_X86_EVENT_PEBS_LD_HSW 0x0008 /* haswell style datala, load */
-#define PERF_X86_EVENT_PEBS_NA_HSW 0x0010 /* haswell style datala, unknown */
-#define PERF_X86_EVENT_EXCL 0x0020 /* HT exclusivity on counter */
-#define PERF_X86_EVENT_DYNAMIC 0x0040 /* dynamic alloc'd constraint */
-
-#define PERF_X86_EVENT_EXCL_ACCT 0x0100 /* accounted EXCL event */
-#define PERF_X86_EVENT_AUTO_RELOAD 0x0200 /* use PEBS auto-reload */
-#define PERF_X86_EVENT_LARGE_PEBS 0x0400 /* use large PEBS */
-#define PERF_X86_EVENT_PEBS_VIA_PT 0x0800 /* use PT buffer for PEBS */
-#define PERF_X86_EVENT_PAIR 0x1000 /* Large Increment per Cycle */
-#define PERF_X86_EVENT_LBR_SELECT 0x2000 /* Save/Restore MSR_LBR_SELECT */
-#define PERF_X86_EVENT_TOPDOWN 0x4000 /* Count Topdown slots/metrics events */
-#define PERF_X86_EVENT_PEBS_STLAT 0x8000 /* st+stlat data address sampling */
+#define PERF_X86_EVENT_PEBS_LDLAT 0x00001 /* ld+ldlat data address sampling */
+#define PERF_X86_EVENT_PEBS_ST 0x00002 /* st data address sampling */
+#define PERF_X86_EVENT_PEBS_ST_HSW 0x00004 /* haswell style datala, store */
+#define PERF_X86_EVENT_PEBS_LD_HSW 0x00008 /* haswell style datala, load */
+#define PERF_X86_EVENT_PEBS_NA_HSW 0x00010 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_EXCL 0x00020 /* HT exclusivity on counter */
+#define PERF_X86_EVENT_DYNAMIC 0x00040 /* dynamic alloc'd constraint */
+
+#define PERF_X86_EVENT_EXCL_ACCT 0x00100 /* accounted EXCL event */
+#define PERF_X86_EVENT_AUTO_RELOAD 0x00200 /* use PEBS auto-reload */
+#define PERF_X86_EVENT_LARGE_PEBS 0x00400 /* use large PEBS */
+#define PERF_X86_EVENT_PEBS_VIA_PT 0x00800 /* use PT buffer for PEBS */
+#define PERF_X86_EVENT_PAIR 0x01000 /* Large Increment per Cycle */
+#define PERF_X86_EVENT_LBR_SELECT 0x02000 /* Save/Restore MSR_LBR_SELECT */
+#define PERF_X86_EVENT_TOPDOWN 0x04000 /* Count Topdown slots/metrics events */
+#define PERF_X86_EVENT_PEBS_STLAT 0x08000 /* st+stlat data address sampling */
+#define PERF_X86_EVENT_AMD_BRS 0x10000 /* AMD Branch Sampling */
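The field was widened to five hex digits to make room for bit 16; a quick sketch of how such a flag is set and tested (flag values copied from the list above, the rest is illustrative):

#include <stdio.h>

#define PERF_X86_EVENT_PAIR    0x01000
#define PERF_X86_EVENT_AMD_BRS 0x10000

int main(void)
{
    unsigned int flags = 0;

    flags |= PERF_X86_EVENT_AMD_BRS;    /* set when the BRS filter succeeds */
    printf("brs=%d pair=%d\n",
           !!(flags & PERF_X86_EVENT_AMD_BRS),  /* 1 */
           !!(flags & PERF_X86_EVENT_PAIR));    /* 0 */
    return 0;
}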
static inline bool is_topdown_count(struct perf_event *event)
{
@@ -325,6 +326,8 @@ struct cpu_hw_events {
* AMD specific bits
*/
struct amd_nb *amd_nb;
+ int brs_active; /* BRS is enabled */
+
/* Inverted mask of bits to clear in the perf_ctr ctrl registers */
u64 perf_ctr_virt_mask;
int n_pair; /* Large increment events */
@@ -1105,6 +1108,11 @@ int x86_pmu_hw_config(struct perf_event *event);
void x86_pmu_disable_all(void);
+static inline bool has_amd_brs(struct hw_perf_event *hwc)
+{
+ return hwc->flags & PERF_X86_EVENT_AMD_BRS;
+}
+
static inline bool is_counter_pair(struct hw_perf_event *hwc)
{
return hwc->flags & PERF_X86_EVENT_PAIR;
@@ -1211,6 +1219,75 @@ static inline bool fixed_counter_disabled(int i, struct pmu *pmu)
int amd_pmu_init(void);
+#ifdef CONFIG_PERF_EVENTS_AMD_BRS
+int amd_brs_init(void);
+void amd_brs_disable(void);
+void amd_brs_enable(void);
+void amd_brs_enable_all(void);
+void amd_brs_disable_all(void);
+void amd_brs_drain(void);
+void amd_brs_lopwr_init(void);
+int amd_brs_setup_filter(struct perf_event *event);
+void amd_brs_reset(void);
+
+static inline void amd_pmu_brs_add(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ perf_sched_cb_inc(event->ctx->pmu);
+ cpuc->lbr_users++;
+ /*
+ * No need to reset BRS because it is reset
+ * on brs_enable() and it is saturating
+ */
+}
+
+static inline void amd_pmu_brs_del(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ cpuc->lbr_users--;
+ WARN_ON_ONCE(cpuc->lbr_users < 0);
+
+ perf_sched_cb_dec(event->ctx->pmu);
+}
+
+void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in);
+#else
+static inline int amd_brs_init(void)
+{
+ return 0;
+}
+static inline void amd_brs_disable(void) {}
+static inline void amd_brs_enable(void) {}
+static inline void amd_brs_drain(void) {}
+static inline void amd_brs_lopwr_init(void) {}
+static inline void amd_brs_disable_all(void) {}
+static inline int amd_brs_setup_filter(struct perf_event *event)
+{
+ return 0;
+}
+static inline void amd_brs_reset(void) {}
+
+static inline void amd_pmu_brs_add(struct perf_event *event)
+{
+}
+
+static inline void amd_pmu_brs_del(struct perf_event *event)
+{
+}
+
+static inline void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+}
+
+static inline void amd_brs_enable_all(void)
+{
+}
+
+#endif
+
#else /* CONFIG_CPU_SUP_AMD */
static inline int amd_pmu_init(void)
@@ -1218,6 +1295,22 @@ static inline int amd_pmu_init(void)
return 0;
}
+static inline int amd_brs_init(void)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void amd_brs_drain(void)
+{
+}
+
+static inline void amd_brs_enable_all(void)
+{
+}
+
+static inline void amd_brs_disable_all(void)
+{
+}
#endif /* CONFIG_CPU_SUP_AMD */
static inline int is_pebs_pt(struct perf_event *event)
diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile
index 8e4d0391ff6c..e481056698de 100644
--- a/arch/x86/ia32/Makefile
+++ b/arch/x86/ia32/Makefile
@@ -5,7 +5,5 @@
obj-$(CONFIG_IA32_EMULATION) := ia32_signal.o
-obj-$(CONFIG_IA32_AOUT) += ia32_aout.o
-
audit-class-$(CONFIG_AUDIT) := audit.o
obj-$(CONFIG_IA32_EMULATION) += $(audit-class-y)
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
deleted file mode 100644
index 9bd15241fadb..000000000000
--- a/arch/x86/ia32/ia32_aout.c
+++ /dev/null
@@ -1,325 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * a.out loader for x86-64
- *
- * Copyright (C) 1991, 1992, 1996 Linus Torvalds
- * Hacked together by Andi Kleen
- */
-
-#include <linux/module.h>
-
-#include <linux/time.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/mman.h>
-#include <linux/a.out.h>
-#include <linux/errno.h>
-#include <linux/signal.h>
-#include <linux/string.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include <linux/ptrace.h>
-#include <linux/user.h>
-#include <linux/binfmts.h>
-#include <linux/personality.h>
-#include <linux/init.h>
-#include <linux/jiffies.h>
-#include <linux/perf_event.h>
-#include <linux/sched/task_stack.h>
-
-#include <linux/uaccess.h>
-#include <asm/cacheflush.h>
-#include <asm/user32.h>
-#include <asm/ia32.h>
-
-#undef WARN_OLD
-
-static int load_aout_binary(struct linux_binprm *);
-static int load_aout_library(struct file *);
-
-static struct linux_binfmt aout_format = {
- .module = THIS_MODULE,
- .load_binary = load_aout_binary,
- .load_shlib = load_aout_library,
-};
-
-static int set_brk(unsigned long start, unsigned long end)
-{
- start = PAGE_ALIGN(start);
- end = PAGE_ALIGN(end);
- if (end <= start)
- return 0;
- return vm_brk(start, end - start);
-}
-
-
-/*
- * create_aout_tables() parses the env- and arg-strings in new user
- * memory and creates the pointer tables from them, and puts their
- * addresses on the "stack", returning the new stack pointer value.
- */
-static u32 __user *create_aout_tables(char __user *p, struct linux_binprm *bprm)
-{
- u32 __user *argv, *envp, *sp;
- int argc = bprm->argc, envc = bprm->envc;
-
- sp = (u32 __user *) ((-(unsigned long)sizeof(u32)) & (unsigned long) p);
- sp -= envc+1;
- envp = sp;
- sp -= argc+1;
- argv = sp;
- put_user((unsigned long) envp, --sp);
- put_user((unsigned long) argv, --sp);
- put_user(argc, --sp);
- current->mm->arg_start = (unsigned long) p;
- while (argc-- > 0) {
- char c;
-
- put_user((u32)(unsigned long)p, argv++);
- do {
- get_user(c, p++);
- } while (c);
- }
- put_user(0, argv);
- current->mm->arg_end = current->mm->env_start = (unsigned long) p;
- while (envc-- > 0) {
- char c;
-
- put_user((u32)(unsigned long)p, envp++);
- do {
- get_user(c, p++);
- } while (c);
- }
- put_user(0, envp);
- current->mm->env_end = (unsigned long) p;
- return sp;
-}
-
-/*
- * These are the functions used to load a.out style executables and shared
- * libraries. There is no binary dependent code anywhere else.
- */
-static int load_aout_binary(struct linux_binprm *bprm)
-{
- unsigned long error, fd_offset, rlim;
- struct pt_regs *regs = current_pt_regs();
- struct exec ex;
- int retval;
-
- ex = *((struct exec *) bprm->buf); /* exec-header */
- if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
- N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
- N_TRSIZE(ex) || N_DRSIZE(ex) ||
- i_size_read(file_inode(bprm->file)) <
- ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
- return -ENOEXEC;
- }
-
- fd_offset = N_TXTOFF(ex);
-
- /* Check initial limits. This avoids letting people circumvent
- * size limits imposed on them by creating programs with large
- * arrays in the data or bss.
- */
- rlim = rlimit(RLIMIT_DATA);
- if (rlim >= RLIM_INFINITY)
- rlim = ~0;
- if (ex.a_data + ex.a_bss > rlim)
- return -ENOMEM;
-
- /* Flush all traces of the currently running executable */
- retval = begin_new_exec(bprm);
- if (retval)
- return retval;
-
- /* OK, This is the point of no return */
- set_personality(PER_LINUX);
- set_personality_ia32(false);
-
- setup_new_exec(bprm);
-
- regs->cs = __USER32_CS;
- regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
- regs->r13 = regs->r14 = regs->r15 = 0;
-
- current->mm->end_code = ex.a_text +
- (current->mm->start_code = N_TXTADDR(ex));
- current->mm->end_data = ex.a_data +
- (current->mm->start_data = N_DATADDR(ex));
- current->mm->brk = ex.a_bss +
- (current->mm->start_brk = N_BSSADDR(ex));
-
- retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
- if (retval < 0)
- return retval;
-
- if (N_MAGIC(ex) == OMAGIC) {
- unsigned long text_addr, map_size;
-
- text_addr = N_TXTADDR(ex);
- map_size = ex.a_text+ex.a_data;
-
- error = vm_brk(text_addr & PAGE_MASK, map_size);
-
- if (error)
- return error;
-
- error = read_code(bprm->file, text_addr, 32,
- ex.a_text + ex.a_data);
- if ((signed long)error < 0)
- return error;
- } else {
-#ifdef WARN_OLD
- static unsigned long error_time, error_time2;
- if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
- (N_MAGIC(ex) != NMAGIC) &&
- time_after(jiffies, error_time2 + 5*HZ)) {
- printk(KERN_NOTICE "executable not page aligned\n");
- error_time2 = jiffies;
- }
-
- if ((fd_offset & ~PAGE_MASK) != 0 &&
- time_after(jiffies, error_time + 5*HZ)) {
- printk(KERN_WARNING
- "fd_offset is not page aligned. Please convert "
- "program: %pD\n",
- bprm->file);
- error_time = jiffies;
- }
-#endif
-
- if (!bprm->file->f_op->mmap || (fd_offset & ~PAGE_MASK) != 0) {
- error = vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
- if (error)
- return error;
-
- read_code(bprm->file, N_TXTADDR(ex), fd_offset,
- ex.a_text+ex.a_data);
- goto beyond_if;
- }
-
- error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
- PROT_READ | PROT_EXEC,
- MAP_FIXED | MAP_PRIVATE | MAP_32BIT,
- fd_offset);
-
- if (error != N_TXTADDR(ex))
- return error;
-
- error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
- PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_FIXED | MAP_PRIVATE | MAP_32BIT,
- fd_offset + ex.a_text);
- if (error != N_DATADDR(ex))
- return error;
- }
-
-beyond_if:
- error = set_brk(current->mm->start_brk, current->mm->brk);
- if (error)
- return error;
-
- set_binfmt(&aout_format);
-
- current->mm->start_stack =
- (unsigned long)create_aout_tables((char __user *)bprm->p, bprm);
- /* start thread */
- loadsegment(fs, 0);
- loadsegment(ds, __USER32_DS);
- loadsegment(es, __USER32_DS);
- load_gs_index(0);
- (regs)->ip = ex.a_entry;
- (regs)->sp = current->mm->start_stack;
- (regs)->flags = 0x200;
- (regs)->cs = __USER32_CS;
- (regs)->ss = __USER32_DS;
- regs->r8 = regs->r9 = regs->r10 = regs->r11 =
- regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
- return 0;
-}
-
-static int load_aout_library(struct file *file)
-{
- unsigned long bss, start_addr, len, error;
- int retval;
- struct exec ex;
- loff_t pos = 0;
-
- retval = -ENOEXEC;
- error = kernel_read(file, &ex, sizeof(ex), &pos);
- if (error != sizeof(ex))
- goto out;
-
- /* We come in here for the regular a.out style of shared libraries */
- if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) ||
- N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) ||
- i_size_read(file_inode(file)) <
- ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
- goto out;
- }
-
- if (N_FLAGS(ex))
- goto out;
-
- /* For QMAGIC, the starting address is 0x20 into the page. We mask
- this off to get the starting address for the page */
-
- start_addr = ex.a_entry & 0xfffff000;
-
- if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
-#ifdef WARN_OLD
- static unsigned long error_time;
- if (time_after(jiffies, error_time + 5*HZ)) {
- printk(KERN_WARNING
- "N_TXTOFF is not page aligned. Please convert "
- "library: %pD\n",
- file);
- error_time = jiffies;
- }
-#endif
- retval = vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
- if (retval)
- goto out;
-
- read_code(file, start_addr, N_TXTOFF(ex),
- ex.a_text + ex.a_data);
- retval = 0;
- goto out;
- }
- /* Now use mmap to map the library into memory. */
- error = vm_mmap(file, start_addr, ex.a_text + ex.a_data,
- PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_FIXED | MAP_PRIVATE | MAP_32BIT,
- N_TXTOFF(ex));
- retval = error;
- if (error != start_addr)
- goto out;
-
- len = PAGE_ALIGN(ex.a_text + ex.a_data);
- bss = ex.a_text + ex.a_data + ex.a_bss;
- if (bss > len) {
- retval = vm_brk(start_addr + len, bss - len);
- if (retval)
- goto out;
- }
- retval = 0;
-out:
- return retval;
-}
-
-static int __init init_aout_binfmt(void)
-{
- register_binfmt(&aout_format);
- return 0;
-}
-
-static void __exit exit_aout_binfmt(void)
-{
- unregister_binfmt(&aout_format);
-}
-
-module_init(init_aout_binfmt);
-module_exit(exit_aout_binfmt);
-MODULE_LICENSE("GPL");
diff --git a/arch/x86/include/asm/acenv.h b/arch/x86/include/asm/acenv.h
index 9aff97f0de7f..d937c55e717e 100644
--- a/arch/x86/include/asm/acenv.h
+++ b/arch/x86/include/asm/acenv.h
@@ -13,7 +13,19 @@
/* Asm macros */
-#define ACPI_FLUSH_CPU_CACHE() wbinvd()
+/*
+ * ACPI_FLUSH_CPU_CACHE() flushes caches on entering sleep states.
+ * It is required to prevent data loss.
+ *
+ * While running inside a virtual machine, the kernel can bypass cache flushing.
+ * Changing sleep state in a virtual machine doesn't affect the host system
+ * sleep state and cannot lead to data loss.
+ */
+#define ACPI_FLUSH_CPU_CACHE() \
+do { \
+ if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) \
+ wbinvd(); \
+} while (0)
int __acpi_acquire_global_lock(unsigned int *lock);
int __acpi_release_global_lock(unsigned int *lock);
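
The acenv.h change above gates WBINVD on bare metal only: inside a guest, an ACPI sleep-state transition cannot lose host data, so the flush is skipped. A minimal user-space sketch of the same guard pattern follows; running_under_hypervisor() and flush_caches() are hypothetical stand-ins, and only the CPUID.1:ECX[31] hypervisor bit is architectural (it is what X86_FEATURE_HYPERVISOR reflects).

/*
 * Skip an expensive cache-management operation when CPUID reports a
 * hypervisor. flush_caches() stands in for the privileged WBINVD,
 * which cannot be executed from user space.
 */
#include <cpuid.h>
#include <stdbool.h>
#include <stdio.h>

static bool running_under_hypervisor(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return false;
	return ecx & (1u << 31);	/* CPUID.1:ECX[31], "hypervisor present" */
}

static void flush_caches(void)		/* stand-in for wbinvd() */
{
	puts("flushing caches");
}

int main(void)
{
	if (!running_under_hypervisor())
		flush_caches();		/* bare metal: flush is required */
	else
		puts("hypervisor detected: flush skipped");
	return 0;
}
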
diff --git a/arch/x86/include/asm/amd-ibs.h b/arch/x86/include/asm/amd-ibs.h
index 46e1df45efc0..aabdbb5ab920 100644
--- a/arch/x86/include/asm/amd-ibs.h
+++ b/arch/x86/include/asm/amd-ibs.h
@@ -49,7 +49,7 @@ union ibs_op_ctl {
};
};
-/* MSR 0xc0011035: IBS Op Data 2 */
+/* MSR 0xc0011035: IBS Op Data 1 */
union ibs_op_data {
__u64 val;
struct {
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 00d1a400b7a1..ed0eaf65c437 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -16,7 +16,6 @@ extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
extern bool early_is_amd_nb(u32 value);
extern struct resource *amd_get_mmconfig_range(struct resource *res);
-extern int amd_cache_northbridges(void);
extern void amd_flush_garts(void);
extern int amd_numa_init(void);
extern int amd_get_subcaches(int);
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 48067af94678..bd8ae0a7010a 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -328,6 +328,8 @@ struct apic {
/* wakeup_secondary_cpu */
int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip);
+ /* wakeup secondary CPU using 64-bit wakeup point */
+ int (*wakeup_secondary_cpu_64)(int apicid, unsigned long start_eip);
void (*inquire_remote_apic)(int apicid);
@@ -488,6 +490,11 @@ static inline unsigned int read_apic_id(void)
return apic->get_apic_id(reg);
}
+#ifdef CONFIG_X86_64
+typedef int (*wakeup_cpu_handler)(int apicid, unsigned long start_eip);
+extern void acpi_wake_cpu_handler_update(wakeup_cpu_handler handler);
+#endif
+
extern int default_apic_id_valid(u32 apicid);
extern int default_acpi_madt_oem_check(char *, char *);
extern void default_setup_apic_routing(void);
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 5716f22f81ac..92035eb3afee 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -95,12 +95,6 @@
#define APIC_LVTTHMR 0x330
#define APIC_LVTPC 0x340
#define APIC_LVT0 0x350
-#define APIC_LVT_TIMER_BASE_MASK (0x3 << 18)
-#define GET_APIC_TIMER_BASE(x) (((x) >> 18) & 0x3)
-#define SET_APIC_TIMER_BASE(x) (((x) << 18))
-#define APIC_TIMER_BASE_CLKIN 0x0
-#define APIC_TIMER_BASE_TMBASE 0x1
-#define APIC_TIMER_BASE_DIV 0x2
#define APIC_LVT_TIMER_ONESHOT (0 << 17)
#define APIC_LVT_TIMER_PERIODIC (1 << 17)
#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17)
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
index 981fe923a59f..53e9b0620d96 100644
--- a/arch/x86/include/asm/bootparam_utils.h
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -74,6 +74,7 @@ static void sanitize_boot_params(struct boot_params *boot_params)
BOOT_PARAM_PRESERVE(hdr),
BOOT_PARAM_PRESERVE(e820_table),
BOOT_PARAM_PRESERVE(eddbuf),
+ BOOT_PARAM_PRESERVE(cc_blob_address),
};
memset(&scratch, 0, sizeof(scratch));
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index aaf0cb0db4ae..a3ec87d198ac 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -18,7 +18,7 @@
#ifdef CONFIG_X86_32
# define __BUG_REL(val) ".long " __stringify(val)
#else
-# define __BUG_REL(val) ".long " __stringify(val) " - 2b"
+# define __BUG_REL(val) ".long " __stringify(val) " - ."
#endif
#ifdef CONFIG_DEBUG_BUGVERBOSE
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 0a7fe0321613..215f5a65790f 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -42,6 +42,9 @@ static inline void set_64bit(volatile u64 *ptr, u64 value)
#define arch_cmpxchg64_local(ptr, o, n) \
((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \
(unsigned long long)(n)))
+#define arch_try_cmpxchg64(ptr, po, n) \
+ __try_cmpxchg64((ptr), (unsigned long long *)(po), \
+ (unsigned long long)(n))
#endif
static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
@@ -70,6 +73,24 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
return prev;
}
+static inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *pold, u64 new)
+{
+ bool success;
+ u64 old = *pold;
+ asm volatile(LOCK_PREFIX "cmpxchg8b %[ptr]"
+ CC_SET(z)
+ : CC_OUT(z) (success),
+ [ptr] "+m" (*ptr),
+ "+A" (old)
+ : "b" ((u32)new),
+ "c" ((u32)(new >> 32))
+ : "memory");
+
+ if (unlikely(!success))
+ *pold = old;
+ return success;
+}
+
#ifndef CONFIG_X86_CMPXCHG64
/*
 * Building a kernel capable of running on 80386 and 80486. It may be necessary
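
The new __try_cmpxchg64()/arch_try_cmpxchg64() helpers follow the try_cmpxchg contract: on failure the caller's old-value variable is refreshed with the current memory contents, so retry loops need no separate re-read. A rough user-space sketch of that contract, using the GCC/Clang __atomic builtin as a stand-in for the kernel's cmpxchg8b wrapper:

/*
 * Saturating atomic increment built on a try_cmpxchg-style loop: on a
 * failed exchange, 'old' is updated with the observed value, so the
 * loop goes straight back to re-checking the limit.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t counter;

static void add_sat(uint64_t *v, uint64_t limit)
{
	uint64_t old = __atomic_load_n(v, __ATOMIC_RELAXED);

	do {
		if (old >= limit)
			return;		/* saturated: nothing to do */
		/* On failure, 'old' is refreshed with *v automatically. */
	} while (!__atomic_compare_exchange_n(v, &old, old + 1, /*weak=*/1,
					      __ATOMIC_RELAXED,
					      __ATOMIC_RELAXED));
}

int main(void)
{
	add_sat(&counter, 10);
	printf("%llu\n", (unsigned long long)counter);
	return 0;
}
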
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 072e5459fe2f..250187ac8248 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -19,6 +19,12 @@ static inline void set_64bit(volatile u64 *ptr, u64 val)
arch_cmpxchg_local((ptr), (o), (n)); \
})
+#define arch_try_cmpxchg64(ptr, po, n) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
+ arch_try_cmpxchg((ptr), (po), (n)); \
+})
+
#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX16)
#endif /* _ASM_X86_CMPXCHG_64_H */
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 86e5e4e26fcb..8cbf623f0ecf 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -36,6 +36,8 @@ extern int _debug_hotplug_cpu(int cpu, int action);
#endif
#endif
+extern void ap_init_aperfmperf(void);
+
int mwait_usable(const struct cpuinfo_x86 *);
unsigned int x86_family(unsigned int sig);
@@ -43,14 +45,12 @@ unsigned int x86_model(unsigned int sig);
unsigned int x86_stepping(unsigned int sig);
#ifdef CONFIG_CPU_SUP_INTEL
extern void __init sld_setup(struct cpuinfo_x86 *c);
-extern void switch_to_sld(unsigned long tifn);
extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
extern bool handle_guest_split_lock(unsigned long ip);
extern void handle_bus_lock(struct pt_regs *regs);
u8 get_this_hybrid_cpu_type(void);
#else
static inline void __init sld_setup(struct cpuinfo_x86 *c) {}
-static inline void switch_to_sld(unsigned long tifn) {}
static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
{
return false;
@@ -76,4 +76,22 @@ static inline void init_ia32_feat_ctl(struct cpuinfo_x86 *c) {}
extern __noendbr void cet_disable(void);
+struct ucode_cpu_info;
+
+int intel_cpu_collect_info(struct ucode_cpu_info *uci);
+
+static inline bool intel_cpu_signatures_match(unsigned int s1, unsigned int p1,
+ unsigned int s2, unsigned int p2)
+{
+ if (s1 != s2)
+ return false;
+
+ /* Processor flags are either both 0 ... */
+ if (!p1 && !p2)
+ return true;
+
+ /* ... or they intersect. */
+ return p1 & p2;
+}
+
#endif /* _ASM_X86_CPU_H */
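
intel_cpu_signatures_match() above encodes the microcode matching rule: signatures must be equal, and the processor-flag masks must either both be zero or intersect. A tiny standalone check of that rule, with made-up signature values:

/*
 * Replica of the matching rule for a quick self-test: equal signatures
 * match when both processor-flag fields are zero or when the masks
 * share at least one bit.
 */
#include <assert.h>
#include <stdbool.h>

static bool signatures_match(unsigned int s1, unsigned int p1,
			     unsigned int s2, unsigned int p2)
{
	if (s1 != s2)
		return false;
	if (!p1 && !p2)
		return true;
	return p1 & p2;
}

int main(void)
{
	assert(signatures_match(0x906ea, 0, 0x906ea, 0));	/* both flags 0 */
	assert(signatures_match(0x906ea, 0x2, 0x906ea, 0x22));	/* masks overlap */
	assert(!signatures_match(0x906ea, 0x2, 0x906ea, 0x80));/* disjoint masks */
	assert(!signatures_match(0x906ea, 0, 0x906eb, 0));	/* different sig */
	return 0;
}
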
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index dd5ea1bdf04c..75efc4c6f076 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -143,7 +143,7 @@ extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
-static inline struct entry_stack *cpu_entry_stack(int cpu)
+static __always_inline struct entry_stack *cpu_entry_stack(int cpu)
{
return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
}
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 1261842d006c..66d3e3b1d24d 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -34,14 +34,17 @@ enum cpuid_leafs
CPUID_8000_001F_EAX,
};
+#define X86_CAP_FMT_NUM "%d:%d"
+#define x86_cap_flag_num(flag) ((flag) >> 5), ((flag) & 31)
+
#ifdef CONFIG_X86_FEATURE_NAMES
extern const char * const x86_cap_flags[NCAPINTS*32];
extern const char * const x86_power_flags[32];
#define X86_CAP_FMT "%s"
#define x86_cap_flag(flag) x86_cap_flags[flag]
#else
-#define X86_CAP_FMT "%d:%d"
-#define x86_cap_flag(flag) ((flag) >> 5), ((flag) & 31)
+#define X86_CAP_FMT X86_CAP_FMT_NUM
+#define x86_cap_flag x86_cap_flag_num
#endif
/*
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 73e643ae94b6..21bb78dfd41d 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -201,7 +201,7 @@
#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-/* FREE! ( 7*32+10) */
+#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
@@ -211,7 +211,7 @@
#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */
#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
-/* FREE! ( 7*32+20) */
+#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* AMD Performance Monitoring Version 2 */
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */
@@ -238,6 +238,7 @@
#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */
#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */
+#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* Intel Trust Domain Extensions Guest */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
@@ -315,6 +316,7 @@
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
+#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h
new file mode 100644
index 000000000000..70b2db18165e
--- /dev/null
+++ b/arch/x86/include/asm/cpuid.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * CPUID-related helpers/definitions
+ *
+ * Derived from arch/x86/kvm/cpuid.c
+ */
+
+#ifndef _ASM_X86_CPUID_H
+#define _ASM_X86_CPUID_H
+
+static __always_inline bool cpuid_function_is_indexed(u32 function)
+{
+ switch (function) {
+ case 4:
+ case 7:
+ case 0xb:
+ case 0xd:
+ case 0xf:
+ case 0x10:
+ case 0x12:
+ case 0x14:
+ case 0x17:
+ case 0x18:
+ case 0x1d:
+ case 0x1e:
+ case 0x1f:
+ case 0x8000001d:
+ return true;
+ }
+
+ return false;
+}
+
+#endif /* _ASM_X86_CPUID_H */
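
cpuid_function_is_indexed() distinguishes leaves whose output depends on the ECX subleaf. A small sketch of the practical consequence, using the compiler's __cpuid_count() intrinsic: for indexed leaves such as 7 the subleaf selects a distinct register set, while for non-indexed leaves it is ignored by the CPU.

/*
 * Dump a CPUID leaf/subleaf pair. For indexed leaves, callers that
 * want "leaf only" semantics must still pass a well-defined subleaf
 * (typically 0), which is the property the predicate above captures.
 */
#include <cpuid.h>
#include <stdio.h>

static void query_leaf(unsigned int leaf, unsigned int subleaf)
{
	unsigned int eax, ebx, ecx, edx;

	__cpuid_count(leaf, subleaf, eax, ebx, ecx, edx);
	printf("leaf %#x subleaf %u: %08x %08x %08x %08x\n",
	       leaf, subleaf, eax, ebx, ecx, edx);
}

int main(void)
{
	/* Leaf 7 is indexed: each subleaf reports different features. */
	query_leaf(7, 0);
	query_leaf(7, 1);
	/* Leaf 0 is not indexed: the subleaf value is ignored. */
	query_leaf(0, 0);
	return 0;
}
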
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 1231d63f836d..36369e76cc63 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -10,12 +10,6 @@
* cpu_feature_enabled().
*/
-#ifdef CONFIG_X86_SMAP
-# define DISABLE_SMAP 0
-#else
-# define DISABLE_SMAP (1<<(X86_FEATURE_SMAP & 31))
-#endif
-
#ifdef CONFIG_X86_UMIP
# define DISABLE_UMIP 0
#else
@@ -68,6 +62,12 @@
# define DISABLE_SGX (1 << (X86_FEATURE_SGX & 31))
#endif
+#ifdef CONFIG_INTEL_TDX_GUEST
+# define DISABLE_TDX_GUEST 0
+#else
+# define DISABLE_TDX_GUEST (1 << (X86_FEATURE_TDX_GUEST & 31))
+#endif
+
/*
* Make sure to add features to the correct mask
*/
@@ -79,8 +79,8 @@
#define DISABLED_MASK5 0
#define DISABLED_MASK6 0
#define DISABLED_MASK7 (DISABLE_PTI)
-#define DISABLED_MASK8 0
-#define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX)
+#define DISABLED_MASK8 (DISABLE_TDX_GUEST)
+#define DISABLED_MASK9 (DISABLE_SGX)
#define DISABLED_MASK10 0
#define DISABLED_MASK11 0
#define DISABLED_MASK12 0
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 98938a68251c..bed74a0f2932 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -357,6 +357,11 @@ static inline u32 efi64_convert_status(efi_status_t status)
runtime), \
func, __VA_ARGS__))
+#define efi_dxe_call(func, ...) \
+ (efi_is_native() \
+ ? efi_dxe_table->func(__VA_ARGS__) \
+ : __efi64_thunk_map(efi_dxe_table, func, __VA_ARGS__))
+
#else /* CONFIG_EFI_MIXED */
static inline bool efi_is_64bit(void)
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 29fea180a665..cb0ff1055ab1 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -116,7 +116,7 @@ extern unsigned int vdso32_enabled;
* now struct_user_regs, they are different)
*/
-#define ELF_CORE_COPY_REGS_COMMON(pr_reg, regs) \
+#define ELF_CORE_COPY_REGS(pr_reg, regs) \
do { \
pr_reg[0] = regs->bx; \
pr_reg[1] = regs->cx; \
@@ -128,6 +128,7 @@ do { \
pr_reg[7] = regs->ds; \
pr_reg[8] = regs->es; \
pr_reg[9] = regs->fs; \
+ savesegment(gs, pr_reg[10]); \
pr_reg[11] = regs->orig_ax; \
pr_reg[12] = regs->ip; \
pr_reg[13] = regs->cs; \
@@ -136,18 +137,6 @@ do { \
pr_reg[16] = regs->ss; \
} while (0);
-#define ELF_CORE_COPY_REGS(pr_reg, regs) \
-do { \
- ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
- pr_reg[10] = get_user_gs(regs); \
-} while (0);
-
-#define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs) \
-do { \
- ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
- savesegment(gs, pr_reg[10]); \
-} while (0);
-
#define ELF_PLATFORM (utsname()->machine)
#define set_personality_64bit() do { } while (0)
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index 43184640b579..674ed46d3ced 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -10,7 +10,7 @@
#include <asm/fpu/api.h>
/* Check that the stack and regs on entry from user mode are sane. */
-static __always_inline void arch_check_user_regs(struct pt_regs *regs)
+static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
{
if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) {
/*
@@ -42,7 +42,7 @@ static __always_inline void arch_check_user_regs(struct pt_regs *regs)
WARN_ON_ONCE(regs != task_pt_regs(current));
}
}
-#define arch_check_user_regs arch_check_user_regs
+#define arch_enter_from_user_mode arch_enter_from_user_mode
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
unsigned long ti_work)
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index c83b3020350a..6b0f31fb53f7 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -162,7 +162,6 @@ static inline bool fpstate_is_confidential(struct fpu_guest *gfpu)
}
/* prctl */
-struct task_struct;
-extern long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2);
+extern long fpu_xstate_prctl(int option, unsigned long arg2);
#endif /* _ASM_X86_FPU_API_H */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/arch/x86/include/asm/fpu/internal.h
+++ /dev/null
diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h
index 032e020853aa..731ee7cc40a5 100644
--- a/arch/x86/include/asm/highmem.h
+++ b/arch/x86/include/asm/highmem.h
@@ -26,6 +26,7 @@
#include <asm/tlbflush.h>
#include <asm/paravirt.h>
#include <asm/fixmap.h>
+#include <asm/pgtable_areas.h>
/* declarations for highmem.c */
extern unsigned long highstart_pfn, highend_pfn;
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 7924f27f5c8b..72184b0b2219 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -632,6 +632,10 @@ DECLARE_IDTENTRY_XENCB(X86_TRAP_OTHER, exc_xen_hypervisor_callback);
DECLARE_IDTENTRY_RAW(X86_TRAP_OTHER, exc_xen_unknown_trap);
#endif
+#ifdef CONFIG_INTEL_TDX_GUEST
+DECLARE_IDTENTRY(X86_TRAP_VE, exc_virtualization_exception);
+#endif
+
/* Device interrupts common/spurious */
DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER, common_interrupt);
#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index e9736af126b2..1870b99c3356 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -44,6 +44,7 @@
#include <asm/page.h>
#include <asm/early_ioremap.h>
#include <asm/pgtable_types.h>
+#include <asm/shared/io.h>
#define build_mmio_read(name, size, type, reg, barrier) \
static inline type name(const volatile void __iomem *addr) \
@@ -256,37 +257,23 @@ static inline void slow_down_io(void)
#endif
#define BUILDIO(bwl, bw, type) \
-static inline void out##bwl(unsigned type value, int port) \
-{ \
- asm volatile("out" #bwl " %" #bw "0, %w1" \
- : : "a"(value), "Nd"(port)); \
-} \
- \
-static inline unsigned type in##bwl(int port) \
-{ \
- unsigned type value; \
- asm volatile("in" #bwl " %w1, %" #bw "0" \
- : "=a"(value) : "Nd"(port)); \
- return value; \
-} \
- \
-static inline void out##bwl##_p(unsigned type value, int port) \
+static inline void out##bwl##_p(type value, u16 port) \
{ \
out##bwl(value, port); \
slow_down_io(); \
} \
\
-static inline unsigned type in##bwl##_p(int port) \
+static inline type in##bwl##_p(u16 port) \
{ \
- unsigned type value = in##bwl(port); \
+ type value = in##bwl(port); \
slow_down_io(); \
return value; \
} \
\
-static inline void outs##bwl(int port, const void *addr, unsigned long count) \
+static inline void outs##bwl(u16 port, const void *addr, unsigned long count) \
{ \
if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) { \
- unsigned type *value = (unsigned type *)addr; \
+ type *value = (type *)addr; \
while (count) { \
out##bwl(*value, port); \
value++; \
@@ -299,10 +286,10 @@ static inline void outs##bwl(int port, const void *addr, unsigned long count) \
} \
} \
\
-static inline void ins##bwl(int port, void *addr, unsigned long count) \
+static inline void ins##bwl(u16 port, void *addr, unsigned long count) \
{ \
if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) { \
- unsigned type *value = (unsigned type *)addr; \
+ type *value = (type *)addr; \
while (count) { \
*value = in##bwl(port); \
value++; \
@@ -315,13 +302,11 @@ static inline void ins##bwl(int port, void *addr, unsigned long count) \
} \
}
-BUILDIO(b, b, char)
-BUILDIO(w, w, short)
-BUILDIO(l, , int)
+BUILDIO(b, b, u8)
+BUILDIO(w, w, u16)
+BUILDIO(l, , u32)
+#undef BUILDIO
-#define inb inb
-#define inw inw
-#define inl inl
#define inb_p inb_p
#define inw_p inw_p
#define inl_p inl_p
@@ -329,9 +314,6 @@ BUILDIO(l, , int)
#define insw insw
#define insl insl
-#define outb outb
-#define outw outw
-#define outl outl
#define outb_p outb_p
#define outw_p outw_p
#define outl_p outl_p
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 111104d1c2cd..7793e52d6237 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -137,14 +137,6 @@ static __always_inline void arch_local_irq_restore(unsigned long flags)
if (!arch_irqs_disabled_flags(flags))
arch_local_irq_enable();
}
-#else
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_XEN_PV
-#define SWAPGS ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV
-#else
-#define SWAPGS swapgs
-#endif
-#endif
#endif /* !__ASSEMBLY__ */
#endif
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 0449b125d27f..071572e23d3a 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -20,7 +20,7 @@
_ASM_PTR "%c0 + %c1 - .\n\t" \
".popsection \n\t"
-#ifdef CONFIG_STACK_VALIDATION
+#ifdef CONFIG_HAVE_JUMP_LABEL_HACK
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
@@ -34,7 +34,7 @@ l_yes:
return true;
}
-#else
+#else /* !CONFIG_HAVE_JUMP_LABEL_HACK */
static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
{
@@ -48,7 +48,7 @@ l_yes:
return true;
}
-#endif /* STACK_VALIDATION */
+#endif /* CONFIG_HAVE_JUMP_LABEL_HACK */
static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
{
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 56935ebb1dfe..57bc74e112f2 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -7,6 +7,8 @@
#include <linux/interrupt.h>
#include <uapi/asm/kvm_para.h>
+#include <asm/tdx.h>
+
#ifdef CONFIG_KVM_GUEST
bool kvm_check_and_clear_guest_paused(void);
#else
@@ -32,6 +34,10 @@ static inline bool kvm_check_and_clear_guest_paused(void)
static inline long kvm_hypercall0(unsigned int nr)
{
long ret;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return tdx_kvm_hypercall(nr, 0, 0, 0, 0);
+
asm volatile(KVM_HYPERCALL
: "=a"(ret)
: "a"(nr)
@@ -42,6 +48,10 @@ static inline long kvm_hypercall0(unsigned int nr)
static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
{
long ret;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return tdx_kvm_hypercall(nr, p1, 0, 0, 0);
+
asm volatile(KVM_HYPERCALL
: "=a"(ret)
: "a"(nr), "b"(p1)
@@ -53,6 +63,10 @@ static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
unsigned long p2)
{
long ret;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return tdx_kvm_hypercall(nr, p1, p2, 0, 0);
+
asm volatile(KVM_HYPERCALL
: "=a"(ret)
: "a"(nr), "b"(p1), "c"(p2)
@@ -64,6 +78,10 @@ static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
unsigned long p2, unsigned long p3)
{
long ret;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return tdx_kvm_hypercall(nr, p1, p2, p3, 0);
+
asm volatile(KVM_HYPERCALL
: "=a"(ret)
: "a"(nr), "b"(p1), "c"(p2), "d"(p3)
@@ -76,6 +94,10 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
unsigned long p4)
{
long ret;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return tdx_kvm_hypercall(nr, p1, p2, p3, p4);
+
asm volatile(KVM_HYPERCALL
: "=a"(ret)
: "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4)
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index e2c6f433ed10..88ceaf3648b3 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -49,9 +49,6 @@ void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages,
void __init mem_encrypt_free_decrypted_mem(void);
-/* Architecture __weak replacement functions */
-void __init mem_encrypt_init(void);
-
void __init sev_es_init_vc_handling(void);
#define __bss_decrypted __section(".bss..decrypted")
@@ -89,6 +86,9 @@ static inline void mem_encrypt_free_decrypted_mem(void) { }
#endif /* CONFIG_AMD_MEM_ENCRYPT */
+/* Architecture __weak replacement functions */
+void __init mem_encrypt_init(void);
+
/*
* The __sme_pa() and __sme_pa_nodebug() macros are meant for use when
* writing to or comparing values from the cr3 register. Having the
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 27516046117a..b8d40ddeab00 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -141,7 +141,7 @@ do { \
#ifdef CONFIG_X86_32
#define deactivate_mm(tsk, mm) \
do { \
- lazy_load_gs(0); \
+ loadsegment(gs, 0); \
} while (0)
#else
#define deactivate_mm(tsk, mm) \
diff --git a/arch/x86/include/asm/mmx.h b/arch/x86/include/asm/mmx.h
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/arch/x86/include/asm/mmx.h
+++ /dev/null
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ee15311b6be1..403e83b4adc8 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -76,6 +76,8 @@
/* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */
#define MSR_IA32_CORE_CAPS 0x000000cf
+#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT 2
+#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS BIT(MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT)
#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT 5
#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT BIT(MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT)
@@ -154,6 +156,11 @@
#define MSR_IA32_POWER_CTL 0x000001fc
#define MSR_IA32_POWER_CTL_BIT_EE 19
+/* Abbreviated from Intel SDM name IA32_INTEGRITY_CAPABILITIES */
+#define MSR_INTEGRITY_CAPS 0x000002d9
+#define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4
+#define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT)
+
#define MSR_LBR_NHM_FROM 0x00000680
#define MSR_LBR_NHM_TO 0x000006c0
#define MSR_LBR_CORE_FROM 0x00000040
@@ -312,6 +319,7 @@
/* Run Time Average Power Limiting (RAPL) Interface */
+#define MSR_VR_CURRENT_CONFIG 0x00000601
#define MSR_RAPL_POWER_UNIT 0x00000606
#define MSR_PKG_POWER_LIMIT 0x00000610
@@ -502,8 +510,10 @@
#define MSR_AMD64_SEV 0xc0010131
#define MSR_AMD64_SEV_ENABLED_BIT 0
#define MSR_AMD64_SEV_ES_ENABLED_BIT 1
+#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2
#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
#define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
+#define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
@@ -524,6 +534,11 @@
#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16)
#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24)
+/* AMD Performance Counter Global Status and Control MSRs */
+#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
+#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301
+#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302
+
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9
@@ -688,6 +703,10 @@
#define MSR_IA32_PERF_CTL 0x00000199
#define INTEL_PERF_CTL_MASK 0xffff
+/* AMD Branch Sampling configuration */
+#define MSR_AMD_DBG_EXTN_CFG 0xc000010f
+#define MSR_AMD_SAMP_BR_FROM 0xc0010300
+
#define MSR_IA32_MPERF 0x000000e7
#define MSR_IA32_APERF 0x000000e8
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index d42e6c6b47b1..65ec1965cd28 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -10,16 +10,7 @@
#include <asm/errno.h>
#include <asm/cpumask.h>
#include <uapi/asm/msr.h>
-
-struct msr {
- union {
- struct {
- u32 l;
- u32 h;
- };
- u64 q;
- };
-};
+#include <asm/shared/msr.h>
struct msr_info {
u32 msr_no;
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 1cb9c17a4cb4..5c5f1e56c404 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -47,6 +47,7 @@ struct nmiaction {
#define register_nmi_handler(t, fn, fg, n, init...) \
({ \
static struct nmiaction init fn##_na = { \
+ .list = LIST_HEAD_INIT(fn##_na.list), \
.handler = (fn), \
.name = (n), \
.flags = (fg), \
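
The register_nmi_handler() change statically initializes the action's list node so it is self-linked from the start. A miniature model of why that matters is sketched below (the kernel's real list primitives carry more debugging checks): a self-linked node survives list-removal operations even if it was never added to a list, whereas a zeroed node would dereference NULL.

/*
 * Self-linked list nodes can be tested and deleted safely before any
 * list_add(); this is the invariant the LIST_HEAD_INIT() initializer
 * establishes at compile time.
 */
#include <assert.h>

struct list_head { struct list_head *next, *prev; };

#define LIST_HEAD_INIT(name) { &(name), &(name) }

static int list_empty(const struct list_head *h) { return h->next == h; }

static void list_del_init(struct list_head *e)
{
	e->prev->next = e->next;
	e->next->prev = e->prev;
	e->next = e->prev = e;	/* back to the self-linked state */
}

static struct list_head node = LIST_HEAD_INIT(node);

int main(void)
{
	assert(list_empty(&node));	/* never registered */
	list_del_init(&node);		/* still safe: no NULL dereference */
	assert(list_empty(&node));
	return 0;
}
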
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index e9c86299b835..baa70451b8df 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -16,7 +16,7 @@ extern unsigned long page_offset_base;
extern unsigned long vmalloc_base;
extern unsigned long vmemmap_base;
-static inline unsigned long __phys_addr_nodebug(unsigned long x)
+static __always_inline unsigned long __phys_addr_nodebug(unsigned long x)
{
unsigned long y = x - __START_KERNEL_map;
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index a0627dfae541..1307cd689d2a 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -93,6 +93,15 @@ struct irq_routing_table {
struct irq_info slots[];
} __attribute__((packed));
+struct irt_routing_table {
+ u32 signature; /* IRT_SIGNATURE should be here */
+ u8 size; /* Number of entries provided */
+ u8 used; /* Number of entries actually used */
+ u16 exclusive_irqs; /* IRQs devoted exclusively to
+ PCI usage */
+ struct irq_info slots[];
+} __attribute__((packed));
+
extern unsigned int pcibios_irq_mask;
extern raw_spinlock_t pci_config_lock;
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index b06e4c573add..409725e86f42 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_PERF_EVENT_H
#define _ASM_X86_PERF_EVENT_H
+#include <linux/static_call.h>
+
/*
* Performance event hw details:
*/
@@ -184,6 +186,18 @@ union cpuid28_ecx {
unsigned int full;
};
+/*
+ * AMD "Extended Performance Monitoring and Debug" CPUID
+ * detection/enumeration details:
+ */
+union cpuid_0x80000022_ebx {
+ struct {
+ /* Number of Core Performance Counters */
+ unsigned int num_core_pmc:4;
+ } split;
+ unsigned int full;
+};
+
struct x86_pmu_capability {
int version;
int num_counters_gp;
@@ -371,6 +385,11 @@ struct pebs_xmm {
};
/*
+ * AMD Extended Performance Monitoring and Debug cpuid feature detection
+ */
+#define EXT_PERFMON_DEBUG_FEATURES 0x80000022
+
+/*
* IBS cpuid feature detection
*/
@@ -391,6 +410,7 @@ struct pebs_xmm {
#define IBS_CAPS_OPBRNFUSE (1U<<8)
#define IBS_CAPS_FETCHCTLEXTD (1U<<9)
#define IBS_CAPS_OPDATA4 (1U<<10)
+#define IBS_CAPS_ZEN4 (1U<<11)
#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \
| IBS_CAPS_FETCHSAM \
@@ -404,6 +424,7 @@ struct pebs_xmm {
#define IBSCTL_LVT_OFFSET_MASK 0x0F
/* IBS fetch bits/masks */
+#define IBS_FETCH_L3MISSONLY (1ULL<<59)
#define IBS_FETCH_RAND_EN (1ULL<<57)
#define IBS_FETCH_VAL (1ULL<<49)
#define IBS_FETCH_ENABLE (1ULL<<48)
@@ -420,6 +441,7 @@ struct pebs_xmm {
#define IBS_OP_CNT_CTL (1ULL<<19)
#define IBS_OP_VAL (1ULL<<18)
#define IBS_OP_ENABLE (1ULL<<17)
+#define IBS_OP_L3MISSONLY (1ULL<<16)
#define IBS_OP_MAX_CNT 0x0000FFFFULL
#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
#define IBS_OP_MAX_CNT_EXT_MASK (0x7FULL<<20) /* separate upper 7 bits */
@@ -518,6 +540,27 @@ static inline void intel_pt_handle_vmx(int on)
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
extern void amd_pmu_enable_virt(void);
extern void amd_pmu_disable_virt(void);
+
+#if defined(CONFIG_PERF_EVENTS_AMD_BRS)
+
+#define PERF_NEEDS_LOPWR_CB 1
+
+/*
+ * architectural low power callback impacts
+ * drivers/acpi/processor_idle.c
+ * drivers/acpi/acpi_pad.c
+ */
+extern void perf_amd_brs_lopwr_cb(bool lopwr_in);
+
+DECLARE_STATIC_CALL(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
+
+static inline void perf_lopwr_cb(bool lopwr_in)
+{
+ static_call_mod(perf_lopwr_cb)(lopwr_in);
+}
+
+#endif /* PERF_NEEDS_LOPWR_CB */
+
#else
static inline void amd_pmu_enable_virt(void) { }
static inline void amd_pmu_disable_virt(void) { }
diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h
index 1d5f14aff5f6..2e6c04d8a45b 100644
--- a/arch/x86/include/asm/pkeys.h
+++ b/arch/x86/include/asm/pkeys.h
@@ -41,9 +41,6 @@ static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma,
return __arch_override_mprotect_pkey(vma, prot, pkey);
}
-extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
- unsigned long init_val);
-
#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | VM_PKEY_BIT3)
#define mm_pkey_allocation_map(mm) (mm->context.pkey_allocation_map)
@@ -118,11 +115,6 @@ int mm_pkey_free(struct mm_struct *mm, int pkey)
return 0;
}
-extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
- unsigned long init_val);
-extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
- unsigned long init_val);
-
static inline int vma_pkey(struct vm_area_struct *vma)
{
unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 |
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index feed36d44d04..12ef86b19910 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -13,6 +13,8 @@ void syscall_init(void);
#ifdef CONFIG_X86_64
void entry_SYSCALL_64(void);
void entry_SYSCALL_64_safe_stack(void);
+void entry_SYSRETQ_unsafe_stack(void);
+void entry_SYSRETQ_end(void);
long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2);
#endif
@@ -28,6 +30,8 @@ void entry_SYSENTER_compat(void);
void __end_entry_SYSENTER_compat(void);
void entry_SYSCALL_compat(void);
void entry_SYSCALL_compat_safe_stack(void);
+void entry_SYSRETL_compat_unsafe_stack(void);
+void entry_SYSRETL_compat_end(void);
void entry_INT80_compat(void);
#ifdef CONFIG_XEN_PV
void xen_entry_INT80_compat(void);
@@ -35,11 +39,9 @@ void xen_entry_INT80_compat(void);
#endif
void x86_configure_nx(void);
-void x86_report_nx(void);
extern int reboot_force;
-long do_arch_prctl_common(struct task_struct *task, int option,
- unsigned long arg2);
+long do_arch_prctl_common(int option, unsigned long arg2);
#endif /* _ASM_X86_PROTO_H */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 4357e0f2cd5f..f4db78b09c8f 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -186,9 +186,13 @@ static __always_inline bool ip_within_syscall_gap(struct pt_regs *regs)
bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack);
+ ret = ret || (regs->ip >= (unsigned long)entry_SYSRETQ_unsafe_stack &&
+ regs->ip < (unsigned long)entry_SYSRETQ_end);
#ifdef CONFIG_IA32_EMULATION
ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat &&
regs->ip < (unsigned long)entry_SYSCALL_compat_safe_stack);
+ ret = ret || (regs->ip >= (unsigned long)entry_SYSRETL_compat_unsafe_stack &&
+ regs->ip < (unsigned long)entry_SYSRETL_compat_end);
#endif
return ret;
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index 331474b150f1..fd6f6e5b755a 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -25,6 +25,7 @@ struct real_mode_header {
u32 sev_es_trampoline_start;
#endif
#ifdef CONFIG_X86_64
+ u32 trampoline_start64;
u32 trampoline_pgd;
#endif
/* ACPI S3 wakeup */
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 656ed6531d03..2e7890dd58a4 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -350,18 +350,6 @@ static inline void __loadsegment_fs(unsigned short value)
#define savesegment(seg, value) \
asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
-/*
- * x86-32 user GS accessors. This is ugly and could do with some cleaning up.
- */
-#ifdef CONFIG_X86_32
-# define get_user_gs(regs) (u16)({ unsigned long v; savesegment(gs, v); v; })
-# define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
-# define task_user_gs(tsk) ((tsk)->thread.gs)
-# define lazy_save_gs(v) savesegment(gs, (v))
-# define lazy_load_gs(v) loadsegment(gs, (v))
-# define load_gs_index(v) loadsegment(gs, (v))
-#endif /* X86_32 */
-
#endif /* !__ASSEMBLY__ */
#endif /* __KERNEL__ */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 896e48d45828..7590ac2570b9 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -50,7 +50,6 @@ extern unsigned long saved_video_mode;
extern void reserve_standard_io_resources(void);
extern void i386_reserve_resources(void);
extern unsigned long __startup_64(unsigned long physaddr, struct boot_params *bp);
-extern unsigned long __startup_secondary_64(void);
extern void startup_64_setup_env(unsigned long physbase);
extern void early_setup_idt(void);
extern void __init do_early_exception(struct pt_regs *regs, int trapnr);
@@ -109,27 +108,19 @@ extern unsigned long _brk_end;
void *extend_brk(size_t size, size_t align);
/*
- * Reserve space in the brk section. The name must be unique within
- * the file, and somewhat descriptive. The size is in bytes. Must be
- * used at file scope.
+ * Reserve space in the brk section. The name must be unique within the file,
+ * and somewhat descriptive. The size is in bytes.
*
- * (This uses a temp function to wrap the asm so we can pass it the
- * size parameter; otherwise we wouldn't be able to. We can't use a
- * "section" attribute on a normal variable because it always ends up
- * being @progbits, which ends up allocating space in the vmlinux
- * executable.)
+ * The allocation is done using inline asm (rather than using a section
+ * attribute on a normal variable) in order to allow the use of @nobits, so
+ * that it doesn't take up any space in the vmlinux file.
*/
-#define RESERVE_BRK(name,sz) \
- static void __section(".discard.text") __noendbr __used notrace \
- __brk_reservation_fn_##name##__(void) { \
- asm volatile ( \
- ".pushsection .brk_reservation,\"aw\",@nobits;" \
- ".brk." #name ":" \
- " 1:.skip %c0;" \
- " .size .brk." #name ", . - 1b;" \
- " .popsection" \
- : : "i" (sz)); \
- }
+#define RESERVE_BRK(name, size) \
+ asm(".pushsection .brk_reservation,\"aw\",@nobits\n\t" \
+ ".brk." #name ":\n\t" \
+ ".skip " __stringify(size) "\n\t" \
+ ".size .brk." #name ", " __stringify(size) "\n\t" \
+ ".popsection\n\t")
extern void probe_roms(void);
#ifdef __i386__
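
The rewritten RESERVE_BRK emits its reservation directly from a file-scope asm() statement instead of wrapping it in a dummy function. A user-space sketch of the same trick follows, with illustrative section and symbol names; .pushsection with @nobits means the reserved bytes occupy no space in the object file.

/*
 * File-scope asm reservation in miniature: the macro expands to a
 * top-level asm() that creates a named, sized block in a nobits
 * section, usable at file scope exactly like RESERVE_BRK.
 */
#include <stdio.h>

#define STR_(x) #x
#define STR(x) STR_(x)

#define RESERVE_DEMO(name, size)					\
	asm(".pushsection .demo_reservation,\"aw\",@nobits\n\t"		\
	    ".demo." #name ":\n\t"					\
	    ".skip " STR(size) "\n\t"					\
	    ".size .demo." #name ", " STR(size) "\n\t"			\
	    ".popsection\n\t")

RESERVE_DEMO(pagetables, 4096);	/* legal at file scope */

int main(void)
{
	puts("reservation emitted without enlarging the binary");
	return 0;
}
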
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index 1b2fd32b42fe..b8357d6ecd47 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -57,9 +57,79 @@
#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006
#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007
+/* GHCB GPA Register */
+#define GHCB_MSR_REG_GPA_REQ 0x012
+#define GHCB_MSR_REG_GPA_REQ_VAL(v) \
+ /* GHCBData[63:12] */ \
+ (((u64)((v) & GENMASK_ULL(51, 0)) << 12) | \
+ /* GHCBData[11:0] */ \
+ GHCB_MSR_REG_GPA_REQ)
+
+#define GHCB_MSR_REG_GPA_RESP 0x013
+#define GHCB_MSR_REG_GPA_RESP_VAL(v) \
+ /* GHCBData[63:12] */ \
+ (((u64)(v) & GENMASK_ULL(63, 12)) >> 12)
+
+/*
+ * SNP Page State Change Operation
+ *
+ * GHCBData[55:52] - Page operation:
+ * 0x0001 Page assignment, Private
+ * 0x0002 Page assignment, Shared
+ */
+enum psc_op {
+ SNP_PAGE_STATE_PRIVATE = 1,
+ SNP_PAGE_STATE_SHARED,
+};
+
+#define GHCB_MSR_PSC_REQ 0x014
+#define GHCB_MSR_PSC_REQ_GFN(gfn, op) \
+ /* GHCBData[55:52] */ \
+ (((u64)((op) & 0xf) << 52) | \
+ /* GHCBData[51:12] */ \
+ ((u64)((gfn) & GENMASK_ULL(39, 0)) << 12) | \
+ /* GHCBData[11:0] */ \
+ GHCB_MSR_PSC_REQ)
+
+#define GHCB_MSR_PSC_RESP 0x015
+#define GHCB_MSR_PSC_RESP_VAL(val) \
+ /* GHCBData[63:32] */ \
+ (((u64)(val) & GENMASK_ULL(63, 32)) >> 32)
+
/* GHCB Hypervisor Feature Request/Response */
#define GHCB_MSR_HV_FT_REQ 0x080
#define GHCB_MSR_HV_FT_RESP 0x081
+#define GHCB_MSR_HV_FT_RESP_VAL(v) \
+ /* GHCBData[63:12] */ \
+ (((u64)(v) & GENMASK_ULL(63, 12)) >> 12)
+
+#define GHCB_HV_FT_SNP BIT_ULL(0)
+#define GHCB_HV_FT_SNP_AP_CREATION BIT_ULL(1)
+
+/* SNP Page State Change NAE event */
+#define VMGEXIT_PSC_MAX_ENTRY 253
+
+struct psc_hdr {
+ u16 cur_entry;
+ u16 end_entry;
+ u32 reserved;
+} __packed;
+
+struct psc_entry {
+ u64 cur_page : 12,
+ gfn : 40,
+ operation : 4,
+ pagesize : 1,
+ reserved : 7;
+} __packed;
+
+struct snp_psc_desc {
+ struct psc_hdr hdr;
+ struct psc_entry entries[VMGEXIT_PSC_MAX_ENTRY];
+} __packed;
+
+/* Guest message request error code */
+#define SNP_GUEST_REQ_INVALID_LEN BIT_ULL(32)
#define GHCB_MSR_TERM_REQ 0x100
#define GHCB_MSR_TERM_REASON_SET_POS 12
@@ -73,8 +143,20 @@
/* GHCBData[23:16] */ \
((((u64)reason_val) & 0xff) << 16))
+/* Error codes from reason set 0 */
+#define SEV_TERM_SET_GEN 0
#define GHCB_SEV_ES_GEN_REQ 0
#define GHCB_SEV_ES_PROT_UNSUPPORTED 1
+#define GHCB_SNP_UNSUPPORTED 2
+
+/* Linux-specific reason codes (used with reason set 1) */
+#define SEV_TERM_SET_LINUX 1
+#define GHCB_TERM_REGISTER 0 /* GHCB GPA registration failure */
+#define GHCB_TERM_PSC 1 /* Page State Change failure */
+#define GHCB_TERM_PVALIDATE 2 /* Pvalidate failure */
+#define GHCB_TERM_NOT_VMPL0 3 /* SNP guest is not running at VMPL-0 */
+#define GHCB_TERM_CPUID 4 /* CPUID-validation failure */
+#define GHCB_TERM_CPUID_HV 5 /* CPUID failure during hypervisor fallback */
#define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK)
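
The Page State Change MSR protocol packs the operation into GHCBData[55:52] and the gfn into GHCBData[51:12] on top of request code 0x014. A worked example of the encoding, with constants mirroring the header and a made-up gfn:

/*
 * Compose a PSC request MSR value: op in bits 55:52, gfn in bits
 * 51:12, request code 0x014 in bits 11:0.
 */
#include <stdint.h>
#include <stdio.h>

#define GENMASK_ULL(h, l) (((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

#define GHCB_MSR_PSC_REQ	0x014
#define SNP_PAGE_STATE_PRIVATE	1

static uint64_t psc_req(uint64_t gfn, unsigned int op)
{
	return ((uint64_t)(op & 0xf) << 52) |
	       ((gfn & GENMASK_ULL(39, 0)) << 12) |
	       GHCB_MSR_PSC_REQ;
}

int main(void)
{
	/* gfn 0x12345: expect 0x10000012345014 for "make private" */
	printf("%#llx\n",
	       (unsigned long long)psc_req(0x12345, SNP_PAGE_STATE_PRIVATE));
	return 0;
}
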
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index ec060c433589..19514524f0f8 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -11,9 +11,10 @@
#include <linux/types.h>
#include <asm/insn.h>
#include <asm/sev-common.h>
+#include <asm/bootparam.h>
-#define GHCB_PROTO_OUR 0x0001UL
-#define GHCB_PROTOCOL_MAX 1ULL
+#define GHCB_PROTOCOL_MIN 1ULL
+#define GHCB_PROTOCOL_MAX 2ULL
#define GHCB_DEFAULT_USAGE 0ULL
#define VMGEXIT() { asm volatile("rep; vmmcall\n\r"); }
@@ -42,6 +43,24 @@ struct es_em_ctxt {
struct es_fault_info fi;
};
+/*
+ * AMD SEV Confidential computing blob structure. The structure is
+ * defined in OVMF UEFI firmware header:
+ * https://github.com/tianocore/edk2/blob/master/OvmfPkg/Include/Guid/ConfidentialComputingSevSnpBlob.h
+ */
+#define CC_BLOB_SEV_HDR_MAGIC 0x45444d41
+struct cc_blob_sev_info {
+ u32 magic;
+ u16 version;
+ u16 reserved;
+ u64 secrets_phys;
+ u32 secrets_len;
+ u32 rsvd1;
+ u64 cpuid_phys;
+ u32 cpuid_len;
+ u32 rsvd2;
+} __packed;
+
void do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code);
static inline u64 lower_bits(u64 val, unsigned int bits)
@@ -60,6 +79,61 @@ extern void vc_no_ghcb(void);
extern void vc_boot_ghcb(void);
extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
+/* Software defined (when rFlags.CF = 1) */
+#define PVALIDATE_FAIL_NOUPDATE 255
+
+/* RMP page size */
+#define RMP_PG_SIZE_4K 0
+
+#define RMPADJUST_VMSA_PAGE_BIT BIT(16)
+
+/* SNP Guest message request */
+struct snp_req_data {
+ unsigned long req_gpa;
+ unsigned long resp_gpa;
+ unsigned long data_gpa;
+ unsigned int data_npages;
+};
+
+struct sev_guest_platform_data {
+ u64 secrets_gpa;
+};
+
+/*
+ * The secrets page contains a 96-byte area reserved for guest OS use. The
+ * guest OS uses this area to save the message sequence number for each
+ * VMPCK.
+ *
+ * See the GHCB spec section "Secret page layout" for the format of this area.
+ */
+struct secrets_os_area {
+ u32 msg_seqno_0;
+ u32 msg_seqno_1;
+ u32 msg_seqno_2;
+ u32 msg_seqno_3;
+ u64 ap_jump_table_pa;
+ u8 rsvd[40];
+ u8 guest_usage[32];
+} __packed;
+
+#define VMPCK_KEY_LEN 32
+
+/* See the SNP spec version 0.9 for secrets page format */
+struct snp_secrets_page_layout {
+ u32 version;
+ u32 imien : 1,
+ rsvd1 : 31;
+ u32 fms;
+ u32 rsvd2;
+ u8 gosvw[16];
+ u8 vmpck0[VMPCK_KEY_LEN];
+ u8 vmpck1[VMPCK_KEY_LEN];
+ u8 vmpck2[VMPCK_KEY_LEN];
+ u8 vmpck3[VMPCK_KEY_LEN];
+ struct secrets_os_area os_area;
+ u8 rsvd3[3840];
+} __packed;
+
#ifdef CONFIG_AMD_MEM_ENCRYPT
extern struct static_key_false sev_es_enable_key;
extern void __sev_es_ist_enter(struct pt_regs *regs);
@@ -87,12 +161,71 @@ extern enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
struct es_em_ctxt *ctxt,
u64 exit_code, u64 exit_info_1,
u64 exit_info_2);
+static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs)
+{
+ int rc;
+
+ /* "rmpadjust" mnemonic support in binutils 2.36 and newer */
+ asm volatile(".byte 0xF3,0x0F,0x01,0xFE\n\t"
+ : "=a"(rc)
+ : "a"(vaddr), "c"(rmp_psize), "d"(attrs)
+ : "memory", "cc");
+
+ return rc;
+}
+static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate)
+{
+ bool no_rmpupdate;
+ int rc;
+
+ /* "pvalidate" mnemonic support in binutils 2.36 and newer */
+ asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFF\n\t"
+ CC_SET(c)
+ : CC_OUT(c) (no_rmpupdate), "=a"(rc)
+ : "a"(vaddr), "c"(rmp_psize), "d"(validate)
+ : "memory", "cc");
+
+ if (no_rmpupdate)
+ return PVALIDATE_FAIL_NOUPDATE;
+
+ return rc;
+}
+void setup_ghcb(void);
+void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
+ unsigned int npages);
+void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
+ unsigned int npages);
+void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op);
+void snp_set_memory_shared(unsigned long vaddr, unsigned int npages);
+void snp_set_memory_private(unsigned long vaddr, unsigned int npages);
+void snp_set_wakeup_secondary_cpu(void);
+bool snp_init(struct boot_params *bp);
+void snp_abort(void);
+int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err);
#else
static inline void sev_es_ist_enter(struct pt_regs *regs) { }
static inline void sev_es_ist_exit(void) { }
static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; }
static inline void sev_es_nmi_complete(void) { }
static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; }
+static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; }
+static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; }
+static inline void setup_ghcb(void) { }
+static inline void __init
+early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned int npages) { }
+static inline void __init
+early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned int npages) { }
+static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { }
+static inline void snp_set_memory_shared(unsigned long vaddr, unsigned int npages) { }
+static inline void snp_set_memory_private(unsigned long vaddr, unsigned int npages) { }
+static inline void snp_set_wakeup_secondary_cpu(void) { }
+static inline bool snp_init(struct boot_params *bp) { return false; }
+static inline void snp_abort(void) { }
+static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input,
+ unsigned long *fw_err)
+{
+ return -ENOTTY;
+}
#endif
#endif
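
A hedged kernel-context sketch of consuming pvalidate() as defined above (not runnable from user space; PVALIDATE is a privileged SNP-guest instruction, and sev_es_terminate() is assumed here as the in-tree termination helper, mirroring how callers are expected to react to failure):

/*
 * The helper has three outcomes: 0 on success, a nonzero hardware
 * failure code, or PVALIDATE_FAIL_NOUPDATE when CF=1 reported that no
 * RMP change was needed.
 */
static void __init validate_4k_page(unsigned long vaddr)
{
	int rc = pvalidate(vaddr, RMP_PG_SIZE_4K, true);

	if (rc == 0)
		return;			/* page newly validated */
	if (rc == PVALIDATE_FAIL_NOUPDATE)
		return;			/* already in the requested state */

	/* Assumed in-tree pattern: terminate with the PVALIDATE reason. */
	sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
}
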
diff --git a/arch/x86/include/asm/shared/io.h b/arch/x86/include/asm/shared/io.h
new file mode 100644
index 000000000000..c0ef921c0586
--- /dev/null
+++ b/arch/x86/include/asm/shared/io.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SHARED_IO_H
+#define _ASM_X86_SHARED_IO_H
+
+#include <linux/types.h>
+
+#define BUILDIO(bwl, bw, type) \
+static inline void __out##bwl(type value, u16 port) \
+{ \
+ asm volatile("out" #bwl " %" #bw "0, %w1" \
+ : : "a"(value), "Nd"(port)); \
+} \
+ \
+static inline type __in##bwl(u16 port) \
+{ \
+ type value; \
+ asm volatile("in" #bwl " %w1, %" #bw "0" \
+ : "=a"(value) : "Nd"(port)); \
+ return value; \
+}
+
+BUILDIO(b, b, u8)
+BUILDIO(w, w, u16)
+BUILDIO(l, , u32)
+#undef BUILDIO
+
+#define inb __inb
+#define inw __inw
+#define inl __inl
+#define outb __outb
+#define outw __outw
+#define outl __outl
+
+#endif
diff --git a/arch/x86/include/asm/shared/msr.h b/arch/x86/include/asm/shared/msr.h
new file mode 100644
index 000000000000..1e6ec10b3a15
--- /dev/null
+++ b/arch/x86/include/asm/shared/msr.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SHARED_MSR_H
+#define _ASM_X86_SHARED_MSR_H
+
+struct msr {
+ union {
+ struct {
+ u32 l;
+ u32 h;
+ };
+ u64 q;
+ };
+};
+
+#endif /* _ASM_X86_SHARED_MSR_H */
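
The struct msr union moved into the shared header aliases the 32-bit l/h pair onto the 64-bit q, matching how RDMSR/WRMSR split a value across EDX:EAX. A quick standalone demonstration (little-endian layout, as on all x86):

/* Writing the 64-bit view and reading back the two 32-bit halves. */
#include <assert.h>
#include <stdint.h>

struct msr {
	union {
		struct {
			uint32_t l;
			uint32_t h;
		};
		uint64_t q;
	};
};

int main(void)
{
	struct msr m = { .q = 0x1122334455667788ULL };

	assert(m.l == 0x55667788u);	/* low half: EAX */
	assert(m.h == 0x11223344u);	/* high half: EDX */
	return 0;
}
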
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
new file mode 100644
index 000000000000..e53f26228fbb
--- /dev/null
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SHARED_TDX_H
+#define _ASM_X86_SHARED_TDX_H
+
+#include <linux/bits.h>
+#include <linux/types.h>
+
+#define TDX_HYPERCALL_STANDARD 0
+
+#define TDX_HCALL_HAS_OUTPUT BIT(0)
+#define TDX_HCALL_ISSUE_STI BIT(1)
+
+#define TDX_CPUID_LEAF_ID 0x21
+#define TDX_IDENT "IntelTDX "
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Used in __tdx_hypercall() to pass register values to, and read them back
+ * from, the TDCALL instruction when requesting services from the VMM.
+ *
+ * This is a software-only structure and not part of the TDX module/VMM ABI.
+ */
+struct tdx_hypercall_args {
+ u64 r10;
+ u64 r11;
+ u64 r12;
+ u64 r13;
+ u64 r14;
+ u64 r15;
+};
+
+/* Used to request services from the VMM */
+u64 __tdx_hypercall(struct tdx_hypercall_args *args, unsigned long flags);
+
+/* Called from __tdx_hypercall() for unrecoverable failure */
+void __tdx_hypercall_failed(void);
+
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_X86_SHARED_TDX_H */
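
A hedged kernel-context sketch of driving __tdx_hypercall() through this interface; HYPERCALL_NR_EXAMPLE is a made-up placeholder, and which output register is meaningful depends on the specific call:

/*
 * Standard TDVMCALL: r10 carries TDX_HYPERCALL_STANDARD, r11 the
 * hypercall number, r12..r15 the arguments. TDX_HCALL_HAS_OUTPUT asks
 * __tdx_hypercall() to copy the registers back into args.
 */
static u64 tdx_hcall_example(u64 arg)
{
	struct tdx_hypercall_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = HYPERCALL_NR_EXAMPLE,	/* hypothetical number */
		.r12 = arg,
	};

	if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
		return 0;		/* hypercall reported failure */

	return args.r11;	/* illustrative: output register is per-call */
}
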
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index d17b39893b79..bab490379c65 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -19,25 +19,14 @@
#ifdef __ASSEMBLY__
-#ifdef CONFIG_X86_SMAP
-
#define ASM_CLAC \
ALTERNATIVE "", __ASM_CLAC, X86_FEATURE_SMAP
#define ASM_STAC \
ALTERNATIVE "", __ASM_STAC, X86_FEATURE_SMAP
-#else /* CONFIG_X86_SMAP */
-
-#define ASM_CLAC
-#define ASM_STAC
-
-#endif /* CONFIG_X86_SMAP */
-
#else /* __ASSEMBLY__ */
-#ifdef CONFIG_X86_SMAP
-
static __always_inline void clac(void)
{
/* Note: a barrier is implicit in alternative() */
@@ -76,19 +65,6 @@ static __always_inline void smap_restore(unsigned long flags)
#define ASM_STAC \
ALTERNATIVE("", __ASM_STAC, X86_FEATURE_SMAP)
-#else /* CONFIG_X86_SMAP */
-
-static inline void clac(void) { }
-static inline void stac(void) { }
-
-static inline unsigned long smap_save(void) { return 0; }
-static inline void smap_restore(unsigned long flags) { }
-
-#define ASM_CLAC
-#define ASM_STAC
-
-#endif /* CONFIG_X86_SMAP */
-
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_SMAP_H */
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 68c257a3de0d..45b18eb94fa1 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -184,14 +184,15 @@ static inline void wbinvd(void)
native_wbinvd();
}
-#ifdef CONFIG_X86_64
static inline void load_gs_index(unsigned int selector)
{
+#ifdef CONFIG_X86_64
native_load_gs_index(selector);
-}
-
+#else
+ loadsegment(gs, selector);
#endif
+}
#endif /* CONFIG_PARAVIRT_XXL */
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index 7b132d0312eb..a800abb1a992 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -19,7 +19,6 @@ struct saved_context {
u16 gs;
unsigned long cr0, cr2, cr3, cr4;
u64 misc_enable;
- bool misc_enable_saved;
struct saved_msrs saved_msrs;
struct desc_ptr gdt_desc;
struct desc_ptr idt;
@@ -28,6 +27,7 @@ struct saved_context {
unsigned long tr;
unsigned long safety;
unsigned long return_address;
+ bool misc_enable_saved;
} __attribute__((packed));
/* routines for saving/restoring kernel state */
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index 35bb35d28733..54df06687d83 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -14,9 +14,13 @@
* Image of the saved processor state, used by the low level ACPI suspend to
* RAM code and by the low level hibernation code.
*
- * If you modify it, fix arch/x86/kernel/acpi/wakeup_64.S and make sure that
- * __save/__restore_processor_state(), defined in arch/x86/kernel/suspend_64.c,
- * still work as required.
+ * If you modify it, check how it is used in arch/x86/kernel/acpi/wakeup_64.S
+ * and make sure that __save/__restore_processor_state(), defined in
+ * arch/x86/power/cpu.c, still work as required.
+ *
+ * Because the structure is packed, make sure to avoid unaligned members, both
+ * for optimisation purposes and because tools like kmemleak only search for
+ * pointers that are aligned.
*/
struct saved_context {
struct pt_regs regs;
@@ -36,7 +40,6 @@ struct saved_context {
unsigned long cr0, cr2, cr3, cr4;
u64 misc_enable;
- bool misc_enable_saved;
struct saved_msrs saved_msrs;
unsigned long efer;
u16 gdt_pad; /* Unused */
@@ -48,6 +51,7 @@ struct saved_context {
unsigned long tr;
unsigned long safety;
unsigned long return_address;
+ bool misc_enable_saved;
} __attribute__((packed));
#define loaddebug(thread,register) \
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index f70a5108d464..1b07fba11704 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -271,6 +271,7 @@ struct vmcb_seg {
u64 base;
} __packed;
+/* Save area definition for legacy and SEV-MEM guests */
struct vmcb_save_area {
struct vmcb_seg es;
struct vmcb_seg cs;
@@ -282,12 +283,12 @@ struct vmcb_save_area {
struct vmcb_seg ldtr;
struct vmcb_seg idtr;
struct vmcb_seg tr;
- u8 reserved_1[43];
+ u8 reserved_1[42];
+ u8 vmpl;
u8 cpl;
u8 reserved_2[4];
u64 efer;
- u8 reserved_3[104];
- u64 xss; /* Valid for SEV-ES only */
+ u8 reserved_3[112];
u64 cr4;
u64 cr3;
u64 cr0;
@@ -297,7 +298,9 @@ struct vmcb_save_area {
u64 rip;
u8 reserved_4[88];
u64 rsp;
- u8 reserved_5[24];
+ u64 s_cet;
+ u64 ssp;
+ u64 isst_addr;
u64 rax;
u64 star;
u64 lstar;
@@ -308,29 +311,145 @@ struct vmcb_save_area {
u64 sysenter_esp;
u64 sysenter_eip;
u64 cr2;
- u8 reserved_6[32];
+ u8 reserved_5[32];
u64 g_pat;
u64 dbgctl;
u64 br_from;
u64 br_to;
u64 last_excp_from;
u64 last_excp_to;
-
- /*
- * The following part of the save area is valid only for
- * SEV-ES guests when referenced through the GHCB or for
- * saving to the host save area.
- */
- u8 reserved_7[72];
+ u8 reserved_6[72];
u32 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */
- u8 reserved_7b[4];
+} __packed;
+
+/* Save area definition for SEV-ES and SEV-SNP guests */
+struct sev_es_save_area {
+ struct vmcb_seg es;
+ struct vmcb_seg cs;
+ struct vmcb_seg ss;
+ struct vmcb_seg ds;
+ struct vmcb_seg fs;
+ struct vmcb_seg gs;
+ struct vmcb_seg gdtr;
+ struct vmcb_seg ldtr;
+ struct vmcb_seg idtr;
+ struct vmcb_seg tr;
+ u64 vmpl0_ssp;
+ u64 vmpl1_ssp;
+ u64 vmpl2_ssp;
+ u64 vmpl3_ssp;
+ u64 u_cet;
+ u8 reserved_1[2];
+ u8 vmpl;
+ u8 cpl;
+ u8 reserved_2[4];
+ u64 efer;
+ u8 reserved_3[104];
+ u64 xss;
+ u64 cr4;
+ u64 cr3;
+ u64 cr0;
+ u64 dr7;
+ u64 dr6;
+ u64 rflags;
+ u64 rip;
+ u64 dr0;
+ u64 dr1;
+ u64 dr2;
+ u64 dr3;
+ u64 dr0_addr_mask;
+ u64 dr1_addr_mask;
+ u64 dr2_addr_mask;
+ u64 dr3_addr_mask;
+ u8 reserved_4[24];
+ u64 rsp;
+ u64 s_cet;
+ u64 ssp;
+ u64 isst_addr;
+ u64 rax;
+ u64 star;
+ u64 lstar;
+ u64 cstar;
+ u64 sfmask;
+ u64 kernel_gs_base;
+ u64 sysenter_cs;
+ u64 sysenter_esp;
+ u64 sysenter_eip;
+ u64 cr2;
+ u8 reserved_5[32];
+ u64 g_pat;
+ u64 dbgctl;
+ u64 br_from;
+ u64 br_to;
+ u64 last_excp_from;
+ u64 last_excp_to;
+ u8 reserved_7[80];
u32 pkru;
- u8 reserved_7a[20];
- u64 reserved_8; /* rax already available at 0x01f8 */
+ u8 reserved_8[20];
+ u64 reserved_9; /* rax already available at 0x01f8 */
+ u64 rcx;
+ u64 rdx;
+ u64 rbx;
+ u64 reserved_10; /* rsp already available at 0x01d8 */
+ u64 rbp;
+ u64 rsi;
+ u64 rdi;
+ u64 r8;
+ u64 r9;
+ u64 r10;
+ u64 r11;
+ u64 r12;
+ u64 r13;
+ u64 r14;
+ u64 r15;
+ u8 reserved_11[16];
+ u64 guest_exit_info_1;
+ u64 guest_exit_info_2;
+ u64 guest_exit_int_info;
+ u64 guest_nrip;
+ u64 sev_features;
+ u64 vintr_ctrl;
+ u64 guest_exit_code;
+ u64 virtual_tom;
+ u64 tlb_id;
+ u64 pcpu_id;
+ u64 event_inj;
+ u64 xcr0;
+ u8 reserved_12[16];
+
+ /* Floating point area */
+ u64 x87_dp;
+ u32 mxcsr;
+ u16 x87_ftw;
+ u16 x87_fsw;
+ u16 x87_fcw;
+ u16 x87_fop;
+ u16 x87_ds;
+ u16 x87_cs;
+ u64 x87_rip;
+ u8 fpreg_x87[80];
+ u8 fpreg_xmm[256];
+ u8 fpreg_ymm[256];
+} __packed;
+
+struct ghcb_save_area {
+ u8 reserved_1[203];
+ u8 cpl;
+ u8 reserved_2[116];
+ u64 xss;
+ u8 reserved_3[24];
+ u64 dr7;
+ u8 reserved_4[16];
+ u64 rip;
+ u8 reserved_5[88];
+ u64 rsp;
+ u8 reserved_6[24];
+ u64 rax;
+ u8 reserved_7[264];
u64 rcx;
u64 rdx;
u64 rbx;
- u64 reserved_9; /* rsp already available at 0x01d8 */
+ u8 reserved_8[8];
u64 rbp;
u64 rsi;
u64 rdi;
@@ -342,22 +461,24 @@ struct vmcb_save_area {
u64 r13;
u64 r14;
u64 r15;
- u8 reserved_10[16];
+ u8 reserved_9[16];
u64 sw_exit_code;
u64 sw_exit_info_1;
u64 sw_exit_info_2;
u64 sw_scratch;
- u8 reserved_11[56];
+ u8 reserved_10[56];
u64 xcr0;
u8 valid_bitmap[16];
u64 x87_state_gpa;
} __packed;
+#define GHCB_SHARED_BUF_SIZE 2032
+
struct ghcb {
- struct vmcb_save_area save;
- u8 reserved_save[2048 - sizeof(struct vmcb_save_area)];
+ struct ghcb_save_area save;
+ u8 reserved_save[2048 - sizeof(struct ghcb_save_area)];
- u8 shared_buffer[2032];
+ u8 shared_buffer[GHCB_SHARED_BUF_SIZE];
u8 reserved_1[10];
u16 protocol_version; /* negotiated SEV-ES/GHCB protocol version */
@@ -365,13 +486,17 @@ struct ghcb {
} __packed;
-#define EXPECTED_VMCB_SAVE_AREA_SIZE 1032
+#define EXPECTED_VMCB_SAVE_AREA_SIZE 740
+#define EXPECTED_GHCB_SAVE_AREA_SIZE 1032
+#define EXPECTED_SEV_ES_SAVE_AREA_SIZE 1648
#define EXPECTED_VMCB_CONTROL_AREA_SIZE 1024
#define EXPECTED_GHCB_SIZE PAGE_SIZE
static inline void __unused_size_checks(void)
{
BUILD_BUG_ON(sizeof(struct vmcb_save_area) != EXPECTED_VMCB_SAVE_AREA_SIZE);
+ BUILD_BUG_ON(sizeof(struct ghcb_save_area) != EXPECTED_GHCB_SAVE_AREA_SIZE);
+ BUILD_BUG_ON(sizeof(struct sev_es_save_area) != EXPECTED_SEV_ES_SAVE_AREA_SIZE);
BUILD_BUG_ON(sizeof(struct vmcb_control_area) != EXPECTED_VMCB_CONTROL_AREA_SIZE);
BUILD_BUG_ON(sizeof(struct ghcb) != EXPECTED_GHCB_SIZE);
}
@@ -441,26 +566,26 @@ struct vmcb {
/* GHCB Accessor functions */
#define GHCB_BITMAP_IDX(field) \
- (offsetof(struct vmcb_save_area, field) / sizeof(u64))
+ (offsetof(struct ghcb_save_area, field) / sizeof(u64))
#define DEFINE_GHCB_ACCESSORS(field) \
- static inline bool ghcb_##field##_is_valid(const struct ghcb *ghcb) \
+ static __always_inline bool ghcb_##field##_is_valid(const struct ghcb *ghcb) \
{ \
return test_bit(GHCB_BITMAP_IDX(field), \
(unsigned long *)&ghcb->save.valid_bitmap); \
} \
\
- static inline u64 ghcb_get_##field(struct ghcb *ghcb) \
+ static __always_inline u64 ghcb_get_##field(struct ghcb *ghcb) \
{ \
return ghcb->save.field; \
} \
\
- static inline u64 ghcb_get_##field##_if_valid(struct ghcb *ghcb) \
+ static __always_inline u64 ghcb_get_##field##_if_valid(struct ghcb *ghcb) \
{ \
return ghcb_##field##_is_valid(ghcb) ? ghcb->save.field : 0; \
} \
\
- static inline void ghcb_set_##field(struct ghcb *ghcb, u64 value) \
+ static __always_inline void ghcb_set_##field(struct ghcb *ghcb, u64 value) \
{ \
__set_bit(GHCB_BITMAP_IDX(field), \
(unsigned long *)&ghcb->save.valid_bitmap); \
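The DEFINE_GHCB_ACCESSORS() macro above generates an is_valid/get/set triple per GHCB field; the setter also records the field in save.valid_bitmap so the hypervisor knows which fields were populated. A minimal sketch of how an exit path might use the generated accessors (the wrapper function is hypothetical; the ghcb_set_*/ghcb_get_* names follow the macro expansion for fields this header instantiates):

static void example_vmgexit_fill(struct ghcb *ghcb, u64 exit_code, u64 rax)
{
	ghcb_set_sw_exit_code(ghcb, exit_code);	/* also sets bit in valid_bitmap */
	ghcb_set_sw_exit_info_1(ghcb, 0);
	ghcb_set_sw_exit_info_2(ghcb, 0);
	ghcb_set_rax(ghcb, rax);

	/* ... VMGEXIT happens here; afterwards, trust only validated fields */
	if (ghcb_rax_is_valid(ghcb))
		rax = ghcb_get_rax(ghcb);
}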
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
new file mode 100644
index 000000000000..020c81a7c729
--- /dev/null
+++ b/arch/x86/include/asm/tdx.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021-2022 Intel Corporation */
+#ifndef _ASM_X86_TDX_H
+#define _ASM_X86_TDX_H
+
+#include <linux/init.h>
+#include <linux/bits.h>
+#include <asm/ptrace.h>
+#include <asm/shared/tdx.h>
+
+/*
+ * SW-defined error codes.
+ *
+ * Bits 47:40 == 0xFF indicate the reserved status code class that is
+ * never used by the TDX module.
+ */
+#define TDX_ERROR _BITUL(63)
+#define TDX_SW_ERROR (TDX_ERROR | GENMASK_ULL(47, 40))
+#define TDX_SEAMCALL_VMFAILINVALID (TDX_SW_ERROR | _UL(0xFFFF0000))
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Used to gather the output register values of the TDCALL and SEAMCALL
+ * instructions when requesting services from the TDX module.
+ *
+ * This is a software only structure and not part of the TDX module/VMM ABI.
+ */
+struct tdx_module_output {
+ u64 rcx;
+ u64 rdx;
+ u64 r8;
+ u64 r9;
+ u64 r10;
+ u64 r11;
+};
+
+/*
+ * Used by the #VE exception handler to gather the #VE exception
+ * info from the TDX module. This is a software only structure
+ * and not part of the TDX module/VMM ABI.
+ */
+struct ve_info {
+ u64 exit_reason;
+ u64 exit_qual;
+ /* Guest Linear (virtual) Address */
+ u64 gla;
+ /* Guest Physical Address */
+ u64 gpa;
+ u32 instr_len;
+ u32 instr_info;
+};
+
+#ifdef CONFIG_INTEL_TDX_GUEST
+
+void __init tdx_early_init(void);
+
+/* Used to communicate with the TDX module */
+u64 __tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
+ struct tdx_module_output *out);
+
+void tdx_get_ve_info(struct ve_info *ve);
+
+bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve);
+
+void tdx_safe_halt(void);
+
+bool tdx_early_handle_ve(struct pt_regs *regs);
+
+#else
+
+static inline void tdx_early_init(void) { };
+static inline void tdx_safe_halt(void) { };
+
+static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; }
+
+#endif /* CONFIG_INTEL_TDX_GUEST */
+
+#if defined(CONFIG_KVM_GUEST) && defined(CONFIG_INTEL_TDX_GUEST)
+long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, unsigned long p2,
+ unsigned long p3, unsigned long p4);
+#else
+static inline long tdx_kvm_hypercall(unsigned int nr, unsigned long p1,
+ unsigned long p2, unsigned long p3,
+ unsigned long p4)
+{
+ return -ENODEV;
+}
+#endif /* CONFIG_INTEL_TDX_GUEST && CONFIG_KVM_GUEST */
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_X86_TDX_H */
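For context, a hedged sketch of how tdx_get_ve_info() plausibly uses the __tdx_module_call() helper declared above: the TDCALL's non-GPR outputs come back through struct tdx_module_output. The TDX_GET_VEINFO leaf number shown is illustrative (taken from the GHCI spec), not something defined in this header:

#define TDX_GET_VEINFO 3	/* assumed TDCALL leaf; for illustration only */

static void example_get_ve_info(struct ve_info *ve)
{
	struct tdx_module_output out;

	__tdx_module_call(TDX_GET_VEINFO, 0, 0, 0, 0, &out);

	ve->exit_reason = out.rcx;
	ve->exit_qual   = out.rdx;
	ve->gla         = out.r8;	/* guest linear address */
	ve->gpa         = out.r9;	/* guest physical address */
	ve->instr_len   = lower_32_bits(out.r10);
	ve->instr_info  = upper_32_bits(out.r10);
}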
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index ebec69c35e95..f0cb881c1d69 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -92,7 +92,6 @@ struct thread_info {
#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_NOTIFY_SIGNAL 17 /* signal notifications exist */
-#define TIF_SLD 18 /* Restore split lock detection on context switch */
#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
@@ -116,7 +115,6 @@ struct thread_info {
#define _TIF_NOCPUID (1 << TIF_NOCPUID)
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
-#define _TIF_SLD (1 << TIF_SLD)
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
@@ -128,7 +126,7 @@ struct thread_info {
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW_BASE \
(_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP | \
- _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE | _TIF_SLD)
+ _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE)
/*
* Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated.
diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h
index a4a8b1b16c0c..956e4145311b 100644
--- a/arch/x86/include/asm/timex.h
+++ b/arch/x86/include/asm/timex.h
@@ -5,6 +5,15 @@
#include <asm/processor.h>
#include <asm/tsc.h>
+static inline unsigned long random_get_entropy(void)
+{
+ if (!IS_ENABLED(CONFIG_X86_TSC) &&
+ !cpu_feature_enabled(X86_FEATURE_TSC))
+ return random_get_entropy_fallback();
+ return rdtsc();
+}
+#define random_get_entropy random_get_entropy
+
/* Assume we use the PIT time source for the clock tick */
#define CLOCK_TICK_RATE PIT_TICK_RATE
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 9619385bf749..458c891a8273 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -212,30 +212,19 @@ static inline long arch_scale_freq_capacity(int cpu)
}
#define arch_scale_freq_capacity arch_scale_freq_capacity
-extern void arch_scale_freq_tick(void);
-#define arch_scale_freq_tick arch_scale_freq_tick
-
extern void arch_set_max_freq_ratio(bool turbo_disabled);
-void init_freq_invariance(bool secondary, bool cppc_ready);
+extern void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled);
#else
-static inline void arch_set_max_freq_ratio(bool turbo_disabled)
-{
-}
-static inline void init_freq_invariance(bool secondary, bool cppc_ready)
-{
-}
+static inline void arch_set_max_freq_ratio(bool turbo_disabled) { }
+static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { }
#endif
+extern void arch_scale_freq_tick(void);
+#define arch_scale_freq_tick arch_scale_freq_tick
+
#ifdef CONFIG_ACPI_CPPC_LIB
void init_freq_invariance_cppc(void);
#define arch_init_invariance_cppc init_freq_invariance_cppc
-
-bool amd_set_max_freq_ratio(u64 *ratio);
-#else
-static inline bool amd_set_max_freq_ratio(u64 *ratio)
-{
- return false;
-}
#endif
#endif /* _ASM_X86_TOPOLOGY_H */
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 35317c5c551d..47ecfff2c83d 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -13,7 +13,7 @@
#ifdef CONFIG_X86_64
asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
asmlinkage __visible notrace
-struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s);
+struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs);
void __init trap_init(void);
asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs);
#endif
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 01a300a9700b..fbdc3d951494 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -20,13 +20,12 @@ extern void disable_TSC(void);
static inline cycles_t get_cycles(void)
{
-#ifndef CONFIG_X86_TSC
- if (!boot_cpu_has(X86_FEATURE_TSC))
+ if (!IS_ENABLED(CONFIG_X86_TSC) &&
+ !cpu_feature_enabled(X86_FEATURE_TSC))
return 0;
-#endif
-
return rdtsc();
}
+#define get_cycles get_cycles
extern struct system_counterval_t convert_art_to_tsc(u64 art);
extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns);
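The trailing #define get_cycles get_cycles is the usual define-to-itself idiom: generic headers can then test with #ifndef whether the architecture supplied its own implementation and install a fallback only when it did not. A sketch of the consumer side (illustrative, not a quote of the generic header):

#ifndef get_cycles
static inline cycles_t get_cycles(void)
{
	return 0;	/* architectures without a cycle counter */
}
#endif

Note also that with CONFIG_X86_TSC=y the IS_ENABLED() test folds to false at compile time, so the feature check vanishes from the fast path entirely.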
diff --git a/arch/x86/include/uapi/asm/amd_hsmp.h b/arch/x86/include/uapi/asm/amd_hsmp.h
index 7ee7ba0d63a3..769b939444ae 100644
--- a/arch/x86/include/uapi/asm/amd_hsmp.h
+++ b/arch/x86/include/uapi/asm/amd_hsmp.h
@@ -31,9 +31,22 @@ enum hsmp_message_ids {
HSMP_GET_CCLK_THROTTLE_LIMIT, /* 10h Get CCLK frequency limit in socket */
HSMP_GET_C0_PERCENT, /* 11h Get average C0 residency in socket */
HSMP_SET_NBIO_DPM_LEVEL, /* 12h Set max/min LCLK DPM Level for a given NBIO */
- /* 13h Reserved */
- HSMP_GET_DDR_BANDWIDTH = 0x14, /* 14h Get theoretical maximum and current DDR Bandwidth */
- HSMP_GET_TEMP_MONITOR, /* 15h Get per-DIMM temperature and refresh rates */
+ HSMP_GET_NBIO_DPM_LEVEL, /* 13h Get LCLK DPM level min and max for a given NBIO */
+ HSMP_GET_DDR_BANDWIDTH, /* 14h Get theoretical maximum and current DDR Bandwidth */
+ HSMP_GET_TEMP_MONITOR, /* 15h Get socket temperature */
+ HSMP_GET_DIMM_TEMP_RANGE, /* 16h Get per-DIMM temperature range and refresh rate */
+ HSMP_GET_DIMM_POWER, /* 17h Get per-DIMM power consumption */
+ HSMP_GET_DIMM_THERMAL, /* 18h Get per-DIMM thermal sensors */
+ HSMP_GET_SOCKET_FREQ_LIMIT, /* 19h Get current active frequency per socket */
+ HSMP_GET_CCLK_CORE_LIMIT, /* 1Ah Get CCLK frequency limit per core */
+ HSMP_GET_RAILS_SVI, /* 1Bh Get SVI-based Telemetry for all rails */
+ HSMP_GET_SOCKET_FMAX_FMIN, /* 1Ch Get Fmax and Fmin per socket */
+ HSMP_GET_IOLINK_BANDWITH, /* 1Dh Get current bandwidth on IO Link */
+ HSMP_GET_XGMI_BANDWITH, /* 1Eh Get current bandwidth on xGMI Link */
+ HSMP_SET_GMI3_WIDTH, /* 1Fh Set max and min GMI3 Link width */
+ HSMP_SET_PCI_RATE, /* 20h Control link rate on PCIe devices */
+ HSMP_SET_POWER_MODE, /* 21h Select power efficiency profile policy */
+ HSMP_SET_PSTATE_MAX_MIN, /* 22h Set the max and min DF P-State */
HSMP_MSG_ID_MAX,
};
@@ -175,8 +188,12 @@ static const struct hsmp_msg_desc hsmp_msg_desc_table[] = {
*/
{1, 0, HSMP_SET},
- /* RESERVED message */
- {0, 0, HSMP_RSVD},
+ /*
+ * HSMP_GET_NBIO_DPM_LEVEL, num_args = 1, response_sz = 1
+ * input: args[0] = nbioid[23:16]
+ * output: args[0] = max dpm level[15:8] + min dpm level[7:0]
+ */
+ {1, 1, HSMP_GET},
/*
* HSMP_GET_DDR_BANDWIDTH, num_args = 0, response_sz = 1
@@ -191,6 +208,93 @@ static const struct hsmp_msg_desc hsmp_msg_desc_table[] = {
* [7:5] fractional part
*/
{0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_DIMM_TEMP_RANGE, num_args = 1, response_sz = 1
+ * input: args[0] = DIMM address[7:0]
+ * output: args[0] = refresh rate[3] + temperature range[2:0]
+ */
+ {1, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_DIMM_POWER, num_args = 1, response_sz = 1
+ * input: args[0] = DIMM address[7:0]
+ * output: args[0] = DIMM power in mW[31:17] + update rate in ms[16:8] +
+ * DIMM address[7:0]
+ */
+ {1, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_DIMM_THERMAL, num_args = 1, response_sz = 1
+ * input: args[0] = DIMM address[7:0]
+ * output: args[0] = temperature in degrees Celsius[31:21] + update rate in ms[16:8] +
+ * DIMM address[7:0]
+ */
+ {1, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_SOCKET_FREQ_LIMIT, num_args = 0, response_sz = 1
+ * output: args[0] = frequency in MHz[31:16] + frequency source[15:0]
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_CCLK_CORE_LIMIT, num_args = 1, response_sz = 1
+ * input: args[0] = apic id [31:0]
+ * output: args[0] = frequency in MHz[31:0]
+ */
+ {1, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_RAILS_SVI, num_args = 0, response_sz = 1
+ * output: args[0] = power in mW[31:0]
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_SOCKET_FMAX_FMIN, num_args = 0, response_sz = 1
+ * output: args[0] = fmax in MHz[31:16] + fmin in MHz[15:0]
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_IOLINK_BANDWITH, num_args = 1, response_sz = 1
+ * input: args[0] = link id[15:8] + bw type[2:0]
+ * output: args[0] = io bandwidth in Mbps[31:0]
+ */
+ {1, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_XGMI_BANDWITH, num_args = 1, response_sz = 1
+ * input: args[0] = link id[15:8] + bw type[2:0]
+ * output: args[0] = xgmi bandwidth in Mbps[31:0]
+ */
+ {1, 1, HSMP_GET},
+
+ /*
+ * HSMP_SET_GMI3_WIDTH, num_args = 1, response_sz = 0
+ * input: args[0] = min link width[15:8] + max link width[7:0]
+ */
+ {1, 0, HSMP_SET},
+
+ /*
+ * HSMP_SET_PCI_RATE, num_args = 1, response_sz = 1
+ * input: args[0] = link rate control value
+ * output: args[0] = previous link rate control value
+ */
+ {1, 1, HSMP_SET},
+
+ /*
+ * HSMP_SET_POWER_MODE, num_args = 1, response_sz = 0
+ * input: args[0] = power efficiency mode[2:0]
+ */
+ {1, 0, HSMP_SET},
+
+ /*
+ * HSMP_SET_PSTATE_MAX_MIN, num_args = 1, response_sz = 0
+ * input: args[0] = min df pstate[15:8] + max df pstate[7:0]
+ */
+ {1, 0, HSMP_SET},
};
/* Reset to default packing */
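The table comments pack several fields into args[0]. A hedged user-space sketch of building the HSMP_GET_NBIO_DPM_LEVEL request and unpacking its response, per the bit layout documented above (struct hsmp_message follows this uapi header; the ioctl plumbing and error handling are omitted):

static void example_nbio_dpm_level(struct hsmp_message *msg, __u8 nbioid)
{
	msg->msg_id      = HSMP_GET_NBIO_DPM_LEVEL;
	msg->num_args    = 1;
	msg->response_sz = 1;
	msg->args[0]     = (__u32)nbioid << 16;		/* nbioid in [23:16] */

	/* ... issue the driver's HSMP ioctl here ... */

	__u8 max_dpm = (msg->args[0] >> 8) & 0xff;	/* bits [15:8] */
	__u8 min_dpm = msg->args[0] & 0xff;		/* bits  [7:0] */
	(void)max_dpm;
	(void)min_dpm;
}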
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index b25d3f82c2f3..bea5cdcdf532 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -10,6 +10,7 @@
#define SETUP_EFI 4
#define SETUP_APPLE_PROPERTIES 5
#define SETUP_JAILHOUSE 6
+#define SETUP_CC_BLOB 7
#define SETUP_INDIRECT (1<<31)
@@ -187,7 +188,8 @@ struct boot_params {
__u32 ext_ramdisk_image; /* 0x0c0 */
__u32 ext_ramdisk_size; /* 0x0c4 */
__u32 ext_cmd_line_ptr; /* 0x0c8 */
- __u8 _pad4[116]; /* 0x0cc */
+ __u8 _pad4[112]; /* 0x0cc */
+ __u32 cc_blob_address; /* 0x13c */
struct edid_info edid_info; /* 0x140 */
struct efi_info efi_info; /* 0x1c0 */
__u32 alt_mem_k; /* 0x1e0 */
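The padding shrink is what keeps the layout intact: 0x0cc plus 112 bytes of _pad4 places cc_blob_address at exactly 0x13c, and 0x13c + 4 is 0x140, so edid_info keeps its documented offset. A hedged compile-time check of that arithmetic:

#include <stddef.h>
#include <asm/bootparam.h>

_Static_assert(offsetof(struct boot_params, cc_blob_address) == 0x13c,
	       "cc_blob_address must land at 0x13c");
_Static_assert(offsetof(struct boot_params, edid_info) == 0x140,
	       "edid_info must stay at 0x140");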
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index efa969325ede..f69c168391aa 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -108,6 +108,14 @@
#define SVM_VMGEXIT_AP_JUMP_TABLE 0x80000005
#define SVM_VMGEXIT_SET_AP_JUMP_TABLE 0
#define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1
+#define SVM_VMGEXIT_PSC 0x80000010
+#define SVM_VMGEXIT_GUEST_REQUEST 0x80000011
+#define SVM_VMGEXIT_EXT_GUEST_REQUEST 0x80000012
+#define SVM_VMGEXIT_AP_CREATION 0x80000013
+#define SVM_VMGEXIT_AP_CREATE_ON_INIT 0
+#define SVM_VMGEXIT_AP_CREATE 1
+#define SVM_VMGEXIT_AP_DESTROY 2
+#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd
#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff
/* Exit code reserved for hypervisor/software use */
@@ -218,6 +226,11 @@
{ SVM_VMGEXIT_NMI_COMPLETE, "vmgexit_nmi_complete" }, \
{ SVM_VMGEXIT_AP_HLT_LOOP, "vmgexit_ap_hlt_loop" }, \
{ SVM_VMGEXIT_AP_JUMP_TABLE, "vmgexit_ap_jump_table" }, \
+ { SVM_VMGEXIT_PSC, "vmgexit_page_state_change" }, \
+ { SVM_VMGEXIT_GUEST_REQUEST, "vmgexit_guest_request" }, \
+ { SVM_VMGEXIT_EXT_GUEST_REQUEST, "vmgexit_ext_guest_request" }, \
+ { SVM_VMGEXIT_AP_CREATION, "vmgexit_ap_creation" }, \
+ { SVM_VMGEXIT_HV_FEATURES, "vmgexit_hypervisor_feature" }, \
{ SVM_EXIT_ERR, "invalid_guest_state" }
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index c41ef42adbe8..1a2dc328cb5e 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -46,8 +46,6 @@ endif
# non-deterministic coverage.
KCOV_INSTRUMENT := n
-CFLAGS_head$(BITS).o += -fno-stack-protector
-
CFLAGS_irq.o := -I $(srctree)/$(src)/../include/asm/trace
obj-y := process_$(BITS).o signal.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 0d01e7f5078c..907cc98b1938 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -65,6 +65,13 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
static bool acpi_support_online_capable;
#endif
+#ifdef CONFIG_X86_64
+/* Physical address of the Multiprocessor Wakeup Structure mailbox */
+static u64 acpi_mp_wake_mailbox_paddr;
+/* Virtual address of the Multiprocessor Wakeup Structure mailbox */
+static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox;
+#endif
+
#ifdef CONFIG_X86_IO_APIC
/*
* Locks related to IOAPIC hotplug
@@ -336,7 +343,60 @@ acpi_parse_lapic_nmi(union acpi_subtable_headers * header, const unsigned long e
return 0;
}
-#endif /*CONFIG_X86_LOCAL_APIC */
+#ifdef CONFIG_X86_64
+static int acpi_wakeup_cpu(int apicid, unsigned long start_ip)
+{
+ /*
+ * Remap mailbox memory only for the first call to acpi_wakeup_cpu().
+ *
+ * Wakeup of secondary CPUs is fully serialized in the core code.
+ * No need to protect acpi_mp_wake_mailbox from concurrent accesses.
+ */
+ if (!acpi_mp_wake_mailbox) {
+ acpi_mp_wake_mailbox = memremap(acpi_mp_wake_mailbox_paddr,
+ sizeof(*acpi_mp_wake_mailbox),
+ MEMREMAP_WB);
+ }
+
+ /*
+ * Mailbox memory is shared between the firmware and OS. Firmware will
+ * listen on the mailbox command address, and once it receives the wakeup
+ * command, the CPU associated with the given apicid will be booted.
+ *
+ * The values of 'apic_id' and 'wakeup_vector' must be visible to the
+ * firmware before the wakeup command is visible. smp_store_release()
+ * ensures ordering and visibility.
+ */
+ acpi_mp_wake_mailbox->apic_id = apicid;
+ acpi_mp_wake_mailbox->wakeup_vector = start_ip;
+ smp_store_release(&acpi_mp_wake_mailbox->command,
+ ACPI_MP_WAKE_COMMAND_WAKEUP);
+
+ /*
+ * Wait for the CPU to wake up.
+ *
+ * The CPU being woken up is essentially in a spin loop waiting to be
+ * woken up. It should not take long for it to wake up and acknowledge by
+ * zeroing out ->command.
+ *
+ * The ACPI specification doesn't provide any guidance on how long the
+ * kernel has to wait for a wake up acknowledgement. It also doesn't
+ * provide a way to cancel a wake up request if it takes too long.
+ *
+ * In a TDX environment, the VMM has control over how long it takes to
+ * wake up a secondary CPU. It can postpone scheduling the secondary
+ * vCPU indefinitely. Giving up on the wake up request and reporting an
+ * error opens a possible attack vector for the VMM: it can wake up a
+ * secondary CPU when the kernel doesn't expect it. Wait until the wake
+ * up request gets a positive result.
+ */
+ while (READ_ONCE(acpi_mp_wake_mailbox->command))
+ cpu_relax();
+
+ return 0;
+}
+#endif /* CONFIG_X86_64 */
+#endif /* CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_IO_APIC
#define MP_ISA_BUS 0
@@ -375,7 +435,7 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
isa_irq_to_gsi[bus_irq] = gsi;
}
-static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
+static void mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
int polarity)
{
#ifdef CONFIG_X86_MPPARSE
@@ -387,9 +447,9 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
u8 pin;
if (!acpi_ioapic)
- return 0;
+ return;
if (!dev || !dev_is_pci(dev))
- return 0;
+ return;
pdev = to_pci_dev(dev);
number = pdev->bus->number;
@@ -408,7 +468,6 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
mp_save_irq(&mp_irq);
#endif
- return 0;
}
static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
@@ -1083,6 +1142,29 @@ static int __init acpi_parse_madt_lapic_entries(void)
}
return 0;
}
+
+#ifdef CONFIG_X86_64
+static int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
+ const unsigned long end)
+{
+ struct acpi_madt_multiproc_wakeup *mp_wake;
+
+ if (!IS_ENABLED(CONFIG_SMP))
+ return -ENODEV;
+
+ mp_wake = (struct acpi_madt_multiproc_wakeup *)header;
+ if (BAD_MADT_ENTRY(mp_wake, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(&header->common);
+
+ acpi_mp_wake_mailbox_paddr = mp_wake->base_address;
+
+ acpi_wake_cpu_handler_update(acpi_wakeup_cpu);
+
+ return 0;
+}
+#endif /* CONFIG_X86_64 */
#endif /* CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_IO_APIC
@@ -1278,6 +1360,14 @@ static void __init acpi_process_madt(void)
smp_found_config = 1;
}
+
+#ifdef CONFIG_X86_64
+ /*
+ * Parse MADT MP Wake entry.
+ */
+ acpi_table_parse_madt(ACPI_MADT_TYPE_MULTIPROC_WAKEUP,
+ acpi_parse_mp_wake, 1);
+#endif
}
if (error == -EINVAL) {
/*
@@ -1772,7 +1862,7 @@ int __acpi_release_global_lock(unsigned int *lock)
void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
{
- e820__range_add(addr, size, E820_TYPE_ACPI);
+ e820__range_add(addr, size, E820_TYPE_NVS);
e820__update_table_print();
}
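The handshake in acpi_wakeup_cpu() relies on the layout of the MADT multiprocessor wakeup mailbox. A hedged sketch of that layout, mirroring ACPICA's struct acpi_madt_multiproc_wakeup_mailbox as defined by the ACPI 6.4 spec (shown for illustration; the authoritative definition lives in the ACPICA headers):

struct mp_wake_mailbox_example {
	u16 command;		/* wakeup command; zeroed by the woken CPU */
	u16 reserved;
	u32 apic_id;		/* which CPU to wake */
	u64 wakeup_vector;	/* where it starts executing */
	u8  reserved_os[2032];	/* OS scratch area */
	u8  reserved_firmware[2048];
};

The smp_store_release() on ->command is the publish step: firmware may read apic_id and wakeup_vector only after observing the command, and the woken CPU acknowledges by zeroing ->command, which the while loop above polls.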
diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c
index df1644d9b3b6..8b8cbf22461a 100644
--- a/arch/x86/kernel/acpi/cppc.c
+++ b/arch/x86/kernel/acpi/cppc.c
@@ -50,20 +50,17 @@ int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
return err;
}
-bool amd_set_max_freq_ratio(u64 *ratio)
+static void amd_set_max_freq_ratio(void)
{
struct cppc_perf_caps perf_caps;
u64 highest_perf, nominal_perf;
u64 perf_ratio;
int rc;
- if (!ratio)
- return false;
-
rc = cppc_get_perf_caps(0, &perf_caps);
if (rc) {
pr_debug("Could not retrieve perf counters (%d)\n", rc);
- return false;
+ return;
}
highest_perf = amd_get_highest_perf();
@@ -71,7 +68,7 @@ bool amd_set_max_freq_ratio(u64 *ratio)
if (!highest_perf || !nominal_perf) {
pr_debug("Could not retrieve highest or nominal performance\n");
- return false;
+ return;
}
perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf);
@@ -79,25 +76,27 @@ bool amd_set_max_freq_ratio(u64 *ratio)
perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1;
if (!perf_ratio) {
pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n");
- return false;
+ return;
}
- *ratio = perf_ratio;
- arch_set_max_freq_ratio(false);
-
- return true;
+ freq_invariance_set_perf_ratio(perf_ratio, false);
}
static DEFINE_MUTEX(freq_invariance_lock);
void init_freq_invariance_cppc(void)
{
- static bool secondary;
+ static bool init_done;
- mutex_lock(&freq_invariance_lock);
+ if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
+ return;
- init_freq_invariance(secondary, true);
- secondary = true;
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+ return;
+ mutex_lock(&freq_invariance_lock);
+ if (!init_done)
+ amd_set_max_freq_ratio();
+ init_done = true;
mutex_unlock(&freq_invariance_lock);
}
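A worked example of the fixed-point math (the numbers are made up): with highest_perf = 2000 and nominal_perf = 1600, div_u64(2000 * SCHED_CAPACITY_SCALE, 1600) yields 1280, i.e. a 1.25 ratio expressed in 1024ths:

#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SCALE 1024

int main(void)
{
	uint64_t highest_perf = 2000, nominal_perf = 1600;
	uint64_t ratio = highest_perf * SCHED_CAPACITY_SCALE / nominal_perf;

	printf("perf_ratio = %llu\n", (unsigned long long)ratio);	/* 1280 */
	return 0;
}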
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index d374cb3cf024..3c66073e7645 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -338,7 +338,7 @@ next:
}
}
-#if defined(CONFIG_RETPOLINE) && defined(CONFIG_STACK_VALIDATION)
+#if defined(CONFIG_RETPOLINE) && defined(CONFIG_OBJTOOL)
/*
* CALL/JMP *%\reg
@@ -507,11 +507,11 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
}
}
-#else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */
+#else /* !CONFIG_RETPOLINE || !CONFIG_OBJTOOL */
void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
-#endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */
+#endif /* CONFIG_RETPOLINE && CONFIG_OBJTOOL */
#ifdef CONFIG_X86_KERNEL_IBT
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 020c906f7934..190e0f763375 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -188,7 +188,7 @@ int amd_smn_write(u16 node, u32 address, u32 value)
EXPORT_SYMBOL_GPL(amd_smn_write);
-int amd_cache_northbridges(void)
+static int amd_cache_northbridges(void)
{
const struct pci_device_id *misc_ids = amd_nb_misc_ids;
const struct pci_device_id *link_ids = amd_nb_link_ids;
@@ -210,14 +210,14 @@ int amd_cache_northbridges(void)
}
misc = NULL;
- while ((misc = next_northbridge(misc, misc_ids)) != NULL)
+ while ((misc = next_northbridge(misc, misc_ids)))
misc_count++;
if (!misc_count)
return -ENODEV;
root = NULL;
- while ((root = next_northbridge(root, root_ids)) != NULL)
+ while ((root = next_northbridge(root, root_ids)))
root_count++;
if (root_count) {
@@ -290,7 +290,6 @@ int amd_cache_northbridges(void)
return 0;
}
-EXPORT_SYMBOL_GPL(amd_cache_northbridges);
/*
* Ignores subdevice/subvendor but as far as I can figure out
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b70344bf6600..189d3a5e471a 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -170,7 +170,7 @@ static __init int setup_apicpmtimer(char *s)
{
apic_calibrate_pmtmr = 1;
notsc_setup(NULL);
- return 0;
+ return 1;
}
__setup("apicpmtimer", setup_apicpmtimer);
#endif
@@ -320,6 +320,9 @@ int lapic_get_maxlvt(void)
#define APIC_DIVISOR 16
#define TSC_DIVISOR 8
+/* i82489DX specific */
+#define I82489DX_BASE_DIVIDER (((0x2) << 18))
+
/*
* This function sets up the local APIC timer, with a timeout of
* 'clocks' APIC bus clock. During calibration we actually call
@@ -340,8 +343,14 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE;
+ /*
+ * The i82489DX APIC uses bits 18 and 19 for the base divider. This
+ * overlaps with bit 18 on integrated APICs, but is not documented
+ * in the SDM. No problem though: i82489DX-equipped systems do not
+ * have a TSC deadline timer.
+ */
if (!lapic_is_integrated())
- lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
+ lvtt_value |= I82489DX_BASE_DIVIDER;
if (!irqen)
lvtt_value |= APIC_LVT_MASKED;
@@ -1419,22 +1428,21 @@ void __init apic_intr_mode_init(void)
return;
case APIC_VIRTUAL_WIRE:
pr_info("APIC: Switch to virtual wire mode setup\n");
- default_setup_apic_routing();
break;
case APIC_VIRTUAL_WIRE_NO_CONFIG:
pr_info("APIC: Switch to virtual wire mode setup with no configuration\n");
upmode = true;
- default_setup_apic_routing();
break;
case APIC_SYMMETRIC_IO:
pr_info("APIC: Switch to symmetric I/O mode setup\n");
- default_setup_apic_routing();
break;
case APIC_SYMMETRIC_IO_NO_ROUTING:
pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n");
break;
}
+ default_setup_apic_routing();
+
if (x86_platform.apic_post_init)
x86_platform.apic_post_init();
@@ -2551,6 +2559,16 @@ u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid)
}
EXPORT_SYMBOL_GPL(x86_msi_msg_get_destid);
+#ifdef CONFIG_X86_64
+void __init acpi_wake_cpu_handler_update(wakeup_cpu_handler handler)
+{
+ struct apic **drv;
+
+ for (drv = __apicdrivers; drv < __apicdrivers_end; drv++)
+ (*drv)->wakeup_secondary_cpu_64 = handler;
+}
+#endif
+
/*
* Override the generic EOI implementation with an optimized version.
* Only called during early boot when only one CPU is active and with
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index c1bb384935b0..a868b76cd3d4 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -65,6 +65,7 @@
#include <asm/irq_remapping.h>
#include <asm/hw_irq.h>
#include <asm/apic.h>
+#include <asm/pgtable.h>
#define for_each_ioapic(idx) \
for ((idx) = 0; (idx) < nr_ioapics; (idx)++)
@@ -2677,6 +2678,19 @@ static struct resource * __init ioapic_setup_resources(void)
return res;
}
+static void io_apic_set_fixmap(enum fixed_addresses idx, phys_addr_t phys)
+{
+ pgprot_t flags = FIXMAP_PAGE_NOCACHE;
+
+ /*
+ * Ensure fixmaps for IOAPIC MMIO respect memory encryption pgprot
+ * bits, just like normal ioremap():
+ */
+ flags = pgprot_decrypted(flags);
+
+ __set_fixmap(idx, phys, flags);
+}
+
void __init io_apic_init_mappings(void)
{
unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
@@ -2709,7 +2723,7 @@ fake_ioapic_page:
__func__, PAGE_SIZE, PAGE_SIZE);
ioapic_phys = __pa(ioapic_phys);
}
- set_fixmap_nocache(idx, ioapic_phys);
+ io_apic_set_fixmap(idx, ioapic_phys);
apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
__fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK),
ioapic_phys);
@@ -2838,7 +2852,7 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base,
ioapics[idx].mp_config.flags = MPC_APIC_USABLE;
ioapics[idx].mp_config.apicaddr = address;
- set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+ io_apic_set_fixmap(FIX_IO_APIC_BASE_0 + idx, address);
if (bad_ioapic_register(idx)) {
clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
return -ENODEV;
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index f5a48e66e4f5..482855227964 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -199,7 +199,13 @@ static void __init uv_tsc_check_sync(void)
int mmr_shift;
char *state;
- /* Different returns from different UV BIOS versions */
+ /* UV5 guarantees synced TSCs; do not zero TSC_ADJUST */
+ if (!is_uv(UV2|UV3|UV4)) {
+ mark_tsc_async_resets("UV5+");
+ return;
+ }
+
+ /* UV2,3,4, UV BIOS TSC sync state available */
mmr = uv_early_read_mmr(UVH_TSC_SYNC_MMR);
mmr_shift =
is_uv2_hub() ? UVH_TSC_SYNC_SHIFT_UV2K : UVH_TSC_SYNC_SHIFT;
@@ -1340,7 +1346,7 @@ static void __init decode_gam_params(unsigned long ptr)
static void __init decode_gam_rng_tbl(unsigned long ptr)
{
struct uv_gam_range_entry *gre = (struct uv_gam_range_entry *)ptr;
- unsigned long lgre = 0;
+ unsigned long lgre = 0, gend = 0;
int index = 0;
int sock_min = 999999, pnode_min = 99999;
int sock_max = -1, pnode_max = -1;
@@ -1374,6 +1380,9 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
flag, size, suffix[order],
gre->type, gre->nasid, gre->sockid, gre->pnode);
+ if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
+ gend = (unsigned long)gre->limit << UV_GAM_RANGE_SHFT;
+
/* update to next range start */
lgre = gre->limit;
if (sock_min > gre->sockid)
@@ -1391,7 +1400,8 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
_max_pnode = pnode_max;
_gr_table_len = index;
- pr_info("UV: GRT: %d entries, sockets(min:%x,max:%x) pnodes(min:%x,max:%x)\n", index, _min_socket, _max_socket, _min_pnode, _max_pnode);
+ pr_info("UV: GRT: %d entries, sockets(min:%x,max:%x), pnodes(min:%x,max:%x), gap_end(%d)\n",
+ index, _min_socket, _max_socket, _min_pnode, _max_pnode, fls64(gend));
}
/* Walk through UVsystab decoding the fields */
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 9fb0a2f8b62a..437308004ef2 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -18,6 +18,7 @@
#include <asm/bootparam.h>
#include <asm/suspend.h>
#include <asm/tlbflush.h>
+#include <asm/tdx.h>
#ifdef CONFIG_XEN
#include <xen/interface/xen.h>
@@ -66,6 +67,22 @@ static void __used common(void)
#endif
BLANK();
+ OFFSET(TDX_MODULE_rcx, tdx_module_output, rcx);
+ OFFSET(TDX_MODULE_rdx, tdx_module_output, rdx);
+ OFFSET(TDX_MODULE_r8, tdx_module_output, r8);
+ OFFSET(TDX_MODULE_r9, tdx_module_output, r9);
+ OFFSET(TDX_MODULE_r10, tdx_module_output, r10);
+ OFFSET(TDX_MODULE_r11, tdx_module_output, r11);
+
+ BLANK();
+ OFFSET(TDX_HYPERCALL_r10, tdx_hypercall_args, r10);
+ OFFSET(TDX_HYPERCALL_r11, tdx_hypercall_args, r11);
+ OFFSET(TDX_HYPERCALL_r12, tdx_hypercall_args, r12);
+ OFFSET(TDX_HYPERCALL_r13, tdx_hypercall_args, r13);
+ OFFSET(TDX_HYPERCALL_r14, tdx_hypercall_args, r14);
+ OFFSET(TDX_HYPERCALL_r15, tdx_hypercall_args, r15);
+
+ BLANK();
OFFSET(BP_scratch, boot_params, scratch);
OFFSET(BP_secure_boot, boot_params, secure_boot);
OFFSET(BP_loadflags, boot_params, hdr.loadflags);
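For orientation, asm-offsets.c exists to turn struct layouts into assembler-visible constants; the OFFSET() entries added here are what let the TDCALL assembly stubs store output GPRs straight into the C caller's structure. A hedged, comment-only sketch (the assembly shown is illustrative, not the actual stub):

/*
 * OFFSET(TDX_MODULE_rcx, tdx_module_output, rcx) emits a constant such as
 * "#define TDX_MODULE_rcx 0", so a .S file can write:
 *
 *	movq %rcx, TDX_MODULE_rcx(%rdi)
 *	movq %rdx, TDX_MODULE_rdx(%rdi)
 *
 * filling the struct tdx_module_output whose pointer arrived in %rdi.
 */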
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index 9ca008f9e9b1..1f60a2b27936 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -6,146 +6,446 @@
* Copyright (C) 2017 Intel Corp.
* Author: Len Brown <len.brown@intel.com>
*/
-
+#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/ktime.h>
#include <linux/math64.h>
#include <linux/percpu.h>
-#include <linux/cpufreq.h>
-#include <linux/smp.h>
-#include <linux/sched/isolation.h>
#include <linux/rcupdate.h>
+#include <linux/sched/isolation.h>
+#include <linux/sched/topology.h>
+#include <linux/smp.h>
+#include <linux/syscore_ops.h>
+
+#include <asm/cpu.h>
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
#include "cpu.h"
-struct aperfmperf_sample {
- unsigned int khz;
- atomic_t scfpending;
- ktime_t time;
- u64 aperf;
- u64 mperf;
+struct aperfmperf {
+ seqcount_t seq;
+ unsigned long last_update;
+ u64 acnt;
+ u64 mcnt;
+ u64 aperf;
+ u64 mperf;
};
-static DEFINE_PER_CPU(struct aperfmperf_sample, samples);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct aperfmperf, cpu_samples) = {
+ .seq = SEQCNT_ZERO(cpu_samples.seq)
+};
-#define APERFMPERF_CACHE_THRESHOLD_MS 10
-#define APERFMPERF_REFRESH_DELAY_MS 10
-#define APERFMPERF_STALE_THRESHOLD_MS 1000
+static void init_counter_refs(void)
+{
+ u64 aperf, mperf;
+
+ rdmsrl(MSR_IA32_APERF, aperf);
+ rdmsrl(MSR_IA32_MPERF, mperf);
+ this_cpu_write(cpu_samples.aperf, aperf);
+ this_cpu_write(cpu_samples.mperf, mperf);
+}
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
/*
- * aperfmperf_snapshot_khz()
- * On the current CPU, snapshot APERF, MPERF, and jiffies
- * unless we already did it within 10ms
- * calculate kHz, save snapshot
+ * APERF/MPERF frequency ratio computation.
+ *
+ * The scheduler wants to do frequency invariant accounting and needs a <1
+ * ratio to account for the 'current' frequency, corresponding to
+ * freq_curr / freq_max.
+ *
+ * Since the frequency freq_curr on x86 is controlled by a micro-controller and
+ * our P-state setting is little more than a request/hint, we need to observe
+ * the effective frequency 'BusyMHz', i.e. the average frequency over a time
+ * interval after discarding idle time. This is given by:
+ *
+ * BusyMHz = delta_APERF / delta_MPERF * freq_base
+ *
+ * where freq_base is the max non-turbo P-state.
+ *
+ * The freq_max term has to be set to a somewhat arbitrary value, because we
+ * can't know which turbo states will be available at a given point in time:
+ * it all depends on the thermal headroom of the entire package. We set it to
+ * the turbo level with 4 cores active.
+ *
+ * Benchmarks show that's a good compromise between the 1C turbo ratio
+ * (freq_curr/freq_max would rarely reach 1) and something close to freq_base,
+ * which would ignore the entire turbo range (a conspicuous part, making
+ * freq_curr/freq_max always maxed out).
+ *
+ * An exception to the heuristic above is the Atom uarch, where we choose the
+ * highest turbo level for freq_max since Atoms are generally oriented towards
+ * power efficiency.
+ *
+ * Setting freq_max to anything less than the 1C turbo ratio makes the ratio
+ * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1.
*/
-static void aperfmperf_snapshot_khz(void *dummy)
+
+DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key);
+
+static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE;
+static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE;
+
+void arch_set_max_freq_ratio(bool turbo_disabled)
{
- u64 aperf, aperf_delta;
- u64 mperf, mperf_delta;
- struct aperfmperf_sample *s = this_cpu_ptr(&samples);
- unsigned long flags;
+ arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
+ arch_turbo_freq_ratio;
+}
+EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);
- local_irq_save(flags);
- rdmsrl(MSR_IA32_APERF, aperf);
- rdmsrl(MSR_IA32_MPERF, mperf);
- local_irq_restore(flags);
+static bool __init turbo_disabled(void)
+{
+ u64 misc_en;
+ int err;
+
+ err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en);
+ if (err)
+ return false;
+
+ return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
+}
+
+static bool __init slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
+{
+ int err;
+
+ err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq);
+ if (err)
+ return false;
+
+ err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
+ if (err)
+ return false;
+
+ *base_freq = (*base_freq >> 16) & 0x3F; /* max P state */
+ *turbo_freq = *turbo_freq & 0x3F; /* 1C turbo */
+
+ return true;
+}
+
+#define X86_MATCH(model) \
+ X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, \
+ INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)
+
+static const struct x86_cpu_id has_knl_turbo_ratio_limits[] __initconst = {
+ X86_MATCH(XEON_PHI_KNL),
+ X86_MATCH(XEON_PHI_KNM),
+ {}
+};
+
+static const struct x86_cpu_id has_skx_turbo_ratio_limits[] __initconst = {
+ X86_MATCH(SKYLAKE_X),
+ {}
+};
+
+static const struct x86_cpu_id has_glm_turbo_ratio_limits[] __initconst = {
+ X86_MATCH(ATOM_GOLDMONT),
+ X86_MATCH(ATOM_GOLDMONT_D),
+ X86_MATCH(ATOM_GOLDMONT_PLUS),
+ {}
+};
+
+static bool __init knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
+ int num_delta_fratio)
+{
+ int fratio, delta_fratio, found;
+ int err, i;
+ u64 msr;
+
+ err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
+ if (err)
+ return false;
+
+ *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
+
+ err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
+ if (err)
+ return false;
+
+ fratio = (msr >> 8) & 0xFF;
+ i = 16;
+ found = 0;
+ do {
+ if (found >= num_delta_fratio) {
+ *turbo_freq = fratio;
+ return true;
+ }
+
+ delta_fratio = (msr >> (i + 5)) & 0x7;
+
+ if (delta_fratio) {
+ found += 1;
+ fratio -= delta_fratio;
+ }
+
+ i += 8;
+ } while (i < 64);
+
+ return true;
+}
+
+static bool __init skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
+{
+ u64 ratios, counts;
+ u32 group_size;
+ int err, i;
+
+ err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
+ if (err)
+ return false;
+
+ *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
+
+ err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
+ if (err)
+ return false;
+
+ err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
+ if (err)
+ return false;
+
+ for (i = 0; i < 64; i += 8) {
+ group_size = (counts >> i) & 0xFF;
+ if (group_size >= size) {
+ *turbo_freq = (ratios >> i) & 0xFF;
+ return true;
+ }
+ }
+
+ return false;
+}
- aperf_delta = aperf - s->aperf;
- mperf_delta = mperf - s->mperf;
+static bool __init core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
+{
+ u64 msr;
+ int err;
+
+ err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
+ if (err)
+ return false;
+
+ err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
+ if (err)
+ return false;
+
+ *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
+ *turbo_freq = (msr >> 24) & 0xFF; /* 4C turbo */
+
+ /* The CPU may have less than 4 cores */
+ if (!*turbo_freq)
+ *turbo_freq = msr & 0xFF; /* 1C turbo */
+
+ return true;
+}
+
+static bool __init intel_set_max_freq_ratio(void)
+{
+ u64 base_freq, turbo_freq;
+ u64 turbo_ratio;
+ if (slv_set_max_freq_ratio(&base_freq, &turbo_freq))
+ goto out;
+
+ if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
+ skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
+ goto out;
+
+ if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
+ knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
+ goto out;
+
+ if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
+ skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4))
+ goto out;
+
+ if (core_set_max_freq_ratio(&base_freq, &turbo_freq))
+ goto out;
+
+ return false;
+
+out:
/*
- * There is no architectural guarantee that MPERF
- * increments faster than we can read it.
+ * Some hypervisors advertise X86_FEATURE_APERFMPERF
+ * but then fill all MSRs with zeroes.
+ * Some CPUs have turbo boost but don't declare any turbo ratio
+ * in MSR_TURBO_RATIO_LIMIT.
*/
- if (mperf_delta == 0)
- return;
+ if (!base_freq || !turbo_freq) {
+ pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n");
+ return false;
+ }
- s->time = ktime_get();
- s->aperf = aperf;
- s->mperf = mperf;
- s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta);
- atomic_set_release(&s->scfpending, 0);
+ turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq);
+ if (!turbo_ratio) {
+ pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n");
+ return false;
+ }
+
+ arch_turbo_freq_ratio = turbo_ratio;
+ arch_set_max_freq_ratio(turbo_disabled());
+
+ return true;
}
-static bool aperfmperf_snapshot_cpu(int cpu, ktime_t now, bool wait)
+#ifdef CONFIG_PM_SLEEP
+static struct syscore_ops freq_invariance_syscore_ops = {
+ .resume = init_counter_refs,
+};
+
+static void register_freq_invariance_syscore_ops(void)
{
- s64 time_delta = ktime_ms_delta(now, per_cpu(samples.time, cpu));
- struct aperfmperf_sample *s = per_cpu_ptr(&samples, cpu);
+ register_syscore_ops(&freq_invariance_syscore_ops);
+}
+#else
+static inline void register_freq_invariance_syscore_ops(void) {}
+#endif
- /* Don't bother re-computing within the cache threshold time. */
- if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS)
- return true;
+static void freq_invariance_enable(void)
+{
+ if (static_branch_unlikely(&arch_scale_freq_key)) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+ static_branch_enable(&arch_scale_freq_key);
+ register_freq_invariance_syscore_ops();
+ pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
+}
+
+void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled)
+{
+ arch_turbo_freq_ratio = ratio;
+ arch_set_max_freq_ratio(turbo_disabled);
+ freq_invariance_enable();
+}
+
+static void __init bp_init_freq_invariance(void)
+{
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return;
- if (!atomic_xchg(&s->scfpending, 1) || wait)
- smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, wait);
+ if (intel_set_max_freq_ratio())
+ freq_invariance_enable();
+}
- /* Return false if the previous iteration was too long ago. */
- return time_delta <= APERFMPERF_STALE_THRESHOLD_MS;
+static void disable_freq_invariance_workfn(struct work_struct *work)
+{
+ static_branch_disable(&arch_scale_freq_key);
}
-unsigned int aperfmperf_get_khz(int cpu)
+static DECLARE_WORK(disable_freq_invariance_work,
+ disable_freq_invariance_workfn);
+
+DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
+
+static void scale_freq_tick(u64 acnt, u64 mcnt)
{
- if (!cpu_khz)
- return 0;
+ u64 freq_scale;
- if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
- return 0;
+ if (!arch_scale_freq_invariant())
+ return;
- if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
- return 0;
+ if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
+ goto error;
- if (rcu_is_idle_cpu(cpu))
- return 0; /* Idle CPUs are completely uninteresting. */
+ if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
+ goto error;
- aperfmperf_snapshot_cpu(cpu, ktime_get(), true);
- return per_cpu(samples.khz, cpu);
+ freq_scale = div64_u64(acnt, mcnt);
+ if (!freq_scale)
+ goto error;
+
+ if (freq_scale > SCHED_CAPACITY_SCALE)
+ freq_scale = SCHED_CAPACITY_SCALE;
+
+ this_cpu_write(arch_freq_scale, freq_scale);
+ return;
+
+error:
+ pr_warn("Scheduler frequency invariance went wobbly, disabling!\n");
+ schedule_work(&disable_freq_invariance_work);
}
+#else
+static inline void bp_init_freq_invariance(void) { }
+static inline void scale_freq_tick(u64 acnt, u64 mcnt) { }
+#endif /* CONFIG_X86_64 && CONFIG_SMP */
-void arch_freq_prepare_all(void)
+void arch_scale_freq_tick(void)
{
- ktime_t now = ktime_get();
- bool wait = false;
- int cpu;
+ struct aperfmperf *s = this_cpu_ptr(&cpu_samples);
+ u64 acnt, mcnt, aperf, mperf;
- if (!cpu_khz)
+ if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
return;
- if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
- return;
+ rdmsrl(MSR_IA32_APERF, aperf);
+ rdmsrl(MSR_IA32_MPERF, mperf);
+ acnt = aperf - s->aperf;
+ mcnt = mperf - s->mperf;
- for_each_online_cpu(cpu) {
- if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
- continue;
- if (rcu_is_idle_cpu(cpu))
- continue; /* Idle CPUs are completely uninteresting. */
- if (!aperfmperf_snapshot_cpu(cpu, now, false))
- wait = true;
- }
+ s->aperf = aperf;
+ s->mperf = mperf;
+
+ raw_write_seqcount_begin(&s->seq);
+ s->last_update = jiffies;
+ s->acnt = acnt;
+ s->mcnt = mcnt;
+ raw_write_seqcount_end(&s->seq);
- if (wait)
- msleep(APERFMPERF_REFRESH_DELAY_MS);
+ scale_freq_tick(acnt, mcnt);
}
+/*
+ * Discard samples older than the defined maximum sample age of 20ms. There
+ * is no point in sending IPIs in such a case. If the scheduler tick was
+ * not running then the CPU is either idle or isolated.
+ */
+#define MAX_SAMPLE_AGE ((unsigned long)HZ / 50)
+
unsigned int arch_freq_get_on_cpu(int cpu)
{
- struct aperfmperf_sample *s = per_cpu_ptr(&samples, cpu);
+ struct aperfmperf *s = per_cpu_ptr(&cpu_samples, cpu);
+ unsigned int seq, freq;
+ unsigned long last;
+ u64 acnt, mcnt;
- if (!cpu_khz)
- return 0;
+ if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
+ goto fallback;
- if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
- return 0;
+ do {
+ seq = raw_read_seqcount_begin(&s->seq);
+ last = s->last_update;
+ acnt = s->acnt;
+ mcnt = s->mcnt;
+ } while (read_seqcount_retry(&s->seq, seq));
- if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
- return 0;
+ /*
+ * Bail on invalid count and when the last update was too long ago,
+ * which covers idle and NOHZ full CPUs.
+ */
+ if (!mcnt || (jiffies - last) > MAX_SAMPLE_AGE)
+ goto fallback;
+
+ return div64_u64((cpu_khz * acnt), mcnt);
+
+fallback:
+ freq = cpufreq_quick_get(cpu);
+ return freq ? freq : cpu_khz;
+}
- if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true))
- return per_cpu(samples.khz, cpu);
+static int __init bp_init_aperfmperf(void)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
+ return 0;
- msleep(APERFMPERF_REFRESH_DELAY_MS);
- atomic_set(&s->scfpending, 1);
- smp_mb(); /* ->scfpending before smp_call_function_single(). */
- smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1);
+ init_counter_refs();
+ bp_init_freq_invariance();
+ return 0;
+}
+early_initcall(bp_init_aperfmperf);
- return per_cpu(samples.khz, cpu);
+void ap_init_aperfmperf(void)
+{
+ if (cpu_feature_enabled(X86_FEATURE_APERFMPERF))
+ init_counter_refs();
}
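A worked example of both consumers of one tick's (acnt, mcnt) deltas, with made-up numbers: SCHED_CAPACITY_SHIFT is 10, so the 2*SCHED_CAPACITY_SHIFT shift is 20. With acnt = 8e6, mcnt = 10e6, a 2 GHz base clock and arch_max_freq_ratio = 1280 (1.25), arch_freq_get_on_cpu() reports 1.6 GHz and scale_freq_tick() produces roughly 655, i.e. 0.8 / 1.25 = 0.64 of maximum capacity:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t acnt = 8000000, mcnt = 10000000;	/* one tick's deltas */
	uint64_t cpu_khz = 2000000;			/* 2 GHz base clock  */
	uint64_t max_ratio = 1280;			/* 1.25 in 1024ths   */

	/* BusyMHz = delta_APERF / delta_MPERF * freq_base */
	printf("khz = %llu\n", (unsigned long long)(cpu_khz * acnt / mcnt));

	/* freq_curr / freq_max; the kernel clips this to SCHED_CAPACITY_SCALE */
	printf("freq_scale = %llu\n",
	       (unsigned long long)((acnt << 20) / (mcnt * max_ratio)));
	return 0;
}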
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 6296e1ebed1d..d879a6c93609 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -446,6 +446,13 @@ void update_srbds_msr(void)
if (srbds_mitigation == SRBDS_MITIGATION_UCODE_NEEDED)
return;
+ /*
+ * An MDS_NO CPU for which the SRBDS mitigation is not needed due to TSX
+ * being disabled may not have received the SRBDS MSR microcode.
+ */
+ if (!boot_cpu_has(X86_FEATURE_SRBDS_CTRL))
+ return;
+
rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
switch (srbds_mitigation) {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index e342ae4db3c4..2e9142797c99 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -60,6 +60,7 @@
#include <asm/uv/uv.h>
#include <asm/sigframe.h>
#include <asm/traps.h>
+#include <asm/sev.h>
#include "cpu.h"
@@ -298,13 +299,6 @@ static int __init cachesize_setup(char *str)
}
__setup("cachesize=", cachesize_setup);
-static int __init x86_sep_setup(char *s)
-{
- setup_clear_cpu_cap(X86_FEATURE_SEP);
- return 1;
-}
-__setup("nosep", x86_sep_setup);
-
/* Standard macro to see if a specific flag is changeable */
static inline int flag_is_changeable_p(u32 flag)
{
@@ -376,26 +370,12 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
}
#endif
-static __init int setup_disable_smep(char *arg)
-{
- setup_clear_cpu_cap(X86_FEATURE_SMEP);
- return 1;
-}
-__setup("nosmep", setup_disable_smep);
-
static __always_inline void setup_smep(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_SMEP))
cr4_set_bits(X86_CR4_SMEP);
}
-static __init int setup_disable_smap(char *arg)
-{
- setup_clear_cpu_cap(X86_FEATURE_SMAP);
- return 1;
-}
-__setup("nosmap", setup_disable_smap);
-
static __always_inline void setup_smap(struct cpuinfo_x86 *c)
{
unsigned long eflags = native_save_fl();
@@ -403,14 +383,8 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
/* This should have been cleared long ago */
BUG_ON(eflags & X86_EFLAGS_AC);
- if (cpu_has(c, X86_FEATURE_SMAP)) {
-#ifdef CONFIG_X86_SMAP
+ if (cpu_has(c, X86_FEATURE_SMAP))
cr4_set_bits(X86_CR4_SMAP);
-#else
- clear_cpu_cap(c, X86_FEATURE_SMAP);
- cr4_clear_bits(X86_CR4_SMAP);
-#endif
- }
}
static __always_inline void setup_umip(struct cpuinfo_x86 *c)
@@ -1368,8 +1342,8 @@ static void detect_nopl(void)
static void __init cpu_parse_early_param(void)
{
char arg[128];
- char *argptr = arg;
- int arglen, res, bit;
+ char *argptr = arg, *opt;
+ int arglen, taint = 0;
#ifdef CONFIG_X86_32
if (cmdline_find_option_bool(boot_command_line, "no387"))
@@ -1397,21 +1371,61 @@ static void __init cpu_parse_early_param(void)
return;
pr_info("Clearing CPUID bits:");
- do {
- res = get_option(&argptr, &bit);
- if (res == 0 || res == 3)
- break;
- /* If the argument was too long, the last bit may be cut off */
- if (res == 1 && arglen >= sizeof(arg))
- break;
+ while (argptr) {
+ bool found __maybe_unused = false;
+ unsigned int bit;
+
+ opt = strsep(&argptr, ",");
+
+ /*
+ * Handle naked numbers first for feature flags which don't
+ * have names.
+ */
+ if (!kstrtouint(opt, 10, &bit)) {
+ if (bit < NCAPINTS * 32) {
+
+#ifdef CONFIG_X86_FEATURE_NAMES
+ /* empty-string, i.e., ""-defined feature flags */
+ if (!x86_cap_flags[bit])
+ pr_cont(" " X86_CAP_FMT_NUM, x86_cap_flag_num(bit));
+ else
+#endif
+ pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit));
+
+ setup_clear_cpu_cap(bit);
+ taint++;
+ }
+ /*
+ * The assumption is that there are no feature names with only
+ * numbers in the name, thus go to the next argument.
+ */
+ continue;
+ }
+
+#ifdef CONFIG_X86_FEATURE_NAMES
+ for (bit = 0; bit < 32 * NCAPINTS; bit++) {
+ if (!x86_cap_flag(bit))
+ continue;
- if (bit >= 0 && bit < NCAPINTS * 32) {
- pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit));
+ if (strcmp(x86_cap_flag(bit), opt))
+ continue;
+
+ pr_cont(" %s", opt);
setup_clear_cpu_cap(bit);
+ taint++;
+ found = true;
+ break;
}
- } while (res == 2);
+
+ if (!found)
+ pr_cont(" (unknown: %s)", opt);
+#endif
+ }
pr_cont("\n");
+
+ if (taint)
+ add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
}
/*
@@ -1859,14 +1873,6 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
tsx_ap_init();
}
-static __init int setup_noclflush(char *arg)
-{
- setup_clear_cpu_cap(X86_FEATURE_CLFLUSH);
- setup_clear_cpu_cap(X86_FEATURE_CLFLUSHOPT);
- return 1;
-}
-__setup("noclflush", setup_noclflush);
-
void print_cpu_info(struct cpuinfo_x86 *c)
{
const char *vendor = NULL;
@@ -2126,6 +2132,9 @@ void cpu_init_exception_handling(void)
load_TR_desc();
+ /* GHCB needs to be setup to handle #VC. */
+ setup_ghcb();
+
/* Finally load the IDT */
load_current_idt();
}
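With this rewrite, clearcpuid= accepts a comma-separated mix of feature names and raw bit numbers instead of numbers only. An illustrative (not recommended) invocation, with made-up flag choices:

	clearcpuid=avx2,142,sgx

Naked numbers are tried first via kstrtouint(); everything else is matched against x86_cap_flags[]. Any successfully cleared bit now also taints the kernel with TAINT_CPU_OUT_OF_SPEC, since running with advertised features disabled puts the CPU out of spec.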
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index f7a5370a9b3b..fd5dead8371c 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -7,10 +7,13 @@
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
+#include <linux/semaphore.h>
#include <linux/thread_info.h>
#include <linux/init.h>
#include <linux/uaccess.h>
+#include <linux/workqueue.h>
#include <linux/delay.h>
+#include <linux/cpuhotplug.h>
#include <asm/cpufeature.h>
#include <asm/msr.h>
@@ -91,7 +94,7 @@ static bool ring3mwait_disabled __read_mostly;
static int __init ring3mwait_disable(char *__unused)
{
ring3mwait_disabled = true;
- return 0;
+ return 1;
}
__setup("ring3mwait=disable", ring3mwait_disable);
@@ -181,6 +184,38 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
return false;
}
+int intel_cpu_collect_info(struct ucode_cpu_info *uci)
+{
+ unsigned int val[2];
+ unsigned int family, model;
+ struct cpu_signature csig = { 0 };
+ unsigned int eax, ebx, ecx, edx;
+
+ memset(uci, 0, sizeof(*uci));
+
+ eax = 0x00000001;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+ csig.sig = eax;
+
+ family = x86_family(eax);
+ model = x86_model(eax);
+
+ if (model >= 5 || family > 6) {
+ /* get processor flags from MSR 0x17 */
+ native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
+ csig.pf = 1 << ((val[1] >> 18) & 7);
+ }
+
+ csig.rev = intel_get_microcode_revision();
+
+ uci->cpu_sig = csig;
+ uci->valid = 1;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(intel_cpu_collect_info);
+
static void early_init_intel(struct cpuinfo_x86 *c)
{
u64 misc_enable;
@@ -999,6 +1034,8 @@ static const struct {
static struct ratelimit_state bld_ratelimit;
+static DEFINE_SEMAPHORE(buslock_sem);
+
static inline bool match_option(const char *arg, int arglen, const char *opt)
{
int len = strlen(opt), ratelimit;
@@ -1109,18 +1146,52 @@ static void split_lock_init(void)
split_lock_verify_msr(sld_state != sld_off);
}
+static void __split_lock_reenable(struct work_struct *work)
+{
+ sld_update_msr(true);
+ up(&buslock_sem);
+}
+
+/*
+ * If a CPU goes offline with pending delayed work to re-enable split lock
+ * detection then the delayed work will be executed on some other CPU. That
+ * handles releasing the buslock_sem, but because it executes on a
+ * different CPU it probably won't re-enable split lock detection. This is a
+ * problem on HT systems since the sibling CPU on the same core may then be
+ * left running with split lock detection disabled.
+ *
+ * Unconditionally re-enable detection here.
+ */
+static int splitlock_cpu_offline(unsigned int cpu)
+{
+ sld_update_msr(true);
+
+ return 0;
+}
+
+static DECLARE_DELAYED_WORK(split_lock_reenable, __split_lock_reenable);
+
static void split_lock_warn(unsigned long ip)
{
- pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
- current->comm, current->pid, ip);
+ int cpu;
- /*
- * Disable the split lock detection for this task so it can make
- * progress and set TIF_SLD so the detection is re-enabled via
- * switch_to_sld() when the task is scheduled out.
- */
+ if (!current->reported_split_lock)
+ pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
+ current->comm, current->pid, ip);
+ current->reported_split_lock = 1;
+
+ /* Misery factor #1: sleep 10ms before trying to execute the split lock */
+ if (msleep_interruptible(10) > 0)
+ return;
+ /* Misery factor #2: only one core at a time may have split lock detection disabled */
+ if (down_interruptible(&buslock_sem) == -EINTR)
+ return;
+ cpu = get_cpu();
+ schedule_delayed_work_on(cpu, &split_lock_reenable, 2);
+
+ /* Disable split lock detection on this CPU to make progress */
sld_update_msr(false);
- set_tsk_thread_flag(current, TIF_SLD);
+ put_cpu();
}
bool handle_guest_split_lock(unsigned long ip)
@@ -1194,18 +1265,6 @@ void handle_bus_lock(struct pt_regs *regs)
}
/*
- * This function is called only when switching between tasks with
- * different split-lock detection modes. It sets the MSR for the
- * mode of the new task. This is right most of the time, but since
- * the MSR is shared by hyperthreads on a physical core there can
- * be glitches when the two threads need different modes.
- */
-void switch_to_sld(unsigned long tifn)
-{
- sld_update_msr(!(tifn & _TIF_SLD));
-}
-
-/*
* Bits in the IA32_CORE_CAPABILITIES are not architectural, so they should
* only be trusted if it is confirmed that a CPU model implements a
* specific feature at a particular bit position.
@@ -1230,6 +1289,7 @@ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 1),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, 1),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 1),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, 1),
{}
};
@@ -1274,10 +1334,14 @@ static void sld_state_show(void)
pr_info("disabled\n");
break;
case sld_warn:
- if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
+ if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n");
- else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
+ if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "x86/splitlock", NULL, splitlock_cpu_offline) < 0)
+ pr_warn("No splitlock CPU offline handler\n");
+ } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
pr_info("#DB: warning on user-space bus_locks\n");
+ }
break;
case sld_fatal:
if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
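For context, a hedged user-space sketch of the kind of access that lands in split_lock_warn(): a LOCK-prefixed read-modify-write on a value straddling a cache-line boundary raises #AC when split lock detection is armed. The 64-byte line size is an assumption, and compilers may warn about the unaligned member address; that is the point of the exercise:

#include <stdint.h>

struct split {
	char pad[62];		/* pushes 'x' across the 64-byte boundary */
	int32_t x;
} __attribute__((packed, aligned(64)));

int main(void)
{
	struct split s = { .x = 0 };

	__sync_fetch_and_add(&s.x, 1);	/* split-locked access -> #AC */
	return 0;
}

Under sld=warn the new code sleeps the task for 10ms, serializes offenders through buslock_sem, disables detection on that CPU so the access can complete, and re-arms it via delayed work two jiffies later.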
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 1940d305db1c..1c87501e0fa3 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -1294,10 +1294,23 @@ out_free:
kfree(bank);
}
+static void __threshold_remove_device(struct threshold_bank **bp)
+{
+ unsigned int bank, numbanks = this_cpu_read(mce_num_banks);
+
+ for (bank = 0; bank < numbanks; bank++) {
+ if (!bp[bank])
+ continue;
+
+ threshold_remove_bank(bp[bank]);
+ bp[bank] = NULL;
+ }
+ kfree(bp);
+}
+
int mce_threshold_remove_device(unsigned int cpu)
{
struct threshold_bank **bp = this_cpu_read(threshold_banks);
- unsigned int bank, numbanks = this_cpu_read(mce_num_banks);
if (!bp)
return 0;
@@ -1308,13 +1321,7 @@ int mce_threshold_remove_device(unsigned int cpu)
*/
this_cpu_write(threshold_banks, NULL);
- for (bank = 0; bank < numbanks; bank++) {
- if (bp[bank]) {
- threshold_remove_bank(bp[bank]);
- bp[bank] = NULL;
- }
- }
- kfree(bp);
+ __threshold_remove_device(bp);
return 0;
}
@@ -1351,15 +1358,14 @@ int mce_threshold_create_device(unsigned int cpu)
if (!(this_cpu_read(bank_map) & (1 << bank)))
continue;
err = threshold_create_bank(bp, cpu, bank);
- if (err)
- goto out_err;
+ if (err) {
+ __threshold_remove_device(bp);
+ return err;
+ }
}
this_cpu_write(threshold_banks, bp);
if (thresholding_irq_en)
mce_threshold_vector = amd_threshold_interrupt;
return 0;
-out_err:
- mce_threshold_remove_device(cpu);
- return err;
}
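The old error path called mce_threshold_remove_device(), which walks the per-CPU threshold_banks pointer; that pointer is only assigned on success, so the partially built array leaked. The new helper frees the local array directly, following the usual build-then-publish pattern. A generic sketch of that pattern (wrapper name hypothetical):

/*
 * Illustrative only: construct the array fully (or free the partial
 * result) before any shared pointer is allowed to see it.
 */
static struct threshold_bank **example_build(unsigned int cpu, unsigned int n)
{
	struct threshold_bank **bp = kcalloc(n, sizeof(*bp), GFP_KERNEL);
	unsigned int bank;

	if (!bp)
		return NULL;

	for (bank = 0; bank < n; bank++) {
		if (threshold_create_bank(bp, cpu, bank)) {
			__threshold_remove_device(bp);	/* tolerates NULL slots */
			return NULL;
		}
	}
	return bp;	/* only now publish, e.g. via this_cpu_write() */
}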
diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c
index 0e3ae64d3b76..717192915f28 100644
--- a/arch/x86/kernel/cpu/mce/apei.c
+++ b/arch/x86/kernel/cpu/mce/apei.c
@@ -177,16 +177,14 @@ retry:
/* no more record */
if (*record_id == APEI_ERST_INVALID_RECORD_ID)
goto out;
- rc = erst_read(*record_id, &rcd.hdr, sizeof(rcd));
+ rc = erst_read_record(*record_id, &rcd.hdr, sizeof(rcd), sizeof(rcd),
+ &CPER_CREATOR_MCE);
/* someone else has cleared the record, try next one */
if (rc == -ENOENT)
goto retry;
else if (rc < 0)
goto out;
- /* try to skip other type records in storage */
- else if (rc != sizeof(rcd) ||
- !guid_equal(&rcd.hdr.creator_id, &CPER_CREATOR_MCE))
- goto retry;
+
memcpy(m, &rcd.mce, sizeof(*m));
rc = sizeof(*m);
out:
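erst_read_record() consolidates the length and creator-ID filtering the caller used to open-code. A sketch of the expected behavior (illustrative only; the real helper lives in drivers/acpi/apei/erst.c):

static ssize_t erst_read_record_sketch(u64 record_id,
				       struct cper_record_header *hdr,
				       size_t buflen, size_t recordlen,
				       const guid_t *creatorid)
{
	ssize_t len = erst_read(record_id, hdr, buflen);

	if (len < 0)
		return len;

	/* Treat truncated or foreign-creator records as not found. */
	if (len != recordlen ||
	    (creatorid && !guid_equal(&hdr->creator_id, creatorid)))
		return -ENOENT;

	return len;
}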
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 981496e6bc0e..d775fcd74e98 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -69,7 +69,9 @@ DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);
struct mce_bank {
u64 ctl; /* subevents to enable */
- bool init; /* initialise bank? */
+
+ __u64 init : 1, /* initialise bank? */
+ __reserved_1 : 63;
};
static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 1add86935349..00483d1c27e4 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -301,85 +301,65 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
}
}
-static __always_inline int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
+/* See AMD PPR(s) section Machine Check Error Handling. */
+static noinstr int mce_severity_amd(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
{
- u64 mcx_cfg;
+ char *panic_msg = NULL;
+ int ret;
/*
- * We need to look at the following bits:
- * - "succor" bit (data poisoning support), and
- * - TCC bit (Task Context Corrupt)
- * in MCi_STATUS to determine error severity.
+ * Default return value: Action required, the error must be handled
+ * immediately.
*/
- if (!mce_flags.succor)
- return MCE_PANIC_SEVERITY;
-
- mcx_cfg = mce_rdmsrl(MSR_AMD64_SMCA_MCx_CONFIG(m->bank));
-
- /* TCC (Task context corrupt). If set and if IN_KERNEL, panic. */
- if ((mcx_cfg & MCI_CONFIG_MCAX) &&
- (m->status & MCI_STATUS_TCC) &&
- (err_ctx == IN_KERNEL))
- return MCE_PANIC_SEVERITY;
-
- /* ...otherwise invoke hwpoison handler. */
- return MCE_AR_SEVERITY;
-}
-
-/*
- * See AMD Error Scope Hierarchy table in a newer BKDG. For example
- * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
- */
-static noinstr int mce_severity_amd(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
-{
- enum context ctx = error_context(m, regs);
+ ret = MCE_AR_SEVERITY;
/* Processor Context Corrupt, no need to fumble too much, die! */
- if (m->status & MCI_STATUS_PCC)
- return MCE_PANIC_SEVERITY;
-
- if (m->status & MCI_STATUS_UC) {
-
- if (ctx == IN_KERNEL)
- return MCE_PANIC_SEVERITY;
+ if (m->status & MCI_STATUS_PCC) {
+ panic_msg = "Processor Context Corrupt";
+ ret = MCE_PANIC_SEVERITY;
+ goto out;
+ }
- /*
- * On older systems where overflow_recov flag is not present, we
- * should simply panic if an error overflow occurs. If
- * overflow_recov flag is present and set, then software can try
- * to at least kill process to prolong system operation.
- */
- if (mce_flags.overflow_recov) {
- if (mce_flags.smca)
- return mce_severity_amd_smca(m, ctx);
-
- /* kill current process */
- return MCE_AR_SEVERITY;
- } else {
- /* at least one error was not logged */
- if (m->status & MCI_STATUS_OVER)
- return MCE_PANIC_SEVERITY;
- }
-
- /*
- * For any other case, return MCE_UC_SEVERITY so that we log the
- * error and exit #MC handler.
- */
- return MCE_UC_SEVERITY;
+ if (m->status & MCI_STATUS_DEFERRED) {
+ ret = MCE_DEFERRED_SEVERITY;
+ goto out;
}
/*
- * deferred error: poll handler catches these and adds to mce_ring so
- * memory-failure can take recovery actions.
+ * If the UC bit is not set, the system either corrected or deferred
+ * the error. No action will be required after logging the error.
*/
- if (m->status & MCI_STATUS_DEFERRED)
- return MCE_DEFERRED_SEVERITY;
+ if (!(m->status & MCI_STATUS_UC)) {
+ ret = MCE_KEEP_SEVERITY;
+ goto out;
+ }
/*
- * corrected error: poll handler catches these and passes responsibility
- * of decoding the error to EDAC
+ * On MCA overflow, without the MCA overflow recovery feature the
+ * system will not be able to recover, so panic.
*/
- return MCE_KEEP_SEVERITY;
+ if ((m->status & MCI_STATUS_OVER) && !mce_flags.overflow_recov) {
+ panic_msg = "Overflowed uncorrected error without MCA Overflow Recovery";
+ ret = MCE_PANIC_SEVERITY;
+ goto out;
+ }
+
+ if (!mce_flags.succor) {
+ panic_msg = "Uncorrected error without MCA Recovery";
+ ret = MCE_PANIC_SEVERITY;
+ goto out;
+ }
+
+ if (error_context(m, regs) == IN_KERNEL) {
+ panic_msg = "Uncorrected unrecoverable error in kernel context";
+ ret = MCE_PANIC_SEVERITY;
+ }
+
+out:
+ if (msg && panic_msg)
+ *msg = panic_msg;
+
+ return ret;
}
static noinstr int mce_severity_intel(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index d28a9f8f3fec..025c8f0cd948 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -45,20 +45,6 @@ static struct microcode_intel *intel_ucode_patch;
/* last level cache size per core */
static int llc_size_per_core;
-static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1,
- unsigned int s2, unsigned int p2)
-{
- if (s1 != s2)
- return false;
-
- /* Processor flags are either both 0 ... */
- if (!p1 && !p2)
- return true;
-
- /* ... or they intersect. */
- return p1 & p2;
-}
-
/*
* Returns 1 if update has been found, 0 otherwise.
*/
@@ -69,7 +55,7 @@ static int find_matching_signature(void *mc, unsigned int csig, int cpf)
struct extended_signature *ext_sig;
int i;
- if (cpu_signatures_match(csig, cpf, mc_hdr->sig, mc_hdr->pf))
+ if (intel_cpu_signatures_match(csig, cpf, mc_hdr->sig, mc_hdr->pf))
return 1;
/* Look for ext. headers: */
@@ -80,7 +66,7 @@ static int find_matching_signature(void *mc, unsigned int csig, int cpf)
ext_sig = (void *)ext_hdr + EXT_HEADER_SIZE;
for (i = 0; i < ext_hdr->count; i++) {
- if (cpu_signatures_match(csig, cpf, ext_sig->sig, ext_sig->pf))
+ if (intel_cpu_signatures_match(csig, cpf, ext_sig->sig, ext_sig->pf))
return 1;
ext_sig++;
}
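intel_cpu_signatures_match() keeps the semantics of the removed helper, which the hunk above still shows; condensed:

/*
 * Signature-match rule (restating the removed helper): signatures must
 * be equal, and platform-flag masks either are both zero or intersect.
 */
static bool sigs_match(unsigned int s1, unsigned int p1,
		       unsigned int s2, unsigned int p2)
{
	return s1 == s2 && ((!p1 && !p2) || (p1 & p2));
}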
@@ -342,37 +328,6 @@ next:
return patch;
}
-static int collect_cpu_info_early(struct ucode_cpu_info *uci)
-{
- unsigned int val[2];
- unsigned int family, model;
- struct cpu_signature csig = { 0 };
- unsigned int eax, ebx, ecx, edx;
-
- memset(uci, 0, sizeof(*uci));
-
- eax = 0x00000001;
- ecx = 0;
- native_cpuid(&eax, &ebx, &ecx, &edx);
- csig.sig = eax;
-
- family = x86_family(eax);
- model = x86_model(eax);
-
- if ((model >= 5) || (family > 6)) {
- /* get processor flags from MSR 0x17 */
- native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
- csig.pf = 1 << ((val[1] >> 18) & 7);
- }
-
- csig.rev = intel_get_microcode_revision();
-
- uci->cpu_sig = csig;
- uci->valid = 1;
-
- return 0;
-}
-
static void show_saved_mc(void)
{
#ifdef DEBUG
@@ -386,7 +341,7 @@ static void show_saved_mc(void)
return;
}
- collect_cpu_info_early(&uci);
+ intel_cpu_collect_info(&uci);
sig = uci.cpu_sig.sig;
pf = uci.cpu_sig.pf;
@@ -502,7 +457,7 @@ void show_ucode_info_early(void)
struct ucode_cpu_info uci;
if (delay_ucode_info) {
- collect_cpu_info_early(&uci);
+ intel_cpu_collect_info(&uci);
print_ucode_info(&uci, current_mc_date);
delay_ucode_info = 0;
}
@@ -604,7 +559,7 @@ int __init save_microcode_in_initrd_intel(void)
if (!(cp.data && cp.size))
return 0;
- collect_cpu_info_early(&uci);
+ intel_cpu_collect_info(&uci);
scan_microcode(cp.data, cp.size, &uci, true);
@@ -637,7 +592,7 @@ static struct microcode_intel *__load_ucode_intel(struct ucode_cpu_info *uci)
if (!(cp.data && cp.size))
return NULL;
- collect_cpu_info_early(uci);
+ intel_cpu_collect_info(uci);
return scan_microcode(cp.data, cp.size, uci, false);
}
@@ -712,7 +667,7 @@ void reload_ucode_intel(void)
struct microcode_intel *p;
struct ucode_cpu_info uci;
- collect_cpu_info_early(&uci);
+ intel_cpu_collect_info(&uci);
p = find_patch(&uci);
if (!p)
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 4eec8889b0ff..099b6f0d96bd 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -84,14 +84,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "microcode\t: 0x%x\n", c->microcode);
if (cpu_has(c, X86_FEATURE_TSC)) {
- unsigned int freq = aperfmperf_get_khz(cpu);
-
- if (!freq)
- freq = cpufreq_quick_get(cpu);
- if (!freq)
- freq = cpu_khz;
- seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
- freq / 1000, (freq % 1000));
+ unsigned int freq = arch_freq_get_on_cpu(cpu);
+
+ seq_printf(m, "cpu MHz\t\t: %u.%03u\n", freq / 1000, (freq % 1000));
}
/* Cache size */
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 83f901e2c2df..f276aff521e8 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -341,14 +341,14 @@ static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
/* Check whether cpus belong to parent ctrl group */
cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
- if (cpumask_weight(tmpmask)) {
+ if (!cpumask_empty(tmpmask)) {
rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n");
return -EINVAL;
}
/* Check whether cpus are dropped from this group */
cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
- if (cpumask_weight(tmpmask)) {
+ if (!cpumask_empty(tmpmask)) {
/* Give any dropped cpus to parent rdtgroup */
cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
update_closid_rmid(tmpmask, prgrp);
@@ -359,7 +359,7 @@ static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
* and update per-cpu rmid
*/
cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
- if (cpumask_weight(tmpmask)) {
+ if (!cpumask_empty(tmpmask)) {
head = &prgrp->mon.crdtgrp_list;
list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
if (crgrp == rdtgrp)
@@ -394,7 +394,7 @@ static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
/* Check whether cpus are dropped from this group */
cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
- if (cpumask_weight(tmpmask)) {
+ if (!cpumask_empty(tmpmask)) {
/* Can't drop from default group */
if (rdtgrp == &rdtgroup_default) {
rdt_last_cmd_puts("Can't drop CPUs from default group\n");
@@ -413,12 +413,12 @@ static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
* and update per-cpu closid/rmid.
*/
cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
- if (cpumask_weight(tmpmask)) {
+ if (!cpumask_empty(tmpmask)) {
list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
if (r == rdtgrp)
continue;
cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
- if (cpumask_weight(tmpmask1))
+ if (!cpumask_empty(tmpmask1))
cpumask_rdtgrp_clear(r, tmpmask1);
}
update_closid_rmid(tmpmask, rdtgrp);
@@ -488,7 +488,7 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
/* check that user didn't specify any offline cpus */
cpumask_andnot(tmpmask, newmask, cpu_online_mask);
- if (cpumask_weight(tmpmask)) {
+ if (!cpumask_empty(tmpmask)) {
ret = -EINVAL;
rdt_last_cmd_puts("Can only assign online CPUs\n");
goto unlock;
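All of these conversions are behavior-preserving: testing emptiness directly lets the scan stop at the first set bit, whereas cpumask_weight() always computes a full popcount. Side by side (illustrative):

static void example(const struct cpumask *mask)
{
	if (cpumask_weight(mask))	/* old: full popcount, compared to 0 */
		pr_debug("mask is non-empty\n");

	if (!cpumask_empty(mask))	/* new: can stop at the first set bit */
		pr_debug("mask is non-empty\n");
}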
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 4143b1e4c5c6..dbaa8326d6f2 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -43,6 +43,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
{ X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 },
+ { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
{ 0, 0, 0, 0, 0 }
};
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 7c63a1911fae..3c24e6124d95 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -12,6 +12,92 @@
#include "encls.h"
#include "sgx.h"
+#define PCMDS_PER_PAGE (PAGE_SIZE / sizeof(struct sgx_pcmd))
+/*
+ * 32 PCMD entries share a PCMD page. PCMD_FIRST_MASK is used to
+ * determine the page index associated with the first PCMD entry
+ * within a PCMD page.
+ */
+#define PCMD_FIRST_MASK GENMASK(4, 0)
+
+/**
+ * reclaimer_writing_to_pcmd() - Query if any enclave page associated with
+ * a PCMD page is in process of being reclaimed.
+ * @encl: Enclave to which PCMD page belongs
+ * @start_addr: Address of enclave page using first entry within the PCMD page
+ *
+ * When an enclave page is reclaimed, some Paging Crypto MetaData (PCMD) is
+ * stored. The PCMD data of a reclaimed enclave page contains enough
+ * information for the processor to verify the page at the time
+ * it is loaded back into the Enclave Page Cache (EPC).
+ *
+ * The backing storage to which enclave pages are reclaimed is laid out as
+ * follows:
+ * Encrypted enclave pages:SECS page:PCMD pages
+ *
+ * Each PCMD page contains the PCMD metadata of
+ * PAGE_SIZE/sizeof(struct sgx_pcmd) enclave pages.
+ *
+ * A PCMD page can only be truncated if it is (a) empty, and (b) not in the
+ * process of getting data (and thus soon being non-empty). (b) is tested by
+ * checking whether an enclave page sharing the PCMD page is in the process
+ * of being reclaimed.
+ *
+ * The reclaimer sets the SGX_ENCL_PAGE_BEING_RECLAIMED flag when it
+ * intends to reclaim that enclave page - this means that the PCMD page
+ * associated with that enclave page is about to receive data, so even
+ * if the PCMD page is currently empty it must not be truncated.
+ *
+ * Context: Enclave mutex (&sgx_encl->lock) must be held.
+ * Return: 1 if the reclaimer is about to write to the PCMD page
+ * 0 if the reclaimer has no intention to write to the PCMD page
+ */
+static int reclaimer_writing_to_pcmd(struct sgx_encl *encl,
+ unsigned long start_addr)
+{
+ int reclaimed = 0;
+ int i;
+
+ /*
+ * PCMD_FIRST_MASK is based on number of PCMD entries within
+ * PCMD page being 32.
+ */
+ BUILD_BUG_ON(PCMDS_PER_PAGE != 32);
+
+ for (i = 0; i < PCMDS_PER_PAGE; i++) {
+ struct sgx_encl_page *entry;
+ unsigned long addr;
+
+ addr = start_addr + i * PAGE_SIZE;
+
+ /*
+ * Stop when reaching the SECS page - it does not
+ * have a page_array entry and its reclaim is
+ * started and completed with enclave mutex held so
+ * it does not use the SGX_ENCL_PAGE_BEING_RECLAIMED
+ * flag.
+ */
+ if (addr == encl->base + encl->size)
+ break;
+
+ entry = xa_load(&encl->page_array, PFN_DOWN(addr));
+ if (!entry)
+ continue;
+
+ /*
+ * VA page slot ID uses same bit as the flag so it is important
+ * to ensure that the page is not already in backing store.
+ */
+ if (entry->epc_page &&
+ (entry->desc & SGX_ENCL_PAGE_BEING_RECLAIMED)) {
+ reclaimed = 1;
+ break;
+ }
+ }
+
+ return reclaimed;
+}
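The mask arithmetic groups 32 enclave pages per PCMD page; e.g. page index 37 shares a PCMD page with indices 32..63, and the "first" index is obtained by clearing the low five bits:

/* Illustrative helper, not part of the patch: 37 & ~PCMD_FIRST_MASK == 32. */
static unsigned long pcmd_first_index(unsigned long page_index)
{
	return page_index & ~PCMD_FIRST_MASK;
}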
+
/*
* Calculate byte offset of a PCMD struct associated with an enclave page. PCMDs
* follow right after the EPC data in the backing storage. In addition to the
@@ -47,6 +133,7 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK;
struct sgx_encl *encl = encl_page->encl;
pgoff_t page_index, page_pcmd_off;
+ unsigned long pcmd_first_page;
struct sgx_pageinfo pginfo;
struct sgx_backing b;
bool pcmd_page_empty;
@@ -58,6 +145,11 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
else
page_index = PFN_DOWN(encl->size);
+ /*
+ * Address of enclave page using the first entry within the PCMD page.
+ */
+ pcmd_first_page = PFN_PHYS(page_index & ~PCMD_FIRST_MASK) + encl->base;
+
page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
ret = sgx_encl_get_backing(encl, page_index, &b);
@@ -84,6 +176,7 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
}
memset(pcmd_page + b.pcmd_offset, 0, sizeof(struct sgx_pcmd));
+ set_page_dirty(b.pcmd);
/*
* The area for the PCMD in the page was zeroed above. Check if the
@@ -94,12 +187,20 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
kunmap_atomic(pcmd_page);
kunmap_atomic((void *)(unsigned long)pginfo.contents);
- sgx_encl_put_backing(&b, false);
+ get_page(b.pcmd);
+ sgx_encl_put_backing(&b);
sgx_encl_truncate_backing_page(encl, page_index);
- if (pcmd_page_empty)
+ if (pcmd_page_empty && !reclaimer_writing_to_pcmd(encl, pcmd_first_page)) {
sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off));
+ pcmd_page = kmap_atomic(b.pcmd);
+ if (memchr_inv(pcmd_page, 0, PAGE_SIZE))
+ pr_warn("PCMD page not empty after truncate.\n");
+ kunmap_atomic(pcmd_page);
+ }
+
+ put_page(b.pcmd);
return ret;
}
@@ -645,15 +746,9 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
/**
* sgx_encl_put_backing() - Unpin the backing storage
* @backing: data for accessing backing storage for the page
- * @do_write: mark pages dirty
*/
-void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write)
+void sgx_encl_put_backing(struct sgx_backing *backing)
{
- if (do_write) {
- set_page_dirty(backing->pcmd);
- set_page_dirty(backing->contents);
- }
-
put_page(backing->pcmd);
put_page(backing->contents);
}
diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
index fec43ca65065..d44e7372151f 100644
--- a/arch/x86/kernel/cpu/sgx/encl.h
+++ b/arch/x86/kernel/cpu/sgx/encl.h
@@ -107,7 +107,7 @@ void sgx_encl_release(struct kref *ref);
int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm);
int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
struct sgx_backing *backing);
-void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write);
+void sgx_encl_put_backing(struct sgx_backing *backing);
int sgx_encl_test_and_clear_young(struct mm_struct *mm,
struct sgx_encl_page *page);
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 8e4bc6453d26..ab4ec54bbdd9 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -191,6 +191,8 @@ static int __sgx_encl_ewb(struct sgx_epc_page *epc_page, void *va_slot,
backing->pcmd_offset;
ret = __ewb(&pginfo, sgx_get_epc_virt_addr(epc_page), va_slot);
+ set_page_dirty(backing->pcmd);
+ set_page_dirty(backing->contents);
kunmap_atomic((void *)(unsigned long)(pginfo.metadata -
backing->pcmd_offset));
@@ -308,6 +310,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
sgx_encl_ewb(epc_page, backing);
encl_page->epc_page = NULL;
encl->secs_child_cnt--;
+ sgx_encl_put_backing(backing);
if (!encl->secs_child_cnt && test_bit(SGX_ENCL_INITIALIZED, &encl->flags)) {
ret = sgx_encl_get_backing(encl, PFN_DOWN(encl->size),
@@ -320,7 +323,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
sgx_encl_free_epc_page(encl->secs.epc_page);
encl->secs.epc_page = NULL;
- sgx_encl_put_backing(&secs_backing, true);
+ sgx_encl_put_backing(&secs_backing);
}
out:
@@ -379,11 +382,14 @@ static void sgx_reclaim_pages(void)
goto skip;
page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base);
+
+ mutex_lock(&encl_page->encl->lock);
ret = sgx_encl_get_backing(encl_page->encl, page_index, &backing[i]);
- if (ret)
+ if (ret) {
+ mutex_unlock(&encl_page->encl->lock);
goto skip;
+ }
- mutex_lock(&encl_page->encl->lock);
encl_page->desc |= SGX_ENCL_PAGE_BEING_RECLAIMED;
mutex_unlock(&encl_page->encl->lock);
continue;
@@ -411,7 +417,6 @@ skip:
encl_page = epc_page->owner;
sgx_reclaimer_write(epc_page, &backing[i]);
- sgx_encl_put_backing(&backing[i], true);
kref_put(&encl_page->encl->refcount, sgx_encl_release);
epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index e8326a8d1c5d..9730c88530fc 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -407,7 +407,7 @@ int crash_load_segments(struct kimage *image)
}
image->elf_load_addr = kbuf.mem;
pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- image->elf_load_addr, kbuf.bufsz, kbuf.bufsz);
+ image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
return ret;
}
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 39e1c8626ab9..c8340156bfd2 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -142,7 +142,8 @@ static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature)
* Non-compacted format and legacy features use the cached fixed
* offsets.
*/
- if (!cpu_feature_enabled(X86_FEATURE_XSAVES) || xfeature <= XFEATURE_SSE)
+ if (!cpu_feature_enabled(X86_FEATURE_XCOMPACTED) ||
+ xfeature <= XFEATURE_SSE)
return xstate_offsets[xfeature];
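For features past the fixed offsets, the compacted offset is derived by walking xcomp_bv. A simplified sketch of that derivation (illustrative; the real code also honors each component's 64-byte alignment requirement):

static unsigned int compacted_offset_sketch(u64 xcomp_bv, int xfeature)
{
	unsigned int offs = FXSAVE_SIZE + XSAVE_HDR_SIZE;
	int i;

	/* Sum the sizes of all lower-numbered features present in xcomp_bv. */
	for (i = FIRST_EXTENDED_XFEATURE; i < xfeature; i++) {
		if (xcomp_bv & BIT_ULL(i))
			offs += xstate_sizes[i];
	}
	return offs;
}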
/*
@@ -369,12 +370,12 @@ static void __init setup_init_fpu_buf(void)
/*
* All components are now in init state. Read the state back so
* that init_fpstate contains all non-zero init state. This only
- * works with XSAVE, but not with XSAVEOPT and XSAVES because
+ * works with XSAVE, but not with XSAVEOPT and XSAVEC/S because
* those use the init optimization which skips writing data for
* components in init state.
*
* XSAVE could be used, but that would require to reshuffle the
- * data when XSAVES is available because XSAVES uses xstate
+ * data when XSAVEC/S is available because XSAVEC/S uses xstate
* compaction. But doing so is a pointless exercise because most
* components have an all zeros init state except for the legacy
* ones (FP and SSE). Those can be saved with FXSAVE into the
@@ -584,7 +585,8 @@ static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
*/
static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
{
- bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
+ bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
+ bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES);
unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
int i;
@@ -595,7 +597,7 @@ static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
* Supervisor state components can be managed only by
* XSAVES.
*/
- if (!compacted && xfeature_is_supervisor(i)) {
+ if (!xsaves && xfeature_is_supervisor(i)) {
XSTATE_WARN_ON(1);
return false;
}
@@ -612,8 +614,11 @@ static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
* the size of the *user* states. If we use it to size a buffer
* that we use 'XSAVES' on, we could potentially overflow the
* buffer because 'XSAVES' saves system states too.
+ *
+ * This also takes compaction into account. So this works for
+ * XSAVEC as well.
*/
-static unsigned int __init get_xsaves_size(void)
+static unsigned int __init get_compacted_size(void)
{
unsigned int eax, ebx, ecx, edx;
/*
@@ -623,6 +628,10 @@ static unsigned int __init get_xsaves_size(void)
* containing all the state components
* corresponding to bits currently set in
* XCR0 | IA32_XSS.
+ *
+ * When XSAVES is not available but XSAVEC is (virt), then there
+ * are no supervisor states, but XSAVEC still uses compacted
+ * format.
*/
cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
return ebx;
@@ -632,13 +641,13 @@ static unsigned int __init get_xsaves_size(void)
* Get the total size of the enabled xstates without the independent supervisor
* features.
*/
-static unsigned int __init get_xsaves_size_no_independent(void)
+static unsigned int __init get_xsave_compacted_size(void)
{
u64 mask = xfeatures_mask_independent();
unsigned int size;
if (!mask)
- return get_xsaves_size();
+ return get_compacted_size();
/* Disable independent features. */
wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
@@ -647,7 +656,7 @@ static unsigned int __init get_xsaves_size_no_independent(void)
* Ask the hardware what size is required of the buffer.
* This is the size required for the task->fpu buffer.
*/
- size = get_xsaves_size();
+ size = get_compacted_size();
/* Re-enable independent features so XSAVES will work on them again. */
wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
@@ -687,20 +696,21 @@ static int __init init_xstate_size(void)
{
/* Recompute the context size for enabled features: */
unsigned int user_size, kernel_size, kernel_default_size;
- bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
+ bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
/* Uncompacted user space size */
user_size = get_xsave_size_user();
/*
- * XSAVES kernel size includes supervisor states and
- * uses compacted format when available.
+ * XSAVES kernel size includes supervisor states and uses compacted
+ * format. XSAVEC uses compacted format, but does not save
+ * supervisor states.
*
- * XSAVE does not support supervisor states so
- * kernel and user size is identical.
+ * XSAVE[OPT] do not support supervisor states so kernel and user
+ * size is identical.
*/
if (compacted)
- kernel_size = get_xsaves_size_no_independent();
+ kernel_size = get_xsave_compacted_size();
else
kernel_size = user_size;
@@ -813,8 +823,11 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
if (!cpu_feature_enabled(X86_FEATURE_XFD))
fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC;
- fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
- XFEATURE_MASK_SUPERVISOR_SUPPORTED;
+ if (!cpu_feature_enabled(X86_FEATURE_XSAVES))
+ fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
+ else
+ fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
+ XFEATURE_MASK_SUPERVISOR_SUPPORTED;
fpu_user_cfg.max_features = fpu_kernel_cfg.max_features;
fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
@@ -837,6 +850,11 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
*/
init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC;
+ /* Set up compaction feature bit */
+ if (cpu_feature_enabled(X86_FEATURE_XSAVEC) ||
+ cpu_feature_enabled(X86_FEATURE_XSAVES))
+ setup_force_cpu_cap(X86_FEATURE_XCOMPACTED);
+
/* Enable xstate instructions to be able to continue with initialization: */
fpu__init_cpu_xstate();
@@ -873,7 +891,7 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
fpu_kernel_cfg.max_features,
fpu_kernel_cfg.max_size,
- boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
+ boot_cpu_has(X86_FEATURE_XCOMPACTED) ? "compacted" : "standard");
return;
out_disable:
@@ -917,7 +935,7 @@ static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
return NULL;
- if (cpu_feature_enabled(X86_FEATURE_XSAVES)) {
+ if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) {
if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr))))
return NULL;
}
@@ -1215,7 +1233,7 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
}
for (i = 0; i < XFEATURE_MAX; i++) {
- u64 mask = ((u64)1 << i);
+ mask = BIT_ULL(i);
if (hdr.xfeatures & mask) {
void *dst = __raw_xsave_addr(xsave, i);
@@ -1525,7 +1543,7 @@ static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
* vendors into extending XFD for the pre AMX states, especially
* AVX512.
*/
- bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
+ bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
struct fpu *fpu = &current->group_leader->thread.fpu;
struct fpu_state_perm *perm;
unsigned int ksize, usize;
@@ -1687,16 +1705,13 @@ EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm);
* e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and
* XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18).
*/
-long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2)
+long fpu_xstate_prctl(int option, unsigned long arg2)
{
u64 __user *uptr = (u64 __user *)arg2;
u64 permitted, supported;
unsigned long idx = arg2;
bool guest = false;
- if (tsk != current)
- return -EPERM;
-
switch (option) {
case ARCH_GET_XCOMP_SUPP:
supported = fpu_user_cfg.max_features | fpu_user_cfg.legacy_features;
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index d22ace092ca2..5ad47031383b 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -16,7 +16,7 @@ static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask)
* XRSTORS requires these bits set in xcomp_bv, or it will
* trigger #GP:
*/
- if (cpu_feature_enabled(X86_FEATURE_XSAVES))
+ if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED))
xsave->header.xcomp_bv = mask | XCOMP_BV_COMPACTED_FORMAT;
}
@@ -79,6 +79,7 @@ static inline u64 xfeatures_mask_independent(void)
/* These macros all use (%edi)/(%rdi) as the single memory argument. */
#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27"
#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37"
+#define XSAVEC ".byte " REX_PREFIX "0x0f,0xc7,0x27"
#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f"
#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f"
#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f"
@@ -97,9 +98,11 @@ static inline u64 xfeatures_mask_independent(void)
: "memory")
/*
- * If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact
- * format and supervisor states in addition to modified optimization in
- * XSAVEOPT.
+ * If XSAVES is enabled, it replaces XSAVEC because it supports supervisor
+ * states in addition to XSAVEC.
+ *
+ * Otherwise if XSAVEC is enabled, it replaces XSAVEOPT because it supports
+ * compacted storage format in addition to XSAVEOPT.
*
* Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT
* supports modified optimization which is not supported by XSAVE.
@@ -111,8 +114,9 @@ static inline u64 xfeatures_mask_independent(void)
* address of the instruction where we might get an exception at.
*/
#define XSTATE_XSAVE(st, lmask, hmask, err) \
- asm volatile(ALTERNATIVE_2(XSAVE, \
+ asm volatile(ALTERNATIVE_3(XSAVE, \
XSAVEOPT, X86_FEATURE_XSAVEOPT, \
+ XSAVEC, X86_FEATURE_XSAVEC, \
XSAVES, X86_FEATURE_XSAVES) \
"\n" \
"xor %[err], %[err]\n" \
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 1e31c7d21597..b09d73c2ba89 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -579,9 +579,7 @@ void arch_ftrace_trampoline_free(struct ftrace_ops *ops)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-#ifdef CONFIG_DYNAMIC_FTRACE
-
-#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
+#if defined(CONFIG_DYNAMIC_FTRACE) && !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS)
extern void ftrace_graph_call(void);
static const char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
{
@@ -610,18 +608,7 @@ int ftrace_disable_ftrace_graph_caller(void)
return ftrace_mod_jmp(ip, &ftrace_stub);
}
-#else /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
-int ftrace_enable_ftrace_graph_caller(void)
-{
- return 0;
-}
-
-int ftrace_disable_ftrace_graph_caller(void)
-{
- return 0;
-}
-#endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
-#endif /* !CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_DYNAMIC_FTRACE && !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
/*
* Hook the return address and push it in the stack of return addrs
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 4f5ecbbaae77..bd4a34100ed0 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -40,6 +40,7 @@
#include <asm/extable.h>
#include <asm/trapnr.h>
#include <asm/sev.h>
+#include <asm/tdx.h>
/*
* Manage page tables very early on.
@@ -143,7 +144,20 @@ static unsigned long __head sme_postprocess_startup(struct boot_params *bp, pmdv
if (sme_get_me_mask()) {
vaddr = (unsigned long)__start_bss_decrypted;
vaddr_end = (unsigned long)__end_bss_decrypted;
+
for (; vaddr < vaddr_end; vaddr += PMD_SIZE) {
+ /*
+ * On SNP, transition the page to shared in the RMP table so that
+ * it is consistent with the page table attribute change.
+ *
+ * __start_bss_decrypted has a virtual address in the high range
+ * mapping (kernel .text). PVALIDATE, by way of
+ * early_snp_set_memory_shared(), requires a valid virtual
+ * address, but the kernel is currently running off of the identity
+ * mapping, so use __pa() to get a *currently* valid virtual address.
+ */
+ early_snp_set_memory_shared(__pa(vaddr), __pa(vaddr), PTRS_PER_PMD);
+
i = pmd_index(vaddr);
pmd[i] -= sme_get_me_mask();
}
@@ -192,9 +206,6 @@ unsigned long __head __startup_64(unsigned long physaddr,
if (load_delta & ~PMD_PAGE_MASK)
for (;;);
- /* Activate Secure Memory Encryption (SME) if supported and enabled */
- sme_enable(bp);
-
/* Include the SME encryption mask in the fixup value */
load_delta += sme_get_me_mask();
@@ -308,15 +319,6 @@ unsigned long __head __startup_64(unsigned long physaddr,
return sme_postprocess_startup(bp, pmd);
}
-unsigned long __startup_secondary_64(void)
-{
- /*
- * Return the SME encryption mask (if SME is active) to be used as a
- * modifier for the initial pgdir entry programmed into CR3.
- */
- return sme_get_me_mask();
-}
-
/* Wipe all early page tables except for the kernel symbol map */
static void __init reset_early_page_tables(void)
{
@@ -416,6 +418,9 @@ void __init do_early_exception(struct pt_regs *regs, int trapnr)
trapnr == X86_TRAP_VC && handle_vc_boot_ghcb(regs))
return;
+ if (trapnr == X86_TRAP_VE && tdx_early_handle_ve(regs))
+ return;
+
early_fixup_exception(regs, trapnr);
}
@@ -514,6 +519,9 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
idt_setup_early_handler();
+ /* Needed before cc_platform_has() can be used for TDX */
+ tdx_early_init();
+
copy_bootdata(__va(real_mode_data));
/*
@@ -600,8 +608,10 @@ static void startup_64_load_idt(unsigned long physbase)
void early_setup_idt(void)
{
/* VMM Communication Exception */
- if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
+ if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
+ setup_ghcb();
set_bringup_idt_handler(bringup_idt_table, X86_TRAP_VC, vc_boot_ghcb);
+ }
bringup_idt_descr.address = (unsigned long)bringup_idt_table;
native_load_idt(&bringup_idt_descr);
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index b8e3019547a5..92c4afa2b729 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -65,10 +65,39 @@ SYM_CODE_START_NOALIGN(startup_64)
leaq (__end_init_task - FRAME_SIZE)(%rip), %rsp
leaq _text(%rip), %rdi
+
+ /*
+ * initial_gs points to the initial fixed_percpu_data struct with storage for
+ * the stack protector canary. Global pointer fixups are needed at this
+ * stage, so apply them as is done in fixup_pointer(), and initialize %gs
+ * such that the canary can be accessed at %gs:40 for subsequent C calls.
+ */
+ movl $MSR_GS_BASE, %ecx
+ movq initial_gs(%rip), %rax
+ movq $_text, %rdx
+ subq %rdx, %rax
+ addq %rdi, %rax
+ movq %rax, %rdx
+ shrq $32, %rdx
+ wrmsr
+
pushq %rsi
call startup_64_setup_env
popq %rsi
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ /*
+ * Activate SEV/SME memory encryption if supported/enabled. This needs to
+ * be done now, since it also sets up the SEV-SNP CPUID table, which
+ * must happen before any CPUID instructions are executed in subsequent
+ * code.
+ */
+ movq %rsi, %rdi
+ pushq %rsi
+ call sme_enable
+ popq %rsi
+#endif
+
/* Now switch to __KERNEL_CS so IRET works reliably */
pushq $__KERNEL_CS
leaq .Lon_kernel_cs(%rip), %rax
@@ -134,16 +163,32 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
* Retrieve the modifier (SME encryption mask if SME is active) to be
* added to the initial pgdir entry that will be programmed into CR3.
*/
- pushq %rsi
- call __startup_secondary_64
- popq %rsi
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ movq sme_me_mask, %rax
+#else
+ xorq %rax, %rax
+#endif
/* Form the CR3 value being sure to include the CR3 modifier */
addq $(init_top_pgt - __START_KERNEL_map), %rax
1:
+#ifdef CONFIG_X86_MCE
+ /*
+ * Preserve CR4.MCE if the kernel will enable #MC support.
+ * Clearing MCE may fault in some environments (that also force #MC
+ * support). Any machine check that occurs before #MC support is fully
+ * configured will crash the system regardless of the CR4.MCE value set
+ * here.
+ */
+ movq %cr4, %rcx
+ andl $X86_CR4_MCE, %ecx
+#else
+ movl $0, %ecx
+#endif
+
/* Enable PAE mode, PGE and LA57 */
- movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx
+ orl $(X86_CR4_PAE | X86_CR4_PGE), %ecx
#ifdef CONFIG_X86_5LEVEL
testl $1, __pgtable_l5_enabled(%rip)
jz 1f
@@ -249,13 +294,23 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
/* Setup EFER (Extended Feature Enable Register) */
movl $MSR_EFER, %ecx
rdmsr
+ /*
+ * Preserve current value of EFER for comparison and to skip
+ * EFER writes if no change was made (for TDX guest)
+ */
+ movl %eax, %edx
btsl $_EFER_SCE, %eax /* Enable System Call */
btl $20,%edi /* No Execute supported? */
jnc 1f
btsl $_EFER_NX, %eax
btsq $_PAGE_BIT_NX,early_pmd_flags(%rip)
-1: wrmsr /* Make changes effective */
+ /* Avoid writing EFER if no change was made (for TDX guest) */
+1: cmpl %edx, %eax
+ je 1f
+ xor %edx, %edx
+ wrmsr /* Make changes effective */
+1:
/* Setup cr0 */
movl $CR0_STATE, %eax
/* Make changes effective */
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 608eb63bf044..a58c6bc1cd68 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -69,6 +69,9 @@ static const __initconst struct idt_data early_idts[] = {
*/
INTG(X86_TRAP_PF, asm_exc_page_fault),
#endif
+#ifdef CONFIG_INTEL_TDX_GUEST
+ INTG(X86_TRAP_VE, asm_exc_virtualization_exception),
+#endif
};
/*
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index e73f7df362f5..cec0bfa3bc04 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -157,7 +157,7 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
struct nmi_desc *desc = nmi_to_desc(type);
unsigned long flags;
- if (!action->handler)
+ if (WARN_ON_ONCE(!action->handler || !list_empty(&action->list)))
return -EINVAL;
raw_spin_lock_irqsave(&desc->lock, flags);
@@ -177,7 +177,7 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
list_add_rcu(&action->list, &desc->head);
else
list_add_tail_rcu(&action->list, &desc->head);
-
+
raw_spin_unlock_irqrestore(&desc->lock, flags);
return 0;
}
@@ -186,7 +186,7 @@ EXPORT_SYMBOL(__register_nmi_handler);
void unregister_nmi_handler(unsigned int type, const char *name)
{
struct nmi_desc *desc = nmi_to_desc(type);
- struct nmiaction *n;
+ struct nmiaction *n, *found = NULL;
unsigned long flags;
raw_spin_lock_irqsave(&desc->lock, flags);
@@ -200,12 +200,16 @@ void unregister_nmi_handler(unsigned int type, const char *name)
WARN(in_nmi(),
"Trying to free NMI (%s) from NMI context!\n", n->name);
list_del_rcu(&n->list);
+ found = n;
break;
}
}
raw_spin_unlock_irqrestore(&desc->lock, flags);
- synchronize_rcu();
+ if (found) {
+ synchronize_rcu();
+ INIT_LIST_HEAD(&found->list);
+ }
}
EXPORT_SYMBOL_GPL(unregister_nmi_handler);
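Re-initializing the list node is what makes a register/unregister cycle repeatable for the same nmiaction object (the static one a register_nmi_handler() call site keeps). Illustrative usage, assuming the registration macro initializes the node as the WARN_ON_ONCE above expects:

static int example_nmi(unsigned int cmd, struct pt_regs *regs)
{
	return NMI_DONE;	/* not ours */
}

static void example_toggle(void)
{
	/* Same call site each time, hence the same static nmiaction. */
	register_nmi_handler(NMI_LOCAL, example_nmi, 0, "example");
	unregister_nmi_handler(NMI_LOCAL, "example");
}

/* Calling example_toggle() repeatedly is now safe: each unregister
 * leaves the action's list node empty again. */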
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
index 36e84d904260..319fef37d9dc 100644
--- a/arch/x86/kernel/probe_roms.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -21,6 +21,7 @@
#include <asm/sections.h>
#include <asm/io.h>
#include <asm/setup_arch.h>
+#include <asm/sev.h>
static struct resource system_rom_resource = {
.name = "System ROM",
@@ -197,11 +198,21 @@ static int __init romchecksum(const unsigned char *rom, unsigned long length)
void __init probe_roms(void)
{
- const unsigned char *rom;
unsigned long start, length, upper;
+ const unsigned char *rom;
unsigned char c;
int i;
+ /*
+ * The ROM memory range is not part of the e820 table and is therefore not
+ * pre-validated by BIOS. The kernel page table maps the ROM region as encrypted
+ * memory, and SNP requires encrypted memory to be validated before access.
+ * Do that here.
+ */
+ snp_prep_memory(video_rom_resource.start,
+ ((system_rom_resource.end + 1) - video_rom_resource.start),
+ SNP_PAGE_STATE_PRIVATE);
+
/* video rom */
upper = adapter_rom_resources[0].start;
for (start = video_rom_resource.start; start < upper; start += 2048) {
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index b370767f5b19..58fb48d3004f 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -46,6 +46,7 @@
#include <asm/proto.h>
#include <asm/frame.h>
#include <asm/unwind.h>
+#include <asm/tdx.h>
#include "process.h"
@@ -160,6 +161,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
savesegment(ds, p->thread.ds);
#else
p->thread.sp0 = (unsigned long) (childregs + 1);
+ savesegment(gs, p->thread.gs);
/*
* Clear all status flags including IF and set fixed bit. 64bit
* does not have this initialization as the frame does not contain
@@ -191,10 +193,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
if (sp)
childregs->sp = sp;
-#ifdef CONFIG_X86_32
- task_user_gs(p) = get_user_gs(current_pt_regs());
-#endif
-
if (unlikely(p->flags & PF_IO_WORKER)) {
/*
* An IO thread is a user space thread, but it doesn't
@@ -334,7 +332,7 @@ static int get_cpuid_mode(void)
return !test_thread_flag(TIF_NOCPUID);
}
-static int set_cpuid_mode(struct task_struct *task, unsigned long cpuid_enabled)
+static int set_cpuid_mode(unsigned long cpuid_enabled)
{
if (!boot_cpu_has(X86_FEATURE_CPUID_FAULT))
return -ENODEV;
@@ -405,7 +403,7 @@ static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm)
}
/**
- * tss_update_io_bitmap - Update I/O bitmap before exiting to usermode
+ * native_tss_update_io_bitmap - Update I/O bitmap before exiting to user mode
*/
void native_tss_update_io_bitmap(void)
{
@@ -686,9 +684,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
/* Enforce MSR update to ensure consistent state */
__speculation_ctrl_update(~tifn, tifn);
}
-
- if ((tifp ^ tifn) & _TIF_SLD)
- switch_to_sld(tifn);
}
/*
@@ -873,6 +868,9 @@ void select_idle_routine(const struct cpuinfo_x86 *c)
} else if (prefer_mwait_c1_over_halt(c)) {
pr_info("using mwait in idle threads\n");
x86_idle = mwait_idle;
+ } else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
+ pr_info("using TDX aware idle routine\n");
+ x86_idle = tdx_safe_halt;
} else
x86_idle = default_idle;
}
@@ -985,20 +983,19 @@ unsigned long __get_wchan(struct task_struct *p)
return addr;
}
-long do_arch_prctl_common(struct task_struct *task, int option,
- unsigned long arg2)
+long do_arch_prctl_common(int option, unsigned long arg2)
{
switch (option) {
case ARCH_GET_CPUID:
return get_cpuid_mode();
case ARCH_SET_CPUID:
- return set_cpuid_mode(task, arg2);
+ return set_cpuid_mode(arg2);
case ARCH_GET_XCOMP_SUPP:
case ARCH_GET_XCOMP_PERM:
case ARCH_REQ_XCOMP_PERM:
case ARCH_GET_XCOMP_GUEST_PERM:
case ARCH_REQ_XCOMP_GUEST_PERM:
- return fpu_xstate_prctl(task, option, arg2);
+ return fpu_xstate_prctl(option, arg2);
}
return -EINVAL;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 26edb1cd07a4..2f314b170c9f 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -63,10 +63,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
unsigned long d0, d1, d2, d3, d6, d7;
unsigned short gs;
- if (user_mode(regs))
- gs = get_user_gs(regs);
- else
- savesegment(gs, gs);
+ savesegment(gs, gs);
show_ip(regs, log_lvl);
@@ -114,7 +111,7 @@ void release_thread(struct task_struct *dead_task)
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
- set_user_gs(regs, 0);
+ loadsegment(gs, 0);
regs->fs = 0;
regs->ds = __USER_DS;
regs->es = __USER_DS;
@@ -177,7 +174,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* used %fs or %gs (it does not today), or if the kernel is
* running inside of a hypervisor layer.
*/
- lazy_save_gs(prev->gs);
+ savesegment(gs, prev->gs);
/*
* Load the per-thread Thread-Local Storage descriptor.
@@ -208,7 +205,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* Restore %gs if needed (which is common)
*/
if (prev->gs | next->gs)
- lazy_load_gs(next->gs);
+ loadsegment(gs, next->gs);
this_cpu_write(current_task, next_p);
@@ -222,5 +219,5 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
- return do_arch_prctl_common(current, option, arg2);
+ return do_arch_prctl_common(option, arg2);
}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e459253649be..1962008fe743 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -844,7 +844,7 @@ SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
ret = do_arch_prctl_64(current, option, arg2);
if (ret == -EINVAL)
- ret = do_arch_prctl_common(current, option, arg2);
+ ret = do_arch_prctl_common(option, arg2);
return ret;
}
@@ -852,7 +852,7 @@ SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
#ifdef CONFIG_IA32_EMULATION
COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
- return do_arch_prctl_common(current, option, arg2);
+ return do_arch_prctl_common(option, arg2);
}
#endif
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 98d10ef60571..37c12fb92906 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -170,9 +170,9 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
retval = *pt_regs_access(task_pt_regs(task), offset);
else {
if (task == current)
- retval = get_user_gs(task_pt_regs(task));
+ savesegment(gs, retval);
else
- retval = task_user_gs(task);
+ retval = task->thread.gs;
}
return retval;
}
@@ -210,7 +210,7 @@ static int set_segment_reg(struct task_struct *task,
break;
case offsetof(struct user_regs_struct, gs):
- task_user_gs(task) = value;
+ task->thread.gs = value;
}
return 0;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index c95b9ac5a457..249981bf3d8a 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -756,6 +756,30 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
return 0;
}
+void x86_configure_nx(void)
+{
+ if (boot_cpu_has(X86_FEATURE_NX))
+ __supported_pte_mask |= _PAGE_NX;
+ else
+ __supported_pte_mask &= ~_PAGE_NX;
+}
+
+static void __init x86_report_nx(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_NX)) {
+ printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
+ "missing in CPU!\n");
+ } else {
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+ printk(KERN_INFO "NX (Execute Disable) protection: active\n");
+#else
+ /* 32bit non-PAE kernel, NX cannot be used */
+ printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
+ "cannot be enabled: non-PAE kernel!\n");
+#endif
+ }
+}
+
/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
@@ -896,9 +920,7 @@ void __init setup_arch(char **cmdline_p)
/*
* x86_configure_nx() is called before parse_early_param() to detect
* whether hardware doesn't support NX (so that the early EHCI debug
- * console setup can safely call set_fixmap()). It may then be called
- * again from within noexec_setup() during parsing early parameters
- * to honor the respective command line option.
+ * console setup can safely call set_fixmap()).
*/
x86_configure_nx();
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index ce987688bbc0..b478edf43bec 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -14,6 +14,68 @@
#define has_cpuflag(f) boot_cpu_has(f)
#endif
+/* I/O parameters for CPUID-related helpers */
+struct cpuid_leaf {
+ u32 fn;
+ u32 subfn;
+ u32 eax;
+ u32 ebx;
+ u32 ecx;
+ u32 edx;
+};
+
+/*
+ * Individual entries of the SNP CPUID table, as defined by the SNP
+ * Firmware ABI, Revision 0.9, Section 7.1, Table 14.
+ */
+struct snp_cpuid_fn {
+ u32 eax_in;
+ u32 ecx_in;
+ u64 xcr0_in;
+ u64 xss_in;
+ u32 eax;
+ u32 ebx;
+ u32 ecx;
+ u32 edx;
+ u64 __reserved;
+} __packed;
+
+/*
+ * SNP CPUID table, as defined by the SNP Firmware ABI, Revision 0.9,
+ * Section 8.14.2.6. Also noted there is the SNP firmware-enforced limit
+ * of 64 entries per CPUID table.
+ */
+#define SNP_CPUID_COUNT_MAX 64
+
+struct snp_cpuid_table {
+ u32 count;
+ u32 __reserved1;
+ u64 __reserved2;
+ struct snp_cpuid_fn fn[SNP_CPUID_COUNT_MAX];
+} __packed;
+
+/*
+ * Since feature-negotiation-related variables are set early in the boot
+ * process, they must reside in the .data section so as not to be zeroed
+ * out when the .bss section is later cleared.
+ *
+ * GHCB protocol version negotiated with the hypervisor.
+ */
+static u16 ghcb_version __ro_after_init;
+
+/* Copy of the SNP firmware's CPUID page. */
+static struct snp_cpuid_table cpuid_table_copy __ro_after_init;
+
+/*
+ * These will be initialized based on CPUID table so that non-present
+ * all-zero leaves (for sparse tables) can be differentiated from
+ * invalid/out-of-range leaves. This is needed since all-zero leaves
+ * still need to be post-processed.
+ */
+static u32 cpuid_std_range_max __ro_after_init;
+static u32 cpuid_hyp_range_max __ro_after_init;
+static u32 cpuid_ext_range_max __ro_after_init;
+
static bool __init sev_es_check_cpu_features(void)
{
if (!has_cpuflag(X86_FEATURE_RDRAND)) {
@@ -24,15 +86,12 @@ static bool __init sev_es_check_cpu_features(void)
return true;
}
-static void __noreturn sev_es_terminate(unsigned int reason)
+static void __noreturn sev_es_terminate(unsigned int set, unsigned int reason)
{
u64 val = GHCB_MSR_TERM_REQ;
- /*
- * Tell the hypervisor what went wrong - only reason-set 0 is
- * currently supported.
- */
- val |= GHCB_SEV_TERM_REASON(0, reason);
+ /* Tell the hypervisor what went wrong. */
+ val |= GHCB_SEV_TERM_REASON(set, reason);
/* Request Guest Termination from Hypvervisor */
sev_es_wr_ghcb_msr(val);
@@ -42,6 +101,42 @@ static void __noreturn sev_es_terminate(unsigned int reason)
asm volatile("hlt\n" : : : "memory");
}
+/*
+ * The hypervisor features are available from GHCB version 2 onward.
+ */
+static u64 get_hv_features(void)
+{
+ u64 val;
+
+ if (ghcb_version < 2)
+ return 0;
+
+ sev_es_wr_ghcb_msr(GHCB_MSR_HV_FT_REQ);
+ VMGEXIT();
+
+ val = sev_es_rd_ghcb_msr();
+ if (GHCB_RESP_CODE(val) != GHCB_MSR_HV_FT_RESP)
+ return 0;
+
+ return GHCB_MSR_HV_FT_RESP_VAL(val);
+}
+
+static void snp_register_ghcb_early(unsigned long paddr)
+{
+ unsigned long pfn = paddr >> PAGE_SHIFT;
+ u64 val;
+
+ sev_es_wr_ghcb_msr(GHCB_MSR_REG_GPA_REQ_VAL(pfn));
+ VMGEXIT();
+
+ val = sev_es_rd_ghcb_msr();
+
+ /* If the response GPA is not ours then abort the guest */
+ if ((GHCB_RESP_CODE(val) != GHCB_MSR_REG_GPA_RESP) ||
+ (GHCB_MSR_REG_GPA_RESP_VAL(val) != pfn))
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER);
+}
+
static bool sev_es_negotiate_protocol(void)
{
u64 val;
@@ -54,10 +149,12 @@ static bool sev_es_negotiate_protocol(void)
if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
return false;
- if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTO_OUR ||
- GHCB_MSR_PROTO_MIN(val) > GHCB_PROTO_OUR)
+ if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN ||
+ GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX)
return false;
+ ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val), GHCB_PROTOCOL_MAX);
+
return true;
}
@@ -104,10 +201,7 @@ static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt
if (ret == 1) {
u64 info = ghcb->save.sw_exit_info_2;
- unsigned long v;
-
- info = ghcb->save.sw_exit_info_2;
- v = info & SVM_EVTINJ_VEC_MASK;
+ unsigned long v = info & SVM_EVTINJ_VEC_MASK;
/* Check if exception information from hypervisor is sane. */
if ((info & SVM_EVTINJ_VALID) &&
@@ -130,7 +224,7 @@ enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, bool set_ghcb_msr,
u64 exit_info_1, u64 exit_info_2)
{
/* Fill in protocol and format specifiers */
- ghcb->protocol_version = GHCB_PROTOCOL_MAX;
+ ghcb->protocol_version = ghcb_version;
ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
ghcb_set_sw_exit_code(ghcb, exit_code);
@@ -150,6 +244,290 @@ enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, bool set_ghcb_msr,
return verify_exception_info(ghcb, ctxt);
}
+static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg)
+{
+ u64 val;
+
+ sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, reg_idx));
+ VMGEXIT();
+ val = sev_es_rd_ghcb_msr();
+ if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
+ return -EIO;
+
+ *reg = (val >> 32);
+
+ return 0;
+}
+
+static int sev_cpuid_hv(struct cpuid_leaf *leaf)
+{
+ int ret;
+
+ /*
+ * The MSR protocol does not support fetching non-zero subfunctions, but it is
+ * sufficient to handle current early-boot cases. Should that change,
+ * make sure to report an error rather than ignoring the index and
+ * grabbing random values. If this issue arises in the future, handling
+ * can be added here to use GHCB-page protocol for cases that occur late
+ * enough in boot that GHCB page is available.
+ */
+ if (cpuid_function_is_indexed(leaf->fn) && leaf->subfn)
+ return -EINVAL;
+
+ ret = __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EAX, &leaf->eax);
+ ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EBX, &leaf->ebx);
+ ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_ECX, &leaf->ecx);
+ ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EDX, &leaf->edx);
+
+ return ret;
+}
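Each register costs a separate MSR-protocol round trip, so fetching one leaf takes four VMGEXITs. Illustrative caller:

/* Illustrative: read CPUID leaf 0x1 via the MSR protocol (4 VMGEXITs). */
static int example_read_leaf1(struct cpuid_leaf *leaf)
{
	leaf->fn    = 0x1;
	leaf->subfn = 0;

	return sev_cpuid_hv(leaf);
}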
+
+/*
+ * This may be called early while still running on the initial identity
+ * mapping. Use RIP-relative addressing to obtain the correct address
+ * while running with the initial identity mapping as well as the
+ * switch-over to kernel virtual addresses later.
+ */
+static const struct snp_cpuid_table *snp_cpuid_get_table(void)
+{
+ void *ptr;
+
+ asm ("lea cpuid_table_copy(%%rip), %0"
+ : "=r" (ptr)
+ : "p" (&cpuid_table_copy));
+
+ return ptr;
+}
+
+/*
+ * The SNP Firmware ABI, Revision 0.9, Section 7.1, details the use of
+ * XCR0_IN and XSS_IN to encode multiple versions of 0xD subfunctions 0
+ * and 1 based on the corresponding features enabled by a particular
+ * combination of XCR0 and XSS registers so that a guest can look up the
+ * version corresponding to the features currently enabled in its XCR0/XSS
+ * registers. The only values that differ between these versions/table
+ * entries is the enabled XSAVE area size advertised via EBX.
+ *
+ * While hypervisors may choose to make use of this support, it is more
+ * robust/secure for a guest to simply find the entry corresponding to the
+ * base/legacy XSAVE area size (XCR0=1 or XCR0=3), and then calculate the
+ * XSAVE area size using subfunctions 2 through 64, as documented in APM
+ * Volume 3, Rev 3.31, Appendix E.3.8, which is what is done here.
+ *
+ * Since base/legacy XSAVE area size is documented as 0x240, use that value
+ * directly rather than relying on the base size in the CPUID table.
+ *
+ * Return: XSAVE area size on success, 0 otherwise.
+ */
+static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
+{
+ const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
+ u64 xfeatures_found = 0;
+ u32 xsave_size = 0x240;
+ int i;
+
+ for (i = 0; i < cpuid_table->count; i++) {
+ const struct snp_cpuid_fn *e = &cpuid_table->fn[i];
+
+ if (!(e->eax_in == 0xD && e->ecx_in > 1 && e->ecx_in < 64))
+ continue;
+ if (!(xfeatures_en & (BIT_ULL(e->ecx_in))))
+ continue;
+ if (xfeatures_found & (BIT_ULL(e->ecx_in)))
+ continue;
+
+ xfeatures_found |= (BIT_ULL(e->ecx_in));
+
+ if (compacted)
+ xsave_size += e->eax;
+ else
+ xsave_size = max(xsave_size, e->eax + e->ebx);
+ }
+
+ /*
+ * Either the guest set unsupported XCR0/XSS bits, or the corresponding
+ * entries in the CPUID table were not present. This is not a valid
+ * state to be in.
+ */
+ if (xfeatures_found != (xfeatures_en & GENMASK_ULL(63, 2)))
+ return 0;
+
+ return xsave_size;
+}
+
+static bool
+snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
+{
+ const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
+ int i;
+
+ for (i = 0; i < cpuid_table->count; i++) {
+ const struct snp_cpuid_fn *e = &cpuid_table->fn[i];
+
+ if (e->eax_in != leaf->fn)
+ continue;
+
+ if (cpuid_function_is_indexed(leaf->fn) && e->ecx_in != leaf->subfn)
+ continue;
+
+ /*
+ * For 0xD subfunctions 0 and 1, only use the entry corresponding
+ * to the base/legacy XSAVE area size (XCR0=1 or XCR0=3, XSS=0).
+ * See the comments above snp_cpuid_calc_xsave_size() for more
+ * details.
+ */
+ if (e->eax_in == 0xD && (e->ecx_in == 0 || e->ecx_in == 1))
+ if (!(e->xcr0_in == 1 || e->xcr0_in == 3) || e->xss_in)
+ continue;
+
+ leaf->eax = e->eax;
+ leaf->ebx = e->ebx;
+ leaf->ecx = e->ecx;
+ leaf->edx = e->edx;
+
+ return true;
+ }
+
+ return false;
+}
+
+static void snp_cpuid_hv(struct cpuid_leaf *leaf)
+{
+ if (sev_cpuid_hv(leaf))
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV);
+}
+
+static int snp_cpuid_postprocess(struct cpuid_leaf *leaf)
+{
+ struct cpuid_leaf leaf_hv = *leaf;
+
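+ /*
+ * For leaves that mix table-provided values with CPU-specific
+ * state, fetch the hypervisor's view into a scratch copy and
+ * splice in only the fields that must come from it.
+ */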
+ switch (leaf->fn) {
+ case 0x1:
+ snp_cpuid_hv(&leaf_hv);
+
+ /* initial APIC ID */
+ leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0));
+ /* APIC enabled bit */
+ leaf->edx = (leaf_hv.edx & BIT(9)) | (leaf->edx & ~BIT(9));
+
+ /* OSXSAVE enabled bit */
+ if (native_read_cr4() & X86_CR4_OSXSAVE)
+ leaf->ecx |= BIT(27);
+ break;
+ case 0x7:
+ /* OSPKE enabled bit */
+ leaf->ecx &= ~BIT(4);
+ if (native_read_cr4() & X86_CR4_PKE)
+ leaf->ecx |= BIT(4);
+ break;
+ case 0xB:
+ leaf_hv.subfn = 0;
+ snp_cpuid_hv(&leaf_hv);
+
+ /* extended APIC ID */
+ leaf->edx = leaf_hv.edx;
+ break;
+ case 0xD: {
+ bool compacted = false;
+ u64 xcr0 = 1, xss = 0;
+ u32 xsave_size;
+
+ if (leaf->subfn != 0 && leaf->subfn != 1)
+ return 0;
+
+ if (native_read_cr4() & X86_CR4_OSXSAVE)
+ xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+ if (leaf->subfn == 1) {
+ /* Get XSS value if XSAVES is enabled. */
+ if (leaf->eax & BIT(3)) {
+ unsigned long lo, hi;
+
+ asm volatile("rdmsr" : "=a" (lo), "=d" (hi)
+ : "c" (MSR_IA32_XSS));
+ xss = (hi << 32) | lo;
+ }
+
+ /*
+ * The PPR and APM aren't clear on what size should be
+ * encoded in 0xD:0x1:EBX when compaction is not enabled
+ * by either XSAVEC (feature bit 1) or XSAVES (feature
+ * bit 3) since SNP-capable hardware has these feature
+ * bits fixed as 1. KVM sets it to 0 in this case, but
+ * to avoid this becoming an issue it's safer to simply
+ * treat this as unsupported for SNP guests.
+ */
+ if (!(leaf->eax & (BIT(1) | BIT(3))))
+ return -EINVAL;
+
+ compacted = true;
+ }
+
+ xsave_size = snp_cpuid_calc_xsave_size(xcr0 | xss, compacted);
+ if (!xsave_size)
+ return -EINVAL;
+
+ leaf->ebx = xsave_size;
+ }
+ break;
+ case 0x8000001E:
+ snp_cpuid_hv(&leaf_hv);
+
+ /* extended APIC ID */
+ leaf->eax = leaf_hv.eax;
+ /* compute ID */
+ leaf->ebx = (leaf->ebx & GENMASK(31, 8)) | (leaf_hv.ebx & GENMASK(7, 0));
+ /* node ID */
+ leaf->ecx = (leaf->ecx & GENMASK(31, 8)) | (leaf_hv.ecx & GENMASK(7, 0));
+ break;
+ default:
+ /* No fix-ups needed, use values as-is. */
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * Returns -EOPNOTSUPP if the feature is not enabled. Any other non-zero
+ * return value should be treated as fatal by the caller.
+ */
+static int snp_cpuid(struct cpuid_leaf *leaf)
+{
+ const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
+
+ if (!cpuid_table->count)
+ return -EOPNOTSUPP;
+
+ if (!snp_cpuid_get_validated_func(leaf)) {
+ /*
+ * Some hypervisors will avoid keeping track of CPUID entries
+ * where all values are zero, since they can be handled the
+ * same as out-of-range values (all-zero). This is useful here
+ * as well, since it allows virtually all guest configurations to
+ * work using a single SNP CPUID table.
+ *
+ * To allow for this, there is a need to distinguish between
+ * out-of-range entries and in-range zero entries, since the
+ * CPUID table entries are only a template that may need to be
+ * augmented with additional values for things like
+ * CPU-specific information during post-processing. So if it's
+ * not in the table, set the values to zero. Then, if they are
+ * within a valid CPUID range, proceed with post-processing
+ * using zeros as the initial values. Otherwise, skip
+ * post-processing and just return zeros immediately.
+ */
+ leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0;
+
+ /* Skip post-processing for out-of-range zero leaves. */
+ if (!(leaf->fn <= cpuid_std_range_max ||
+ (leaf->fn >= 0x40000000 && leaf->fn <= cpuid_hyp_range_max) ||
+ (leaf->fn >= 0x80000000 && leaf->fn <= cpuid_ext_range_max)))
+ return 0;
+ }
+
+ return snp_cpuid_postprocess(leaf);
+}
+
/*
* Boot VC Handler - This is the first VC handler during boot, there is no GHCB
* page yet, so it only supports the MSR based communication with the
@@ -157,40 +535,33 @@ enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, bool set_ghcb_msr,
*/
void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
{
+ unsigned int subfn = lower_bits(regs->cx, 32);
unsigned int fn = lower_bits(regs->ax, 32);
- unsigned long val;
+ struct cpuid_leaf leaf;
+ int ret;
/* Only CPUID is supported via MSR protocol */
if (exit_code != SVM_EXIT_CPUID)
goto fail;
- sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EAX));
- VMGEXIT();
- val = sev_es_rd_ghcb_msr();
- if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
- goto fail;
- regs->ax = val >> 32;
+ leaf.fn = fn;
+ leaf.subfn = subfn;
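+
+ /*
+ * Try the SNP CPUID table first; -EOPNOTSUPP means SNP is not
+ * enabled, in which case fall back to the MSR-protocol request
+ * below.
+ */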
- sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EBX));
- VMGEXIT();
- val = sev_es_rd_ghcb_msr();
- if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
- goto fail;
- regs->bx = val >> 32;
+ ret = snp_cpuid(&leaf);
+ if (!ret)
+ goto cpuid_done;
- sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_ECX));
- VMGEXIT();
- val = sev_es_rd_ghcb_msr();
- if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
+ if (ret != -EOPNOTSUPP)
goto fail;
- regs->cx = val >> 32;
- sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EDX));
- VMGEXIT();
- val = sev_es_rd_ghcb_msr();
- if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
+ if (sev_cpuid_hv(&leaf))
goto fail;
- regs->dx = val >> 32;
+
+cpuid_done:
+ regs->ax = leaf.eax;
+ regs->bx = leaf.ebx;
+ regs->cx = leaf.ecx;
+ regs->dx = leaf.edx;
/*
* This is a VC handler and the #VC is only raised when SEV-ES is
@@ -221,7 +592,7 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
fail:
/* Terminate the guest */
- sev_es_terminate(GHCB_SEV_ES_GEN_REQ);
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
}
static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
@@ -481,12 +852,37 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
return ret;
}
+static int vc_handle_cpuid_snp(struct pt_regs *regs)
+{
+ struct cpuid_leaf leaf;
+ int ret;
+
+ leaf.fn = regs->ax;
+ leaf.subfn = regs->cx;
+ ret = snp_cpuid(&leaf);
+ if (!ret) {
+ regs->ax = leaf.eax;
+ regs->bx = leaf.ebx;
+ regs->cx = leaf.ecx;
+ regs->dx = leaf.edx;
+ }
+
+ return ret;
+}
+
static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
struct es_em_ctxt *ctxt)
{
struct pt_regs *regs = ctxt->regs;
u32 cr4 = native_read_cr4();
enum es_result ret;
+ int snp_cpuid_ret;
+
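+ /*
+ * SNP guests must take CPUID values from the validated table;
+ * -EOPNOTSUPP just means SNP is not active, so fall through to
+ * the GHCB protocol. Any other failure is treated as a VMM error.
+ */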
+ snp_cpuid_ret = vc_handle_cpuid_snp(regs);
+ if (!snp_cpuid_ret)
+ return ES_OK;
+ if (snp_cpuid_ret != -EOPNOTSUPP)
+ return ES_VMM_ERROR;
ghcb_set_rax(ghcb, regs->ax);
ghcb_set_rcx(ghcb, regs->cx);
@@ -538,3 +934,67 @@ static enum es_result vc_handle_rdtsc(struct ghcb *ghcb,
return ES_OK;
}
+
+struct cc_setup_data {
+ struct setup_data header;
+ u32 cc_blob_address;
+};
+
+/*
+ * Search for a Confidential Computing blob passed in as a setup_data entry
+ * via the Linux Boot Protocol.
+ */
+static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
+{
+ struct cc_setup_data *sd = NULL;
+ struct setup_data *hdr;
+
+ hdr = (struct setup_data *)bp->hdr.setup_data;
+
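+ /* Walk the singly-linked setup_data list looking for a CC blob entry. */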
+ while (hdr) {
+ if (hdr->type == SETUP_CC_BLOB) {
+ sd = (struct cc_setup_data *)hdr;
+ return (struct cc_blob_sev_info *)(unsigned long)sd->cc_blob_address;
+ }
+ hdr = (struct setup_data *)hdr->next;
+ }
+
+ return NULL;
+}
+
+/*
+ * Initialize the kernel's copy of the SNP CPUID table, and set up the
+ * pointer that will be used to access it.
+ *
+ * Maintaining a direct mapping of the SNP CPUID table used by firmware would
+ * be possible as an alternative, but the approach is brittle since the
+ * mapping needs to be updated in sync with all the changes to virtual memory
+ * layout and related mapping facilities throughout the boot process.
+ */
+static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
+{
+ const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table;
+ int i;
+
+ if (!cc_info || !cc_info->cpuid_phys || cc_info->cpuid_len < PAGE_SIZE)
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);
+
+ cpuid_table_fw = (const struct snp_cpuid_table *)cc_info->cpuid_phys;
+ if (!cpuid_table_fw->count || cpuid_table_fw->count > SNP_CPUID_COUNT_MAX)
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);
+
+ cpuid_table = snp_cpuid_get_table();
+ memcpy((void *)cpuid_table, cpuid_table_fw, sizeof(*cpuid_table));
+
+ /* Initialize CPUID ranges for range-checking. */
+ for (i = 0; i < cpuid_table->count; i++) {
+ const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];
+
+ if (fn->eax_in == 0x0)
+ cpuid_std_range_max = fn->eax;
+ else if (fn->eax_in == 0x40000000)
+ cpuid_hyp_range_max = fn->eax;
+ else if (fn->eax_in == 0x80000000)
+ cpuid_ext_range_max = fn->eax;
+ }
+}
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index e6d316a01fdd..c05f0124c410 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -18,6 +18,10 @@
#include <linux/memblock.h>
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/cpumask.h>
+#include <linux/efi.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
@@ -31,9 +35,28 @@
#include <asm/svm.h>
#include <asm/smp.h>
#include <asm/cpu.h>
+#include <asm/apic.h>
+#include <asm/cpuid.h>
+#include <asm/cmdline.h>
#define DR7_RESET_VALUE 0x400
+/* AP INIT values as documented in the APM2 section "Processor Initialization State" */
+#define AP_INIT_CS_LIMIT 0xffff
+#define AP_INIT_DS_LIMIT 0xffff
+#define AP_INIT_LDTR_LIMIT 0xffff
+#define AP_INIT_GDTR_LIMIT 0xffff
+#define AP_INIT_IDTR_LIMIT 0xffff
+#define AP_INIT_TR_LIMIT 0xffff
+#define AP_INIT_RFLAGS_DEFAULT 0x2
+#define AP_INIT_DR6_DEFAULT 0xffff0ff0
+#define AP_INIT_GPAT_DEFAULT 0x0007040600070406ULL
+#define AP_INIT_XCR0_DEFAULT 0x1
+#define AP_INIT_X87_FTW_DEFAULT 0x5555
+#define AP_INIT_X87_FCW_DEFAULT 0x0040
+#define AP_INIT_CR0_DEFAULT 0x60000010
+#define AP_INIT_MXCSR_DEFAULT 0x1f80
+
/* For early boot hypervisor communication in SEV-ES enabled guests */
static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
@@ -41,7 +64,10 @@ static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
* Needs to be in the .data section because we need it NULL before bss is
* cleared
*/
-static struct ghcb __initdata *boot_ghcb;
+static struct ghcb *boot_ghcb __section(".data");
+
+/* Bitmap of SEV features supported by the hypervisor */
+static u64 sev_hv_features __ro_after_init;
/* #VC handler runtime per-CPU data */
struct sev_es_runtime_data {
@@ -87,6 +113,15 @@ struct ghcb_state {
static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);
+static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
+
+struct sev_config {
+ __u64 debug : 1,
+ __reserved : 63;
+};
+
+static struct sev_config sev_cfg __read_mostly;
+
static __always_inline bool on_vc_stack(struct pt_regs *regs)
{
unsigned long sp = regs->sp;
@@ -523,13 +558,68 @@ void noinstr __sev_es_nmi_complete(void)
__sev_put_ghcb(&state);
}
-static u64 get_jump_table_addr(void)
+static u64 __init get_secrets_page(void)
+{
+ u64 pa_data = boot_params.cc_blob_address;
+ struct cc_blob_sev_info info;
+ void *map;
+
+ /*
+ * The CC blob contains the address of the secrets page; check that the
+ * blob is present.
+ */
+ if (!pa_data)
+ return 0;
+
+ map = early_memremap(pa_data, sizeof(info));
+ if (!map) {
+ pr_err("Unable to locate SNP secrets page: failed to map the Confidential Computing blob.\n");
+ return 0;
+ }
+ memcpy(&info, map, sizeof(info));
+ early_memunmap(map, sizeof(info));
+
+ /* Smoke-test the secrets page that was passed in. */
+ if (!info.secrets_phys || info.secrets_len != PAGE_SIZE)
+ return 0;
+
+ return info.secrets_phys;
+}
+
+static u64 __init get_snp_jump_table_addr(void)
+{
+ struct snp_secrets_page_layout *layout;
+ void __iomem *mem;
+ u64 pa, addr;
+
+ pa = get_secrets_page();
+ if (!pa)
+ return 0;
+
+ mem = ioremap_encrypted(pa, PAGE_SIZE);
+ if (!mem) {
+ pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n");
+ return 0;
+ }
+
+ layout = (__force struct snp_secrets_page_layout *)mem;
+
+ addr = layout->os_area.ap_jump_table_pa;
+ iounmap(mem);
+
+ return addr;
+}
+
+static u64 __init get_jump_table_addr(void)
{
struct ghcb_state state;
unsigned long flags;
struct ghcb *ghcb;
u64 ret = 0;
+ if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ return get_snp_jump_table_addr();
+
local_irq_save(flags);
ghcb = __sev_get_ghcb(&state);
@@ -553,7 +643,496 @@ static u64 get_jump_table_addr(void)
return ret;
}
-int sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
+static void pvalidate_pages(unsigned long vaddr, unsigned int npages, bool validate)
+{
+ unsigned long vaddr_end;
+ int rc;
+
+ vaddr = vaddr & PAGE_MASK;
+ vaddr_end = vaddr + (npages << PAGE_SHIFT);
+
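+ /*
+ * PVALIDATE operates on one 4K page at a time; treat any failure
+ * as fatal since the page cannot be used safely afterwards.
+ */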
+ while (vaddr < vaddr_end) {
+ rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
+ if (WARN(rc, "Failed to validate address 0x%lx ret %d", vaddr, rc))
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+
+ vaddr = vaddr + PAGE_SIZE;
+ }
+}
+
+static void __init early_set_pages_state(unsigned long paddr, unsigned int npages, enum psc_op op)
+{
+ unsigned long paddr_end;
+ u64 val;
+
+ paddr = paddr & PAGE_MASK;
+ paddr_end = paddr + (npages << PAGE_SHIFT);
+
+ while (paddr < paddr_end) {
+ /*
+ * Use the MSR protocol because this function can be called before
+ * the GHCB is established.
+ */
+ sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
+ VMGEXIT();
+
+ val = sev_es_rd_ghcb_msr();
+
+ if (WARN(GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP,
+ "Wrong PSC response code: 0x%x\n",
+ (unsigned int)GHCB_RESP_CODE(val)))
+ goto e_term;
+
+ if (WARN(GHCB_MSR_PSC_RESP_VAL(val),
+ "Failed to change page state to '%s' paddr 0x%lx error 0x%llx\n",
+ op == SNP_PAGE_STATE_PRIVATE ? "private" : "shared",
+ paddr, GHCB_MSR_PSC_RESP_VAL(val)))
+ goto e_term;
+
+ paddr = paddr + PAGE_SIZE;
+ }
+
+ return;
+
+e_term:
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
+}
+
+void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
+ unsigned int npages)
+{
+ if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ return;
+
+ /*
+ * Ask the hypervisor to mark the memory pages as private in the RMP
+ * table.
+ */
+ early_set_pages_state(paddr, npages, SNP_PAGE_STATE_PRIVATE);
+
+ /* Validate the memory pages after they've been added in the RMP table. */
+ pvalidate_pages(vaddr, npages, true);
+}
+
+void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
+ unsigned int npages)
+{
+ if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ return;
+
+ /* Invalidate the memory pages before they are marked shared in the RMP table. */
+ pvalidate_pages(vaddr, npages, false);
+
+ /* Ask hypervisor to mark the memory pages shared in the RMP table. */
+ early_set_pages_state(paddr, npages, SNP_PAGE_STATE_SHARED);
+}
+
+void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op)
+{
+ unsigned long vaddr, npages;
+
+ vaddr = (unsigned long)__va(paddr);
+ npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
+
+ if (op == SNP_PAGE_STATE_PRIVATE)
+ early_snp_set_memory_private(vaddr, paddr, npages);
+ else if (op == SNP_PAGE_STATE_SHARED)
+ early_snp_set_memory_shared(vaddr, paddr, npages);
+ else
+ WARN(1, "invalid memory op %d\n", op);
+}
+
+static int vmgexit_psc(struct snp_psc_desc *desc)
+{
+ int cur_entry, end_entry, ret = 0;
+ struct snp_psc_desc *data;
+ struct ghcb_state state;
+ struct es_em_ctxt ctxt;
+ unsigned long flags;
+ struct ghcb *ghcb;
+
+ /*
+ * __sev_get_ghcb() needs to run with IRQs disabled because it is using
+ * a per-CPU GHCB.
+ */
+ local_irq_save(flags);
+
+ ghcb = __sev_get_ghcb(&state);
+ if (!ghcb) {
+ ret = 1;
+ goto out_unlock;
+ }
+
+ /* Copy the input desc into GHCB shared buffer */
+ data = (struct snp_psc_desc *)ghcb->shared_buffer;
+ memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));
+
+ /*
+ * As per the GHCB specification, the hypervisor can resume the guest
+ * before processing all the entries. Check whether all the entries
+ * are processed. If not, then keep retrying. Note, the hypervisor
+ * will update the data memory directly to indicate the status, so
+ * reference the data->hdr everywhere.
+ *
+ * The strategy here is to wait for the hypervisor to change the page
+ * state in the RMP table before the guest accesses the memory pages. If the
+ * page state change was not successful, then later memory access will
+ * result in a crash.
+ */
+ cur_entry = data->hdr.cur_entry;
+ end_entry = data->hdr.end_entry;
+
+ while (data->hdr.cur_entry <= data->hdr.end_entry) {
+ ghcb_set_sw_scratch(ghcb, (u64)__pa(data));
+
+ /* The hypervisor advances data->hdr.cur_entry as it processes entries. */
+ ret = sev_es_ghcb_hv_call(ghcb, true, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
+
+ /*
+ * Page State Change VMGEXIT can pass error code through
+ * exit_info_2.
+ */
+ if (WARN(ret || ghcb->save.sw_exit_info_2,
+ "SNP: PSC failed ret=%d exit_info_2=%llx\n",
+ ret, ghcb->save.sw_exit_info_2)) {
+ ret = 1;
+ goto out;
+ }
+
+ /* Verify that reserved bit is not set */
+ if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) {
+ ret = 1;
+ goto out;
+ }
+
+ /*
+ * Sanity check that entry processing is not going backwards.
+ * This can happen only if the hypervisor is misbehaving.
+ */
+ if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry,
+"SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n",
+ end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) {
+ ret = 1;
+ goto out;
+ }
+ }
+
+out:
+ __sev_put_ghcb(&state);
+
+out_unlock:
+ local_irq_restore(flags);
+
+ return ret;
+}
+
+static void __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
+ unsigned long vaddr_end, int op)
+{
+ struct psc_hdr *hdr;
+ struct psc_entry *e;
+ unsigned long pfn;
+ int i;
+
+ hdr = &data->hdr;
+ e = data->entries;
+
+ memset(data, 0, sizeof(*data));
+ i = 0;
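+
+ /* Build one 4K PSC entry per page in [vaddr, vaddr_end). */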
+
+ while (vaddr < vaddr_end) {
+ if (is_vmalloc_addr((void *)vaddr))
+ pfn = vmalloc_to_pfn((void *)vaddr);
+ else
+ pfn = __pa(vaddr) >> PAGE_SHIFT;
+
+ e->gfn = pfn;
+ e->operation = op;
+ hdr->end_entry = i;
+
+ /*
+ * The current SNP implementation doesn't keep track of the RMP page
+ * size, so use 4K for simplicity.
+ */
+ e->pagesize = RMP_PG_SIZE_4K;
+
+ vaddr = vaddr + PAGE_SIZE;
+ e++;
+ i++;
+ }
+
+ if (vmgexit_psc(data))
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
+}
+
+static void set_pages_state(unsigned long vaddr, unsigned int npages, int op)
+{
+ unsigned long vaddr_end, next_vaddr;
+ struct snp_psc_desc *desc;
+
+ desc = kmalloc(sizeof(*desc), GFP_KERNEL_ACCOUNT);
+ if (!desc)
+ panic("SNP: failed to allocate memory for PSC descriptor\n");
+
+ vaddr = vaddr & PAGE_MASK;
+ vaddr_end = vaddr + (npages << PAGE_SHIFT);
+
+ while (vaddr < vaddr_end) {
+ /* Calculate the last vaddr that fits in one struct snp_psc_desc. */
+ next_vaddr = min_t(unsigned long, vaddr_end,
+ (VMGEXIT_PSC_MAX_ENTRY * PAGE_SIZE) + vaddr);
+
+ __set_pages_state(desc, vaddr, next_vaddr, op);
+
+ vaddr = next_vaddr;
+ }
+
+ kfree(desc);
+}
+
+void snp_set_memory_shared(unsigned long vaddr, unsigned int npages)
+{
+ if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ return;
+
+ pvalidate_pages(vaddr, npages, false);
+
+ set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED);
+}
+
+void snp_set_memory_private(unsigned long vaddr, unsigned int npages)
+{
+ if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ return;
+
+ set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
+
+ pvalidate_pages(vaddr, npages, true);
+}
+
+static int snp_set_vmsa(void *va, bool vmsa)
+{
+ u64 attrs;
+
+ /*
+ * Running at VMPL0 allows the kernel to change the VMSA bit for a page
+ * using the RMPADJUST instruction. However, for the instruction to
+ * succeed it must target the permissions of a lesser privileged
+ * (higher numbered) VMPL level, so use VMPL1 (refer to the RMPADJUST
+ * instruction in the AMD64 APM Volume 3).
+ */
+ attrs = 1;
+ if (vmsa)
+ attrs |= RMPADJUST_VMSA_PAGE_BIT;
+
+ return rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
+}
+
+#define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
+#define INIT_CS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK)
+#define INIT_DS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_WRITE_MASK)
+
+#define INIT_LDTR_ATTRIBS (SVM_SELECTOR_P_MASK | 2)
+#define INIT_TR_ATTRIBS (SVM_SELECTOR_P_MASK | 3)
+
+static void *snp_alloc_vmsa_page(void)
+{
+ struct page *p;
+
+ /*
+ * Allocate a VMSA page to work around the SNP erratum where the CPU will
+ * incorrectly signal an RMP violation #PF if a large page (2MB or 1GB)
+ * collides with the RMP entry of the VMSA page. The recommended workaround
+ * is to not use a large page.
+ *
+ * Allocate an 8k page which is also 8k-aligned.
+ */
+ p = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
+ if (!p)
+ return NULL;
+
+ split_page(p, 1);
+
+ /* Free the first 4k. This page may be 2M/1G aligned and cannot be used. */
+ __free_page(p);
+
+ return page_address(p + 1);
+}
+
+static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa)
+{
+ int err;
+
+ err = snp_set_vmsa(vmsa, false);
+ if (err)
+ pr_err("clear VMSA page failed (%u), leaking page\n", err);
+ else
+ free_page((unsigned long)vmsa);
+}
+
+static int wakeup_cpu_via_vmgexit(int apic_id, unsigned long start_ip)
+{
+ struct sev_es_save_area *cur_vmsa, *vmsa;
+ struct ghcb_state state;
+ unsigned long flags;
+ struct ghcb *ghcb;
+ u8 sipi_vector;
+ int cpu, ret;
+ u64 cr4;
+
+ /*
+ * The hypervisor SNP feature support check has happened earlier; just
+ * check the AP_CREATION one here.
+ */
+ if (!(sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION))
+ return -EOPNOTSUPP;
+
+ /*
+ * Verify the desired start IP against the known trampoline start IP
+ * to catch any future trampoline that might be introduced and would
+ * require a new protected guest entry point.
+ */
+ if (WARN_ONCE(start_ip != real_mode_header->trampoline_start,
+ "Unsupported SNP start_ip: %lx\n", start_ip))
+ return -EINVAL;
+
+ /* Override start_ip with known protected guest start IP */
+ start_ip = real_mode_header->sev_es_trampoline_start;
+
+ /* Find the logical CPU for the APIC ID */
+ for_each_present_cpu(cpu) {
+ if (arch_match_cpu_phys_id(cpu, apic_id))
+ break;
+ }
+ if (cpu >= nr_cpu_ids)
+ return -EINVAL;
+
+ cur_vmsa = per_cpu(sev_vmsa, cpu);
+
+ /*
+ * A new VMSA is created each time because there is no guarantee that
+ * the current VMSA is the kernel's or that the vCPU is not running. If
+ * an attempt were made to use the current VMSA with a running vCPU, a
+ * #VMEXIT of that vCPU would wipe out all of the settings being done
+ * here.
+ */
+ vmsa = (struct sev_es_save_area *)snp_alloc_vmsa_page();
+ if (!vmsa)
+ return -ENOMEM;
+
+ /* CR4 should maintain the MCE value */
+ cr4 = native_read_cr4() & X86_CR4_MCE;
+
+ /* Set the CS value based on the start_ip converted to a SIPI vector */
+ sipi_vector = (start_ip >> 12);
+ vmsa->cs.base = sipi_vector << 12;
+ vmsa->cs.limit = AP_INIT_CS_LIMIT;
+ vmsa->cs.attrib = INIT_CS_ATTRIBS;
+ vmsa->cs.selector = sipi_vector << 8;
+
+ /* Set the RIP value based on start_ip */
+ vmsa->rip = start_ip & 0xfff;
+
+ /* Set AP INIT defaults as documented in the APM */
+ vmsa->ds.limit = AP_INIT_DS_LIMIT;
+ vmsa->ds.attrib = INIT_DS_ATTRIBS;
+ vmsa->es = vmsa->ds;
+ vmsa->fs = vmsa->ds;
+ vmsa->gs = vmsa->ds;
+ vmsa->ss = vmsa->ds;
+
+ vmsa->gdtr.limit = AP_INIT_GDTR_LIMIT;
+ vmsa->ldtr.limit = AP_INIT_LDTR_LIMIT;
+ vmsa->ldtr.attrib = INIT_LDTR_ATTRIBS;
+ vmsa->idtr.limit = AP_INIT_IDTR_LIMIT;
+ vmsa->tr.limit = AP_INIT_TR_LIMIT;
+ vmsa->tr.attrib = INIT_TR_ATTRIBS;
+
+ vmsa->cr4 = cr4;
+ vmsa->cr0 = AP_INIT_CR0_DEFAULT;
+ vmsa->dr7 = DR7_RESET_VALUE;
+ vmsa->dr6 = AP_INIT_DR6_DEFAULT;
+ vmsa->rflags = AP_INIT_RFLAGS_DEFAULT;
+ vmsa->g_pat = AP_INIT_GPAT_DEFAULT;
+ vmsa->xcr0 = AP_INIT_XCR0_DEFAULT;
+ vmsa->mxcsr = AP_INIT_MXCSR_DEFAULT;
+ vmsa->x87_ftw = AP_INIT_X87_FTW_DEFAULT;
+ vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT;
+
+ /* SVME must be set. */
+ vmsa->efer = EFER_SVME;
+
+ /*
+ * Set the SNP-specific fields for this VMSA:
+ * VMPL level
+ * SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
+ */
+ vmsa->vmpl = 0;
+ vmsa->sev_features = sev_status >> 2;
+
+ /* Switch the page over to a VMSA page now that it is initialized */
+ ret = snp_set_vmsa(vmsa, true);
+ if (ret) {
+ pr_err("set VMSA page failed (%u)\n", ret);
+ free_page((unsigned long)vmsa);
+
+ return -EINVAL;
+ }
+
+ /* Issue VMGEXIT AP Creation NAE event */
+ local_irq_save(flags);
+
+ ghcb = __sev_get_ghcb(&state);
+
+ vc_ghcb_invalidate(ghcb);
+ ghcb_set_rax(ghcb, vmsa->sev_features);
+ ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
+ ghcb_set_sw_exit_info_1(ghcb, ((u64)apic_id << 32) | SVM_VMGEXIT_AP_CREATE);
+ ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));
+
+ sev_es_wr_ghcb_msr(__pa(ghcb));
+ VMGEXIT();
+
+ if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
+ lower_32_bits(ghcb->save.sw_exit_info_1)) {
+ pr_err("SNP AP Creation error\n");
+ ret = -EINVAL;
+ }
+
+ __sev_put_ghcb(&state);
+
+ local_irq_restore(flags);
+
+ /* Perform cleanup if there was an error */
+ if (ret) {
+ snp_cleanup_vmsa(vmsa);
+ vmsa = NULL;
+ }
+
+ /* Free up any previous VMSA page */
+ if (cur_vmsa)
+ snp_cleanup_vmsa(cur_vmsa);
+
+ /* Record the current VMSA page */
+ per_cpu(sev_vmsa, cpu) = vmsa;
+
+ return ret;
+}
+
+void snp_set_wakeup_secondary_cpu(void)
+{
+ if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ return;
+
+ /*
+ * Always set this override if SNP is enabled. This makes it the
+ * required method to start APs under SNP. If the hypervisor does
+ * not support AP creation, then no APs will be started.
+ */
+ apic->wakeup_secondary_cpu = wakeup_cpu_via_vmgexit;
+}
+
+int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
{
u16 startup_cs, startup_ip;
phys_addr_t jump_table_pa;
@@ -644,15 +1223,39 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
return ret;
}
-/*
- * This function runs on the first #VC exception after the kernel
- * switched to virtual addresses.
- */
-static bool __init sev_es_setup_ghcb(void)
+static void snp_register_per_cpu_ghcb(void)
+{
+ struct sev_es_runtime_data *data;
+ struct ghcb *ghcb;
+
+ data = this_cpu_read(runtime_data);
+ ghcb = &data->ghcb_page;
+
+ snp_register_ghcb_early(__pa(ghcb));
+}
+
+void setup_ghcb(void)
{
+ if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
+ return;
+
/* First make sure the hypervisor talks a supported protocol. */
if (!sev_es_negotiate_protocol())
- return false;
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
+
+ /*
+ * Check whether the runtime #VC exception handler is active. It uses
+ * the per-CPU GHCB page which is set up by sev_es_init_vc_handling().
+ *
+ * If SNP is active, register the per-CPU GHCB page so that the runtime
+ * exception handler can use it.
+ */
+ if (initial_vc_handler == (unsigned long)kernel_exc_vmm_communication) {
+ if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ snp_register_per_cpu_ghcb();
+
+ return;
+ }
/*
* Clear the boot_ghcb. The first exception comes in before the bss
@@ -663,7 +1266,9 @@ static bool __init sev_es_setup_ghcb(void)
/* Alright - Make the boot-ghcb public */
boot_ghcb = &boot_ghcb_page;
- return true;
+ /* SNP guests require the GHCB GPA to be registered. */
+ if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ snp_register_ghcb_early(__pa(&boot_ghcb_page));
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -766,6 +1371,17 @@ void __init sev_es_init_vc_handling(void)
if (!sev_es_check_cpu_features())
panic("SEV-ES CPU Features missing");
+ /*
+ * SNP is supported in v2 of the GHCB spec which mandates support for HV
+ * features.
+ */
+ if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
+ sev_hv_features = get_hv_features();
+
+ if (!(sev_hv_features & GHCB_HV_FT_SNP))
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
+ }
+
/* Enable SEV-ES special handling */
static_branch_enable(&sev_es_enable_key);
@@ -1337,7 +1953,7 @@ DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
show_regs(regs);
/* Ask hypervisor to sev_es_terminate */
- sev_es_terminate(GHCB_SEV_ES_GEN_REQ);
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
/* If that fails and we get here - just panic */
panic("Returned from Terminate-Request to Hypervisor\n");
@@ -1383,10 +1999,6 @@ bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
struct es_em_ctxt ctxt;
enum es_result result;
- /* Do initial setup or terminate the guest */
- if (unlikely(boot_ghcb == NULL && !sev_es_setup_ghcb()))
- sev_es_terminate(GHCB_SEV_ES_GEN_REQ);
-
vc_ghcb_invalidate(boot_ghcb);
result = vc_init_em_ctxt(&ctxt, regs, exit_code);
@@ -1425,6 +2037,215 @@ bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
fail:
show_regs(regs);
- while (true)
- halt();
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
+}
+
+/*
+ * Initial set up of SNP relies on information provided by the
+ * Confidential Computing blob, which can be passed to the kernel
+ * in the following ways, depending on how it is booted:
+ *
+ * - when booted via the boot/decompress kernel:
+ * - via boot_params
+ *
+ * - when booted directly by firmware/bootloader (e.g. CONFIG_PVH):
+ * - via a setup_data entry, as defined by the Linux Boot Protocol
+ *
+ * Scan for the blob in that order.
+ */
+static __init struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
+{
+ struct cc_blob_sev_info *cc_info;
+
+ /* The boot kernel would have passed the CC blob via boot_params. */
+ if (bp->cc_blob_address) {
+ cc_info = (struct cc_blob_sev_info *)(unsigned long)bp->cc_blob_address;
+ goto found_cc_info;
+ }
+
+ /*
+ * If the kernel was booted directly, without the use of the
+ * boot/decompression kernel, the CC blob may have been passed via
+ * setup_data instead.
+ */
+ cc_info = find_cc_blob_setup_data(bp);
+ if (!cc_info)
+ return NULL;
+
+found_cc_info:
+ if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC)
+ snp_abort();
+
+ return cc_info;
+}
+
+bool __init snp_init(struct boot_params *bp)
+{
+ struct cc_blob_sev_info *cc_info;
+
+ if (!bp)
+ return false;
+
+ cc_info = find_cc_blob(bp);
+ if (!cc_info)
+ return false;
+
+ setup_cpuid_table(cc_info);
+
+ /*
+ * The CC blob will be used later to access the secrets page. Cache
+ * it here like the boot kernel does.
+ */
+ bp->cc_blob_address = (u32)(unsigned long)cc_info;
+
+ return true;
+}
+
+void __init snp_abort(void)
+{
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
+}
+
+static void dump_cpuid_table(void)
+{
+ const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
+ int i = 0;
+
+ pr_info("count=%d reserved=0x%x reserved2=0x%llx\n",
+ cpuid_table->count, cpuid_table->__reserved1, cpuid_table->__reserved2);
+
+ for (i = 0; i < SNP_CPUID_COUNT_MAX; i++) {
+ const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];
+
+ pr_info("index=%3d fn=0x%08x subfn=0x%08x: eax=0x%08x ebx=0x%08x ecx=0x%08x edx=0x%08x xcr0_in=0x%016llx xss_in=0x%016llx reserved=0x%016llx\n",
+ i, fn->eax_in, fn->ecx_in, fn->eax, fn->ebx, fn->ecx,
+ fn->edx, fn->xcr0_in, fn->xss_in, fn->__reserved);
+ }
+}
+
+/*
+ * It is useful from an auditing/testing perspective to provide an easy way
+ * for the guest owner to know that the CPUID table has been initialized as
+ * expected. That initialization happens too early in boot to print any
+ * sort of indicator, however, and there is no better place to do it, so
+ * do it here.
+ */
+static int __init report_cpuid_table(void)
+{
+ const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
+
+ if (!cpuid_table->count)
+ return 0;
+
+ pr_info("Using SNP CPUID table, %d entries present.\n",
+ cpuid_table->count);
+
+ if (sev_cfg.debug)
+ dump_cpuid_table();
+
+ return 0;
+}
+arch_initcall(report_cpuid_table);
+
+static int __init init_sev_config(char *str)
+{
+ char *s;
+
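+ /* Accept a comma-separated list of options; only "debug" is currently recognized. */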
+ while ((s = strsep(&str, ","))) {
+ if (!strcmp(s, "debug")) {
+ sev_cfg.debug = true;
+ continue;
+ }
+
+ pr_info("SEV command-line option '%s' was not recognized\n", s);
+ }
+
+ return 1;
+}
+__setup("sev=", init_sev_config);
+
+int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err)
+{
+ struct ghcb_state state;
+ struct es_em_ctxt ctxt;
+ unsigned long flags;
+ struct ghcb *ghcb;
+ int ret;
+
+ if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ return -ENODEV;
+
+ if (!fw_err)
+ return -EINVAL;
+
+ /*
+ * __sev_get_ghcb() needs to run with IRQs disabled because it is using
+ * a per-CPU GHCB.
+ */
+ local_irq_save(flags);
+
+ ghcb = __sev_get_ghcb(&state);
+ if (!ghcb) {
+ ret = -EIO;
+ goto e_restore_irq;
+ }
+
+ vc_ghcb_invalidate(ghcb);
+
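+ /*
+ * Extended guest requests pass the certificate-data buffer GPA in
+ * RAX and its length, in pages, in RBX.
+ */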
+ if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
+ ghcb_set_rax(ghcb, input->data_gpa);
+ ghcb_set_rbx(ghcb, input->data_npages);
+ }
+
+ ret = sev_es_ghcb_hv_call(ghcb, true, &ctxt, exit_code, input->req_gpa, input->resp_gpa);
+ if (ret)
+ goto e_put;
+
+ if (ghcb->save.sw_exit_info_2) {
+ /* The expected number of pages is returned in RBX. */
+ if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST &&
+ ghcb->save.sw_exit_info_2 == SNP_GUEST_REQ_INVALID_LEN)
+ input->data_npages = ghcb_get_rbx(ghcb);
+
+ *fw_err = ghcb->save.sw_exit_info_2;
+
+ ret = -EIO;
+ }
+
+e_put:
+ __sev_put_ghcb(&state);
+e_restore_irq:
+ local_irq_restore(flags);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(snp_issue_guest_request);
+
+static struct platform_device sev_guest_device = {
+ .name = "sev-guest",
+ .id = -1,
+};
+
+static int __init snp_init_platform_device(void)
+{
+ struct sev_guest_platform_data data;
+ u64 gpa;
+
+ if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ return -ENODEV;
+
+ gpa = get_secrets_page();
+ if (!gpa)
+ return -ENODEV;
+
+ data.secrets_gpa = gpa;
+ if (platform_device_add_data(&sev_guest_device, &data, sizeof(data)))
+ return -ENODEV;
+
+ if (platform_device_register(&sev_guest_device))
+ return -ENODEV;
+
+ pr_info("SNP guest platform device initialized.\n");
+ return 0;
}
+device_initcall(snp_init_platform_device);
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index e439eb14325f..9c7265b524c7 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -93,7 +93,7 @@ static bool restore_sigcontext(struct pt_regs *regs,
return false;
#ifdef CONFIG_X86_32
- set_user_gs(regs, sc.gs);
+ loadsegment(gs, sc.gs);
regs->fs = sc.fs;
regs->es = sc.es;
regs->ds = sc.ds;
@@ -146,8 +146,10 @@ __unsafe_setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
struct pt_regs *regs, unsigned long mask)
{
#ifdef CONFIG_X86_32
- unsafe_put_user(get_user_gs(regs),
- (unsigned int __user *)&sc->gs, Efault);
+ unsigned int gs;
+ savesegment(gs, gs);
+
+ unsafe_put_user(gs, (unsigned int __user *)&sc->gs, Efault);
unsafe_put_user(regs->fs, (unsigned int __user *)&sc->fs, Efault);
unsafe_put_user(regs->es, (unsigned int __user *)&sc->es, Efault);
unsafe_put_user(regs->ds, (unsigned int __user *)&sc->ds, Efault);
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index b52407c56000..879ef8c72f5c 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -149,8 +149,10 @@ static inline void signal_compat_build_tests(void)
BUILD_BUG_ON(offsetof(siginfo_t, si_perf_data) != 0x18);
BUILD_BUG_ON(offsetof(siginfo_t, si_perf_type) != 0x20);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_perf_flags) != 0x24);
BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_data) != 0x10);
BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_type) != 0x14);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_flags) != 0x18);
CHECK_CSI_OFFSET(_sigpoll);
CHECK_CSI_SIZE (_sigpoll, 2*sizeof(int));
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 2ef14772dc04..5e7f9532a10d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -56,7 +56,6 @@
#include <linux/numa.h>
#include <linux/pgtable.h>
#include <linux/overflow.h>
-#include <linux/syscore_ops.h>
#include <asm/acpi.h>
#include <asm/desc.h>
@@ -82,6 +81,7 @@
#include <asm/spec-ctrl.h>
#include <asm/hw_irq.h>
#include <asm/stackprotector.h>
+#include <asm/sev.h>
/* representing HT siblings of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
@@ -187,7 +187,7 @@ static void smp_callin(void)
*/
set_cpu_sibling_map(raw_smp_processor_id());
- init_freq_invariance(true, false);
+ ap_init_aperfmperf();
/*
* Get our bogomips.
@@ -1082,6 +1082,11 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
unsigned long boot_error = 0;
unsigned long timeout;
+#ifdef CONFIG_X86_64
+ /* If 64-bit wakeup method exists, use the 64-bit mode trampoline IP */
+ if (apic->wakeup_secondary_cpu_64)
+ start_ip = real_mode_header->trampoline_start64;
+#endif
idle->thread.sp = (unsigned long)task_pt_regs(idle);
early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
initial_code = (unsigned long)start_secondary;
@@ -1123,11 +1128,14 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
/*
* Wake up a CPU in different cases:
- * - Use the method in the APIC driver if it's defined
+ * - Use a method from the APIC driver if one is defined, with wakeup
+ * straight to 64-bit mode preferred over wakeup to real mode.
* Otherwise,
* - Use an INIT boot APIC message for APs or NMI for BSP.
*/
- if (apic->wakeup_secondary_cpu)
+ if (apic->wakeup_secondary_cpu_64)
+ boot_error = apic->wakeup_secondary_cpu_64(apicid, start_ip);
+ else if (apic->wakeup_secondary_cpu)
boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
else
boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
@@ -1397,7 +1405,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
{
smp_prepare_cpus_common();
- init_freq_invariance(false, false);
smp_sanity_check();
switch (apic_intr_mode) {
@@ -1430,6 +1437,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
smp_quirk_init_udelay();
speculative_store_bypass_ht_init();
+
+ snp_set_wakeup_secondary_cpu();
}
void arch_thaw_secondary_cpus_begin(void)
@@ -1847,357 +1856,3 @@ void native_play_dead(void)
}
#endif
-
-#ifdef CONFIG_X86_64
-/*
- * APERF/MPERF frequency ratio computation.
- *
- * The scheduler wants to do frequency invariant accounting and needs a <1
- * ratio to account for the 'current' frequency, corresponding to
- * freq_curr / freq_max.
- *
- * Since the frequency freq_curr on x86 is controlled by micro-controller and
- * our P-state setting is little more than a request/hint, we need to observe
- * the effective frequency 'BusyMHz', i.e. the average frequency over a time
- * interval after discarding idle time. This is given by:
- *
- * BusyMHz = delta_APERF / delta_MPERF * freq_base
- *
- * where freq_base is the max non-turbo P-state.
- *
- * The freq_max term has to be set to a somewhat arbitrary value, because we
- * can't know which turbo states will be available at a given point in time:
- * it all depends on the thermal headroom of the entire package. We set it to
- * the turbo level with 4 cores active.
- *
- * Benchmarks show that's a good compromise between the 1C turbo ratio
- * (freq_curr/freq_max would rarely reach 1) and something close to freq_base,
- * which would ignore the entire turbo range (a conspicuous part, making
- * freq_curr/freq_max always maxed out).
- *
- * An exception to the heuristic above is the Atom uarch, where we choose the
- * highest turbo level for freq_max since Atom's are generally oriented towards
- * power efficiency.
- *
- * Setting freq_max to anything less than the 1C turbo ratio makes the ratio
- * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1.
- */
-
-DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key);
-
-static DEFINE_PER_CPU(u64, arch_prev_aperf);
-static DEFINE_PER_CPU(u64, arch_prev_mperf);
-static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE;
-static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE;
-
-void arch_set_max_freq_ratio(bool turbo_disabled)
-{
- arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
- arch_turbo_freq_ratio;
-}
-EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);
-
-static bool turbo_disabled(void)
-{
- u64 misc_en;
- int err;
-
- err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en);
- if (err)
- return false;
-
- return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
-}
-
-static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
-{
- int err;
-
- err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq);
- if (err)
- return false;
-
- err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
- if (err)
- return false;
-
- *base_freq = (*base_freq >> 16) & 0x3F; /* max P state */
- *turbo_freq = *turbo_freq & 0x3F; /* 1C turbo */
-
- return true;
-}
-
-#define X86_MATCH(model) \
- X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, \
- INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)
-
-static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = {
- X86_MATCH(XEON_PHI_KNL),
- X86_MATCH(XEON_PHI_KNM),
- {}
-};
-
-static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = {
- X86_MATCH(SKYLAKE_X),
- {}
-};
-
-static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = {
- X86_MATCH(ATOM_GOLDMONT),
- X86_MATCH(ATOM_GOLDMONT_D),
- X86_MATCH(ATOM_GOLDMONT_PLUS),
- {}
-};
-
-static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
- int num_delta_fratio)
-{
- int fratio, delta_fratio, found;
- int err, i;
- u64 msr;
-
- err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
- if (err)
- return false;
-
- *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
-
- err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
- if (err)
- return false;
-
- fratio = (msr >> 8) & 0xFF;
- i = 16;
- found = 0;
- do {
- if (found >= num_delta_fratio) {
- *turbo_freq = fratio;
- return true;
- }
-
- delta_fratio = (msr >> (i + 5)) & 0x7;
-
- if (delta_fratio) {
- found += 1;
- fratio -= delta_fratio;
- }
-
- i += 8;
- } while (i < 64);
-
- return true;
-}
-
-static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
-{
- u64 ratios, counts;
- u32 group_size;
- int err, i;
-
- err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
- if (err)
- return false;
-
- *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
-
- err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
- if (err)
- return false;
-
- err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
- if (err)
- return false;
-
- for (i = 0; i < 64; i += 8) {
- group_size = (counts >> i) & 0xFF;
- if (group_size >= size) {
- *turbo_freq = (ratios >> i) & 0xFF;
- return true;
- }
- }
-
- return false;
-}
-
-static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
-{
- u64 msr;
- int err;
-
- err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
- if (err)
- return false;
-
- err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
- if (err)
- return false;
-
- *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
- *turbo_freq = (msr >> 24) & 0xFF; /* 4C turbo */
-
- /* The CPU may have less than 4 cores */
- if (!*turbo_freq)
- *turbo_freq = msr & 0xFF; /* 1C turbo */
-
- return true;
-}
-
-static bool intel_set_max_freq_ratio(void)
-{
- u64 base_freq, turbo_freq;
- u64 turbo_ratio;
-
- if (slv_set_max_freq_ratio(&base_freq, &turbo_freq))
- goto out;
-
- if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
- skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
- goto out;
-
- if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
- knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
- goto out;
-
- if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
- skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4))
- goto out;
-
- if (core_set_max_freq_ratio(&base_freq, &turbo_freq))
- goto out;
-
- return false;
-
-out:
- /*
- * Some hypervisors advertise X86_FEATURE_APERFMPERF
- * but then fill all MSR's with zeroes.
- * Some CPUs have turbo boost but don't declare any turbo ratio
- * in MSR_TURBO_RATIO_LIMIT.
- */
- if (!base_freq || !turbo_freq) {
- pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n");
- return false;
- }
-
- turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq);
- if (!turbo_ratio) {
- pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n");
- return false;
- }
-
- arch_turbo_freq_ratio = turbo_ratio;
- arch_set_max_freq_ratio(turbo_disabled());
-
- return true;
-}
-
-static void init_counter_refs(void)
-{
- u64 aperf, mperf;
-
- rdmsrl(MSR_IA32_APERF, aperf);
- rdmsrl(MSR_IA32_MPERF, mperf);
-
- this_cpu_write(arch_prev_aperf, aperf);
- this_cpu_write(arch_prev_mperf, mperf);
-}
-
-#ifdef CONFIG_PM_SLEEP
-static struct syscore_ops freq_invariance_syscore_ops = {
- .resume = init_counter_refs,
-};
-
-static void register_freq_invariance_syscore_ops(void)
-{
- /* Bail out if registered already. */
- if (freq_invariance_syscore_ops.node.prev)
- return;
-
- register_syscore_ops(&freq_invariance_syscore_ops);
-}
-#else
-static inline void register_freq_invariance_syscore_ops(void) {}
-#endif
-
-void init_freq_invariance(bool secondary, bool cppc_ready)
-{
- bool ret = false;
-
- if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
- return;
-
- if (secondary) {
- if (static_branch_likely(&arch_scale_freq_key)) {
- init_counter_refs();
- }
- return;
- }
-
- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
- ret = intel_set_max_freq_ratio();
- else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
- if (!cppc_ready) {
- return;
- }
- ret = amd_set_max_freq_ratio(&arch_turbo_freq_ratio);
- }
-
- if (ret) {
- init_counter_refs();
- static_branch_enable(&arch_scale_freq_key);
- register_freq_invariance_syscore_ops();
- pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
- } else {
- pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n");
- }
-}
-
-static void disable_freq_invariance_workfn(struct work_struct *work)
-{
- static_branch_disable(&arch_scale_freq_key);
-}
-
-static DECLARE_WORK(disable_freq_invariance_work,
- disable_freq_invariance_workfn);
-
-DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
-
-void arch_scale_freq_tick(void)
-{
- u64 freq_scale;
- u64 aperf, mperf;
- u64 acnt, mcnt;
-
- if (!arch_scale_freq_invariant())
- return;
-
- rdmsrl(MSR_IA32_APERF, aperf);
- rdmsrl(MSR_IA32_MPERF, mperf);
-
- acnt = aperf - this_cpu_read(arch_prev_aperf);
- mcnt = mperf - this_cpu_read(arch_prev_mperf);
-
- this_cpu_write(arch_prev_aperf, aperf);
- this_cpu_write(arch_prev_mperf, mperf);
-
- if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
- goto error;
-
- if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
- goto error;
-
- freq_scale = div64_u64(acnt, mcnt);
- if (!freq_scale)
- goto error;
-
- if (freq_scale > SCHED_CAPACITY_SCALE)
- freq_scale = SCHED_CAPACITY_SCALE;
-
- this_cpu_write(arch_freq_scale, freq_scale);
- return;
-
-error:
- pr_warn("Scheduler frequency invariance went wobbly, disabling!\n");
- schedule_work(&disable_freq_invariance_work);
-}
-#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 660b78827638..8cc653ffdccd 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -68,9 +68,6 @@ static int __init control_va_addr_alignment(char *str)
if (*str == 0)
return 1;
- if (*str == '=')
- str++;
-
if (!strcmp(str, "32"))
va_align.flags = ALIGN_VA_32;
else if (!strcmp(str, "64"))
@@ -80,11 +77,11 @@ static int __init control_va_addr_alignment(char *str)
else if (!strcmp(str, "on"))
va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
else
- return 0;
+ pr_warn("invalid option value: 'align_va_addr=%s'\n", str);
return 1;
}
-__setup("align_va_addr", control_va_addr_alignment);
+__setup("align_va_addr=", control_va_addr_alignment);
SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags,
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 1563fb995005..d62b2cb85cea 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -62,6 +62,7 @@
#include <asm/insn.h>
#include <asm/insn-eval.h>
#include <asm/vdso.h>
+#include <asm/tdx.h>
#ifdef CONFIG_X86_64
#include <asm/x86_init.h>
@@ -686,13 +687,40 @@ static bool try_fixup_enqcmd_gp(void)
#endif
}
+static bool gp_try_fixup_and_notify(struct pt_regs *regs, int trapnr,
+ unsigned long error_code, const char *str)
+{
+ if (fixup_exception(regs, trapnr, error_code, 0))
+ return true;
+
+ current->thread.error_code = error_code;
+ current->thread.trap_nr = trapnr;
+
+ /*
+ * To be potentially processing a kprobe fault and to trust the result
+ * from kprobe_running(), we have to be non-preemptible.
+ */
+ if (!preemptible() && kprobe_running() &&
+ kprobe_fault_handler(regs, trapnr))
+ return true;
+
+ return notify_die(DIE_GPF, str, regs, error_code, trapnr, SIGSEGV) == NOTIFY_STOP;
+}
+
+static void gp_user_force_sig_segv(struct pt_regs *regs, int trapnr,
+ unsigned long error_code, const char *str)
+{
+ current->thread.error_code = error_code;
+ current->thread.trap_nr = trapnr;
+ show_signal(current, SIGSEGV, "", str, regs, error_code);
+ force_sig(SIGSEGV);
+}
+
DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
{
char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
enum kernel_gp_hint hint = GP_NO_HINT;
- struct task_struct *tsk;
unsigned long gp_addr;
- int ret;
if (user_mode(regs) && try_fixup_enqcmd_gp())
return;
@@ -711,40 +739,18 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
return;
}
- tsk = current;
-
if (user_mode(regs)) {
if (fixup_iopl_exception(regs))
goto exit;
- tsk->thread.error_code = error_code;
- tsk->thread.trap_nr = X86_TRAP_GP;
-
if (fixup_vdso_exception(regs, X86_TRAP_GP, error_code, 0))
goto exit;
- show_signal(tsk, SIGSEGV, "", desc, regs, error_code);
- force_sig(SIGSEGV);
+ gp_user_force_sig_segv(regs, X86_TRAP_GP, error_code, desc);
goto exit;
}
- if (fixup_exception(regs, X86_TRAP_GP, error_code, 0))
- goto exit;
-
- tsk->thread.error_code = error_code;
- tsk->thread.trap_nr = X86_TRAP_GP;
-
- /*
- * To be potentially processing a kprobe fault and to trust the result
- * from kprobe_running(), we have to be non-preemptible.
- */
- if (!preemptible() &&
- kprobe_running() &&
- kprobe_fault_handler(regs, X86_TRAP_GP))
- goto exit;
-
- ret = notify_die(DIE_GPF, desc, regs, error_code, X86_TRAP_GP, SIGSEGV);
- if (ret == NOTIFY_STOP)
+ if (gp_try_fixup_and_notify(regs, X86_TRAP_GP, error_code, desc))
goto exit;
if (error_code)
@@ -892,14 +898,10 @@ sync:
}
#endif
-struct bad_iret_stack {
- void *error_entry_ret;
- struct pt_regs regs;
-};
-
-asmlinkage __visible noinstr
-struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
+asmlinkage __visible noinstr struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs)
{
+ struct pt_regs tmp, *new_stack;
+
/*
* This is called from entry_64.S early in handling a fault
* caused by a bad iret to user mode. To handle the fault
@@ -908,19 +910,18 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
* just below the IRET frame) and we want to pretend that the
* exception came from the IRET target.
*/
- struct bad_iret_stack tmp, *new_stack =
- (struct bad_iret_stack *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
+ new_stack = (struct pt_regs *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
/* Copy the IRET target to the temporary storage. */
- __memcpy(&tmp.regs.ip, (void *)s->regs.sp, 5*8);
+ __memcpy(&tmp.ip, (void *)bad_regs->sp, 5*8);
/* Copy the remainder of the stack from the current stack. */
- __memcpy(&tmp, s, offsetof(struct bad_iret_stack, regs.ip));
+ __memcpy(&tmp, bad_regs, offsetof(struct pt_regs, ip));
/* Update the entry stack */
__memcpy(new_stack, &tmp, sizeof(tmp));
- BUG_ON(!user_mode(&new_stack->regs));
+ BUG_ON(!user_mode(new_stack));
return new_stack;
}
#endif
@@ -1343,6 +1344,91 @@ DEFINE_IDTENTRY(exc_device_not_available)
}
}
+#ifdef CONFIG_INTEL_TDX_GUEST
+
+#define VE_FAULT_STR "VE fault"
+
+static void ve_raise_fault(struct pt_regs *regs, long error_code)
+{
+ if (user_mode(regs)) {
+ gp_user_force_sig_segv(regs, X86_TRAP_VE, error_code, VE_FAULT_STR);
+ return;
+ }
+
+ if (gp_try_fixup_and_notify(regs, X86_TRAP_VE, error_code, VE_FAULT_STR))
+ return;
+
+ die_addr(VE_FAULT_STR, regs, error_code, 0);
+}
+
+/*
+ * Virtualization Exceptions (#VE) are delivered to TDX guests due to
+ * specific guest actions which may happen in either user space or the
+ * kernel:
+ *
+ * * Specific instructions (WBINVD, for example)
+ * * Specific MSR accesses
+ * * Specific CPUID leaf accesses
+ * * Access to specific guest physical addresses
+ *
+ * In the settings that Linux will run in, virtualization exceptions are
+ * never generated on accesses to normal, TD-private memory that has been
+ * accepted (by BIOS or with tdx_enc_status_changed()).
+ *
+ * Syscall entry code has a critical window where the kernel stack is not
+ * yet set up. Any exception in this window leads to hard-to-debug issues
+ * and can be exploited for privilege escalation. Exceptions in the NMI
+ * entry code also cause issues. Returning from the exception handler with
+ * IRET will re-enable NMIs, and a nested NMI will corrupt the NMI stack.
+ *
+ * For these reasons, the kernel avoids #VEs during the syscall gap and
+ * the NMI entry code. Entry code paths do not access TD-shared memory or
+ * MMIO regions, and do not use #VE-triggering MSRs, instructions, or CPUID
+ * leaves that might generate #VE. The VMM can remove memory from the TD at
+ * any point, but access to unaccepted (or missing) private memory leads to
+ * VM termination, not to #VE.
+ *
+ * Similarly to page faults and breakpoints, #VEs are allowed in NMI
+ * handlers once the kernel is ready to deal with nested NMIs.
+ *
+ * During #VE delivery, all interrupts, including NMIs, are blocked until
+ * TDGETVEINFO is called. It prevents #VE nesting until the kernel reads
+ * the VE info.
+ *
+ * If a guest kernel action which would normally cause a #VE occurs in
+ * the interrupt-disabled region before TDGETVEINFO, a #DF (double
+ * fault) is delivered to the guest, which will result in an oops.
+ *
+ * The entry code has been audited carefully to ensure it follows these
+ * expectations. Changes to the entry code have to be audited for
+ * correctness against this aspect as well. As with #PF, a #VE in these
+ * places would expose the kernel to privilege escalation or lead to
+ * random crashes.
+ */
+DEFINE_IDTENTRY(exc_virtualization_exception)
+{
+ struct ve_info ve;
+
+ /*
+ * NMIs/Machine-checks/Interrupts will be in a disabled state
+ * till TDGETVEINFO TDCALL is executed. This ensures that VE
+ * info cannot be overwritten by a nested #VE.
+ */
+ tdx_get_ve_info(&ve);
+
+ cond_local_irq_enable(regs);
+
+ /*
+ * If tdx_handle_virt_exception() could not process
+ * it successfully, treat it as #GP(0) and handle it.
+ */
+ if (!tdx_handle_virt_exception(regs, &ve))
+ ve_raise_fault(regs, 0);
+
+ cond_local_irq_disable(regs);
+}
+
+#endif
+
#ifdef CONFIG_X86_32
DEFINE_IDTENTRY_SW(iret_error)
{
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index c21bcd668284..e9e803a4d44c 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -151,7 +151,7 @@ exit_vm86:
memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));
- lazy_load_gs(vm86->regs32.gs);
+ loadsegment(gs, vm86->regs32.gs);
regs->pt.ax = retval;
return;
@@ -325,7 +325,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
* Save old state
*/
vm86->saved_sp0 = tsk->thread.sp0;
- lazy_save_gs(vm86->regs32.gs);
+ savesegment(gs, vm86->regs32.gs);
/* make room for real-mode segments */
preempt_disable();
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0c1ba6aa0765..de6d44e07e34 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -19,6 +19,7 @@
#include <asm/user.h>
#include <asm/fpu/xstate.h>
#include <asm/sgx.h>
+#include <asm/cpuid.h>
#include "cpuid.h"
#include "lapic.h"
#include "mmu.h"
@@ -744,24 +745,8 @@ static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array,
cpuid_count(entry->function, entry->index,
&entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
- switch (function) {
- case 4:
- case 7:
- case 0xb:
- case 0xd:
- case 0xf:
- case 0x10:
- case 0x12:
- case 0x14:
- case 0x17:
- case 0x18:
- case 0x1d:
- case 0x1e:
- case 0x1f:
- case 0x8000001d:
+ if (cpuid_function_is_indexed(function))
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
- break;
- }
return entry;
}
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 46f9dfb60469..a0702b6be3e8 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1914,7 +1914,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
struct hv_send_ipi_ex send_ipi_ex;
struct hv_send_ipi send_ipi;
DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
- unsigned long valid_bank_mask;
+ u64 valid_bank_mask;
u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
u32 vector;
bool all_cpus;
@@ -1956,7 +1956,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;
- if (hc->var_cnt != bitmap_weight(&valid_bank_mask, 64))
+ if (hc->var_cnt != bitmap_weight((unsigned long *)&valid_bank_mask, 64))
return HV_STATUS_INVALID_HYPERCALL_INPUT;
if (all_cpus)
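
The type change above matters on 32-bit kernels, where an unsigned long would
truncate the 64-bit bank mask; the cast keeps bitmap_weight() usable over both
words. A minimal sketch of an equivalent overflow-safe check, assuming only
hweight64() from linux/bitops.h (the helper name is hypothetical):

/* Count set banks in a 64-bit mask without aliasing it as a bitmap. */
static inline bool hv_var_cnt_matches(u64 valid_bank_mask, u64 var_cnt)
{
	return var_cnt == hweight64(valid_bank_mask);
}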
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 311e4e1d7870..45e1573f8f1d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5470,14 +5470,16 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
uint i;
if (pcid == kvm_get_active_pcid(vcpu)) {
- mmu->invlpg(vcpu, gva, mmu->root.hpa);
+ if (mmu->invlpg)
+ mmu->invlpg(vcpu, gva, mmu->root.hpa);
tlb_flush = true;
}
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
if (VALID_PAGE(mmu->prev_roots[i].hpa) &&
pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd)) {
- mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+ if (mmu->invlpg)
+ mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
tlb_flush = true;
}
}
@@ -5665,6 +5667,7 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm)
{
struct kvm_mmu_page *sp, *node;
int nr_zapped, batch = 0;
+ bool unstable;
restart:
list_for_each_entry_safe_reverse(sp, node,
@@ -5696,11 +5699,12 @@ restart:
goto restart;
}
- if (__kvm_mmu_prepare_zap_page(kvm, sp,
- &kvm->arch.zapped_obsolete_pages, &nr_zapped)) {
- batch += nr_zapped;
+ unstable = __kvm_mmu_prepare_zap_page(kvm, sp,
+ &kvm->arch.zapped_obsolete_pages, &nr_zapped);
+ batch += nr_zapped;
+
+ if (unstable)
goto restart;
- }
}
/*
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index eca39f56c231..0604bc29f0b8 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -171,9 +171,12 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
return true;
}
-static int cmp_u64(const void *a, const void *b)
+static int cmp_u64(const void *pa, const void *pb)
{
- return *(__u64 *)a - *(__u64 *)b;
+ u64 a = *(u64 *)pa;
+ u64 b = *(u64 *)pb;
+
+ return (a > b) - (a < b);
}
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
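
The comparator rewrite above fixes a subtle sort bug: the old body computed a
u64 difference and truncated it to int, so keys differing only above bit 31
(or wrapping the sign bit) could compare as equal or inverted. A small
self-contained demonstration of the failure mode and the branchless idiom,
assuming nothing beyond stdint.h:

#include <stdint.h>

/* Broken: 0x100000000 - 0 truncates to (int)0, i.e. "equal". */
static int cmp_u64_broken(const void *pa, const void *pb)
{
	return *(const uint64_t *)pa - *(const uint64_t *)pb;
}

/* Fixed: (a > b) - (a < b) yields -1, 0 or 1 with no overflow. */
static int cmp_u64_fixed(const void *pa, const void *pb)
{
	uint64_t a = *(const uint64_t *)pa;
	uint64_t b = *(const uint64_t *)pb;

	return (a > b) - (a < b);
}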
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 7c392873626f..636c77ef55fc 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -562,12 +562,20 @@ e_unpin:
static int sev_es_sync_vmsa(struct vcpu_svm *svm)
{
- struct vmcb_save_area *save = &svm->vmcb->save;
+ struct sev_es_save_area *save = svm->sev_es.vmsa;
/* Check some debug related fields before encrypting the VMSA */
- if (svm->vcpu.guest_debug || (save->dr7 & ~DR7_FIXED_1))
+ if (svm->vcpu.guest_debug || (svm->vmcb->save.dr7 & ~DR7_FIXED_1))
return -EINVAL;
+ /*
+ * SEV-ES will use a VMSA that is pointed to by the VMCB, not
+ * the traditional VMSA that is part of the VMCB. Copy the
+ * traditional VMSA as it has been built so far (in prep
+ * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
+ */
+ memcpy(save, &svm->vmcb->save, sizeof(svm->vmcb->save));
+
+ /* Sync registers */
save->rax = svm->vcpu.arch.regs[VCPU_REGS_RAX];
save->rbx = svm->vcpu.arch.regs[VCPU_REGS_RBX];
@@ -595,14 +603,6 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
save->xss = svm->vcpu.arch.ia32_xss;
save->dr6 = svm->vcpu.arch.dr6;
- /*
- * SEV-ES will use a VMSA that is pointed to by the VMCB, not
- * the traditional VMSA that is part of the VMCB. Copy the
- * traditional VMSA as it has been built so far (in prep
- * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
- */
- memcpy(svm->sev_es.vmsa, save, sizeof(*save));
-
return 0;
}
@@ -2966,7 +2966,7 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm)
sev_enc_bit));
}
-void sev_es_prepare_switch_to_guest(struct vmcb_save_area *hostsa)
+void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
{
/*
* As an SEV-ES guest, hardware will restore the host state on VMEXIT,
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 7e45d03cd018..17d334ef5430 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1270,8 +1270,8 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
*/
vmsave(__sme_page_pa(sd->save_area));
if (sev_es_guest(vcpu->kvm)) {
- struct vmcb_save_area *hostsa;
- hostsa = (struct vmcb_save_area *)(page_address(sd->save_area) + 0x400);
+ struct sev_es_save_area *hostsa;
+ hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400);
sev_es_prepare_switch_to_guest(hostsa);
}
@@ -3117,8 +3117,8 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
"tr:",
save01->tr.selector, save01->tr.attrib,
save01->tr.limit, save01->tr.base);
- pr_err("cpl: %d efer: %016llx\n",
- save->cpl, save->efer);
+ pr_err("vmpl: %d cpl: %d efer: %016llx\n",
+ save->vmpl, save->cpl, save->efer);
pr_err("%-15s %016llx %-13s %016llx\n",
"cr0:", save->cr0, "cr2:", save->cr2);
pr_err("%-15s %016llx %-13s %016llx\n",
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index f76deff71002..2d83845b9032 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -181,7 +181,7 @@ struct svm_nested_state {
struct vcpu_sev_es_state {
/* SEV-ES support */
- struct vmcb_save_area *vmsa;
+ struct sev_es_save_area *vmsa;
struct ghcb *ghcb;
struct kvm_host_map ghcb_map;
bool received_first_sipi;
@@ -622,7 +622,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
void sev_es_init_vmcb(struct vcpu_svm *svm);
void sev_es_vcpu_reset(struct vcpu_svm *svm);
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
-void sev_es_prepare_switch_to_guest(struct vmcb_save_area *hostsa);
+void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa);
void sev_es_unmap_ghcb(struct vcpu_svm *svm);
/* vmenter.S */
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index 65d15df6212d..0e65d00e2339 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -54,8 +54,8 @@ static void delay_loop(u64 __loops)
" jnz 2b \n"
"3: dec %0 \n"
- : /* we don't need output */
- :"a" (loops)
+ : "+a" (loops)
+ :
);
}
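
The constraint change above is load-bearing, not cosmetic: the asm body
decrements the register holding loops, so declaring it as an input-only "a"
operand let the compiler assume %eax still held the original value afterwards.
A minimal, self-contained sketch of the read-write ("+") constraint
(illustrative, not the kernel's delay loop):

/* Decrement n to zero in asm; "+r" marks n as both read and written.
 * Assumes n > 0 on entry. */
static inline unsigned long spin_down(unsigned long n)
{
	asm volatile("1:	dec	%0\n\t"
		     "jnz	1b"
		     : "+r" (n));
	return n;	/* always 0 here */
}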
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index b781d324211b..21104c41cba0 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -342,9 +342,9 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff)
*/
static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx)
{
-#ifdef CONFIG_X86_64
unsigned short sel;
+#ifdef CONFIG_X86_64
switch (seg_reg_idx) {
case INAT_SEG_REG_IGNORE:
return 0;
@@ -402,7 +402,8 @@ static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx)
case INAT_SEG_REG_FS:
return (unsigned short)(regs->fs & 0xffff);
case INAT_SEG_REG_GS:
- return get_user_gs(regs);
+ savesegment(gs, sel);
+ return sel;
case INAT_SEG_REG_IGNORE:
default:
return -EINVAL;
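
This hunk, and the math-emu one further down, replace the lazy 32-bit GS
accessors with savesegment(), which reads the live selector from the CPU. A
rough sketch of what the macro expands to (the real definition is in
arch/x86/include/asm/segment.h; treat this as illustrative):

/* Read the current selector of 'seg' (e.g. gs) into 'value'. */
#define savesegment_sketch(seg, value) \
	asm("mov %%" #seg ",%0" : "=r" (value) : : "memory")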
diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c
index 2b3eb8c948a3..a58f451a7dd3 100644
--- a/arch/x86/lib/kaslr.c
+++ b/arch/x86/lib/kaslr.c
@@ -11,7 +11,7 @@
#include <asm/msr.h>
#include <asm/archrandom.h>
#include <asm/e820/api.h>
-#include <asm/io.h>
+#include <asm/shared/io.h>
/*
* When built for the regular kernel, several functions need to be stubbed out
diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/arch/x86/lib/mmx_32.c
+++ /dev/null
diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c
index b82ca14ba718..4a9fd9029a53 100644
--- a/arch/x86/math-emu/get_address.c
+++ b/arch/x86/math-emu/get_address.c
@@ -153,7 +153,7 @@ static long pm_address(u_char FPU_modrm, u_char segment,
switch (segment) {
case PREFIX_GS_ - 1:
/* user gs handling can be lazy, use special accessors */
- addr->selector = get_user_gs(FPU_info->regs);
+ savesegment(gs, addr->selector);
break;
default:
addr->selector = PM_REG_(segment);
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index fe3d3061fc11..d957dc15b371 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -20,13 +20,12 @@ CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
endif
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o mmap.o \
- pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o
+ pgtable.o physaddr.o tlb.o cpu_entry_area.o maccess.o
obj-y += pat/
# Make sure __phys_addr has no stackprotector
CFLAGS_physaddr.o := -fno-stack-protector
-CFLAGS_setup_nx.o := -fno-stack-protector
CFLAGS_mem_encrypt_identity.o := -fno-stack-protector
CFLAGS_fault.o := -I $(srctree)/$(src)/../include/asm/trace
diff --git a/arch/x86/mm/amdtopology.c b/arch/x86/mm/amdtopology.c
index 058b2f36b3a6..b3ca7d23e4b0 100644
--- a/arch/x86/mm/amdtopology.c
+++ b/arch/x86/mm/amdtopology.c
@@ -154,7 +154,7 @@ int __init amd_numa_init(void)
node_set(nodeid, numa_nodes_parsed);
}
- if (!nodes_weight(numa_nodes_parsed))
+ if (nodes_empty(numa_nodes_parsed))
return -ENOENT;
/*
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index d0074c6ed31a..fad8faa29d04 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -149,7 +149,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
unsigned char opcode;
if (user_mode(regs)) {
- if (get_user(opcode, instr))
+ if (get_user(opcode, (unsigned char __user *) instr))
break;
} else {
if (get_kernel_nofault(opcode, instr))
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e2942335d143..61d0ab154f96 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -110,7 +110,6 @@ int force_personality32;
/*
* noexec32=on|off
* Control non executable heap for 32bit processes.
- * To control the stack too use noexec=off
*
* on PROT_READ does not imply PROT_EXEC for 32-bit processes (default)
* off PROT_READ implies PROT_EXEC
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 17a492c27306..1ad0228f8ceb 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -242,10 +242,15 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size,
* If the page being mapped is in memory and SEV is active then
* make sure the memory encryption attribute is enabled in the
* resulting mapping.
+ * In TDX guests, memory is marked private by default. If encryption
+ * is not requested (via the 'encrypted' argument), explicitly set the
+ * decrypted attribute on all ioremapped memory.
*/
prot = PAGE_KERNEL_IO;
if ((io_desc.flags & IORES_MAP_ENCRYPTED) || encrypted)
prot = pgprot_encrypted(prot);
+ else
+ prot = pgprot_decrypted(prot);
switch (pcm) {
case _PAGE_CACHE_MODE_UC:
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 50d209939c66..11350e2fd736 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -42,7 +42,14 @@ bool force_dma_unencrypted(struct device *dev)
static void print_mem_encrypt_feature_info(void)
{
- pr_info("AMD Memory Encryption Features active:");
+ pr_info("Memory Encryption Features active:");
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
+ pr_cont(" Intel TDX\n");
+ return;
+ }
+
+ pr_cont(" AMD");
/* Secure Memory Encryption */
if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
@@ -62,6 +69,10 @@ static void print_mem_encrypt_feature_info(void)
if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
pr_cont(" SEV-ES");
+ /* Secure Nested Paging */
+ if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ pr_cont(" SEV-SNP");
+
pr_cont("\n");
}
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index 6169053c2854..d3c88d9ef8d6 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -31,6 +31,7 @@
#include <asm/processor-flags.h>
#include <asm/msr.h>
#include <asm/cmdline.h>
+#include <asm/sev.h>
#include "mm_internal.h"
@@ -48,6 +49,36 @@ EXPORT_SYMBOL(sme_me_mask);
static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE);
/*
+ * SNP-specific routine which needs to additionally change the page state from
+ * private to shared before copying the data from the source to destination and
+ * restore after the copy.
+ */
+static inline void __init snp_memcpy(void *dst, void *src, size_t sz,
+ unsigned long paddr, bool decrypt)
+{
+ unsigned long npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
+
+ if (decrypt) {
+ /*
+ * @paddr needs to be accessed decrypted, mark the page shared in
+ * the RMP table before copying it.
+ */
+ early_snp_set_memory_shared((unsigned long)__va(paddr), paddr, npages);
+
+ memcpy(dst, src, sz);
+
+ /* Restore the page state after the memcpy. */
+ early_snp_set_memory_private((unsigned long)__va(paddr), paddr, npages);
+ } else {
+ /*
+ * @paddr need to be accessed encrypted, no need for the page state
+ * change.
+ */
+ memcpy(dst, src, sz);
+ }
+}
+
+/*
* This routine does not change the underlying encryption setting of the
* page(s) that map this memory. It assumes that eventually the memory is
* meant to be accessed as either encrypted or decrypted but the contents
@@ -95,8 +126,13 @@ static void __init __sme_early_enc_dec(resource_size_t paddr,
* Use a temporary buffer, of cache-line multiple size, to
* avoid data corruption as documented in the APM.
*/
- memcpy(sme_early_buffer, src, len);
- memcpy(dst, sme_early_buffer, len);
+ if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
+ snp_memcpy(sme_early_buffer, src, len, paddr, enc);
+ snp_memcpy(dst, sme_early_buffer, len, paddr, !enc);
+ } else {
+ memcpy(sme_early_buffer, src, len);
+ memcpy(dst, sme_early_buffer, len);
+ }
early_memunmap(dst, len);
early_memunmap(src, len);
@@ -280,11 +316,24 @@ static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
{
+ /*
+ * To maintain the security guarantees of SEV-SNP guests, make sure
+ * to invalidate the memory before the encryption attribute is cleared.
+ */
+ if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc)
+ snp_set_memory_shared(vaddr, npages);
}
/* Return true unconditionally: return value doesn't matter for the SEV side */
static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc)
{
+ /*
+ * After memory is mapped encrypted in the page table, validate it
+ * so that it is consistent with the page table updates.
+ */
+ if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && enc)
+ snp_set_memory_private(vaddr, npages);
+
if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
enc_dec_hypercall(vaddr, npages, enc);
@@ -322,14 +371,28 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
clflush_cache_range(__va(pa), size);
/* Encrypt/decrypt the contents in-place */
- if (enc)
+ if (enc) {
sme_early_encrypt(pa, size);
- else
+ } else {
sme_early_decrypt(pa, size);
+ /*
+ * On SNP, the page state change in the RMP table must happen
+ * before the page table updates.
+ */
+ early_snp_set_memory_shared((unsigned long)__va(pa), pa, 1);
+ }
+
/* Change the page encryption mask. */
new_pte = pfn_pte(pfn, new_prot);
set_pte_atomic(kpte, new_pte);
+
+ /*
+ * If the page is set encrypted in the page table, then update the
+ * RMP table to mark this page private.
+ */
+ if (enc)
+ early_snp_set_memory_private((unsigned long)__va(pa), pa, 1);
}
static int __init early_set_memory_enc_dec(unsigned long vaddr,
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index b43bc24d2bb6..f415498d3175 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -45,6 +45,7 @@
#include <asm/sections.h>
#include <asm/cmdline.h>
#include <asm/coco.h>
+#include <asm/sev.h>
#include "mm_internal.h"
@@ -509,8 +510,11 @@ void __init sme_enable(struct boot_params *bp)
bool active_by_default;
unsigned long me_mask;
char buffer[16];
+ bool snp;
u64 msr;
+ snp = snp_init(bp);
+
/* Check for the SME/SEV support leaf */
eax = 0x80000000;
ecx = 0;
@@ -542,6 +546,10 @@ void __init sme_enable(struct boot_params *bp)
sev_status = __rdmsr(MSR_AMD64_SEV);
feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
+ /* The SEV-SNP CC blob should never be present unless SEV-SNP is enabled. */
+ if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+ snp_abort();
+
/* Check if memory encryption is enabled */
if (feature_mask == AMD_SME_BIT) {
/*
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 933a2ebad471..c3317f0650d8 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -400,7 +400,7 @@ static void leave_uniprocessor(void)
int cpu;
int err;
- if (!cpumask_available(downed_cpus) || cpumask_weight(downed_cpus) == 0)
+ if (!cpumask_available(downed_cpus) || cpumask_empty(downed_cpus))
return;
pr_notice("Re-enabling CPUs...\n");
for_each_cpu(cpu, downed_cpus) {
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index 1a02b791d273..9a9305367fdd 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -123,7 +123,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
* Continue to fill physical nodes with fake nodes until there is no
* memory left on any of them.
*/
- while (nodes_weight(physnode_mask)) {
+ while (!nodes_empty(physnode_mask)) {
for_each_node_mask(i, physnode_mask) {
u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
u64 start, limit, end;
@@ -270,7 +270,7 @@ static int __init split_nodes_size_interleave_uniform(struct numa_meminfo *ei,
* Fill physical nodes with fake nodes of size until there is no memory
* left on any of them.
*/
- while (nodes_weight(physnode_mask)) {
+ while (!nodes_empty(physnode_mask)) {
for_each_node_mask(i, physnode_mask) {
u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
u64 start, limit, end;
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index 4ba2a3ee4bce..d5ef64ddd35e 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -101,7 +101,7 @@ int pat_debug_enable;
static int __init pat_debug_setup(char *str)
{
pat_debug_enable = 1;
- return 0;
+ return 1;
}
__setup("debugpat", pat_debug_setup);
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 5d5c7bb50ce9..ffe3b3a087fe 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -540,7 +540,7 @@ static inline bool pti_kernel_image_global_ok(void)
* cases where RANDSTRUCT is in use to help keep the layout a
* secret.
*/
- if (IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT))
+ if (IS_ENABLED(CONFIG_RANDSTRUCT))
return false;
return true;
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
deleted file mode 100644
index ed5667f5169f..000000000000
--- a/arch/x86/mm/setup_nx.c
+++ /dev/null
@@ -1,62 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/spinlock.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/pgtable.h>
-
-#include <asm/proto.h>
-#include <asm/cpufeature.h>
-
-static int disable_nx;
-
-/*
- * noexec = on|off
- *
- * Control non-executable mappings for processes.
- *
- * on Enable
- * off Disable
- */
-static int __init noexec_setup(char *str)
-{
- if (!str)
- return -EINVAL;
- if (!strncmp(str, "on", 2)) {
- disable_nx = 0;
- } else if (!strncmp(str, "off", 3)) {
- disable_nx = 1;
- }
- x86_configure_nx();
- return 0;
-}
-early_param("noexec", noexec_setup);
-
-void x86_configure_nx(void)
-{
- if (boot_cpu_has(X86_FEATURE_NX) && !disable_nx)
- __supported_pte_mask |= _PAGE_NX;
- else
- __supported_pte_mask &= ~_PAGE_NX;
-}
-
-void __init x86_report_nx(void)
-{
- if (!boot_cpu_has(X86_FEATURE_NX)) {
- printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
- "missing in CPU!\n");
- } else {
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
- if (disable_nx) {
- printk(KERN_INFO "NX (Execute Disable) protection: "
- "disabled by kernel command line option\n");
- } else {
- printk(KERN_INFO "NX (Execute Disable) protection: "
- "active\n");
- }
-#else
- /* 32bit non-PAE kernel, NX cannot be used */
- printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
- "cannot be enabled: non-PAE kernel!\n");
-#endif
- }
-}
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 97b63e35e152..a498b847d740 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -25,6 +25,8 @@
#define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24))
#define PIRQ_VERSION 0x0100
+#define IRT_SIGNATURE (('$' << 0) + ('I' << 8) + ('R' << 16) + ('T' << 24))
+
static int broken_hp_bios_irq9;
static int acer_tm360_irqrouting;
@@ -68,30 +70,99 @@ void (*pcibios_disable_irq)(struct pci_dev *dev) = pirq_disable_irq;
* and perform checksum verification.
*/
-static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr)
+static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr,
+ u8 *limit)
{
struct irq_routing_table *rt;
int i;
u8 sum;
- rt = (struct irq_routing_table *) addr;
+ rt = (struct irq_routing_table *)addr;
if (rt->signature != PIRQ_SIGNATURE ||
rt->version != PIRQ_VERSION ||
rt->size % 16 ||
- rt->size < sizeof(struct irq_routing_table))
+ rt->size < sizeof(struct irq_routing_table) ||
+ (limit && rt->size > limit - addr))
return NULL;
sum = 0;
for (i = 0; i < rt->size; i++)
sum += addr[i];
if (!sum) {
- DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n",
- rt);
+ DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%lx\n",
+ __pa(rt));
return rt;
}
return NULL;
}
+/*
+ * Handle the $IRT PCI IRQ Routing Table format used by AMI for its BCP
+ * (BIOS Configuration Program) external tool meant for tweaking BIOS
+ * structures without the need to rebuild it from sources. The $IRT
+ * format was invented by AMI before Microsoft came up with its
+ * $PIR format, and a $IRT table is therefore present in some systems
+ * that lack a $PIR table.
+ *
+ * It uses the same PCI BIOS 2.1 format for interrupt routing entries
+ * themselves but has a different simpler header prepended instead,
+ * occupying 8 bytes, where a `$IRT' signature is followed by one byte
+ * specifying the total number of interrupt routing entries allocated in
+ * the table, then one byte specifying the actual number of entries used
+ * (which the BCP tool can take advantage of when modifying the table),
+ * and finally a 16-bit word giving the IRQs devoted exclusively to PCI.
+ * Unlike with the $PIR table there is no alignment guarantee.
+ *
+ * Given the similarity of the two formats the $IRT one is trivial to
+ * convert to the $PIR one, which we do here, except that obviously we
+ * have no information as to the router device to use, but we can handle
+ * it by matching PCI device IDs actually seen on the bus against ones
+ * that our individual routers recognise.
+ *
+ * Reportedly there is another $IRT table format where a 16-bit word
+ * follows the header instead, pointing to interrupt routing entries
+ * in a $PIR table provided elsewhere. In that case this code will not
+ * be reached, as the $PIR table will have been chosen instead.
+ */
+static inline struct irq_routing_table *pirq_convert_irt_table(u8 *addr,
+ u8 *limit)
+{
+ struct irt_routing_table *ir;
+ struct irq_routing_table *rt;
+ u16 size;
+ u8 sum;
+ int i;
+
+ ir = (struct irt_routing_table *)addr;
+ if (ir->signature != IRT_SIGNATURE || !ir->used || ir->size < ir->used)
+ return NULL;
+
+ size = sizeof(*ir) + ir->used * sizeof(ir->slots[0]);
+ if (size > limit - addr)
+ return NULL;
+
+ DBG(KERN_DEBUG "PCI: $IRT Interrupt Routing Table found at 0x%lx\n",
+ __pa(ir));
+ size = sizeof(*rt) + ir->used * sizeof(rt->slots[0]);
+ rt = kzalloc(size, GFP_KERNEL);
+ if (!rt)
+ return NULL;
+
+ rt->signature = PIRQ_SIGNATURE;
+ rt->version = PIRQ_VERSION;
+ rt->size = size;
+ rt->exclusive_irqs = ir->exclusive_irqs;
+ for (i = 0; i < ir->used; i++)
+ rt->slots[i] = ir->slots[i];
+
+ addr = (u8 *)rt;
+ sum = 0;
+ for (i = 0; i < size; i++)
+ sum += addr[i];
+ rt->checksum = -sum;
+
+ return rt;
+}
/*
* Search 0xf0000 -- 0xfffff for the PCI IRQ Routing Table.
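
pirq_convert_irt_table() above parses the 8-byte header described in the
comment; the struct irt_routing_table it casts to is defined elsewhere in this
patch. A sketch matching the described layout (field names assumed):

struct irt_routing_table {
	u32 signature;			/* IRT_SIGNATURE, i.e. "$IRT" */
	u8 size;			/* number of entries provided for */
	u8 used;			/* number of entries actually used */
	u16 exclusive_irqs;		/* IRQs devoted exclusively to PCI */
	struct irq_info slots[];	/* PCI BIOS 2.1 routing entries */
} __attribute__((packed));

The checksum fix-up at the end of the conversion makes all bytes of the
synthesized table sum to zero modulo 256, which is exactly the $PIR property
that pirq_check_routing_table() verifies.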
@@ -99,17 +170,29 @@ static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr)
static struct irq_routing_table * __init pirq_find_routing_table(void)
{
+ u8 * const bios_start = (u8 *)__va(0xf0000);
+ u8 * const bios_end = (u8 *)__va(0x100000);
u8 *addr;
struct irq_routing_table *rt;
if (pirq_table_addr) {
- rt = pirq_check_routing_table((u8 *) __va(pirq_table_addr));
+ rt = pirq_check_routing_table((u8 *)__va(pirq_table_addr),
+ NULL);
if (rt)
return rt;
printk(KERN_WARNING "PCI: PIRQ table NOT found at pirqaddr\n");
}
- for (addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) {
- rt = pirq_check_routing_table(addr);
+ for (addr = bios_start;
+ addr < bios_end - sizeof(struct irq_routing_table);
+ addr += 16) {
+ rt = pirq_check_routing_table(addr, bios_end);
+ if (rt)
+ return rt;
+ }
+ for (addr = bios_start;
+ addr < bios_end - sizeof(struct irt_routing_table);
+ addr++) {
+ rt = pirq_convert_irt_table(addr, bios_end);
if (rt)
return rt;
}
@@ -135,7 +218,8 @@ static void __init pirq_peer_trick(void)
#ifdef DEBUG
{
int j;
- DBG(KERN_DEBUG "%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot);
+ DBG(KERN_DEBUG "%02x:%02x.%x slot=%02x",
+ e->bus, e->devfn / 8, e->devfn % 8, e->slot);
for (j = 0; j < 4; j++)
DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap);
DBG("\n");
@@ -253,6 +337,15 @@ static void write_pc_conf_nybble(u8 base, u8 index, u8 val)
pc_conf_set(reg, x);
}
+/*
+ * FinALi pirq rules are as follows:
+ *
+ * - bit 0 selects between INTx Routing Table Mapping Registers,
+ *
+ * - bit 3 selects the nibble within the INTx Routing Table Mapping Register,
+ *
+ * - bits 7:4 map to bits 3:0 of the PCI INTx Sensitivity Register.
+ */
static int pirq_finali_get(struct pci_dev *router, struct pci_dev *dev,
int pirq)
{
@@ -260,11 +353,13 @@ static int pirq_finali_get(struct pci_dev *router, struct pci_dev *dev,
0, 9, 3, 10, 4, 5, 7, 6, 0, 11, 0, 12, 0, 14, 0, 15
};
unsigned long flags;
+ u8 index;
u8 x;
+ index = (pirq & 1) << 1 | (pirq & 8) >> 3;
raw_spin_lock_irqsave(&pc_conf_lock, flags);
pc_conf_set(PC_CONF_FINALI_LOCK, PC_CONF_FINALI_LOCK_KEY);
- x = irqmap[read_pc_conf_nybble(PC_CONF_FINALI_PCI_INTX_RT1, pirq - 1)];
+ x = irqmap[read_pc_conf_nybble(PC_CONF_FINALI_PCI_INTX_RT1, index)];
pc_conf_set(PC_CONF_FINALI_LOCK, 0);
raw_spin_unlock_irqrestore(&pc_conf_lock, flags);
return x;
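
A worked example of the index derivation used in both accessors above,
following the bit layout in the comment and assuming read_pc_conf_nybble()
addresses register index/2, nibble index%2:

/* pirq 0x09: bit 0 = 1, bit 3 = 1 -> index = (1 << 1) | 1 = 3,
 * i.e. the high nibble of the second INTx routing register. */
static inline u8 finali_rt_index(u8 pirq)
{
	return (pirq & 1) << 1 | (pirq & 8) >> 3;
}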
@@ -278,13 +373,15 @@ static int pirq_finali_set(struct pci_dev *router, struct pci_dev *dev,
};
u8 val = irqmap[irq];
unsigned long flags;
+ u8 index;
if (!val)
return 0;
+ index = (pirq & 1) << 1 | (pirq & 8) >> 3;
raw_spin_lock_irqsave(&pc_conf_lock, flags);
pc_conf_set(PC_CONF_FINALI_LOCK, PC_CONF_FINALI_LOCK_KEY);
- write_pc_conf_nybble(PC_CONF_FINALI_PCI_INTX_RT1, pirq - 1, val);
+ write_pc_conf_nybble(PC_CONF_FINALI_PCI_INTX_RT1, index, val);
pc_conf_set(PC_CONF_FINALI_LOCK, 0);
raw_spin_unlock_irqrestore(&pc_conf_lock, flags);
return 1;
@@ -293,7 +390,7 @@ static int pirq_finali_set(struct pci_dev *router, struct pci_dev *dev,
static int pirq_finali_lvl(struct pci_dev *router, struct pci_dev *dev,
int pirq, int irq)
{
- u8 mask = ~(1u << (pirq - 1));
+ u8 mask = ~((pirq & 0xf0u) >> 4);
unsigned long flags;
u8 trig;
@@ -579,6 +676,81 @@ static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq,
return 1;
}
+
+/*
+ * PIRQ routing for the SiS85C497 AT Bus Controller & Megacell (ATM)
+ * ISA bridge used with the SiS 85C496/497 486 Green PC VESA/ISA/PCI
+ * Chipset.
+ *
+ * There are four PCI INTx#-to-IRQ Link registers provided in the
+ * SiS85C497 part of the peculiar combined 85C496/497 configuration
+ * space decoded by the SiS85C496 PCI & CPU Memory Controller (PCM)
+ * host bridge, at 0xc0/0xc1/0xc2/0xc3 respectively for the PCI INT
+ * A/B/C/D lines. Bit 7 enables the respective link if set and bits
+ * 3:0 select the 8259A IRQ line as follows:
+ *
+ * 0000 : Reserved
+ * 0001 : Reserved
+ * 0010 : Reserved
+ * 0011 : IRQ3
+ * 0100 : IRQ4
+ * 0101 : IRQ5
+ * 0110 : IRQ6
+ * 0111 : IRQ7
+ * 1000 : Reserved
+ * 1001 : IRQ9
+ * 1010 : IRQ10
+ * 1011 : IRQ11
+ * 1100 : IRQ12
+ * 1101 : Reserved
+ * 1110 : IRQ14
+ * 1111 : IRQ15
+ *
+ * We avoid using a reserved value for disabled links, hence the
+ * choice of IRQ15 for that case.
+ *
+ * References:
+ *
+ * "486 Green PC VESA/ISA/PCI Chipset, SiS 85C496/497", Rev 3.0,
+ * Silicon Integrated Systems Corp., July 1995
+ */
+
+#define PCI_SIS497_INTA_TO_IRQ_LINK 0xc0u
+
+#define PIRQ_SIS497_IRQ_MASK 0x0fu
+#define PIRQ_SIS497_IRQ_ENABLE 0x80u
+
+static int pirq_sis497_get(struct pci_dev *router, struct pci_dev *dev,
+ int pirq)
+{
+ int reg;
+ u8 x;
+
+ reg = pirq;
+ if (reg >= 1 && reg <= 4)
+ reg += PCI_SIS497_INTA_TO_IRQ_LINK - 1;
+
+ pci_read_config_byte(router, reg, &x);
+ return (x & PIRQ_SIS497_IRQ_ENABLE) ? (x & PIRQ_SIS497_IRQ_MASK) : 0;
+}
+
+static int pirq_sis497_set(struct pci_dev *router, struct pci_dev *dev,
+ int pirq, int irq)
+{
+ int reg;
+ u8 x;
+
+ reg = pirq;
+ if (reg >= 1 && reg <= 4)
+ reg += PCI_SIS497_INTA_TO_IRQ_LINK - 1;
+
+ pci_read_config_byte(router, reg, &x);
+ x &= ~(PIRQ_SIS497_IRQ_MASK | PIRQ_SIS497_IRQ_ENABLE);
+ x |= irq ? (PIRQ_SIS497_IRQ_ENABLE | irq) : PIRQ_SIS497_IRQ_MASK;
+ pci_write_config_byte(router, reg, x);
+ return 1;
+}
+
/*
* PIRQ routing for SiS 85C503 router used in several SiS chipsets.
* We have to deal with the following issues here:
@@ -640,11 +812,12 @@ static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq,
* bit 6-4 are probably unused, not like 5595
*/
-#define PIRQ_SIS_IRQ_MASK 0x0f
-#define PIRQ_SIS_IRQ_DISABLE 0x80
-#define PIRQ_SIS_USB_ENABLE 0x40
+#define PIRQ_SIS503_IRQ_MASK 0x0f
+#define PIRQ_SIS503_IRQ_DISABLE 0x80
+#define PIRQ_SIS503_USB_ENABLE 0x40
-static int pirq_sis_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+static int pirq_sis503_get(struct pci_dev *router, struct pci_dev *dev,
+ int pirq)
{
u8 x;
int reg;
@@ -653,10 +826,11 @@ static int pirq_sis_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
if (reg >= 0x01 && reg <= 0x04)
reg += 0x40;
pci_read_config_byte(router, reg, &x);
- return (x & PIRQ_SIS_IRQ_DISABLE) ? 0 : (x & PIRQ_SIS_IRQ_MASK);
+ return (x & PIRQ_SIS503_IRQ_DISABLE) ? 0 : (x & PIRQ_SIS503_IRQ_MASK);
}
-static int pirq_sis_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+static int pirq_sis503_set(struct pci_dev *router, struct pci_dev *dev,
+ int pirq, int irq)
{
u8 x;
int reg;
@@ -665,8 +839,8 @@ static int pirq_sis_set(struct pci_dev *router, struct pci_dev *dev, int pirq, i
if (reg >= 0x01 && reg <= 0x04)
reg += 0x40;
pci_read_config_byte(router, reg, &x);
- x &= ~(PIRQ_SIS_IRQ_MASK | PIRQ_SIS_IRQ_DISABLE);
- x |= irq ? irq: PIRQ_SIS_IRQ_DISABLE;
+ x &= ~(PIRQ_SIS503_IRQ_MASK | PIRQ_SIS503_IRQ_DISABLE);
+ x |= irq ? irq : PIRQ_SIS503_IRQ_DISABLE;
pci_write_config_byte(router, reg, x);
return 1;
}
@@ -958,13 +1132,19 @@ static __init int serverworks_router_probe(struct irq_router *r,
static __init int sis_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
{
- if (device != PCI_DEVICE_ID_SI_503)
- return 0;
-
- r->name = "SIS";
- r->get = pirq_sis_get;
- r->set = pirq_sis_set;
- return 1;
+ switch (device) {
+ case PCI_DEVICE_ID_SI_496:
+ r->name = "SiS85C497";
+ r->get = pirq_sis497_get;
+ r->set = pirq_sis497_set;
+ return 1;
+ case PCI_DEVICE_ID_SI_503:
+ r->name = "SiS85C503";
+ r->get = pirq_sis503_get;
+ r->set = pirq_sis503_set;
+ return 1;
+ }
+ return 0;
}
static __init int cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
@@ -1084,10 +1264,32 @@ static struct pci_dev *pirq_router_dev;
* chipset" ?
*/
+static bool __init pirq_try_router(struct irq_router *r,
+ struct irq_routing_table *rt,
+ struct pci_dev *dev)
+{
+ struct irq_router_handler *h;
+
+ DBG(KERN_DEBUG "PCI: Trying IRQ router for [%04x:%04x]\n",
+ dev->vendor, dev->device);
+
+ for (h = pirq_routers; h->vendor; h++) {
+ /* First look for a router match */
+ if (rt->rtr_vendor == h->vendor &&
+ h->probe(r, dev, rt->rtr_device))
+ return true;
+ /* Fall back to a device match */
+ if (dev->vendor == h->vendor &&
+ h->probe(r, dev, dev->device))
+ return true;
+ }
+ return false;
+}
+
static void __init pirq_find_router(struct irq_router *r)
{
struct irq_routing_table *rt = pirq_table;
- struct irq_router_handler *h;
+ struct pci_dev *dev;
#ifdef CONFIG_PCI_BIOS
if (!rt->signature) {
@@ -1106,50 +1308,94 @@ static void __init pirq_find_router(struct irq_router *r)
DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for [%04x:%04x]\n",
rt->rtr_vendor, rt->rtr_device);
- pirq_router_dev = pci_get_domain_bus_and_slot(0, rt->rtr_bus,
- rt->rtr_devfn);
- if (!pirq_router_dev) {
- DBG(KERN_DEBUG "PCI: Interrupt router not found at "
- "%02x:%02x\n", rt->rtr_bus, rt->rtr_devfn);
- return;
+ /* Use any vendor:device provided by the routing table or try all. */
+ if (rt->rtr_vendor) {
+ dev = pci_get_domain_bus_and_slot(0, rt->rtr_bus,
+ rt->rtr_devfn);
+ if (dev && pirq_try_router(r, rt, dev))
+ pirq_router_dev = dev;
+ } else {
+ dev = NULL;
+ for_each_pci_dev(dev) {
+ if (pirq_try_router(r, rt, dev)) {
+ pirq_router_dev = dev;
+ break;
+ }
+ }
}
- for (h = pirq_routers; h->vendor; h++) {
- /* First look for a router match */
- if (rt->rtr_vendor == h->vendor &&
- h->probe(r, pirq_router_dev, rt->rtr_device))
- break;
- /* Fall back to a device match */
- if (pirq_router_dev->vendor == h->vendor &&
- h->probe(r, pirq_router_dev, pirq_router_dev->device))
- break;
- }
- dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x:%04x]\n",
- pirq_router.name,
- pirq_router_dev->vendor, pirq_router_dev->device);
+ if (pirq_router_dev)
+ dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x:%04x]\n",
+ pirq_router.name,
+ pirq_router_dev->vendor, pirq_router_dev->device);
+ else
+ DBG(KERN_DEBUG "PCI: Interrupt router not found at "
+ "%02x:%02x\n", rt->rtr_bus, rt->rtr_devfn);
/* The device remains referenced for the kernel lifetime */
}
-static struct irq_info *pirq_get_info(struct pci_dev *dev)
+/*
+ * We're supposed to match on the PCI device only and not the function,
+ * but some BIOSes build their tables with the PCI function included
+ * for motherboard devices, so if a complete match is found, then give
+ * it precedence over a slot match.
+ */
+static struct irq_info *pirq_get_dev_info(struct pci_dev *dev)
{
struct irq_routing_table *rt = pirq_table;
int entries = (rt->size - sizeof(struct irq_routing_table)) /
sizeof(struct irq_info);
+ struct irq_info *slotinfo = NULL;
struct irq_info *info;
for (info = rt->slots; entries--; info++)
- if (info->bus == dev->bus->number &&
- PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
- return info;
- return NULL;
+ if (info->bus == dev->bus->number) {
+ if (info->devfn == dev->devfn)
+ return info;
+ if (!slotinfo &&
+ PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
+ slotinfo = info;
+ }
+ return slotinfo;
+}
+
+/*
+ * Buses behind bridges are typically not listed in the PIRQ routing table.
+ * Do the usual dance, then: walk up the tree of bridges, adjusting the
+ * pin number accordingly along the way, until the originating root-bus
+ * device has been reached, and then use its routing information.
+ */
+static struct irq_info *pirq_get_info(struct pci_dev *dev, u8 *pin)
+{
+ struct pci_dev *temp_dev = dev;
+ struct irq_info *info;
+ u8 temp_pin = *pin;
+ u8 dpin = temp_pin;
+
+ info = pirq_get_dev_info(dev);
+ while (!info && temp_dev->bus->parent) {
+ struct pci_dev *bridge = temp_dev->bus->self;
+
+ temp_pin = pci_swizzle_interrupt_pin(temp_dev, temp_pin);
+ info = pirq_get_dev_info(bridge);
+ if (info)
+ dev_warn(&dev->dev,
+ "using bridge %s INT %c to get INT %c\n",
+ pci_name(bridge),
+ 'A' + temp_pin - 1, 'A' + dpin - 1);
+
+ temp_dev = bridge;
+ }
+ *pin = temp_pin;
+ return info;
}
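
pci_swizzle_interrupt_pin() used above applies the standard PCI-to-PCI bridge
swizzle, pin' = ((pin - 1 + PCI_SLOT(devfn)) % 4) + 1. A tiny worked sketch
(helper name hypothetical):

/* INTB (pin 2) on device 3 behind a bridge emerges as INTA (pin 1):
 * ((2 - 1 + 3) % 4) + 1 = 1. */
static inline u8 swizzle_pin(u8 slot, u8 pin)
{
	return ((pin - 1 + slot) % 4) + 1;
}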
static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
{
- u8 pin;
struct irq_info *info;
int i, pirq, newirq;
+ u8 dpin, pin;
int irq = 0;
u32 mask;
struct irq_router *r = &pirq_router;
@@ -1157,8 +1403,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
char *msg = NULL;
/* Find IRQ pin */
- pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
- if (!pin) {
+ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &dpin);
+ if (!dpin) {
dev_dbg(&dev->dev, "no interrupt pin\n");
return 0;
}
@@ -1171,20 +1417,21 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
if (!pirq_table)
return 0;
- info = pirq_get_info(dev);
+ pin = dpin;
+ info = pirq_get_info(dev, &pin);
if (!info) {
dev_dbg(&dev->dev, "PCI INT %c not found in routing table\n",
- 'A' + pin - 1);
+ 'A' + dpin - 1);
return 0;
}
pirq = info->irq[pin - 1].link;
mask = info->irq[pin - 1].bitmap;
if (!pirq) {
- dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + pin - 1);
+ dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + dpin - 1);
return 0;
}
dev_dbg(&dev->dev, "PCI INT %c -> PIRQ %02x, mask %04x, excl %04x",
- 'A' + pin - 1, pirq, mask, pirq_table->exclusive_irqs);
+ 'A' + dpin - 1, pirq, mask, pirq_table->exclusive_irqs);
mask &= pcibios_irq_mask;
/* Work around broken HP Pavilion Notebooks which assign USB to
@@ -1226,7 +1473,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
newirq = i;
}
}
- dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + pin - 1, newirq);
+ dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + dpin - 1, newirq);
/* Check if it is hardcoded */
if ((pirq & 0xf0) == 0xf0) {
@@ -1260,15 +1507,17 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
return 0;
}
}
- dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin - 1, irq);
+ dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n",
+ msg, 'A' + dpin - 1, irq);
/* Update IRQ for all devices with the same pirq value */
for_each_pci_dev(dev2) {
- pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin);
- if (!pin)
+ pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &dpin);
+ if (!dpin)
continue;
- info = pirq_get_info(dev2);
+ pin = dpin;
+ info = pirq_get_info(dev2, &pin);
if (!info)
continue;
if (info->irq[pin - 1].link == pirq) {
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 147c30a81f15..1591d67e0bcd 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -93,6 +93,9 @@ static const unsigned long * const efi_tables[] = {
#ifdef CONFIG_LOAD_UEFI_KEYS
&efi.mokvar_table,
#endif
+#ifdef CONFIG_EFI_COCO_SECRET
+ &efi.coco_secret,
+#endif
};
u64 efi_setup; /* efi setup_data physical address */
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index 1e9ff28bc2e0..a60af0230e27 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -244,8 +244,10 @@ static inline bool uv_nmi_action_is(const char *action)
/* Setup which NMI support is present in system */
static void uv_nmi_setup_mmrs(void)
{
+ bool new_nmi_method_only = false;
+
/* First determine arch specific MMRs to handshake with BIOS */
- if (UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK) {
+ if (UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK) { /* UV2,3,4 setup */
uvh_nmi_mmrx = UVH_EVENT_OCCURRED0;
uvh_nmi_mmrx_clear = UVH_EVENT_OCCURRED0_ALIAS;
uvh_nmi_mmrx_shift = UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT;
@@ -255,26 +257,25 @@ static void uv_nmi_setup_mmrs(void)
uvh_nmi_mmrx_req = UVH_BIOS_KERNEL_MMR_ALIAS_2;
uvh_nmi_mmrx_req_shift = 62;
- } else if (UVH_EVENT_OCCURRED1_EXTIO_INT0_MASK) {
+ } else if (UVH_EVENT_OCCURRED1_EXTIO_INT0_MASK) { /* UV5+ setup */
uvh_nmi_mmrx = UVH_EVENT_OCCURRED1;
uvh_nmi_mmrx_clear = UVH_EVENT_OCCURRED1_ALIAS;
uvh_nmi_mmrx_shift = UVH_EVENT_OCCURRED1_EXTIO_INT0_SHFT;
uvh_nmi_mmrx_type = "OCRD1-EXTIO_INT0";
- uvh_nmi_mmrx_supported = UVH_EXTIO_INT0_BROADCAST;
- uvh_nmi_mmrx_req = UVH_BIOS_KERNEL_MMR_ALIAS_2;
- uvh_nmi_mmrx_req_shift = 62;
+ new_nmi_method_only = true; /* Newer nmi always valid on UV5+ */
+ uvh_nmi_mmrx_req = 0; /* no request bit to clear */
} else {
- pr_err("UV:%s:cannot find EVENT_OCCURRED*_EXTIO_INT0\n",
- __func__);
+ pr_err("UV:%s:NMI support not available on this system\n", __func__);
return;
}
/* Then find out if new NMI is supported */
- if (likely(uv_read_local_mmr(uvh_nmi_mmrx_supported))) {
- uv_write_local_mmr(uvh_nmi_mmrx_req,
- 1UL << uvh_nmi_mmrx_req_shift);
+ if (new_nmi_method_only || uv_read_local_mmr(uvh_nmi_mmrx_supported)) {
+ if (uvh_nmi_mmrx_req)
+ uv_write_local_mmr(uvh_nmi_mmrx_req,
+ 1UL << uvh_nmi_mmrx_req_shift);
nmi_mmr = uvh_nmi_mmrx;
nmi_mmr_clear = uvh_nmi_mmrx_clear;
nmi_mmr_pending = 1UL << uvh_nmi_mmrx_shift;
@@ -985,7 +986,7 @@ static int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
/* Clear global flags */
if (master) {
- if (cpumask_weight(uv_nmi_cpu_mask))
+ if (!cpumask_empty(uv_nmi_cpu_mask))
uv_nmi_cleanup_mask();
atomic_set(&uv_nmi_cpus_in_nmi, -1);
atomic_set(&uv_nmi_cpu, -1);
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index c5e29db02a46..41d7669a97ad 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -67,7 +67,7 @@ void __init reserve_real_mode(void)
memblock_reserve(0, SZ_1M);
}
-static void sme_sev_setup_real_mode(struct trampoline_header *th)
+static void __init sme_sev_setup_real_mode(struct trampoline_header *th)
{
#ifdef CONFIG_AMD_MEM_ENCRYPT
if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S
index 8c1db5bf5d78..2eb62be6d256 100644
--- a/arch/x86/realmode/rm/header.S
+++ b/arch/x86/realmode/rm/header.S
@@ -24,6 +24,7 @@ SYM_DATA_START(real_mode_header)
.long pa_sev_es_trampoline_start
#endif
#ifdef CONFIG_X86_64
+ .long pa_trampoline_start64
.long pa_trampoline_pgd;
#endif
/* ACPI S3 wakeup */
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index cc8391f86cdb..e38d61d6562e 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -70,7 +70,7 @@ SYM_CODE_START(trampoline_start)
movw $__KERNEL_DS, %dx # Data segment descriptor
# Enable protected mode
- movl $X86_CR0_PE, %eax # protected mode (PE) bit
+ movl $(CR0_STATE & ~X86_CR0_PG), %eax
movl %eax, %cr0 # into protected mode
# flush prefetch and jump to startup_32
@@ -143,13 +143,24 @@ SYM_CODE_START(startup_32)
movl %eax, %cr3
# Set up EFER
+ movl $MSR_EFER, %ecx
+ rdmsr
+ /*
+ * Skip writing to EFER if the register already has the desired
+ * value (to avoid a #VE for the TDX guest).
+ */
+ cmp pa_tr_efer, %eax
+ jne .Lwrite_efer
+ cmp pa_tr_efer + 4, %edx
+ je .Ldone_efer
+.Lwrite_efer:
movl pa_tr_efer, %eax
movl pa_tr_efer + 4, %edx
- movl $MSR_EFER, %ecx
wrmsr
- # Enable paging and in turn activate Long Mode
- movl $(X86_CR0_PG | X86_CR0_WP | X86_CR0_PE), %eax
+.Ldone_efer:
+ # Enable paging and in turn activate Long Mode.
+ movl $CR0_STATE, %eax
movl %eax, %cr0
/*
@@ -161,6 +172,19 @@ SYM_CODE_START(startup_32)
ljmpl $__KERNEL_CS, $pa_startup_64
SYM_CODE_END(startup_32)
+SYM_CODE_START(pa_trampoline_compat)
+ /*
+ * In compatibility mode. Prep ESP and DX for startup_32, then disable
+ * paging and complete the switch to legacy 32-bit mode.
+ */
+ movl $rm_stack_end, %esp
+ movw $__KERNEL_DS, %dx
+
+ movl $(CR0_STATE & ~X86_CR0_PG), %eax
+ movl %eax, %cr0
+ ljmpl $__KERNEL32_CS, $pa_startup_32
+SYM_CODE_END(pa_trampoline_compat)
+
.section ".text64","ax"
.code64
.balign 4
@@ -169,6 +193,20 @@ SYM_CODE_START(startup_64)
jmpq *tr_start(%rip)
SYM_CODE_END(startup_64)
+SYM_CODE_START(trampoline_start64)
+ /*
+ * APs start here on a direct transfer from 64-bit BIOS with identity
+ * mapped page tables. Load the kernel's GDT in order to gear down to
+ * 32-bit mode (to handle 4-level vs. 5-level paging), and to (re)load
+ * segment registers. Load the zero IDT so any fault triggers a
+ * shutdown instead of jumping back into BIOS.
+ */
+ lidt tr_idt(%rip)
+ lgdt tr_gdt64(%rip)
+
+ ljmpl *tr_compat(%rip)
+SYM_CODE_END(trampoline_start64)
+
.section ".rodata","a"
# Duplicate the global descriptor table
# so the kernel can live anywhere
@@ -182,6 +220,17 @@ SYM_DATA_START(tr_gdt)
.quad 0x00cf93000000ffff # __KERNEL_DS
SYM_DATA_END_LABEL(tr_gdt, SYM_L_LOCAL, tr_gdt_end)
+SYM_DATA_START(tr_gdt64)
+ .short tr_gdt_end - tr_gdt - 1 # gdt limit
+ .long pa_tr_gdt
+ .long 0
+SYM_DATA_END(tr_gdt64)
+
+SYM_DATA_START(tr_compat)
+ .long pa_trampoline_compat
+ .short __KERNEL32_CS
+SYM_DATA_END(tr_compat)
+
.bss
.balign PAGE_SIZE
SYM_DATA(trampoline_pgd, .space PAGE_SIZE)
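
The EFER hunk above exists because a WRMSR raises a #VE in a TDX guest, and
this early trampoline cannot take one; reading the MSR is safe. A C-level
sketch of the same fast path, assuming the __rdmsr()/__wrmsr() primitives from
asm/msr.h (the helper name is hypothetical):

static void set_efer_sketch(u64 want)
{
	/* Skip the WRMSR (and the #VE it would raise under TDX)
	 * when EFER already holds the desired value. */
	if (__rdmsr(MSR_EFER) != want)
		__wrmsr(MSR_EFER, (u32)want, (u32)(want >> 32));
}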
diff --git a/arch/x86/realmode/rm/trampoline_common.S b/arch/x86/realmode/rm/trampoline_common.S
index 5033e640f957..4331c32c47f8 100644
--- a/arch/x86/realmode/rm/trampoline_common.S
+++ b/arch/x86/realmode/rm/trampoline_common.S
@@ -1,4 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0 */
.section ".rodata","a"
.balign 16
-SYM_DATA_LOCAL(tr_idt, .fill 1, 6, 0)
+
+/*
+ * When a bootloader hands off to the kernel in 32-bit mode, an
+ * IDT with a 2-byte limit and a 4-byte base is needed. When a
+ * bootloader hands off to the kernel in 64-bit mode, the base
+ * address extends to 8 bytes. Reserve enough space for either scenario.
+ */
+SYM_DATA_START_LOCAL(tr_idt)
+ .short 0
+ .quad 0
+SYM_DATA_END(tr_idt)
diff --git a/arch/x86/realmode/rm/wakemain.c b/arch/x86/realmode/rm/wakemain.c
index 1d6437e6d2ba..a6f4d8388ad8 100644
--- a/arch/x86/realmode/rm/wakemain.c
+++ b/arch/x86/realmode/rm/wakemain.c
@@ -62,8 +62,12 @@ static void send_morse(const char *pattern)
}
}
+struct port_io_ops pio_ops;
+
void main(void)
{
+ init_default_io_ops();
+
/* Kill machine if structures are wrong */
if (wakeup_header.real_magic != 0x12345678)
while (1)
diff --git a/arch/x86/virt/vmx/tdx/tdxcall.S b/arch/x86/virt/vmx/tdx/tdxcall.S
new file mode 100644
index 000000000000..49a54356ae99
--- /dev/null
+++ b/arch/x86/virt/vmx/tdx/tdxcall.S
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/asm-offsets.h>
+#include <asm/tdx.h>
+
+/*
+ * TDCALL and SEAMCALL are supported in Binutils >= 2.36.
+ */
+#define tdcall .byte 0x66,0x0f,0x01,0xcc
+#define seamcall .byte 0x66,0x0f,0x01,0xcf
+
+/*
+ * TDX_MODULE_CALL - common helper macro for both
+ * TDCALL and SEAMCALL instructions.
+ *
+ * TDCALL - used by TDX guests to make requests to the
+ * TDX module and hypercalls to the VMM.
+ * SEAMCALL - used by TDX hosts to make requests to the
+ * TDX module.
+ */
+.macro TDX_MODULE_CALL host:req
+ /*
+ * R12 will be used as temporary storage for the struct tdx_module_output
+ * pointer. Since the R12-R15 registers are not used by the TDCALL/SEAMCALL
+ * services supported by this macro, R12 can be reused.
+ */
+
+ /* Callee saved, so preserve it */
+ push %r12
+
+ /*
+ * Push output pointer to stack.
+ * After the operation, it will be fetched into R12 register.
+ */
+ push %r9
+
+ /* Mangle function call ABI into TDCALL/SEAMCALL ABI: */
+ /* Move Leaf ID to RAX */
+ mov %rdi, %rax
+ /* Move input 4 to R9 */
+ mov %r8, %r9
+ /* Move input 3 to R8 */
+ mov %rcx, %r8
+ /* Move input 1 to RCX */
+ mov %rsi, %rcx
+ /* Leave input param 2 in RDX */
+
+ .if \host
+ seamcall
+ /*
+ * SEAMCALL instruction is essentially a VMExit from VMX root
+ * mode to SEAM VMX root mode. VMfailInvalid (CF=1) indicates
+ * that the targeted SEAM firmware is not loaded or disabled,
+ * or P-SEAMLDR is busy with another SEAMCALL. %rax is not
+ * changed in this case.
+ *
+ * Set %rax to TDX_SEAMCALL_VMFAILINVALID for VMfailInvalid.
+ * This value will never be used as actual SEAMCALL error code as
+ * it is from the Reserved status code class.
+ */
+ jnc .Lno_vmfailinvalid
+ mov $TDX_SEAMCALL_VMFAILINVALID, %rax
+.Lno_vmfailinvalid:
+
+ .else
+ tdcall
+ .endif
+
+ /*
+ * Fetch the output pointer from the stack into R12 (used
+ * as temporary storage).
+ */
+ pop %r12
+
+ /*
+ * Since this macro can be invoked with NULL as an output pointer,
+ * check if the caller provided an output struct before storing output
+ * registers.
+ *
+ * Update output registers, even if the call failed (RAX != 0).
+ * Other registers may contain details of the failure.
+ */
+ test %r12, %r12
+ jz .Lno_output_struct
+
+ /* Copy result registers to output struct: */
+ movq %rcx, TDX_MODULE_rcx(%r12)
+ movq %rdx, TDX_MODULE_rdx(%r12)
+ movq %r8, TDX_MODULE_r8(%r12)
+ movq %r9, TDX_MODULE_r9(%r12)
+ movq %r10, TDX_MODULE_r10(%r12)
+ movq %r11, TDX_MODULE_r11(%r12)
+
+.Lno_output_struct:
+ /* Restore the state of R12 register */
+ pop %r12
+.endm
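
The TDX_MODULE_* offsets used above are generated by asm-offsets from the
output structure the macro fills in. A sketch of that structure, with the
field order implied by the stores (the authoritative definition is in
asm/tdx.h):

struct tdx_module_output {
	u64 rcx;
	u64 rdx;
	u64 r8;
	u64 r9;
	u64 r10;
	u64 r11;
};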
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 5038edb79ad5..ca85d1409917 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -30,7 +30,6 @@
#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/edd.h>
-#include <linux/objtool.h>
#include <xen/xen.h>
#include <xen/events.h>
@@ -165,7 +164,6 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
*bx &= maskebx;
}
-STACK_FRAME_NON_STANDARD(xen_cpuid); /* XEN_EMULATE_PREFIX */
static bool __init xen_check_mwait(void)
{
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 688aa8b6ae29..ba7af2eca755 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -260,8 +260,11 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
return 0;
ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
- if (ctxt == NULL)
+ if (ctxt == NULL) {
+ cpumask_clear_cpu(cpu, xen_cpu_initialized_map);
+ cpumask_clear_cpu(cpu, cpu_callout_mask);
return -ENOMEM;
+ }
gdt = get_cpu_gdt_rw(cpu);
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index bd113bc6e192..0b0f0172cced 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -4,6 +4,7 @@ config XTENSA
select ARCH_32BIT_OFF_T
select ARCH_HAS_BINFMT_FLAT if !MMU
select ARCH_HAS_CURRENT_STACK_POINTER
+ select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DMA_PREP_COHERENT if MMU
select ARCH_HAS_SYNC_DMA_FOR_CPU if MMU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE if MMU
@@ -29,8 +30,10 @@ config XTENSA
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL
+ select HAVE_ARCH_KCSAN
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
+ select HAVE_CONTEXT_TRACKING
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS
select HAVE_EXIT_THREAD
@@ -42,6 +45,7 @@ config XTENSA
select HAVE_PERF_EVENTS
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
+ select HAVE_VIRT_CPU_ACCOUNTING_GEN
select IRQ_DOMAIN
select MODULES_USE_ELF_RELA
select PERF_USE_VMALLOC
@@ -79,6 +83,7 @@ config STACKTRACE_SUPPORT
config MMU
def_bool n
+ select PFAULT
config HAVE_XTENSA_GPIO32
def_bool n
@@ -178,6 +183,16 @@ config XTENSA_FAKE_NMI
If unsure, say N.
+config PFAULT
+ bool "Handle protection faults" if EXPERT && !MMU
+ default y
+ help
+ Handle protection faults. MMU configurations must enable it.
+ noMMU configurations may disable it if the memory map in use never
+ generates protection faults, or if faults are always fatal.
+
+ If unsure, say Y.
+
config XTENSA_UNALIGNED_USER
bool "Unaligned memory access in user space"
help
@@ -773,6 +788,9 @@ endmenu
menu "Power management options"
+config ARCH_HIBERNATION_POSSIBLE
+ def_bool y
+
source "kernel/power/Kconfig"
endmenu
diff --git a/arch/xtensa/boot/lib/Makefile b/arch/xtensa/boot/lib/Makefile
index e3d717c7bfa1..162d10af36f3 100644
--- a/arch/xtensa/boot/lib/Makefile
+++ b/arch/xtensa/boot/lib/Makefile
@@ -16,6 +16,7 @@ CFLAGS_REMOVE_inffast.o = -pg
endif
KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
CFLAGS_REMOVE_inflate.o += -fstack-protector -fstack-protector-strong
CFLAGS_REMOVE_zmem.o += -fstack-protector -fstack-protector-strong
diff --git a/arch/xtensa/include/asm/barrier.h b/arch/xtensa/include/asm/barrier.h
index d6f8d4ddc2bc..898ea397e9bc 100644
--- a/arch/xtensa/include/asm/barrier.h
+++ b/arch/xtensa/include/asm/barrier.h
@@ -11,9 +11,15 @@
#include <asm/core.h>
-#define mb() ({ __asm__ __volatile__("memw" : : : "memory"); })
-#define rmb() barrier()
-#define wmb() mb()
+#define __mb() ({ __asm__ __volatile__("memw" : : : "memory"); })
+#define __rmb() barrier()
+#define __wmb() __mb()
+
+#ifdef CONFIG_SMP
+#define __smp_mb() __mb()
+#define __smp_rmb() __rmb()
+#define __smp_wmb() __wmb()
+#endif
#if XCHAL_HAVE_S32C1I
#define __smp_mb__before_atomic() barrier()
diff --git a/arch/xtensa/include/asm/bitops.h b/arch/xtensa/include/asm/bitops.h
index cd225896c40f..e02ec5833389 100644
--- a/arch/xtensa/include/asm/bitops.h
+++ b/arch/xtensa/include/asm/bitops.h
@@ -99,7 +99,7 @@ static inline unsigned long __fls(unsigned long word)
#if XCHAL_HAVE_EXCLUSIVE
#define BIT_OP(op, insn, inv) \
-static inline void op##_bit(unsigned int bit, volatile unsigned long *p)\
+static inline void arch_##op##_bit(unsigned int bit, volatile unsigned long *p)\
{ \
unsigned long tmp; \
unsigned long mask = 1UL << (bit & 31); \
@@ -119,7 +119,7 @@ static inline void op##_bit(unsigned int bit, volatile unsigned long *p)\
#define TEST_AND_BIT_OP(op, insn, inv) \
static inline int \
-test_and_##op##_bit(unsigned int bit, volatile unsigned long *p) \
+arch_test_and_##op##_bit(unsigned int bit, volatile unsigned long *p) \
{ \
unsigned long tmp, value; \
unsigned long mask = 1UL << (bit & 31); \
@@ -142,7 +142,7 @@ test_and_##op##_bit(unsigned int bit, volatile unsigned long *p) \
#elif XCHAL_HAVE_S32C1I
#define BIT_OP(op, insn, inv) \
-static inline void op##_bit(unsigned int bit, volatile unsigned long *p)\
+static inline void arch_##op##_bit(unsigned int bit, volatile unsigned long *p)\
{ \
unsigned long tmp, value; \
unsigned long mask = 1UL << (bit & 31); \
@@ -163,7 +163,7 @@ static inline void op##_bit(unsigned int bit, volatile unsigned long *p)\
#define TEST_AND_BIT_OP(op, insn, inv) \
static inline int \
-test_and_##op##_bit(unsigned int bit, volatile unsigned long *p) \
+arch_test_and_##op##_bit(unsigned int bit, volatile unsigned long *p) \
{ \
unsigned long tmp, value; \
unsigned long mask = 1UL << (bit & 31); \
@@ -205,6 +205,8 @@ BIT_OPS(change, "xor", )
#undef BIT_OP
#undef TEST_AND_BIT_OP
+#include <asm-generic/bitops/instrumented-atomic.h>
+
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic-setbit.h>
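
Renaming the operations to arch_* lets the instrumented-atomic header supply
the public API: each generic wrapper reports the access to KASAN/KCSAN and
then calls the arch implementation above. Roughly, as a sketch of the
asm-generic wrapper (not xtensa code):

static inline void set_bit(long nr, volatile unsigned long *addr)
{
	instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long));
	arch_set_bit(nr, addr);
}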
diff --git a/arch/xtensa/include/asm/coprocessor.h b/arch/xtensa/include/asm/coprocessor.h
index 0fbe2a740b8d..3b1a0d5d2169 100644
--- a/arch/xtensa/include/asm/coprocessor.h
+++ b/arch/xtensa/include/asm/coprocessor.h
@@ -142,11 +142,12 @@ typedef struct { XCHAL_CP6_SA_LIST(2) } xtregs_cp6_t
typedef struct { XCHAL_CP7_SA_LIST(2) } xtregs_cp7_t
__attribute__ ((aligned (XCHAL_CP7_SA_ALIGN)));
-extern struct thread_info* coprocessor_owner[XCHAL_CP_MAX];
-extern void coprocessor_flush(struct thread_info*, int);
-
-extern void coprocessor_release_all(struct thread_info*);
-extern void coprocessor_flush_all(struct thread_info*);
+struct thread_info;
+void coprocessor_flush(struct thread_info *ti, int cp_index);
+void coprocessor_release_all(struct thread_info *ti);
+void coprocessor_flush_all(struct thread_info *ti);
+void coprocessor_flush_release_all(struct thread_info *ti);
+void local_coprocessors_flush_release_all(void);
#endif /* XTENSA_HAVE_COPROCESSORS */
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 4489a27d527a..76bc63127c66 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -246,6 +246,13 @@ extern unsigned long __get_wchan(struct task_struct *p);
v; \
})
+#define xtensa_xsr(x, sr) \
+ ({ \
+ unsigned int __v__ = (unsigned int)(x); \
+ __asm__ __volatile__ ("xsr %0, " __stringify(sr) : "+a"(__v__)); \
+ __v__; \
+ })
+
#if XCHAL_HAVE_EXTERN_REGS
static inline void set_er(unsigned long value, unsigned long addr)
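
xtensa_xsr() wraps the XSR instruction, which atomically exchanges a general
register with a special register, so the caller gets the old SR value back. A
hedged usage sketch, mirroring the xtensa_set_sr() call in asm/timex.h (the
helper name is hypothetical):

/* Install a new CCOMPARE value and return the previous one. */
static inline unsigned long swap_linux_timer(unsigned long ccompare)
{
	return xtensa_xsr(ccompare, SREG_CCOMPARE + LINUX_TIMER);
}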
diff --git a/arch/xtensa/include/asm/sections.h b/arch/xtensa/include/asm/sections.h
index a8c42d08e281..3bc6b9afa993 100644
--- a/arch/xtensa/include/asm/sections.h
+++ b/arch/xtensa/include/asm/sections.h
@@ -29,7 +29,7 @@ extern char _Level5InterruptVector_text_end[];
extern char _Level6InterruptVector_text_start[];
extern char _Level6InterruptVector_text_end[];
#endif
-#ifdef CONFIG_SMP
+#ifdef CONFIG_SECONDARY_RESET_VECTOR
extern char _SecondaryResetVector_text_start[];
extern char _SecondaryResetVector_text_end[];
#endif
diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h
index f6fcbba1d02f..326db1c1d5d8 100644
--- a/arch/xtensa/include/asm/thread_info.h
+++ b/arch/xtensa/include/asm/thread_info.h
@@ -52,12 +52,21 @@ struct thread_info {
__u32 cpu; /* current CPU */
__s32 preempt_count; /* 0 => preemptable, < 0 => BUG */
- unsigned long cpenable;
#if XCHAL_HAVE_EXCLUSIVE
/* result of the most recent exclusive store */
unsigned long atomctl8;
#endif
+#ifdef CONFIG_USER_ABI_CALL0_PROBE
+ /* Address where PS.WOE was enabled by the ABI probing code */
+ unsigned long ps_woe_fix_addr;
+#endif
+ /*
+	 * If the i-th bit is set then this task's coprocessor i state is
+	 * loaded into coprocessor i on CPU cp_owner_cpu.
+ */
+ unsigned long cpenable;
+ u32 cp_owner_cpu;
/* Allocate storage for extra user states and coprocessor states. */
#if XTENSA_HAVE_COPROCESSORS
xtregs_coprocessor_t xtregs_cp;
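
Taken together, cpenable and cp_owner_cpu tell the SMP code where a task's coprocessor state currently lives. A minimal sketch of the ownership test the rest of the series keeps making (cp_live_here() is a hypothetical helper, not part of the patch):

    /* is task ti's coprocessor state live on the CPU we are running on? */
    static inline bool cp_live_here(struct thread_info *ti)
    {
        return ti->cpenable != 0 && ti->cp_owner_cpu == smp_processor_id();
    }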
diff --git a/arch/xtensa/include/asm/timex.h b/arch/xtensa/include/asm/timex.h
index 233ec75e60c6..3f2462f2d027 100644
--- a/arch/xtensa/include/asm/timex.h
+++ b/arch/xtensa/include/asm/timex.h
@@ -29,10 +29,6 @@
extern unsigned long ccount_freq;
-typedef unsigned long long cycles_t;
-
-#define get_cycles() (0)
-
void local_timer_setup(unsigned cpu);
/*
@@ -59,4 +55,6 @@ static inline void set_linux_timer (unsigned long ccompare)
xtensa_set_sr(ccompare, SREG_CCOMPARE + LINUX_TIMER);
}
+#include <asm-generic/timex.h>
+
#endif /* _XTENSA_TIMEX_H */
diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h
index 6fa47cd8e02d..6f74ccc0c7ea 100644
--- a/arch/xtensa/include/asm/traps.h
+++ b/arch/xtensa/include/asm/traps.h
@@ -12,6 +12,8 @@
#include <asm/ptrace.h>
+typedef void xtensa_exception_handler(struct pt_regs *regs);
+
/*
* Per-CPU exception handling data structure.
* EXCSAVE1 points to it.
@@ -25,31 +27,47 @@ struct exc_table {
void *fixup;
/* For passing a parameter to fixup */
void *fixup_param;
+#if XTENSA_HAVE_COPROCESSORS
+ /* Pointers to owner struct thread_info */
+ struct thread_info *coprocessor_owner[XCHAL_CP_MAX];
+#endif
/* Fast user exception handlers */
void *fast_user_handler[EXCCAUSE_N];
/* Fast kernel exception handlers */
void *fast_kernel_handler[EXCCAUSE_N];
/* Default C-Handlers */
- void *default_handler[EXCCAUSE_N];
+ xtensa_exception_handler *default_handler[EXCCAUSE_N];
};
-/*
- * handler must be either of the following:
- * void (*)(struct pt_regs *regs);
- * void (*)(struct pt_regs *regs, unsigned long exccause);
- */
-extern void * __init trap_set_handler(int cause, void *handler);
-extern void do_unhandled(struct pt_regs *regs, unsigned long exccause);
-void fast_second_level_miss(void);
+DECLARE_PER_CPU(struct exc_table, exc_table);
+
+xtensa_exception_handler *
+__init trap_set_handler(int cause, xtensa_exception_handler *handler);
+
+asmlinkage void fast_illegal_instruction_user(void);
+asmlinkage void fast_syscall_user(void);
+asmlinkage void fast_alloca(void);
+asmlinkage void fast_unaligned(void);
+asmlinkage void fast_second_level_miss(void);
+asmlinkage void fast_store_prohibited(void);
+asmlinkage void fast_coprocessor(void);
+
+asmlinkage void kernel_exception(void);
+asmlinkage void user_exception(void);
+asmlinkage void system_call(struct pt_regs *regs);
+
+void do_IRQ(int hwirq, struct pt_regs *regs);
+void do_page_fault(struct pt_regs *regs);
+void do_unhandled(struct pt_regs *regs);
/* Initialize minimal exc_table structure sufficient for basic paging */
static inline void __init early_trap_init(void)
{
- static struct exc_table exc_table __initdata = {
+ static struct exc_table init_exc_table __initdata = {
.fast_kernel_handler[EXCCAUSE_DTLB_MISS] =
fast_second_level_miss,
};
- __asm__ __volatile__("wsr %0, excsave1\n" : : "a" (&exc_table));
+ xtensa_set_sr(&init_exc_table, excsave1);
}
void secondary_trap_init(void);
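
With the handler type spelled out, probe handlers are type-checked against xtensa_exception_handler instead of being passed through void pointers; s32c1i_selftest.c below is converted accordingly. A usage sketch under the new signature (my_probe_handler is an illustrative name):

    static void __init my_probe_handler(struct pt_regs *regs)
    {
        regs->pc += 3;	/* skip the instruction being probed */
    }

    xtensa_exception_handler *old =
        trap_set_handler(EXCCAUSE_ILLEGAL_INSTRUCTION, my_probe_handler);
    /* ... execute the probed instruction ... */
    trap_set_handler(EXCCAUSE_ILLEGAL_INSTRUCTION, old);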
diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile
index 5fd6cd15e0fb..897c1c741058 100644
--- a/arch/xtensa/kernel/Makefile
+++ b/arch/xtensa/kernel/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_XTENSA_VARIANT_HAVE_PERF_EVENTS) += perf_event.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_S32C1I_SELFTEST) += s32c1i_selftest.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
+obj-$(CONFIG_HIBERNATION) += hibernate.o
# In the Xtensa architecture, assembly generates literals which must always
# precede the L32R instruction with a relative offset less than 256 kB.
diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c
index 37278e2785fb..da38de20ae59 100644
--- a/arch/xtensa/kernel/asm-offsets.c
+++ b/arch/xtensa/kernel/asm-offsets.c
@@ -21,6 +21,7 @@
#include <linux/ptrace.h>
#include <linux/mm.h>
#include <linux/kbuild.h>
+#include <linux/suspend.h>
#include <asm/ptrace.h>
#include <asm/traps.h>
@@ -87,14 +88,19 @@ int main(void)
OFFSET(TI_STSTUS, thread_info, status);
OFFSET(TI_CPU, thread_info, cpu);
OFFSET(TI_PRE_COUNT, thread_info, preempt_count);
+#ifdef CONFIG_USER_ABI_CALL0_PROBE
+ OFFSET(TI_PS_WOE_FIX_ADDR, thread_info, ps_woe_fix_addr);
+#endif
/* struct thread_info (offset from start_struct) */
DEFINE(THREAD_RA, offsetof (struct task_struct, thread.ra));
DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp));
- DEFINE(THREAD_CPENABLE, offsetof (struct thread_info, cpenable));
#if XCHAL_HAVE_EXCLUSIVE
DEFINE(THREAD_ATOMCTL8, offsetof (struct thread_info, atomctl8));
#endif
+ DEFINE(THREAD_CPENABLE, offsetof(struct thread_info, cpenable));
+ DEFINE(THREAD_CPU, offsetof(struct thread_info, cpu));
+ DEFINE(THREAD_CP_OWNER_CPU, offsetof(struct thread_info, cp_owner_cpu));
#if XTENSA_HAVE_COPROCESSORS
DEFINE(THREAD_XTREGS_CP0, offsetof(struct thread_info, xtregs_cp.cp0));
DEFINE(THREAD_XTREGS_CP1, offsetof(struct thread_info, xtregs_cp.cp1));
@@ -137,11 +143,22 @@ int main(void)
DEFINE(EXC_TABLE_DOUBLE_SAVE, offsetof(struct exc_table, double_save));
DEFINE(EXC_TABLE_FIXUP, offsetof(struct exc_table, fixup));
DEFINE(EXC_TABLE_PARAM, offsetof(struct exc_table, fixup_param));
+#if XTENSA_HAVE_COPROCESSORS
+ DEFINE(EXC_TABLE_COPROCESSOR_OWNER,
+ offsetof(struct exc_table, coprocessor_owner));
+#endif
DEFINE(EXC_TABLE_FAST_USER,
offsetof(struct exc_table, fast_user_handler));
DEFINE(EXC_TABLE_FAST_KERNEL,
offsetof(struct exc_table, fast_kernel_handler));
DEFINE(EXC_TABLE_DEFAULT, offsetof(struct exc_table, default_handler));
+#ifdef CONFIG_HIBERNATION
+ DEFINE(PBE_ADDRESS, offsetof(struct pbe, address));
+ DEFINE(PBE_ORIG_ADDRESS, offsetof(struct pbe, orig_address));
+ DEFINE(PBE_NEXT, offsetof(struct pbe, next));
+ DEFINE(PBE_SIZE, sizeof(struct pbe));
+#endif
+
return 0;
}
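
asm-offsets.c is compiled but never linked: every OFFSET()/DEFINE() expands to a magic assembler string that Kbuild post-processes into #defines in the generated asm-offsets.h, which is how THREAD_CP_OWNER_CPU and the new PBE_* constants become visible to coprocessor.S and entry.S. The underlying macros are roughly (per include/linux/kbuild.h):

    #define DEFINE(sym, val) \
        asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))

    #define OFFSET(sym, str, mem) \
        DEFINE(sym, offsetof(struct str, mem))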
diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S
index c7b9f12896f2..ef33e76e07d8 100644
--- a/arch/xtensa/kernel/coprocessor.S
+++ b/arch/xtensa/kernel/coprocessor.S
@@ -19,6 +19,26 @@
#include <asm/current.h>
#include <asm/regs.h>
+/*
+ * Rules for coprocessor state manipulation on SMP:
+ *
+ * - a task may have live coprocessors only on one CPU.
+ *
+ * - whether coprocessor context of task T is live on some CPU is
+ * denoted by T's thread_info->cpenable.
+ *
+ * - non-zero thread_info->cpenable means that thread_info->cp_owner_cpu
+ *   is valid in T's thread_info. Zero thread_info->cpenable means that
+ *   the coprocessor context is valid in T's thread_info save area.
+ *
+ * - if a coprocessor context of task T is live on CPU X, only CPU X changes
+ * T's thread_info->cpenable, cp_owner_cpu and coprocessor save area.
+ * This is done by making sure that for the task T with live coprocessor
+ * on CPU X cpenable SR is 0 when T runs on any other CPU Y.
+ * When fast_coprocessor exception is taken on CPU Y it goes to the
+ * C-level do_coprocessor that uses IPI to make CPU X flush T's coprocessors.
+ */
+
#if XTENSA_HAVE_COPROCESSORS
/*
@@ -30,34 +50,30 @@
.align 4; \
.Lsave_cp_regs_cp##x: \
xchal_cp##x##_store a2 a3 a4 a5 a6; \
- jx a0; \
+ ret; \
.endif
-#define SAVE_CP_REGS_TAB(x) \
- .if XTENSA_HAVE_COPROCESSOR(x); \
- .long .Lsave_cp_regs_cp##x; \
- .else; \
- .long 0; \
- .endif; \
- .long THREAD_XTREGS_CP##x
-
-
#define LOAD_CP_REGS(x) \
.if XTENSA_HAVE_COPROCESSOR(x); \
.align 4; \
.Lload_cp_regs_cp##x: \
xchal_cp##x##_load a2 a3 a4 a5 a6; \
- jx a0; \
+ ret; \
.endif
-#define LOAD_CP_REGS_TAB(x) \
+#define CP_REGS_TAB(x) \
.if XTENSA_HAVE_COPROCESSOR(x); \
+ .long .Lsave_cp_regs_cp##x; \
.long .Lload_cp_regs_cp##x; \
.else; \
- .long 0; \
+ .long 0, 0; \
.endif; \
.long THREAD_XTREGS_CP##x
+#define CP_REGS_TAB_SAVE 0
+#define CP_REGS_TAB_LOAD 4
+#define CP_REGS_TAB_OFFSET 8
+
__XTENSA_HANDLER
SAVE_CP_REGS(0)
@@ -79,25 +95,15 @@
LOAD_CP_REGS(7)
.align 4
-.Lsave_cp_regs_jump_table:
- SAVE_CP_REGS_TAB(0)
- SAVE_CP_REGS_TAB(1)
- SAVE_CP_REGS_TAB(2)
- SAVE_CP_REGS_TAB(3)
- SAVE_CP_REGS_TAB(4)
- SAVE_CP_REGS_TAB(5)
- SAVE_CP_REGS_TAB(6)
- SAVE_CP_REGS_TAB(7)
-
-.Lload_cp_regs_jump_table:
- LOAD_CP_REGS_TAB(0)
- LOAD_CP_REGS_TAB(1)
- LOAD_CP_REGS_TAB(2)
- LOAD_CP_REGS_TAB(3)
- LOAD_CP_REGS_TAB(4)
- LOAD_CP_REGS_TAB(5)
- LOAD_CP_REGS_TAB(6)
- LOAD_CP_REGS_TAB(7)
+.Lcp_regs_jump_table:
+ CP_REGS_TAB(0)
+ CP_REGS_TAB(1)
+ CP_REGS_TAB(2)
+ CP_REGS_TAB(3)
+ CP_REGS_TAB(4)
+ CP_REGS_TAB(5)
+ CP_REGS_TAB(6)
+ CP_REGS_TAB(7)
/*
* Entry condition:
@@ -115,9 +121,37 @@
ENTRY(fast_coprocessor)
+ s32i a3, a2, PT_AREG3
+
+#ifdef CONFIG_SMP
+ /*
+ * Check if any coprocessor context is live on another CPU
+ * and if so go through the C-level coprocessor exception handler
+ * to flush it to memory.
+ */
+ GET_THREAD_INFO (a0, a2)
+ l32i a3, a0, THREAD_CPENABLE
+ beqz a3, .Lload_local
+
+ /*
+ * Pairs with smp_wmb in local_coprocessor_release_all
+ * and with both memws below.
+ */
+ memw
+ l32i a3, a0, THREAD_CPU
+ l32i a0, a0, THREAD_CP_OWNER_CPU
+ beq a0, a3, .Lload_local
+
+ rsr a0, ps
+ l32i a3, a2, PT_AREG3
+ bbci.l a0, PS_UM_BIT, 1f
+ call0 user_exception
+1: call0 kernel_exception
+#endif
+
/* Save remaining registers a1-a3 and SAR */
- s32i a3, a2, PT_AREG3
+.Lload_local:
rsr a3, sar
s32i a1, a2, PT_AREG1
s32i a3, a2, PT_SAR
@@ -125,13 +159,15 @@ ENTRY(fast_coprocessor)
rsr a2, depc
s32i a2, a1, PT_AREG2
- /*
- * The hal macros require up to 4 temporary registers. We use a3..a6.
- */
+ /* The hal macros require up to 4 temporary registers. We use a3..a6. */
s32i a4, a1, PT_AREG4
s32i a5, a1, PT_AREG5
s32i a6, a1, PT_AREG6
+ s32i a7, a1, PT_AREG7
+ s32i a8, a1, PT_AREG8
+ s32i a9, a1, PT_AREG9
+ s32i a10, a1, PT_AREG10
/* Find coprocessor number. Subtract first CP EXCCAUSE from EXCCAUSE */
@@ -148,58 +184,74 @@ ENTRY(fast_coprocessor)
wsr a0, cpenable
rsync
- /* Retrieve previous owner. (a3 still holds CP number) */
+ /* Get coprocessor save/load table entry (a7). */
- movi a0, coprocessor_owner # list of owners
- addx4 a0, a3, a0 # entry for CP
- l32i a4, a0, 0
+ movi a7, .Lcp_regs_jump_table
+ addx8 a7, a3, a7
+ addx4 a7, a3, a7
- beqz a4, 1f # skip 'save' if no previous owner
+ /* Retrieve previous owner (a8). */
- /* Disable coprocessor for previous owner. (a2 = 1 << CP number) */
+ rsr a0, excsave1 # exc_table
+ addx4 a0, a3, a0 # entry for CP
+ l32i a8, a0, EXC_TABLE_COPROCESSOR_OWNER
+
+ /* Set new owner (a9). */
- l32i a5, a4, THREAD_CPENABLE
- xor a5, a5, a2 # (1 << cp-id) still in a2
- s32i a5, a4, THREAD_CPENABLE
+ GET_THREAD_INFO (a9, a1)
+ l32i a4, a9, THREAD_CPU
+ s32i a9, a0, EXC_TABLE_COPROCESSOR_OWNER
+ s32i a4, a9, THREAD_CP_OWNER_CPU
/*
- * Get context save area and 'call' save routine.
- * (a4 still holds previous owner (thread_info), a3 CP number)
+ * Enable coprocessor for the new owner. (a2 = 1 << CP number)
+ * This can be done before loading context into the coprocessor.
*/
+ l32i a4, a9, THREAD_CPENABLE
+ or a4, a4, a2
- movi a5, .Lsave_cp_regs_jump_table
- movi a0, 2f # a0: 'return' address
- addx8 a3, a3, a5 # a3: coprocessor number
- l32i a2, a3, 4 # a2: xtregs offset
- l32i a3, a3, 0 # a3: jump address
- add a2, a2, a4
- jx a3
+ /*
+ * Make sure THREAD_CP_OWNER_CPU is in memory before updating
+ * THREAD_CPENABLE
+ */
+ memw # (2)
+ s32i a4, a9, THREAD_CPENABLE
- /* Note that only a0 and a1 were preserved. */
+ beqz a8, 1f # skip 'save' if no previous owner
-2: rsr a3, exccause
- addi a3, a3, -EXCCAUSE_COPROCESSOR0_DISABLED
- movi a0, coprocessor_owner
- addx4 a0, a3, a0
+ /* Disable coprocessor for previous owner. (a2 = 1 << CP number) */
- /* Set new 'owner' (a0 points to the CP owner, a3 contains the CP nr) */
+ l32i a10, a8, THREAD_CPENABLE
+ xor a10, a10, a2
-1: GET_THREAD_INFO (a4, a1)
- s32i a4, a0, 0
+ /* Get context save area and call save routine. */
- /* Get context save area and 'call' load routine. */
+ l32i a2, a7, CP_REGS_TAB_OFFSET
+ l32i a3, a7, CP_REGS_TAB_SAVE
+ add a2, a2, a8
+ callx0 a3
- movi a5, .Lload_cp_regs_jump_table
- movi a0, 1f
- addx8 a3, a3, a5
- l32i a2, a3, 4 # a2: xtregs offset
- l32i a3, a3, 0 # a3: jump address
- add a2, a2, a4
- jx a3
+ /*
+ * Make sure coprocessor context and THREAD_CP_OWNER_CPU are in memory
+ * before updating THREAD_CPENABLE
+ */
+ memw # (3)
+ s32i a10, a8, THREAD_CPENABLE
+1:
+ /* Get context save area and call load routine. */
+
+ l32i a2, a7, CP_REGS_TAB_OFFSET
+ l32i a3, a7, CP_REGS_TAB_LOAD
+ add a2, a2, a9
+ callx0 a3
/* Restore all registers and return from exception handler. */
-1: l32i a6, a1, PT_AREG6
+ l32i a10, a1, PT_AREG10
+ l32i a9, a1, PT_AREG9
+ l32i a8, a1, PT_AREG8
+ l32i a7, a1, PT_AREG7
+ l32i a6, a1, PT_AREG6
l32i a5, a1, PT_AREG5
l32i a4, a1, PT_AREG4
@@ -230,29 +282,21 @@ ENDPROC(fast_coprocessor)
ENTRY(coprocessor_flush)
- /* reserve 4 bytes on stack to save a0 */
- abi_entry(4)
-
- s32i a0, a1, 0
- movi a0, .Lsave_cp_regs_jump_table
- addx8 a3, a3, a0
- l32i a4, a3, 4
- l32i a3, a3, 0
- add a2, a2, a4
- beqz a3, 1f
- callx0 a3
-1: l32i a0, a1, 0
-
- abi_ret(4)
+ abi_entry_default
+
+ movi a4, .Lcp_regs_jump_table
+ addx8 a4, a3, a4
+ addx4 a3, a3, a4
+ l32i a4, a3, CP_REGS_TAB_SAVE
+ beqz a4, 1f
+ l32i a3, a3, CP_REGS_TAB_OFFSET
+ add a2, a2, a3
+ mov a7, a0
+ callx0 a4
+ mov a0, a7
+1:
+ abi_ret_default
ENDPROC(coprocessor_flush)
- .data
-
-ENTRY(coprocessor_owner)
-
- .fill XCHAL_CP_MAX, 4, 0
-
-END(coprocessor_owner)
-
#endif /* XTENSA_HAVE_COPROCESSORS */
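
The numbered memw barriers above implement a small release/acquire protocol around cpenable: the owning CPU publishes the saved context (or the new owner fields) before writing cpenable, and a remote observer that reads a non-zero cpenable may then trust cp_owner_cpu. A C-level model of the pairing, with save_cp_regs_to() and flush_fn standing in for the real routines:

    /* owner CPU, release side (memw (2)/(3), smp_wmb on the release path) */
    save_cp_regs_to(ti);		/* fill ti->xtregs_cp */
    smp_wmb();
    WRITE_ONCE(ti->cpenable, 0);

    /* any other CPU, acquire side (the memw in fast_coprocessor,
     * smp_rmb in the coprocessor_*_all helpers) */
    if (READ_ONCE(ti->cpenable)) {
        smp_rmb();			/* cp_owner_cpu is now meaningful */
        smp_call_function_single(ti->cp_owner_cpu, flush_fn, ti, true);
    }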
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index 6b6eff658795..e3eae648ba2e 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -28,15 +28,6 @@
#include <asm/tlbflush.h>
#include <variant/tie-asm.h>
-/* Unimplemented features. */
-
-#undef KERNEL_STACK_OVERFLOW_CHECK
-
-/* Not well tested.
- *
- * - fast_coprocessor
- */
-
/*
* Macro to find first bit set in WINDOWBASE from the left + 1
*
@@ -178,28 +169,26 @@ _user_exception:
/* Save only live registers. */
-UABI_W _bbsi.l a2, 1, 1f
+UABI_W _bbsi.l a2, 1, .Lsave_window_registers
s32i a4, a1, PT_AREG4
s32i a5, a1, PT_AREG5
s32i a6, a1, PT_AREG6
s32i a7, a1, PT_AREG7
-UABI_W _bbsi.l a2, 2, 1f
+UABI_W _bbsi.l a2, 2, .Lsave_window_registers
s32i a8, a1, PT_AREG8
s32i a9, a1, PT_AREG9
s32i a10, a1, PT_AREG10
s32i a11, a1, PT_AREG11
-UABI_W _bbsi.l a2, 3, 1f
+UABI_W _bbsi.l a2, 3, .Lsave_window_registers
s32i a12, a1, PT_AREG12
s32i a13, a1, PT_AREG13
s32i a14, a1, PT_AREG14
s32i a15, a1, PT_AREG15
#if defined(USER_SUPPORT_WINDOWED)
- _bnei a2, 1, 1f # only one valid frame?
+	/* If only one valid frame, skip saving regs. */
- /* Only one valid frame, skip saving regs. */
-
- j 2f
+ beqi a2, 1, common_exception
/* Save the remaining registers.
* We have to save all registers up to the first '1' from
@@ -208,8 +197,8 @@ UABI_W _bbsi.l a2, 3, 1f
* All register frames starting from the top field to the marked '1'
* must be saved.
*/
-
-1: addi a3, a2, -1 # eliminate '1' in bit 0: yyyyxxww0
+.Lsave_window_registers:
+ addi a3, a2, -1 # eliminate '1' in bit 0: yyyyxxww0
neg a3, a3 # yyyyxxww0 -> YYYYXXWW1+1
and a3, a3, a2 # max. only one bit is set
@@ -250,7 +239,7 @@ UABI_W _bbsi.l a2, 3, 1f
/* We are back to the original stack pointer (a1) */
#endif
-2: /* Now, jump to the common exception handler. */
+ /* Now, jump to the common exception handler. */
j common_exception
@@ -350,15 +339,6 @@ KABI_W _bbsi.l a2, 3, 1f
l32i a0, a1, PT_AREG0 # restore saved a0
wsr a0, depc
-#ifdef KERNEL_STACK_OVERFLOW_CHECK
-
- /* Stack overflow check, for debugging */
- extui a2, a1, TASK_SIZE_BITS,XX
- movi a3, SIZE??
- _bge a2, a3, out_of_stack_panic
-
-#endif
-
/*
* This is the common exception handler.
* We get here from the user exception handler or simply by falling through
@@ -442,7 +422,6 @@ KABI_W or a3, a3, a0
moveqz a3, a0, a2 # a3 = LOCKLEVEL iff interrupt
KABI_W movi a2, PS_WOE_MASK
KABI_W or a3, a3, a2
- rsr a2, exccause
#endif
/* restore return address (or 0 if return to userspace) */
@@ -469,42 +448,56 @@ KABI_W or a3, a3, a2
save_xtregs_opt a1 a3 a4 a5 a6 a7 PT_XTREGS_OPT
+#ifdef CONFIG_TRACE_IRQFLAGS
+ rsr abi_tmp0, ps
+ extui abi_tmp0, abi_tmp0, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
+ beqz abi_tmp0, 1f
+ abi_call trace_hardirqs_off
+1:
+#endif
+#ifdef CONFIG_CONTEXT_TRACKING
+ l32i abi_tmp0, a1, PT_PS
+ bbci.l abi_tmp0, PS_UM_BIT, 1f
+ abi_call context_tracking_user_exit
+1:
+#endif
+
/* Go to second-level dispatcher. Set up parameters to pass to the
* exception handler and call the exception handler.
*/
- rsr a4, excsave1
- addx4 a4, a2, a4
- l32i a4, a4, EXC_TABLE_DEFAULT # load handler
- mov abi_arg1, a2 # pass EXCCAUSE
- mov abi_arg0, a1 # pass stack frame
+ l32i abi_arg1, a1, PT_EXCCAUSE # pass EXCCAUSE
+ rsr abi_tmp0, excsave1
+ addx4 abi_tmp0, abi_arg1, abi_tmp0
+ l32i abi_tmp0, abi_tmp0, EXC_TABLE_DEFAULT # load handler
+ mov abi_arg0, a1 # pass stack frame
/* Call the second-level handler */
- abi_callx a4
+ abi_callx abi_tmp0
/* Jump here for exception exit */
.global common_exception_return
common_exception_return:
#if XTENSA_FAKE_NMI
- l32i abi_tmp0, a1, PT_EXCCAUSE
- movi abi_tmp1, EXCCAUSE_MAPPED_NMI
- l32i abi_saved1, a1, PT_PS
- beq abi_tmp0, abi_tmp1, .Lrestore_state
+ l32i abi_tmp0, a1, PT_EXCCAUSE
+ movi abi_tmp1, EXCCAUSE_MAPPED_NMI
+ l32i abi_saved1, a1, PT_PS
+ beq abi_tmp0, abi_tmp1, .Lrestore_state
#endif
.Ltif_loop:
- irq_save a2, a3
+ irq_save abi_tmp0, abi_tmp1
#ifdef CONFIG_TRACE_IRQFLAGS
abi_call trace_hardirqs_off
#endif
/* Jump if we are returning from kernel exceptions. */
- l32i abi_saved1, a1, PT_PS
- GET_THREAD_INFO(a2, a1)
- l32i a4, a2, TI_FLAGS
- _bbci.l abi_saved1, PS_UM_BIT, .Lexit_tif_loop_kernel
+ l32i abi_saved1, a1, PT_PS
+ GET_THREAD_INFO(abi_tmp0, a1)
+ l32i abi_saved0, abi_tmp0, TI_FLAGS
+ _bbci.l abi_saved1, PS_UM_BIT, .Lexit_tif_loop_kernel
/* Specific to a user exception exit:
* We need to check some flags for signal handling and rescheduling,
@@ -513,75 +506,80 @@ common_exception_return:
* Note that we don't disable interrupts here.
*/
- _bbsi.l a4, TIF_NEED_RESCHED, .Lresched
- movi a2, _TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NOTIFY_SIGNAL
- bnone a4, a2, .Lexit_tif_loop_user
+ _bbsi.l abi_saved0, TIF_NEED_RESCHED, .Lresched
+ movi abi_tmp0, _TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NOTIFY_SIGNAL
+ bnone abi_saved0, abi_tmp0, .Lexit_tif_loop_user
- l32i a4, a1, PT_DEPC
- bgeui a4, VALID_DOUBLE_EXCEPTION_ADDRESS, .Lrestore_state
+ l32i abi_tmp0, a1, PT_DEPC
+ bgeui abi_tmp0, VALID_DOUBLE_EXCEPTION_ADDRESS, .Lrestore_state
/* Call do_signal() */
#ifdef CONFIG_TRACE_IRQFLAGS
abi_call trace_hardirqs_on
#endif
- rsil a2, 0
- mov abi_arg0, a1
+ rsil abi_tmp0, 0
+ mov abi_arg0, a1
abi_call do_notify_resume # int do_notify_resume(struct pt_regs*)
- j .Ltif_loop
+ j .Ltif_loop
.Lresched:
#ifdef CONFIG_TRACE_IRQFLAGS
abi_call trace_hardirqs_on
#endif
- rsil a2, 0
+ rsil abi_tmp0, 0
abi_call schedule # void schedule (void)
- j .Ltif_loop
+ j .Ltif_loop
.Lexit_tif_loop_kernel:
#ifdef CONFIG_PREEMPTION
- _bbci.l a4, TIF_NEED_RESCHED, .Lrestore_state
+ _bbci.l abi_saved0, TIF_NEED_RESCHED, .Lrestore_state
/* Check current_thread_info->preempt_count */
- l32i a4, a2, TI_PRE_COUNT
- bnez a4, .Lrestore_state
+ l32i abi_tmp1, abi_tmp0, TI_PRE_COUNT
+ bnez abi_tmp1, .Lrestore_state
abi_call preempt_schedule_irq
#endif
- j .Lrestore_state
+ j .Lrestore_state
.Lexit_tif_loop_user:
+#ifdef CONFIG_CONTEXT_TRACKING
+ abi_call context_tracking_user_enter
+#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
- _bbci.l a4, TIF_DB_DISABLED, 1f
+ _bbci.l abi_saved0, TIF_DB_DISABLED, 1f
abi_call restore_dbreak
1:
#endif
#ifdef CONFIG_DEBUG_TLB_SANITY
- l32i a4, a1, PT_DEPC
- bgeui a4, VALID_DOUBLE_EXCEPTION_ADDRESS, .Lrestore_state
+ l32i abi_tmp0, a1, PT_DEPC
+ bgeui abi_tmp0, VALID_DOUBLE_EXCEPTION_ADDRESS, .Lrestore_state
abi_call check_tlb_sanity
#endif
.Lrestore_state:
#ifdef CONFIG_TRACE_IRQFLAGS
- extui a4, abi_saved1, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
- bgei a4, LOCKLEVEL, 1f
+ extui abi_tmp0, abi_saved1, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
+ bgei abi_tmp0, LOCKLEVEL, 1f
abi_call trace_hardirqs_on
1:
#endif
- /* Restore optional registers. */
+ /*
+ * Restore optional registers.
+ * abi_arg* are used as temporary registers here.
+ */
- load_xtregs_opt a1 a2 a4 a5 a6 a7 PT_XTREGS_OPT
+ load_xtregs_opt a1 abi_tmp0 abi_arg0 abi_arg1 abi_arg2 abi_arg3 PT_XTREGS_OPT
/* Restore SCOMPARE1 */
#if XCHAL_HAVE_S32C1I
- l32i a2, a1, PT_SCOMPARE1
- wsr a2, scompare1
+ l32i abi_tmp0, a1, PT_SCOMPARE1
+ wsr abi_tmp0, scompare1
#endif
- wsr abi_saved1, ps /* disable interrupts */
-
- _bbci.l abi_saved1, PS_UM_BIT, kernel_exception_exit
+ wsr abi_saved1, ps /* disable interrupts */
+ _bbci.l abi_saved1, PS_UM_BIT, kernel_exception_exit
user_exception_exit:
@@ -795,7 +793,7 @@ ENDPROC(kernel_exception)
ENTRY(debug_exception)
rsr a0, SREG_EPS + XCHAL_DEBUGLEVEL
- bbsi.l a0, PS_EXCM_BIT, 1f # exception mode
+ bbsi.l a0, PS_EXCM_BIT, .Ldebug_exception_in_exception # exception mode
/* Set EPC1 and EXCCAUSE */
@@ -814,10 +812,10 @@ ENTRY(debug_exception)
/* Switch to kernel/user stack, restore jump vector, and save a0 */
- bbsi.l a2, PS_UM_BIT, 2f # jump if user mode
-
+ bbsi.l a2, PS_UM_BIT, .Ldebug_exception_user # jump if user mode
addi a2, a1, -16 - PT_KERNEL_SIZE # assume kernel stack
-3:
+
+.Ldebug_exception_continue:
l32i a0, a3, DT_DEBUG_SAVE
s32i a1, a2, PT_AREG1
s32i a0, a2, PT_AREG0
@@ -845,10 +843,12 @@ ENTRY(debug_exception)
bbsi.l a2, PS_UM_BIT, _user_exception
j _kernel_exception
-2: rsr a2, excsave1
+.Ldebug_exception_user:
+ rsr a2, excsave1
l32i a2, a2, EXC_TABLE_KSTK # load kernel stack pointer
- j 3b
+ j .Ldebug_exception_continue
+.Ldebug_exception_in_exception:
#ifdef CONFIG_HAVE_HW_BREAKPOINT
/* Debug exception while in exception mode. This may happen when
* window overflow/underflow handler or fast exception handler hits
@@ -856,8 +856,8 @@ ENTRY(debug_exception)
* breakpoints, single-step faulting instruction and restore data
* breakpoints.
*/
-1:
- bbci.l a0, PS_UM_BIT, 1b # jump if kernel mode
+
+ bbci.l a0, PS_UM_BIT, .Ldebug_exception_in_exception # jump if kernel mode
rsr a0, debugcause
bbsi.l a0, DEBUGCAUSE_DBREAK_BIT, .Ldebug_save_dbreak
@@ -901,7 +901,7 @@ ENTRY(debug_exception)
rfi XCHAL_DEBUGLEVEL
#else
/* Debug exception while in exception mode. Should not happen. */
-1: j 1b // FIXME!!
+ j .Ldebug_exception_in_exception // FIXME!!
#endif
ENDPROC(debug_exception)
@@ -1056,6 +1056,11 @@ ENTRY(fast_illegal_instruction_user)
movi a3, PS_WOE_MASK
or a0, a0, a3
wsr a0, ps
+#ifdef CONFIG_USER_ABI_CALL0_PROBE
+ GET_THREAD_INFO(a3, a2)
+ rsr a0, epc1
+ s32i a0, a3, TI_PS_WOE_FIX_ADDR
+#endif
l32i a3, a2, PT_AREG3
l32i a0, a2, PT_AREG0
rsr a2, depc
@@ -1630,12 +1635,13 @@ ENTRY(fast_second_level_miss)
GET_CURRENT(a1,a2)
l32i a0, a1, TASK_MM # tsk->mm
- beqz a0, 9f
+ beqz a0, .Lfast_second_level_miss_no_mm
-8: rsr a3, excvaddr # fault address
+.Lfast_second_level_miss_continue:
+ rsr a3, excvaddr # fault address
_PGD_OFFSET(a0, a3, a1)
l32i a0, a0, 0 # read pmdval
- beqz a0, 2f
+ beqz a0, .Lfast_second_level_miss_no_pmd
/* Read ptevaddr and convert to top of page-table page.
*
@@ -1678,12 +1684,13 @@ ENTRY(fast_second_level_miss)
addi a3, a3, DTLB_WAY_PGD
add a1, a1, a3 # ... + way_number
-3: wdtlb a0, a1
+.Lfast_second_level_miss_wdtlb:
+ wdtlb a0, a1
dsync
/* Exit critical section. */
-
-4: rsr a3, excsave1
+.Lfast_second_level_miss_skip_wdtlb:
+ rsr a3, excsave1
movi a0, 0
s32i a0, a3, EXC_TABLE_FIXUP
@@ -1707,19 +1714,21 @@ ENTRY(fast_second_level_miss)
esync
rfde
-9: l32i a0, a1, TASK_ACTIVE_MM # unlikely case mm == 0
- bnez a0, 8b
+.Lfast_second_level_miss_no_mm:
+ l32i a0, a1, TASK_ACTIVE_MM # unlikely case mm == 0
+ bnez a0, .Lfast_second_level_miss_continue
/* Even more unlikely case active_mm == 0.
* We can get here with NMI in the middle of context_switch that
* touches vmalloc area.
*/
movi a0, init_mm
- j 8b
+ j .Lfast_second_level_miss_continue
+.Lfast_second_level_miss_no_pmd:
#if (DCACHE_WAY_SIZE > PAGE_SIZE)
-2: /* Special case for cache aliasing.
+ /* Special case for cache aliasing.
* We (should) only get here if a clear_user_page, copy_user_page
* or the aliased cache flush functions got preemptively interrupted
* by another task. Re-establish temporary mapping to the
@@ -1729,24 +1738,24 @@ ENTRY(fast_second_level_miss)
/* We shouldn't be in a double exception */
l32i a0, a2, PT_DEPC
- bgeui a0, VALID_DOUBLE_EXCEPTION_ADDRESS, 2f
+ bgeui a0, VALID_DOUBLE_EXCEPTION_ADDRESS, .Lfast_second_level_miss_slow
/* Make sure the exception originated in the special functions */
movi a0, __tlbtemp_mapping_start
rsr a3, epc1
- bltu a3, a0, 2f
+ bltu a3, a0, .Lfast_second_level_miss_slow
movi a0, __tlbtemp_mapping_end
- bgeu a3, a0, 2f
+ bgeu a3, a0, .Lfast_second_level_miss_slow
/* Check if excvaddr was in one of the TLBTEMP_BASE areas. */
movi a3, TLBTEMP_BASE_1
rsr a0, excvaddr
- bltu a0, a3, 2f
+ bltu a0, a3, .Lfast_second_level_miss_slow
addi a1, a0, -TLBTEMP_SIZE
- bgeu a1, a3, 2f
+ bgeu a1, a3, .Lfast_second_level_miss_slow
/* Check if we have to restore an ITLB mapping. */
@@ -1772,19 +1781,19 @@ ENTRY(fast_second_level_miss)
mov a0, a6
movnez a0, a7, a3
- j 3b
+ j .Lfast_second_level_miss_wdtlb
/* ITLB entry. We only use dst in a6. */
1: witlb a6, a1
isync
- j 4b
+ j .Lfast_second_level_miss_skip_wdtlb
#endif // DCACHE_WAY_SIZE > PAGE_SIZE
-
-2: /* Invalid PGD, default exception handling */
+ /* Invalid PGD, default exception handling */
+.Lfast_second_level_miss_slow:
rsr a1, depc
s32i a1, a2, PT_AREG2
@@ -1824,12 +1833,13 @@ ENTRY(fast_store_prohibited)
GET_CURRENT(a1,a2)
l32i a0, a1, TASK_MM # tsk->mm
- beqz a0, 9f
+ beqz a0, .Lfast_store_no_mm
-8: rsr a1, excvaddr # fault address
+.Lfast_store_continue:
+ rsr a1, excvaddr # fault address
_PGD_OFFSET(a0, a1, a3)
l32i a0, a0, 0
- beqz a0, 2f
+ beqz a0, .Lfast_store_slow
/*
* Note that we test _PAGE_WRITABLE_BIT only if PTE is present
@@ -1839,8 +1849,8 @@ ENTRY(fast_store_prohibited)
_PTE_OFFSET(a0, a1, a3)
l32i a3, a0, 0 # read pteval
movi a1, _PAGE_CA_INVALID
- ball a3, a1, 2f
- bbci.l a3, _PAGE_WRITABLE_BIT, 2f
+ ball a3, a1, .Lfast_store_slow
+ bbci.l a3, _PAGE_WRITABLE_BIT, .Lfast_store_slow
movi a1, _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_HW_WRITE
or a3, a3, a1
@@ -1868,7 +1878,6 @@ ENTRY(fast_store_prohibited)
l32i a2, a2, PT_DEPC
bgeui a2, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f
-
rsr a2, depc
rfe
@@ -1878,11 +1887,17 @@ ENTRY(fast_store_prohibited)
esync
rfde
-9: l32i a0, a1, TASK_ACTIVE_MM # unlikely case mm == 0
- j 8b
-
-2: /* If there was a problem, handle fault in C */
+.Lfast_store_no_mm:
+ l32i a0, a1, TASK_ACTIVE_MM # unlikely case mm == 0
+ j .Lfast_store_continue
+ /* If there was a problem, handle fault in C */
+.Lfast_store_slow:
+ rsr a1, excvaddr
+ pdtlb a0, a1
+ bbci.l a0, DTLB_HIT_BIT, 1f
+ idtlb a0
+1:
rsr a3, depc # still holds a2
s32i a3, a2, PT_AREG2
mov a1, a2
@@ -2071,8 +2086,16 @@ ENTRY(_switch_to)
#if (XTENSA_HAVE_COPROCESSORS || XTENSA_HAVE_IO_PORTS)
l32i a3, a5, THREAD_CPENABLE
- xsr a3, cpenable
- s32i a3, a4, THREAD_CPENABLE
+#ifdef CONFIG_SMP
+ beqz a3, 1f
+ memw # pairs with memw (2) in fast_coprocessor
+ l32i a6, a5, THREAD_CP_OWNER_CPU
+ l32i a7, a5, THREAD_CPU
+ beq a6, a7, 1f # load 0 into CPENABLE if current CPU is not the owner
+ movi a3, 0
+1:
+#endif
+ wsr a3, cpenable
#endif
#if XCHAL_HAVE_EXCLUSIVE
@@ -2147,3 +2170,95 @@ ENTRY(ret_from_kernel_thread)
j common_exception_return
ENDPROC(ret_from_kernel_thread)
+
+#ifdef CONFIG_HIBERNATION
+
+ .bss
+ .align 4
+.Lsaved_regs:
+#if defined(__XTENSA_WINDOWED_ABI__)
+ .fill 2, 4
+#elif defined(__XTENSA_CALL0_ABI__)
+ .fill 6, 4
+#else
+#error Unsupported Xtensa ABI
+#endif
+ .align XCHAL_NCP_SA_ALIGN
+.Lsaved_user_regs:
+ .fill XTREGS_USER_SIZE, 1
+
+ .previous
+
+ENTRY(swsusp_arch_suspend)
+
+ abi_entry_default
+
+ movi a2, .Lsaved_regs
+ movi a3, .Lsaved_user_regs
+ s32i a0, a2, 0
+ s32i a1, a2, 4
+ save_xtregs_user a3 a4 a5 a6 a7 a8 0
+#if defined(__XTENSA_WINDOWED_ABI__)
+ spill_registers_kernel
+#elif defined(__XTENSA_CALL0_ABI__)
+ s32i a12, a2, 8
+ s32i a13, a2, 12
+ s32i a14, a2, 16
+ s32i a15, a2, 20
+#else
+#error Unsupported Xtensa ABI
+#endif
+ abi_call swsusp_save
+ mov a2, abi_rv
+ abi_ret_default
+
+ENDPROC(swsusp_arch_suspend)
+
+ENTRY(swsusp_arch_resume)
+
+ abi_entry_default
+
+#if defined(__XTENSA_WINDOWED_ABI__)
+ spill_registers_kernel
+#endif
+
+ movi a2, restore_pblist
+ l32i a2, a2, 0
+
+.Lcopy_pbe:
+ l32i a3, a2, PBE_ADDRESS
+ l32i a4, a2, PBE_ORIG_ADDRESS
+
+ __loopi a3, a9, PAGE_SIZE, 16
+ l32i a5, a3, 0
+ l32i a6, a3, 4
+ l32i a7, a3, 8
+ l32i a8, a3, 12
+ addi a3, a3, 16
+ s32i a5, a4, 0
+ s32i a6, a4, 4
+ s32i a7, a4, 8
+ s32i a8, a4, 12
+ addi a4, a4, 16
+ __endl a3, a9
+
+ l32i a2, a2, PBE_NEXT
+ bnez a2, .Lcopy_pbe
+
+ movi a2, .Lsaved_regs
+ movi a3, .Lsaved_user_regs
+ l32i a0, a2, 0
+ l32i a1, a2, 4
+ load_xtregs_user a3 a4 a5 a6 a7 a8 0
+#if defined(__XTENSA_CALL0_ABI__)
+ l32i a12, a2, 8
+ l32i a13, a2, 12
+ l32i a14, a2, 16
+ l32i a15, a2, 20
+#endif
+ movi a2, 0
+ abi_ret_default
+
+ENDPROC(swsusp_arch_resume)
+
+#endif
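
The copy loop in swsusp_arch_resume moves 16 bytes per iteration from each page-backup entry back to its original frame. Its C equivalent, using the struct pbe fields referenced through the PBE_* offsets defined above, is simply:

    struct pbe *p;

    for (p = restore_pblist; p != NULL; p = p->next)
        memcpy(p->orig_address, p->address, PAGE_SIZE);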
diff --git a/arch/xtensa/kernel/hibernate.c b/arch/xtensa/kernel/hibernate.c
new file mode 100644
index 000000000000..06984327d6e2
--- /dev/null
+++ b/arch/xtensa/kernel/hibernate.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/mm.h>
+#include <linux/suspend.h>
+#include <asm/coprocessor.h>
+
+int pfn_is_nosave(unsigned long pfn)
+{
+ unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin));
+ unsigned long nosave_end_pfn = PFN_UP(__pa(&__nosave_end));
+
+ return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
+}
+
+void notrace save_processor_state(void)
+{
+ WARN_ON(num_online_cpus() != 1);
+#if XTENSA_HAVE_COPROCESSORS
+ local_coprocessors_flush_release_all();
+#endif
+}
+
+void notrace restore_processor_state(void)
+{
+}
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index e8bfbca5f001..7e38292dd07a 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -47,6 +47,7 @@
#include <asm/asm-offsets.h>
#include <asm/regs.h>
#include <asm/hw_breakpoint.h>
+#include <asm/traps.h>
extern void ret_from_fork(void);
extern void ret_from_kernel_thread(void);
@@ -63,52 +64,114 @@ EXPORT_SYMBOL(__stack_chk_guard);
#if XTENSA_HAVE_COPROCESSORS
-void coprocessor_release_all(struct thread_info *ti)
+void local_coprocessors_flush_release_all(void)
{
- unsigned long cpenable;
- int i;
+ struct thread_info **coprocessor_owner;
+ struct thread_info *unique_owner[XCHAL_CP_MAX];
+ int n = 0;
+ int i, j;
- /* Make sure we don't switch tasks during this operation. */
+ coprocessor_owner = this_cpu_ptr(&exc_table)->coprocessor_owner;
+ xtensa_set_sr(XCHAL_CP_MASK, cpenable);
- preempt_disable();
+ for (i = 0; i < XCHAL_CP_MAX; i++) {
+ struct thread_info *ti = coprocessor_owner[i];
- /* Walk through all cp owners and release it for the requested one. */
+ if (ti) {
+ coprocessor_flush(ti, i);
- cpenable = ti->cpenable;
+ for (j = 0; j < n; j++)
+ if (unique_owner[j] == ti)
+ break;
+ if (j == n)
+ unique_owner[n++] = ti;
- for (i = 0; i < XCHAL_CP_MAX; i++) {
- if (coprocessor_owner[i] == ti) {
- coprocessor_owner[i] = 0;
- cpenable &= ~(1 << i);
+ coprocessor_owner[i] = NULL;
}
}
+ for (i = 0; i < n; i++) {
+ /* pairs with memw (1) in fast_coprocessor and memw in switch_to */
+ smp_wmb();
+ unique_owner[i]->cpenable = 0;
+ }
+ xtensa_set_sr(0, cpenable);
+}
- ti->cpenable = cpenable;
+static void local_coprocessor_release_all(void *info)
+{
+ struct thread_info *ti = info;
+ struct thread_info **coprocessor_owner;
+ int i;
+
+ coprocessor_owner = this_cpu_ptr(&exc_table)->coprocessor_owner;
+
+ /* Walk through all cp owners and release it for the requested one. */
+
+ for (i = 0; i < XCHAL_CP_MAX; i++) {
+ if (coprocessor_owner[i] == ti)
+ coprocessor_owner[i] = NULL;
+ }
+ /* pairs with memw (1) in fast_coprocessor and memw in switch_to */
+ smp_wmb();
+ ti->cpenable = 0;
if (ti == current_thread_info())
xtensa_set_sr(0, cpenable);
+}
- preempt_enable();
+void coprocessor_release_all(struct thread_info *ti)
+{
+ if (ti->cpenable) {
+ /* pairs with memw (2) in fast_coprocessor */
+ smp_rmb();
+ smp_call_function_single(ti->cp_owner_cpu,
+ local_coprocessor_release_all,
+ ti, true);
+ }
}
-void coprocessor_flush_all(struct thread_info *ti)
+static void local_coprocessor_flush_all(void *info)
{
- unsigned long cpenable, old_cpenable;
+ struct thread_info *ti = info;
+ struct thread_info **coprocessor_owner;
+ unsigned long old_cpenable;
int i;
- preempt_disable();
-
- old_cpenable = xtensa_get_sr(cpenable);
- cpenable = ti->cpenable;
- xtensa_set_sr(cpenable, cpenable);
+ coprocessor_owner = this_cpu_ptr(&exc_table)->coprocessor_owner;
+ old_cpenable = xtensa_xsr(ti->cpenable, cpenable);
for (i = 0; i < XCHAL_CP_MAX; i++) {
- if ((cpenable & 1) != 0 && coprocessor_owner[i] == ti)
+ if (coprocessor_owner[i] == ti)
coprocessor_flush(ti, i);
- cpenable >>= 1;
}
xtensa_set_sr(old_cpenable, cpenable);
+}
- preempt_enable();
+void coprocessor_flush_all(struct thread_info *ti)
+{
+ if (ti->cpenable) {
+ /* pairs with memw (2) in fast_coprocessor */
+ smp_rmb();
+ smp_call_function_single(ti->cp_owner_cpu,
+ local_coprocessor_flush_all,
+ ti, true);
+ }
+}
+
+static void local_coprocessor_flush_release_all(void *info)
+{
+ local_coprocessor_flush_all(info);
+ local_coprocessor_release_all(info);
+}
+
+void coprocessor_flush_release_all(struct thread_info *ti)
+{
+ if (ti->cpenable) {
+ /* pairs with memw (2) in fast_coprocessor */
+ smp_rmb();
+ smp_call_function_single(ti->cp_owner_cpu,
+ local_coprocessor_flush_release_all,
+ ti, true);
+ }
}
#endif
@@ -140,8 +203,7 @@ void flush_thread(void)
{
#if XTENSA_HAVE_COPROCESSORS
struct thread_info *ti = current_thread_info();
- coprocessor_flush_all(ti);
- coprocessor_release_all(ti);
+ coprocessor_flush_release_all(ti);
#endif
flush_ptrace_hw_breakpoint(current);
}
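
Each of the exported helpers now resolves to a single synchronous smp_call_function_single() on the owning CPU, so on return the coprocessor contents are in ti->xtregs_cp and, for the release variants, cpenable is zero. Callers that previously paired a flush with a release (ptrace, signal delivery, flush_thread) shrink to one call and one IPI, e.g.:

    struct thread_info *ti = current_thread_info();

    /* pull any live coprocessor state back into ti->xtregs_cp and drop
     * ownership; safe to read or overwrite the save area after this */
    coprocessor_flush_release_all(ti);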
diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c
index 323c678a691f..22cdaa6729d3 100644
--- a/arch/xtensa/kernel/ptrace.c
+++ b/arch/xtensa/kernel/ptrace.c
@@ -171,8 +171,7 @@ static int tie_set(struct task_struct *target,
#if XTENSA_HAVE_COPROCESSORS
/* Flush all coprocessors before we overwrite them. */
- coprocessor_flush_all(ti);
- coprocessor_release_all(ti);
+ coprocessor_flush_release_all(ti);
ti->xtregs_cp.cp0 = newregs->cp0;
ti->xtregs_cp.cp1 = newregs->cp1;
ti->xtregs_cp.cp2 = newregs->cp2;
diff --git a/arch/xtensa/kernel/s32c1i_selftest.c b/arch/xtensa/kernel/s32c1i_selftest.c
index 07e56e3a9a8b..8362388c8719 100644
--- a/arch/xtensa/kernel/s32c1i_selftest.c
+++ b/arch/xtensa/kernel/s32c1i_selftest.c
@@ -40,14 +40,13 @@ static inline int probed_compare_swap(int *v, int cmp, int set)
/* Handle probed exception */
-static void __init do_probed_exception(struct pt_regs *regs,
- unsigned long exccause)
+static void __init do_probed_exception(struct pt_regs *regs)
{
if (regs->pc == rcw_probe_pc) { /* exception on s32c1i ? */
regs->pc += 3; /* skip the s32c1i instruction */
- rcw_exc = exccause;
+ rcw_exc = regs->exccause;
} else {
- do_unhandled(regs, exccause);
+ do_unhandled(regs);
}
}
diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c
index 6f68649e86ba..c9ffd42db873 100644
--- a/arch/xtensa/kernel/signal.c
+++ b/arch/xtensa/kernel/signal.c
@@ -162,8 +162,7 @@ setup_sigcontext(struct rt_sigframe __user *frame, struct pt_regs *regs)
return err;
#if XTENSA_HAVE_COPROCESSORS
- coprocessor_flush_all(ti);
- coprocessor_release_all(ti);
+ coprocessor_flush_release_all(ti);
err |= __copy_to_user(&frame->xtregs.cp, &ti->xtregs_cp,
sizeof (frame->xtregs.cp));
#endif
diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c
index 1254da07ead1..4dc109dd6214 100644
--- a/arch/xtensa/kernel/smp.c
+++ b/arch/xtensa/kernel/smp.c
@@ -30,6 +30,7 @@
#include <linux/thread_info.h>
#include <asm/cacheflush.h>
+#include <asm/coprocessor.h>
#include <asm/kdebug.h>
#include <asm/mmu_context.h>
#include <asm/mxregs.h>
@@ -272,6 +273,12 @@ int __cpu_disable(void)
*/
set_cpu_online(cpu, false);
+#if XTENSA_HAVE_COPROCESSORS
+ /*
+ * Flush coprocessor contexts that are active on the current CPU.
+ */
+ local_coprocessors_flush_release_all();
+#endif
/*
* OK - migrate IRQs away from this CPU
*/
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 9345007d474d..0c25e035ff10 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -48,25 +48,20 @@
* Machine specific interrupt handlers
*/
-extern void kernel_exception(void);
-extern void user_exception(void);
-
-extern void fast_illegal_instruction_user(void);
-extern void fast_syscall_user(void);
-extern void fast_alloca(void);
-extern void fast_unaligned(void);
-extern void fast_second_level_miss(void);
-extern void fast_store_prohibited(void);
-extern void fast_coprocessor(void);
-
-extern void do_illegal_instruction (struct pt_regs*);
-extern void do_interrupt (struct pt_regs*);
-extern void do_nmi(struct pt_regs *);
-extern void do_unaligned_user (struct pt_regs*);
-extern void do_multihit (struct pt_regs*, unsigned long);
-extern void do_page_fault (struct pt_regs*, unsigned long);
-extern void do_debug (struct pt_regs*);
-extern void system_call (struct pt_regs*);
+static void do_illegal_instruction(struct pt_regs *regs);
+static void do_div0(struct pt_regs *regs);
+static void do_interrupt(struct pt_regs *regs);
+#if XTENSA_FAKE_NMI
+static void do_nmi(struct pt_regs *regs);
+#endif
+#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
+static void do_unaligned_user(struct pt_regs *regs);
+#endif
+static void do_multihit(struct pt_regs *regs);
+#if XTENSA_HAVE_COPROCESSORS
+static void do_coprocessor(struct pt_regs *regs);
+#endif
+static void do_debug(struct pt_regs *regs);
/*
* The vector table must be preceded by a save area (which
@@ -78,7 +73,8 @@ extern void system_call (struct pt_regs*);
#define USER 0x02
#define COPROCESSOR(x) \
-{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, USER, fast_coprocessor }
+{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, USER|KRNL, fast_coprocessor },\
+{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, 0, do_coprocessor }
typedef struct {
int cause;
@@ -100,7 +96,7 @@ static dispatch_init_table_t __initdata dispatch_init_table[] = {
#ifdef SUPPORT_WINDOWED
{ EXCCAUSE_ALLOCA, USER|KRNL, fast_alloca },
#endif
-/* EXCCAUSE_INTEGER_DIVIDE_BY_ZERO unhandled */
+{ EXCCAUSE_INTEGER_DIVIDE_BY_ZERO, 0, do_div0 },
/* EXCCAUSE_PRIVILEGED unhandled */
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
#ifdef CONFIG_XTENSA_UNALIGNED_USER
@@ -110,21 +106,21 @@ static dispatch_init_table_t __initdata dispatch_init_table[] = {
{ EXCCAUSE_UNALIGNED, KRNL, fast_unaligned },
#endif
#ifdef CONFIG_MMU
-{ EXCCAUSE_ITLB_MISS, 0, do_page_fault },
-{ EXCCAUSE_ITLB_MISS, USER|KRNL, fast_second_level_miss},
+{ EXCCAUSE_ITLB_MISS, 0, do_page_fault },
+{ EXCCAUSE_ITLB_MISS, USER|KRNL, fast_second_level_miss},
+{ EXCCAUSE_DTLB_MISS, USER|KRNL, fast_second_level_miss},
+{ EXCCAUSE_DTLB_MISS, 0, do_page_fault },
+{ EXCCAUSE_STORE_CACHE_ATTRIBUTE, USER|KRNL, fast_store_prohibited },
+#endif /* CONFIG_MMU */
+#ifdef CONFIG_PFAULT
{ EXCCAUSE_ITLB_MULTIHIT, 0, do_multihit },
-{ EXCCAUSE_ITLB_PRIVILEGE, 0, do_page_fault },
-/* EXCCAUSE_SIZE_RESTRICTION unhandled */
+{ EXCCAUSE_ITLB_PRIVILEGE, 0, do_page_fault },
{ EXCCAUSE_FETCH_CACHE_ATTRIBUTE, 0, do_page_fault },
-{ EXCCAUSE_DTLB_MISS, USER|KRNL, fast_second_level_miss},
-{ EXCCAUSE_DTLB_MISS, 0, do_page_fault },
{ EXCCAUSE_DTLB_MULTIHIT, 0, do_multihit },
-{ EXCCAUSE_DTLB_PRIVILEGE, 0, do_page_fault },
-/* EXCCAUSE_DTLB_SIZE_RESTRICTION unhandled */
-{ EXCCAUSE_STORE_CACHE_ATTRIBUTE, USER|KRNL, fast_store_prohibited },
+{ EXCCAUSE_DTLB_PRIVILEGE, 0, do_page_fault },
{ EXCCAUSE_STORE_CACHE_ATTRIBUTE, 0, do_page_fault },
{ EXCCAUSE_LOAD_CACHE_ATTRIBUTE, 0, do_page_fault },
-#endif /* CONFIG_MMU */
+#endif
/* XCCHAL_EXCCAUSE_FLOATING_POINT unhandled */
#if XTENSA_HAVE_COPROCESSOR(0)
COPROCESSOR(0),
@@ -179,7 +175,7 @@ __die_if_kernel(const char *str, struct pt_regs *regs, long err)
* Unhandled Exceptions. Kill user task or panic if in kernel space.
*/
-void do_unhandled(struct pt_regs *regs, unsigned long exccause)
+void do_unhandled(struct pt_regs *regs)
{
__die_if_kernel("Caught unhandled exception - should not happen",
regs, SIGKILL);
@@ -189,7 +185,7 @@ void do_unhandled(struct pt_regs *regs, unsigned long exccause)
"(pid = %d, pc = %#010lx) - should not happen\n"
"\tEXCCAUSE is %ld\n",
current->comm, task_pid_nr(current), regs->pc,
- exccause);
+ regs->exccause);
force_sig(SIGILL);
}
@@ -197,7 +193,7 @@ void do_unhandled(struct pt_regs *regs, unsigned long exccause)
 * Multi-hit exception. This is fatal!
*/
-void do_multihit(struct pt_regs *regs, unsigned long exccause)
+static void do_multihit(struct pt_regs *regs)
{
die("Caught multihit exception", regs, SIGKILL);
}
@@ -206,8 +202,6 @@ void do_multihit(struct pt_regs *regs, unsigned long exccause)
* IRQ handler.
*/
-extern void do_IRQ(int, struct pt_regs *);
-
#if XTENSA_FAKE_NMI
#define IS_POW2(v) (((v) & ((v) - 1)) == 0)
@@ -240,14 +234,10 @@ irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id);
DEFINE_PER_CPU(unsigned long, nmi_count);
-void do_nmi(struct pt_regs *regs)
+static void do_nmi(struct pt_regs *regs)
{
- struct pt_regs *old_regs;
-
- if ((regs->ps & PS_INTLEVEL_MASK) < LOCKLEVEL)
- trace_hardirqs_off();
+ struct pt_regs *old_regs = set_irq_regs(regs);
- old_regs = set_irq_regs(regs);
nmi_enter();
++*this_cpu_ptr(&nmi_count);
check_valid_nmi();
@@ -257,7 +247,7 @@ void do_nmi(struct pt_regs *regs)
}
#endif
-void do_interrupt(struct pt_regs *regs)
+static void do_interrupt(struct pt_regs *regs)
{
static const unsigned int_level_mask[] = {
0,
@@ -269,12 +259,9 @@ void do_interrupt(struct pt_regs *regs)
XCHAL_INTLEVEL6_MASK,
XCHAL_INTLEVEL7_MASK,
};
- struct pt_regs *old_regs;
+ struct pt_regs *old_regs = set_irq_regs(regs);
unsigned unhandled = ~0u;
- trace_hardirqs_off();
-
- old_regs = set_irq_regs(regs);
irq_enter();
for (;;) {
@@ -306,13 +293,47 @@ void do_interrupt(struct pt_regs *regs)
set_irq_regs(old_regs);
}
+static bool check_div0(struct pt_regs *regs)
+{
+ static const u8 pattern[] = {'D', 'I', 'V', '0'};
+ const u8 *p;
+ u8 buf[5];
+
+ if (user_mode(regs)) {
+ if (copy_from_user(buf, (void __user *)regs->pc + 2, 5))
+ return false;
+ p = buf;
+ } else {
+ p = (const u8 *)regs->pc + 2;
+ }
+
+ return memcmp(p, pattern, sizeof(pattern)) == 0 ||
+ memcmp(p + 1, pattern, sizeof(pattern)) == 0;
+}
+
/*
* Illegal instruction. Fatal if in kernel space.
*/
-void
-do_illegal_instruction(struct pt_regs *regs)
+static void do_illegal_instruction(struct pt_regs *regs)
{
+#ifdef CONFIG_USER_ABI_CALL0_PROBE
+ /*
+	 * When a call0 application encounters an illegal instruction, the
+	 * fast exception handler attempts to set PS.WOE and retry the
+	 * failing instruction.
+	 * If we get here we know that the instruction is also illegal
+	 * with PS.WOE set, so it is not related to the windowed option
+	 * and PS.WOE may be cleared.
+ */
+ if (regs->pc == current_thread_info()->ps_woe_fix_addr)
+ regs->ps &= ~PS_WOE_MASK;
+#endif
+ if (check_div0(regs)) {
+ do_div0(regs);
+ return;
+ }
+
__die_if_kernel("Illegal instruction in kernel", regs, SIGKILL);
/* If in user mode, send SIGILL signal to current process. */
@@ -322,6 +343,11 @@ do_illegal_instruction(struct pt_regs *regs)
force_sig(SIGILL);
}
+static void do_div0(struct pt_regs *regs)
+{
+ __die_if_kernel("Unhandled division by 0 in kernel", regs, SIGKILL);
+ force_sig_fault(SIGFPE, FPE_INTDIV, (void __user *)regs->pc);
+}
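
On cores that lack a hardware divider, division goes through helper routines that mark a zero divisor by executing an illegal instruction tagged with the ASCII bytes 'DIV0'; check_div0() scans for that signature near regs->pc so the fault can be reported as an arithmetic error. The user-visible change, sketched for such a configuration:

    /* previously died with SIGILL; now raises SIGFPE with
     * si_code == FPE_INTDIV and si_addr pointing at the faulting pc */
    volatile int zero = 0;
    int r = 100 / zero;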
/*
* Handle unaligned memory accesses from user space. Kill task.
@@ -331,8 +357,7 @@ do_illegal_instruction(struct pt_regs *regs)
*/
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
-void
-do_unaligned_user (struct pt_regs *regs)
+static void do_unaligned_user(struct pt_regs *regs)
{
__die_if_kernel("Unhandled unaligned exception in kernel",
regs, SIGKILL);
@@ -347,14 +372,20 @@ do_unaligned_user (struct pt_regs *regs)
}
#endif
+#if XTENSA_HAVE_COPROCESSORS
+static void do_coprocessor(struct pt_regs *regs)
+{
+ coprocessor_flush_release_all(current_thread_info());
+}
+#endif
+
/* Handle debug events.
* When CONFIG_HAVE_HW_BREAKPOINT is on this handler is called with
* preemption disabled to avoid rescheduling and keep mapping of hardware
* breakpoint structures to debug registers intact, so that
* DEBUGCAUSE.DBNUM could be used in case of data breakpoint hit.
*/
-void
-do_debug(struct pt_regs *regs)
+static void do_debug(struct pt_regs *regs)
{
#ifdef CONFIG_HAVE_HW_BREAKPOINT
int ret = check_hw_breakpoint(regs);
@@ -381,7 +412,8 @@ do_debug(struct pt_regs *regs)
/* Set exception C handler - for temporary use when probing exceptions */
-void * __init trap_set_handler(int cause, void *handler)
+xtensa_exception_handler *
+__init trap_set_handler(int cause, xtensa_exception_handler *handler)
{
void *previous = per_cpu(exc_table, 0).default_handler[cause];
@@ -392,8 +424,7 @@ void * __init trap_set_handler(int cause, void *handler)
static void trap_init_excsave(void)
{
- unsigned long excsave1 = (unsigned long)this_cpu_ptr(&exc_table);
- __asm__ __volatile__("wsr %0, excsave1\n" : : "a" (excsave1));
+ xtensa_set_sr(this_cpu_ptr(&exc_table), excsave1);
}
static void trap_init_debug(void)
diff --git a/arch/xtensa/lib/Makefile b/arch/xtensa/lib/Makefile
index 5848c133f7ea..d4e9c397e3fd 100644
--- a/arch/xtensa/lib/Makefile
+++ b/arch/xtensa/lib/Makefile
@@ -8,3 +8,5 @@ lib-y += memcopy.o memset.o checksum.o \
divsi3.o udivsi3.o modsi3.o umodsi3.o mulsi3.o \
usercopy.o strncpy_user.o strnlen_user.o
lib-$(CONFIG_PCI) += pci-auto.o
+lib-$(CONFIG_KCSAN) += kcsan-stubs.o
+KCSAN_SANITIZE_kcsan-stubs.o := n
diff --git a/arch/xtensa/lib/kcsan-stubs.c b/arch/xtensa/lib/kcsan-stubs.c
new file mode 100644
index 000000000000..2b08faa62b86
--- /dev/null
+++ b/arch/xtensa/lib/kcsan-stubs.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bug.h>
+#include <linux/types.h>
+
+void __atomic_store_8(volatile void *p, u64 v, int i)
+{
+ BUG();
+}
+
+u64 __atomic_load_8(const volatile void *p, int i)
+{
+ BUG();
+}
+
+u64 __atomic_exchange_8(volatile void *p, u64 v, int i)
+{
+ BUG();
+}
+
+bool __atomic_compare_exchange_8(volatile void *p1, void *p2, u64 v, bool b, int i1, int i2)
+{
+ BUG();
+}
+
+u64 __atomic_fetch_add_8(volatile void *p, u64 v, int i)
+{
+ BUG();
+}
+
+u64 __atomic_fetch_sub_8(volatile void *p, u64 v, int i)
+{
+ BUG();
+}
+
+u64 __atomic_fetch_and_8(volatile void *p, u64 v, int i)
+{
+ BUG();
+}
+
+u64 __atomic_fetch_or_8(volatile void *p, u64 v, int i)
+{
+ BUG();
+}
+
+u64 __atomic_fetch_xor_8(volatile void *p, u64 v, int i)
+{
+ BUG();
+}
+
+u64 __atomic_fetch_nand_8(volatile void *p, u64 v, int i)
+{
+ BUG();
+}
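
These stubs exist because xtensa has no native 64-bit atomic instructions: with KCSAN enabled the compiler may lower 8-byte __atomic builtins to out-of-line libcalls, which must link even though reaching one at run time would be a kernel bug, hence the unconditional BUG(). An illustration of code that would produce such a call on this target:

    #include <linux/types.h>

    u64 sample_load(const volatile u64 *p)
    {
        /* no 8-byte atomic load instruction exists on xtensa, so the
         * compiler emits a call to __atomic_load_8() for this builtin */
        return __atomic_load_n(p, __ATOMIC_RELAXED);
    }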
diff --git a/arch/xtensa/lib/memcopy.S b/arch/xtensa/lib/memcopy.S
index 582d817979ed..b20d206bcb71 100644
--- a/arch/xtensa/lib/memcopy.S
+++ b/arch/xtensa/lib/memcopy.S
@@ -402,13 +402,13 @@ WEAK(memmove)
*/
# copy 16 bytes per iteration for word-aligned dst and word-aligned src
#if XCHAL_HAVE_LOOPS
- loopnez a7, .backLoop1done
+ loopnez a7, .LbackLoop1done
#else /* !XCHAL_HAVE_LOOPS */
- beqz a7, .backLoop1done
+ beqz a7, .LbackLoop1done
slli a8, a7, 4
sub a8, a3, a8 # a8 = start of first 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
-.backLoop1:
+.LbackLoop1:
addi a3, a3, -16
l32i a7, a3, 12
l32i a6, a3, 8
@@ -420,9 +420,9 @@ WEAK(memmove)
s32i a7, a5, 4
s32i a6, a5, 0
#if !XCHAL_HAVE_LOOPS
- bne a3, a8, .backLoop1 # continue loop if a3:src != a8:src_start
+ bne a3, a8, .LbackLoop1 # continue loop if a3:src != a8:src_start
#endif /* !XCHAL_HAVE_LOOPS */
-.backLoop1done:
+.LbackLoop1done:
bbci.l a4, 3, .Lback2
# copy 8 bytes
addi a3, a3, -8
@@ -479,13 +479,13 @@ WEAK(memmove)
#endif
l32i a6, a3, 0 # load first word
#if XCHAL_HAVE_LOOPS
- loopnez a7, .backLoop2done
+ loopnez a7, .LbackLoop2done
#else /* !XCHAL_HAVE_LOOPS */
- beqz a7, .backLoop2done
+ beqz a7, .LbackLoop2done
slli a10, a7, 4
sub a10, a3, a10 # a10 = start of first 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
-.backLoop2:
+.LbackLoop2:
addi a3, a3, -16
l32i a7, a3, 12
l32i a8, a3, 8
@@ -501,9 +501,9 @@ WEAK(memmove)
__src_b a9, a6, a9
s32i a9, a5, 0
#if !XCHAL_HAVE_LOOPS
- bne a3, a10, .backLoop2 # continue loop if a3:src != a10:src_start
+ bne a3, a10, .LbackLoop2 # continue loop if a3:src != a10:src_start
#endif /* !XCHAL_HAVE_LOOPS */
-.backLoop2done:
+.LbackLoop2done:
bbci.l a4, 3, .Lback12
# copy 8 bytes
addi a3, a3, -8
diff --git a/arch/xtensa/mm/Makefile b/arch/xtensa/mm/Makefile
index f7fb08ae768f..44153a335951 100644
--- a/arch/xtensa/mm/Makefile
+++ b/arch/xtensa/mm/Makefile
@@ -4,7 +4,8 @@
#
obj-y := init.o misc.o
-obj-$(CONFIG_MMU) += cache.o fault.o ioremap.o mmu.o tlb.o
+obj-$(CONFIG_PFAULT) += fault.o
+obj-$(CONFIG_MMU) += cache.o ioremap.o mmu.o tlb.o
obj-$(CONFIG_HIGHMEM) += highmem.o
obj-$(CONFIG_KASAN) += kasan_init.o
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index 06d0973a0d74..16f0a5ff5799 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -21,9 +21,61 @@
#include <asm/cacheflush.h>
#include <asm/hardirq.h>
-DEFINE_PER_CPU(unsigned long, asid_cache) = ASID_USER_FIRST;
void bad_page_fault(struct pt_regs*, unsigned long, int);
+static void vmalloc_fault(struct pt_regs *regs, unsigned int address)
+{
+#ifdef CONFIG_MMU
+ /* Synchronize this task's top level page-table
+ * with the 'reference' page table.
+ */
+ struct mm_struct *act_mm = current->active_mm;
+ int index = pgd_index(address);
+ pgd_t *pgd, *pgd_k;
+ p4d_t *p4d, *p4d_k;
+ pud_t *pud, *pud_k;
+ pmd_t *pmd, *pmd_k;
+ pte_t *pte_k;
+
+ if (act_mm == NULL)
+ goto bad_page_fault;
+
+ pgd = act_mm->pgd + index;
+ pgd_k = init_mm.pgd + index;
+
+ if (!pgd_present(*pgd_k))
+ goto bad_page_fault;
+
+ pgd_val(*pgd) = pgd_val(*pgd_k);
+
+ p4d = p4d_offset(pgd, address);
+ p4d_k = p4d_offset(pgd_k, address);
+ if (!p4d_present(*p4d) || !p4d_present(*p4d_k))
+ goto bad_page_fault;
+
+ pud = pud_offset(p4d, address);
+ pud_k = pud_offset(p4d_k, address);
+ if (!pud_present(*pud) || !pud_present(*pud_k))
+ goto bad_page_fault;
+
+ pmd = pmd_offset(pud, address);
+ pmd_k = pmd_offset(pud_k, address);
+ if (!pmd_present(*pmd) || !pmd_present(*pmd_k))
+ goto bad_page_fault;
+
+ pmd_val(*pmd) = pmd_val(*pmd_k);
+ pte_k = pte_offset_kernel(pmd_k, address);
+
+ if (!pte_present(*pte_k))
+ goto bad_page_fault;
+ return;
+
+bad_page_fault:
+ bad_page_fault(regs, address, SIGKILL);
+#else
+ WARN_ONCE(1, "%s in noMMU configuration\n", __func__);
+#endif
+}
/*
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
@@ -49,8 +101,10 @@ void do_page_fault(struct pt_regs *regs)
/* We fault-in kernel-space virtual memory on-demand. The
* 'reference' page table is init_mm.pgd.
*/
- if (address >= TASK_SIZE && !user_mode(regs))
- goto vmalloc_fault;
+ if (address >= TASK_SIZE && !user_mode(regs)) {
+ vmalloc_fault(regs, address);
+ return;
+ }
/* If we're in an interrupt or have no user
* context, we must not take the fault..
@@ -114,7 +168,7 @@ good_area:
if (fault_signal_pending(fault, regs)) {
if (!user_mode(regs))
- goto bad_page_fault;
+ bad_page_fault(regs, address, SIGKILL);
return;
}
@@ -181,56 +235,6 @@ do_sigbus:
if (!user_mode(regs))
bad_page_fault(regs, address, SIGBUS);
return;
-
-vmalloc_fault:
- {
- /* Synchronize this task's top level page-table
- * with the 'reference' page table.
- */
- struct mm_struct *act_mm = current->active_mm;
- int index = pgd_index(address);
- pgd_t *pgd, *pgd_k;
- p4d_t *p4d, *p4d_k;
- pud_t *pud, *pud_k;
- pmd_t *pmd, *pmd_k;
- pte_t *pte_k;
-
- if (act_mm == NULL)
- goto bad_page_fault;
-
- pgd = act_mm->pgd + index;
- pgd_k = init_mm.pgd + index;
-
- if (!pgd_present(*pgd_k))
- goto bad_page_fault;
-
- pgd_val(*pgd) = pgd_val(*pgd_k);
-
- p4d = p4d_offset(pgd, address);
- p4d_k = p4d_offset(pgd_k, address);
- if (!p4d_present(*p4d) || !p4d_present(*p4d_k))
- goto bad_page_fault;
-
- pud = pud_offset(p4d, address);
- pud_k = pud_offset(p4d_k, address);
- if (!pud_present(*pud) || !pud_present(*pud_k))
- goto bad_page_fault;
-
- pmd = pmd_offset(pud, address);
- pmd_k = pmd_offset(pud_k, address);
- if (!pmd_present(*pmd) || !pmd_present(*pmd_k))
- goto bad_page_fault;
-
- pmd_val(*pmd) = pmd_val(*pmd_k);
- pte_k = pte_offset_kernel(pmd_k, address);
-
- if (!pte_present(*pte_k))
- goto bad_page_fault;
- return;
- }
-bad_page_fault:
- bad_page_fault(regs, address, SIGKILL);
- return;
}
diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c
index 38acda4f04e8..92e158c69c10 100644
--- a/arch/xtensa/mm/mmu.c
+++ b/arch/xtensa/mm/mmu.c
@@ -18,6 +18,8 @@
#include <asm/initialize_mmu.h>
#include <asm/io.h>
+DEFINE_PER_CPU(unsigned long, asid_cache) = ASID_USER_FIRST;
+
#if defined(CONFIG_HIGHMEM)
static void * __init init_pmd(unsigned long vaddr, unsigned long n_pages)
{
diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c
index be3aaaad8bee..fd84d4891758 100644
--- a/arch/xtensa/platforms/iss/network.c
+++ b/arch/xtensa/platforms/iss/network.c
@@ -38,9 +38,6 @@
#define ISS_NET_TIMER_VALUE (HZ / 10)
-static DEFINE_SPINLOCK(opened_lock);
-static LIST_HEAD(opened);
-
static DEFINE_SPINLOCK(devices_lock);
static LIST_HEAD(devices);
@@ -59,17 +56,27 @@ struct tuntap_info {
/* ------------------------------------------------------------------------- */
+struct iss_net_private;
+
+struct iss_net_ops {
+ int (*open)(struct iss_net_private *lp);
+ void (*close)(struct iss_net_private *lp);
+ int (*read)(struct iss_net_private *lp, struct sk_buff **skb);
+ int (*write)(struct iss_net_private *lp, struct sk_buff **skb);
+ unsigned short (*protocol)(struct sk_buff *skb);
+ int (*poll)(struct iss_net_private *lp);
+};
+
/* This structure contains our private information for the driver. */
struct iss_net_private {
struct list_head device_list;
- struct list_head opened_list;
spinlock_t lock;
struct net_device *dev;
struct platform_device pdev;
struct timer_list tl;
- struct net_device_stats stats;
+ struct rtnl_link_stats64 stats;
struct timer_list timer;
unsigned int timer_val;
@@ -82,12 +89,7 @@ struct iss_net_private {
struct tuntap_info tuntap;
} info;
- int (*open)(struct iss_net_private *lp);
- void (*close)(struct iss_net_private *lp);
- int (*read)(struct iss_net_private *lp, struct sk_buff **skb);
- int (*write)(struct iss_net_private *lp, struct sk_buff **skb);
- unsigned short (*protocol)(struct sk_buff *skb);
- int (*poll)(struct iss_net_private *lp);
+ const struct iss_net_ops *net_ops;
} tp;
};
@@ -215,6 +217,15 @@ static int tuntap_poll(struct iss_net_private *lp)
return simc_poll(lp->tp.info.tuntap.fd);
}
+static const struct iss_net_ops tuntap_ops = {
+ .open = tuntap_open,
+ .close = tuntap_close,
+ .read = tuntap_read,
+ .write = tuntap_write,
+ .protocol = tuntap_protocol,
+ .poll = tuntap_poll,
+};
+
/*
* ethX=tuntap,[mac address],device name
*/
@@ -257,13 +268,7 @@ static int tuntap_probe(struct iss_net_private *lp, int index, char *init)
lp->mtu = TRANSPORT_TUNTAP_MTU;
lp->tp.info.tuntap.fd = -1;
-
- lp->tp.open = tuntap_open;
- lp->tp.close = tuntap_close;
- lp->tp.read = tuntap_read;
- lp->tp.write = tuntap_write;
- lp->tp.protocol = tuntap_protocol;
- lp->tp.poll = tuntap_poll;
+ lp->tp.net_ops = &tuntap_ops;
return 1;
}
@@ -278,14 +283,16 @@ static int iss_net_rx(struct net_device *dev)
/* Check if there is any new data. */
- if (lp->tp.poll(lp) == 0)
+ if (lp->tp.net_ops->poll(lp) == 0)
return 0;
/* Try to allocate memory; if it fails, try again next round. */
skb = dev_alloc_skb(dev->mtu + 2 + ETH_HEADER_OTHER);
if (skb == NULL) {
+ spin_lock_bh(&lp->lock);
lp->stats.rx_dropped++;
+ spin_unlock_bh(&lp->lock);
return 0;
}
@@ -295,15 +302,17 @@ static int iss_net_rx(struct net_device *dev)
skb->dev = dev;
skb_reset_mac_header(skb);
- pkt_len = lp->tp.read(lp, &skb);
+ pkt_len = lp->tp.net_ops->read(lp, &skb);
skb_put(skb, pkt_len);
if (pkt_len > 0) {
skb_trim(skb, pkt_len);
- skb->protocol = lp->tp.protocol(skb);
+ skb->protocol = lp->tp.net_ops->protocol(skb);
+ spin_lock_bh(&lp->lock);
lp->stats.rx_bytes += skb->len;
lp->stats.rx_packets++;
+ spin_unlock_bh(&lp->lock);
netif_rx(skb);
return pkt_len;
}
@@ -311,38 +320,24 @@ static int iss_net_rx(struct net_device *dev)
return pkt_len;
}
-static int iss_net_poll(void)
+static int iss_net_poll(struct iss_net_private *lp)
{
- struct list_head *ele;
int err, ret = 0;
- spin_lock(&opened_lock);
-
- list_for_each(ele, &opened) {
- struct iss_net_private *lp;
-
- lp = list_entry(ele, struct iss_net_private, opened_list);
-
- if (!netif_running(lp->dev))
- break;
-
- spin_lock(&lp->lock);
-
- while ((err = iss_net_rx(lp->dev)) > 0)
- ret++;
- spin_unlock(&lp->lock);
+ if (!netif_running(lp->dev))
+ return 0;
+ while ((err = iss_net_rx(lp->dev)) > 0)
+ ret++;
- if (err < 0) {
- pr_err("Device '%s' read returned %d, shutting it down\n",
- lp->dev->name, err);
- dev_close(lp->dev);
- } else {
- /* FIXME reactivate_fd(lp->fd, ISS_ETH_IRQ); */
- }
+ if (err < 0) {
+ pr_err("Device '%s' read returned %d, shutting it down\n",
+ lp->dev->name, err);
+ dev_close(lp->dev);
+ } else {
+ /* FIXME reactivate_fd(lp->fd, ISS_ETH_IRQ); */
}
- spin_unlock(&opened_lock);
return ret;
}
@@ -351,10 +346,8 @@ static void iss_net_timer(struct timer_list *t)
{
struct iss_net_private *lp = from_timer(lp, t, timer);
- iss_net_poll();
- spin_lock(&lp->lock);
+ iss_net_poll(lp);
mod_timer(&lp->timer, jiffies + lp->timer_val);
- spin_unlock(&lp->lock);
}
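
The from_timer() call above is a container_of() wrapper: it recovers the enclosing iss_net_private from the address of the timer_list embedded in it, which is what lets each timer poll just its own device. A standalone sketch of the underlying idiom, with stand-in struct names:

#include <stddef.h>
#include <stdio.h>

/* the same pointer arithmetic the kernel's container_of()/from_timer() use */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct timer {                  /* stand-in for struct timer_list */
        int expires;
};

struct device_priv {            /* stand-in for struct iss_net_private */
        int index;
        struct timer timer;     /* embedded member, as lp->timer is */
};

static void timer_fn(struct timer *t)
{
        /* from_timer(lp, t, timer) expands to exactly this */
        struct device_priv *lp = container_of(t, struct device_priv, timer);

        printf("timer fired for device %d\n", lp->index);
}

int main(void)
{
        struct device_priv dev = { .index = 3 };

        timer_fn(&dev.timer);
        return 0;
}
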
@@ -363,11 +356,9 @@ static int iss_net_open(struct net_device *dev)
struct iss_net_private *lp = netdev_priv(dev);
int err;
- spin_lock_bh(&lp->lock);
-
- err = lp->tp.open(lp);
+ err = lp->tp.net_ops->open(lp);
if (err < 0)
- goto out;
+ return err;
netif_start_queue(dev);
@@ -378,36 +369,21 @@ static int iss_net_open(struct net_device *dev)
while ((err = iss_net_rx(dev)) > 0)
;
- spin_unlock_bh(&lp->lock);
- spin_lock_bh(&opened_lock);
- list_add(&lp->opened_list, &opened);
- spin_unlock_bh(&opened_lock);
- spin_lock_bh(&lp->lock);
-
timer_setup(&lp->timer, iss_net_timer, 0);
lp->timer_val = ISS_NET_TIMER_VALUE;
mod_timer(&lp->timer, jiffies + lp->timer_val);
-out:
- spin_unlock_bh(&lp->lock);
return err;
}
static int iss_net_close(struct net_device *dev)
{
struct iss_net_private *lp = netdev_priv(dev);
- netif_stop_queue(dev);
- spin_lock_bh(&lp->lock);
-
- spin_lock(&opened_lock);
- list_del(&opened);
- spin_unlock(&opened_lock);
+ netif_stop_queue(dev);
del_timer_sync(&lp->timer);
- lp->tp.close(lp);
-
- spin_unlock_bh(&lp->lock);
+ lp->tp.net_ops->close(lp);
return 0;
}
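
With no lock held across the whole of iss_net_open(), the out: unwind label has nothing left to release and the error path collapses to a direct return. A miniature contrast of the two shapes, using a hypothetical backend_open() (build with -pthread):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static int backend_open(void) { return -1; /* simulate failure */ }

/* before: every exit must drop the lock, so errors funnel through a label */
static int open_locked(void)
{
        int err;

        pthread_mutex_lock(&lock);
        err = backend_open();
        if (err < 0)
                goto out;
        /* ... more setup under the lock ... */
out:
        pthread_mutex_unlock(&lock);
        return err;
}

/* after: nothing to unwind, so failures return directly */
static int open_unlocked(void)
{
        int err = backend_open();

        if (err < 0)
                return err;
        /* ... setup that takes the lock only around short critical sections ... */
        return 0;
}

int main(void)
{
        printf("locked: %d, unlocked: %d\n", open_locked(), open_unlocked());
        return 0;
}
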
@@ -417,13 +393,14 @@ static int iss_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
int len;
netif_stop_queue(dev);
- spin_lock_bh(&lp->lock);
- len = lp->tp.write(lp, &skb);
+ len = lp->tp.net_ops->write(lp, &skb);
if (len == skb->len) {
+ spin_lock_bh(&lp->lock);
lp->stats.tx_packets++;
lp->stats.tx_bytes += skb->len;
+ spin_unlock_bh(&lp->lock);
netif_trans_update(dev);
netif_start_queue(dev);
@@ -432,24 +409,29 @@ static int iss_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
} else if (len == 0) {
netif_start_queue(dev);
+ spin_lock_bh(&lp->lock);
lp->stats.tx_dropped++;
+ spin_unlock_bh(&lp->lock);
} else {
netif_start_queue(dev);
pr_err("%s: %s failed(%d)\n", dev->name, __func__, len);
}
- spin_unlock_bh(&lp->lock);
dev_kfree_skb(skb);
return NETDEV_TX_OK;
}
-static struct net_device_stats *iss_net_get_stats(struct net_device *dev)
+static void iss_net_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *stats)
{
struct iss_net_private *lp = netdev_priv(dev);
- return &lp->stats;
+
+ spin_lock_bh(&lp->lock);
+ *stats = lp->stats;
+ spin_unlock_bh(&lp->lock);
}
static void iss_net_set_multicast_list(struct net_device *dev)
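
After the conversion to ndo_get_stats64, writers in the RX/TX paths bump the counters under lp->lock and the reader copies the whole structure out under the same lock, so a stats query always sees a consistent snapshot. A userspace sketch of the snapshot-under-lock idiom with pthreads (all names invented; build with -pthread):

#include <pthread.h>
#include <stdio.h>

struct stats {                  /* stand-in for struct rtnl_link_stats64 */
        unsigned long long rx_packets;
        unsigned long long rx_bytes;
};

struct priv {                   /* stand-in for struct iss_net_private */
        pthread_mutex_t lock;
        struct stats stats;
};

/* writer side: mirrors the spin_lock_bh() around the lp->stats updates */
static void account_rx(struct priv *p, unsigned int len)
{
        pthread_mutex_lock(&p->lock);
        p->stats.rx_packets++;
        p->stats.rx_bytes += len;
        pthread_mutex_unlock(&p->lock);
}

/* reader side: mirrors iss_net_get_stats64() copying a snapshot out */
static void get_stats(struct priv *p, struct stats *out)
{
        pthread_mutex_lock(&p->lock);
        *out = p->stats;        /* struct copy under the lock */
        pthread_mutex_unlock(&p->lock);
}

int main(void)
{
        struct priv p = { .lock = PTHREAD_MUTEX_INITIALIZER };
        struct stats snap;

        account_rx(&p, 1500);
        get_stats(&p, &snap);
        printf("%llu packets, %llu bytes\n", snap.rx_packets, snap.rx_bytes);
        return 0;
}
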
@@ -460,19 +442,6 @@ static void iss_net_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
}
-static int iss_net_set_mac(struct net_device *dev, void *addr)
-{
- struct iss_net_private *lp = netdev_priv(dev);
- struct sockaddr *hwaddr = addr;
-
- if (!is_valid_ether_addr(hwaddr->sa_data))
- return -EADDRNOTAVAIL;
- spin_lock_bh(&lp->lock);
- eth_hw_addr_set(dev, hwaddr->sa_data);
- spin_unlock_bh(&lp->lock);
- return 0;
-}
-
static int iss_net_change_mtu(struct net_device *dev, int new_mtu)
{
return -EINVAL;
@@ -494,11 +463,11 @@ static int driver_registered;
static const struct net_device_ops iss_netdev_ops = {
.ndo_open = iss_net_open,
.ndo_stop = iss_net_close,
- .ndo_get_stats = iss_net_get_stats,
+ .ndo_get_stats64 = iss_net_get_stats64,
.ndo_start_xmit = iss_net_start_xmit,
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = iss_net_change_mtu,
- .ndo_set_mac_address = iss_net_set_mac,
+ .ndo_set_mac_address = eth_mac_addr,
.ndo_tx_timeout = iss_net_tx_timeout,
.ndo_set_rx_mode = iss_net_set_multicast_list,
};
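
Switching .ndo_set_mac_address to the stock eth_mac_addr() works because the removed iss_net_set_mac() did nothing beyond the generic validate-then-copy sequence. A userspace sketch of that sequence, using stand-in types rather than the kernel implementation:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define ETH_ALEN 6

/* the gist of is_valid_ether_addr(): not multicast, not all-zeros */
static bool valid_ether_addr(const unsigned char *a)
{
        static const unsigned char zero[ETH_ALEN];

        return !(a[0] & 0x01) && memcmp(a, zero, ETH_ALEN) != 0;
}

struct net_dev {
        unsigned char dev_addr[ETH_ALEN];
};

/* validate, then copy: what the driver-private helper duplicated */
static int set_mac(struct net_dev *dev, const unsigned char *addr)
{
        if (!valid_ether_addr(addr))
                return -1;      /* -EADDRNOTAVAIL in the kernel */
        memcpy(dev->dev_addr, addr, ETH_ALEN);
        return 0;
}

int main(void)
{
        struct net_dev dev;
        const unsigned char mac[ETH_ALEN] = { 0x02, 0x00, 0x00, 0x12, 0x34, 0x56 };

        if (set_mac(&dev, mac) == 0)
                printf("MAC set, first octet %02x\n", dev.dev_addr[0]);
        return 0;
}
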
@@ -520,7 +489,6 @@ static int iss_net_configure(int index, char *init)
lp = netdev_priv(dev);
*lp = (struct iss_net_private) {
.device_list = LIST_HEAD_INIT(lp->device_list),
- .opened_list = LIST_HEAD_INIT(lp->opened_list),
.dev = dev,
.index = index,
};
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index 0f0e0724397f..4255b92fa3eb 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -211,12 +211,18 @@ static ssize_t proc_read_simdisk(struct file *file, char __user *buf,
struct simdisk *dev = pde_data(file_inode(file));
const char *s = dev->filename;
if (s) {
- ssize_t n = simple_read_from_buffer(buf, size, ppos,
- s, strlen(s));
- if (n < 0)
- return n;
- buf += n;
- size -= n;
+ ssize_t len = strlen(s);
+ char *temp = kmalloc(len + 2, GFP_KERNEL);
+
+ if (!temp)
+ return -ENOMEM;
+
+ len = scnprintf(temp, len + 2, "%s\n", s);
+ len = simple_read_from_buffer(buf, size, ppos,
+ temp, len);
+
+ kfree(temp);
+ return len;
}
return simple_read_from_buffer(buf, size, ppos, "\n", 1);
}
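
The rewritten proc handler formats the filename plus trailing newline into a scratch buffer so that one simple_read_from_buffer() call handles all of the *ppos and short-read bookkeeping. A userspace sketch of the build-then-copy pattern; read_from_buffer() here is a crude stand-in for the kernel helper, not its implementation:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

/* minimal stand-in for simple_read_from_buffer(): copy from src + *ppos */
static ssize_t read_from_buffer(char *dst, size_t size, long *ppos,
                                const char *src, size_t srclen)
{
        size_t pos = (size_t)*ppos, n;

        if (pos >= srclen)
                return 0;
        n = srclen - pos;
        if (n > size)
                n = size;
        memcpy(dst, src + pos, n);
        *ppos += (long)n;
        return (ssize_t)n;
}

int main(void)
{
        const char *s = "/tmp/simdisk0.img";    /* plays the role of dev->filename */
        size_t len = strlen(s);
        char *temp = malloc(len + 2);           /* room for '\n' and the NUL */
        char out[8];
        long ppos = 0;
        ssize_t n;

        if (!temp)
                return 1;

        /* scnprintf() analogue; the buffer is sized to fit, so no truncation */
        len = (size_t)snprintf(temp, len + 2, "%s\n", s);

        /* repeated short reads just advance ppos, as in the real proc handler */
        while ((n = read_from_buffer(out, sizeof(out), &ppos, temp, len)) > 0)
                fwrite(out, 1, (size_t)n, stdout);

        free(temp);
        return 0;
}
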
diff --git a/arch/xtensa/platforms/xt2000/setup.c b/arch/xtensa/platforms/xt2000/setup.c
index 145d129be76f..0dc22c371614 100644
--- a/arch/xtensa/platforms/xt2000/setup.c
+++ b/arch/xtensa/platforms/xt2000/setup.c
@@ -78,7 +78,7 @@ void __init platform_init(bp_tag_t *first)
void platform_heartbeat(void)
{
- static int i=0, t = 0;
+ static int i, t;
if (--t < 0)
{