summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/include/asm/pgtable.h2
-rw-r--r--arch/alpha/kernel/Makefile2
-rw-r--r--arch/alpha/kernel/srmcons.c3
-rw-r--r--arch/arc/include/asm/arcregs.h2
-rw-r--r--arch/arc/include/asm/atomic.h4
-rw-r--r--arch/arc/include/asm/atomic64-arcv2.h15
-rw-r--r--arch/arc/include/asm/bitops.h4
-rw-r--r--arch/arc/include/asm/bug.h4
-rw-r--r--arch/arc/include/asm/cache.h4
-rw-r--r--arch/arc/include/asm/current.h4
-rw-r--r--arch/arc/include/asm/dsp-impl.h2
-rw-r--r--arch/arc/include/asm/dsp.h4
-rw-r--r--arch/arc/include/asm/dwarf.h4
-rw-r--r--arch/arc/include/asm/entry.h4
-rw-r--r--arch/arc/include/asm/irqflags-arcv2.h4
-rw-r--r--arch/arc/include/asm/irqflags-compact.h4
-rw-r--r--arch/arc/include/asm/jump_label.h4
-rw-r--r--arch/arc/include/asm/linkage.h6
-rw-r--r--arch/arc/include/asm/mmu-arcv2.h4
-rw-r--r--arch/arc/include/asm/mmu.h2
-rw-r--r--arch/arc/include/asm/page.h4
-rw-r--r--arch/arc/include/asm/pgtable-bits-arcv2.h6
-rw-r--r--arch/arc/include/asm/pgtable-levels.h4
-rw-r--r--arch/arc/include/asm/pgtable.h4
-rw-r--r--arch/arc/include/asm/processor.h4
-rw-r--r--arch/arc/include/asm/ptrace.h4
-rw-r--r--arch/arc/include/asm/switch_to.h2
-rw-r--r--arch/arc/include/asm/thread_info.h4
-rw-r--r--arch/arc/include/uapi/asm/ptrace.h4
-rw-r--r--arch/arc/kernel/Makefile2
-rw-r--r--arch/arc/kernel/unwind.c11
-rw-r--r--arch/arm/configs/exynos_defconfig2
-rw-r--r--arch/arm/configs/multi_v7_defconfig2
-rw-r--r--arch/arm/configs/pxa_defconfig2
-rw-r--r--arch/arm/include/asm/pgtable.h2
-rw-r--r--arch/arm/include/asm/simd.h3
-rw-r--r--arch/arm/kernel/Makefile2
-rw-r--r--arch/arm/mm/ioremap.c4
-rw-r--r--arch/arm/vfp/vfpmodule.c1
-rw-r--r--arch/arm64/Kconfig1
-rw-r--r--arch/arm64/Kconfig.platforms2
-rw-r--r--arch/arm64/boot/dts/apple/spi1-nvram.dtsi2
-rw-r--r--arch/arm64/boot/dts/apple/t8103-j293.dts2
-rw-r--r--arch/arm64/boot/dts/apple/t8103-jxxx.dtsi2
-rw-r--r--arch/arm64/boot/dts/apple/t8103.dtsi2
-rw-r--r--arch/arm64/boot/dts/apple/t8112-j493.dts2
-rw-r--r--arch/arm64/boot/dts/apple/t8112.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi3
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi1
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts20
-rw-r--r--arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts12
-rw-r--r--arch/arm64/boot/dts/freescale/imx95.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts2
-rw-r--r--arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi1
-rw-r--r--arch/arm64/boot/dts/renesas/r9a09g057.dtsi165
-rw-r--r--arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi23
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts28
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3576.dtsi2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi20
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi5
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi35
-rw-r--r--arch/arm64/configs/defconfig5
-rw-r--r--arch/arm64/include/asm/el2_setup.h43
-rw-r--r--arch/arm64/include/asm/kernel-pgtable.h2
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h62
-rw-r--r--arch/arm64/include/asm/kvm_host.h39
-rw-r--r--arch/arm64/include/asm/pgtable.h2
-rw-r--r--arch/arm64/include/asm/tlbflush.h9
-rw-r--r--arch/arm64/kernel/Makefile5
-rw-r--r--arch/arm64/kernel/cpufeature.c64
-rw-r--r--arch/arm64/kernel/cpuinfo.c7
-rw-r--r--arch/arm64/kernel/efi.c11
-rw-r--r--arch/arm64/kernel/image-vars.h17
-rw-r--r--arch/arm64/kernel/pi/idreg-override.c3
-rw-r--r--arch/arm64/kernel/process.c9
-rw-r--r--arch/arm64/kernel/ptrace.c2
-rw-r--r--arch/arm64/kernel/smp.c2
-rw-r--r--arch/arm64/kvm/arch_timer.c18
-rw-r--r--arch/arm64/kvm/arm.c19
-rw-r--r--arch/arm64/kvm/debug.c4
-rw-r--r--arch/arm64/kvm/fpsimd.c30
-rw-r--r--arch/arm64/kvm/hyp/exception.c4
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h151
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h6
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c9
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c20
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c59
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c111
-rw-r--r--arch/arm64/kvm/hyp/vhe/sysreg-sr.c48
-rw-r--r--arch/arm64/kvm/nested.c28
-rw-r--r--arch/arm64/kvm/pmu-emul.c24
-rw-r--r--arch/arm64/kvm/sys_regs.c62
-rw-r--r--arch/arm64/kvm/sys_regs.h4
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3-nested.c95
-rw-r--r--arch/arm64/lib/crypto/poly1305-glue.c4
-rw-r--r--arch/arm64/mm/fault.c30
-rw-r--r--arch/arm64/mm/mmu.c3
-rw-r--r--arch/arm64/mm/proc.S1
-rw-r--r--arch/csky/include/asm/pgtable.h2
-rw-r--r--arch/csky/kernel/Makefile2
-rw-r--r--arch/hexagon/include/asm/pgtable.h2
-rw-r--r--arch/hexagon/kernel/Makefile2
-rw-r--r--arch/loongarch/Kconfig18
-rw-r--r--arch/loongarch/Makefile11
-rw-r--r--arch/loongarch/boot/dts/loongson-2k0500.dtsi160
-rw-r--r--arch/loongarch/boot/dts/loongson-2k1000-ref.dts24
-rw-r--r--arch/loongarch/boot/dts/loongson-2k1000.dtsi42
-rw-r--r--arch/loongarch/boot/dts/loongson-2k2000.dtsi60
-rwxr-xr-xarch/loongarch/boot/install.sh56
-rw-r--r--arch/loongarch/include/asm/acpi.h2
-rw-r--r--arch/loongarch/include/asm/addrspace.h8
-rw-r--r--arch/loongarch/include/asm/alternative-asm.h4
-rw-r--r--arch/loongarch/include/asm/alternative.h4
-rw-r--r--arch/loongarch/include/asm/asm-extable.h6
-rw-r--r--arch/loongarch/include/asm/asm.h8
-rw-r--r--arch/loongarch/include/asm/cpu.h4
-rw-r--r--arch/loongarch/include/asm/entry-common.h8
-rw-r--r--arch/loongarch/include/asm/ftrace.h4
-rw-r--r--arch/loongarch/include/asm/gpr-num.h6
-rw-r--r--arch/loongarch/include/asm/irqflags.h20
-rw-r--r--arch/loongarch/include/asm/jump_label.h4
-rw-r--r--arch/loongarch/include/asm/kasan.h2
-rw-r--r--arch/loongarch/include/asm/loongarch.h20
-rw-r--r--arch/loongarch/include/asm/numa.h14
-rw-r--r--arch/loongarch/include/asm/orc_types.h4
-rw-r--r--arch/loongarch/include/asm/page.h4
-rw-r--r--arch/loongarch/include/asm/pgtable-bits.h4
-rw-r--r--arch/loongarch/include/asm/pgtable.h6
-rw-r--r--arch/loongarch/include/asm/prefetch.h2
-rw-r--r--arch/loongarch/include/asm/smp.h3
-rw-r--r--arch/loongarch/include/asm/sparsemem.h5
-rw-r--r--arch/loongarch/include/asm/stackframe.h6
-rw-r--r--arch/loongarch/include/asm/stacktrace.h5
-rw-r--r--arch/loongarch/include/asm/thread_info.h4
-rw-r--r--arch/loongarch/include/asm/topology.h15
-rw-r--r--arch/loongarch/include/asm/types.h2
-rw-r--r--arch/loongarch/include/asm/unwind_hints.h6
-rw-r--r--arch/loongarch/include/asm/vdso/arch_data.h4
-rw-r--r--arch/loongarch/include/asm/vdso/getrandom.h6
-rw-r--r--arch/loongarch/include/asm/vdso/gettimeofday.h10
-rw-r--r--arch/loongarch/include/asm/vdso/processor.h4
-rw-r--r--arch/loongarch/include/asm/vdso/vdso.h4
-rw-r--r--arch/loongarch/include/asm/vdso/vsyscall.h4
-rw-r--r--arch/loongarch/kernel/Makefile2
-rw-r--r--arch/loongarch/kernel/acpi.c53
-rw-r--r--arch/loongarch/kernel/alternative.c1
-rw-r--r--arch/loongarch/kernel/efi.c12
-rw-r--r--arch/loongarch/kernel/elf.c1
-rw-r--r--arch/loongarch/kernel/entry.S3
-rw-r--r--arch/loongarch/kernel/kfpu.c1
-rw-r--r--arch/loongarch/kernel/numa.c108
-rw-r--r--arch/loongarch/kernel/paravirt.c1
-rw-r--r--arch/loongarch/kernel/smp.c38
-rw-r--r--arch/loongarch/kernel/time.c2
-rw-r--r--arch/loongarch/kernel/traps.c1
-rw-r--r--arch/loongarch/kernel/unwind_guess.c1
-rw-r--r--arch/loongarch/kernel/unwind_orc.c3
-rw-r--r--arch/loongarch/kernel/unwind_prologue.c1
-rw-r--r--arch/loongarch/kernel/vdso.c4
-rw-r--r--arch/loongarch/kvm/intc/eiointc.c89
-rw-r--r--arch/loongarch/lib/crc32-loongarch.c1
-rw-r--r--arch/loongarch/lib/csum.c1
-rw-r--r--arch/loongarch/mm/hugetlbpage.c3
-rw-r--r--arch/loongarch/mm/init.c8
-rw-r--r--arch/loongarch/mm/ioremap.c4
-rw-r--r--arch/loongarch/pci/acpi.c14
-rw-r--r--arch/loongarch/pci/pci.c1
-rw-r--r--arch/m68k/include/asm/mcf_pgtable.h2
-rw-r--r--arch/m68k/include/asm/motorola_pgtable.h2
-rw-r--r--arch/m68k/include/asm/sun3_pgtable.h2
-rw-r--r--arch/m68k/kernel/Makefile2
-rw-r--r--arch/microblaze/include/asm/pgtable.h2
-rw-r--r--arch/microblaze/kernel/Makefile2
-rw-r--r--arch/mips/Kbuild.platforms1
-rw-r--r--arch/mips/Kconfig28
-rw-r--r--arch/mips/alchemy/common/gpiolib.c6
-rw-r--r--arch/mips/bcm63xx/boards/board_bcm963xx.c2
-rw-r--r--arch/mips/bcm63xx/gpio.c7
-rw-r--r--arch/mips/boot/compressed/uart-16550.c5
-rw-r--r--arch/mips/boot/dts/Makefile1
-rw-r--r--arch/mips/boot/dts/econet/Makefile2
-rw-r--r--arch/mips/boot/dts/econet/en751221.dtsi67
-rw-r--r--arch/mips/boot/dts/econet/en751221_smartfiber_xp8421-b.dts19
-rw-r--r--arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dts1
-rw-r--r--arch/mips/boot/dts/pic32/pic32mzda.dtsi2
-rw-r--r--arch/mips/boot/dts/realtek/rtl930x.dtsi33
-rw-r--r--arch/mips/econet/Kconfig48
-rw-r--r--arch/mips/econet/Makefile2
-rw-r--r--arch/mips/econet/Platform5
-rw-r--r--arch/mips/econet/init.c78
-rw-r--r--arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_pci.h20
-rw-r--r--arch/mips/include/asm/mach-loongson2ef/loongson.h9
-rw-r--r--arch/mips/include/asm/pgtable.h4
-rw-r--r--arch/mips/include/asm/topology.h3
-rw-r--r--arch/mips/kernel/Makefile2
-rw-r--r--arch/mips/kernel/gpio_txx9.c8
-rw-r--r--arch/mips/kernel/ptrace.c34
-rw-r--r--arch/mips/kernel/smp-cps.c2
-rw-r--r--arch/mips/kernel/smp.c18
-rw-r--r--arch/mips/kernel/vpe.c3
-rw-r--r--arch/mips/rb532/gpio.c8
-rw-r--r--arch/mips/txx9/generic/setup.c8
-rw-r--r--arch/mips/vdso/Makefile1
-rw-r--r--arch/nios2/include/asm/pgtable.h2
-rw-r--r--arch/nios2/kernel/Makefile2
-rw-r--r--arch/openrisc/include/asm/pgtable.h2
-rw-r--r--arch/openrisc/kernel/Makefile2
-rw-r--r--arch/parisc/include/asm/pgtable.h2
-rw-r--r--arch/parisc/kernel/Makefile2
-rw-r--r--arch/powerpc/boot/dts/microwatt.dts2
-rw-r--r--arch/powerpc/boot/dts/mpc8315erdb.dts10
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgtable.h2
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h2
-rw-r--r--arch/powerpc/include/asm/nohash/pgtable.h2
-rw-r--r--arch/powerpc/include/asm/ppc_asm.h2
-rw-r--r--arch/powerpc/include/uapi/asm/ioctls.h8
-rw-r--r--arch/powerpc/kernel/Makefile2
-rw-r--r--arch/powerpc/kernel/eeh.c2
-rw-r--r--arch/powerpc/kernel/vdso/Makefile2
-rw-r--r--arch/powerpc/kvm/booke.c2
-rw-r--r--arch/powerpc/platforms/book3s/vas-api.c9
-rw-r--r--arch/powerpc/platforms/powermac/low_i2c.c3
-rw-r--r--arch/powerpc/platforms/powernv/memtrace.c8
-rw-r--r--arch/riscv/Kconfig42
-rw-r--r--arch/riscv/Kconfig.vendor13
-rw-r--r--arch/riscv/Makefile4
-rw-r--r--arch/riscv/configs/defconfig24
-rw-r--r--arch/riscv/include/asm/asm-prototypes.h2
-rw-r--r--arch/riscv/include/asm/barrier.h5
-rw-r--r--arch/riscv/include/asm/cacheflush.h1
-rw-r--r--arch/riscv/include/asm/cmpxchg.h4
-rw-r--r--arch/riscv/include/asm/cpufeature.h14
-rw-r--r--arch/riscv/include/asm/ftrace.h62
-rw-r--r--arch/riscv/include/asm/hwcap.h1
-rw-r--r--arch/riscv/include/asm/hwprobe.h3
-rw-r--r--arch/riscv/include/asm/image.h2
-rw-r--r--arch/riscv/include/asm/insn-def.h66
-rw-r--r--arch/riscv/include/asm/kexec.h6
-rw-r--r--arch/riscv/include/asm/kvm_aia.h4
-rw-r--r--arch/riscv/include/asm/kvm_host.h3
-rw-r--r--arch/riscv/include/asm/pgtable-64.h5
-rw-r--r--arch/riscv/include/asm/pgtable.h100
-rw-r--r--arch/riscv/include/asm/processor.h31
-rw-r--r--arch/riscv/include/asm/ptrace.h2
-rw-r--r--arch/riscv/include/asm/runtime-const.h2
-rw-r--r--arch/riscv/include/asm/sbi.h60
-rw-r--r--arch/riscv/include/asm/tlbflush.h2
-rw-r--r--arch/riscv/include/asm/uaccess.h219
-rw-r--r--arch/riscv/include/asm/vdso/getrandom.h30
-rw-r--r--arch/riscv/include/asm/vector.h34
-rw-r--r--arch/riscv/include/asm/vendor_extensions/sifive.h16
-rw-r--r--arch/riscv/include/asm/vendor_extensions/sifive_hwprobe.h19
-rw-r--r--arch/riscv/include/uapi/asm/hwprobe.h2
-rw-r--r--arch/riscv/include/uapi/asm/vendor/sifive.h6
-rw-r--r--arch/riscv/kernel/Makefile4
-rw-r--r--arch/riscv/kernel/asm-offsets.c18
-rw-r--r--arch/riscv/kernel/cpu_ops_sbi.c6
-rw-r--r--arch/riscv/kernel/cpufeature.c21
-rw-r--r--arch/riscv/kernel/elf_kexec.c485
-rw-r--r--arch/riscv/kernel/entry.S9
-rw-r--r--arch/riscv/kernel/ftrace.c252
-rw-r--r--arch/riscv/kernel/kexec_elf.c144
-rw-r--r--arch/riscv/kernel/kexec_image.c96
-rw-r--r--arch/riscv/kernel/machine_kexec_file.c361
-rw-r--r--arch/riscv/kernel/mcount-dyn.S117
-rw-r--r--arch/riscv/kernel/module-sections.c81
-rw-r--r--arch/riscv/kernel/process.c2
-rw-r--r--arch/riscv/kernel/sbi.c81
-rw-r--r--arch/riscv/kernel/setup.c1
-rw-r--r--arch/riscv/kernel/sys_hwprobe.c6
-rw-r--r--arch/riscv/kernel/traps.c10
-rw-r--r--arch/riscv/kernel/traps_misaligned.c114
-rw-r--r--arch/riscv/kernel/unaligned_access_speed.c8
-rw-r--r--arch/riscv/kernel/vdso.c2
-rw-r--r--arch/riscv/kernel/vdso/Makefile15
-rw-r--r--arch/riscv/kernel/vdso/getrandom.c10
-rw-r--r--arch/riscv/kernel/vdso/vdso.lds.S5
-rw-r--r--arch/riscv/kernel/vdso/vgetrandom-chacha.S249
-rw-r--r--arch/riscv/kernel/vendor_extensions.c10
-rw-r--r--arch/riscv/kernel/vendor_extensions/Makefile2
-rw-r--r--arch/riscv/kernel/vendor_extensions/sifive.c21
-rw-r--r--arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c22
-rw-r--r--arch/riscv/kvm/aia.c51
-rw-r--r--arch/riscv/kvm/aia_imsic.c45
-rw-r--r--arch/riscv/kvm/vcpu.c10
-rw-r--r--arch/riscv/kvm/vcpu_sbi_replace.c8
-rw-r--r--arch/riscv/kvm/vcpu_timer.c16
-rw-r--r--arch/riscv/lib/riscv_v_helpers.c11
-rw-r--r--arch/riscv/lib/uaccess.S50
-rw-r--r--arch/riscv/lib/uaccess_vector.S15
-rw-r--r--arch/riscv/mm/cacheflush.c29
-rw-r--r--arch/riscv/mm/pgtable.c10
-rw-r--r--arch/riscv/mm/tlbflush.c38
-rwxr-xr-xarch/riscv/tools/relocs_check.sh4
-rw-r--r--arch/s390/crypto/sha1_s390.c2
-rw-r--r--arch/s390/crypto/sha512_s390.c3
-rw-r--r--arch/s390/include/asm/pgtable.h2
-rw-r--r--arch/s390/include/asm/ptrace.h2
-rw-r--r--arch/s390/kernel/Makefile2
-rw-r--r--arch/s390/kvm/gaccess.c8
-rw-r--r--arch/s390/mm/fault.c2
-rw-r--r--arch/s390/net/bpf_jit_comp.c10
-rw-r--r--arch/s390/pci/pci_event.c59
-rw-r--r--arch/sh/boards/mach-ecovec24/setup.c1
-rw-r--r--arch/sh/drivers/heartbeat.c2
-rw-r--r--arch/sh/drivers/pci/common.c4
-rw-r--r--arch/sh/drivers/push-switch.c2
-rw-r--r--arch/sh/include/asm/cache.h4
-rw-r--r--arch/sh/include/asm/dwarf.h6
-rw-r--r--arch/sh/include/asm/fpu.h4
-rw-r--r--arch/sh/include/asm/ftrace.h8
-rw-r--r--arch/sh/include/asm/mmu.h4
-rw-r--r--arch/sh/include/asm/page.h8
-rw-r--r--arch/sh/include/asm/pgtable.h4
-rw-r--r--arch/sh/include/asm/pgtable_32.h10
-rw-r--r--arch/sh/include/asm/processor.h4
-rw-r--r--arch/sh/include/asm/smc37c93x.h4
-rw-r--r--arch/sh/include/asm/suspend.h2
-rw-r--r--arch/sh/include/asm/thread_info.h10
-rw-r--r--arch/sh/include/asm/tlb.h4
-rw-r--r--arch/sh/include/asm/types.h4
-rw-r--r--arch/sh/include/mach-common/mach/romimage.h6
-rw-r--r--arch/sh/include/mach-ecovec24/mach/romimage.h6
-rw-r--r--arch/sh/include/mach-kfr2r09/mach/romimage.h6
-rw-r--r--arch/sh/kernel/Makefile2
-rw-r--r--arch/sh/kernel/kprobes.c4
-rw-r--r--arch/sparc/include/asm/pgtable_32.h2
-rw-r--r--arch/sparc/include/asm/pgtable_64.h2
-rw-r--r--arch/sparc/kernel/Makefile2
-rw-r--r--arch/sparc/kernel/viohs.c2
-rw-r--r--arch/um/Kconfig6
-rw-r--r--arch/um/configs/i386_defconfig7
-rw-r--r--arch/um/configs/x86_64_defconfig7
-rw-r--r--arch/um/drivers/Kconfig204
-rw-r--r--arch/um/drivers/Makefile22
-rw-r--r--arch/um/drivers/chan_kern.c10
-rw-r--r--arch/um/drivers/daemon.h29
-rw-r--r--arch/um/drivers/daemon_kern.c95
-rw-r--r--arch/um/drivers/daemon_user.c194
-rw-r--r--arch/um/drivers/net_kern.c889
-rw-r--r--arch/um/drivers/net_user.c271
-rw-r--r--arch/um/drivers/slip.h21
-rw-r--r--arch/um/drivers/slip_common.c55
-rw-r--r--arch/um/drivers/slip_common.h106
-rw-r--r--arch/um/drivers/slip_kern.c93
-rw-r--r--arch/um/drivers/slip_user.c252
-rw-r--r--arch/um/drivers/slirp.h34
-rw-r--r--arch/um/drivers/slirp_kern.c120
-rw-r--r--arch/um/drivers/slirp_user.c124
-rw-r--r--arch/um/drivers/ubd_user.c2
-rw-r--r--arch/um/drivers/umcast.h27
-rw-r--r--arch/um/drivers/umcast_kern.c188
-rw-r--r--arch/um/drivers/umcast_user.c184
-rw-r--r--arch/um/drivers/vde.h32
-rw-r--r--arch/um/drivers/vde_kern.c129
-rw-r--r--arch/um/drivers/vde_user.c125
-rw-r--r--arch/um/drivers/vector_kern.c92
-rw-r--r--arch/um/drivers/vfio_kern.c656
-rw-r--r--arch/um/drivers/vfio_user.c327
-rw-r--r--arch/um/drivers/vfio_user.h44
-rw-r--r--arch/um/drivers/virt-pci.c15
-rw-r--r--arch/um/drivers/xterm.c11
-rw-r--r--arch/um/include/asm/asm-prototypes.h5
-rw-r--r--arch/um/include/asm/irq.h5
-rw-r--r--arch/um/include/asm/mmu.h3
-rw-r--r--arch/um/include/asm/pgtable.h2
-rw-r--r--arch/um/include/shared/common-offsets.h4
-rw-r--r--arch/um/include/shared/irq_user.h2
-rw-r--r--arch/um/include/shared/net_kern.h69
-rw-r--r--arch/um/include/shared/net_user.h52
-rw-r--r--arch/um/include/shared/os.h4
-rw-r--r--arch/um/include/shared/skas/mm_id.h9
-rw-r--r--arch/um/include/shared/skas/skas.h1
-rw-r--r--arch/um/include/shared/skas/stub-data.h20
-rw-r--r--arch/um/kernel/Makefile3
-rw-r--r--arch/um/kernel/ioport.c13
-rw-r--r--arch/um/kernel/irq.c6
-rw-r--r--arch/um/kernel/skas/mmu.c89
-rw-r--r--arch/um/kernel/skas/stub.c130
-rw-r--r--arch/um/kernel/skas/stub_exe.c159
-rw-r--r--arch/um/kernel/time.c13
-rw-r--r--arch/um/kernel/trap.c130
-rw-r--r--arch/um/os-Linux/Makefile2
-rw-r--r--arch/um/os-Linux/drivers/Makefile13
-rw-r--r--arch/um/os-Linux/drivers/etap.h21
-rw-r--r--arch/um/os-Linux/drivers/ethertap_kern.c100
-rw-r--r--arch/um/os-Linux/drivers/ethertap_user.c248
-rw-r--r--arch/um/os-Linux/drivers/tuntap.h21
-rw-r--r--arch/um/os-Linux/drivers/tuntap_kern.c86
-rw-r--r--arch/um/os-Linux/drivers/tuntap_user.c215
-rw-r--r--arch/um/os-Linux/file.c15
-rw-r--r--arch/um/os-Linux/internal.h5
-rw-r--r--arch/um/os-Linux/process.c31
-rw-r--r--arch/um/os-Linux/registers.c4
-rw-r--r--arch/um/os-Linux/sigio.c3
-rw-r--r--arch/um/os-Linux/signal.c19
-rw-r--r--arch/um/os-Linux/skas/mem.c103
-rw-r--r--arch/um/os-Linux/skas/process.c482
-rw-r--r--arch/um/os-Linux/start_up.c195
-rw-r--r--arch/x86/Kconfig13
-rw-r--r--arch/x86/coco/sev/Makefile3
-rw-r--r--arch/x86/coco/sev/core.c22
-rw-r--r--arch/x86/entry/entry.S8
-rw-r--r--arch/x86/events/intel/core.c10
-rw-r--r--arch/x86/hyperv/hv_init.c1
-rw-r--r--arch/x86/hyperv/irqdomain.c69
-rw-r--r--arch/x86/hyperv/ivm.c1
-rw-r--r--arch/x86/hyperv/nested.c1
-rw-r--r--arch/x86/include/asm/amd/fch.h13
-rw-r--r--arch/x86/include/asm/cpufeatures.h6
-rw-r--r--arch/x86/include/asm/debugreg.h19
-rw-r--r--arch/x86/include/asm/irqflags.h4
-rw-r--r--arch/x86/include/asm/kvm_host.h10
-rw-r--r--arch/x86/include/asm/module.h8
-rw-r--r--arch/x86/include/asm/mshyperv.h22
-rw-r--r--arch/x86/include/asm/msr-index.h1
-rw-r--r--arch/x86/include/asm/mwait.h27
-rw-r--r--arch/x86/include/asm/nospec-branch.h37
-rw-r--r--arch/x86/include/asm/pgtable.h2
-rw-r--r--arch/x86/include/asm/sev.h17
-rw-r--r--arch/x86/include/asm/shared/tdx.h2
-rw-r--r--arch/x86/include/asm/sighandling.h22
-rw-r--r--arch/x86/include/asm/tdx.h2
-rw-r--r--arch/x86/include/asm/trace/fpu.h15
-rw-r--r--arch/x86/include/uapi/asm/debugreg.h21
-rw-r--r--arch/x86/include/uapi/asm/kvm.h8
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/alternative.c81
-rw-r--r--arch/x86/kernel/cpu/amd.c58
-rw-r--r--arch/x86/kernel/cpu/bugs.c136
-rw-r--r--arch/x86/kernel/cpu/common.c38
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c28
-rw-r--r--arch/x86/kernel/cpu/mce/core.c24
-rw-r--r--arch/x86/kernel/cpu/mce/intel.c1
-rw-r--r--arch/x86/kernel/cpu/microcode/amd_shas.c112
-rw-r--r--arch/x86/kernel/cpu/resctrl/core.c6
-rw-r--r--arch/x86/kernel/cpu/scattered.c2
-rw-r--r--arch/x86/kernel/ioport.c13
-rw-r--r--arch/x86/kernel/kgdb.c2
-rw-r--r--arch/x86/kernel/process.c22
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/process_64.c2
-rw-r--r--arch/x86/kernel/signal_32.c4
-rw-r--r--arch/x86/kernel/signal_64.c4
-rw-r--r--arch/x86/kernel/smp.c24
-rw-r--r--arch/x86/kernel/smpboot.c54
-rw-r--r--arch/x86/kernel/traps.c34
-rw-r--r--arch/x86/kvm/cpuid.c10
-rw-r--r--arch/x86/kvm/hyperv.c5
-rw-r--r--arch/x86/kvm/mmu/mmu.c9
-rw-r--r--arch/x86/kvm/reverse_cpuid.h7
-rw-r--r--arch/x86/kvm/svm/sev.c56
-rw-r--r--arch/x86/kvm/svm/vmenter.S6
-rw-r--r--arch/x86/kvm/vmx/tdx.c118
-rw-r--r--arch/x86/kvm/vmx/vmx.c2
-rw-r--r--arch/x86/kvm/x86.c12
-rw-r--r--arch/x86/kvm/xen.c20
-rw-r--r--arch/x86/mm/init_32.c3
-rw-r--r--arch/x86/mm/init_64.c3
-rw-r--r--arch/x86/mm/pat/set_memory.c3
-rw-r--r--arch/x86/mm/pti.c5
-rw-r--r--arch/x86/pci/Makefile6
-rw-r--r--arch/x86/pci/intel_mid.c (renamed from arch/x86/pci/intel_mid_pci.c)0
-rw-r--r--arch/x86/power/hibernate.c19
-rw-r--r--arch/x86/um/asm/checksum.h3
-rw-r--r--arch/x86/um/asm/processor.h8
-rw-r--r--arch/x86/um/os-Linux/mcontext.c218
-rw-r--r--arch/x86/um/ptrace.c76
-rw-r--r--arch/x86/um/shared/sysdep/kernel-offsets.h2
-rw-r--r--arch/x86/um/shared/sysdep/mcontext.h9
-rw-r--r--arch/x86/um/shared/sysdep/stub-data.h23
-rw-r--r--arch/x86/um/shared/sysdep/stub.h2
-rw-r--r--arch/x86/um/shared/sysdep/stub_32.h13
-rw-r--r--arch/x86/um/shared/sysdep/stub_64.h17
-rw-r--r--arch/x86/um/tls_32.c26
-rw-r--r--arch/x86/virt/vmx/tdx/tdx.c5
-rw-r--r--arch/xtensa/include/asm/pgtable.h2
-rw-r--r--arch/xtensa/kernel/Makefile2
-rw-r--r--arch/xtensa/platforms/iss/network.c2
486 files changed, 8088 insertions, 6657 deletions
diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index 2676017f42f1..44e7aedac6e8 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -327,7 +327,7 @@ extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile
index b6c862dff1f6..187cd8df2faf 100644
--- a/arch/alpha/kernel/Makefile
+++ b/arch/alpha/kernel/Makefile
@@ -3,7 +3,7 @@
# Makefile for the linux kernel.
#
-extra-y := vmlinux.lds
+always-$(KBUILD_BUILTIN) := vmlinux.lds
asflags-y := $(KBUILD_CFLAGS)
ccflags-y := -Wno-sign-compare
diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c
index a89ce84371f9..d19e51ec711d 100644
--- a/arch/alpha/kernel/srmcons.c
+++ b/arch/alpha/kernel/srmcons.c
@@ -69,7 +69,8 @@ srmcons_do_receive_chars(struct tty_port *port)
static void
srmcons_receive_chars(struct timer_list *t)
{
- struct srmcons_private *srmconsp = from_timer(srmconsp, t, timer);
+ struct srmcons_private *srmconsp = timer_container_of(srmconsp, t,
+ timer);
struct tty_port *port = &srmconsp->port;
unsigned long flags;
int incr = 10;
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index 005d9e4d187a..a31bbf5c8bbc 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -144,7 +144,7 @@
#define ARC_AUX_AGU_MOD2 0x5E2
#define ARC_AUX_AGU_MOD3 0x5E3
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <soc/arc/arc_aux.h>
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 592d7fffc223..e615c42b93ba 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -6,7 +6,7 @@
#ifndef _ASM_ARC_ATOMIC_H
#define _ASM_ARC_ATOMIC_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/compiler.h>
@@ -31,6 +31,6 @@
#include <asm/atomic64-arcv2.h>
#endif
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif
diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h
index 9b5791b85471..73080a664369 100644
--- a/arch/arc/include/asm/atomic64-arcv2.h
+++ b/arch/arc/include/asm/atomic64-arcv2.h
@@ -137,12 +137,9 @@ ATOMIC64_OPS(xor, xor, xor)
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
-static inline s64
-arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
+static inline u64 __arch_cmpxchg64_relaxed(volatile void *ptr, u64 old, u64 new)
{
- s64 prev;
-
- smp_mb();
+ u64 prev;
__asm__ __volatile__(
"1: llockd %0, [%1] \n"
@@ -152,14 +149,12 @@ arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
" bnz 1b \n"
"2: \n"
: "=&r"(prev)
- : "r"(ptr), "ir"(expected), "r"(new)
- : "cc"); /* memory clobber comes from smp_mb() */
-
- smp_mb();
+ : "r"(ptr), "ir"(old), "r"(new)
+ : "memory", "cc");
return prev;
}
-#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
+#define arch_cmpxchg64_relaxed __arch_cmpxchg64_relaxed
static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new)
{
diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index f5a936496f06..5340c2871392 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -10,7 +10,7 @@
#error only <linux/bitops.h> can be included directly
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/compiler.h>
@@ -192,6 +192,6 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x)
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic-setbit.h>
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif
diff --git a/arch/arc/include/asm/bug.h b/arch/arc/include/asm/bug.h
index 4c453ba96c51..171c16021f70 100644
--- a/arch/arc/include/asm/bug.h
+++ b/arch/arc/include/asm/bug.h
@@ -6,7 +6,7 @@
#ifndef _ASM_ARC_BUG_H
#define _ASM_ARC_BUG_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/ptrace.h>
@@ -29,6 +29,6 @@ void die(const char *str, struct pt_regs *regs, unsigned long address);
#include <asm-generic/bug.h>
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index f0f1fc5d62b6..040a97f4dd82 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -23,7 +23,7 @@
*/
#define ARC_UNCACHED_ADDR_SPACE 0xc0000000
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/build_bug.h>
@@ -65,7 +65,7 @@
extern int ioc_enable;
extern unsigned long perip_base, perip_end;
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/* Instruction cache related Auxiliary registers */
#define ARC_REG_IC_BCR 0x77 /* Build Config reg */
diff --git a/arch/arc/include/asm/current.h b/arch/arc/include/asm/current.h
index 06be89f6f2f0..03ffd005f3fa 100644
--- a/arch/arc/include/asm/current.h
+++ b/arch/arc/include/asm/current.h
@@ -9,7 +9,7 @@
#ifndef _ASM_ARC_CURRENT_H
#define _ASM_ARC_CURRENT_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef CONFIG_ARC_CURR_IN_REG
@@ -20,6 +20,6 @@ register struct task_struct *curr_arc asm("gp");
#include <asm-generic/current.h>
#endif /* ! CONFIG_ARC_CURR_IN_REG */
-#endif /* ! __ASSEMBLY__ */
+#endif /* ! __ASSEMBLER__ */
#endif /* _ASM_ARC_CURRENT_H */
diff --git a/arch/arc/include/asm/dsp-impl.h b/arch/arc/include/asm/dsp-impl.h
index cd5636dfeb6f..fd5fdaad90c1 100644
--- a/arch/arc/include/asm/dsp-impl.h
+++ b/arch/arc/include/asm/dsp-impl.h
@@ -11,7 +11,7 @@
#define DSP_CTRL_DISABLED_ALL 0
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
/* clobbers r5 register */
.macro DSP_EARLY_INIT
diff --git a/arch/arc/include/asm/dsp.h b/arch/arc/include/asm/dsp.h
index f496dbc4640b..eeaaf4e4eabd 100644
--- a/arch/arc/include/asm/dsp.h
+++ b/arch/arc/include/asm/dsp.h
@@ -7,7 +7,7 @@
#ifndef __ASM_ARC_DSP_H
#define __ASM_ARC_DSP_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* DSP-related saved registers - need to be saved only when you are
@@ -24,6 +24,6 @@ struct dsp_callee_regs {
#endif
};
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_ARC_DSP_H */
diff --git a/arch/arc/include/asm/dwarf.h b/arch/arc/include/asm/dwarf.h
index a0d5ebe1bc3f..1524c5cf8b59 100644
--- a/arch/arc/include/asm/dwarf.h
+++ b/arch/arc/include/asm/dwarf.h
@@ -6,7 +6,7 @@
#ifndef _ASM_ARC_DWARF_H
#define _ASM_ARC_DWARF_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#ifdef ARC_DW2_UNWIND_AS_CFI
@@ -38,6 +38,6 @@
#endif /* !ARC_DW2_UNWIND_AS_CFI */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_ARC_DWARF_H */
diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h
index 38c35722cebf..f453af251a1a 100644
--- a/arch/arc/include/asm/entry.h
+++ b/arch/arc/include/asm/entry.h
@@ -13,7 +13,7 @@
#include <asm/processor.h> /* For VMALLOC_START */
#include <asm/mmu.h>
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#ifdef CONFIG_ISA_ARCOMPACT
#include <asm/entry-compact.h> /* ISA specific bits */
@@ -146,7 +146,7 @@
#endif /* CONFIG_ARC_CURR_IN_REG */
-#else /* !__ASSEMBLY__ */
+#else /* !__ASSEMBLER__ */
extern void do_signal(struct pt_regs *);
extern void do_notify_resume(struct pt_regs *);
diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h
index fb3c21f1a238..30aea562f8aa 100644
--- a/arch/arc/include/asm/irqflags-arcv2.h
+++ b/arch/arc/include/asm/irqflags-arcv2.h
@@ -50,7 +50,7 @@
#define ISA_INIT_STATUS_BITS (STATUS_IE_MASK | __AD_ENB | \
(ARCV2_IRQ_DEF_PRIO << 1))
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* Save IRQ state and disable IRQs
@@ -170,6 +170,6 @@ static inline void arc_softirq_clear(int irq)
seti
.endm
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h
index 936a2f21f315..85c2f6bcde0c 100644
--- a/arch/arc/include/asm/irqflags-compact.h
+++ b/arch/arc/include/asm/irqflags-compact.h
@@ -40,7 +40,7 @@
#define ISA_INIT_STATUS_BITS STATUS_IE_MASK
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/******************************************************************
* IRQ Control Macros
@@ -196,6 +196,6 @@ static inline int arch_irqs_disabled(void)
flag \scratch
.endm
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/arc/include/asm/jump_label.h b/arch/arc/include/asm/jump_label.h
index a339223d9e05..66ead75784d9 100644
--- a/arch/arc/include/asm/jump_label.h
+++ b/arch/arc/include/asm/jump_label.h
@@ -2,7 +2,7 @@
#ifndef _ASM_ARC_JUMP_LABEL_H
#define _ASM_ARC_JUMP_LABEL_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/stringify.h>
#include <linux/types.h>
@@ -68,5 +68,5 @@ struct jump_entry {
jump_label_t key;
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h
index 8a3fb71e9cfa..ba3cb65b5eaa 100644
--- a/arch/arc/include/asm/linkage.h
+++ b/arch/arc/include/asm/linkage.h
@@ -12,7 +12,7 @@
#define __ALIGN .align 4
#define __ALIGN_STR __stringify(__ALIGN)
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
.macro ST2 e, o, off
#ifdef CONFIG_ARC_HAS_LL64
@@ -61,7 +61,7 @@
CFI_ENDPROC ASM_NL \
.size name, .-name
-#else /* !__ASSEMBLY__ */
+#else /* !__ASSEMBLER__ */
#ifdef CONFIG_ARC_HAS_ICCM
#define __arcfp_code __section(".text.arcfp")
@@ -75,6 +75,6 @@
#define __arcfp_data __section(".data")
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/arc/include/asm/mmu-arcv2.h b/arch/arc/include/asm/mmu-arcv2.h
index 41412642f279..5e5482026ac9 100644
--- a/arch/arc/include/asm/mmu-arcv2.h
+++ b/arch/arc/include/asm/mmu-arcv2.h
@@ -69,7 +69,7 @@
#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct mm_struct;
extern int pae40_exist_but_not_enab(void);
@@ -100,6 +100,6 @@ static inline void mmu_setup_pgd(struct mm_struct *mm, void *pgd)
sr \reg, [ARC_REG_PID]
.endm
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif
diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h
index 4ae2db59d494..e3b35ceab582 100644
--- a/arch/arc/include/asm/mmu.h
+++ b/arch/arc/include/asm/mmu.h
@@ -6,7 +6,7 @@
#ifndef _ASM_ARC_MMU_H
#define _ASM_ARC_MMU_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/threads.h> /* NR_CPUS */
diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h
index def0dfb95b43..9720fe6b2c24 100644
--- a/arch/arc/include/asm/page.h
+++ b/arch/arc/include/asm/page.h
@@ -19,7 +19,7 @@
#endif /* CONFIG_ARC_HAS_PAE40 */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define clear_page(paddr) memset((paddr), 0, PAGE_SIZE)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
@@ -136,6 +136,6 @@ static inline unsigned long virt_to_pfn(const void *kaddr)
#include <asm-generic/memory_model.h> /* page_to_pfn, pfn_to_page */
#include <asm-generic/getorder.h>
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif
diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h
index 8ebec1b21d24..4630c5acca05 100644
--- a/arch/arc/include/asm/pgtable-bits-arcv2.h
+++ b/arch/arc/include/asm/pgtable-bits-arcv2.h
@@ -75,7 +75,7 @@
* This is to enable COW mechanism
*/
/* xwr */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define pte_write(pte) (pte_val(pte) & _PAGE_WRITE)
#define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY)
@@ -130,7 +130,7 @@ void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
@@ -142,6 +142,6 @@ PTE_BIT_FUNC(swp_clear_exclusive, &= ~(_PAGE_SWP_EXCLUSIVE));
#include <asm/hugepage.h>
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/arc/include/asm/pgtable-levels.h b/arch/arc/include/asm/pgtable-levels.h
index d1ce4b0f1071..c8f9273372c0 100644
--- a/arch/arc/include/asm/pgtable-levels.h
+++ b/arch/arc/include/asm/pgtable-levels.h
@@ -85,7 +85,7 @@
#define PTRS_PER_PTE BIT(PMD_SHIFT - PAGE_SHIFT)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#if CONFIG_PGTABLE_LEVELS > 3
#include <asm-generic/pgtable-nop4d.h>
@@ -181,6 +181,6 @@
#define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ)
#endif
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 4cf45a99fd79..bd580e2b62d7 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -19,7 +19,7 @@
*/
#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern char empty_zero_page[PAGE_SIZE];
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
@@ -29,6 +29,6 @@ extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE);
/* to cope with aliasing VIPT cache */
#define HAVE_ARCH_UNMAPPED_AREA
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index d606658e2fe7..7f7901ac6643 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -11,7 +11,7 @@
#ifndef __ASM_ARC_PROCESSOR_H
#define __ASM_ARC_PROCESSOR_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/ptrace.h>
#include <asm/dsp.h>
@@ -66,7 +66,7 @@ extern void start_thread(struct pt_regs * regs, unsigned long pc,
extern unsigned int __get_wchan(struct task_struct *p);
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/*
* Default System Memory Map on ARC
diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
index cf79df0b2570..f6c052af8f4d 100644
--- a/arch/arc/include/asm/ptrace.h
+++ b/arch/arc/include/asm/ptrace.h
@@ -10,7 +10,7 @@
#include <uapi/asm/ptrace.h>
#include <linux/compiler.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
typedef union {
struct {
@@ -172,6 +172,6 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
extern int syscall_trace_enter(struct pt_regs *);
extern void syscall_trace_exit(struct pt_regs *);
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_PTRACE_H */
diff --git a/arch/arc/include/asm/switch_to.h b/arch/arc/include/asm/switch_to.h
index 1f85de8288b1..5806106a65f9 100644
--- a/arch/arc/include/asm/switch_to.h
+++ b/arch/arc/include/asm/switch_to.h
@@ -6,7 +6,7 @@
#ifndef _ASM_ARC_SWITCH_TO_H
#define _ASM_ARC_SWITCH_TO_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/sched.h>
#include <asm/dsp-impl.h>
diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h
index 12daaf3a61ea..255d2c774219 100644
--- a/arch/arc/include/asm/thread_info.h
+++ b/arch/arc/include/asm/thread_info.h
@@ -24,7 +24,7 @@
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
#define THREAD_SHIFT (PAGE_SHIFT << THREAD_SIZE_ORDER)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/thread_info.h>
@@ -62,7 +62,7 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void)
return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
}
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/*
* thread information flags
diff --git a/arch/arc/include/uapi/asm/ptrace.h b/arch/arc/include/uapi/asm/ptrace.h
index 2a6eff57f6dd..3ae832db278c 100644
--- a/arch/arc/include/uapi/asm/ptrace.h
+++ b/arch/arc/include/uapi/asm/ptrace.h
@@ -14,7 +14,7 @@
#define PTRACE_GET_THREAD_AREA 25
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* Userspace ABI: Register state needed by
* -ptrace (gdbserver)
@@ -53,6 +53,6 @@ struct user_regs_arcv2 {
unsigned long r30, r58, r59;
};
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _UAPI__ASM_ARC_PTRACE_H */
diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile
index 95fbf9364c67..fa94fff02419 100644
--- a/arch/arc/kernel/Makefile
+++ b/arch/arc/kernel/Makefile
@@ -26,4 +26,4 @@ ifdef CONFIG_ISA_ARCOMPACT
CFLAGS_fpu.o += -mdpfp
endif
-extra-y := vmlinux.lds
+always-$(KBUILD_BUILTIN) := vmlinux.lds
diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index d8969dab12d4..789cfb9ea14e 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -241,15 +241,6 @@ static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2)
return (e1->start > e2->start) - (e1->start < e2->start);
}
-static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size)
-{
- struct eh_frame_hdr_table_entry *e1 = p1;
- struct eh_frame_hdr_table_entry *e2 = p2;
-
- swap(e1->start, e2->start);
- swap(e1->fde, e2->fde);
-}
-
static void init_unwind_hdr(struct unwind_table *table,
void *(*alloc) (unsigned long))
{
@@ -345,7 +336,7 @@ static void init_unwind_hdr(struct unwind_table *table,
sort(header->table,
n,
sizeof(*header->table),
- cmp_eh_frame_hdr_table_entries, swap_eh_frame_hdr_table_entries);
+ cmp_eh_frame_hdr_table_entries, NULL);
table->hdrsz = hdrSize;
smp_wmb();
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index e81964cce516..f71af368674c 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -167,7 +167,7 @@ CONFIG_MFD_MAX77686=y
CONFIG_MFD_MAX77693=y
CONFIG_MFD_MAX8997=y
CONFIG_MFD_MAX8998=y
-CONFIG_MFD_SEC_CORE=y
+CONFIG_MFD_SEC_I2C=y
CONFIG_MFD_STMPE=y
CONFIG_STMPE_I2C=y
CONFIG_MFD_TPS65090=y
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index aca01ad6aafc..50c170b4619f 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -612,7 +612,7 @@ CONFIG_MFD_QCOM_RPM=y
CONFIG_MFD_SPMI_PMIC=y
CONFIG_MFD_RK8XX_I2C=y
CONFIG_MFD_RN5T618=y
-CONFIG_MFD_SEC_CORE=y
+CONFIG_MFD_SEC_I2C=y
CONFIG_MFD_STMPE=y
CONFIG_MFD_PALMAS=y
CONFIG_MFD_TPS65090=y
diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig
index ded4b9a5accf..ff29c5b0e9c9 100644
--- a/arch/arm/configs/pxa_defconfig
+++ b/arch/arm/configs/pxa_defconfig
@@ -335,7 +335,7 @@ CONFIG_MFD_MAX77693=y
CONFIG_MFD_MAX8907=m
CONFIG_EZX_PCAP=y
CONFIG_UCB1400_CORE=m
-CONFIG_MFD_SEC_CORE=y
+CONFIG_MFD_SEC_I2C=y
CONFIG_MFD_PALMAS=y
CONFIG_MFD_TPS65090=y
CONFIG_MFD_TPS6586X=y
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 7f1c3b4e3e04..86378eec7757 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -301,7 +301,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(swp) __pte((swp).val)
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_isset(pte, L_PTE_SWP_EXCLUSIVE);
}
diff --git a/arch/arm/include/asm/simd.h b/arch/arm/include/asm/simd.h
index d37559762180..be08a8da046f 100644
--- a/arch/arm/include/asm/simd.h
+++ b/arch/arm/include/asm/simd.h
@@ -8,7 +8,8 @@
static __must_check inline bool may_use_simd(void)
{
- return IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && !in_hardirq();
+ return IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && !in_hardirq()
+ && !irqs_disabled();
}
#endif /* _ASM_SIMD_H */
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index b3333d070390..afc9de7ef9a1 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -104,4 +104,4 @@ obj-$(CONFIG_HAVE_ARM_SMCCC) += smccc-call.o
obj-$(CONFIG_GENERIC_CPU_VULNERABILITIES) += spectre.o
-extra-y := vmlinux.lds
+always-$(KBUILD_BUILTIN) := vmlinux.lds
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 748698e91a4b..27e64f782cb3 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -515,7 +515,5 @@ void __init early_ioremap_init(void)
bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
unsigned long flags)
{
- unsigned long pfn = PHYS_PFN(offset);
-
- return memblock_is_map_memory(pfn);
+ return memblock_is_map_memory(offset);
}
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 7803d50b90f8..e559ad3cd148 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -877,6 +877,7 @@ void kernel_neon_begin(void)
* the kernel mode NEON register contents never need to be preserved.
*/
BUG_ON(in_hardirq());
+ BUG_ON(irqs_disabled());
cpu = __smp_processor_id();
fpexc = fmrx(FPEXC) | FPEXC_EN;
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 55fc331af337..393d71124f5d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -256,6 +256,7 @@ config ARM64
select HOTPLUG_SMT if HOTPLUG_CPU
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
+ select JUMP_LABEL
select KASAN_VMALLOC if KASAN
select LOCK_MM_AND_FIND_VMA
select MODULES_USE_ELF_RELA
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 8b76821f190f..a541bb029aa4 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -269,7 +269,7 @@ config ARCH_QCOM
bool "Qualcomm Platforms"
select GPIOLIB
select PINCTRL
- select HAVE_PWRCTL if PCI
+ select HAVE_PWRCTRL if PCI
help
This enables support for the ARMv8 based Qualcomm chipsets.
diff --git a/arch/arm64/boot/dts/apple/spi1-nvram.dtsi b/arch/arm64/boot/dts/apple/spi1-nvram.dtsi
index 3df2fd3993b5..9740fbf200f0 100644
--- a/arch/arm64/boot/dts/apple/spi1-nvram.dtsi
+++ b/arch/arm64/boot/dts/apple/spi1-nvram.dtsi
@@ -20,8 +20,6 @@
compatible = "jedec,spi-nor";
reg = <0x0>;
spi-max-frequency = <25000000>;
- #address-cells = <1>;
- #size-cells = <1>;
partitions {
compatible = "fixed-partitions";
diff --git a/arch/arm64/boot/dts/apple/t8103-j293.dts b/arch/arm64/boot/dts/apple/t8103-j293.dts
index e2d9439397f7..5b3c42e9f0e6 100644
--- a/arch/arm64/boot/dts/apple/t8103-j293.dts
+++ b/arch/arm64/boot/dts/apple/t8103-j293.dts
@@ -100,6 +100,8 @@
&displaydfr_mipi {
status = "okay";
+ #address-cells = <1>;
+ #size-cells = <0>;
dfr_panel: panel@0 {
compatible = "apple,j293-summit", "apple,summit";
diff --git a/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi b/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi
index 8e82231acab5..0c8206156bfe 100644
--- a/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi
+++ b/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi
@@ -71,7 +71,7 @@
*/
&port00 {
bus-range = <1 1>;
- wifi0: network@0,0 {
+ wifi0: wifi@0,0 {
compatible = "pci14e4,4425";
reg = <0x10000 0x0 0x0 0x0 0x0>;
/* To be filled by the loader */
diff --git a/arch/arm64/boot/dts/apple/t8103.dtsi b/arch/arm64/boot/dts/apple/t8103.dtsi
index 20faf0c0d809..3a204845b85b 100644
--- a/arch/arm64/boot/dts/apple/t8103.dtsi
+++ b/arch/arm64/boot/dts/apple/t8103.dtsi
@@ -405,8 +405,6 @@
compatible = "apple,t8103-display-pipe-mipi", "apple,h7-display-pipe-mipi";
reg = <0x2 0x28600000 0x0 0x100000>;
power-domains = <&ps_mipi_dsi>;
- #address-cells = <1>;
- #size-cells = <0>;
status = "disabled";
ports {
diff --git a/arch/arm64/boot/dts/apple/t8112-j493.dts b/arch/arm64/boot/dts/apple/t8112-j493.dts
index be86d34c6696..fb8ad7d4c65a 100644
--- a/arch/arm64/boot/dts/apple/t8112-j493.dts
+++ b/arch/arm64/boot/dts/apple/t8112-j493.dts
@@ -63,6 +63,8 @@
&displaydfr_mipi {
status = "okay";
+ #address-cells = <1>;
+ #size-cells = <0>;
dfr_panel: panel@0 {
compatible = "apple,j493-summit", "apple,summit";
diff --git a/arch/arm64/boot/dts/apple/t8112.dtsi b/arch/arm64/boot/dts/apple/t8112.dtsi
index e95711d8337f..f68354194355 100644
--- a/arch/arm64/boot/dts/apple/t8112.dtsi
+++ b/arch/arm64/boot/dts/apple/t8112.dtsi
@@ -420,8 +420,6 @@
compatible = "apple,t8112-display-pipe-mipi", "apple,h7-display-pipe-mipi";
reg = <0x2 0x28600000 0x0 0x100000>;
power-domains = <&ps_mipi_dsi>;
- #address-cells = <1>;
- #size-cells = <0>;
status = "disabled";
ports {
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
index 0baf256b4400..983b2f0e8797 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
@@ -687,11 +687,12 @@
};
wdog0: watchdog@2ad0000 {
- compatible = "fsl,imx21-wdt";
+ compatible = "fsl,ls1046a-wdt", "fsl,imx21-wdt";
reg = <0x0 0x2ad0000 0x0 0x10000>;
interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
QORIQ_CLK_PLL_DIV(2)>;
+ big-endian;
};
edma0: dma-controller@2c00000 {
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
index d29710772569..1594ce9182a5 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
@@ -464,6 +464,7 @@
};
reg_nvcc_sd: LDO5 {
+ regulator-always-on;
regulator-max-microvolt = <3300000>;
regulator-min-microvolt = <1800000>;
regulator-name = "On-module +V3.3_1.8_SD (LDO5)";
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi
index 2f740d74707b..4bf818873fe3 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi
@@ -70,7 +70,7 @@
tpm@1 {
compatible = "atmel,attpm20p", "tcg,tpm_tis-spi";
reg = <0x1>;
- spi-max-frequency = <36000000>;
+ spi-max-frequency = <25000000>;
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
index 5ab3ffe9931d..cf747ec6fa16 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
@@ -110,7 +110,7 @@
tpm@1 {
compatible = "atmel,attpm20p", "tcg,tpm_tis-spi";
reg = <0x1>;
- spi-max-frequency = <36000000>;
+ spi-max-frequency = <25000000>;
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
index e2b5e7ac3e46..5eb114d2360a 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
@@ -122,7 +122,7 @@
tpm@1 {
compatible = "atmel,attpm20p", "tcg,tpm_tis-spi";
reg = <0x1>;
- spi-max-frequency = <36000000>;
+ spi-max-frequency = <25000000>;
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts
index 6daa2313f879..568d24265ddf 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts
@@ -201,7 +201,7 @@
tpm@0 {
compatible = "atmel,attpm20p", "tcg,tpm_tis-spi";
reg = <0x0>;
- spi-max-frequency = <36000000>;
+ spi-max-frequency = <25000000>;
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts b/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts
index 6c47f4b47356..9f4d0899a94d 100644
--- a/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts
@@ -574,17 +574,17 @@
&scmi_iomuxc {
pinctrl_emdio: emdiogrp {
fsl,pins = <
- IMX95_PAD_ENET2_MDC__NETCMIX_TOP_NETC_MDC 0x57e
- IMX95_PAD_ENET2_MDIO__NETCMIX_TOP_NETC_MDIO 0x97e
+ IMX95_PAD_ENET2_MDC__NETCMIX_TOP_NETC_MDC 0x50e
+ IMX95_PAD_ENET2_MDIO__NETCMIX_TOP_NETC_MDIO 0x90e
>;
};
pinctrl_enetc0: enetc0grp {
fsl,pins = <
- IMX95_PAD_ENET1_TD3__NETCMIX_TOP_ETH0_RGMII_TD3 0x57e
- IMX95_PAD_ENET1_TD2__NETCMIX_TOP_ETH0_RGMII_TD2 0x57e
- IMX95_PAD_ENET1_TD1__NETCMIX_TOP_ETH0_RGMII_TD1 0x57e
- IMX95_PAD_ENET1_TD0__NETCMIX_TOP_ETH0_RGMII_TD0 0x57e
+ IMX95_PAD_ENET1_TD3__NETCMIX_TOP_ETH0_RGMII_TD3 0x50e
+ IMX95_PAD_ENET1_TD2__NETCMIX_TOP_ETH0_RGMII_TD2 0x50e
+ IMX95_PAD_ENET1_TD1__NETCMIX_TOP_ETH0_RGMII_TD1 0x50e
+ IMX95_PAD_ENET1_TD0__NETCMIX_TOP_ETH0_RGMII_TD0 0x50e
IMX95_PAD_ENET1_TX_CTL__NETCMIX_TOP_ETH0_RGMII_TX_CTL 0x57e
IMX95_PAD_ENET1_TXC__NETCMIX_TOP_ETH0_RGMII_TX_CLK 0x58e
IMX95_PAD_ENET1_RX_CTL__NETCMIX_TOP_ETH0_RGMII_RX_CTL 0x57e
@@ -598,10 +598,10 @@
pinctrl_enetc1: enetc1grp {
fsl,pins = <
- IMX95_PAD_ENET2_TD3__NETCMIX_TOP_ETH1_RGMII_TD3 0x57e
- IMX95_PAD_ENET2_TD2__NETCMIX_TOP_ETH1_RGMII_TD2 0x57e
- IMX95_PAD_ENET2_TD1__NETCMIX_TOP_ETH1_RGMII_TD1 0x57e
- IMX95_PAD_ENET2_TD0__NETCMIX_TOP_ETH1_RGMII_TD0 0x57e
+ IMX95_PAD_ENET2_TD3__NETCMIX_TOP_ETH1_RGMII_TD3 0x50e
+ IMX95_PAD_ENET2_TD2__NETCMIX_TOP_ETH1_RGMII_TD2 0x50e
+ IMX95_PAD_ENET2_TD1__NETCMIX_TOP_ETH1_RGMII_TD1 0x50e
+ IMX95_PAD_ENET2_TD0__NETCMIX_TOP_ETH1_RGMII_TD0 0x50e
IMX95_PAD_ENET2_TX_CTL__NETCMIX_TOP_ETH1_RGMII_TX_CTL 0x57e
IMX95_PAD_ENET2_TXC__NETCMIX_TOP_ETH1_RGMII_TX_CLK 0x58e
IMX95_PAD_ENET2_RX_CTL__NETCMIX_TOP_ETH1_RGMII_RX_CTL 0x57e
diff --git a/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts b/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts
index 6886ea766655..d7d845231312 100644
--- a/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts
@@ -566,17 +566,17 @@
&scmi_iomuxc {
pinctrl_emdio: emdiogrp{
fsl,pins = <
- IMX95_PAD_ENET1_MDC__NETCMIX_TOP_NETC_MDC 0x57e
- IMX95_PAD_ENET1_MDIO__NETCMIX_TOP_NETC_MDIO 0x97e
+ IMX95_PAD_ENET1_MDC__NETCMIX_TOP_NETC_MDC 0x50e
+ IMX95_PAD_ENET1_MDIO__NETCMIX_TOP_NETC_MDIO 0x90e
>;
};
pinctrl_enetc0: enetc0grp {
fsl,pins = <
- IMX95_PAD_ENET1_TD3__NETCMIX_TOP_ETH0_RGMII_TD3 0x57e
- IMX95_PAD_ENET1_TD2__NETCMIX_TOP_ETH0_RGMII_TD2 0x57e
- IMX95_PAD_ENET1_TD1__NETCMIX_TOP_ETH0_RGMII_TD1 0x57e
- IMX95_PAD_ENET1_TD0__NETCMIX_TOP_ETH0_RGMII_TD0 0x57e
+ IMX95_PAD_ENET1_TD3__NETCMIX_TOP_ETH0_RGMII_TD3 0x50e
+ IMX95_PAD_ENET1_TD2__NETCMIX_TOP_ETH0_RGMII_TD2 0x50e
+ IMX95_PAD_ENET1_TD1__NETCMIX_TOP_ETH0_RGMII_TD1 0x50e
+ IMX95_PAD_ENET1_TD0__NETCMIX_TOP_ETH0_RGMII_TD0 0x50e
IMX95_PAD_ENET1_TX_CTL__NETCMIX_TOP_ETH0_RGMII_TX_CTL 0x57e
IMX95_PAD_ENET1_TXC__NETCMIX_TOP_ETH0_RGMII_TX_CLK 0x58e
IMX95_PAD_ENET1_RX_CTL__NETCMIX_TOP_ETH0_RGMII_RX_CTL 0x57e
diff --git a/arch/arm64/boot/dts/freescale/imx95.dtsi b/arch/arm64/boot/dts/freescale/imx95.dtsi
index 632631a29112..5aecdd9b62ff 100644
--- a/arch/arm64/boot/dts/freescale/imx95.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx95.dtsi
@@ -1708,7 +1708,7 @@
<0x9 0 1 0>;
reg-names = "dbi","atu", "dbi2", "app", "dma", "addr_space";
num-lanes = <1>;
- interrupts = <GIC_SPI 317 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 311 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "dma";
clocks = <&scmi_clk IMX95_CLK_HSIO>,
<&scmi_clk IMX95_CLK_HSIOPLL>,
diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
index ae7a275fd223..cefecb7a23cf 100644
--- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
+++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
@@ -1090,6 +1090,8 @@
};
&pmk8280_rtc {
+ qcom,uefi-rtc-info;
+
status = "okay";
};
diff --git a/arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi b/arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi
index c02fd4d15c96..e3888bc143a0 100644
--- a/arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi
+++ b/arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi
@@ -224,6 +224,7 @@
reg-names = "rtc", "alarm";
interrupts = <0x0 0x62 0x1 IRQ_TYPE_EDGE_RISING>;
qcom,no-alarm; /* alarm owned by ADSP */
+ qcom,uefi-rtc-info;
};
pmk8550_sdam_2: nvram@7100 {
diff --git a/arch/arm64/boot/dts/renesas/r9a09g057.dtsi b/arch/arm64/boot/dts/renesas/r9a09g057.dtsi
index 18ab5639b301..0f3501951409 100644
--- a/arch/arm64/boot/dts/renesas/r9a09g057.dtsi
+++ b/arch/arm64/boot/dts/renesas/r9a09g057.dtsi
@@ -280,6 +280,171 @@
resets = <&cpg 0x30>;
};
+ dmac0: dma-controller@11400000 {
+ compatible = "renesas,r9a09g057-dmac";
+ reg = <0 0x11400000 0 0x10000>;
+ interrupts = <GIC_SPI 499 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 89 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 90 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 91 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 92 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 93 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 94 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 95 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 96 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 97 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 98 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 99 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 100 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 101 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 102 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 103 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 104 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "error",
+ "ch0", "ch1", "ch2", "ch3",
+ "ch4", "ch5", "ch6", "ch7",
+ "ch8", "ch9", "ch10", "ch11",
+ "ch12", "ch13", "ch14", "ch15";
+ clocks = <&cpg CPG_MOD 0x0>;
+ power-domains = <&cpg>;
+ resets = <&cpg 0x31>;
+ #dma-cells = <1>;
+ dma-channels = <16>;
+ renesas,icu = <&icu 4>;
+ };
+
+ dmac1: dma-controller@14830000 {
+ compatible = "renesas,r9a09g057-dmac";
+ reg = <0 0x14830000 0 0x10000>;
+ interrupts = <GIC_SPI 495 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 25 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 26 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 27 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 28 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 29 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 30 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 31 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 32 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 33 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 34 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 35 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 36 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 37 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 38 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 39 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 40 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "error",
+ "ch0", "ch1", "ch2", "ch3",
+ "ch4", "ch5", "ch6", "ch7",
+ "ch8", "ch9", "ch10", "ch11",
+ "ch12", "ch13", "ch14", "ch15";
+ clocks = <&cpg CPG_MOD 0x1>;
+ power-domains = <&cpg>;
+ resets = <&cpg 0x32>;
+ #dma-cells = <1>;
+ dma-channels = <16>;
+ renesas,icu = <&icu 0>;
+ };
+
+ dmac2: dma-controller@14840000 {
+ compatible = "renesas,r9a09g057-dmac";
+ reg = <0 0x14840000 0 0x10000>;
+ interrupts = <GIC_SPI 496 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 41 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 42 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 43 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 44 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 45 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 46 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 47 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 48 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 49 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 50 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 51 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 52 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 53 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 54 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 55 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 56 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "error",
+ "ch0", "ch1", "ch2", "ch3",
+ "ch4", "ch5", "ch6", "ch7",
+ "ch8", "ch9", "ch10", "ch11",
+ "ch12", "ch13", "ch14", "ch15";
+ clocks = <&cpg CPG_MOD 0x2>;
+ power-domains = <&cpg>;
+ resets = <&cpg 0x33>;
+ #dma-cells = <1>;
+ dma-channels = <16>;
+ renesas,icu = <&icu 1>;
+ };
+
+ dmac3: dma-controller@12000000 {
+ compatible = "renesas,r9a09g057-dmac";
+ reg = <0 0x12000000 0 0x10000>;
+ interrupts = <GIC_SPI 497 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 57 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 58 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 59 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 60 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 61 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 62 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 63 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 64 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 65 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 66 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 67 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 68 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 69 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 70 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 71 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 72 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "error",
+ "ch0", "ch1", "ch2", "ch3",
+ "ch4", "ch5", "ch6", "ch7",
+ "ch8", "ch9", "ch10", "ch11",
+ "ch12", "ch13", "ch14", "ch15";
+ clocks = <&cpg CPG_MOD 0x3>;
+ power-domains = <&cpg>;
+ resets = <&cpg 0x34>;
+ #dma-cells = <1>;
+ dma-channels = <16>;
+ renesas,icu = <&icu 2>;
+ };
+
+ dmac4: dma-controller@12010000 {
+ compatible = "renesas,r9a09g057-dmac";
+ reg = <0 0x12010000 0 0x10000>;
+ interrupts = <GIC_SPI 498 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 73 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 74 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 76 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 77 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 78 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 79 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 80 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 81 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 82 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 83 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 84 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 85 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 86 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 87 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 88 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "error",
+ "ch0", "ch1", "ch2", "ch3",
+ "ch4", "ch5", "ch6", "ch7",
+ "ch8", "ch9", "ch10", "ch11",
+ "ch12", "ch13", "ch14", "ch15";
+ clocks = <&cpg CPG_MOD 0x4>;
+ power-domains = <&cpg>;
+ resets = <&cpg 0x35>;
+ #dma-cells = <1>;
+ dma-channels = <16>;
+ renesas,icu = <&icu 3>;
+ };
+
ostm0: timer@11800000 {
compatible = "renesas,r9a09g057-ostm", "renesas,ostm";
reg = <0x0 0x11800000 0x0 0x1000>;
diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi
index ab232e5c7ad6..4203b335a263 100644
--- a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi
+++ b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi
@@ -379,6 +379,18 @@
<0 RK_PA7 RK_FUNC_GPIO &pcfg_pull_up>;
};
};
+
+ spi1 {
+ spi1_csn0_gpio_pin: spi1-csn0-gpio-pin {
+ rockchip,pins =
+ <3 RK_PB1 RK_FUNC_GPIO &pcfg_pull_up_4ma>;
+ };
+
+ spi1_csn1_gpio_pin: spi1-csn1-gpio-pin {
+ rockchip,pins =
+ <3 RK_PB2 RK_FUNC_GPIO &pcfg_pull_up_4ma>;
+ };
+ };
};
&pmu_io_domains {
@@ -396,6 +408,17 @@
vqmmc-supply = <&vccio_sd>;
};
+&spi1 {
+ /*
+ * Hardware CS has a very slow rise time of about 6us,
+ * causing transmission errors.
+ * With cs-gpios we have a rise time of about 20ns.
+ */
+ cs-gpios = <&gpio3 RK_PB1 GPIO_ACTIVE_LOW>, <&gpio3 RK_PB2 GPIO_ACTIVE_LOW>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&spi1_clk &spi1_csn0_gpio_pin &spi1_csn1_gpio_pin &spi1_miso &spi1_mosi>;
+};
+
&tsadc {
status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts
index 3c127c5c2607..a9021c524afb 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts
@@ -30,6 +30,7 @@
fan: gpio_fan {
compatible = "gpio-fan";
+ fan-supply = <&vcc12v_dcin>;
gpios = <&gpio0 RK_PD5 GPIO_ACTIVE_HIGH>;
gpio-fan,speed-map =
< 0 0>,
diff --git a/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts b/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts
index b09e789c75c4..801b40fea4e8 100644
--- a/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts
@@ -211,10 +211,38 @@
status = "okay";
};
+&cpu_b0 {
+ cpu-supply = <&vdd_cpu_big_s0>;
+};
+
+&cpu_b1 {
+ cpu-supply = <&vdd_cpu_big_s0>;
+};
+
+&cpu_b2 {
+ cpu-supply = <&vdd_cpu_big_s0>;
+};
+
+&cpu_b3 {
+ cpu-supply = <&vdd_cpu_big_s0>;
+};
+
&cpu_l0 {
cpu-supply = <&vdd_cpu_lit_s0>;
};
+&cpu_l1 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
+&cpu_l2 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
+&cpu_l3 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
&gmac0 {
phy-mode = "rgmii-id";
clock_in_out = "output";
diff --git a/arch/arm64/boot/dts/rockchip/rk3576.dtsi b/arch/arm64/boot/dts/rockchip/rk3576.dtsi
index 1086482f0479..64812e3bcb61 100644
--- a/arch/arm64/boot/dts/rockchip/rk3576.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3576.dtsi
@@ -615,7 +615,7 @@
<0 0 0 2 &pcie1_intc 1>,
<0 0 0 3 &pcie1_intc 2>,
<0 0 0 4 &pcie1_intc 3>;
- linux,pci-domain = <0>;
+ linux,pci-domain = <1>;
max-link-speed = <2>;
num-ib-windows = <8>;
num-viewport = <8>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi
index 7f874c77410c..6584d73660f6 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi
@@ -578,14 +578,14 @@
hdmim0_tx0_scl: hdmim0-tx0-scl {
rockchip,pins =
/* hdmim0_tx0_scl */
- <4 RK_PB7 5 &pcfg_pull_none>;
+ <4 RK_PB7 5 &pcfg_pull_none_drv_level_5_smt>;
};
/omit-if-no-ref/
hdmim0_tx0_sda: hdmim0-tx0-sda {
rockchip,pins =
/* hdmim0_tx0_sda */
- <4 RK_PC0 5 &pcfg_pull_none>;
+ <4 RK_PC0 5 &pcfg_pull_none_drv_level_1_smt>;
};
/omit-if-no-ref/
@@ -640,14 +640,14 @@
hdmim1_tx0_scl: hdmim1-tx0-scl {
rockchip,pins =
/* hdmim1_tx0_scl */
- <0 RK_PD5 11 &pcfg_pull_none>;
+ <0 RK_PD5 11 &pcfg_pull_none_drv_level_5_smt>;
};
/omit-if-no-ref/
hdmim1_tx0_sda: hdmim1-tx0-sda {
rockchip,pins =
/* hdmim1_tx0_sda */
- <0 RK_PD4 11 &pcfg_pull_none>;
+ <0 RK_PD4 11 &pcfg_pull_none_drv_level_1_smt>;
};
/omit-if-no-ref/
@@ -668,14 +668,14 @@
hdmim1_tx1_scl: hdmim1-tx1-scl {
rockchip,pins =
/* hdmim1_tx1_scl */
- <3 RK_PC6 5 &pcfg_pull_none>;
+ <3 RK_PC6 5 &pcfg_pull_none_drv_level_5_smt>;
};
/omit-if-no-ref/
hdmim1_tx1_sda: hdmim1-tx1-sda {
rockchip,pins =
/* hdmim1_tx1_sda */
- <3 RK_PC5 5 &pcfg_pull_none>;
+ <3 RK_PC5 5 &pcfg_pull_none_drv_level_1_smt>;
};
/omit-if-no-ref/
hdmim2_rx_cec: hdmim2-rx-cec {
@@ -709,14 +709,14 @@
hdmim2_tx0_scl: hdmim2-tx0-scl {
rockchip,pins =
/* hdmim2_tx0_scl */
- <3 RK_PC7 5 &pcfg_pull_none>;
+ <3 RK_PC7 5 &pcfg_pull_none_drv_level_5_smt>;
};
/omit-if-no-ref/
hdmim2_tx0_sda: hdmim2-tx0-sda {
rockchip,pins =
/* hdmim2_tx0_sda */
- <3 RK_PD0 5 &pcfg_pull_none>;
+ <3 RK_PD0 5 &pcfg_pull_none_drv_level_1_smt>;
};
/omit-if-no-ref/
@@ -730,14 +730,14 @@
hdmim2_tx1_scl: hdmim2-tx1-scl {
rockchip,pins =
/* hdmim2_tx1_scl */
- <1 RK_PA4 5 &pcfg_pull_none>;
+ <1 RK_PA4 5 &pcfg_pull_none_drv_level_5_smt>;
};
/omit-if-no-ref/
hdmim2_tx1_sda: hdmim2-tx1-sda {
rockchip,pins =
/* hdmim2_tx1_sda */
- <1 RK_PA3 5 &pcfg_pull_none>;
+ <1 RK_PA3 5 &pcfg_pull_none_drv_level_1_smt>;
};
/omit-if-no-ref/
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi
index cc37f082adea..b07543315f87 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi
@@ -321,6 +321,7 @@
bus-width = <4>;
cap-mmc-highspeed;
cap-sd-highspeed;
+ cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
disable-wp;
max-frequency = <150000000>;
no-sdio;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi
index 244c66faa161..fb48ddc04bcb 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi
@@ -160,14 +160,15 @@
hdmim0_tx1_scl: hdmim0-tx1-scl {
rockchip,pins =
/* hdmim0_tx1_scl */
- <2 RK_PB5 4 &pcfg_pull_none>;
+ <2 RK_PB5 4 &pcfg_pull_none_drv_level_3_smt>;
};
/omit-if-no-ref/
hdmim0_tx1_sda: hdmim0-tx1-sda {
rockchip,pins =
/* hdmim0_tx1_sda */
- <2 RK_PB4 4 &pcfg_pull_none>;
+ <2 RK_PB4 4 &pcfg_pull_none_drv_level_1_smt>;
+
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts
index 8b717c4017a4..b2947b36fada 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts
@@ -474,6 +474,7 @@
bus-width = <4>;
cap-mmc-highspeed;
cap-sd-highspeed;
+ cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
disable-wp;
max-frequency = <150000000>;
no-sdio;
diff --git a/arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi b/arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi
index 5c645437b507..b0475b7c655a 100644
--- a/arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi
@@ -333,6 +333,41 @@
};
/omit-if-no-ref/
+ pcfg_pull_none_drv_level_1_smt: pcfg-pull-none-drv-level-1-smt {
+ bias-disable;
+ drive-strength = <1>;
+ input-schmitt-enable;
+ };
+
+ /omit-if-no-ref/
+ pcfg_pull_none_drv_level_2_smt: pcfg-pull-none-drv-level-2-smt {
+ bias-disable;
+ drive-strength = <2>;
+ input-schmitt-enable;
+ };
+
+ /omit-if-no-ref/
+ pcfg_pull_none_drv_level_3_smt: pcfg-pull-none-drv-level-3-smt {
+ bias-disable;
+ drive-strength = <3>;
+ input-schmitt-enable;
+ };
+
+ /omit-if-no-ref/
+ pcfg_pull_none_drv_level_4_smt: pcfg-pull-none-drv-level-4-smt {
+ bias-disable;
+ drive-strength = <4>;
+ input-schmitt-enable;
+ };
+
+ /omit-if-no-ref/
+ pcfg_pull_none_drv_level_5_smt: pcfg-pull-none-drv-level-5-smt {
+ bias-disable;
+ drive-strength = <5>;
+ input-schmitt-enable;
+ };
+
+ /omit-if-no-ref/
pcfg_output_high: pcfg-output-high {
output-high;
};
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 1e99db100607..eb5c17d4c7ec 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -774,7 +774,7 @@ CONFIG_MFD_MT6397=y
CONFIG_MFD_SPMI_PMIC=y
CONFIG_MFD_RK8XX_I2C=y
CONFIG_MFD_RK8XX_SPI=y
-CONFIG_MFD_SEC_CORE=y
+CONFIG_MFD_SEC_I2C=y
CONFIG_MFD_SL28CPLD=y
CONFIG_RZ_MTU3=y
CONFIG_MFD_TI_AM335X_TSCADC=m
@@ -1444,6 +1444,7 @@ CONFIG_PLATFORM_MHU=y
CONFIG_BCM2835_MBOX=y
CONFIG_QCOM_APCS_IPC=y
CONFIG_MTK_ADSP_MBOX=m
+CONFIG_QCOM_CPUCP_MBOX=m
CONFIG_QCOM_IPCC=y
CONFIG_ROCKCHIP_IOMMU=y
CONFIG_TEGRA_IOMMU_SMMU=y
@@ -1573,6 +1574,7 @@ CONFIG_RESET_QCOM_AOSS=y
CONFIG_RESET_QCOM_PDC=m
CONFIG_RESET_RZG2L_USBPHY_CTRL=y
CONFIG_RESET_TI_SCI=y
+CONFIG_PHY_SNPS_EUSB2=m
CONFIG_PHY_XGENE=y
CONFIG_PHY_CAN_TRANSCEIVER=m
CONFIG_PHY_NXP_PTN3222=m
@@ -1597,7 +1599,6 @@ CONFIG_PHY_QCOM_EDP=m
CONFIG_PHY_QCOM_PCIE2=m
CONFIG_PHY_QCOM_QMP=m
CONFIG_PHY_QCOM_QUSB2=m
-CONFIG_PHY_QCOM_SNPS_EUSB2=m
CONFIG_PHY_QCOM_EUSB2_REPEATER=m
CONFIG_PHY_QCOM_M31_USB=m
CONFIG_PHY_QCOM_USB_HS=m
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 1e7c7475e43f..9f38340d24c2 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -287,30 +287,6 @@
.Lskip_fgt2_\@:
.endm
-.macro __init_el2_gcs
- mrs_s x1, SYS_ID_AA64PFR1_EL1
- ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4
- cbz x1, .Lskip_gcs_\@
-
- /* Ensure GCS is not enabled when we start trying to do BLs */
- msr_s SYS_GCSCR_EL1, xzr
- msr_s SYS_GCSCRE0_EL1, xzr
-.Lskip_gcs_\@:
-.endm
-
-.macro __init_el2_mpam
- /* Memory Partitioning And Monitoring: disable EL2 traps */
- mrs x1, id_aa64pfr0_el1
- ubfx x0, x1, #ID_AA64PFR0_EL1_MPAM_SHIFT, #4
- cbz x0, .Lskip_mpam_\@ // skip if no MPAM
- msr_s SYS_MPAM2_EL2, xzr // use the default partition
- // and disable lower traps
- mrs_s x0, SYS_MPAMIDR_EL1
- tbz x0, #MPAMIDR_EL1_HAS_HCR_SHIFT, .Lskip_mpam_\@ // skip if no MPAMHCR reg
- msr_s SYS_MPAMHCR_EL2, xzr // clear TRAP_MPAMIDR_EL1 -> EL2
-.Lskip_mpam_\@:
-.endm
-
/**
* Initialize EL2 registers to sane values. This should be called early on all
* cores that were booted in EL2. Note that everything gets initialised as
@@ -328,12 +304,10 @@
__init_el2_stage2
__init_el2_gicv3
__init_el2_hstr
- __init_el2_mpam
__init_el2_nvhe_idregs
__init_el2_cptr
__init_el2_fgt
__init_el2_fgt2
- __init_el2_gcs
.endm
#ifndef __KVM_NVHE_HYPERVISOR__
@@ -375,6 +349,23 @@
#endif
.macro finalise_el2_state
+ check_override id_aa64pfr0, ID_AA64PFR0_EL1_MPAM_SHIFT, .Linit_mpam_\@, .Lskip_mpam_\@, x1, x2
+
+.Linit_mpam_\@:
+ msr_s SYS_MPAM2_EL2, xzr // use the default partition
+ // and disable lower traps
+ mrs_s x0, SYS_MPAMIDR_EL1
+ tbz x0, #MPAMIDR_EL1_HAS_HCR_SHIFT, .Lskip_mpam_\@ // skip if no MPAMHCR reg
+ msr_s SYS_MPAMHCR_EL2, xzr // clear TRAP_MPAMIDR_EL1 -> EL2
+
+.Lskip_mpam_\@:
+ check_override id_aa64pfr1, ID_AA64PFR1_EL1_GCS_SHIFT, .Linit_gcs_\@, .Lskip_gcs_\@, x1, x2
+
+.Linit_gcs_\@:
+ msr_s SYS_GCSCR_EL1, xzr
+ msr_s SYS_GCSCRE0_EL1, xzr
+
+.Lskip_gcs_\@:
check_override id_aa64pfr0, ID_AA64PFR0_EL1_SVE_SHIFT, .Linit_sve_\@, .Lskip_sve_\@, x1, x2
.Linit_sve_\@: /* SVE register access */
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 9e93733523f6..74a4f738c5f5 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -58,7 +58,7 @@
#define INIT_DIR_SIZE (PAGE_SIZE * (EARLY_PAGES(SWAPPER_PGTABLE_LEVELS, KIMAGE_VADDR, _end, EXTRA_PAGE) \
+ EARLY_SEGMENT_EXTRA_PAGES))
-#define INIT_IDMAP_DIR_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, KIMAGE_VADDR, _end, 1))
+#define INIT_IDMAP_DIR_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, KIMAGE_VADDR, kimage_limit, 1))
#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + EARLY_IDMAP_EXTRA_PAGES) * PAGE_SIZE)
#define INIT_IDMAP_FDT_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, 0UL, UL(MAX_FDT_SIZE), 1) - 1)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index bd020fc28aa9..0720898f563e 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -561,68 +561,6 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
vcpu_set_flag((v), e); \
} while (0)
-#define __build_check_all_or_none(r, bits) \
- BUILD_BUG_ON(((r) & (bits)) && ((r) & (bits)) != (bits))
-
-#define __cpacr_to_cptr_clr(clr, set) \
- ({ \
- u64 cptr = 0; \
- \
- if ((set) & CPACR_EL1_FPEN) \
- cptr |= CPTR_EL2_TFP; \
- if ((set) & CPACR_EL1_ZEN) \
- cptr |= CPTR_EL2_TZ; \
- if ((set) & CPACR_EL1_SMEN) \
- cptr |= CPTR_EL2_TSM; \
- if ((clr) & CPACR_EL1_TTA) \
- cptr |= CPTR_EL2_TTA; \
- if ((clr) & CPTR_EL2_TAM) \
- cptr |= CPTR_EL2_TAM; \
- if ((clr) & CPTR_EL2_TCPAC) \
- cptr |= CPTR_EL2_TCPAC; \
- \
- cptr; \
- })
-
-#define __cpacr_to_cptr_set(clr, set) \
- ({ \
- u64 cptr = 0; \
- \
- if ((clr) & CPACR_EL1_FPEN) \
- cptr |= CPTR_EL2_TFP; \
- if ((clr) & CPACR_EL1_ZEN) \
- cptr |= CPTR_EL2_TZ; \
- if ((clr) & CPACR_EL1_SMEN) \
- cptr |= CPTR_EL2_TSM; \
- if ((set) & CPACR_EL1_TTA) \
- cptr |= CPTR_EL2_TTA; \
- if ((set) & CPTR_EL2_TAM) \
- cptr |= CPTR_EL2_TAM; \
- if ((set) & CPTR_EL2_TCPAC) \
- cptr |= CPTR_EL2_TCPAC; \
- \
- cptr; \
- })
-
-#define cpacr_clear_set(clr, set) \
- do { \
- BUILD_BUG_ON((set) & CPTR_VHE_EL2_RES0); \
- BUILD_BUG_ON((clr) & CPACR_EL1_E0POE); \
- __build_check_all_or_none((clr), CPACR_EL1_FPEN); \
- __build_check_all_or_none((set), CPACR_EL1_FPEN); \
- __build_check_all_or_none((clr), CPACR_EL1_ZEN); \
- __build_check_all_or_none((set), CPACR_EL1_ZEN); \
- __build_check_all_or_none((clr), CPACR_EL1_SMEN); \
- __build_check_all_or_none((set), CPACR_EL1_SMEN); \
- \
- if (has_vhe() || has_hvhe()) \
- sysreg_clear_set(cpacr_el1, clr, set); \
- else \
- sysreg_clear_set(cptr_el2, \
- __cpacr_to_cptr_clr(clr, set), \
- __cpacr_to_cptr_set(clr, set));\
- } while (0)
-
/*
* Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE
* format if E2H isn't set.
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 6ce2c5173482..3e41a880b062 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -1107,14 +1107,36 @@ static inline u64 *___ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r)
#define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r))
u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *, enum vcpu_sysreg, u64);
+
+#define __vcpu_assign_sys_reg(v, r, val) \
+ do { \
+ const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \
+ u64 __v = (val); \
+ if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \
+ __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \
+ \
+ ctxt_sys_reg(ctxt, (r)) = __v; \
+ } while (0)
+
+#define __vcpu_rmw_sys_reg(v, r, op, val) \
+ do { \
+ const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \
+ u64 __v = ctxt_sys_reg(ctxt, (r)); \
+ __v op (val); \
+ if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \
+ __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \
+ \
+ ctxt_sys_reg(ctxt, (r)) = __v; \
+ } while (0)
+
#define __vcpu_sys_reg(v,r) \
- (*({ \
+ ({ \
const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \
- u64 *__r = __ctxt_sys_reg(ctxt, (r)); \
+ u64 __v = ctxt_sys_reg(ctxt, (r)); \
if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \
- *__r = kvm_vcpu_apply_reg_masks((v), (r), *__r);\
- __r; \
- }))
+ __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \
+ __v; \
+ })
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg);
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
@@ -1267,9 +1289,8 @@ void kvm_arm_resume_guest(struct kvm *kvm);
})
/*
- * The couple of isb() below are there to guarantee the same behaviour
- * on VHE as on !VHE, where the eret to EL1 acts as a context
- * synchronization event.
+ * The isb() below is there to guarantee the same behaviour on VHE as on !VHE,
+ * where the eret to EL1 acts as a context synchronization event.
*/
#define kvm_call_hyp(f, ...) \
do { \
@@ -1287,7 +1308,6 @@ void kvm_arm_resume_guest(struct kvm *kvm);
\
if (has_vhe()) { \
ret = f(__VA_ARGS__); \
- isb(); \
} else { \
ret = kvm_call_hyp_nvhe(f, ##__VA_ARGS__); \
} \
@@ -1460,7 +1480,6 @@ int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm,
struct reg_mask_range *range);
/* Guest/host FPSIMD coordination helpers */
-int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 88db8a0c0b37..192d86e1cc76 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -563,7 +563,7 @@ static inline pte_t pte_swp_mkexclusive(pte_t pte)
return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
}
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & PTE_SWP_EXCLUSIVE;
}
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index eba1a98657f1..aa9efee17277 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -323,13 +323,14 @@ static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
}
/*
- * If mprotect/munmap/etc occurs during TLB batched flushing, we need to
- * synchronise all the TLBI issued with a DSB to avoid the race mentioned in
- * flush_tlb_batched_pending().
+ * If mprotect/munmap/etc occurs during TLB batched flushing, we need to ensure
+ * all the previously issued TLBIs targeting mm have completed. But since we
+ * can be executing on a remote CPU, a DSB cannot guarantee this like it can
+ * for arch_tlbbatch_flush(). Our only option is to flush the entire mm.
*/
static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
{
- dsb(ish);
+ flush_tlb_mm(mm);
}
/*
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 71c29a2a2f19..a2faf0049dab 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -34,7 +34,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
cpufeature.o alternative.o cacheinfo.o \
smp.o smp_spin_table.o topology.o smccc-call.o \
syscall.o proton-pack.o idle.o patching.o pi/ \
- rsi.o
+ rsi.o jump_label.o
obj-$(CONFIG_COMPAT) += sys32.o signal32.o \
sys_compat.o
@@ -47,7 +47,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o
obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_CPU_PM) += sleep.o suspend.o
-obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_EFI) += efi.o efi-rt-wrapper.o
obj-$(CONFIG_PCI) += pci.o
@@ -78,7 +77,7 @@ $(obj)/vdso32-wrap.o: $(obj)/vdso32/vdso.so
obj-y += probes/
obj-y += head.o
-extra-y += vmlinux.lds
+always-$(KBUILD_BUILTIN) += vmlinux.lds
ifeq ($(CONFIG_DEBUG_EFI),y)
AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\""
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 45ea79cacf46..e151585c6cca 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1199,8 +1199,10 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
cpacr_restore(cpacr);
}
- if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0))
+ if (id_aa64pfr0_mpam(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
+ info->reg_mpamidr = read_cpuid(MPAMIDR_EL1);
init_cpu_ftr_reg(SYS_MPAMIDR_EL1, info->reg_mpamidr);
+ }
if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid);
@@ -1453,7 +1455,8 @@ void update_cpu_features(int cpu,
cpacr_restore(cpacr);
}
- if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0)) {
+ if (id_aa64pfr0_mpam(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
+ info->reg_mpamidr = read_cpuid(MPAMIDR_EL1);
taint |= check_update_ftr_reg(SYS_MPAMIDR_EL1, cpu,
info->reg_mpamidr, boot->reg_mpamidr);
}
@@ -3132,6 +3135,13 @@ static bool has_sve_feature(const struct arm64_cpu_capabilities *cap, int scope)
}
#endif
+#ifdef CONFIG_ARM64_SME
+static bool has_sme_feature(const struct arm64_cpu_capabilities *cap, int scope)
+{
+ return system_supports_sme() && has_user_cpuid_feature(cap, scope);
+}
+#endif
+
static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64ISAR0_EL1, AES, PMULL, CAP_HWCAP, KERNEL_HWCAP_PMULL),
HWCAP_CAP(ID_AA64ISAR0_EL1, AES, AES, CAP_HWCAP, KERNEL_HWCAP_AES),
@@ -3220,31 +3230,31 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64ISAR2_EL1, BC, IMP, CAP_HWCAP, KERNEL_HWCAP_HBC),
#ifdef CONFIG_ARM64_SME
HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME),
- HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
- HWCAP_CAP(ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p2, CAP_HWCAP, KERNEL_HWCAP_SME2P2),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2),
- HWCAP_CAP(ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
- HWCAP_CAP(ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM),
- HWCAP_CAP(ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA),
- HWCAP_CAP(ID_AA64SMFR0_EL1, STMOP, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_STMOP),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SMOP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SMOP4),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2p2, CAP_HWCAP, KERNEL_HWCAP_SME2P2),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, STMOP, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_STMOP),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMOP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SMOP4),
#endif /* CONFIG_ARM64_SME */
HWCAP_CAP(ID_AA64FPFR0_EL1, F8CVT, IMP, CAP_HWCAP, KERNEL_HWCAP_F8CVT),
HWCAP_CAP(ID_AA64FPFR0_EL1, F8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_F8FMA),
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 94525abd1c22..c1f2b6b04b41 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -496,8 +496,11 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
__cpuinfo_store_cpu_32bit(&info->aarch32);
- if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0))
- info->reg_mpamidr = read_cpuid(MPAMIDR_EL1);
+ /*
+ * info->reg_mpamidr deferred to {init,update}_cpu_features because we
+ * don't want to read it (and trigger a trap on buggy firmware) if
+ * using an aa64pfr0_el1 override to unconditionally disable MPAM.
+ */
if (IS_ENABLED(CONFIG_ARM64_SME) &&
id_aa64pfr1_sme(info->reg_id_aa64pfr1)) {
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 3857fd7ee8d4..62230d6dd919 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -15,6 +15,7 @@
#include <asm/efi.h>
#include <asm/stacktrace.h>
+#include <asm/vmap_stack.h>
static bool region_is_misaligned(const efi_memory_desc_t *md)
{
@@ -214,9 +215,13 @@ static int __init arm64_efi_rt_init(void)
if (!efi_enabled(EFI_RUNTIME_SERVICES))
return 0;
- p = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, GFP_KERNEL,
- NUMA_NO_NODE, &&l);
-l: if (!p) {
+ if (!IS_ENABLED(CONFIG_VMAP_STACK)) {
+ clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+ return -ENOMEM;
+ }
+
+ p = arch_alloc_vmap_stack(THREAD_SIZE, NUMA_NO_NODE);
+ if (!p) {
pr_warn("Failed to allocate EFI runtime stack\n");
clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
return -ENOMEM;
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 5a69b6eb4090..714b0b5ec5ac 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -10,6 +10,10 @@
#error This file should only be included in vmlinux.lds.S
#endif
+#if defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 210000
+#define ASSERT(...)
+#endif
+
#define PI_EXPORT_SYM(sym) \
__PI_EXPORT_SYM(sym, __pi_ ## sym, Cannot export BSS symbol sym to startup code)
#define __PI_EXPORT_SYM(sym, pisym, msg)\
@@ -140,4 +144,17 @@ KVM_NVHE_ALIAS(kvm_protected_mode_initialized);
_kernel_codesize = ABSOLUTE(__inittext_end - _text);
#endif
+/*
+ * LLD will occasionally error out with a '__init_end does not converge' error
+ * if INIT_IDMAP_DIR_SIZE is defined in terms of _end, as this results in a
+ * circular dependency. Counter this by dimensioning the initial IDMAP page
+ * tables based on kimage_limit, which is defined such that its value should
+ * not change as a result of the initdata segment being pushed over a 64k
+ * segment boundary due to changes in INIT_IDMAP_DIR_SIZE, provided that its
+ * value doesn't change by more than 2M between linker passes.
+ */
+kimage_limit = ALIGN(ABSOLUTE(_end + SZ_64K), SZ_2M);
+
+#undef ASSERT
+
#endif /* __ARM64_KERNEL_IMAGE_VARS_H */
diff --git a/arch/arm64/kernel/pi/idreg-override.c b/arch/arm64/kernel/pi/idreg-override.c
index c6b185b885f7..bc57b290e5e7 100644
--- a/arch/arm64/kernel/pi/idreg-override.c
+++ b/arch/arm64/kernel/pi/idreg-override.c
@@ -127,6 +127,7 @@ static const struct ftr_set_desc pfr0 __prel64_initconst = {
.fields = {
FIELD("sve", ID_AA64PFR0_EL1_SVE_SHIFT, pfr0_sve_filter),
FIELD("el0", ID_AA64PFR0_EL1_EL0_SHIFT, NULL),
+ FIELD("mpam", ID_AA64PFR0_EL1_MPAM_SHIFT, NULL),
{}
},
};
@@ -154,6 +155,7 @@ static const struct ftr_set_desc pfr1 __prel64_initconst = {
FIELD("gcs", ID_AA64PFR1_EL1_GCS_SHIFT, NULL),
FIELD("mte", ID_AA64PFR1_EL1_MTE_SHIFT, NULL),
FIELD("sme", ID_AA64PFR1_EL1_SME_SHIFT, pfr1_sme_filter),
+ FIELD("mpam_frac", ID_AA64PFR1_EL1_MPAM_frac_SHIFT, NULL),
{}
},
};
@@ -246,6 +248,7 @@ static const struct {
{ "rodata=off", "arm64_sw.rodataoff=1" },
{ "arm64.nolva", "id_aa64mmfr2.varange=0" },
{ "arm64.no32bit_el0", "id_aa64pfr0.el0=1" },
+ { "arm64.nompam", "id_aa64pfr0.mpam=0 id_aa64pfr1.mpam_frac=0" },
};
static int __init parse_hexdigit(const char *p, u64 *v)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index a5ca15daeb8a..08b7042a2e2d 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -288,7 +288,9 @@ static void flush_gcs(void)
if (!system_supports_gcs())
return;
- gcs_free(current);
+ current->thread.gcspr_el0 = 0;
+ current->thread.gcs_base = 0;
+ current->thread.gcs_size = 0;
current->thread.gcs_el0_mode = 0;
write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1);
write_sysreg_s(0, SYS_GCSPR_EL0);
@@ -671,6 +673,11 @@ static void permission_overlay_switch(struct task_struct *next)
current->thread.por_el0 = read_sysreg_s(SYS_POR_EL0);
if (current->thread.por_el0 != next->thread.por_el0) {
write_sysreg_s(next->thread.por_el0, SYS_POR_EL0);
+ /*
+ * No ISB required as we can tolerate spurious Overlay faults -
+ * the fault handler will check again based on the new value
+ * of POR_EL0.
+ */
}
}
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index a360e52db02f..ee94b72bf8fb 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -141,7 +141,7 @@ unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
addr += n;
if (regs_within_kernel_stack(regs, (unsigned long)addr))
- return *addr;
+ return READ_ONCE_NOCHECK(*addr);
else
return 0;
}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 3b3f6b56e733..21a795303568 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -1143,7 +1143,7 @@ static inline unsigned int num_other_online_cpus(void)
void smp_send_stop(void)
{
static unsigned long stop_in_progress;
- cpumask_t mask;
+ static cpumask_t mask;
unsigned long timeout;
/*
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index fdbc8beec930..701ea10a63f1 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -108,16 +108,16 @@ static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
switch(arch_timer_ctx_index(ctxt)) {
case TIMER_VTIMER:
- __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = ctl;
+ __vcpu_assign_sys_reg(vcpu, CNTV_CTL_EL0, ctl);
break;
case TIMER_PTIMER:
- __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl;
+ __vcpu_assign_sys_reg(vcpu, CNTP_CTL_EL0, ctl);
break;
case TIMER_HVTIMER:
- __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2) = ctl;
+ __vcpu_assign_sys_reg(vcpu, CNTHV_CTL_EL2, ctl);
break;
case TIMER_HPTIMER:
- __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2) = ctl;
+ __vcpu_assign_sys_reg(vcpu, CNTHP_CTL_EL2, ctl);
break;
default:
WARN_ON(1);
@@ -130,16 +130,16 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
switch(arch_timer_ctx_index(ctxt)) {
case TIMER_VTIMER:
- __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = cval;
+ __vcpu_assign_sys_reg(vcpu, CNTV_CVAL_EL0, cval);
break;
case TIMER_PTIMER:
- __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval;
+ __vcpu_assign_sys_reg(vcpu, CNTP_CVAL_EL0, cval);
break;
case TIMER_HVTIMER:
- __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2) = cval;
+ __vcpu_assign_sys_reg(vcpu, CNTHV_CVAL_EL2, cval);
break;
case TIMER_HPTIMER:
- __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = cval;
+ __vcpu_assign_sys_reg(vcpu, CNTHP_CVAL_EL2, cval);
break;
default:
WARN_ON(1);
@@ -1036,7 +1036,7 @@ void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
if (vcpu_has_nv(vcpu)) {
struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset;
- offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
+ offs->vcpu_offset = __ctxt_sys_reg(&vcpu->arch.ctxt, CNTVOFF_EL2);
offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset;
}
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index de2b4e9c9f9f..23dd3f3fc3eb 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -825,10 +825,6 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
if (!kvm_arm_vcpu_is_finalized(vcpu))
return -EPERM;
- ret = kvm_arch_vcpu_run_map_fp(vcpu);
- if (ret)
- return ret;
-
if (likely(vcpu_has_run_once(vcpu)))
return 0;
@@ -2129,7 +2125,7 @@ static void cpu_hyp_init(void *discard)
static void cpu_hyp_uninit(void *discard)
{
- if (__this_cpu_read(kvm_hyp_initialized)) {
+ if (!is_protected_kvm_enabled() && __this_cpu_read(kvm_hyp_initialized)) {
cpu_hyp_reset();
__this_cpu_write(kvm_hyp_initialized, 0);
}
@@ -2345,8 +2341,13 @@ static void __init teardown_hyp_mode(void)
free_hyp_pgds();
for_each_possible_cpu(cpu) {
+ if (per_cpu(kvm_hyp_initialized, cpu))
+ continue;
+
free_pages(per_cpu(kvm_arm_hyp_stack_base, cpu), NVHE_STACK_SHIFT - PAGE_SHIFT);
- free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order());
+
+ if (!kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu])
+ continue;
if (free_sve) {
struct cpu_sve_state *sve_state;
@@ -2354,6 +2355,9 @@ static void __init teardown_hyp_mode(void)
sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
free_pages((unsigned long) sve_state, pkvm_host_sve_state_order());
}
+
+ free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order());
+
}
}
@@ -2764,7 +2768,8 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,
struct kvm_kernel_irq_routing_entry *new)
{
- if (new->type != KVM_IRQ_ROUTING_MSI)
+ if (old->type != KVM_IRQ_ROUTING_MSI ||
+ new->type != KVM_IRQ_ROUTING_MSI)
return true;
return memcmp(&old->msi, &new->msi, sizeof(new->msi));
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index 0e4c805e7e89..1a7dab333f55 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -216,9 +216,9 @@ void kvm_debug_set_guest_ownership(struct kvm_vcpu *vcpu)
void kvm_debug_handle_oslar(struct kvm_vcpu *vcpu, u64 val)
{
if (val & OSLAR_EL1_OSLK)
- __vcpu_sys_reg(vcpu, OSLSR_EL1) |= OSLSR_EL1_OSLK;
+ __vcpu_rmw_sys_reg(vcpu, OSLSR_EL1, |=, OSLSR_EL1_OSLK);
else
- __vcpu_sys_reg(vcpu, OSLSR_EL1) &= ~OSLSR_EL1_OSLK;
+ __vcpu_rmw_sys_reg(vcpu, OSLSR_EL1, &=, ~OSLSR_EL1_OSLK);
preempt_disable();
kvm_arch_vcpu_put(vcpu);
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 7f6e43d25691..15e17aca1dec 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -15,32 +15,6 @@
#include <asm/sysreg.h>
/*
- * Called on entry to KVM_RUN unless this vcpu previously ran at least
- * once and the most recent prior KVM_RUN for this vcpu was called from
- * the same task as current (highly likely).
- *
- * This is guaranteed to execute before kvm_arch_vcpu_load_fp(vcpu),
- * such that on entering hyp the relevant parts of current are already
- * mapped.
- */
-int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
-{
- struct user_fpsimd_state *fpsimd = &current->thread.uw.fpsimd_state;
- int ret;
-
- /* pKVM has its own tracking of the host fpsimd state. */
- if (is_protected_kvm_enabled())
- return 0;
-
- /* Make sure the host task fpsimd state is visible to hyp: */
- ret = kvm_share_hyp(fpsimd, fpsimd + 1);
- if (ret)
- return ret;
-
- return 0;
-}
-
-/*
* Prepare vcpu for saving the host's FPSIMD state and loading the guest's.
* The actual loading is done by the FPSIMD access trap taken to hyp.
*
@@ -103,8 +77,8 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
fp_state.sve_state = vcpu->arch.sve_state;
fp_state.sve_vl = vcpu->arch.sve_max_vl;
fp_state.sme_state = NULL;
- fp_state.svcr = &__vcpu_sys_reg(vcpu, SVCR);
- fp_state.fpmr = &__vcpu_sys_reg(vcpu, FPMR);
+ fp_state.svcr = __ctxt_sys_reg(&vcpu->arch.ctxt, SVCR);
+ fp_state.fpmr = __ctxt_sys_reg(&vcpu->arch.ctxt, FPMR);
fp_state.fp_type = &vcpu->arch.fp_type;
if (vcpu_has_sve(vcpu))
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
index 424a5107cddb..6a2a899a344e 100644
--- a/arch/arm64/kvm/hyp/exception.c
+++ b/arch/arm64/kvm/hyp/exception.c
@@ -37,7 +37,7 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
if (unlikely(vcpu_has_nv(vcpu)))
vcpu_write_sys_reg(vcpu, val, reg);
else if (!__vcpu_write_sys_reg_to_cpu(val, reg))
- __vcpu_sys_reg(vcpu, reg) = val;
+ __vcpu_assign_sys_reg(vcpu, reg, val);
}
static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long target_mode,
@@ -51,7 +51,7 @@ static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long target_mode,
} else if (has_vhe()) {
write_sysreg_el1(val, SYS_SPSR);
} else {
- __vcpu_sys_reg(vcpu, SPSR_EL1) = val;
+ __vcpu_assign_sys_reg(vcpu, SPSR_EL1, val);
}
}
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index bb9f2eecfb67..2ad57b117385 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -45,7 +45,7 @@ static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
if (!vcpu_el1_is_32bit(vcpu))
return;
- __vcpu_sys_reg(vcpu, FPEXC32_EL2) = read_sysreg(fpexc32_el2);
+ __vcpu_assign_sys_reg(vcpu, FPEXC32_EL2, read_sysreg(fpexc32_el2));
}
static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
@@ -65,6 +65,136 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
}
}
+static inline void __activate_cptr_traps_nvhe(struct kvm_vcpu *vcpu)
+{
+ u64 val = CPTR_NVHE_EL2_RES1 | CPTR_EL2_TAM | CPTR_EL2_TTA;
+
+ /*
+ * Always trap SME since it's not supported in KVM.
+ * TSM is RES1 if SME isn't implemented.
+ */
+ val |= CPTR_EL2_TSM;
+
+ if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs())
+ val |= CPTR_EL2_TZ;
+
+ if (!guest_owns_fp_regs())
+ val |= CPTR_EL2_TFP;
+
+ write_sysreg(val, cptr_el2);
+}
+
+static inline void __activate_cptr_traps_vhe(struct kvm_vcpu *vcpu)
+{
+ /*
+ * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
+ * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2,
+ * except for some missing controls, such as TAM.
+ * In this case, CPTR_EL2.TAM has the same position with or without
+ * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM
+ * shift value for trapping the AMU accesses.
+ */
+ u64 val = CPTR_EL2_TAM | CPACR_EL1_TTA;
+ u64 cptr;
+
+ if (guest_owns_fp_regs()) {
+ val |= CPACR_EL1_FPEN;
+ if (vcpu_has_sve(vcpu))
+ val |= CPACR_EL1_ZEN;
+ }
+
+ if (!vcpu_has_nv(vcpu))
+ goto write;
+
+ /*
+ * The architecture is a bit crap (what a surprise): an EL2 guest
+ * writing to CPTR_EL2 via CPACR_EL1 can't set any of TCPAC or TTA,
+ * as they are RES0 in the guest's view. To work around it, trap the
+ * sucker using the very same bit it can't set...
+ */
+ if (vcpu_el2_e2h_is_set(vcpu) && is_hyp_ctxt(vcpu))
+ val |= CPTR_EL2_TCPAC;
+
+ /*
+ * Layer the guest hypervisor's trap configuration on top of our own if
+ * we're in a nested context.
+ */
+ if (is_hyp_ctxt(vcpu))
+ goto write;
+
+ cptr = vcpu_sanitised_cptr_el2(vcpu);
+
+ /*
+ * Pay attention, there's some interesting detail here.
+ *
+ * The CPTR_EL2.xEN fields are 2 bits wide, although there are only two
+ * meaningful trap states when HCR_EL2.TGE = 0 (running a nested guest):
+ *
+ * - CPTR_EL2.xEN = x0, traps are enabled
+ * - CPTR_EL2.xEN = x1, traps are disabled
+ *
+ * In other words, bit[0] determines if guest accesses trap or not. In
+ * the interest of simplicity, clear the entire field if the guest
+ * hypervisor has traps enabled to dispel any illusion of something more
+ * complicated taking place.
+ */
+ if (!(SYS_FIELD_GET(CPACR_EL1, FPEN, cptr) & BIT(0)))
+ val &= ~CPACR_EL1_FPEN;
+ if (!(SYS_FIELD_GET(CPACR_EL1, ZEN, cptr) & BIT(0)))
+ val &= ~CPACR_EL1_ZEN;
+
+ if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S2POE, IMP))
+ val |= cptr & CPACR_EL1_E0POE;
+
+ val |= cptr & CPTR_EL2_TCPAC;
+
+write:
+ write_sysreg(val, cpacr_el1);
+}
+
+static inline void __activate_cptr_traps(struct kvm_vcpu *vcpu)
+{
+ if (!guest_owns_fp_regs())
+ __activate_traps_fpsimd32(vcpu);
+
+ if (has_vhe() || has_hvhe())
+ __activate_cptr_traps_vhe(vcpu);
+ else
+ __activate_cptr_traps_nvhe(vcpu);
+}
+
+static inline void __deactivate_cptr_traps_nvhe(struct kvm_vcpu *vcpu)
+{
+ u64 val = CPTR_NVHE_EL2_RES1;
+
+ if (!cpus_have_final_cap(ARM64_SVE))
+ val |= CPTR_EL2_TZ;
+ if (!cpus_have_final_cap(ARM64_SME))
+ val |= CPTR_EL2_TSM;
+
+ write_sysreg(val, cptr_el2);
+}
+
+static inline void __deactivate_cptr_traps_vhe(struct kvm_vcpu *vcpu)
+{
+ u64 val = CPACR_EL1_FPEN;
+
+ if (cpus_have_final_cap(ARM64_SVE))
+ val |= CPACR_EL1_ZEN;
+ if (cpus_have_final_cap(ARM64_SME))
+ val |= CPACR_EL1_SMEN;
+
+ write_sysreg(val, cpacr_el1);
+}
+
+static inline void __deactivate_cptr_traps(struct kvm_vcpu *vcpu)
+{
+ if (has_vhe() || has_hvhe())
+ __deactivate_cptr_traps_vhe(vcpu);
+ else
+ __deactivate_cptr_traps_nvhe(vcpu);
+}
+
#define reg_to_fgt_masks(reg) \
({ \
struct fgt_masks *m; \
@@ -456,7 +586,7 @@ static inline void fpsimd_lazy_switch_to_host(struct kvm_vcpu *vcpu)
*/
if (vcpu_has_sve(vcpu)) {
zcr_el1 = read_sysreg_el1(SYS_ZCR);
- __vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)) = zcr_el1;
+ __vcpu_assign_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu), zcr_el1);
/*
* The guest's state is always saved using the guest's max VL.
@@ -486,11 +616,6 @@ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
*/
if (system_supports_sve()) {
__hyp_sve_save_host();
-
- /* Re-enable SVE traps if not supported for the guest vcpu. */
- if (!vcpu_has_sve(vcpu))
- cpacr_clear_set(CPACR_EL1_ZEN, 0);
-
} else {
__fpsimd_save_state(host_data_ptr(host_ctxt.fp_regs));
}
@@ -541,10 +666,7 @@ static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
/* Valid trap. Switch the context: */
/* First disable enough traps to allow us to update the registers */
- if (sve_guest || (is_protected_kvm_enabled() && system_supports_sve()))
- cpacr_clear_set(0, CPACR_EL1_FPEN | CPACR_EL1_ZEN);
- else
- cpacr_clear_set(0, CPACR_EL1_FPEN);
+ __deactivate_cptr_traps(vcpu);
isb();
/* Write out the host state if it's in the registers */
@@ -566,6 +688,13 @@ static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
*host_data_ptr(fp_owner) = FP_STATE_GUEST_OWNED;
+ /*
+ * Re-enable traps necessary for the current state of the guest, e.g.
+ * those enabled by a guest hypervisor. The ERET to the guest will
+ * provide the necessary context synchronization.
+ */
+ __activate_cptr_traps(vcpu);
+
return true;
}
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index b9cff893bbe0..4d0dbea4c56f 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -307,11 +307,11 @@ static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu)
vcpu->arch.ctxt.spsr_irq = read_sysreg(spsr_irq);
vcpu->arch.ctxt.spsr_fiq = read_sysreg(spsr_fiq);
- __vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2);
- __vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2);
+ __vcpu_assign_sys_reg(vcpu, DACR32_EL2, read_sysreg(dacr32_el2));
+ __vcpu_assign_sys_reg(vcpu, IFSR32_EL2, read_sysreg(ifsr32_el2));
if (has_vhe() || kvm_debug_regs_in_use(vcpu))
- __vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2);
+ __vcpu_assign_sys_reg(vcpu, DBGVCR32_EL2, read_sysreg(dbgvcr32_el2));
}
static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 8e8848de4d47..3206b2c07f82 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -26,7 +26,7 @@ void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
{
- __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
+ __vcpu_assign_sys_reg(vcpu, ZCR_EL1, read_sysreg_el1(SYS_ZCR));
/*
* On saving/restoring guest sve state, always use the maximum VL for
* the guest. The layout of the data when saving the sve state depends
@@ -69,7 +69,10 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
if (!guest_owns_fp_regs())
return;
- cpacr_clear_set(0, CPACR_EL1_FPEN | CPACR_EL1_ZEN);
+ /*
+ * Traps have been disabled by __deactivate_cptr_traps(), but there
+ * hasn't necessarily been a context synchronization event yet.
+ */
isb();
if (vcpu_has_sve(vcpu))
@@ -79,7 +82,7 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
has_fpmr = kvm_has_fpmr(kern_hyp_va(vcpu->kvm));
if (has_fpmr)
- __vcpu_sys_reg(vcpu, FPMR) = read_sysreg_s(SYS_FPMR);
+ __vcpu_assign_sys_reg(vcpu, FPMR, read_sysreg_s(SYS_FPMR));
if (system_supports_sve())
__hyp_sve_restore_host();
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 95d7534c9679..8957734d6183 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -479,6 +479,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
{
struct kvm_mem_range cur;
kvm_pte_t pte;
+ u64 granule;
s8 level;
int ret;
@@ -496,18 +497,21 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
return -EPERM;
}
- do {
- u64 granule = kvm_granule_size(level);
+ for (; level <= KVM_PGTABLE_LAST_LEVEL; level++) {
+ if (!kvm_level_supports_block_mapping(level))
+ continue;
+ granule = kvm_granule_size(level);
cur.start = ALIGN_DOWN(addr, granule);
cur.end = cur.start + granule;
- level++;
- } while ((level <= KVM_PGTABLE_LAST_LEVEL) &&
- !(kvm_level_supports_block_mapping(level) &&
- range_included(&cur, range)));
+ if (!range_included(&cur, range))
+ continue;
+ *range = cur;
+ return 0;
+ }
- *range = cur;
+ WARN_ON(1);
- return 0;
+ return -EINVAL;
}
int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 73affe1333a4..0e752b515d0f 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -47,65 +47,6 @@ struct fgt_masks hdfgwtr2_masks;
extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
-static void __activate_cptr_traps(struct kvm_vcpu *vcpu)
-{
- u64 val = CPTR_EL2_TAM; /* Same bit irrespective of E2H */
-
- if (!guest_owns_fp_regs())
- __activate_traps_fpsimd32(vcpu);
-
- if (has_hvhe()) {
- val |= CPACR_EL1_TTA;
-
- if (guest_owns_fp_regs()) {
- val |= CPACR_EL1_FPEN;
- if (vcpu_has_sve(vcpu))
- val |= CPACR_EL1_ZEN;
- }
-
- write_sysreg(val, cpacr_el1);
- } else {
- val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1;
-
- /*
- * Always trap SME since it's not supported in KVM.
- * TSM is RES1 if SME isn't implemented.
- */
- val |= CPTR_EL2_TSM;
-
- if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs())
- val |= CPTR_EL2_TZ;
-
- if (!guest_owns_fp_regs())
- val |= CPTR_EL2_TFP;
-
- write_sysreg(val, cptr_el2);
- }
-}
-
-static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu)
-{
- if (has_hvhe()) {
- u64 val = CPACR_EL1_FPEN;
-
- if (cpus_have_final_cap(ARM64_SVE))
- val |= CPACR_EL1_ZEN;
- if (cpus_have_final_cap(ARM64_SME))
- val |= CPACR_EL1_SMEN;
-
- write_sysreg(val, cpacr_el1);
- } else {
- u64 val = CPTR_NVHE_EL2_RES1;
-
- if (!cpus_have_final_cap(ARM64_SVE))
- val |= CPTR_EL2_TZ;
- if (!cpus_have_final_cap(ARM64_SME))
- val |= CPTR_EL2_TSM;
-
- write_sysreg(val, cptr_el2);
- }
-}
-
static void __activate_traps(struct kvm_vcpu *vcpu)
{
___activate_traps(vcpu, vcpu->arch.hcr_el2);
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index c9b330dc2066..477f1580ffea 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -90,87 +90,6 @@ static u64 __compute_hcr(struct kvm_vcpu *vcpu)
return hcr | (guest_hcr & ~NV_HCR_GUEST_EXCLUDE);
}
-static void __activate_cptr_traps(struct kvm_vcpu *vcpu)
-{
- u64 cptr;
-
- /*
- * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
- * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2,
- * except for some missing controls, such as TAM.
- * In this case, CPTR_EL2.TAM has the same position with or without
- * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM
- * shift value for trapping the AMU accesses.
- */
- u64 val = CPACR_EL1_TTA | CPTR_EL2_TAM;
-
- if (guest_owns_fp_regs()) {
- val |= CPACR_EL1_FPEN;
- if (vcpu_has_sve(vcpu))
- val |= CPACR_EL1_ZEN;
- } else {
- __activate_traps_fpsimd32(vcpu);
- }
-
- if (!vcpu_has_nv(vcpu))
- goto write;
-
- /*
- * The architecture is a bit crap (what a surprise): an EL2 guest
- * writing to CPTR_EL2 via CPACR_EL1 can't set any of TCPAC or TTA,
- * as they are RES0 in the guest's view. To work around it, trap the
- * sucker using the very same bit it can't set...
- */
- if (vcpu_el2_e2h_is_set(vcpu) && is_hyp_ctxt(vcpu))
- val |= CPTR_EL2_TCPAC;
-
- /*
- * Layer the guest hypervisor's trap configuration on top of our own if
- * we're in a nested context.
- */
- if (is_hyp_ctxt(vcpu))
- goto write;
-
- cptr = vcpu_sanitised_cptr_el2(vcpu);
-
- /*
- * Pay attention, there's some interesting detail here.
- *
- * The CPTR_EL2.xEN fields are 2 bits wide, although there are only two
- * meaningful trap states when HCR_EL2.TGE = 0 (running a nested guest):
- *
- * - CPTR_EL2.xEN = x0, traps are enabled
- * - CPTR_EL2.xEN = x1, traps are disabled
- *
- * In other words, bit[0] determines if guest accesses trap or not. In
- * the interest of simplicity, clear the entire field if the guest
- * hypervisor has traps enabled to dispel any illusion of something more
- * complicated taking place.
- */
- if (!(SYS_FIELD_GET(CPACR_EL1, FPEN, cptr) & BIT(0)))
- val &= ~CPACR_EL1_FPEN;
- if (!(SYS_FIELD_GET(CPACR_EL1, ZEN, cptr) & BIT(0)))
- val &= ~CPACR_EL1_ZEN;
-
- if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S2POE, IMP))
- val |= cptr & CPACR_EL1_E0POE;
-
- val |= cptr & CPTR_EL2_TCPAC;
-
-write:
- write_sysreg(val, cpacr_el1);
-}
-
-static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu)
-{
- u64 val = CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN;
-
- if (cpus_have_final_cap(ARM64_SME))
- val |= CPACR_EL1_SMEN_EL1EN;
-
- write_sysreg(val, cpacr_el1);
-}
-
static void __activate_traps(struct kvm_vcpu *vcpu)
{
u64 val;
@@ -223,9 +142,9 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
*/
val = read_sysreg_el0(SYS_CNTP_CVAL);
if (map.direct_ptimer == vcpu_ptimer(vcpu))
- __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val;
+ __vcpu_assign_sys_reg(vcpu, CNTP_CVAL_EL0, val);
if (map.direct_ptimer == vcpu_hptimer(vcpu))
- __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = val;
+ __vcpu_assign_sys_reg(vcpu, CNTHP_CVAL_EL2, val);
offset = read_sysreg_s(SYS_CNTPOFF_EL2);
@@ -639,10 +558,10 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
host_ctxt = host_data_ptr(host_ctxt);
guest_ctxt = &vcpu->arch.ctxt;
- sysreg_save_host_state_vhe(host_ctxt);
-
fpsimd_lazy_switch_to_guest(vcpu);
+ sysreg_save_host_state_vhe(host_ctxt);
+
/*
* Note that ARM erratum 1165522 requires us to configure both stage 1
* and stage 2 translation for the guest context before we clear
@@ -667,15 +586,23 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
__deactivate_traps(vcpu);
- fpsimd_lazy_switch_to_host(vcpu);
-
sysreg_restore_host_state_vhe(host_ctxt);
+ __debug_switch_to_host(vcpu);
+
+ /*
+ * Ensure that all system register writes above have taken effect
+ * before returning to the host. In VHE mode, CPTR traps for
+ * FPSIMD/SVE/SME also apply to EL2, so FPSIMD/SVE/SME state must be
+ * manipulated after the ISB.
+ */
+ isb();
+
+ fpsimd_lazy_switch_to_host(vcpu);
+
if (guest_owns_fp_regs())
__fpsimd_save_fpexc32(vcpu);
- __debug_switch_to_host(vcpu);
-
return exit_code;
}
NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe);
@@ -705,12 +632,6 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
*/
local_daif_restore(DAIF_PROCCTX_NOIRQ);
- /*
- * When we exit from the guest we change a number of CPU configuration
- * parameters, such as traps. We rely on the isb() in kvm_call_hyp*()
- * to make sure these changes take effect before running the host or
- * additional guests.
- */
return ret;
}
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index 3814b0b2c937..73e4bc7fde9e 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -18,17 +18,17 @@
static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu)
{
/* These registers are common with EL1 */
- __vcpu_sys_reg(vcpu, PAR_EL1) = read_sysreg(par_el1);
- __vcpu_sys_reg(vcpu, TPIDR_EL1) = read_sysreg(tpidr_el1);
-
- __vcpu_sys_reg(vcpu, ESR_EL2) = read_sysreg_el1(SYS_ESR);
- __vcpu_sys_reg(vcpu, AFSR0_EL2) = read_sysreg_el1(SYS_AFSR0);
- __vcpu_sys_reg(vcpu, AFSR1_EL2) = read_sysreg_el1(SYS_AFSR1);
- __vcpu_sys_reg(vcpu, FAR_EL2) = read_sysreg_el1(SYS_FAR);
- __vcpu_sys_reg(vcpu, MAIR_EL2) = read_sysreg_el1(SYS_MAIR);
- __vcpu_sys_reg(vcpu, VBAR_EL2) = read_sysreg_el1(SYS_VBAR);
- __vcpu_sys_reg(vcpu, CONTEXTIDR_EL2) = read_sysreg_el1(SYS_CONTEXTIDR);
- __vcpu_sys_reg(vcpu, AMAIR_EL2) = read_sysreg_el1(SYS_AMAIR);
+ __vcpu_assign_sys_reg(vcpu, PAR_EL1, read_sysreg(par_el1));
+ __vcpu_assign_sys_reg(vcpu, TPIDR_EL1, read_sysreg(tpidr_el1));
+
+ __vcpu_assign_sys_reg(vcpu, ESR_EL2, read_sysreg_el1(SYS_ESR));
+ __vcpu_assign_sys_reg(vcpu, AFSR0_EL2, read_sysreg_el1(SYS_AFSR0));
+ __vcpu_assign_sys_reg(vcpu, AFSR1_EL2, read_sysreg_el1(SYS_AFSR1));
+ __vcpu_assign_sys_reg(vcpu, FAR_EL2, read_sysreg_el1(SYS_FAR));
+ __vcpu_assign_sys_reg(vcpu, MAIR_EL2, read_sysreg_el1(SYS_MAIR));
+ __vcpu_assign_sys_reg(vcpu, VBAR_EL2, read_sysreg_el1(SYS_VBAR));
+ __vcpu_assign_sys_reg(vcpu, CONTEXTIDR_EL2, read_sysreg_el1(SYS_CONTEXTIDR));
+ __vcpu_assign_sys_reg(vcpu, AMAIR_EL2, read_sysreg_el1(SYS_AMAIR));
/*
* In VHE mode those registers are compatible between EL1 and EL2,
@@ -46,21 +46,21 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu)
* are always trapped, ensuring that the in-memory
* copy is always up-to-date. A small blessing...
*/
- __vcpu_sys_reg(vcpu, SCTLR_EL2) = read_sysreg_el1(SYS_SCTLR);
- __vcpu_sys_reg(vcpu, TTBR0_EL2) = read_sysreg_el1(SYS_TTBR0);
- __vcpu_sys_reg(vcpu, TTBR1_EL2) = read_sysreg_el1(SYS_TTBR1);
- __vcpu_sys_reg(vcpu, TCR_EL2) = read_sysreg_el1(SYS_TCR);
+ __vcpu_assign_sys_reg(vcpu, SCTLR_EL2, read_sysreg_el1(SYS_SCTLR));
+ __vcpu_assign_sys_reg(vcpu, TTBR0_EL2, read_sysreg_el1(SYS_TTBR0));
+ __vcpu_assign_sys_reg(vcpu, TTBR1_EL2, read_sysreg_el1(SYS_TTBR1));
+ __vcpu_assign_sys_reg(vcpu, TCR_EL2, read_sysreg_el1(SYS_TCR));
if (ctxt_has_tcrx(&vcpu->arch.ctxt)) {
- __vcpu_sys_reg(vcpu, TCR2_EL2) = read_sysreg_el1(SYS_TCR2);
+ __vcpu_assign_sys_reg(vcpu, TCR2_EL2, read_sysreg_el1(SYS_TCR2));
if (ctxt_has_s1pie(&vcpu->arch.ctxt)) {
- __vcpu_sys_reg(vcpu, PIRE0_EL2) = read_sysreg_el1(SYS_PIRE0);
- __vcpu_sys_reg(vcpu, PIR_EL2) = read_sysreg_el1(SYS_PIR);
+ __vcpu_assign_sys_reg(vcpu, PIRE0_EL2, read_sysreg_el1(SYS_PIRE0));
+ __vcpu_assign_sys_reg(vcpu, PIR_EL2, read_sysreg_el1(SYS_PIR));
}
if (ctxt_has_s1poe(&vcpu->arch.ctxt))
- __vcpu_sys_reg(vcpu, POR_EL2) = read_sysreg_el1(SYS_POR);
+ __vcpu_assign_sys_reg(vcpu, POR_EL2, read_sysreg_el1(SYS_POR));
}
/*
@@ -70,13 +70,13 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu)
*/
val = read_sysreg_el1(SYS_CNTKCTL);
val &= CNTKCTL_VALID_BITS;
- __vcpu_sys_reg(vcpu, CNTHCTL_EL2) &= ~CNTKCTL_VALID_BITS;
- __vcpu_sys_reg(vcpu, CNTHCTL_EL2) |= val;
+ __vcpu_rmw_sys_reg(vcpu, CNTHCTL_EL2, &=, ~CNTKCTL_VALID_BITS);
+ __vcpu_rmw_sys_reg(vcpu, CNTHCTL_EL2, |=, val);
}
- __vcpu_sys_reg(vcpu, SP_EL2) = read_sysreg(sp_el1);
- __vcpu_sys_reg(vcpu, ELR_EL2) = read_sysreg_el1(SYS_ELR);
- __vcpu_sys_reg(vcpu, SPSR_EL2) = read_sysreg_el1(SYS_SPSR);
+ __vcpu_assign_sys_reg(vcpu, SP_EL2, read_sysreg(sp_el1));
+ __vcpu_assign_sys_reg(vcpu, ELR_EL2, read_sysreg_el1(SYS_ELR));
+ __vcpu_assign_sys_reg(vcpu, SPSR_EL2, read_sysreg_el1(SYS_SPSR));
}
static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 4a53e4147fb0..dc1d26559bfa 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -1402,6 +1402,21 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu)
}
}
+#define has_tgran_2(__r, __sz) \
+ ({ \
+ u64 _s1, _s2, _mmfr0 = __r; \
+ \
+ _s2 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \
+ TGRAN##__sz##_2, _mmfr0); \
+ \
+ _s1 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \
+ TGRAN##__sz, _mmfr0); \
+ \
+ ((_s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_NI && \
+ _s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz) || \
+ (_s2 == ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz && \
+ _s1 != ID_AA64MMFR0_EL1_TGRAN##__sz##_NI)); \
+ })
/*
* Our emulated CPU doesn't support all the possible features. For the
* sake of simplicity (and probably mental sanity), wipe out a number
@@ -1411,6 +1426,8 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu)
*/
u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
{
+ u64 orig_val = val;
+
switch (reg) {
case SYS_ID_AA64ISAR0_EL1:
/* Support everything but TME */
@@ -1480,13 +1497,16 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
*/
switch (PAGE_SIZE) {
case SZ_4K:
- val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP);
+ if (has_tgran_2(orig_val, 4))
+ val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP);
fallthrough;
case SZ_16K:
- val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP);
+ if (has_tgran_2(orig_val, 16))
+ val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP);
fallthrough;
case SZ_64K:
- val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP);
+ if (has_tgran_2(orig_val, 64))
+ val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP);
break;
}
@@ -1757,7 +1777,7 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu)
out:
for (enum vcpu_sysreg sr = __SANITISED_REG_START__; sr < NR_SYS_REGS; sr++)
- (void)__vcpu_sys_reg(vcpu, sr);
+ __vcpu_rmw_sys_reg(vcpu, sr, |=, 0);
return 0;
}
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 25c29107f13f..b03dbda7f1ab 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -178,7 +178,7 @@ static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force)
val |= lower_32_bits(val);
}
- __vcpu_sys_reg(vcpu, reg) = val;
+ __vcpu_assign_sys_reg(vcpu, reg, val);
/* Recreate the perf event to reflect the updated sample_period */
kvm_pmu_create_perf_event(pmc);
@@ -204,7 +204,7 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
void kvm_pmu_set_counter_value_user(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, select_idx));
- __vcpu_sys_reg(vcpu, counter_index_to_reg(select_idx)) = val;
+ __vcpu_assign_sys_reg(vcpu, counter_index_to_reg(select_idx), val);
kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
}
@@ -239,7 +239,7 @@ static void kvm_pmu_stop_counter(struct kvm_pmc *pmc)
reg = counter_index_to_reg(pmc->idx);
- __vcpu_sys_reg(vcpu, reg) = val;
+ __vcpu_assign_sys_reg(vcpu, reg, val);
kvm_pmu_release_perf_event(pmc);
}
@@ -503,14 +503,14 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1;
if (!kvm_pmc_is_64bit(pmc))
reg = lower_32_bits(reg);
- __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg;
+ __vcpu_assign_sys_reg(vcpu, counter_index_to_reg(i), reg);
/* No overflow? move on */
if (kvm_pmc_has_64bit_overflow(pmc) ? reg : lower_32_bits(reg))
continue;
/* Mark overflow */
- __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
+ __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, |=, BIT(i));
if (kvm_pmu_counter_can_chain(pmc))
kvm_pmu_counter_increment(vcpu, BIT(i + 1),
@@ -556,7 +556,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
perf_event->attr.sample_period = period;
perf_event->hw.sample_period = period;
- __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
+ __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, |=, BIT(idx));
if (kvm_pmu_counter_can_chain(pmc))
kvm_pmu_counter_increment(vcpu, BIT(idx + 1),
@@ -602,7 +602,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
/* The reset bits don't indicate any state, and shouldn't be saved. */
- __vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P);
+ __vcpu_assign_sys_reg(vcpu, PMCR_EL0, (val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P)));
if (val & ARMV8_PMU_PMCR_C)
kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
@@ -779,7 +779,7 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
u64 reg;
reg = counter_index_to_evtreg(pmc->idx);
- __vcpu_sys_reg(vcpu, reg) = data & kvm_pmu_evtyper_mask(vcpu->kvm);
+ __vcpu_assign_sys_reg(vcpu, reg, (data & kvm_pmu_evtyper_mask(vcpu->kvm)));
kvm_pmu_create_perf_event(pmc);
}
@@ -914,9 +914,9 @@ void kvm_vcpu_reload_pmu(struct kvm_vcpu *vcpu)
{
u64 mask = kvm_pmu_implemented_counter_mask(vcpu);
- __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= mask;
- __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= mask;
- __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= mask;
+ __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, &=, mask);
+ __vcpu_rmw_sys_reg(vcpu, PMINTENSET_EL1, &=, mask);
+ __vcpu_rmw_sys_reg(vcpu, PMCNTENSET_EL0, &=, mask);
kvm_pmu_reprogram_counter_mask(vcpu, mask);
}
@@ -1038,7 +1038,7 @@ static void kvm_arm_set_nr_counters(struct kvm *kvm, unsigned int nr)
u64 val = __vcpu_sys_reg(vcpu, MDCR_EL2);
val &= ~MDCR_EL2_HPMN;
val |= FIELD_PREP(MDCR_EL2_HPMN, kvm->arch.nr_pmu_counters);
- __vcpu_sys_reg(vcpu, MDCR_EL2) = val;
+ __vcpu_assign_sys_reg(vcpu, MDCR_EL2, val);
}
}
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index a6cf2888d150..c20bd6f21e60 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -228,7 +228,7 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
* to reverse-translate virtual EL2 system registers for a
* non-VHE guest hypervisor.
*/
- __vcpu_sys_reg(vcpu, reg) = val;
+ __vcpu_assign_sys_reg(vcpu, reg, val);
switch (reg) {
case CNTHCTL_EL2:
@@ -263,7 +263,7 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
return;
memory_write:
- __vcpu_sys_reg(vcpu, reg) = val;
+ __vcpu_assign_sys_reg(vcpu, reg, val);
}
/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
@@ -605,7 +605,7 @@ static int set_oslsr_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
if ((val ^ rd->val) & ~OSLSR_EL1_OSLK)
return -EINVAL;
- __vcpu_sys_reg(vcpu, rd->reg) = val;
+ __vcpu_assign_sys_reg(vcpu, rd->reg, val);
return 0;
}
@@ -791,7 +791,7 @@ static u64 reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
mask |= GENMASK(n - 1, 0);
reset_unknown(vcpu, r);
- __vcpu_sys_reg(vcpu, r->reg) &= mask;
+ __vcpu_rmw_sys_reg(vcpu, r->reg, &=, mask);
return __vcpu_sys_reg(vcpu, r->reg);
}
@@ -799,7 +799,7 @@ static u64 reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
static u64 reset_pmevcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
reset_unknown(vcpu, r);
- __vcpu_sys_reg(vcpu, r->reg) &= GENMASK(31, 0);
+ __vcpu_rmw_sys_reg(vcpu, r->reg, &=, GENMASK(31, 0));
return __vcpu_sys_reg(vcpu, r->reg);
}
@@ -811,7 +811,7 @@ static u64 reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
return 0;
reset_unknown(vcpu, r);
- __vcpu_sys_reg(vcpu, r->reg) &= kvm_pmu_evtyper_mask(vcpu->kvm);
+ __vcpu_rmw_sys_reg(vcpu, r->reg, &=, kvm_pmu_evtyper_mask(vcpu->kvm));
return __vcpu_sys_reg(vcpu, r->reg);
}
@@ -819,7 +819,7 @@ static u64 reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
static u64 reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
reset_unknown(vcpu, r);
- __vcpu_sys_reg(vcpu, r->reg) &= PMSELR_EL0_SEL_MASK;
+ __vcpu_rmw_sys_reg(vcpu, r->reg, &=, PMSELR_EL0_SEL_MASK);
return __vcpu_sys_reg(vcpu, r->reg);
}
@@ -835,7 +835,7 @@ static u64 reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
* The value of PMCR.N field is included when the
* vCPU register is read via kvm_vcpu_read_pmcr().
*/
- __vcpu_sys_reg(vcpu, r->reg) = pmcr;
+ __vcpu_assign_sys_reg(vcpu, r->reg, pmcr);
return __vcpu_sys_reg(vcpu, r->reg);
}
@@ -907,7 +907,7 @@ static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return false;
if (p->is_write)
- __vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval;
+ __vcpu_assign_sys_reg(vcpu, PMSELR_EL0, p->regval);
else
/* return PMSELR.SEL field */
p->regval = __vcpu_sys_reg(vcpu, PMSELR_EL0)
@@ -1076,7 +1076,7 @@ static int set_pmreg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, u64 va
{
u64 mask = kvm_pmu_accessible_counter_mask(vcpu);
- __vcpu_sys_reg(vcpu, r->reg) = val & mask;
+ __vcpu_assign_sys_reg(vcpu, r->reg, val & mask);
kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
return 0;
@@ -1103,10 +1103,10 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
val = p->regval & mask;
if (r->Op2 & 0x1)
/* accessing PMCNTENSET_EL0 */
- __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val;
+ __vcpu_rmw_sys_reg(vcpu, PMCNTENSET_EL0, |=, val);
else
/* accessing PMCNTENCLR_EL0 */
- __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val;
+ __vcpu_rmw_sys_reg(vcpu, PMCNTENSET_EL0, &=, ~val);
kvm_pmu_reprogram_counter_mask(vcpu, val);
} else {
@@ -1129,10 +1129,10 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (r->Op2 & 0x1)
/* accessing PMINTENSET_EL1 */
- __vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val;
+ __vcpu_rmw_sys_reg(vcpu, PMINTENSET_EL1, |=, val);
else
/* accessing PMINTENCLR_EL1 */
- __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val;
+ __vcpu_rmw_sys_reg(vcpu, PMINTENSET_EL1, &=, ~val);
} else {
p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
}
@@ -1151,10 +1151,10 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (p->is_write) {
if (r->CRm & 0x2)
/* accessing PMOVSSET_EL0 */
- __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask);
+ __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, |=, (p->regval & mask));
else
/* accessing PMOVSCLR_EL0 */
- __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask);
+ __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, &=, ~(p->regval & mask));
} else {
p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
}
@@ -1185,8 +1185,8 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (!vcpu_mode_priv(vcpu))
return undef_access(vcpu, p, r);
- __vcpu_sys_reg(vcpu, PMUSERENR_EL0) =
- p->regval & ARMV8_PMU_USERENR_MASK;
+ __vcpu_assign_sys_reg(vcpu, PMUSERENR_EL0,
+ (p->regval & ARMV8_PMU_USERENR_MASK));
} else {
p->regval = __vcpu_sys_reg(vcpu, PMUSERENR_EL0)
& ARMV8_PMU_USERENR_MASK;
@@ -1237,7 +1237,7 @@ static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
if (!kvm_supports_32bit_el0())
val |= ARMV8_PMU_PMCR_LC;
- __vcpu_sys_reg(vcpu, r->reg) = val;
+ __vcpu_assign_sys_reg(vcpu, r->reg, val);
kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
return 0;
@@ -2213,7 +2213,7 @@ static u64 reset_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
if (kvm_has_mte(vcpu->kvm))
clidr |= 2ULL << CLIDR_TTYPE_SHIFT(loc);
- __vcpu_sys_reg(vcpu, r->reg) = clidr;
+ __vcpu_assign_sys_reg(vcpu, r->reg, clidr);
return __vcpu_sys_reg(vcpu, r->reg);
}
@@ -2227,7 +2227,7 @@ static int set_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
if ((val & CLIDR_EL1_RES0) || (!(ctr_el0 & CTR_EL0_IDC) && idc))
return -EINVAL;
- __vcpu_sys_reg(vcpu, rd->reg) = val;
+ __vcpu_assign_sys_reg(vcpu, rd->reg, val);
return 0;
}
@@ -2404,7 +2404,7 @@ static bool access_sp_el1(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
if (p->is_write)
- __vcpu_sys_reg(vcpu, SP_EL1) = p->regval;
+ __vcpu_assign_sys_reg(vcpu, SP_EL1, p->regval);
else
p->regval = __vcpu_sys_reg(vcpu, SP_EL1);
@@ -2428,7 +2428,7 @@ static bool access_spsr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
if (p->is_write)
- __vcpu_sys_reg(vcpu, SPSR_EL1) = p->regval;
+ __vcpu_assign_sys_reg(vcpu, SPSR_EL1, p->regval);
else
p->regval = __vcpu_sys_reg(vcpu, SPSR_EL1);
@@ -2440,7 +2440,7 @@ static bool access_cntkctl_el12(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
if (p->is_write)
- __vcpu_sys_reg(vcpu, CNTKCTL_EL1) = p->regval;
+ __vcpu_assign_sys_reg(vcpu, CNTKCTL_EL1, p->regval);
else
p->regval = __vcpu_sys_reg(vcpu, CNTKCTL_EL1);
@@ -2454,7 +2454,9 @@ static u64 reset_hcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
if (!cpus_have_final_cap(ARM64_HAS_HCR_NV1))
val |= HCR_E2H;
- return __vcpu_sys_reg(vcpu, r->reg) = val;
+ __vcpu_assign_sys_reg(vcpu, r->reg, val);
+
+ return __vcpu_sys_reg(vcpu, r->reg);
}
static unsigned int __el2_visibility(const struct kvm_vcpu *vcpu,
@@ -2622,10 +2624,10 @@ static bool access_mdcr(struct kvm_vcpu *vcpu,
*/
if (hpmn > vcpu->kvm->arch.nr_pmu_counters) {
hpmn = vcpu->kvm->arch.nr_pmu_counters;
- u64_replace_bits(val, hpmn, MDCR_EL2_HPMN);
+ u64p_replace_bits(&val, hpmn, MDCR_EL2_HPMN);
}
- __vcpu_sys_reg(vcpu, MDCR_EL2) = val;
+ __vcpu_assign_sys_reg(vcpu, MDCR_EL2, val);
/*
* Request a reload of the PMU to enable/disable the counters
@@ -2754,7 +2756,7 @@ static int set_imp_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
static u64 reset_mdcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
- __vcpu_sys_reg(vcpu, r->reg) = vcpu->kvm->arch.nr_pmu_counters;
+ __vcpu_assign_sys_reg(vcpu, r->reg, vcpu->kvm->arch.nr_pmu_counters);
return vcpu->kvm->arch.nr_pmu_counters;
}
@@ -4790,7 +4792,7 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
r->reset(vcpu, r);
if (r->reg >= __SANITISED_REG_START__ && r->reg < NR_SYS_REGS)
- (void)__vcpu_sys_reg(vcpu, r->reg);
+ __vcpu_rmw_sys_reg(vcpu, r->reg, |=, 0);
}
set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags);
@@ -5012,7 +5014,7 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
if (r->set_user) {
ret = (r->set_user)(vcpu, r, val);
} else {
- __vcpu_sys_reg(vcpu, r->reg) = val;
+ __vcpu_assign_sys_reg(vcpu, r->reg, val);
ret = 0;
}
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index cc6338d38766..ef97d9fc67cc 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -137,7 +137,7 @@ static inline u64 reset_unknown(struct kvm_vcpu *vcpu,
{
BUG_ON(!r->reg);
BUG_ON(r->reg >= NR_SYS_REGS);
- __vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
+ __vcpu_assign_sys_reg(vcpu, r->reg, 0x1de7ec7edbadc0deULL);
return __vcpu_sys_reg(vcpu, r->reg);
}
@@ -145,7 +145,7 @@ static inline u64 reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
BUG_ON(!r->reg);
BUG_ON(r->reg >= NR_SYS_REGS);
- __vcpu_sys_reg(vcpu, r->reg) = r->val;
+ __vcpu_assign_sys_reg(vcpu, r->reg, r->val);
return __vcpu_sys_reg(vcpu, r->reg);
}
diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c
index 4f6954c30674..679aafe77de2 100644
--- a/arch/arm64/kvm/vgic/vgic-v3-nested.c
+++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c
@@ -36,6 +36,11 @@ struct shadow_if {
static DEFINE_PER_CPU(struct shadow_if, shadow_if);
+static int lr_map_idx_to_shadow_idx(struct shadow_if *shadow_if, int idx)
+{
+ return hweight16(shadow_if->lr_map & (BIT(idx) - 1));
+}
+
/*
* Nesting GICv3 support
*
@@ -209,6 +214,29 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu)
return reg;
}
+static u64 translate_lr_pintid(struct kvm_vcpu *vcpu, u64 lr)
+{
+ struct vgic_irq *irq;
+
+ if (!(lr & ICH_LR_HW))
+ return lr;
+
+ /* We have the HW bit set, check for validity of pINTID */
+ irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
+ /* If there was no real mapping, nuke the HW bit */
+ if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI)
+ lr &= ~ICH_LR_HW;
+
+ /* Translate the virtual mapping to the real one, even if invalid */
+ if (irq) {
+ lr &= ~ICH_LR_PHYS_ID_MASK;
+ lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid);
+ vgic_put_irq(vcpu->kvm, irq);
+ }
+
+ return lr;
+}
+
/*
* For LRs which have HW bit set such as timer interrupts, we modify them to
* have the host hardware interrupt number instead of the virtual one programmed
@@ -217,58 +245,37 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu)
static void vgic_v3_create_shadow_lr(struct kvm_vcpu *vcpu,
struct vgic_v3_cpu_if *s_cpu_if)
{
- unsigned long lr_map = 0;
- int index = 0;
+ struct shadow_if *shadow_if;
+
+ shadow_if = container_of(s_cpu_if, struct shadow_if, cpuif);
+ shadow_if->lr_map = 0;
for (int i = 0; i < kvm_vgic_global_state.nr_lr; i++) {
u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
- struct vgic_irq *irq;
if (!(lr & ICH_LR_STATE))
- lr = 0;
-
- if (!(lr & ICH_LR_HW))
- goto next;
-
- /* We have the HW bit set, check for validity of pINTID */
- irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
- if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI ) {
- /* There was no real mapping, so nuke the HW bit */
- lr &= ~ICH_LR_HW;
- if (irq)
- vgic_put_irq(vcpu->kvm, irq);
- goto next;
- }
-
- /* Translate the virtual mapping to the real one */
- lr &= ~ICH_LR_PHYS_ID_MASK;
- lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid);
+ continue;
- vgic_put_irq(vcpu->kvm, irq);
+ lr = translate_lr_pintid(vcpu, lr);
-next:
- s_cpu_if->vgic_lr[index] = lr;
- if (lr) {
- lr_map |= BIT(i);
- index++;
- }
+ s_cpu_if->vgic_lr[hweight16(shadow_if->lr_map)] = lr;
+ shadow_if->lr_map |= BIT(i);
}
- container_of(s_cpu_if, struct shadow_if, cpuif)->lr_map = lr_map;
- s_cpu_if->used_lrs = index;
+ s_cpu_if->used_lrs = hweight16(shadow_if->lr_map);
}
void vgic_v3_sync_nested(struct kvm_vcpu *vcpu)
{
struct shadow_if *shadow_if = get_shadow_if();
- int i, index = 0;
+ int i;
for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) {
u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
struct vgic_irq *irq;
if (!(lr & ICH_LR_HW) || !(lr & ICH_LR_STATE))
- goto next;
+ continue;
/*
* If we had a HW lr programmed by the guest hypervisor, we
@@ -277,15 +284,13 @@ void vgic_v3_sync_nested(struct kvm_vcpu *vcpu)
*/
irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
if (WARN_ON(!irq)) /* Shouldn't happen as we check on load */
- goto next;
+ continue;
- lr = __gic_v3_get_lr(index);
+ lr = __gic_v3_get_lr(lr_map_idx_to_shadow_idx(shadow_if, i));
if (!(lr & ICH_LR_STATE))
irq->active = false;
vgic_put_irq(vcpu->kvm, irq);
- next:
- index++;
}
}
@@ -356,25 +361,23 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu)
val = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
val &= ~ICH_HCR_EL2_EOIcount_MASK;
val |= (s_cpu_if->vgic_hcr & ICH_HCR_EL2_EOIcount_MASK);
- __vcpu_sys_reg(vcpu, ICH_HCR_EL2) = val;
- __vcpu_sys_reg(vcpu, ICH_VMCR_EL2) = s_cpu_if->vgic_vmcr;
+ __vcpu_assign_sys_reg(vcpu, ICH_HCR_EL2, val);
+ __vcpu_assign_sys_reg(vcpu, ICH_VMCR_EL2, s_cpu_if->vgic_vmcr);
for (i = 0; i < 4; i++) {
- __vcpu_sys_reg(vcpu, ICH_AP0RN(i)) = s_cpu_if->vgic_ap0r[i];
- __vcpu_sys_reg(vcpu, ICH_AP1RN(i)) = s_cpu_if->vgic_ap1r[i];
+ __vcpu_assign_sys_reg(vcpu, ICH_AP0RN(i), s_cpu_if->vgic_ap0r[i]);
+ __vcpu_assign_sys_reg(vcpu, ICH_AP1RN(i), s_cpu_if->vgic_ap1r[i]);
}
for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) {
val = __vcpu_sys_reg(vcpu, ICH_LRN(i));
val &= ~ICH_LR_STATE;
- val |= s_cpu_if->vgic_lr[i] & ICH_LR_STATE;
+ val |= s_cpu_if->vgic_lr[lr_map_idx_to_shadow_idx(shadow_if, i)] & ICH_LR_STATE;
- __vcpu_sys_reg(vcpu, ICH_LRN(i)) = val;
- s_cpu_if->vgic_lr[i] = 0;
+ __vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val);
}
- shadow_if->lr_map = 0;
vcpu->arch.vgic_cpu.vgic_v3.used_lrs = 0;
}
@@ -398,9 +401,7 @@ void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu)
{
bool level;
- level = __vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En;
- if (level)
- level &= vgic_v3_get_misr(vcpu);
+ level = (__vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En) && vgic_v3_get_misr(vcpu);
kvm_vgic_inject_irq(vcpu->kvm, vcpu,
vcpu->kvm->arch.vgic.mi_intid, level, vcpu);
}
diff --git a/arch/arm64/lib/crypto/poly1305-glue.c b/arch/arm64/lib/crypto/poly1305-glue.c
index 6a661cf04821..c9a74766785b 100644
--- a/arch/arm64/lib/crypto/poly1305-glue.c
+++ b/arch/arm64/lib/crypto/poly1305-glue.c
@@ -38,14 +38,14 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src,
unsigned int todo = min_t(unsigned int, len, SZ_4K);
kernel_neon_begin();
- poly1305_blocks_neon(state, src, todo, 1);
+ poly1305_blocks_neon(state, src, todo, padbit);
kernel_neon_end();
len -= todo;
src += todo;
} while (len);
} else
- poly1305_blocks(state, src, len, 1);
+ poly1305_blocks(state, src, len, padbit);
}
EXPORT_SYMBOL_GPL(poly1305_blocks_arch);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index ec0a337891dd..11eb8d1adc84 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -487,17 +487,29 @@ static void do_bad_area(unsigned long far, unsigned long esr,
}
}
-static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma,
- unsigned int mm_flags)
+static bool fault_from_pkey(struct vm_area_struct *vma, unsigned int mm_flags)
{
- unsigned long iss2 = ESR_ELx_ISS2(esr);
-
if (!system_supports_poe())
return false;
- if (esr_fsc_is_permission_fault(esr) && (iss2 & ESR_ELx_Overlay))
- return true;
-
+ /*
+ * We do not check whether an Overlay fault has occurred because we
+ * cannot make a decision based solely on its value:
+ *
+ * - If Overlay is set, a fault did occur due to POE, but it may be
+ * spurious in those cases where we update POR_EL0 without ISB (e.g.
+ * on context-switch). We would then need to manually check POR_EL0
+ * against vma_pkey(vma), which is exactly what
+ * arch_vma_access_permitted() does.
+ *
+ * - If Overlay is not set, we may still need to report a pkey fault.
+ * This is the case if an access was made within a mapping but with no
+ * page mapped, and POR_EL0 forbids the access (according to
+ * vma_pkey()). Such access will result in a SIGSEGV regardless
+ * because core code checks arch_vma_access_permitted(), but in order
+ * to report the correct error code - SEGV_PKUERR - we must handle
+ * that case here.
+ */
return !arch_vma_access_permitted(vma,
mm_flags & FAULT_FLAG_WRITE,
mm_flags & FAULT_FLAG_INSTRUCTION,
@@ -635,7 +647,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
goto bad_area;
}
- if (fault_from_pkey(esr, vma, mm_flags)) {
+ if (fault_from_pkey(vma, mm_flags)) {
pkey = vma_pkey(vma);
vma_end_read(vma);
fault = 0;
@@ -679,7 +691,7 @@ retry:
goto bad_area;
}
- if (fault_from_pkey(esr, vma, mm_flags)) {
+ if (fault_from_pkey(vma, mm_flags)) {
pkey = vma_pkey(vma);
mmap_read_unlock(mm);
fault = 0;
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 8fcf59ba39db..00ab1d648db6 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1305,7 +1305,8 @@ int pud_free_pmd_page(pud_t *pudp, unsigned long addr)
next = addr;
end = addr + PUD_SIZE;
do {
- pmd_free_pte_page(pmdp, next);
+ if (pmd_present(pmdp_get(pmdp)))
+ pmd_free_pte_page(pmdp, next);
} while (pmdp++, next += PMD_SIZE, next != end);
pud_clear(pudp);
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 80d470aa469d..54dccfd6aa11 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -518,7 +518,6 @@ alternative_else_nop_endif
msr REG_PIR_EL1, x0
orr tcr2, tcr2, TCR2_EL1_PIE
- msr REG_TCR2_EL1, x0
.Lskip_indirection:
diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h
index b8378431aeff..5a394be09c35 100644
--- a/arch/csky/include/asm/pgtable.h
+++ b/arch/csky/include/asm/pgtable.h
@@ -200,7 +200,7 @@ static inline pte_t pte_mkyoung(pte_t pte)
return pte;
}
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/csky/kernel/Makefile b/arch/csky/kernel/Makefile
index de1c3472e8f0..a406a4ac2378 100644
--- a/arch/csky/kernel/Makefile
+++ b/arch/csky/kernel/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-extra-y := vmlinux.lds
+always-$(KBUILD_BUILTIN) := vmlinux.lds
obj-y += head.o entry.o atomic.o signal.o traps.o irq.o time.o vdso.o vdso/
obj-y += power.o syscall.o syscall_table.o setup.o
diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h
index 9fbdfdbc539f..fbf24d1d1ca6 100644
--- a/arch/hexagon/include/asm/pgtable.h
+++ b/arch/hexagon/include/asm/pgtable.h
@@ -387,7 +387,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
(((type & 0x1f) << 1) | \
((offset & 0x3ffff8) << 10) | ((offset & 0x7) << 7)) })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/hexagon/kernel/Makefile b/arch/hexagon/kernel/Makefile
index 3fdf937eb572..8e0fb4a62315 100644
--- a/arch/hexagon/kernel/Makefile
+++ b/arch/hexagon/kernel/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-extra-y := vmlinux.lds
+always-$(KBUILD_BUILTIN) := vmlinux.lds
obj-y += head.o
obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 1a2cf012b8f2..4b19f93379a1 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -69,6 +69,7 @@ config LOONGARCH
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
select ARCH_SUPPORTS_LTO_CLANG
select ARCH_SUPPORTS_LTO_CLANG_THIN
+ select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_SUPPORTS_RT
select ARCH_USE_BUILTIN_BSWAP
@@ -123,6 +124,7 @@ config LOONGARCH
select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
select HAVE_ARCH_SECCOMP
select HAVE_ARCH_SECCOMP_FILTER
+ select HAVE_ARCH_STACKLEAK
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD
@@ -187,6 +189,7 @@ config LOONGARCH
select MODULES_USE_ELF_RELA if MODULES
select NEED_PER_CPU_EMBED_FIRST_CHUNK
select NEED_PER_CPU_PAGE_FIRST_CHUNK
+ select NUMA_MEMBLKS if NUMA
select OF
select OF_EARLY_FLATTREE
select PCI
@@ -456,6 +459,15 @@ config SCHED_SMT
Improves scheduler's performance when there are multiple
threads in one physical core.
+config SCHED_MC
+ bool "Multi-core scheduler support"
+ depends on SMP
+ default y
+ help
+ Multi-core scheduler support improves the CPU scheduler's decision
+ making when dealing with multi-core CPU chips at a cost of slightly
+ increased overhead in some places.
+
config SMP
bool "Multi-Processing support"
help
@@ -485,10 +497,10 @@ config HOTPLUG_CPU
Say N if you want to disable CPU hotplug.
config NR_CPUS
- int "Maximum number of CPUs (2-256)"
- range 2 256
+ int "Maximum number of CPUs (2-2048)"
+ range 2 2048
+ default "2048"
depends on SMP
- default "64"
help
This allows you to specify the maximum number of CPUs which this
kernel will support.
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 0304eabbe606..b0703a4e02a2 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -181,11 +181,14 @@ vmlinux.elf vmlinux.efi vmlinuz.efi: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(bootvars-y) $(boot)/$@
install:
- $(Q)install -D -m 755 $(KBUILD_IMAGE) $(INSTALL_PATH)/$(image-name-y)-$(KERNELRELEASE)
- $(Q)install -D -m 644 .config $(INSTALL_PATH)/config-$(KERNELRELEASE)
- $(Q)install -D -m 644 System.map $(INSTALL_PATH)/System.map-$(KERNELRELEASE)
+ $(call cmd,install)
define archhelp
- echo ' install - install kernel into $(INSTALL_PATH)'
+ echo ' vmlinux.elf - Uncompressed ELF kernel image (arch/loongarch/boot/vmlinux.elf)'
+ echo ' vmlinux.efi - Uncompressed EFI kernel image (arch/loongarch/boot/vmlinux.efi)'
+ echo ' vmlinuz.efi - GZIP/ZSTD-compressed EFI kernel image (arch/loongarch/boot/vmlinuz.efi)'
+ echo ' Default when CONFIG_EFI_ZBOOT=y'
+ echo ' install - Install kernel using (your) ~/bin/$(INSTALLKERNEL) or'
+ echo ' (distribution) /sbin/$(INSTALLKERNEL) or install.sh to $$(INSTALL_PATH)'
echo
endef
diff --git a/arch/loongarch/boot/dts/loongson-2k0500.dtsi b/arch/loongarch/boot/dts/loongson-2k0500.dtsi
index 3b38ff8853a7..760c60eebb89 100644
--- a/arch/loongarch/boot/dts/loongson-2k0500.dtsi
+++ b/arch/loongarch/boot/dts/loongson-2k0500.dtsi
@@ -169,6 +169,166 @@
interrupts = <3>;
};
+ pwm@1ff5c000 {
+ compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x1ff5c000 0x0 0x10>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <24 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
+ pwm@1ff5c010 {
+ compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x1ff5c010 0x0 0x10>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <24 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
+ pwm@1ff5c020 {
+ compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x1ff5c020 0x0 0x10>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <24 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
+ pwm@1ff5c030 {
+ compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x1ff5c030 0x0 0x10>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <24 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
+ pwm@1ff5c040 {
+ compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x1ff5c040 0x0 0x10>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <25 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
+ pwm@1ff5c050 {
+ compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x1ff5c050 0x0 0x10>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <25 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
+ pwm@1ff5c060 {
+ compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x1ff5c060 0x0 0x10>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <25 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
+ pwm@1ff5c070 {
+ compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x1ff5c070 0x0 0x10>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <25 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
+ pwm@1ff5c080 {
+ compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x1ff5c080 0x0 0x10>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <26 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
+ pwm@1ff5c090 {
+ compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x1ff5c090 0x0 0x10>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <26 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
+ pwm@1ff5c0a0 {
+ compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x1ff5c0a0 0x0 0x10>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <26 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
+ pwm@1ff5c0b0 {
+ compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x1ff5c0b0 0x0 0x10>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <26 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
+ pwm@1ff5c0c0 {
+ compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x1ff5c0c0 0x0 0x10>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <27 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
+ pwm@1ff5c0d0 {
+ compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x1ff5c0d0 0x0 0x10>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <27 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
+ pwm@1ff5c0e0 {
+ compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x1ff5c0e0 0x0 0x10>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <27 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
+ pwm@1ff5c0f0 {
+ compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x1ff5c0f0 0x0 0x10>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <27 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
gmac0: ethernet@1f020000 {
compatible = "snps,dwmac-3.70a";
reg = <0x0 0x1f020000 0x0 0x10000>;
diff --git a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts
index 3514ea78f525..78ea995abf1c 100644
--- a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts
+++ b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts
@@ -5,6 +5,7 @@
/dts-v1/;
+#include "dt-bindings/thermal/thermal.h"
#include "loongson-2k1000.dtsi"
/ {
@@ -38,6 +39,13 @@
linux,cma-default;
};
};
+
+ fan0: pwm-fan {
+ compatible = "pwm-fan";
+ cooling-levels = <255 153 85 25>;
+ pwms = <&pwm1 0 100000 0>;
+ #cooling-cells = <2>;
+ };
};
&gmac0 {
@@ -92,6 +100,22 @@
#size-cells = <0>;
};
+&pwm1 {
+ status = "okay";
+
+ pinctrl-0 = <&pwm1_pins_default>;
+ pinctrl-names = "default";
+};
+
+&cpu_thermal {
+ cooling-maps {
+ map0 {
+ trip = <&cpu_alert>;
+ cooling-device = <&fan0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+};
+
&ehci0 {
status = "okay";
};
diff --git a/arch/loongarch/boot/dts/loongson-2k1000.dtsi b/arch/loongarch/boot/dts/loongson-2k1000.dtsi
index 8dff2aa52417..1da3beb00f0e 100644
--- a/arch/loongarch/boot/dts/loongson-2k1000.dtsi
+++ b/arch/loongarch/boot/dts/loongson-2k1000.dtsi
@@ -68,7 +68,7 @@
};
thermal-zones {
- cpu-thermal {
+ cpu_thermal: cpu-thermal {
polling-delay-passive = <1000>;
polling-delay = <5000>;
thermal-sensors = <&tsensor 0>;
@@ -322,6 +322,46 @@
status = "disabled";
};
+ pwm@1fe22000 {
+ compatible = "loongson,ls2k1000-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x1fe22000 0x0 0x10>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <24 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
+ pwm1: pwm@1fe22010 {
+ compatible = "loongson,ls2k1000-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x1fe22010 0x0 0x10>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <25 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
+ pwm@1fe22020 {
+ compatible = "loongson,ls2k1000-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x1fe22020 0x0 0x10>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <26 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
+ pwm@1fe22030 {
+ compatible = "loongson,ls2k1000-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x1fe22030 0x0 0x10>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <27 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
pmc: power-management@1fe27000 {
compatible = "loongson,ls2k1000-pmc", "loongson,ls2k0500-pmc", "syscon";
reg = <0x0 0x1fe27000 0x0 0x58>;
diff --git a/arch/loongarch/boot/dts/loongson-2k2000.dtsi b/arch/loongarch/boot/dts/loongson-2k2000.dtsi
index b4ff55a33e90..9e0411f2754c 100644
--- a/arch/loongarch/boot/dts/loongson-2k2000.dtsi
+++ b/arch/loongarch/boot/dts/loongson-2k2000.dtsi
@@ -165,6 +165,66 @@
interrupt-parent = <&eiointc>;
};
+ pwm@100a0000 {
+ compatible = "loongson,ls2k2000-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x100a0000 0x0 0x10>;
+ interrupt-parent = <&pic>;
+ interrupts = <24 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_MISC_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
+ pwm@100a0100 {
+ compatible = "loongson,ls2k2000-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x100a0100 0x0 0x10>;
+ interrupt-parent = <&pic>;
+ interrupts = <25 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_MISC_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
+ pwm@100a0200 {
+ compatible = "loongson,ls2k2000-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x100a0200 0x0 0x10>;
+ interrupt-parent = <&pic>;
+ interrupts = <26 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_MISC_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
+ pwm@100a0300 {
+ compatible = "loongson,ls2k2000-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x100a0300 0x0 0x10>;
+ interrupt-parent = <&pic>;
+ interrupts = <27 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_MISC_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
+ pwm@100a0400 {
+ compatible = "loongson,ls2k2000-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x100a0400 0x0 0x10>;
+ interrupt-parent = <&pic>;
+ interrupts = <38 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_MISC_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
+ pwm@100a0500 {
+ compatible = "loongson,ls2k2000-pwm", "loongson,ls7a-pwm";
+ reg = <0x0 0x100a0500 0x0 0x10>;
+ interrupt-parent = <&pic>;
+ interrupts = <39 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_MISC_CLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+
rtc0: rtc@100d0100 {
compatible = "loongson,ls2k2000-rtc", "loongson,ls7a-rtc";
reg = <0x0 0x100d0100 0x0 0x100>;
diff --git a/arch/loongarch/boot/install.sh b/arch/loongarch/boot/install.sh
new file mode 100755
index 000000000000..daac197d3315
--- /dev/null
+++ b/arch/loongarch/boot/install.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995 by Linus Torvalds
+#
+# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
+# Adapted from code in arch/i386/boot/install.sh by Russell King
+#
+# "make install" script for the LoongArch Linux port
+#
+# Arguments:
+# $1 - kernel version
+# $2 - kernel image file
+# $3 - kernel map file
+# $4 - default install path (blank if root directory)
+
+set -e
+
+case "${2##*/}" in
+vmlinux.elf)
+ echo "Installing uncompressed vmlinux.elf kernel"
+ base=vmlinux
+ ;;
+vmlinux.efi)
+ echo "Installing uncompressed vmlinux.efi kernel"
+ base=vmlinux
+ ;;
+vmlinuz.efi)
+ echo "Installing gzip/zstd compressed vmlinuz.efi kernel"
+ base=vmlinuz
+ ;;
+*)
+ echo "Warning: Unexpected kernel type"
+ exit 1
+ ;;
+esac
+
+if [ -f $4/$base-$1 ]; then
+ mv $4/$base-$1 $4/$base-$1.old
+fi
+cat $2 > $4/$base-$1
+
+# Install system map file
+if [ -f $4/System.map-$1 ]; then
+ mv $4/System.map-$1 $4/System.map-$1.old
+fi
+cp $3 $4/System.map-$1
+
+# Install kernel config file
+if [ -f $4/config-$1 ]; then
+ mv $4/config-$1 $4/config-$1.old
+fi
+cp .config $4/config-$1
diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h
index 313f66f7913a..7376840fa9f7 100644
--- a/arch/loongarch/include/asm/acpi.h
+++ b/arch/loongarch/include/asm/acpi.h
@@ -33,7 +33,7 @@ static inline bool acpi_has_cpu_in_madt(void)
return true;
}
-#define MAX_CORE_PIC 256
+#define MAX_CORE_PIC 2048
extern struct list_head acpi_wakeup_device_list;
extern struct acpi_madt_core_pic acpi_core_pic[MAX_CORE_PIC];
diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h
index fe198b473f84..e739dbc6329d 100644
--- a/arch/loongarch/include/asm/addrspace.h
+++ b/arch/loongarch/include/asm/addrspace.h
@@ -18,12 +18,12 @@
/*
* This gives the physical RAM offset.
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifndef PHYS_OFFSET
#define PHYS_OFFSET _UL(0)
#endif
extern unsigned long vm_map_base;
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#ifndef IO_BASE
#define IO_BASE CSR_DMW0_BASE
@@ -66,7 +66,7 @@ extern unsigned long vm_map_base;
#define FIXADDR_TOP ((unsigned long)(long)(int)0xfffe0000)
#endif
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define _ATYPE_
#define _ATYPE32_
#define _ATYPE64_
@@ -85,7 +85,7 @@ extern unsigned long vm_map_base;
/*
* 32/64-bit LoongArch address spaces
*/
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define _ACAST32_
#define _ACAST64_
#else
diff --git a/arch/loongarch/include/asm/alternative-asm.h b/arch/loongarch/include/asm/alternative-asm.h
index ff3d10ac393f..7dc29bd9b2f0 100644
--- a/arch/loongarch/include/asm/alternative-asm.h
+++ b/arch/loongarch/include/asm/alternative-asm.h
@@ -2,7 +2,7 @@
#ifndef _ASM_ALTERNATIVE_ASM_H
#define _ASM_ALTERNATIVE_ASM_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#include <asm/asm.h>
@@ -77,6 +77,6 @@
.previous
.endm
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_ALTERNATIVE_ASM_H */
diff --git a/arch/loongarch/include/asm/alternative.h b/arch/loongarch/include/asm/alternative.h
index cee7b29785ab..b5bae21fb3c8 100644
--- a/arch/loongarch/include/asm/alternative.h
+++ b/arch/loongarch/include/asm/alternative.h
@@ -2,7 +2,7 @@
#ifndef _ASM_ALTERNATIVE_H
#define _ASM_ALTERNATIVE_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/stddef.h>
@@ -106,6 +106,6 @@ extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
(asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory"))
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_ALTERNATIVE_H */
diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h
index df05005f2b80..d60bdf2e6377 100644
--- a/arch/loongarch/include/asm/asm-extable.h
+++ b/arch/loongarch/include/asm/asm-extable.h
@@ -7,7 +7,7 @@
#define EX_TYPE_UACCESS_ERR_ZERO 2
#define EX_TYPE_BPF 3
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \
.pushsection __ex_table, "a"; \
@@ -22,7 +22,7 @@
__ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0)
.endm
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#include <linux/bits.h>
#include <linux/stringify.h>
@@ -60,6 +60,6 @@
#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \
_ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_ASM_EXTABLE_H */
diff --git a/arch/loongarch/include/asm/asm.h b/arch/loongarch/include/asm/asm.h
index f591b3245def..f018d26fc995 100644
--- a/arch/loongarch/include/asm/asm.h
+++ b/arch/loongarch/include/asm/asm.h
@@ -110,7 +110,7 @@
#define LONG_SRA srai.w
#define LONG_SRAV sra.w
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define LONG .word
#endif
#define LONGSIZE 4
@@ -131,7 +131,7 @@
#define LONG_SRA srai.d
#define LONG_SRAV sra.d
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define LONG .dword
#endif
#define LONGSIZE 8
@@ -158,7 +158,7 @@
#define PTR_SCALESHIFT 2
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define PTR .word
#endif
#define PTRSIZE 4
@@ -181,7 +181,7 @@
#define PTR_SCALESHIFT 3
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define PTR .dword
#endif
#define PTRSIZE 8
diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h
index 98cf4d7b4b0a..dfb982fe8701 100644
--- a/arch/loongarch/include/asm/cpu.h
+++ b/arch/loongarch/include/asm/cpu.h
@@ -46,7 +46,7 @@
#define PRID_PRODUCT_MASK 0x0fff
-#if !defined(__ASSEMBLY__)
+#if !defined(__ASSEMBLER__)
enum cpu_type_enum {
CPU_UNKNOWN,
@@ -55,7 +55,7 @@ enum cpu_type_enum {
CPU_LAST
};
-#endif /* !__ASSEMBLY */
+#endif /* !__ASSEMBLER__ */
/*
* ISA Level encodings
diff --git a/arch/loongarch/include/asm/entry-common.h b/arch/loongarch/include/asm/entry-common.h
index 0fe2a098ded9..099132980dc9 100644
--- a/arch/loongarch/include/asm/entry-common.h
+++ b/arch/loongarch/include/asm/entry-common.h
@@ -2,12 +2,6 @@
#ifndef ARCH_LOONGARCH_ENTRY_COMMON_H
#define ARCH_LOONGARCH_ENTRY_COMMON_H
-#include <linux/sched.h>
-#include <linux/processor.h>
-
-static inline bool on_thread_stack(void)
-{
- return !(((unsigned long)(current->stack) ^ current_stack_pointer) & ~(THREAD_SIZE - 1));
-}
+#include <asm/stacktrace.h> /* For on_thread_stack() */
#endif
diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h
index 6e0a99763a9a..f4caaf764f9e 100644
--- a/arch/loongarch/include/asm/ftrace.h
+++ b/arch/loongarch/include/asm/ftrace.h
@@ -14,7 +14,7 @@
#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifndef CONFIG_DYNAMIC_FTRACE
@@ -84,7 +84,7 @@ __arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr)
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/loongarch/include/asm/gpr-num.h b/arch/loongarch/include/asm/gpr-num.h
index 996038da806d..af95b941f48b 100644
--- a/arch/loongarch/include/asm/gpr-num.h
+++ b/arch/loongarch/include/asm/gpr-num.h
@@ -2,7 +2,7 @@
#ifndef __ASM_GPR_NUM_H
#define __ASM_GPR_NUM_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
.equ .L__gpr_num_zero, 0
.irp num,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
@@ -25,7 +25,7 @@
.equ .L__gpr_num_$s\num, 23 + \num
.endr
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#define __DEFINE_ASM_GPR_NUMS \
" .equ .L__gpr_num_zero, 0\n" \
@@ -47,6 +47,6 @@
" .equ .L__gpr_num_$s\\num, 23 + \\num\n" \
" .endr\n" \
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_GPR_NUM_H */
diff --git a/arch/loongarch/include/asm/irqflags.h b/arch/loongarch/include/asm/irqflags.h
index 319a8c616f1f..620163628a7f 100644
--- a/arch/loongarch/include/asm/irqflags.h
+++ b/arch/loongarch/include/asm/irqflags.h
@@ -5,7 +5,7 @@
#ifndef _ASM_IRQFLAGS_H
#define _ASM_IRQFLAGS_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/compiler.h>
#include <linux/stringify.h>
@@ -14,40 +14,48 @@
static inline void arch_local_irq_enable(void)
{
u32 flags = CSR_CRMD_IE;
+ register u32 mask asm("t0") = CSR_CRMD_IE;
+
__asm__ __volatile__(
"csrxchg %[val], %[mask], %[reg]\n\t"
: [val] "+r" (flags)
- : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD)
+ : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD)
: "memory");
}
static inline void arch_local_irq_disable(void)
{
u32 flags = 0;
+ register u32 mask asm("t0") = CSR_CRMD_IE;
+
__asm__ __volatile__(
"csrxchg %[val], %[mask], %[reg]\n\t"
: [val] "+r" (flags)
- : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD)
+ : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD)
: "memory");
}
static inline unsigned long arch_local_irq_save(void)
{
u32 flags = 0;
+ register u32 mask asm("t0") = CSR_CRMD_IE;
+
__asm__ __volatile__(
"csrxchg %[val], %[mask], %[reg]\n\t"
: [val] "+r" (flags)
- : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD)
+ : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD)
: "memory");
return flags;
}
static inline void arch_local_irq_restore(unsigned long flags)
{
+ register u32 mask asm("t0") = CSR_CRMD_IE;
+
__asm__ __volatile__(
"csrxchg %[val], %[mask], %[reg]\n\t"
: [val] "+r" (flags)
- : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD)
+ : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD)
: "memory");
}
@@ -72,6 +80,6 @@ static inline int arch_irqs_disabled(void)
return arch_irqs_disabled_flags(arch_local_save_flags());
}
-#endif /* #ifndef __ASSEMBLY__ */
+#endif /* #ifndef __ASSEMBLER__ */
#endif /* _ASM_IRQFLAGS_H */
diff --git a/arch/loongarch/include/asm/jump_label.h b/arch/loongarch/include/asm/jump_label.h
index 8a924bd69d19..4000c7603d8e 100644
--- a/arch/loongarch/include/asm/jump_label.h
+++ b/arch/loongarch/include/asm/jump_label.h
@@ -7,7 +7,7 @@
#ifndef __ASM_JUMP_LABEL_H
#define __ASM_JUMP_LABEL_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
@@ -50,5 +50,5 @@ l_yes:
return true;
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_JUMP_LABEL_H */
diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h
index 7f52bd31b9d4..62f139a9c87d 100644
--- a/arch/loongarch/include/asm/kasan.h
+++ b/arch/loongarch/include/asm/kasan.h
@@ -2,7 +2,7 @@
#ifndef __ASM_KASAN_H
#define __ASM_KASAN_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/linkage.h>
#include <linux/mmzone.h>
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 52651aa0e583..a0994d226eff 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -9,15 +9,15 @@
#include <linux/linkage.h>
#include <linux/types.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <larchintrin.h>
/* CPUCFG */
#define read_cpucfg(reg) __cpucfg(reg)
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
/* LoongArch Registers */
#define REG_ZERO 0x0
@@ -53,7 +53,7 @@
#define REG_S7 0x1e
#define REG_S8 0x1f
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/* Bit fields for CPUCFG registers */
#define LOONGARCH_CPUCFG0 0x0
@@ -171,7 +171,7 @@
* SW emulation for KVM hypervirsor, see arch/loongarch/include/uapi/asm/kvm_para.h
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/* CSR */
#define csr_read32(reg) __csrrd_w(reg)
@@ -187,7 +187,7 @@
#define iocsr_write32(val, reg) __iocsrwr_w(val, reg)
#define iocsr_write64(val, reg) __iocsrwr_d(val, reg)
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/* CSR register number */
@@ -411,8 +411,8 @@
/* Config CSR registers */
#define LOONGARCH_CSR_CPUID 0x20 /* CPU core id */
-#define CSR_CPUID_COREID_WIDTH 9
-#define CSR_CPUID_COREID _ULCAST_(0x1ff)
+#define CSR_CPUID_COREID_WIDTH 11
+#define CSR_CPUID_COREID _ULCAST_(0x7ff)
#define LOONGARCH_CSR_PRCFG1 0x21 /* Config1 */
#define CSR_CONF1_VSMAX_SHIFT 12
@@ -1195,7 +1195,7 @@
#define LOONGARCH_IOCSR_EXTIOI_ROUTE_BASE 0x1c00
#define IOCSR_EXTIOI_VECTOR_NUM 256
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
static __always_inline u64 drdtime(void)
{
@@ -1357,7 +1357,7 @@ __BUILD_CSR_OP(tlbidx)
#define clear_csr_estat(val) \
csr_xchg32(~(val), val, LOONGARCH_CSR_ESTAT)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/* Generic EntryLo bit definitions */
#define ENTRYLO_V (_ULCAST_(1) << 0)
diff --git a/arch/loongarch/include/asm/numa.h b/arch/loongarch/include/asm/numa.h
index b5f9de9f102e..bbf9f70bd25f 100644
--- a/arch/loongarch/include/asm/numa.h
+++ b/arch/loongarch/include/asm/numa.h
@@ -22,20 +22,6 @@ extern int numa_off;
extern s16 __cpuid_to_node[CONFIG_NR_CPUS];
extern nodemask_t numa_nodes_parsed __initdata;
-struct numa_memblk {
- u64 start;
- u64 end;
- int nid;
-};
-
-#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
-struct numa_meminfo {
- int nr_blks;
- struct numa_memblk blk[NR_NODE_MEMBLKS];
-};
-
-extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
-
extern void __init early_numa_add_cpu(int cpuid, s16 node);
extern void numa_add_cpu(unsigned int cpu);
extern void numa_remove_cpu(unsigned int cpu);
diff --git a/arch/loongarch/include/asm/orc_types.h b/arch/loongarch/include/asm/orc_types.h
index caf1f71a1057..d5fa98d1d177 100644
--- a/arch/loongarch/include/asm/orc_types.h
+++ b/arch/loongarch/include/asm/orc_types.h
@@ -34,7 +34,7 @@
#define ORC_TYPE_REGS 3
#define ORC_TYPE_REGS_PARTIAL 4
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* This struct is more or less a vastly simplified version of the DWARF Call
* Frame Information standard. It contains only the necessary parts of DWARF
@@ -53,6 +53,6 @@ struct orc_entry {
unsigned int type:3;
unsigned int signal:1;
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ORC_TYPES_H */
diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h
index 7368f12b7cb1..a3aaf34fba16 100644
--- a/arch/loongarch/include/asm/page.h
+++ b/arch/loongarch/include/asm/page.h
@@ -15,7 +15,7 @@
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/kernel.h>
#include <linux/pfn.h>
@@ -110,6 +110,6 @@ extern int __virt_addr_valid(volatile void *kaddr);
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_PAGE_H */
diff --git a/arch/loongarch/include/asm/pgtable-bits.h b/arch/loongarch/include/asm/pgtable-bits.h
index 45bfc65a0c9f..7bbfb04a54cc 100644
--- a/arch/loongarch/include/asm/pgtable-bits.h
+++ b/arch/loongarch/include/asm/pgtable-bits.h
@@ -92,7 +92,7 @@
#define PAGE_KERNEL_WUC __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \
_PAGE_GLOBAL | _PAGE_KERN | _CACHE_WUC)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL_SUC)
@@ -127,6 +127,6 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot)
return __pgprot(prot);
}
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_PGTABLE_BITS_H */
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index a3f17914dbab..f2aeff544cee 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -55,7 +55,7 @@
#define USER_PTRS_PER_PGD ((TASK_SIZE64 / PGDIR_SIZE)?(TASK_SIZE64 / PGDIR_SIZE):1)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/mm_types.h>
#include <linux/mmzone.h>
@@ -301,7 +301,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val(pmd) })
#define __swp_entry_to_pmd(x) ((pmd_t) { (x).val | _PAGE_HUGE })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
@@ -618,6 +618,6 @@ static inline long pmd_protnone(pmd_t pmd)
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_PGTABLE_H */
diff --git a/arch/loongarch/include/asm/prefetch.h b/arch/loongarch/include/asm/prefetch.h
index 1672262a5e2e..0b168cdaae9a 100644
--- a/arch/loongarch/include/asm/prefetch.h
+++ b/arch/loongarch/include/asm/prefetch.h
@@ -8,7 +8,7 @@
#define Pref_Load 0
#define Pref_Store 8
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
.macro __pref hint addr
#ifdef CONFIG_CPU_HAS_PREFETCH
diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index b87d1d5e5890..3a47f52959a8 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -25,6 +25,7 @@ extern int smp_num_siblings;
extern int num_processors;
extern int disabled_cpus;
extern cpumask_t cpu_sibling_map[];
+extern cpumask_t cpu_llc_shared_map[];
extern cpumask_t cpu_core_map[];
extern cpumask_t cpu_foreign_map[];
@@ -38,7 +39,7 @@ int loongson_cpu_disable(void);
void loongson_cpu_die(unsigned int cpu);
#endif
-static inline void plat_smp_setup(void)
+static inline void __init plat_smp_setup(void)
{
loongson_smp_setup();
}
diff --git a/arch/loongarch/include/asm/sparsemem.h b/arch/loongarch/include/asm/sparsemem.h
index 8d4af6aff8a8..4501efac1a87 100644
--- a/arch/loongarch/include/asm/sparsemem.h
+++ b/arch/loongarch/include/asm/sparsemem.h
@@ -21,11 +21,6 @@
#define VMEMMAP_SIZE 0 /* 1, For FLATMEM; 2, For SPARSEMEM without VMEMMAP. */
#endif
-#ifdef CONFIG_MEMORY_HOTPLUG
-int memory_add_physaddr_to_nid(u64 addr);
-#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid
-#endif
-
#define INIT_MEMBLOCK_RESERVED_REGIONS (INIT_MEMBLOCK_REGIONS + NR_CPUS)
#endif /* _LOONGARCH_SPARSEMEM_H */
diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h
index 66736837085b..3eda298702b1 100644
--- a/arch/loongarch/include/asm/stackframe.h
+++ b/arch/loongarch/include/asm/stackframe.h
@@ -57,6 +57,12 @@
jirl zero, \temp1, 0xc
.endm
+ .macro STACKLEAK_ERASE
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+ bl stackleak_erase_on_task_stack
+#endif
+ .endm
+
.macro BACKUP_T0T1
csrwr t0, EXCEPTION_KS0
csrwr t1, EXCEPTION_KS1
diff --git a/arch/loongarch/include/asm/stacktrace.h b/arch/loongarch/include/asm/stacktrace.h
index fc8b64773794..5c8be156567c 100644
--- a/arch/loongarch/include/asm/stacktrace.h
+++ b/arch/loongarch/include/asm/stacktrace.h
@@ -31,6 +31,11 @@ bool in_irq_stack(unsigned long stack, struct stack_info *info);
bool in_task_stack(unsigned long stack, struct task_struct *task, struct stack_info *info);
int get_stack_info(unsigned long stack, struct task_struct *task, struct stack_info *info);
+static __always_inline bool on_thread_stack(void)
+{
+ return !(((unsigned long)(current->stack) ^ current_stack_pointer) & ~(THREAD_SIZE - 1));
+}
+
#define STR_LONG_L __stringify(LONG_L)
#define STR_LONG_S __stringify(LONG_S)
#define STR_LONGSIZE __stringify(LONGSIZE)
diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h
index 4f5a9441754e..9dfa2ef00816 100644
--- a/arch/loongarch/include/asm/thread_info.h
+++ b/arch/loongarch/include/asm/thread_info.h
@@ -10,7 +10,7 @@
#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/processor.h>
@@ -53,7 +53,7 @@ static inline struct thread_info *current_thread_info(void)
register unsigned long current_stack_pointer __asm__("$sp");
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/* thread information allocation */
#define THREAD_SIZE SZ_16K
diff --git a/arch/loongarch/include/asm/topology.h b/arch/loongarch/include/asm/topology.h
index 50273c9187d0..f06e7ff25bb7 100644
--- a/arch/loongarch/include/asm/topology.h
+++ b/arch/loongarch/include/asm/topology.h
@@ -19,17 +19,22 @@ extern int pcibus_to_node(struct pci_bus *);
#define cpumask_of_pcibus(bus) (cpu_online_mask)
-extern unsigned char node_distances[MAX_NUMNODES][MAX_NUMNODES];
-
-void numa_set_distance(int from, int to, int distance);
-
-#define node_distance(from, to) (node_distances[(from)][(to)])
+int __node_distance(int from, int to);
+#define node_distance(from, to) __node_distance(from, to)
#else
#define pcibus_to_node(bus) 0
#endif
#ifdef CONFIG_SMP
+/*
+ * Return cpus that shares the last level cache.
+ */
+static inline const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+ return &cpu_llc_shared_map[cpu];
+}
+
#define topology_physical_package_id(cpu) (cpu_data[cpu].package)
#define topology_core_id(cpu) (cpu_data[cpu].core)
#define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
diff --git a/arch/loongarch/include/asm/types.h b/arch/loongarch/include/asm/types.h
index baf15a0dcf8b..0edd731f3d6a 100644
--- a/arch/loongarch/include/asm/types.h
+++ b/arch/loongarch/include/asm/types.h
@@ -8,7 +8,7 @@
#include <asm-generic/int-ll64.h>
#include <uapi/asm/types.h>
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define _ULCAST_
#define _U64CAST_
#else
diff --git a/arch/loongarch/include/asm/unwind_hints.h b/arch/loongarch/include/asm/unwind_hints.h
index 2c68bc72736c..16c7f7e465a0 100644
--- a/arch/loongarch/include/asm/unwind_hints.h
+++ b/arch/loongarch/include/asm/unwind_hints.h
@@ -5,7 +5,7 @@
#include <linux/objtool.h>
#include <asm/orc_types.h>
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
.macro UNWIND_HINT_UNDEFINED
UNWIND_HINT type=UNWIND_HINT_TYPE_UNDEFINED
@@ -23,7 +23,7 @@
UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_CALL
.endm
-#else /* !__ASSEMBLY__ */
+#else /* !__ASSEMBLER__ */
#define UNWIND_HINT_SAVE \
UNWIND_HINT(UNWIND_HINT_TYPE_SAVE, 0, 0, 0)
@@ -31,6 +31,6 @@
#define UNWIND_HINT_RESTORE \
UNWIND_HINT(UNWIND_HINT_TYPE_RESTORE, 0, 0, 0)
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_LOONGARCH_UNWIND_HINTS_H */
diff --git a/arch/loongarch/include/asm/vdso/arch_data.h b/arch/loongarch/include/asm/vdso/arch_data.h
index 322d0a5f1c84..395ec223bcbe 100644
--- a/arch/loongarch/include/asm/vdso/arch_data.h
+++ b/arch/loongarch/include/asm/vdso/arch_data.h
@@ -7,7 +7,7 @@
#ifndef _VDSO_ARCH_DATA_H
#define _VDSO_ARCH_DATA_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/asm.h>
#include <asm/vdso.h>
@@ -20,6 +20,6 @@ struct vdso_arch_data {
struct vdso_pcpu_data pdata[NR_CPUS];
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/loongarch/include/asm/vdso/getrandom.h b/arch/loongarch/include/asm/vdso/getrandom.h
index 48c43f55b039..2ff05003c6e7 100644
--- a/arch/loongarch/include/asm/vdso/getrandom.h
+++ b/arch/loongarch/include/asm/vdso/getrandom.h
@@ -5,7 +5,7 @@
#ifndef __ASM_VDSO_GETRANDOM_H
#define __ASM_VDSO_GETRANDOM_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/unistd.h>
#include <asm/vdso/vdso.h>
@@ -20,7 +20,7 @@ static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, uns
asm volatile(
" syscall 0\n"
- : "+r" (ret)
+ : "=r" (ret)
: "r" (nr), "r" (buffer), "r" (len), "r" (flags)
: "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8",
"memory");
@@ -28,6 +28,6 @@ static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, uns
return ret;
}
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_VDSO_GETRANDOM_H */
diff --git a/arch/loongarch/include/asm/vdso/gettimeofday.h b/arch/loongarch/include/asm/vdso/gettimeofday.h
index 88cfcf133116..dcafabca9bb6 100644
--- a/arch/loongarch/include/asm/vdso/gettimeofday.h
+++ b/arch/loongarch/include/asm/vdso/gettimeofday.h
@@ -7,7 +7,7 @@
#ifndef __ASM_VDSO_GETTIMEOFDAY_H
#define __ASM_VDSO_GETTIMEOFDAY_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/unistd.h>
#include <asm/vdso/vdso.h>
@@ -25,7 +25,7 @@ static __always_inline long gettimeofday_fallback(
asm volatile(
" syscall 0\n"
- : "+r" (ret)
+ : "=r" (ret)
: "r" (nr), "r" (tv), "r" (tz)
: "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7",
"$t8", "memory");
@@ -44,7 +44,7 @@ static __always_inline long clock_gettime_fallback(
asm volatile(
" syscall 0\n"
- : "+r" (ret)
+ : "=r" (ret)
: "r" (nr), "r" (clkid), "r" (ts)
: "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7",
"$t8", "memory");
@@ -63,7 +63,7 @@ static __always_inline int clock_getres_fallback(
asm volatile(
" syscall 0\n"
- : "+r" (ret)
+ : "=r" (ret)
: "r" (nr), "r" (clkid), "r" (ts)
: "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7",
"$t8", "memory");
@@ -89,6 +89,6 @@ static inline bool loongarch_vdso_hres_capable(void)
}
#define __arch_vdso_hres_capable loongarch_vdso_hres_capable
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/loongarch/include/asm/vdso/processor.h b/arch/loongarch/include/asm/vdso/processor.h
index ef5770b343a0..1e255373b0b8 100644
--- a/arch/loongarch/include/asm/vdso/processor.h
+++ b/arch/loongarch/include/asm/vdso/processor.h
@@ -5,10 +5,10 @@
#ifndef __ASM_VDSO_PROCESSOR_H
#define __ASM_VDSO_PROCESSOR_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define cpu_relax() barrier()
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_VDSO_PROCESSOR_H */
diff --git a/arch/loongarch/include/asm/vdso/vdso.h b/arch/loongarch/include/asm/vdso/vdso.h
index 50c65fb29daf..04bd2d452876 100644
--- a/arch/loongarch/include/asm/vdso/vdso.h
+++ b/arch/loongarch/include/asm/vdso/vdso.h
@@ -7,7 +7,7 @@
#ifndef _ASM_VDSO_VDSO_H
#define _ASM_VDSO_VDSO_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/asm.h>
#include <asm/page.h>
@@ -16,6 +16,6 @@
#define VVAR_SIZE (VDSO_NR_PAGES << PAGE_SHIFT)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/loongarch/include/asm/vdso/vsyscall.h b/arch/loongarch/include/asm/vdso/vsyscall.h
index 1140b54b4bc8..558eb9dfda52 100644
--- a/arch/loongarch/include/asm/vdso/vsyscall.h
+++ b/arch/loongarch/include/asm/vdso/vsyscall.h
@@ -2,13 +2,13 @@
#ifndef __ASM_VDSO_VSYSCALL_H
#define __ASM_VDSO_VSYSCALL_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <vdso/datapage.h>
/* The asm-generic header needs to be included after the definitions above */
#include <asm-generic/vdso/vsyscall.h>
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index f9dcaa60033d..6f5a4574a911 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -5,7 +5,7 @@
OBJECT_FILES_NON_STANDARD_head.o := y
-extra-y := vmlinux.lds
+always-$(KBUILD_BUILTIN) := vmlinux.lds
obj-y += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
traps.o irq.o idle.o process.o dma.o mem.o reset.o switch.o \
diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c
index 1120ac2824f6..1367ca759468 100644
--- a/arch/loongarch/kernel/acpi.c
+++ b/arch/loongarch/kernel/acpi.c
@@ -10,6 +10,7 @@
#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/efi-bgrt.h>
+#include <linux/export.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/memblock.h>
@@ -244,22 +245,6 @@ fdt_earlycon:
#ifdef CONFIG_ACPI_NUMA
-static __init int setup_node(int pxm)
-{
- return acpi_map_pxm_to_node(pxm);
-}
-
-void __init numa_set_distance(int from, int to, int distance)
-{
- if ((u8)distance != distance || (from == to && distance != LOCAL_DISTANCE)) {
- pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
- from, to, distance);
- return;
- }
-
- node_distances[from][to] = distance;
-}
-
/* Callback for Proximity Domain -> CPUID mapping */
void __init
acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
@@ -280,7 +265,41 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
pxm |= (pa->proximity_domain_hi[1] << 16);
pxm |= (pa->proximity_domain_hi[2] << 24);
}
- node = setup_node(pxm);
+ node = acpi_map_pxm_to_node(pxm);
+ if (node < 0) {
+ pr_err("SRAT: Too many proximity domains %x\n", pxm);
+ bad_srat();
+ return;
+ }
+
+ if (pa->apic_id >= CONFIG_NR_CPUS) {
+ pr_info("SRAT: PXM %u -> CPU 0x%02x -> Node %u skipped apicid that is too big\n",
+ pxm, pa->apic_id, node);
+ return;
+ }
+
+ early_numa_add_cpu(pa->apic_id, node);
+
+ set_cpuid_to_node(pa->apic_id, node);
+ node_set(node, numa_nodes_parsed);
+ pr_info("SRAT: PXM %u -> CPU 0x%02x -> Node %u\n", pxm, pa->apic_id, node);
+}
+
+void __init
+acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
+{
+ int pxm, node;
+
+ if (srat_disabled())
+ return;
+ if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) {
+ bad_srat();
+ return;
+ }
+ if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
+ return;
+ pxm = pa->proximity_domain;
+ node = acpi_map_pxm_to_node(pxm);
if (node < 0) {
pr_err("SRAT: Too many proximity domains %x\n", pxm);
bad_srat();
diff --git a/arch/loongarch/kernel/alternative.c b/arch/loongarch/kernel/alternative.c
index 4ad13847e962..0e0c766df1e3 100644
--- a/arch/loongarch/kernel/alternative.c
+++ b/arch/loongarch/kernel/alternative.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/export.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <asm/alternative.h>
diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c
index de21e72759ee..860a3bc030e0 100644
--- a/arch/loongarch/kernel/efi.c
+++ b/arch/loongarch/kernel/efi.c
@@ -144,6 +144,18 @@ void __init efi_init(void)
if (efi_memmap_init_early(&data) < 0)
panic("Unable to map EFI memory map.\n");
+ /*
+ * Reserve the physical memory region occupied by the EFI
+ * memory map table (header + descriptors). This is crucial
+ * for kdump, as the kdump kernel relies on this original
+ * memmap passed by the bootloader. Without reservation,
+ * this region could be overwritten by the primary kernel.
+ * Also, set the EFI_PRESERVE_BS_REGIONS flag to indicate that
+ * critical boot services code/data regions like this are preserved.
+ */
+ memblock_reserve((phys_addr_t)boot_memmap, sizeof(*tbl) + data.size);
+ set_bit(EFI_PRESERVE_BS_REGIONS, &efi.flags);
+
early_memunmap(tbl, sizeof(*tbl));
}
diff --git a/arch/loongarch/kernel/elf.c b/arch/loongarch/kernel/elf.c
index 0fa81ced28dc..3d98c6aa00db 100644
--- a/arch/loongarch/kernel/elf.c
+++ b/arch/loongarch/kernel/elf.c
@@ -6,7 +6,6 @@
#include <linux/binfmts.h>
#include <linux/elf.h>
-#include <linux/export.h>
#include <linux/sched.h>
#include <asm/cpu-features.h>
diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S
index 2abc29e57381..47e1db9a1ce4 100644
--- a/arch/loongarch/kernel/entry.S
+++ b/arch/loongarch/kernel/entry.S
@@ -73,6 +73,7 @@ SYM_CODE_START(handle_syscall)
move a0, sp
bl do_syscall
+ STACKLEAK_ERASE
RESTORE_ALL_AND_RET
SYM_CODE_END(handle_syscall)
_ASM_NOKPROBE(handle_syscall)
@@ -81,6 +82,7 @@ SYM_CODE_START(ret_from_fork_asm)
UNWIND_HINT_REGS
move a1, sp
bl ret_from_fork
+ STACKLEAK_ERASE
RESTORE_STATIC
RESTORE_SOME
RESTORE_SP_AND_RET
@@ -92,6 +94,7 @@ SYM_CODE_START(ret_from_kernel_thread_asm)
move a2, s0
move a3, s1
bl ret_from_kernel_thread
+ STACKLEAK_ERASE
RESTORE_STATIC
RESTORE_SOME
RESTORE_SP_AND_RET
diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c
index 4c476904227f..141b49bd989c 100644
--- a/arch/loongarch/kernel/kfpu.c
+++ b/arch/loongarch/kernel/kfpu.c
@@ -4,6 +4,7 @@
*/
#include <linux/cpu.h>
+#include <linux/export.h>
#include <linux/init.h>
#include <asm/fpu.h>
#include <asm/smp.h>
diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c
index 30a72fd528c0..d6e73e8f9c0b 100644
--- a/arch/loongarch/kernel/numa.c
+++ b/arch/loongarch/kernel/numa.c
@@ -11,6 +11,7 @@
#include <linux/mmzone.h>
#include <linux/export.h>
#include <linux/nodemask.h>
+#include <linux/numa_memblks.h>
#include <linux/swap.h>
#include <linux/memblock.h>
#include <linux/pfn.h>
@@ -27,10 +28,6 @@
#include <asm/time.h>
int numa_off;
-unsigned char node_distances[MAX_NUMNODES][MAX_NUMNODES];
-EXPORT_SYMBOL(node_distances);
-
-static struct numa_meminfo numa_meminfo;
cpumask_t cpus_on_node[MAX_NUMNODES];
cpumask_t phys_cpus_on_node[MAX_NUMNODES];
EXPORT_SYMBOL(cpus_on_node);
@@ -43,8 +40,6 @@ s16 __cpuid_to_node[CONFIG_NR_CPUS] = {
};
EXPORT_SYMBOL(__cpuid_to_node);
-nodemask_t numa_nodes_parsed __initdata;
-
#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
@@ -145,48 +140,6 @@ void numa_remove_cpu(unsigned int cpu)
cpumask_clear_cpu(cpu, &cpus_on_node[nid]);
}
-static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
- struct numa_meminfo *mi)
-{
- /* ignore zero length blks */
- if (start == end)
- return 0;
-
- /* whine about and ignore invalid blks */
- if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
- pr_warn("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
- nid, start, end - 1);
- return 0;
- }
-
- if (mi->nr_blks >= NR_NODE_MEMBLKS) {
- pr_err("NUMA: too many memblk ranges\n");
- return -EINVAL;
- }
-
- mi->blk[mi->nr_blks].start = PFN_ALIGN(start);
- mi->blk[mi->nr_blks].end = PFN_ALIGN(end - PAGE_SIZE + 1);
- mi->blk[mi->nr_blks].nid = nid;
- mi->nr_blks++;
- return 0;
-}
-
-/**
- * numa_add_memblk - Add one numa_memblk to numa_meminfo
- * @nid: NUMA node ID of the new memblk
- * @start: Start address of the new memblk
- * @end: End address of the new memblk
- *
- * Add a new memblk to the default numa_meminfo.
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int __init numa_add_memblk(int nid, u64 start, u64 end)
-{
- return numa_add_memblk_to(nid, start, end, &numa_meminfo);
-}
-
static void __init node_mem_init(unsigned int node)
{
unsigned long start_pfn, end_pfn;
@@ -205,18 +158,6 @@ static void __init node_mem_init(unsigned int node)
#ifdef CONFIG_ACPI_NUMA
-static void __init add_node_intersection(u32 node, u64 start, u64 size, u32 type)
-{
- static unsigned long num_physpages;
-
- num_physpages += (size >> PAGE_SHIFT);
- pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n",
- node, type, start, size);
- pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n",
- start >> PAGE_SHIFT, (start + size) >> PAGE_SHIFT, num_physpages);
- memblock_set_node(start, size, &memblock.memory, node);
-}
-
/*
* add_numamem_region
*
@@ -228,28 +169,21 @@ static void __init add_node_intersection(u32 node, u64 start, u64 size, u32 type
*/
static void __init add_numamem_region(u64 start, u64 end, u32 type)
{
- u32 i;
- u64 ofs = start;
+ u32 node = pa_to_nid(start);
+ u64 size = end - start;
+ static unsigned long num_physpages;
if (start >= end) {
pr_debug("Invalid region: %016llx-%016llx\n", start, end);
return;
}
- for (i = 0; i < numa_meminfo.nr_blks; i++) {
- struct numa_memblk *mb = &numa_meminfo.blk[i];
-
- if (ofs > mb->end)
- continue;
-
- if (end > mb->end) {
- add_node_intersection(mb->nid, ofs, mb->end - ofs, type);
- ofs = mb->end;
- } else {
- add_node_intersection(mb->nid, ofs, end - ofs, type);
- break;
- }
- }
+ num_physpages += (size >> PAGE_SHIFT);
+ pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n",
+ node, type, start, size);
+ pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n",
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT, num_physpages);
+ memblock_set_node(start, size, &memblock.memory, node);
}
static void __init init_node_memblock(void)
@@ -291,24 +225,6 @@ static void __init init_node_memblock(void)
}
}
-static void __init numa_default_distance(void)
-{
- int row, col;
-
- for (row = 0; row < MAX_NUMNODES; row++)
- for (col = 0; col < MAX_NUMNODES; col++) {
- if (col == row)
- node_distances[row][col] = LOCAL_DISTANCE;
- else
- /* We assume that one node per package here!
- *
- * A SLIT should be used for multiple nodes
- * per package to override default setting.
- */
- node_distances[row][col] = REMOTE_DISTANCE;
- }
-}
-
/*
* fake_numa_init() - For Non-ACPI systems
* Return: 0 on success, -errno on failure.
@@ -333,11 +249,11 @@ int __init init_numa_memory(void)
for (i = 0; i < NR_CPUS; i++)
set_cpuid_to_node(i, NUMA_NO_NODE);
- numa_default_distance();
+ numa_reset_distance();
nodes_clear(numa_nodes_parsed);
nodes_clear(node_possible_map);
nodes_clear(node_online_map);
- memset(&numa_meminfo, 0, sizeof(numa_meminfo));
+ WARN_ON(memblock_clear_hotplug(0, PHYS_ADDR_MAX));
/* Parse SRAT and SLIT if provided by firmware. */
ret = acpi_disabled ? fake_numa_init() : acpi_numa_init();
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
index e5a39bbad078..b1b51f920b23 100644
--- a/arch/loongarch/kernel/paravirt.c
+++ b/arch/loongarch/kernel/paravirt.c
@@ -1,5 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/export.h>
#include <linux/types.h>
#include <linux/interrupt.h>
#include <linux/irq_work.h>
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 4b24589c0b56..46036d98da75 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -46,6 +46,10 @@ EXPORT_SYMBOL(__cpu_logical_map);
cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(cpu_sibling_map);
+/* Representing the last level cache shared map of each logical CPU */
+cpumask_t cpu_llc_shared_map[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(cpu_llc_shared_map);
+
/* Representing the core map of multi-core chips of each logical CPU */
cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(cpu_core_map);
@@ -63,6 +67,9 @@ EXPORT_SYMBOL(cpu_foreign_map);
/* representing cpus for which sibling maps can be computed */
static cpumask_t cpu_sibling_setup_map;
+/* representing cpus for which llc shared maps can be computed */
+static cpumask_t cpu_llc_shared_setup_map;
+
/* representing cpus for which core maps can be computed */
static cpumask_t cpu_core_setup_map;
@@ -102,6 +109,34 @@ static inline void set_cpu_core_map(int cpu)
}
}
+static inline void set_cpu_llc_shared_map(int cpu)
+{
+ int i;
+
+ cpumask_set_cpu(cpu, &cpu_llc_shared_setup_map);
+
+ for_each_cpu(i, &cpu_llc_shared_setup_map) {
+ if (cpu_to_node(cpu) == cpu_to_node(i)) {
+ cpumask_set_cpu(i, &cpu_llc_shared_map[cpu]);
+ cpumask_set_cpu(cpu, &cpu_llc_shared_map[i]);
+ }
+ }
+}
+
+static inline void clear_cpu_llc_shared_map(int cpu)
+{
+ int i;
+
+ for_each_cpu(i, &cpu_llc_shared_setup_map) {
+ if (cpu_to_node(cpu) == cpu_to_node(i)) {
+ cpumask_clear_cpu(i, &cpu_llc_shared_map[cpu]);
+ cpumask_clear_cpu(cpu, &cpu_llc_shared_map[i]);
+ }
+ }
+
+ cpumask_clear_cpu(cpu, &cpu_llc_shared_setup_map);
+}
+
static inline void set_cpu_sibling_map(int cpu)
{
int i;
@@ -406,6 +441,7 @@ int loongson_cpu_disable(void)
#endif
set_cpu_online(cpu, false);
clear_cpu_sibling_map(cpu);
+ clear_cpu_llc_shared_map(cpu);
calculate_cpu_foreign_map();
local_irq_save(flags);
irq_migrate_all_off_this_cpu();
@@ -572,6 +608,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
current_thread_info()->cpu = 0;
loongson_prepare_cpus(max_cpus);
set_cpu_sibling_map(0);
+ set_cpu_llc_shared_map(0);
set_cpu_core_map(0);
calculate_cpu_foreign_map();
#ifndef CONFIG_HOTPLUG_CPU
@@ -613,6 +650,7 @@ asmlinkage void start_secondary(void)
loongson_init_secondary();
set_cpu_sibling_map(cpu);
+ set_cpu_llc_shared_map(cpu);
set_cpu_core_map(cpu);
notify_cpu_starting(cpu);
diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c
index bc75a3a69fc8..367906b10f81 100644
--- a/arch/loongarch/kernel/time.c
+++ b/arch/loongarch/kernel/time.c
@@ -102,7 +102,7 @@ static int constant_timer_next_event(unsigned long delta, struct clock_event_dev
return 0;
}
-static unsigned long __init get_loops_per_jiffy(void)
+static unsigned long get_loops_per_jiffy(void)
{
unsigned long lpj = (unsigned long)const_clock_freq;
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index 47fc2de6d150..3d9be6ca7ec5 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/kexec.h>
#include <linux/module.h>
+#include <linux/export.h>
#include <linux/extable.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
diff --git a/arch/loongarch/kernel/unwind_guess.c b/arch/loongarch/kernel/unwind_guess.c
index 98379b7d4147..08d7951b2f60 100644
--- a/arch/loongarch/kernel/unwind_guess.c
+++ b/arch/loongarch/kernel/unwind_guess.c
@@ -3,6 +3,7 @@
* Copyright (C) 2022 Loongson Technology Corporation Limited
*/
#include <asm/unwind.h>
+#include <linux/export.h>
unsigned long unwind_get_return_address(struct unwind_state *state)
{
diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c
index d623935a7547..0005be49b056 100644
--- a/arch/loongarch/kernel/unwind_orc.c
+++ b/arch/loongarch/kernel/unwind_orc.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/objtool.h>
+#include <linux/export.h>
#include <linux/module.h>
+#include <linux/objtool.h>
#include <linux/sort.h>
#include <asm/exception.h>
#include <asm/orc_header.h>
diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c
index 929ae240280a..729e775bd40d 100644
--- a/arch/loongarch/kernel/unwind_prologue.c
+++ b/arch/loongarch/kernel/unwind_prologue.c
@@ -3,6 +3,7 @@
* Copyright (C) 2022 Loongson Technology Corporation Limited
*/
#include <linux/cpumask.h>
+#include <linux/export.h>
#include <linux/ftrace.h>
#include <linux/kallsyms.h>
diff --git a/arch/loongarch/kernel/vdso.c b/arch/loongarch/kernel/vdso.c
index 10cf1608c7b3..7b888d9085a0 100644
--- a/arch/loongarch/kernel/vdso.c
+++ b/arch/loongarch/kernel/vdso.c
@@ -105,7 +105,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
vdso_addr = data_addr + VVAR_SIZE;
vma = _install_special_mapping(mm, vdso_addr, info->size,
- VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
+ VM_READ | VM_EXEC |
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC |
+ VM_SEALED_SYSMAP,
&info->code_mapping);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c
index f39929d7bf8a..a75f865d6fb9 100644
--- a/arch/loongarch/kvm/intc/eiointc.c
+++ b/arch/loongarch/kvm/intc/eiointc.c
@@ -9,7 +9,8 @@
static void eiointc_set_sw_coreisr(struct loongarch_eiointc *s)
{
- int ipnum, cpu, irq_index, irq_mask, irq;
+ int ipnum, cpu, cpuid, irq_index, irq_mask, irq;
+ struct kvm_vcpu *vcpu;
for (irq = 0; irq < EIOINTC_IRQS; irq++) {
ipnum = s->ipmap.reg_u8[irq / 32];
@@ -20,7 +21,12 @@ static void eiointc_set_sw_coreisr(struct loongarch_eiointc *s)
irq_index = irq / 32;
irq_mask = BIT(irq & 0x1f);
- cpu = s->coremap.reg_u8[irq];
+ cpuid = s->coremap.reg_u8[irq];
+ vcpu = kvm_get_vcpu_by_cpuid(s->kvm, cpuid);
+ if (!vcpu)
+ continue;
+
+ cpu = vcpu->vcpu_id;
if (!!(s->coreisr.reg_u32[cpu][irq_index] & irq_mask))
set_bit(irq, s->sw_coreisr[cpu][ipnum]);
else
@@ -66,20 +72,25 @@ static void eiointc_update_irq(struct loongarch_eiointc *s, int irq, int level)
}
static inline void eiointc_update_sw_coremap(struct loongarch_eiointc *s,
- int irq, void *pvalue, u32 len, bool notify)
+ int irq, u64 val, u32 len, bool notify)
{
- int i, cpu;
- u64 val = *(u64 *)pvalue;
+ int i, cpu, cpuid;
+ struct kvm_vcpu *vcpu;
for (i = 0; i < len; i++) {
- cpu = val & 0xff;
+ cpuid = val & 0xff;
val = val >> 8;
if (!(s->status & BIT(EIOINTC_ENABLE_CPU_ENCODE))) {
- cpu = ffs(cpu) - 1;
- cpu = (cpu >= 4) ? 0 : cpu;
+ cpuid = ffs(cpuid) - 1;
+ cpuid = (cpuid >= 4) ? 0 : cpuid;
}
+ vcpu = kvm_get_vcpu_by_cpuid(s->kvm, cpuid);
+ if (!vcpu)
+ continue;
+
+ cpu = vcpu->vcpu_id;
if (s->sw_coremap[irq + i] == cpu)
continue;
@@ -305,6 +316,11 @@ static int kvm_eiointc_read(struct kvm_vcpu *vcpu,
return -EINVAL;
}
+ if (addr & (len - 1)) {
+ kvm_err("%s: eiointc not aligned addr %llx len %d\n", __func__, addr, len);
+ return -EINVAL;
+ }
+
vcpu->kvm->stat.eiointc_read_exits++;
spin_lock_irqsave(&eiointc->lock, flags);
switch (len) {
@@ -398,7 +414,7 @@ static int loongarch_eiointc_writeb(struct kvm_vcpu *vcpu,
irq = offset - EIOINTC_COREMAP_START;
index = irq;
s->coremap.reg_u8[index] = data;
- eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true);
+ eiointc_update_sw_coremap(s, irq, data, sizeof(data), true);
break;
default:
ret = -EINVAL;
@@ -436,17 +452,16 @@ static int loongarch_eiointc_writew(struct kvm_vcpu *vcpu,
break;
case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END:
index = (offset - EIOINTC_ENABLE_START) >> 1;
- old_data = s->enable.reg_u32[index];
+ old_data = s->enable.reg_u16[index];
s->enable.reg_u16[index] = data;
/*
* 1: enable irq.
* update irq when isr is set.
*/
data = s->enable.reg_u16[index] & ~old_data & s->isr.reg_u16[index];
- index = index << 1;
for (i = 0; i < sizeof(data); i++) {
u8 mask = (data >> (i * 8)) & 0xff;
- eiointc_enable_irq(vcpu, s, index + i, mask, 1);
+ eiointc_enable_irq(vcpu, s, index * 2 + i, mask, 1);
}
/*
* 0: disable irq.
@@ -455,7 +470,7 @@ static int loongarch_eiointc_writew(struct kvm_vcpu *vcpu,
data = ~s->enable.reg_u16[index] & old_data & s->isr.reg_u16[index];
for (i = 0; i < sizeof(data); i++) {
u8 mask = (data >> (i * 8)) & 0xff;
- eiointc_enable_irq(vcpu, s, index, mask, 0);
+ eiointc_enable_irq(vcpu, s, index * 2 + i, mask, 0);
}
break;
case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END:
@@ -484,7 +499,7 @@ static int loongarch_eiointc_writew(struct kvm_vcpu *vcpu,
irq = offset - EIOINTC_COREMAP_START;
index = irq >> 1;
s->coremap.reg_u16[index] = data;
- eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true);
+ eiointc_update_sw_coremap(s, irq, data, sizeof(data), true);
break;
default:
ret = -EINVAL;
@@ -529,10 +544,9 @@ static int loongarch_eiointc_writel(struct kvm_vcpu *vcpu,
* update irq when isr is set.
*/
data = s->enable.reg_u32[index] & ~old_data & s->isr.reg_u32[index];
- index = index << 2;
for (i = 0; i < sizeof(data); i++) {
u8 mask = (data >> (i * 8)) & 0xff;
- eiointc_enable_irq(vcpu, s, index + i, mask, 1);
+ eiointc_enable_irq(vcpu, s, index * 4 + i, mask, 1);
}
/*
* 0: disable irq.
@@ -541,7 +555,7 @@ static int loongarch_eiointc_writel(struct kvm_vcpu *vcpu,
data = ~s->enable.reg_u32[index] & old_data & s->isr.reg_u32[index];
for (i = 0; i < sizeof(data); i++) {
u8 mask = (data >> (i * 8)) & 0xff;
- eiointc_enable_irq(vcpu, s, index, mask, 0);
+ eiointc_enable_irq(vcpu, s, index * 4 + i, mask, 0);
}
break;
case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END:
@@ -570,7 +584,7 @@ static int loongarch_eiointc_writel(struct kvm_vcpu *vcpu,
irq = offset - EIOINTC_COREMAP_START;
index = irq >> 2;
s->coremap.reg_u32[index] = data;
- eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true);
+ eiointc_update_sw_coremap(s, irq, data, sizeof(data), true);
break;
default:
ret = -EINVAL;
@@ -615,10 +629,9 @@ static int loongarch_eiointc_writeq(struct kvm_vcpu *vcpu,
* update irq when isr is set.
*/
data = s->enable.reg_u64[index] & ~old_data & s->isr.reg_u64[index];
- index = index << 3;
for (i = 0; i < sizeof(data); i++) {
u8 mask = (data >> (i * 8)) & 0xff;
- eiointc_enable_irq(vcpu, s, index + i, mask, 1);
+ eiointc_enable_irq(vcpu, s, index * 8 + i, mask, 1);
}
/*
* 0: disable irq.
@@ -627,7 +640,7 @@ static int loongarch_eiointc_writeq(struct kvm_vcpu *vcpu,
data = ~s->enable.reg_u64[index] & old_data & s->isr.reg_u64[index];
for (i = 0; i < sizeof(data); i++) {
u8 mask = (data >> (i * 8)) & 0xff;
- eiointc_enable_irq(vcpu, s, index, mask, 0);
+ eiointc_enable_irq(vcpu, s, index * 8 + i, mask, 0);
}
break;
case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END:
@@ -656,7 +669,7 @@ static int loongarch_eiointc_writeq(struct kvm_vcpu *vcpu,
irq = offset - EIOINTC_COREMAP_START;
index = irq >> 3;
s->coremap.reg_u64[index] = data;
- eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true);
+ eiointc_update_sw_coremap(s, irq, data, sizeof(data), true);
break;
default:
ret = -EINVAL;
@@ -679,6 +692,11 @@ static int kvm_eiointc_write(struct kvm_vcpu *vcpu,
return -EINVAL;
}
+ if (addr & (len - 1)) {
+ kvm_err("%s: eiointc not aligned addr %llx len %d\n", __func__, addr, len);
+ return -EINVAL;
+ }
+
vcpu->kvm->stat.eiointc_write_exits++;
spin_lock_irqsave(&eiointc->lock, flags);
switch (len) {
@@ -787,7 +805,7 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev,
int ret = 0;
unsigned long flags;
unsigned long type = (unsigned long)attr->attr;
- u32 i, start_irq;
+ u32 i, start_irq, val;
void __user *data;
struct loongarch_eiointc *s = dev->kvm->arch.eiointc;
@@ -795,8 +813,14 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev,
spin_lock_irqsave(&s->lock, flags);
switch (type) {
case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU:
- if (copy_from_user(&s->num_cpu, data, 4))
+ if (copy_from_user(&val, data, 4))
ret = -EFAULT;
+ else {
+ if (val >= EIOINTC_ROUTE_MAX_VCPUS)
+ ret = -EINVAL;
+ else
+ s->num_cpu = val;
+ }
break;
case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_FEATURE:
if (copy_from_user(&s->features, data, 4))
@@ -809,7 +833,7 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev,
for (i = 0; i < (EIOINTC_IRQS / 4); i++) {
start_irq = i * 4;
eiointc_update_sw_coremap(s, start_irq,
- (void *)&s->coremap.reg_u32[i], sizeof(u32), false);
+ s->coremap.reg_u32[i], sizeof(u32), false);
}
break;
default:
@@ -824,7 +848,7 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev,
struct kvm_device_attr *attr,
bool is_write)
{
- int addr, cpuid, offset, ret = 0;
+ int addr, cpu, offset, ret = 0;
unsigned long flags;
void *p = NULL;
void __user *data;
@@ -832,7 +856,7 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev,
s = dev->kvm->arch.eiointc;
addr = attr->attr;
- cpuid = addr >> 16;
+ cpu = addr >> 16;
addr &= 0xffff;
data = (void __user *)attr->addr;
switch (addr) {
@@ -857,8 +881,11 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev,
p = &s->isr.reg_u32[offset];
break;
case EIOINTC_COREISR_START ... EIOINTC_COREISR_END:
+ if (cpu >= s->num_cpu)
+ return -EINVAL;
+
offset = (addr - EIOINTC_COREISR_START) / 4;
- p = &s->coreisr.reg_u32[cpuid][offset];
+ p = &s->coreisr.reg_u32[cpu][offset];
break;
case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END:
offset = (addr - EIOINTC_COREMAP_START) / 4;
@@ -899,9 +926,15 @@ static int kvm_eiointc_sw_status_access(struct kvm_device *dev,
data = (void __user *)attr->addr;
switch (addr) {
case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_NUM_CPU:
+ if (is_write)
+ return ret;
+
p = &s->num_cpu;
break;
case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_FEATURE:
+ if (is_write)
+ return ret;
+
p = &s->features;
break;
case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_STATE:
diff --git a/arch/loongarch/lib/crc32-loongarch.c b/arch/loongarch/lib/crc32-loongarch.c
index b37cd8537b45..db22c2ec55e2 100644
--- a/arch/loongarch/lib/crc32-loongarch.c
+++ b/arch/loongarch/lib/crc32-loongarch.c
@@ -11,6 +11,7 @@
#include <asm/cpu-features.h>
#include <linux/crc32.h>
+#include <linux/export.h>
#include <linux/module.h>
#include <linux/unaligned.h>
diff --git a/arch/loongarch/lib/csum.c b/arch/loongarch/lib/csum.c
index df309ae4045d..bcc9d01d8c41 100644
--- a/arch/loongarch/lib/csum.c
+++ b/arch/loongarch/lib/csum.c
@@ -2,6 +2,7 @@
// Copyright (C) 2019-2020 Arm Ltd.
#include <linux/compiler.h>
+#include <linux/export.h>
#include <linux/kasan-checks.h>
#include <linux/kernel.h>
diff --git a/arch/loongarch/mm/hugetlbpage.c b/arch/loongarch/mm/hugetlbpage.c
index cea84d7f2b91..02dad4624fe3 100644
--- a/arch/loongarch/mm/hugetlbpage.c
+++ b/arch/loongarch/mm/hugetlbpage.c
@@ -47,7 +47,8 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr,
pmd = pmd_offset(pud, addr);
}
}
- return pmd_none(pmdp_get(pmd)) ? NULL : (pte_t *) pmd;
+
+ return (!pmd || pmd_none(pmdp_get(pmd))) ? NULL : (pte_t *) pmd;
}
uint64_t pmd_to_entrylo(unsigned long pmd_val)
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index 06f11d9e4ec1..c3e4586a7975 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -106,14 +106,6 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
page += vmem_altmap_offset(altmap);
__remove_pages(start_pfn, nr_pages, altmap);
}
-
-#ifdef CONFIG_NUMA
-int memory_add_physaddr_to_nid(u64 start)
-{
- return pa_to_nid(start);
-}
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-#endif
#endif
#ifdef CONFIG_SPARSEMEM_VMEMMAP
diff --git a/arch/loongarch/mm/ioremap.c b/arch/loongarch/mm/ioremap.c
index 70ca73019811..df949a3d0f34 100644
--- a/arch/loongarch/mm/ioremap.c
+++ b/arch/loongarch/mm/ioremap.c
@@ -16,12 +16,12 @@ void __init early_iounmap(void __iomem *addr, unsigned long size)
}
-void *early_memremap_ro(resource_size_t phys_addr, unsigned long size)
+void * __init early_memremap_ro(resource_size_t phys_addr, unsigned long size)
{
return early_memremap(phys_addr, size);
}
-void *early_memremap_prot(resource_size_t phys_addr, unsigned long size,
+void * __init early_memremap_prot(resource_size_t phys_addr, unsigned long size,
unsigned long prot_val)
{
return early_memremap(phys_addr, size);
diff --git a/arch/loongarch/pci/acpi.c b/arch/loongarch/pci/acpi.c
index 1da4dc46df43..50c9016641a4 100644
--- a/arch/loongarch/pci/acpi.c
+++ b/arch/loongarch/pci/acpi.c
@@ -194,6 +194,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
{
struct pci_bus *bus;
struct pci_root_info *info;
+ struct pci_host_bridge *host;
struct acpi_pci_root_ops *root_ops;
int domain = root->segment;
int busnum = root->secondary.start;
@@ -237,8 +238,17 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
return NULL;
}
- pci_bus_size_bridges(bus);
- pci_bus_assign_resources(bus);
+ /* If we must preserve the resource configuration, claim now */
+ host = pci_find_host_bridge(bus);
+ if (host->preserve_config)
+ pci_bus_claim_resources(bus);
+
+ /*
+ * Assign whatever was left unassigned. If we didn't claim above,
+ * this will reassign everything.
+ */
+ pci_assign_unassigned_root_bus_resources(bus);
+
list_for_each_entry(child, &bus->children, node)
pcie_bus_configure_settings(child);
}
diff --git a/arch/loongarch/pci/pci.c b/arch/loongarch/pci/pci.c
index 2726639150bc..5bc9627a6cf9 100644
--- a/arch/loongarch/pci/pci.c
+++ b/arch/loongarch/pci/pci.c
@@ -3,7 +3,6 @@
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
#include <linux/kernel.h>
-#include <linux/export.h>
#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/types.h>
diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h
index f5c596b211d4..d79fef609194 100644
--- a/arch/m68k/include/asm/mcf_pgtable.h
+++ b/arch/m68k/include/asm/mcf_pgtable.h
@@ -268,7 +268,7 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD];
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) (__pte((x).val))
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h
index 040ac3bad713..14fee64d3e60 100644
--- a/arch/m68k/include/asm/motorola_pgtable.h
+++ b/arch/m68k/include/asm/motorola_pgtable.h
@@ -185,7 +185,7 @@ extern pgd_t kernel_pg_dir[128];
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/m68k/include/asm/sun3_pgtable.h b/arch/m68k/include/asm/sun3_pgtable.h
index 73745dc0ec0e..858cbe936f5b 100644
--- a/arch/m68k/include/asm/sun3_pgtable.h
+++ b/arch/m68k/include/asm/sun3_pgtable.h
@@ -169,7 +169,7 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD];
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/m68k/kernel/Makefile b/arch/m68k/kernel/Makefile
index 6c732ed3998b..57c1b3e8d60e 100644
--- a/arch/m68k/kernel/Makefile
+++ b/arch/m68k/kernel/Makefile
@@ -3,7 +3,7 @@
# Makefile for the linux kernel.
#
-extra-y += vmlinux.lds
+always-$(KBUILD_BUILTIN) += vmlinux.lds
obj-$(CONFIG_MMU_MOTOROLA) := head.o
obj-$(CONFIG_SUN3) := sun3-head.o
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index b1bb2c65dd04..bae1abfa6f6b 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -398,7 +398,7 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 2 })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 2 })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/microblaze/kernel/Makefile b/arch/microblaze/kernel/Makefile
index 85c4d29ef43e..241e466e7333 100644
--- a/arch/microblaze/kernel/Makefile
+++ b/arch/microblaze/kernel/Makefile
@@ -11,7 +11,7 @@ CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_process.o = -pg
endif
-extra-y := vmlinux.lds
+always-$(KBUILD_BUILTIN) := vmlinux.lds
obj-y += head.o dma.o exceptions.o \
hw_exception_handler.o irq.o \
diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms
index bca37ddf974b..41a00fa860c1 100644
--- a/arch/mips/Kbuild.platforms
+++ b/arch/mips/Kbuild.platforms
@@ -11,6 +11,7 @@ platform-$(CONFIG_CAVIUM_OCTEON_SOC) += cavium-octeon/
platform-$(CONFIG_EYEQ) += mobileye/
platform-$(CONFIG_MIPS_COBALT) += cobalt/
platform-$(CONFIG_MACH_DECSTATION) += dec/
+platform-$(CONFIG_ECONET) += econet/
platform-$(CONFIG_MIPS_GENERIC) += generic/
platform-$(CONFIG_MACH_JAZZ) += jazz/
platform-$(CONFIG_LANTIQ) += lantiq/
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index fc0772c1bad4..1e48184ecf1e 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -391,6 +391,31 @@ config MACH_DECSTATION
otherwise choose R3000.
+config ECONET
+ bool "EcoNet MIPS family"
+ select BOOT_RAW
+ select CPU_BIG_ENDIAN
+ select DEBUG_ZBOOT if DEBUG_KERNEL
+ select EARLY_PRINTK_8250
+ select ECONET_EN751221_TIMER
+ select SERIAL_8250
+ select SERIAL_OF_PLATFORM
+ select SYS_SUPPORTS_BIG_ENDIAN
+ select SYS_HAS_CPU_MIPS32_R1
+ select SYS_HAS_CPU_MIPS32_R2
+ select SYS_HAS_EARLY_PRINTK
+ select SYS_SUPPORTS_32BIT_KERNEL
+ select SYS_SUPPORTS_MIPS16
+ select SYS_SUPPORTS_ZBOOT_UART16550
+ select USE_GENERIC_EARLY_PRINTK_8250
+ select USE_OF
+ help
+ EcoNet EN75xx MIPS devices are big endian MIPS machines used
+ in XPON (fiber) and DSL applications. They have SPI, PCI, USB,
+ GPIO, and Ethernet, with optional XPON, DSL, and VoIP DSP cores.
+ Don't confuse these with the Airoha ARM devices sometimes referred
+ to as "EcoNet", this family is for MIPS based devices only.
+
config MACH_JAZZ
bool "Jazz family of machines"
select ARC_MEMORY
@@ -617,6 +642,7 @@ config EYEQ
select USB_UHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
select USB_UHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
select USE_OF
+ select HOTPLUG_PARALLEL if SMP
help
Select this to build a kernel supporting EyeQ SoC from Mobileye.
@@ -1020,6 +1046,7 @@ source "arch/mips/ath79/Kconfig"
source "arch/mips/bcm47xx/Kconfig"
source "arch/mips/bcm63xx/Kconfig"
source "arch/mips/bmips/Kconfig"
+source "arch/mips/econet/Kconfig"
source "arch/mips/generic/Kconfig"
source "arch/mips/ingenic/Kconfig"
source "arch/mips/jazz/Kconfig"
@@ -2287,6 +2314,7 @@ config MIPS_CPS
select MIPS_CM
select MIPS_CPS_PM if HOTPLUG_CPU
select SMP
+ select HOTPLUG_SMT if HOTPLUG_PARALLEL
select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
select SYNC_R4K if (CEVT_R4K || CSRC_R4K)
select SYS_SUPPORTS_HOTPLUG_CPU
diff --git a/arch/mips/alchemy/common/gpiolib.c b/arch/mips/alchemy/common/gpiolib.c
index 1b16daaa86ae..411f70ceb762 100644
--- a/arch/mips/alchemy/common/gpiolib.c
+++ b/arch/mips/alchemy/common/gpiolib.c
@@ -119,9 +119,11 @@ static int alchemy_gpic_get(struct gpio_chip *chip, unsigned int off)
return !!au1300_gpio_get_value(off + AU1300_GPIO_BASE);
}
-static void alchemy_gpic_set(struct gpio_chip *chip, unsigned int off, int v)
+static int alchemy_gpic_set(struct gpio_chip *chip, unsigned int off, int v)
{
au1300_gpio_set_value(off + AU1300_GPIO_BASE, v);
+
+ return 0;
}
static int alchemy_gpic_dir_input(struct gpio_chip *chip, unsigned int off)
@@ -145,7 +147,7 @@ static struct gpio_chip au1300_gpiochip = {
.direction_input = alchemy_gpic_dir_input,
.direction_output = alchemy_gpic_dir_output,
.get = alchemy_gpic_get,
- .set = alchemy_gpic_set,
+ .set_rv = alchemy_gpic_set,
.to_irq = alchemy_gpic_gpio_to_irq,
.base = AU1300_GPIO_BASE,
.ngpio = AU1300_GPIO_NUM,
diff --git a/arch/mips/bcm63xx/boards/board_bcm963xx.c b/arch/mips/bcm63xx/boards/board_bcm963xx.c
index 9cc8fbf218a5..c5617b889b1c 100644
--- a/arch/mips/bcm63xx/boards/board_bcm963xx.c
+++ b/arch/mips/bcm63xx/boards/board_bcm963xx.c
@@ -764,7 +764,7 @@ void __init board_prom_init(void)
snprintf(cfe_version, 12, "%s", (char *) &cfe[4]);
}
} else {
- strcpy(cfe_version, "unknown");
+ strscpy(cfe_version, "unknown");
}
pr_info("CFE version: %s\n", cfe_version);
diff --git a/arch/mips/bcm63xx/gpio.c b/arch/mips/bcm63xx/gpio.c
index 5c4a233db55f..e7a53cd0dec5 100644
--- a/arch/mips/bcm63xx/gpio.c
+++ b/arch/mips/bcm63xx/gpio.c
@@ -35,8 +35,7 @@ static void bcm63xx_gpio_out_low_reg_init(void)
static DEFINE_SPINLOCK(bcm63xx_gpio_lock);
static u32 gpio_out_low, gpio_out_high;
-static void bcm63xx_gpio_set(struct gpio_chip *chip,
- unsigned gpio, int val)
+static int bcm63xx_gpio_set(struct gpio_chip *chip, unsigned int gpio, int val)
{
u32 reg;
u32 mask;
@@ -62,6 +61,8 @@ static void bcm63xx_gpio_set(struct gpio_chip *chip,
*v &= ~mask;
bcm_gpio_writel(*v, reg);
spin_unlock_irqrestore(&bcm63xx_gpio_lock, flags);
+
+ return 0;
}
static int bcm63xx_gpio_get(struct gpio_chip *chip, unsigned gpio)
@@ -130,7 +131,7 @@ static struct gpio_chip bcm63xx_gpio_chip = {
.direction_input = bcm63xx_gpio_direction_input,
.direction_output = bcm63xx_gpio_direction_output,
.get = bcm63xx_gpio_get,
- .set = bcm63xx_gpio_set,
+ .set_rv = bcm63xx_gpio_set,
.base = 0,
};
diff --git a/arch/mips/boot/compressed/uart-16550.c b/arch/mips/boot/compressed/uart-16550.c
index db618e72a0c4..529e77a6487c 100644
--- a/arch/mips/boot/compressed/uart-16550.c
+++ b/arch/mips/boot/compressed/uart-16550.c
@@ -20,6 +20,11 @@
#define PORT(offset) (CKSEG1ADDR(INGENIC_UART_BASE_ADDR) + (4 * offset))
#endif
+#ifdef CONFIG_ECONET
+#define EN75_UART_BASE 0x1fbf0003
+#define PORT(offset) (CKSEG1ADDR(EN75_UART_BASE) + (4 * (offset)))
+#endif
+
#ifndef IOTYPE
#define IOTYPE char
#endif
diff --git a/arch/mips/boot/dts/Makefile b/arch/mips/boot/dts/Makefile
index ff468439a8c4..7375c6ced82b 100644
--- a/arch/mips/boot/dts/Makefile
+++ b/arch/mips/boot/dts/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
subdir-$(CONFIG_BMIPS_GENERIC) += brcm
subdir-$(CONFIG_CAVIUM_OCTEON_SOC) += cavium-octeon
+subdir-$(CONFIG_ECONET) += econet
subdir-$(CONFIG_EYEQ) += mobileye
subdir-$(CONFIG_FIT_IMAGE_FDT_MARDUK) += img
subdir-$(CONFIG_FIT_IMAGE_FDT_BOSTON) += img
diff --git a/arch/mips/boot/dts/econet/Makefile b/arch/mips/boot/dts/econet/Makefile
new file mode 100644
index 000000000000..b467d5624e39
--- /dev/null
+++ b/arch/mips/boot/dts/econet/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+dtb-$(CONFIG_DTB_ECONET_SMARTFIBER_XP8421_B) += en751221_smartfiber_xp8421-b.dtb
diff --git a/arch/mips/boot/dts/econet/en751221.dtsi b/arch/mips/boot/dts/econet/en751221.dtsi
new file mode 100644
index 000000000000..66197e73d4f0
--- /dev/null
+++ b/arch/mips/boot/dts/econet/en751221.dtsi
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/dts-v1/;
+
+/ {
+ compatible = "econet,en751221";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ hpt_clock: clock {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <200000000>; /* 200 MHz */
+ };
+
+ cpus: cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "mips,mips24KEc";
+ reg = <0>;
+ };
+ };
+
+ cpuintc: interrupt-controller {
+ compatible = "mti,cpu-interrupt-controller";
+ interrupt-controller;
+ #address-cells = <0>;
+ #interrupt-cells = <1>;
+ };
+
+ intc: interrupt-controller@1fb40000 {
+ compatible = "econet,en751221-intc";
+ reg = <0x1fb40000 0x100>;
+ interrupt-parent = <&cpuintc>;
+ interrupts = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ econet,shadow-interrupts = <7 2>, <8 3>, <13 12>, <30 29>;
+ };
+
+ uart: serial@1fbf0000 {
+ compatible = "ns16550";
+ reg = <0x1fbf0000 0x30>;
+ reg-io-width = <4>;
+ reg-shift = <2>;
+ interrupt-parent = <&intc>;
+ interrupts = <0>;
+ /*
+ * Conversion of baud rate to clock frequency requires a
+ * computation that is not in the ns16550 driver, so this
+ * uart is fixed at 115200 baud.
+ */
+ clock-frequency = <1843200>;
+ };
+
+ timer_hpt: timer@1fbf0400 {
+ compatible = "econet,en751221-timer";
+ reg = <0x1fbf0400 0x100>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <30>;
+ clocks = <&hpt_clock>;
+ };
+};
diff --git a/arch/mips/boot/dts/econet/en751221_smartfiber_xp8421-b.dts b/arch/mips/boot/dts/econet/en751221_smartfiber_xp8421-b.dts
new file mode 100644
index 000000000000..8223c5bce67f
--- /dev/null
+++ b/arch/mips/boot/dts/econet/en751221_smartfiber_xp8421-b.dts
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/dts-v1/;
+
+#include "en751221.dtsi"
+
+/ {
+ model = "SmartFiber XP8421-B";
+ compatible = "smartfiber,xp8421-b", "econet,en751221";
+
+ memory@0 {
+ device_type = "memory";
+ reg = <0x00000000 0x1c000000>;
+ };
+
+ chosen {
+ stdout-path = "/serial@1fbf0000:115200";
+ linux,usable-memory-range = <0x00020000 0x1bfe0000>;
+ };
+};
diff --git a/arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dts b/arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dts
index c7ea4f1c0bb2..6c277ab83d4b 100644
--- a/arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dts
+++ b/arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dts
@@ -29,6 +29,7 @@
compatible = "loongson,pch-msi-1.0";
reg = <0 0x2ff00000 0 0x8>;
interrupt-controller;
+ #interrupt-cells = <1>;
msi-controller;
loongson,msi-base-vec = <64>;
loongson,msi-num-vecs = <64>;
diff --git a/arch/mips/boot/dts/pic32/pic32mzda.dtsi b/arch/mips/boot/dts/pic32/pic32mzda.dtsi
index fdc721b414a8..feca35ba56a4 100644
--- a/arch/mips/boot/dts/pic32/pic32mzda.dtsi
+++ b/arch/mips/boot/dts/pic32/pic32mzda.dtsi
@@ -225,7 +225,7 @@
gpio-ranges = <&pic32_pinctrl 0 144 16>;
};
- sdhci: sdhci@1f8ec000 {
+ sdhci: mmc@1f8ec000 {
compatible = "microchip,pic32mzda-sdhci";
reg = <0x1f8ec000 0x100>;
interrupts = <191 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/arch/mips/boot/dts/realtek/rtl930x.dtsi b/arch/mips/boot/dts/realtek/rtl930x.dtsi
index f2e57ea3a60c..101bab72a95f 100644
--- a/arch/mips/boot/dts/realtek/rtl930x.dtsi
+++ b/arch/mips/boot/dts/realtek/rtl930x.dtsi
@@ -69,6 +69,39 @@
#size-cells = <0>;
status = "disabled";
};
+
+ mdio_controller: mdio-controller@ca00 {
+ compatible = "realtek,rtl9301-mdio";
+ reg = <0xca00 0x200>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+
+ mdio0: mdio-bus@0 {
+ reg = <0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+ mdio1: mdio-bus@1 {
+ reg = <1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+ mdio2: mdio-bus@2 {
+ reg = <2>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+ mdio3: mdio-bus@3 {
+ reg = <3>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+ };
};
soc: soc@18000000 {
diff --git a/arch/mips/econet/Kconfig b/arch/mips/econet/Kconfig
new file mode 100644
index 000000000000..fd69884cc9a8
--- /dev/null
+++ b/arch/mips/econet/Kconfig
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: GPL-2.0
+if ECONET
+
+choice
+ prompt "EcoNet SoC selection"
+ default SOC_ECONET_EN751221
+ help
+ Select EcoNet MIPS SoC type. Individual SoCs within a family are
+ very similar, so is it enough to select the right family, and
+ then customize to the specific SoC using the device tree only.
+
+ config SOC_ECONET_EN751221
+ bool "EN751221 family"
+ select COMMON_CLK
+ select ECONET_EN751221_INTC
+ select IRQ_MIPS_CPU
+ select SMP
+ select SMP_UP
+ select SYS_SUPPORTS_SMP
+ help
+ The EN751221 family includes EN7512, RN7513, EN7521, EN7526.
+ They are based on single core MIPS 34Kc processors. To boot
+ this kernel, you will need a device tree such as
+ MIPS_RAW_APPENDED_DTB=y, and a root filesystem.
+endchoice
+
+choice
+ prompt "Devicetree selection"
+ default DTB_ECONET_NONE
+ help
+ Select the devicetree.
+
+ config DTB_ECONET_NONE
+ bool "None"
+
+ config DTB_ECONET_SMARTFIBER_XP8421_B
+ bool "EN751221 SmartFiber XP8421-B"
+ depends on SOC_ECONET_EN751221
+ select BUILTIN_DTB
+ help
+ The SmartFiber XP8421-B is a device based on the EN751221 SoC.
+ It has 512MB of memory and 256MB of NAND flash. This kernel
+ needs only an appended initramfs to boot. It can be loaded
+ through XMODEM and booted from memory in the bootloader, or
+ it can be packed in tclinux.trx format and written to flash.
+endchoice
+
+endif
diff --git a/arch/mips/econet/Makefile b/arch/mips/econet/Makefile
new file mode 100644
index 000000000000..7e4529e7d3d7
--- /dev/null
+++ b/arch/mips/econet/Makefile
@@ -0,0 +1,2 @@
+
+obj-y := init.o
diff --git a/arch/mips/econet/Platform b/arch/mips/econet/Platform
new file mode 100644
index 000000000000..ea5616447bcd
--- /dev/null
+++ b/arch/mips/econet/Platform
@@ -0,0 +1,5 @@
+# To address a 7.2MB kernel size limit in the EcoNet SDK bootloader,
+# we put the load address well above where the bootloader loads and then use
+# zboot. So please set CONFIG_ZBOOT_LOAD_ADDRESS to the address where your
+# bootloader actually places the kernel.
+load-$(CONFIG_ECONET) += 0xffffffff81000000
diff --git a/arch/mips/econet/init.c b/arch/mips/econet/init.c
new file mode 100644
index 000000000000..6f43ffb209cb
--- /dev/null
+++ b/arch/mips/econet/init.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * EcoNet setup code
+ *
+ * Copyright (C) 2025 Caleb James DeLisle <cjd@cjdns.fr>
+ */
+
+#include <linux/init.h>
+#include <linux/of_clk.h>
+#include <linux/irqchip.h>
+
+#include <asm/addrspace.h>
+#include <asm/io.h>
+#include <asm/bootinfo.h>
+#include <asm/time.h>
+#include <asm/prom.h>
+#include <asm/smp-ops.h>
+#include <asm/reboot.h>
+
+#define CR_AHB_RSTCR ((void __iomem *)CKSEG1ADDR(0x1fb00040))
+#define RESET BIT(31)
+
+#define UART_BASE CKSEG1ADDR(0x1fbf0003)
+#define UART_REG_SHIFT 2
+
+static void hw_reset(char *command)
+{
+ iowrite32(RESET, CR_AHB_RSTCR);
+}
+
+/* 1. Bring up early printk. */
+void __init prom_init(void)
+{
+ setup_8250_early_printk_port(UART_BASE, UART_REG_SHIFT, 0);
+ _machine_restart = hw_reset;
+}
+
+/* 2. Parse the DT and find memory */
+void __init plat_mem_setup(void)
+{
+ void *dtb;
+
+ set_io_port_base(KSEG1);
+
+ dtb = get_fdt();
+ if (!dtb)
+ panic("no dtb found");
+
+ __dt_setup_arch(dtb);
+
+ early_init_dt_scan_memory();
+}
+
+/* 3. Overload __weak device_tree_init(), add SMP_UP ops */
+void __init device_tree_init(void)
+{
+ unflatten_and_copy_device_tree();
+
+ register_up_smp_ops();
+}
+
+const char *get_system_type(void)
+{
+ return "EcoNet-EN75xx";
+}
+
+/* 4. Initialize the IRQ subsystem */
+void __init arch_init_irq(void)
+{
+ irqchip_init();
+}
+
+/* 5. Timers */
+void __init plat_time_init(void)
+{
+ of_clk_init(NULL);
+ timer_probe();
+}
diff --git a/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_pci.h b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_pci.h
index a0d4b752899e..5dbc9b13d15b 100644
--- a/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_pci.h
+++ b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_pci.h
@@ -12,12 +12,32 @@
#ifndef _CS5536_PCI_H
#define _CS5536_PCI_H
+#include <linux/init.h>
#include <linux/types.h>
#include <linux/pci_regs.h>
extern void cs5536_pci_conf_write4(int function, int reg, u32 value);
extern u32 cs5536_pci_conf_read4(int function, int reg);
+extern void pci_ehci_write_reg(int reg, u32 value);
+extern u32 pci_ehci_read_reg(int reg);
+
+extern void pci_ide_write_reg(int reg, u32 value);
+extern u32 pci_ide_read_reg(int reg);
+
+extern void pci_acc_write_reg(int reg, u32 value);
+extern u32 pci_acc_read_reg(int reg);
+
+extern void pci_ohci_write_reg(int reg, u32 value);
+extern u32 pci_ohci_read_reg(int reg);
+
+extern void pci_isa_write_bar(int n, u32 value);
+extern u32 pci_isa_read_bar(int n);
+extern void pci_isa_write_reg(int reg, u32 value);
+extern u32 pci_isa_read_reg(int reg);
+
+extern int __init init_mfgpt_clocksource(void);
+
#define CS5536_ACC_INTR 9
#define CS5536_IDE_INTR 14
#define CS5536_USB_INTR 11
diff --git a/arch/mips/include/asm/mach-loongson2ef/loongson.h b/arch/mips/include/asm/mach-loongson2ef/loongson.h
index ca039b8dcde3..4a098fb10232 100644
--- a/arch/mips/include/asm/mach-loongson2ef/loongson.h
+++ b/arch/mips/include/asm/mach-loongson2ef/loongson.h
@@ -18,6 +18,9 @@ extern void bonito_irq_init(void);
extern void mach_prepare_reboot(void);
extern void mach_prepare_shutdown(void);
+/* machine-specific PROM functions */
+extern void __init mach_prom_init_machtype(void);
+
/* environment arguments from bootloader */
extern u32 cpu_clock_freq;
extern u32 memsize, highmemsize;
@@ -45,6 +48,12 @@ extern void __init mach_init_irq(void);
extern void mach_irq_dispatch(unsigned int pending);
extern int mach_i8259_irq(void);
+/* power management functions */
+extern void setup_wakeup_events(void);
+extern int wakeup_loongson(void);
+extern void __weak mach_suspend(void);
+extern void __weak mach_resume(void);
+
/* We need this in some places... */
#define delay() ({ \
int x; \
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 4852b005a72d..ae73ecf4c41a 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -534,7 +534,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#endif
#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte.pte_low & _PAGE_SWP_EXCLUSIVE;
}
@@ -551,7 +551,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
return pte;
}
#else
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/mips/include/asm/topology.h b/arch/mips/include/asm/topology.h
index 0673d2d0f2e6..5158c802eb65 100644
--- a/arch/mips/include/asm/topology.h
+++ b/arch/mips/include/asm/topology.h
@@ -16,6 +16,9 @@
#define topology_core_id(cpu) (cpu_core(&cpu_data[cpu]))
#define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
#define topology_sibling_cpumask(cpu) (&cpu_sibling_map[cpu])
+
+extern struct cpumask __cpu_primary_thread_mask;
+#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask)
#endif
#endif /* __ASM_TOPOLOGY_H */
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index ecf3278a32f7..95a1e674fd67 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -3,7 +3,7 @@
# Makefile for the Linux/MIPS kernel.
#
-extra-y := vmlinux.lds
+always-$(KBUILD_BUILTIN) := vmlinux.lds
obj-y += head.o branch.o cmpxchg.o elf.o entry.o genex.o idle.o irq.o \
process.o prom.o ptrace.o reset.o setup.o signal.o \
diff --git a/arch/mips/kernel/gpio_txx9.c b/arch/mips/kernel/gpio_txx9.c
index 8c083612df9d..027fb57d0d79 100644
--- a/arch/mips/kernel/gpio_txx9.c
+++ b/arch/mips/kernel/gpio_txx9.c
@@ -32,14 +32,16 @@ static void txx9_gpio_set_raw(unsigned int offset, int value)
__raw_writel(val, &txx9_pioptr->dout);
}
-static void txx9_gpio_set(struct gpio_chip *chip, unsigned int offset,
- int value)
+static int txx9_gpio_set(struct gpio_chip *chip, unsigned int offset,
+ int value)
{
unsigned long flags;
spin_lock_irqsave(&txx9_gpio_lock, flags);
txx9_gpio_set_raw(offset, value);
mmiowb();
spin_unlock_irqrestore(&txx9_gpio_lock, flags);
+
+ return 0;
}
static int txx9_gpio_dir_in(struct gpio_chip *chip, unsigned int offset)
@@ -68,7 +70,7 @@ static int txx9_gpio_dir_out(struct gpio_chip *chip, unsigned int offset,
static struct gpio_chip txx9_gpio_chip = {
.get = txx9_gpio_get,
- .set = txx9_gpio_set,
+ .set_rv = txx9_gpio_set,
.direction_input = txx9_gpio_dir_in,
.direction_output = txx9_gpio_dir_out,
.label = "TXx9",
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index f7107479c7fa..b890d64d352c 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -922,11 +922,13 @@ static const struct pt_regs_offset regoffset_table[] = {
*/
int regs_query_register_offset(const char *name)
{
- const struct pt_regs_offset *roff;
- for (roff = regoffset_table; roff->name != NULL; roff++)
- if (!strcmp(roff->name, name))
- return roff->offset;
- return -EINVAL;
+ const struct pt_regs_offset *roff;
+
+ for (roff = regoffset_table; roff->name != NULL; roff++)
+ if (!strcmp(roff->name, name))
+ return roff->offset;
+
+ return -EINVAL;
}
#if defined(CONFIG_32BIT) || defined(CONFIG_MIPS32_O32)
@@ -937,7 +939,7 @@ static const struct user_regset mips_regsets[] = {
.n = ELF_NGREG,
.size = sizeof(unsigned int),
.align = sizeof(unsigned int),
- .regset_get = gpr32_get,
+ .regset_get = gpr32_get,
.set = gpr32_set,
},
[REGSET_DSP] = {
@@ -945,7 +947,7 @@ static const struct user_regset mips_regsets[] = {
.n = NUM_DSP_REGS + 1,
.size = sizeof(u32),
.align = sizeof(u32),
- .regset_get = dsp32_get,
+ .regset_get = dsp32_get,
.set = dsp32_set,
.active = dsp_active,
},
@@ -955,7 +957,7 @@ static const struct user_regset mips_regsets[] = {
.n = ELF_NFPREG,
.size = sizeof(elf_fpreg_t),
.align = sizeof(elf_fpreg_t),
- .regset_get = fpr_get,
+ .regset_get = fpr_get,
.set = fpr_set,
},
[REGSET_FP_MODE] = {
@@ -963,7 +965,7 @@ static const struct user_regset mips_regsets[] = {
.n = 1,
.size = sizeof(int),
.align = sizeof(int),
- .regset_get = fp_mode_get,
+ .regset_get = fp_mode_get,
.set = fp_mode_set,
},
#endif
@@ -973,7 +975,7 @@ static const struct user_regset mips_regsets[] = {
.n = NUM_FPU_REGS + 1,
.size = 16,
.align = 16,
- .regset_get = msa_get,
+ .regset_get = msa_get,
.set = msa_set,
},
#endif
@@ -997,7 +999,7 @@ static const struct user_regset mips64_regsets[] = {
.n = ELF_NGREG,
.size = sizeof(unsigned long),
.align = sizeof(unsigned long),
- .regset_get = gpr64_get,
+ .regset_get = gpr64_get,
.set = gpr64_set,
},
[REGSET_DSP] = {
@@ -1005,7 +1007,7 @@ static const struct user_regset mips64_regsets[] = {
.n = NUM_DSP_REGS + 1,
.size = sizeof(u64),
.align = sizeof(u64),
- .regset_get = dsp64_get,
+ .regset_get = dsp64_get,
.set = dsp64_set,
.active = dsp_active,
},
@@ -1015,7 +1017,7 @@ static const struct user_regset mips64_regsets[] = {
.n = 1,
.size = sizeof(int),
.align = sizeof(int),
- .regset_get = fp_mode_get,
+ .regset_get = fp_mode_get,
.set = fp_mode_set,
},
[REGSET_FPR] = {
@@ -1023,7 +1025,7 @@ static const struct user_regset mips64_regsets[] = {
.n = ELF_NFPREG,
.size = sizeof(elf_fpreg_t),
.align = sizeof(elf_fpreg_t),
- .regset_get = fpr_get,
+ .regset_get = fpr_get,
.set = fpr_set,
},
#endif
@@ -1033,7 +1035,7 @@ static const struct user_regset mips64_regsets[] = {
.n = NUM_FPU_REGS + 1,
.size = 16,
.align = 16,
- .regset_get = msa_get,
+ .regset_get = msa_get,
.set = msa_set,
},
#endif
@@ -1351,7 +1353,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs)
*/
asmlinkage void syscall_trace_leave(struct pt_regs *regs)
{
- /*
+ /*
* We may come here right after calling schedule_user()
* or do_notify_resume(), in which case we can be in RCU
* user mode.
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index cc26d56f3ab6..7b0e69af4097 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -236,6 +236,7 @@ static void __init cps_smp_setup(void)
/* Use the number of VPEs in cluster 0 core 0 for smp_num_siblings */
if (!cl && !c)
smp_num_siblings = core_vpes;
+ cpumask_set_cpu(nvpes, &__cpu_primary_thread_mask);
for (v = 0; v < min_t(int, core_vpes, NR_CPUS - nvpes); v++) {
cpu_set_cluster(&cpu_data[nvpes + v], cl);
@@ -368,6 +369,7 @@ static void __init cps_prepare_cpus(unsigned int max_cpus)
cl = cpu_cluster(&current_cpu_data);
c = cpu_core(&current_cpu_data);
cluster_bootcfg = &mips_cps_cluster_bootcfg[cl];
+ cpu_smt_set_num_threads(core_vpes, core_vpes);
core_bootcfg = &cluster_bootcfg->core_config[c];
bitmap_set(cluster_bootcfg->core_power, cpu_core(&current_cpu_data), 1);
atomic_set(&core_bootcfg->vpe_mask, 1 << cpu_vpe_id(&current_cpu_data));
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 39e193cad2b9..4868e79f3b30 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -56,8 +56,10 @@ EXPORT_SYMBOL(cpu_sibling_map);
cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(cpu_core_map);
+#ifndef CONFIG_HOTPLUG_PARALLEL
static DECLARE_COMPLETION(cpu_starting);
static DECLARE_COMPLETION(cpu_running);
+#endif
/*
* A logical cpu mask containing only one VPE per core to
@@ -74,6 +76,8 @@ static cpumask_t cpu_core_setup_map;
cpumask_t cpu_coherent_mask;
+struct cpumask __cpu_primary_thread_mask __read_mostly;
+
unsigned int smp_max_threads __initdata = UINT_MAX;
static int __init early_nosmt(char *s)
@@ -367,6 +371,9 @@ asmlinkage void start_secondary(void)
* to an option instead of something based on .cputype
*/
+#ifdef CONFIG_HOTPLUG_PARALLEL
+ cpuhp_ap_sync_alive();
+#endif
calibrate_delay();
cpu_data[cpu].udelay_val = loops_per_jiffy;
@@ -376,8 +383,10 @@ asmlinkage void start_secondary(void)
cpumask_set_cpu(cpu, &cpu_coherent_mask);
notify_cpu_starting(cpu);
+#ifndef CONFIG_HOTPLUG_PARALLEL
/* Notify boot CPU that we're starting & ready to sync counters */
complete(&cpu_starting);
+#endif
synchronise_count_slave(cpu);
@@ -386,11 +395,13 @@ asmlinkage void start_secondary(void)
calculate_cpu_foreign_map();
+#ifndef CONFIG_HOTPLUG_PARALLEL
/*
* Notify boot CPU that we're up & online and it can safely return
* from __cpu_up
*/
complete(&cpu_running);
+#endif
/*
* irq will be enabled in ->smp_finish(), enabling it too early
@@ -447,6 +458,12 @@ void __init smp_prepare_boot_cpu(void)
set_cpu_online(0, true);
}
+#ifdef CONFIG_HOTPLUG_PARALLEL
+int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle)
+{
+ return mp_ops->boot_secondary(cpu, tidle);
+}
+#else
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
int err;
@@ -466,6 +483,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
wait_for_completion(&cpu_running);
return 0;
}
+#endif
#ifdef CONFIG_PROFILING
/* Not really SMP stuff ... */
diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c
index 737d0d4fdcd3..2b67c44adab9 100644
--- a/arch/mips/kernel/vpe.c
+++ b/arch/mips/kernel/vpe.c
@@ -22,6 +22,7 @@
#include <linux/vmalloc.h>
#include <linux/elf.h>
#include <linux/seq_file.h>
+#include <linux/string.h>
#include <linux/syscalls.h>
#include <linux/moduleloader.h>
#include <linux/interrupt.h>
@@ -582,7 +583,7 @@ static int vpe_elfload(struct vpe *v)
struct module mod; /* so we can re-use the relocations code */
memset(&mod, 0, sizeof(struct module));
- strcpy(mod.name, "VPE loader");
+ strscpy(mod.name, "VPE loader");
hdr = (Elf_Ehdr *) v->pbuffer;
len = v->plen;
diff --git a/arch/mips/rb532/gpio.c b/arch/mips/rb532/gpio.c
index ea6ebfea4a67..0e47cd59b6cb 100644
--- a/arch/mips/rb532/gpio.c
+++ b/arch/mips/rb532/gpio.c
@@ -105,13 +105,15 @@ static int rb532_gpio_get(struct gpio_chip *chip, unsigned offset)
/*
* Set output GPIO level
*/
-static void rb532_gpio_set(struct gpio_chip *chip,
- unsigned offset, int value)
+static int rb532_gpio_set(struct gpio_chip *chip, unsigned int offset,
+ int value)
{
struct rb532_gpio_chip *gpch;
gpch = gpiochip_get_data(chip);
rb532_set_bit(value, offset, gpch->regbase + GPIOD);
+
+ return 0;
}
/*
@@ -162,7 +164,7 @@ static struct rb532_gpio_chip rb532_gpio_chip[] = {
.direction_input = rb532_gpio_direction_input,
.direction_output = rb532_gpio_direction_output,
.get = rb532_gpio_get,
- .set = rb532_gpio_set,
+ .set_rv = rb532_gpio_set,
.to_irq = rb532_gpio_to_irq,
.base = 0,
.ngpio = 32,
diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c
index 1e67fecd466e..0586ca7668b4 100644
--- a/arch/mips/txx9/generic/setup.c
+++ b/arch/mips/txx9/generic/setup.c
@@ -603,8 +603,8 @@ static int txx9_iocled_get(struct gpio_chip *chip, unsigned int offset)
return !!(data->cur_val & (1 << offset));
}
-static void txx9_iocled_set(struct gpio_chip *chip, unsigned int offset,
- int value)
+static int txx9_iocled_set(struct gpio_chip *chip, unsigned int offset,
+ int value)
{
struct txx9_iocled_data *data = gpiochip_get_data(chip);
unsigned long flags;
@@ -616,6 +616,8 @@ static void txx9_iocled_set(struct gpio_chip *chip, unsigned int offset,
writeb(data->cur_val, data->mmioaddr);
mmiowb();
spin_unlock_irqrestore(&txx9_iocled_lock, flags);
+
+ return 0;
}
static int txx9_iocled_dir_in(struct gpio_chip *chip, unsigned int offset)
@@ -653,7 +655,7 @@ void __init txx9_iocled_init(unsigned long baseaddr,
if (!iocled->mmioaddr)
goto out_free;
iocled->chip.get = txx9_iocled_get;
- iocled->chip.set = txx9_iocled_set;
+ iocled->chip.set_rv = txx9_iocled_set;
iocled->chip.direction_input = txx9_iocled_dir_in;
iocled->chip.direction_output = txx9_iocled_dir_out;
iocled->chip.label = "iocled";
diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile
index fb4c493aaffa..69d4593f64fe 100644
--- a/arch/mips/vdso/Makefile
+++ b/arch/mips/vdso/Makefile
@@ -27,6 +27,7 @@ endif
# offsets.
cflags-vdso := $(ccflags-vdso) \
$(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \
+ $(filter -std=%,$(KBUILD_CFLAGS)) \
-O3 -g -fPIC -fno-strict-aliasing -fno-common -fno-builtin -G 0 \
-mrelax-pic-calls $(call cc-option, -mexplicit-relocs) \
-fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \
diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h
index e98578e27e26..844dce55569f 100644
--- a/arch/nios2/include/asm/pgtable.h
+++ b/arch/nios2/include/asm/pgtable.h
@@ -259,7 +259,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
#define __swp_entry_to_pte(swp) ((pte_t) { (swp).val })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/nios2/kernel/Makefile b/arch/nios2/kernel/Makefile
index 78a913181fa1..4dce965a7b73 100644
--- a/arch/nios2/kernel/Makefile
+++ b/arch/nios2/kernel/Makefile
@@ -3,7 +3,7 @@
# Makefile for the nios2 linux kernel.
#
-extra-y += vmlinux.lds
+always-$(KBUILD_BUILTIN) += vmlinux.lds
obj-y += head.o
obj-y += cpuinfo.o
diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h
index 71bfb8c8c482..5bd6463bd514 100644
--- a/arch/openrisc/include/asm/pgtable.h
+++ b/arch/openrisc/include/asm/pgtable.h
@@ -411,7 +411,7 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/openrisc/kernel/Makefile b/arch/openrisc/kernel/Makefile
index e4c7d9bdd598..58e6a1b525b7 100644
--- a/arch/openrisc/kernel/Makefile
+++ b/arch/openrisc/kernel/Makefile
@@ -3,7 +3,7 @@
# Makefile for the linux kernel.
#
-extra-y := vmlinux.lds
+always-$(KBUILD_BUILTIN) := vmlinux.lds
obj-y := head.o setup.o or32_ksyms.o process.o dma.o \
traps.o time.o irq.o entry.o ptrace.o signal.o \
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 80f5e2a28413..1a86a4370b29 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -425,7 +425,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile
index 5ab0467be70a..d5055ba33722 100644
--- a/arch/parisc/kernel/Makefile
+++ b/arch/parisc/kernel/Makefile
@@ -3,7 +3,7 @@
# Makefile for arch/parisc/kernel
#
-extra-y := vmlinux.lds
+always-$(KBUILD_BUILTIN) := vmlinux.lds
obj-y := head.o cache.o pacache.o setup.o pdt.o traps.o time.o irq.o \
syscall.o entry.o sys_parisc.o firmware.o \
diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts
index c4e4d2a9b460..b7eac4e56019 100644
--- a/arch/powerpc/boot/dts/microwatt.dts
+++ b/arch/powerpc/boot/dts/microwatt.dts
@@ -4,7 +4,7 @@
/ {
#size-cells = <0x02>;
#address-cells = <0x02>;
- model-name = "microwatt";
+ model = "microwatt";
compatible = "microwatt-soc";
aliases {
diff --git a/arch/powerpc/boot/dts/mpc8315erdb.dts b/arch/powerpc/boot/dts/mpc8315erdb.dts
index e09b37d7489d..a89cb3139ca8 100644
--- a/arch/powerpc/boot/dts/mpc8315erdb.dts
+++ b/arch/powerpc/boot/dts/mpc8315erdb.dts
@@ -6,6 +6,7 @@
*/
/dts-v1/;
+#include <dt-bindings/interrupt-controller/irq.h>
/ {
compatible = "fsl,mpc8315erdb";
@@ -358,6 +359,15 @@
interrupt-parent = <&ipic>;
fsl,mpc8313-wakeup-timer = <&gtm1>;
};
+
+ gpio: gpio-controller@c00 {
+ compatible = "fsl,mpc8314-gpio";
+ reg = <0xc00 0x100>;
+ interrupts = <74 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-parent = <&ipic>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
};
pci0: pci@e0008500 {
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 42c3af90d1f0..92d21c6faf1e 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -365,7 +365,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 6ed93e290c2f..a2ddcbb3fcb9 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -693,7 +693,7 @@ static inline pte_t pte_swp_mkexclusive(pte_t pte)
return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SWP_EXCLUSIVE));
}
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SWP_EXCLUSIVE));
}
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index 8d1f0b7062eb..7d6b9e5b286e 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -286,7 +286,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
}
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 02897f4b0dbf..b891910fce8a 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -183,7 +183,7 @@
/*
* Used to name C functions called from asm
*/
-#ifdef CONFIG_PPC_KERNEL_PCREL
+#if defined(__powerpc64__) && defined(CONFIG_PPC_KERNEL_PCREL)
#define CFUNC(name) name@notoc
#else
#define CFUNC(name) name
diff --git a/arch/powerpc/include/uapi/asm/ioctls.h b/arch/powerpc/include/uapi/asm/ioctls.h
index 2c145da3b774..b5211e413829 100644
--- a/arch/powerpc/include/uapi/asm/ioctls.h
+++ b/arch/powerpc/include/uapi/asm/ioctls.h
@@ -23,10 +23,10 @@
#define TCSETSW _IOW('t', 21, struct termios)
#define TCSETSF _IOW('t', 22, struct termios)
-#define TCGETA _IOR('t', 23, struct termio)
-#define TCSETA _IOW('t', 24, struct termio)
-#define TCSETAW _IOW('t', 25, struct termio)
-#define TCSETAF _IOW('t', 28, struct termio)
+#define TCGETA 0x40147417 /* _IOR('t', 23, struct termio) */
+#define TCSETA 0x80147418 /* _IOW('t', 24, struct termio) */
+#define TCSETAW 0x80147419 /* _IOW('t', 25, struct termio) */
+#define TCSETAF 0x8014741c /* _IOW('t', 28, struct termio) */
#define TCSBRK _IO('t', 29)
#define TCXONC _IO('t', 30)
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 9d1ab3971694..fb2b95267022 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -126,7 +126,7 @@ obj-$(CONFIG_PPC_BOOK3S_32) += head_book3s_32.o
obj-$(CONFIG_44x) += head_44x.o
obj-$(CONFIG_PPC_8xx) += head_8xx.o
obj-$(CONFIG_PPC_85xx) += head_85xx.o
-extra-y += vmlinux.lds
+always-$(KBUILD_BUILTIN) += vmlinux.lds
obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 83fe99861eb1..ca7f7bb2b478 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1509,6 +1509,8 @@ int eeh_pe_configure(struct eeh_pe *pe)
/* Invalid PE ? */
if (!pe)
return -ENODEV;
+ else
+ ret = eeh_ops->configure_bridge(pe);
return ret;
}
diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile
index e8824f933326..8834dfe9d727 100644
--- a/arch/powerpc/kernel/vdso/Makefile
+++ b/arch/powerpc/kernel/vdso/Makefile
@@ -53,7 +53,7 @@ ldflags-$(CONFIG_LD_ORPHAN_WARN) += -Wl,--orphan-handling=$(CONFIG_LD_ORPHAN_WAR
ldflags-y += $(filter-out $(CC_AUTO_VAR_INIT_ZERO_ENABLER) $(CC_FLAGS_FTRACE) -Wa$(comma)%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS))
CC32FLAGS := -m32
-CC32FLAGSREMOVE := -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc
+CC32FLAGSREMOVE := -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc -mpcrel
ifdef CONFIG_CC_IS_CLANG
# This flag is supported by clang for 64-bit but not 32-bit so it will cause
# an unused command line flag warning for this file.
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 791d1942a058..3401b96be475 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -628,7 +628,7 @@ static void arm_next_watchdog(struct kvm_vcpu *vcpu)
static void kvmppc_watchdog_func(struct timer_list *t)
{
- struct kvm_vcpu *vcpu = from_timer(vcpu, t, arch.wdt_timer);
+ struct kvm_vcpu *vcpu = timer_container_of(vcpu, t, arch.wdt_timer);
u32 tsr, new_tsr;
int final;
diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c
index 0b6365d85d11..dc6f75d3ac6e 100644
--- a/arch/powerpc/platforms/book3s/vas-api.c
+++ b/arch/powerpc/platforms/book3s/vas-api.c
@@ -521,6 +521,15 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)
return -EINVAL;
}
+ /*
+ * Map complete page to the paste address. So the user
+ * space should pass 0ULL to the offset parameter.
+ */
+ if (vma->vm_pgoff) {
+ pr_debug("Page offset unsupported to map paste address\n");
+ return -EINVAL;
+ }
+
/* Ensure instance has an open send window */
if (!txwin) {
pr_err("No send window open?\n");
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index a0ae58636e10..02474e27df9b 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -359,7 +359,8 @@ static irqreturn_t kw_i2c_irq(int irq, void *dev_id)
static void kw_i2c_timeout(struct timer_list *t)
{
- struct pmac_i2c_host_kw *host = from_timer(host, t, timeout_timer);
+ struct pmac_i2c_host_kw *host = timer_container_of(host, t,
+ timeout_timer);
unsigned long flags;
spin_lock_irqsave(&host->lock, flags);
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index 4ac9808e55a4..2ea30b343354 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -48,11 +48,15 @@ static ssize_t memtrace_read(struct file *filp, char __user *ubuf,
static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct memtrace_entry *ent = filp->private_data;
+ unsigned long ent_nrpages = ent->size >> PAGE_SHIFT;
+ unsigned long vma_nrpages = vma_pages(vma);
- if (ent->size < vma->vm_end - vma->vm_start)
+ /* The requested page offset should be within object's page count */
+ if (vma->vm_pgoff >= ent_nrpages)
return -EINVAL;
- if (vma->vm_pgoff << PAGE_SHIFT >= ent->size)
+ /* The requested mapping range should remain within the bounds */
+ if (vma_nrpages > ent_nrpages - vma->vm_pgoff)
return -EINVAL;
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index bbec87b79309..1c5544401530 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -63,13 +63,15 @@ config RISCV
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
select ARCH_STACKWALK
select ARCH_SUPPORTS_ATOMIC_RMW
- select ARCH_SUPPORTS_CFI_CLANG
+ # clang >= 17: https://github.com/llvm/llvm-project/commit/62fa708ceb027713b386c7e0efda994f8bdc27e2
+ select ARCH_SUPPORTS_CFI_CLANG if CLANG_VERSION >= 170000
select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE
select ARCH_SUPPORTS_HUGETLBFS if MMU
# LLD >= 14: https://github.com/llvm/llvm-project/issues/50505
select ARCH_SUPPORTS_LTO_CLANG if LLD_VERSION >= 140000
select ARCH_SUPPORTS_LTO_CLANG_THIN if LLD_VERSION >= 140000
+ select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS if 64BIT && MMU
select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
select ARCH_SUPPORTS_PER_VMA_LOCK if MMU
select ARCH_SUPPORTS_RT
@@ -96,9 +98,11 @@ config RISCV
select CLONE_BACKWARDS
select COMMON_CLK
select CPU_PM if CPU_IDLE || HIBERNATION || SUSPEND
+ select DYNAMIC_FTRACE if FUNCTION_TRACER
select EDAC_SUPPORT
select FRAME_POINTER if PERF_EVENTS || (FUNCTION_TRACER && !DYNAMIC_FTRACE)
select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if DYNAMIC_FTRACE
+ select FUNCTION_ALIGNMENT_8B if DYNAMIC_FTRACE_WITH_CALL_OPS
select GENERIC_ARCH_TOPOLOGY
select GENERIC_ATOMIC64 if !64BIT
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
@@ -143,6 +147,7 @@ config RISCV
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if 64BIT && MMU
select HAVE_ARCH_USERFAULTFD_MINOR if 64BIT && USERFAULTFD
select HAVE_ARCH_VMAP_STACK if MMU && 64BIT
select HAVE_ASM_MODVERSIONS
@@ -150,13 +155,15 @@ config RISCV
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS if MMU
select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE)
- select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ select FUNCTION_ALIGNMENT_4B if HAVE_DYNAMIC_FTRACE && RISCV_ISA_C
+ select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS if HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS
+ select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS if (DYNAMIC_FTRACE_WITH_ARGS && !CFI_CLANG)
select HAVE_DYNAMIC_FTRACE_WITH_ARGS if HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE_GRAPH_FUNC
select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
select HAVE_FUNCTION_GRAPH_TRACER if HAVE_DYNAMIC_FTRACE_WITH_ARGS
select HAVE_FUNCTION_GRAPH_FREGS
- select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION
+ select HAVE_FUNCTION_TRACER if !XIP_KERNEL && HAVE_DYNAMIC_FTRACE
select HAVE_EBPF_JIT if MMU
select HAVE_GUP_FAST if MMU
select HAVE_FUNCTION_ARG_ACCESS_API
@@ -218,6 +225,7 @@ config RISCV
select THREAD_INFO_IN_TASK
select TRACE_IRQFLAGS_SUPPORT
select UACCESS_MEMCPY if !MMU
+ select VDSO_GETRANDOM if HAVE_GENERIC_VDSO
select USER_STACKTRACE_SUPPORT
select ZONE_DMA32 if 64BIT
@@ -236,6 +244,7 @@ config CLANG_SUPPORTS_DYNAMIC_FTRACE
config GCC_SUPPORTS_DYNAMIC_FTRACE
def_bool CC_IS_GCC
depends on $(cc-option,-fpatchable-function-entry=8)
+ depends on CC_HAS_MIN_FUNCTION_ALIGNMENT || !RISCV_ISA_C
config HAVE_SHADOW_CALL_STACK
def_bool $(cc-option,-fsanitize=shadow-call-stack)
@@ -664,12 +673,12 @@ config RISCV_ISA_V_PREEMPTIVE
default y
help
Usually, in-kernel SIMD routines are run with preemption disabled.
- Functions which envoke long running SIMD thus must yield core's
+ Functions which invoke long running SIMD thus must yield the core's
vector unit to prevent blocking other tasks for too long.
- This config allows kernel to run SIMD without explicitly disable
- preemption. Enabling this config will result in higher memory
- consumption due to the allocation of per-task's kernel Vector context.
+ This config allows the kernel to run SIMD without explicitly disabling
+ preemption. Enabling this config will result in higher memory consumption
+ due to the allocation of per-task's kernel Vector context.
config RISCV_ISA_ZAWRS
bool "Zawrs extension support for more efficient busy waiting"
@@ -842,6 +851,21 @@ config RISCV_ISA_ZICBOZ
If you don't know what to do here, say Y.
+config RISCV_ISA_ZICBOP
+ bool "Zicbop extension support for cache block prefetch"
+ depends on MMU
+ depends on RISCV_ALTERNATIVE
+ default y
+ help
+ Adds support to dynamically detect the presence of the ZICBOP
+ extension (Cache Block Prefetch Operations) and enable its
+ usage.
+
+ The Zicbop extension can be used to prefetch cache blocks for
+ read/write fetch.
+
+ If you don't know what to do here, say Y.
+
config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI
def_bool y
# https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=aed44286efa8ae8717a77d94b51ac3614e2ca6dc
@@ -1171,8 +1195,8 @@ config CMDLINE_FALLBACK
config CMDLINE_EXTEND
bool "Extend bootloader kernel arguments"
help
- The command-line arguments provided during boot will be
- appended to the built-in command line. This is useful in
+ The built-in command line will be appended to the command-
+ line arguments provided during boot. This is useful in
cases where the provided arguments are insufficient and
you don't want to or cannot modify them.
diff --git a/arch/riscv/Kconfig.vendor b/arch/riscv/Kconfig.vendor
index b096548fe0ff..e14f26368963 100644
--- a/arch/riscv/Kconfig.vendor
+++ b/arch/riscv/Kconfig.vendor
@@ -16,6 +16,19 @@ config RISCV_ISA_VENDOR_EXT_ANDES
If you don't know what to do here, say Y.
endmenu
+menu "SiFive"
+config RISCV_ISA_VENDOR_EXT_SIFIVE
+ bool "SiFive vendor extension support"
+ select RISCV_ISA_VENDOR_EXT
+ default y
+ help
+ Say N here if you want to disable all SiFive vendor extension
+ support. This will cause any SiFive vendor extensions that are
+ requested by hardware probing to be ignored.
+
+ If you don't know what to do here, say Y.
+endmenu
+
menu "T-Head"
config RISCV_ISA_VENDOR_EXT_THEAD
bool "T-Head vendor extension support"
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 539d2aef5cab..df57654a615e 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -15,9 +15,9 @@ ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
LDFLAGS_vmlinux += --no-relax
KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
ifeq ($(CONFIG_RISCV_ISA_C),y)
- CC_FLAGS_FTRACE := -fpatchable-function-entry=4
+ CC_FLAGS_FTRACE := -fpatchable-function-entry=8,4
else
- CC_FLAGS_FTRACE := -fpatchable-function-entry=2
+ CC_FLAGS_FTRACE := -fpatchable-function-entry=4,2
endif
endif
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index eea825ee58e1..fe8bd8afb418 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -18,12 +18,9 @@ CONFIG_CGROUP_DEVICE=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_CGROUP_PERF=y
CONFIG_CGROUP_BPF=y
-CONFIG_NAMESPACES=y
CONFIG_USER_NS=y
CONFIG_CHECKPOINT_RESTORE=y
CONFIG_BLK_DEV_INITRD=y
-CONFIG_EXPERT=y
-# CONFIG_SYSFS_SYSCALL is not set
CONFIG_PROFILING=y
CONFIG_ARCH_MICROCHIP=y
CONFIG_ARCH_SIFIVE=y
@@ -182,6 +179,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=y
CONFIG_REGULATOR_AXP20X=y
CONFIG_REGULATOR_GPIO=y
CONFIG_MEDIA_SUPPORT=m
+CONFIG_MEDIA_PLATFORM_SUPPORT=y
CONFIG_VIDEO_CADENCE_CSI2RX=m
CONFIG_DRM=m
CONFIG_DRM_RADEON=m
@@ -297,25 +295,7 @@ CONFIG_DEFAULT_SECURITY_DAC=y
CONFIG_CRYPTO_USER_API_HASH=y
CONFIG_CRYPTO_DEV_VIRTIO=y
CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_PAGEALLOC=y
-CONFIG_SCHED_STACK_END_CHECK=y
-CONFIG_DEBUG_VM=y
-CONFIG_DEBUG_VM_PGFLAGS=y
-CONFIG_DEBUG_MEMORY_INIT=y
-CONFIG_DEBUG_PER_CPU_MAPS=y
-CONFIG_SOFTLOCKUP_DETECTOR=y
-CONFIG_WQ_WATCHDOG=y
-CONFIG_DEBUG_RT_MUTEXES=y
-CONFIG_DEBUG_SPINLOCK=y
-CONFIG_DEBUG_MUTEXES=y
-CONFIG_DEBUG_RWSEMS=y
-CONFIG_DEBUG_ATOMIC_SLEEP=y
-CONFIG_DEBUG_LIST=y
-CONFIG_DEBUG_PLIST=y
-CONFIG_DEBUG_SG=y
-# CONFIG_RCU_TRACE is not set
-CONFIG_RCU_EQS_DEBUG=y
-# CONFIG_FTRACE is not set
# CONFIG_RUNTIME_TESTING_MENU is not set
CONFIG_MEMTEST=y
diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h
index bfc8ea5f9319..a9988bf21ec8 100644
--- a/arch/riscv/include/asm/asm-prototypes.h
+++ b/arch/riscv/include/asm/asm-prototypes.h
@@ -12,7 +12,7 @@ long long __ashlti3(long long a, int b);
#ifdef CONFIG_RISCV_ISA_V
#ifdef CONFIG_MMU
-asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n);
+asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n, bool enable_sum);
#endif /* CONFIG_MMU */
void xor_regs_2_(unsigned long bytes, unsigned long *__restrict p1,
diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h
index e1d9bf1deca6..b8c5726d86ac 100644
--- a/arch/riscv/include/asm/barrier.h
+++ b/arch/riscv/include/asm/barrier.h
@@ -14,11 +14,6 @@
#include <asm/cmpxchg.h>
#include <asm/fence.h>
-#define nop() __asm__ __volatile__ ("nop")
-#define __nops(n) ".rept " #n "\nnop\n.endr\n"
-#define nops(n) __asm__ __volatile__ (__nops(n))
-
-
/* These barriers need to enforce ordering on both devices or memory. */
#define __mb() RISCV_FENCE(iorw, iorw)
#define __rmb() RISCV_FENCE(ir, ir)
diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
index b59ffeb668d6..6086b38d5427 100644
--- a/arch/riscv/include/asm/cacheflush.h
+++ b/arch/riscv/include/asm/cacheflush.h
@@ -85,6 +85,7 @@ static inline void flush_icache_range(unsigned long start, unsigned long end)
extern unsigned int riscv_cbom_block_size;
extern unsigned int riscv_cboz_block_size;
+extern unsigned int riscv_cbop_block_size;
void riscv_init_cbo_blocksizes(void);
#ifdef CONFIG_RISCV_DMA_NONCOHERENT
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 2ec119eb147b..0b749e710216 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -13,6 +13,7 @@
#include <asm/hwcap.h>
#include <asm/insn-def.h>
#include <asm/cpufeature-macros.h>
+#include <asm/processor.h>
#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append, \
swap_append, r, p, n) \
@@ -37,6 +38,7 @@
\
__asm__ __volatile__ ( \
prepend \
+ PREFETCHW_ASM(%5) \
"0: lr.w %0, %2\n" \
" and %1, %0, %z4\n" \
" or %1, %1, %z3\n" \
@@ -44,7 +46,7 @@
" bnez %1, 0b\n" \
sc_append \
: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
- : "rJ" (__newx), "rJ" (~__mask) \
+ : "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \
: "memory"); \
\
r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
index f56b409361fb..fbd0e4306c93 100644
--- a/arch/riscv/include/asm/cpufeature.h
+++ b/arch/riscv/include/asm/cpufeature.h
@@ -67,11 +67,11 @@ void __init riscv_user_isa_enable(void);
_RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate)
bool __init check_unaligned_access_emulated_all_cpus(void);
+void unaligned_access_init(void);
+int cpu_online_unaligned_access_init(unsigned int cpu);
#if defined(CONFIG_RISCV_SCALAR_MISALIGNED)
-void check_unaligned_access_emulated(struct work_struct *work __always_unused);
void unaligned_emulation_finish(void);
bool unaligned_ctl_available(void);
-DECLARE_PER_CPU(long, misaligned_access_speed);
#else
static inline bool unaligned_ctl_available(void)
{
@@ -79,6 +79,16 @@ static inline bool unaligned_ctl_available(void)
}
#endif
+#if defined(CONFIG_RISCV_MISALIGNED)
+DECLARE_PER_CPU(long, misaligned_access_speed);
+bool misaligned_traps_can_delegate(void);
+#else
+static inline bool misaligned_traps_can_delegate(void)
+{
+ return false;
+}
+#endif
+
bool __init check_vector_unaligned_access_emulated_all_cpus(void);
#if defined(CONFIG_RISCV_VECTOR_MISALIGNED)
void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused);
diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h
index d627f63ee289..22ebea3c2b26 100644
--- a/arch/riscv/include/asm/ftrace.h
+++ b/arch/riscv/include/asm/ftrace.h
@@ -20,10 +20,9 @@ extern void *return_address(unsigned int level);
#define ftrace_return_address(n) return_address(n)
void _mcount(void);
-static inline unsigned long ftrace_call_adjust(unsigned long addr)
-{
- return addr;
-}
+unsigned long ftrace_call_adjust(unsigned long addr);
+unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip);
+#define ftrace_get_symaddr(fentry_ip) arch_ftrace_get_symaddr(fentry_ip)
/*
* Let's do like x86/arm64 and ignore the compat syscalls.
@@ -57,12 +56,21 @@ struct dyn_arch_ftrace {
* 2) jalr: setting low-12 offset to ra, jump to ra, and set ra to
* return address (original pc + 4)
*
+ * The first 2 instructions for each tracable function is compiled to 2 nop
+ * instructions. Then, the kernel initializes the first instruction to auipc at
+ * boot time (<ftrace disable>). The second instruction is patched to jalr to
+ * start the trace.
+ *
+ *<Image>:
+ * 0: nop
+ * 4: nop
+ *
*<ftrace enable>:
- * 0: auipc t0/ra, 0x?
- * 4: jalr t0/ra, ?(t0/ra)
+ * 0: auipc t0, 0x?
+ * 4: jalr t0, ?(t0)
*
*<ftrace disable>:
- * 0: nop
+ * 0: auipc t0, 0x?
* 4: nop
*
* Dynamic ftrace generates probes to call sites, so we must deal with
@@ -75,10 +83,9 @@ struct dyn_arch_ftrace {
#define AUIPC_OFFSET_MASK (0xfffff000)
#define AUIPC_PAD (0x00001000)
#define JALR_SHIFT 20
-#define JALR_RA (0x000080e7)
-#define AUIPC_RA (0x00000097)
#define JALR_T0 (0x000282e7)
#define AUIPC_T0 (0x00000297)
+#define JALR_RANGE (JALR_SIGN_MASK - 1)
#define to_jalr_t0(offset) \
(((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_T0)
@@ -96,26 +103,14 @@ do { \
call[1] = to_jalr_t0(offset); \
} while (0)
-#define to_jalr_ra(offset) \
- (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_RA)
-
-#define to_auipc_ra(offset) \
- ((offset & JALR_SIGN_MASK) ? \
- (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_RA) : \
- ((offset & AUIPC_OFFSET_MASK) | AUIPC_RA))
-
-#define make_call_ra(caller, callee, call) \
-do { \
- unsigned int offset = \
- (unsigned long) (callee) - (unsigned long) (caller); \
- call[0] = to_auipc_ra(offset); \
- call[1] = to_jalr_ra(offset); \
-} while (0)
-
/*
- * Let auipc+jalr be the basic *mcount unit*, so we make it 8 bytes here.
+ * Only the jalr insn in the auipc+jalr is patched, so we make it 4
+ * bytes here.
*/
-#define MCOUNT_INSN_SIZE 8
+#define MCOUNT_INSN_SIZE 4
+#define MCOUNT_AUIPC_SIZE 4
+#define MCOUNT_JALR_SIZE 4
+#define MCOUNT_NOP4_SIZE 4
#ifndef __ASSEMBLY__
struct dyn_ftrace;
@@ -135,6 +130,9 @@ struct __arch_ftrace_regs {
unsigned long sp;
unsigned long s0;
unsigned long t1;
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ unsigned long direct_tramp;
+#endif
union {
unsigned long args[8];
struct {
@@ -146,6 +144,13 @@ struct __arch_ftrace_regs {
unsigned long a5;
unsigned long a6;
unsigned long a7;
+#ifdef CONFIG_CC_IS_CLANG
+ unsigned long t2;
+ unsigned long t3;
+ unsigned long t4;
+ unsigned long t5;
+ unsigned long t6;
+#endif
};
};
};
@@ -221,10 +226,13 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs);
#define ftrace_graph_func ftrace_graph_func
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr)
{
arch_ftrace_regs(fregs)->t1 = addr;
}
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */
#endif /* __ASSEMBLY__ */
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index e3cbf203cdde..affd63e11b0a 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -105,6 +105,7 @@
#define RISCV_ISA_EXT_ZVFBFWMA 96
#define RISCV_ISA_EXT_ZAAMO 97
#define RISCV_ISA_EXT_ZALRSC 98
+#define RISCV_ISA_EXT_ZICBOP 99
#define RISCV_ISA_EXT_XLINUXENVCFG 127
diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
index 1f690fea0e03..7fe0a379474a 100644
--- a/arch/riscv/include/asm/hwprobe.h
+++ b/arch/riscv/include/asm/hwprobe.h
@@ -8,7 +8,7 @@
#include <uapi/asm/hwprobe.h>
-#define RISCV_HWPROBE_MAX_KEY 12
+#define RISCV_HWPROBE_MAX_KEY 13
static inline bool riscv_hwprobe_key_is_valid(__s64 key)
{
@@ -22,6 +22,7 @@ static inline bool hwprobe_key_is_bitmask(__s64 key)
case RISCV_HWPROBE_KEY_IMA_EXT_0:
case RISCV_HWPROBE_KEY_CPUPERF_0:
case RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0:
+ case RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0:
return true;
}
diff --git a/arch/riscv/include/asm/image.h b/arch/riscv/include/asm/image.h
index e0b319af3681..8927a6ea1127 100644
--- a/arch/riscv/include/asm/image.h
+++ b/arch/riscv/include/asm/image.h
@@ -30,6 +30,8 @@
RISCV_HEADER_VERSION_MINOR)
#ifndef __ASSEMBLY__
+#define riscv_image_flag_field(flags, field)\
+ (((flags) >> field##_SHIFT) & field##_MASK)
/**
* struct riscv_image_header - riscv kernel image header
* @code0: Executable code
diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h
index 71060a2f838e..d5adbaec1d01 100644
--- a/arch/riscv/include/asm/insn-def.h
+++ b/arch/riscv/include/asm/insn-def.h
@@ -18,6 +18,13 @@
#define INSN_I_RD_SHIFT 7
#define INSN_I_OPCODE_SHIFT 0
+#define INSN_S_SIMM7_SHIFT 25
+#define INSN_S_RS2_SHIFT 20
+#define INSN_S_RS1_SHIFT 15
+#define INSN_S_FUNC3_SHIFT 12
+#define INSN_S_SIMM5_SHIFT 7
+#define INSN_S_OPCODE_SHIFT 0
+
#ifdef __ASSEMBLY__
#ifdef CONFIG_AS_HAS_INSN
@@ -30,6 +37,10 @@
.insn i \opcode, \func3, \rd, \rs1, \simm12
.endm
+ .macro insn_s, opcode, func3, rs2, simm12, rs1
+ .insn s \opcode, \func3, \rs2, \simm12(\rs1)
+ .endm
+
#else
#include <asm/gpr-num.h>
@@ -51,10 +62,20 @@
(\simm12 << INSN_I_SIMM12_SHIFT))
.endm
+ .macro insn_s, opcode, func3, rs2, simm12, rs1
+ .4byte ((\opcode << INSN_S_OPCODE_SHIFT) | \
+ (\func3 << INSN_S_FUNC3_SHIFT) | \
+ (.L__gpr_num_\rs2 << INSN_S_RS2_SHIFT) | \
+ (.L__gpr_num_\rs1 << INSN_S_RS1_SHIFT) | \
+ ((\simm12 & 0x1f) << INSN_S_SIMM5_SHIFT) | \
+ (((\simm12 >> 5) & 0x7f) << INSN_S_SIMM7_SHIFT))
+ .endm
+
#endif
#define __INSN_R(...) insn_r __VA_ARGS__
#define __INSN_I(...) insn_i __VA_ARGS__
+#define __INSN_S(...) insn_s __VA_ARGS__
#else /* ! __ASSEMBLY__ */
@@ -66,6 +87,9 @@
#define __INSN_I(opcode, func3, rd, rs1, simm12) \
".insn i " opcode ", " func3 ", " rd ", " rs1 ", " simm12 "\n"
+#define __INSN_S(opcode, func3, rs2, simm12, rs1) \
+ ".insn s " opcode ", " func3 ", " rs2 ", " simm12 "(" rs1 ")\n"
+
#else
#include <linux/stringify.h>
@@ -92,12 +116,26 @@
" (\\simm12 << " __stringify(INSN_I_SIMM12_SHIFT) "))\n" \
" .endm\n"
+#define DEFINE_INSN_S \
+ __DEFINE_ASM_GPR_NUMS \
+" .macro insn_s, opcode, func3, rs2, simm12, rs1\n" \
+" .4byte ((\\opcode << " __stringify(INSN_S_OPCODE_SHIFT) ") |" \
+" (\\func3 << " __stringify(INSN_S_FUNC3_SHIFT) ") |" \
+" (.L__gpr_num_\\rs2 << " __stringify(INSN_S_RS2_SHIFT) ") |" \
+" (.L__gpr_num_\\rs1 << " __stringify(INSN_S_RS1_SHIFT) ") |" \
+" ((\\simm12 & 0x1f) << " __stringify(INSN_S_SIMM5_SHIFT) ") |" \
+" (((\\simm12 >> 5) & 0x7f) << " __stringify(INSN_S_SIMM7_SHIFT) "))\n" \
+" .endm\n"
+
#define UNDEFINE_INSN_R \
" .purgem insn_r\n"
#define UNDEFINE_INSN_I \
" .purgem insn_i\n"
+#define UNDEFINE_INSN_S \
+" .purgem insn_s\n"
+
#define __INSN_R(opcode, func3, func7, rd, rs1, rs2) \
DEFINE_INSN_R \
"insn_r " opcode ", " func3 ", " func7 ", " rd ", " rs1 ", " rs2 "\n" \
@@ -108,6 +146,11 @@
"insn_i " opcode ", " func3 ", " rd ", " rs1 ", " simm12 "\n" \
UNDEFINE_INSN_I
+#define __INSN_S(opcode, func3, rs2, simm12, rs1) \
+ DEFINE_INSN_S \
+ "insn_s " opcode ", " func3 ", " rs2 ", " simm12 ", " rs1 "\n" \
+ UNDEFINE_INSN_S
+
#endif
#endif /* ! __ASSEMBLY__ */
@@ -120,6 +163,10 @@
__INSN_I(RV_##opcode, RV_##func3, RV_##rd, \
RV_##rs1, RV_##simm12)
+#define INSN_S(opcode, func3, rs2, simm12, rs1) \
+ __INSN_S(RV_##opcode, RV_##func3, RV_##rs2, \
+ RV_##simm12, RV_##rs1)
+
#define RV_OPCODE(v) __ASM_STR(v)
#define RV_FUNC3(v) __ASM_STR(v)
#define RV_FUNC7(v) __ASM_STR(v)
@@ -133,6 +180,7 @@
#define RV___RS2(v) __RV_REG(v)
#define RV_OPCODE_MISC_MEM RV_OPCODE(15)
+#define RV_OPCODE_OP_IMM RV_OPCODE(19)
#define RV_OPCODE_SYSTEM RV_OPCODE(115)
#define HFENCE_VVMA(vaddr, asid) \
@@ -196,6 +244,18 @@
INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \
RS1(base), SIMM12(4))
+#define PREFETCH_I(base, offset) \
+ INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(0), \
+ SIMM12((offset) & 0xfe0), RS1(base))
+
+#define PREFETCH_R(base, offset) \
+ INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(1), \
+ SIMM12((offset) & 0xfe0), RS1(base))
+
+#define PREFETCH_W(base, offset) \
+ INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(3), \
+ SIMM12((offset) & 0xfe0), RS1(base))
+
#define RISCV_PAUSE ".4byte 0x100000f"
#define ZAWRS_WRS_NTO ".4byte 0x00d00073"
#define ZAWRS_WRS_STO ".4byte 0x01d00073"
@@ -203,4 +263,10 @@
#define RISCV_INSN_NOP4 _AC(0x00000013, U)
+#ifndef __ASSEMBLY__
+#define nop() __asm__ __volatile__ ("nop")
+#define __nops(n) ".rept " #n "\nnop\n.endr\n"
+#define nops(n) __asm__ __volatile__ (__nops(n))
+#endif
+
#endif /* __ASM_INSN_DEF_H */
diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h
index 2b56769cb530..b9ee8346cc8c 100644
--- a/arch/riscv/include/asm/kexec.h
+++ b/arch/riscv/include/asm/kexec.h
@@ -56,6 +56,7 @@ extern riscv_kexec_method riscv_kexec_norelocate;
#ifdef CONFIG_KEXEC_FILE
extern const struct kexec_file_ops elf_kexec_ops;
+extern const struct kexec_file_ops image_kexec_ops;
struct purgatory_info;
int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
@@ -67,6 +68,11 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
struct kimage;
int arch_kimage_file_post_load_cleanup(struct kimage *image);
#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
+
+int load_extra_segments(struct kimage *image, unsigned long kernel_start,
+ unsigned long kernel_len, char *initrd,
+ unsigned long initrd_len, char *cmdline,
+ unsigned long cmdline_len);
#endif
#endif
diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h
index 3b643b9efc07..5acce285e56e 100644
--- a/arch/riscv/include/asm/kvm_aia.h
+++ b/arch/riscv/include/asm/kvm_aia.h
@@ -87,6 +87,9 @@ DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available);
extern struct kvm_device_ops kvm_riscv_aia_device_ops;
+bool kvm_riscv_vcpu_aia_imsic_has_interrupt(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_aia_imsic_load(struct kvm_vcpu *vcpu, int cpu);
+void kvm_riscv_vcpu_aia_imsic_put(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu);
int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu);
@@ -161,7 +164,6 @@ void kvm_riscv_aia_destroy_vm(struct kvm *kvm);
int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner,
void __iomem **hgei_va, phys_addr_t *hgei_pa);
void kvm_riscv_aia_free_hgei(int cpu, int hgei);
-void kvm_riscv_aia_wakeon_hgei(struct kvm_vcpu *owner, bool enable);
void kvm_riscv_aia_enable(void);
void kvm_riscv_aia_disable(void);
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 85cfebc32e4c..bcbf8b1ec115 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -306,6 +306,9 @@ static inline bool kvm_arch_pmi_in_guest(struct kvm_vcpu *vcpu)
return IS_ENABLED(CONFIG_GUEST_PERF_EVENTS) && !!vcpu;
}
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+
#define KVM_RISCV_GSTAGE_TLB_MIN_ORDER 12
void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid,
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index 188fadc1c21f..7de05db7d3bd 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -184,7 +184,7 @@ static inline int pud_none(pud_t pud)
static inline int pud_bad(pud_t pud)
{
- return !pud_present(pud);
+ return !pud_present(pud) || (pud_val(pud) & _PAGE_LEAF);
}
#define pud_leaf pud_leaf
@@ -399,6 +399,7 @@ p4d_t *p4d_offset(pgd_t *pgd, unsigned long address);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pte_devmap(pte_t pte);
static inline pte_t pmd_pte(pmd_t pmd);
+static inline pte_t pud_pte(pud_t pud);
static inline int pmd_devmap(pmd_t pmd)
{
@@ -407,7 +408,7 @@ static inline int pmd_devmap(pmd_t pmd)
static inline int pud_devmap(pud_t pud)
{
- return 0;
+ return pte_devmap(pud_pte(pud));
}
static inline int pgd_devmap(pgd_t pgd)
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index f19240fd018e..5bd5aae60d53 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -900,6 +900,103 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
#define pmdp_collapse_flush pmdp_collapse_flush
extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
+
+static inline pud_t pud_wrprotect(pud_t pud)
+{
+ return pte_pud(pte_wrprotect(pud_pte(pud)));
+}
+
+static inline int pud_trans_huge(pud_t pud)
+{
+ return pud_leaf(pud);
+}
+
+static inline int pud_dirty(pud_t pud)
+{
+ return pte_dirty(pud_pte(pud));
+}
+
+static inline pud_t pud_mkyoung(pud_t pud)
+{
+ return pte_pud(pte_mkyoung(pud_pte(pud)));
+}
+
+static inline pud_t pud_mkold(pud_t pud)
+{
+ return pte_pud(pte_mkold(pud_pte(pud)));
+}
+
+static inline pud_t pud_mkdirty(pud_t pud)
+{
+ return pte_pud(pte_mkdirty(pud_pte(pud)));
+}
+
+static inline pud_t pud_mkclean(pud_t pud)
+{
+ return pte_pud(pte_mkclean(pud_pte(pud)));
+}
+
+static inline pud_t pud_mkwrite(pud_t pud)
+{
+ return pte_pud(pte_mkwrite_novma(pud_pte(pud)));
+}
+
+static inline pud_t pud_mkhuge(pud_t pud)
+{
+ return pud;
+}
+
+static inline pud_t pud_mkdevmap(pud_t pud)
+{
+ return pte_pud(pte_mkdevmap(pud_pte(pud)));
+}
+
+static inline int pudp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp,
+ pud_t entry, int dirty)
+{
+ return ptep_set_access_flags(vma, address, (pte_t *)pudp, pud_pte(entry), dirty);
+}
+
+static inline int pudp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp)
+{
+ return ptep_test_and_clear_young(vma, address, (pte_t *)pudp);
+}
+
+static inline int pud_young(pud_t pud)
+{
+ return pte_young(pud_pte(pud));
+}
+
+static inline void update_mmu_cache_pud(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp)
+{
+ pte_t *ptep = (pte_t *)pudp;
+
+ update_mmu_cache(vma, address, ptep);
+}
+
+static inline pud_t pudp_establish(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp, pud_t pud)
+{
+ page_table_check_pud_set(vma->vm_mm, pudp, pud);
+ return __pud(atomic_long_xchg((atomic_long_t *)pudp, pud_val(pud)));
+}
+
+static inline pud_t pud_mkinvalid(pud_t pud)
+{
+ return __pud(pud_val(pud) & ~(_PAGE_PRESENT | _PAGE_PROT_NONE));
+}
+
+extern pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp);
+
+static inline pud_t pud_modify(pud_t pud, pgprot_t newprot)
+{
+ return pte_pud(pte_modify(pud_pte(pud), newprot));
+}
+
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
@@ -931,7 +1028,7 @@ extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
@@ -978,7 +1075,6 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
*/
#ifdef CONFIG_64BIT
#define TASK_SIZE_64 (PGDIR_SIZE * PTRS_PER_PGD / 2)
-#define TASK_SIZE_MAX LONG_MAX
#ifdef CONFIG_COMPAT
#define TASK_SIZE_32 (_AC(0x80000000, UL) - PAGE_SIZE)
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index 5f56eb9d114a..24d3af4d3807 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -13,6 +13,9 @@
#include <vdso/processor.h>
#include <asm/ptrace.h>
+#include <asm/insn-def.h>
+#include <asm/alternative-macros.h>
+#include <asm/hwcap.h>
#define arch_get_mmap_end(addr, len, flags) \
({ \
@@ -52,7 +55,6 @@
#endif
#ifndef __ASSEMBLY__
-#include <linux/cpumask.h>
struct task_struct;
struct pt_regs;
@@ -79,6 +81,10 @@ struct pt_regs;
* Thus, the task does not own preempt_v. Any use of Vector will have to
* save preempt_v, if dirty, and fallback to non-preemptible kernel-mode
* Vector.
+ * - bit 29: The thread voluntarily calls schedule() while holding an active
+ * preempt_v. All preempt_v context should be dropped in such case because
+ * V-regs are caller-saved. Only sstatus.VS=ON is persisted across a
+ * schedule() call.
* - bit 30: The in-kernel preempt_v context is saved, and requries to be
* restored when returning to the context that owns the preempt_v.
* - bit 31: The in-kernel preempt_v context is dirty, as signaled by the
@@ -93,6 +99,7 @@ struct pt_regs;
#define RISCV_PREEMPT_V 0x00000100
#define RISCV_PREEMPT_V_DIRTY 0x80000000
#define RISCV_PREEMPT_V_NEED_RESTORE 0x40000000
+#define RISCV_PREEMPT_V_IN_SCHEDULE 0x20000000
/* CPU-specific state of a task */
struct thread_struct {
@@ -103,6 +110,7 @@ struct thread_struct {
struct __riscv_d_ext_state fstate;
unsigned long bad_cause;
unsigned long envcfg;
+ unsigned long sum;
u32 riscv_v_flags;
u32 vstate_ctrl;
struct __riscv_v_ext_state vstate;
@@ -136,6 +144,27 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->epc)
#define KSTK_ESP(tsk) (task_pt_regs(tsk)->sp)
+#define PREFETCH_ASM(x) \
+ ALTERNATIVE(__nops(1), PREFETCH_R(x, 0), 0, \
+ RISCV_ISA_EXT_ZICBOP, CONFIG_RISCV_ISA_ZICBOP)
+
+#define PREFETCHW_ASM(x) \
+ ALTERNATIVE(__nops(1), PREFETCH_W(x, 0), 0, \
+ RISCV_ISA_EXT_ZICBOP, CONFIG_RISCV_ISA_ZICBOP)
+
+#ifdef CONFIG_RISCV_ISA_ZICBOP
+#define ARCH_HAS_PREFETCH
+static inline void prefetch(const void *x)
+{
+ __asm__ __volatile__(PREFETCH_ASM(%0) : : "r" (x) : "memory");
+}
+
+#define ARCH_HAS_PREFETCHW
+static inline void prefetchw(const void *x)
+{
+ __asm__ __volatile__(PREFETCHW_ASM(%0) : : "r" (x) : "memory");
+}
+#endif /* CONFIG_RISCV_ISA_ZICBOP */
/* Do necessary setup to start up a newly executed thread. */
extern void start_thread(struct pt_regs *regs,
diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h
index 2910231977cb..a7dc0e330757 100644
--- a/arch/riscv/include/asm/ptrace.h
+++ b/arch/riscv/include/asm/ptrace.h
@@ -175,7 +175,7 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs,
return 0;
}
-static inline int regs_irqs_disabled(struct pt_regs *regs)
+static __always_inline bool regs_irqs_disabled(struct pt_regs *regs)
{
return !(regs->status & SR_PIE);
}
diff --git a/arch/riscv/include/asm/runtime-const.h b/arch/riscv/include/asm/runtime-const.h
index 451fd76b8811..d766e2b9e6df 100644
--- a/arch/riscv/include/asm/runtime-const.h
+++ b/arch/riscv/include/asm/runtime-const.h
@@ -206,7 +206,7 @@ static inline void __runtime_fixup_32(__le16 *lui_parcel, __le16 *addi_parcel, u
addi_insn_mask &= 0x07fff;
}
- if (lower_immediate & 0x00000fff) {
+ if (lower_immediate & 0x00000fff || lui_insn == RISCV_INSN_NOP4) {
/* replace upper 12 bits of addi with lower 12 bits of val */
addi_insn &= addi_insn_mask;
addi_insn |= (lower_immediate & 0x00000fff) << 20;
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 3d250824178b..341e74238aa0 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -35,6 +35,7 @@ enum sbi_ext_id {
SBI_EXT_DBCN = 0x4442434E,
SBI_EXT_STA = 0x535441,
SBI_EXT_NACL = 0x4E41434C,
+ SBI_EXT_FWFT = 0x46574654,
/* Experimentals extensions must lie within this range */
SBI_EXT_EXPERIMENTAL_START = 0x08000000,
@@ -402,6 +403,33 @@ enum sbi_ext_nacl_feature {
#define SBI_NACL_SHMEM_SRET_X(__i) ((__riscv_xlen / 8) * (__i))
#define SBI_NACL_SHMEM_SRET_X_LAST 31
+/* SBI function IDs for FW feature extension */
+#define SBI_EXT_FWFT_SET 0x0
+#define SBI_EXT_FWFT_GET 0x1
+
+enum sbi_fwft_feature_t {
+ SBI_FWFT_MISALIGNED_EXC_DELEG = 0x0,
+ SBI_FWFT_LANDING_PAD = 0x1,
+ SBI_FWFT_SHADOW_STACK = 0x2,
+ SBI_FWFT_DOUBLE_TRAP = 0x3,
+ SBI_FWFT_PTE_AD_HW_UPDATING = 0x4,
+ SBI_FWFT_POINTER_MASKING_PMLEN = 0x5,
+ SBI_FWFT_LOCAL_RESERVED_START = 0x6,
+ SBI_FWFT_LOCAL_RESERVED_END = 0x3fffffff,
+ SBI_FWFT_LOCAL_PLATFORM_START = 0x40000000,
+ SBI_FWFT_LOCAL_PLATFORM_END = 0x7fffffff,
+
+ SBI_FWFT_GLOBAL_RESERVED_START = 0x80000000,
+ SBI_FWFT_GLOBAL_RESERVED_END = 0xbfffffff,
+ SBI_FWFT_GLOBAL_PLATFORM_START = 0xc0000000,
+ SBI_FWFT_GLOBAL_PLATFORM_END = 0xffffffff,
+};
+
+#define SBI_FWFT_PLATFORM_FEATURE_BIT BIT(30)
+#define SBI_FWFT_GLOBAL_FEATURE_BIT BIT(31)
+
+#define SBI_FWFT_SET_FLAG_LOCK BIT(0)
+
/* SBI spec version fields */
#define SBI_SPEC_VERSION_DEFAULT 0x1
#define SBI_SPEC_VERSION_MAJOR_SHIFT 24
@@ -419,6 +447,11 @@ enum sbi_ext_nacl_feature {
#define SBI_ERR_ALREADY_STARTED -7
#define SBI_ERR_ALREADY_STOPPED -8
#define SBI_ERR_NO_SHMEM -9
+#define SBI_ERR_INVALID_STATE -10
+#define SBI_ERR_BAD_RANGE -11
+#define SBI_ERR_TIMEOUT -12
+#define SBI_ERR_IO -13
+#define SBI_ERR_DENIED_LOCKED -14
extern unsigned long sbi_spec_version;
struct sbiret {
@@ -470,6 +503,23 @@ int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask,
unsigned long asid);
long sbi_probe_extension(int ext);
+int sbi_fwft_set(u32 feature, unsigned long value, unsigned long flags);
+int sbi_fwft_set_cpumask(const cpumask_t *mask, u32 feature,
+ unsigned long value, unsigned long flags);
+/**
+ * sbi_fwft_set_online_cpus() - Set a feature on all online cpus
+ * @feature: The feature to be set
+ * @value: The feature value to be set
+ * @flags: FWFT feature set flags
+ *
+ * Return: 0 on success, appropriate linux error code otherwise.
+ */
+static inline int sbi_fwft_set_online_cpus(u32 feature, unsigned long value,
+ unsigned long flags)
+{
+ return sbi_fwft_set_cpumask(cpu_online_mask, feature, value, flags);
+}
+
/* Check if current SBI specification version is 0.1 or not */
static inline int sbi_spec_is_0_1(void)
{
@@ -503,11 +553,21 @@ static inline int sbi_err_map_linux_errno(int err)
case SBI_SUCCESS:
return 0;
case SBI_ERR_DENIED:
+ case SBI_ERR_DENIED_LOCKED:
return -EPERM;
case SBI_ERR_INVALID_PARAM:
+ case SBI_ERR_INVALID_STATE:
return -EINVAL;
+ case SBI_ERR_BAD_RANGE:
+ return -ERANGE;
case SBI_ERR_INVALID_ADDRESS:
return -EFAULT;
+ case SBI_ERR_NO_SHMEM:
+ return -ENOMEM;
+ case SBI_ERR_TIMEOUT:
+ return -ETIMEDOUT;
+ case SBI_ERR_IO:
+ return -EIO;
case SBI_ERR_NOT_SUPPORTED:
case SBI_ERR_FAILURE:
default:
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index ce0dd0fed764..1a20dd746a49 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -56,6 +56,8 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end);
#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end);
+void flush_pud_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end);
#endif
bool arch_tlbbatch_should_defer(struct mm_struct *mm);
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index fee56b0c8058..b88a6218b7f2 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -62,6 +62,19 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm, unsigne
__asm__ __volatile__ ("csrc sstatus, %0" : : "r" (SR_SUM) : "memory")
/*
+ * This is the smallest unsigned integer type that can fit a value
+ * (up to 'long long')
+ */
+#define __inttype(x) __typeof__( \
+ __typefits(x, char, \
+ __typefits(x, short, \
+ __typefits(x, int, \
+ __typefits(x, long, 0ULL)))))
+
+#define __typefits(x, type, not) \
+ __builtin_choose_expr(sizeof(x) <= sizeof(type), (unsigned type)0, not)
+
+/*
* The exception table consists of pairs of addresses: the first is the
* address of an instruction that is allowed to fault, and the second is
* the address at which the program should continue. No registers are
@@ -83,27 +96,59 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm, unsigne
* call.
*/
-#define __get_user_asm(insn, x, ptr, err) \
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+#define __get_user_asm(insn, x, ptr, label) \
+ asm_goto_output( \
+ "1:\n" \
+ " " insn " %0, %1\n" \
+ _ASM_EXTABLE_UACCESS_ERR(1b, %l2, %0) \
+ : "=&r" (x) \
+ : "m" (*(ptr)) : : label)
+#else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+#define __get_user_asm(insn, x, ptr, label) \
do { \
- __typeof__(x) __x; \
+ long __gua_err = 0; \
__asm__ __volatile__ ( \
"1:\n" \
" " insn " %1, %2\n" \
"2:\n" \
_ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %0, %1) \
- : "+r" (err), "=&r" (__x) \
+ : "+r" (__gua_err), "=&r" (x) \
: "m" (*(ptr))); \
- (x) = __x; \
+ if (__gua_err) \
+ goto label; \
} while (0)
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
#ifdef CONFIG_64BIT
-#define __get_user_8(x, ptr, err) \
- __get_user_asm("ld", x, ptr, err)
+#define __get_user_8(x, ptr, label) \
+ __get_user_asm("ld", x, ptr, label)
#else /* !CONFIG_64BIT */
-#define __get_user_8(x, ptr, err) \
+
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+#define __get_user_8(x, ptr, label) \
do { \
u32 __user *__ptr = (u32 __user *)(ptr); \
u32 __lo, __hi; \
+ asm_goto_output( \
+ "1:\n" \
+ " lw %0, %2\n" \
+ "2:\n" \
+ " lw %1, %3\n" \
+ _ASM_EXTABLE_UACCESS_ERR(1b, %l4, %0) \
+ _ASM_EXTABLE_UACCESS_ERR(2b, %l4, %0) \
+ : "=&r" (__lo), "=r" (__hi) \
+ : "m" (__ptr[__LSW]), "m" (__ptr[__MSW]) \
+ : : label); \
+ (x) = (__typeof__(x))((__typeof__((x) - (x)))( \
+ (((u64)__hi << 32) | __lo))); \
+} while (0)
+#else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+#define __get_user_8(x, ptr, label) \
+do { \
+ u32 __user *__ptr = (u32 __user *)(ptr); \
+ u32 __lo, __hi; \
+ long __gu8_err = 0; \
__asm__ __volatile__ ( \
"1:\n" \
" lw %1, %3\n" \
@@ -112,35 +157,62 @@ do { \
"3:\n" \
_ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 3b, %0, %1) \
_ASM_EXTABLE_UACCESS_ERR_ZERO(2b, 3b, %0, %1) \
- : "+r" (err), "=&r" (__lo), "=r" (__hi) \
+ : "+r" (__gu8_err), "=&r" (__lo), "=r" (__hi) \
: "m" (__ptr[__LSW]), "m" (__ptr[__MSW])); \
- if (err) \
+ if (__gu8_err) { \
__hi = 0; \
- (x) = (__typeof__(x))((__typeof__((x)-(x)))( \
+ goto label; \
+ } \
+ (x) = (__typeof__(x))((__typeof__((x) - (x)))( \
(((u64)__hi << 32) | __lo))); \
} while (0)
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
#endif /* CONFIG_64BIT */
-#define __get_user_nocheck(x, __gu_ptr, __gu_err) \
+unsigned long __must_check __asm_copy_to_user_sum_enabled(void __user *to,
+ const void *from, unsigned long n);
+unsigned long __must_check __asm_copy_from_user_sum_enabled(void *to,
+ const void __user *from, unsigned long n);
+
+#define __get_user_nocheck(x, __gu_ptr, label) \
do { \
+ if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \
+ !IS_ALIGNED((uintptr_t)__gu_ptr, sizeof(*__gu_ptr))) { \
+ if (__asm_copy_from_user_sum_enabled(&(x), __gu_ptr, sizeof(*__gu_ptr))) \
+ goto label; \
+ break; \
+ } \
switch (sizeof(*__gu_ptr)) { \
case 1: \
- __get_user_asm("lb", (x), __gu_ptr, __gu_err); \
+ __get_user_asm("lb", (x), __gu_ptr, label); \
break; \
case 2: \
- __get_user_asm("lh", (x), __gu_ptr, __gu_err); \
+ __get_user_asm("lh", (x), __gu_ptr, label); \
break; \
case 4: \
- __get_user_asm("lw", (x), __gu_ptr, __gu_err); \
+ __get_user_asm("lw", (x), __gu_ptr, label); \
break; \
case 8: \
- __get_user_8((x), __gu_ptr, __gu_err); \
+ __get_user_8((x), __gu_ptr, label); \
break; \
default: \
BUILD_BUG(); \
} \
} while (0)
+#define __get_user_error(x, ptr, err) \
+do { \
+ __label__ __gu_failed; \
+ \
+ __get_user_nocheck(x, ptr, __gu_failed); \
+ err = 0; \
+ break; \
+__gu_failed: \
+ x = 0; \
+ err = -EFAULT; \
+} while (0)
+
/**
* __get_user: - Get a simple variable from user space, with less checking.
* @x: Variable to store result.
@@ -165,13 +237,16 @@ do { \
({ \
const __typeof__(*(ptr)) __user *__gu_ptr = untagged_addr(ptr); \
long __gu_err = 0; \
+ __typeof__(x) __gu_val; \
\
__chk_user_ptr(__gu_ptr); \
\
__enable_user_access(); \
- __get_user_nocheck(x, __gu_ptr, __gu_err); \
+ __get_user_error(__gu_val, __gu_ptr, __gu_err); \
__disable_user_access(); \
\
+ (x) = __gu_val; \
+ \
__gu_err; \
})
@@ -201,61 +276,73 @@ do { \
((x) = (__force __typeof__(x))0, -EFAULT); \
})
-#define __put_user_asm(insn, x, ptr, err) \
+#define __put_user_asm(insn, x, ptr, label) \
do { \
__typeof__(*(ptr)) __x = x; \
- __asm__ __volatile__ ( \
+ asm goto( \
"1:\n" \
- " " insn " %z2, %1\n" \
- "2:\n" \
- _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %0) \
- : "+r" (err), "=m" (*(ptr)) \
- : "rJ" (__x)); \
+ " " insn " %z0, %1\n" \
+ _ASM_EXTABLE(1b, %l2) \
+ : : "rJ" (__x), "m"(*(ptr)) : : label); \
} while (0)
#ifdef CONFIG_64BIT
-#define __put_user_8(x, ptr, err) \
- __put_user_asm("sd", x, ptr, err)
+#define __put_user_8(x, ptr, label) \
+ __put_user_asm("sd", x, ptr, label)
#else /* !CONFIG_64BIT */
-#define __put_user_8(x, ptr, err) \
+#define __put_user_8(x, ptr, label) \
do { \
u32 __user *__ptr = (u32 __user *)(ptr); \
u64 __x = (__typeof__((x)-(x)))(x); \
- __asm__ __volatile__ ( \
+ asm goto( \
"1:\n" \
- " sw %z3, %1\n" \
+ " sw %z0, %2\n" \
"2:\n" \
- " sw %z4, %2\n" \
- "3:\n" \
- _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %0) \
- _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %0) \
- : "+r" (err), \
- "=m" (__ptr[__LSW]), \
- "=m" (__ptr[__MSW]) \
- : "rJ" (__x), "rJ" (__x >> 32)); \
+ " sw %z1, %3\n" \
+ _ASM_EXTABLE(1b, %l4) \
+ _ASM_EXTABLE(2b, %l4) \
+ : : "rJ" (__x), "rJ" (__x >> 32), \
+ "m" (__ptr[__LSW]), \
+ "m" (__ptr[__MSW]) : : label); \
} while (0)
#endif /* CONFIG_64BIT */
-#define __put_user_nocheck(x, __gu_ptr, __pu_err) \
+#define __put_user_nocheck(x, __gu_ptr, label) \
do { \
+ if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \
+ !IS_ALIGNED((uintptr_t)__gu_ptr, sizeof(*__gu_ptr))) { \
+ __inttype(x) ___val = (__inttype(x))x; \
+ if (__asm_copy_to_user_sum_enabled(__gu_ptr, &(___val), sizeof(*__gu_ptr))) \
+ goto label; \
+ break; \
+ } \
switch (sizeof(*__gu_ptr)) { \
case 1: \
- __put_user_asm("sb", (x), __gu_ptr, __pu_err); \
+ __put_user_asm("sb", (x), __gu_ptr, label); \
break; \
case 2: \
- __put_user_asm("sh", (x), __gu_ptr, __pu_err); \
+ __put_user_asm("sh", (x), __gu_ptr, label); \
break; \
case 4: \
- __put_user_asm("sw", (x), __gu_ptr, __pu_err); \
+ __put_user_asm("sw", (x), __gu_ptr, label); \
break; \
case 8: \
- __put_user_8((x), __gu_ptr, __pu_err); \
+ __put_user_8((x), __gu_ptr, label); \
break; \
default: \
BUILD_BUG(); \
} \
} while (0)
+#define __put_user_error(x, ptr, err) \
+do { \
+ __label__ err_label; \
+ __put_user_nocheck(x, ptr, err_label); \
+ break; \
+err_label: \
+ (err) = -EFAULT; \
+} while (0)
+
/**
* __put_user: - Write a simple value into user space, with less checking.
* @x: Value to copy to user space.
@@ -286,7 +373,7 @@ do { \
__chk_user_ptr(__gu_ptr); \
\
__enable_user_access(); \
- __put_user_nocheck(__val, __gu_ptr, __pu_err); \
+ __put_user_error(__val, __gu_ptr, __pu_err); \
__disable_user_access(); \
\
__pu_err; \
@@ -351,23 +438,45 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
}
#define __get_kernel_nofault(dst, src, type, err_label) \
-do { \
- long __kr_err = 0; \
- \
- __get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \
- if (unlikely(__kr_err)) \
- goto err_label; \
-} while (0)
+ __get_user_nocheck(*((type *)(dst)), (type *)(src), err_label)
#define __put_kernel_nofault(dst, src, type, err_label) \
-do { \
- long __kr_err = 0; \
- \
- __put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \
- if (unlikely(__kr_err)) \
- goto err_label; \
+ __put_user_nocheck(*((type *)(src)), (type *)(dst), err_label)
+
+static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len)
+{
+ if (unlikely(!access_ok(ptr, len)))
+ return 0;
+ __enable_user_access();
+ return 1;
+}
+#define user_access_begin user_access_begin
+#define user_access_end __disable_user_access
+
+static inline unsigned long user_access_save(void) { return 0UL; }
+static inline void user_access_restore(unsigned long enabled) { }
+
+/*
+ * We want the unsafe accessors to always be inlined and use
+ * the error labels - thus the macro games.
+ */
+#define unsafe_put_user(x, ptr, label) \
+ __put_user_nocheck(x, (ptr), label)
+
+#define unsafe_get_user(x, ptr, label) do { \
+ __inttype(*(ptr)) __gu_val; \
+ __get_user_nocheck(__gu_val, (ptr), label); \
+ (x) = (__force __typeof__(*(ptr)))__gu_val; \
} while (0)
+#define unsafe_copy_to_user(_dst, _src, _len, label) \
+ if (__asm_copy_to_user_sum_enabled(_dst, _src, _len)) \
+ goto label;
+
+#define unsafe_copy_from_user(_dst, _src, _len, label) \
+ if (__asm_copy_from_user_sum_enabled(_dst, _src, _len)) \
+ goto label;
+
#else /* CONFIG_MMU */
#include <asm-generic/uaccess.h>
#endif /* CONFIG_MMU */
diff --git a/arch/riscv/include/asm/vdso/getrandom.h b/arch/riscv/include/asm/vdso/getrandom.h
new file mode 100644
index 000000000000..c6d66895c1f5
--- /dev/null
+++ b/arch/riscv/include/asm/vdso/getrandom.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2025 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved.
+ */
+#ifndef __ASM_VDSO_GETRANDOM_H
+#define __ASM_VDSO_GETRANDOM_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/unistd.h>
+
+static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, unsigned int _flags)
+{
+ register long ret asm("a0");
+ register long nr asm("a7") = __NR_getrandom;
+ register void *buffer asm("a0") = _buffer;
+ register size_t len asm("a1") = _len;
+ register unsigned int flags asm("a2") = _flags;
+
+ asm volatile ("ecall\n"
+ : "=r" (ret)
+ : "r" (nr), "r" (buffer), "r" (len), "r" (flags)
+ : "memory");
+
+ return ret;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETRANDOM_H */
diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
index e8a83f55be2b..b61786d43c20 100644
--- a/arch/riscv/include/asm/vector.h
+++ b/arch/riscv/include/asm/vector.h
@@ -120,6 +120,11 @@ static __always_inline void riscv_v_disable(void)
csr_clear(CSR_SSTATUS, SR_VS);
}
+static __always_inline bool riscv_v_is_on(void)
+{
+ return !!(csr_read(CSR_SSTATUS) & SR_VS);
+}
+
static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest)
{
asm volatile (
@@ -200,11 +205,11 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
THEAD_VSETVLI_T4X0E8M8D1
THEAD_VSB_V_V0T0
"add t0, t0, t4\n\t"
- THEAD_VSB_V_V0T0
+ THEAD_VSB_V_V8T0
"add t0, t0, t4\n\t"
- THEAD_VSB_V_V0T0
+ THEAD_VSB_V_V16T0
"add t0, t0, t4\n\t"
- THEAD_VSB_V_V0T0
+ THEAD_VSB_V_V24T0
: : "r" (datap) : "memory", "t0", "t4");
} else {
asm volatile (
@@ -236,11 +241,11 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
THEAD_VSETVLI_T4X0E8M8D1
THEAD_VLB_V_V0T0
"add t0, t0, t4\n\t"
- THEAD_VLB_V_V0T0
+ THEAD_VLB_V_V8T0
"add t0, t0, t4\n\t"
- THEAD_VLB_V_V0T0
+ THEAD_VLB_V_V16T0
"add t0, t0, t4\n\t"
- THEAD_VLB_V_V0T0
+ THEAD_VLB_V_V24T0
: : "r" (datap) : "memory", "t0", "t4");
} else {
asm volatile (
@@ -366,6 +371,11 @@ static inline void __switch_to_vector(struct task_struct *prev,
struct pt_regs *regs;
if (riscv_preempt_v_started(prev)) {
+ if (riscv_v_is_on()) {
+ WARN_ON(prev->thread.riscv_v_flags & RISCV_V_CTX_DEPTH_MASK);
+ riscv_v_disable();
+ prev->thread.riscv_v_flags |= RISCV_PREEMPT_V_IN_SCHEDULE;
+ }
if (riscv_preempt_v_dirty(prev)) {
__riscv_v_vstate_save(&prev->thread.kernel_vstate,
prev->thread.kernel_vstate.datap);
@@ -376,10 +386,16 @@ static inline void __switch_to_vector(struct task_struct *prev,
riscv_v_vstate_save(&prev->thread.vstate, regs);
}
- if (riscv_preempt_v_started(next))
- riscv_preempt_v_set_restore(next);
- else
+ if (riscv_preempt_v_started(next)) {
+ if (next->thread.riscv_v_flags & RISCV_PREEMPT_V_IN_SCHEDULE) {
+ next->thread.riscv_v_flags &= ~RISCV_PREEMPT_V_IN_SCHEDULE;
+ riscv_v_enable();
+ } else {
+ riscv_preempt_v_set_restore(next);
+ }
+ } else {
riscv_v_vstate_set_restore(next, task_pt_regs(next));
+ }
}
void riscv_v_vstate_ctrl_init(struct task_struct *tsk);
diff --git a/arch/riscv/include/asm/vendor_extensions/sifive.h b/arch/riscv/include/asm/vendor_extensions/sifive.h
new file mode 100644
index 000000000000..ac00e500361c
--- /dev/null
+++ b/arch/riscv/include/asm/vendor_extensions/sifive.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RISCV_VENDOR_EXTENSIONS_SIFIVE_H
+#define _ASM_RISCV_VENDOR_EXTENSIONS_SIFIVE_H
+
+#include <asm/vendor_extensions.h>
+
+#include <linux/types.h>
+
+#define RISCV_ISA_VENDOR_EXT_XSFVQMACCDOD 0
+#define RISCV_ISA_VENDOR_EXT_XSFVQMACCQOQ 1
+#define RISCV_ISA_VENDOR_EXT_XSFVFNRCLIPXFQF 2
+#define RISCV_ISA_VENDOR_EXT_XSFVFWMACCQQQ 3
+
+extern struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_sifive;
+
+#endif
diff --git a/arch/riscv/include/asm/vendor_extensions/sifive_hwprobe.h b/arch/riscv/include/asm/vendor_extensions/sifive_hwprobe.h
new file mode 100644
index 000000000000..90a61abd033c
--- /dev/null
+++ b/arch/riscv/include/asm/vendor_extensions/sifive_hwprobe.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RISCV_VENDOR_EXTENSIONS_SIFIVE_HWPROBE_H
+#define _ASM_RISCV_VENDOR_EXTENSIONS_SIFIVE_HWPROBE_H
+
+#include <linux/cpumask.h>
+
+#include <uapi/asm/hwprobe.h>
+
+#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE
+void hwprobe_isa_vendor_ext_sifive_0(struct riscv_hwprobe *pair, const struct cpumask *cpus);
+#else
+static inline void hwprobe_isa_vendor_ext_sifive_0(struct riscv_hwprobe *pair,
+ const struct cpumask *cpus)
+{
+ pair->value = 0;
+}
+#endif
+
+#endif
diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
index 3c2fce939673..aaf6ad970499 100644
--- a/arch/riscv/include/uapi/asm/hwprobe.h
+++ b/arch/riscv/include/uapi/asm/hwprobe.h
@@ -81,6 +81,7 @@ struct riscv_hwprobe {
#define RISCV_HWPROBE_EXT_ZICBOM (1ULL << 55)
#define RISCV_HWPROBE_EXT_ZAAMO (1ULL << 56)
#define RISCV_HWPROBE_EXT_ZALRSC (1ULL << 57)
+#define RISCV_HWPROBE_EXT_ZABHA (1ULL << 58)
#define RISCV_HWPROBE_KEY_CPUPERF_0 5
#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0)
#define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0)
@@ -104,6 +105,7 @@ struct riscv_hwprobe {
#define RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED 4
#define RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0 11
#define RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE 12
+#define RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0 13
/* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
/* Flags */
diff --git a/arch/riscv/include/uapi/asm/vendor/sifive.h b/arch/riscv/include/uapi/asm/vendor/sifive.h
new file mode 100644
index 000000000000..9f3278a4b298
--- /dev/null
+++ b/arch/riscv/include/uapi/asm/vendor/sifive.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#define RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCDOD (1 << 0)
+#define RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCQOQ (1 << 1)
+#define RISCV_HWPROBE_VENDOR_EXT_XSFVFNRCLIPXFQF (1 << 2)
+#define RISCV_HWPROBE_VENDOR_EXT_XSFVFWMACCQQQ (1 << 3)
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index f7480c9c6f8d..c7b542573407 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -43,7 +43,7 @@ CFLAGS_sbi_ecall.o += -D__NO_FORTIFY
endif
endif
-extra-y += vmlinux.lds
+always-$(KBUILD_BUILTIN) += vmlinux.lds
obj-y += head.o
obj-y += soc.o
@@ -107,7 +107,7 @@ obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o
obj-$(CONFIG_PARAVIRT) += paravirt.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o
-obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o
+obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o kexec_image.o machine_kexec_file.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index 16490755304e..6e8c0d6feae9 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -34,6 +34,7 @@ void asm_offsets(void)
OFFSET(TASK_THREAD_S9, task_struct, thread.s[9]);
OFFSET(TASK_THREAD_S10, task_struct, thread.s[10]);
OFFSET(TASK_THREAD_S11, task_struct, thread.s[11]);
+ OFFSET(TASK_THREAD_SUM, task_struct, thread.sum);
OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu);
OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count);
@@ -346,6 +347,10 @@ void asm_offsets(void)
offsetof(struct task_struct, thread.s[11])
- offsetof(struct task_struct, thread.ra)
);
+ DEFINE(TASK_THREAD_SUM_RA,
+ offsetof(struct task_struct, thread.sum)
+ - offsetof(struct task_struct, thread.ra)
+ );
DEFINE(TASK_THREAD_F0_F0,
offsetof(struct task_struct, thread.fstate.f[0])
@@ -493,6 +498,12 @@ void asm_offsets(void)
DEFINE(STACKFRAME_SIZE_ON_STACK, ALIGN(sizeof(struct stackframe), STACK_ALIGN));
OFFSET(STACKFRAME_FP, stackframe, fp);
OFFSET(STACKFRAME_RA, stackframe, ra);
+#ifdef CONFIG_FUNCTION_TRACER
+ DEFINE(FTRACE_OPS_FUNC, offsetof(struct ftrace_ops, func));
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ DEFINE(FTRACE_OPS_DIRECT_CALL, offsetof(struct ftrace_ops, direct_call));
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+#endif
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
DEFINE(FREGS_SIZE_ON_STACK, ALIGN(sizeof(struct __arch_ftrace_regs), STACK_ALIGN));
@@ -501,6 +512,13 @@ void asm_offsets(void)
DEFINE(FREGS_SP, offsetof(struct __arch_ftrace_regs, sp));
DEFINE(FREGS_S0, offsetof(struct __arch_ftrace_regs, s0));
DEFINE(FREGS_T1, offsetof(struct __arch_ftrace_regs, t1));
+#ifdef CONFIG_CC_IS_CLANG
+ DEFINE(FREGS_T2, offsetof(struct __arch_ftrace_regs, t2));
+ DEFINE(FREGS_T3, offsetof(struct __arch_ftrace_regs, t3));
+ DEFINE(FREGS_T4, offsetof(struct __arch_ftrace_regs, t4));
+ DEFINE(FREGS_T5, offsetof(struct __arch_ftrace_regs, t5));
+ DEFINE(FREGS_T6, offsetof(struct __arch_ftrace_regs, t6));
+#endif
DEFINE(FREGS_A0, offsetof(struct __arch_ftrace_regs, a0));
DEFINE(FREGS_A1, offsetof(struct __arch_ftrace_regs, a1));
DEFINE(FREGS_A2, offsetof(struct __arch_ftrace_regs, a2));
diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c
index e6fbaaf54956..87d655944803 100644
--- a/arch/riscv/kernel/cpu_ops_sbi.c
+++ b/arch/riscv/kernel/cpu_ops_sbi.c
@@ -18,10 +18,10 @@ const struct cpu_operations cpu_ops_sbi;
/*
* Ordered booting via HSM brings one cpu at a time. However, cpu hotplug can
- * be invoked from multiple threads in parallel. Define a per cpu data
+ * be invoked from multiple threads in parallel. Define an array of boot data
* to handle that.
*/
-static DEFINE_PER_CPU(struct sbi_hart_boot_data, boot_data);
+static struct sbi_hart_boot_data boot_data[NR_CPUS];
static int sbi_hsm_hart_start(unsigned long hartid, unsigned long saddr,
unsigned long priv)
@@ -67,7 +67,7 @@ static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle)
unsigned long boot_addr = __pa_symbol(secondary_start_sbi);
unsigned long hartid = cpuid_to_hartid_map(cpuid);
unsigned long hsm_data;
- struct sbi_hart_boot_data *bdata = &per_cpu(boot_data, cpuid);
+ struct sbi_hart_boot_data *bdata = &boot_data[cpuid];
/* Make sure tidle is updated */
smp_mb();
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 2054f6c4b0ae..743d53415572 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -32,6 +32,7 @@
#define NUM_ALPHA_EXTS ('z' - 'a' + 1)
static bool any_cpu_has_zicboz;
+static bool any_cpu_has_zicbop;
static bool any_cpu_has_zicbom;
unsigned long elf_hwcap __read_mostly;
@@ -119,6 +120,21 @@ static int riscv_ext_zicboz_validate(const struct riscv_isa_ext_data *data,
return 0;
}
+static int riscv_ext_zicbop_validate(const struct riscv_isa_ext_data *data,
+ const unsigned long *isa_bitmap)
+{
+ if (!riscv_cbop_block_size) {
+ pr_err("Zicbop detected in ISA string, disabling as no cbop-block-size found\n");
+ return -EINVAL;
+ }
+ if (!is_power_of_2(riscv_cbop_block_size)) {
+ pr_err("Zicbop disabled as cbop-block-size present, but is not a power-of-2\n");
+ return -EINVAL;
+ }
+ any_cpu_has_zicbop = true;
+ return 0;
+}
+
static int riscv_ext_f_validate(const struct riscv_isa_ext_data *data,
const unsigned long *isa_bitmap)
{
@@ -442,6 +458,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
__RISCV_ISA_EXT_SUPERSET_VALIDATE(v, RISCV_ISA_EXT_v, riscv_v_exts, riscv_ext_vector_float_validate),
__RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h),
__RISCV_ISA_EXT_SUPERSET_VALIDATE(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts, riscv_ext_zicbom_validate),
+ __RISCV_ISA_EXT_DATA_VALIDATE(zicbop, RISCV_ISA_EXT_ZICBOP, riscv_ext_zicbop_validate),
__RISCV_ISA_EXT_SUPERSET_VALIDATE(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts, riscv_ext_zicboz_validate),
__RISCV_ISA_EXT_DATA(ziccrse, RISCV_ISA_EXT_ZICCRSE),
__RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR),
@@ -1112,6 +1129,10 @@ void __init riscv_user_isa_enable(void)
current->thread.envcfg |= ENVCFG_CBCFE;
else if (any_cpu_has_zicbom)
pr_warn("Zicbom disabled as it is unavailable on some harts\n");
+
+ if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_ZICBOP) &&
+ any_cpu_has_zicbop)
+ pr_warn("Zicbop disabled as it is unavailable on some harts\n");
}
#ifdef CONFIG_RISCV_ALTERNATIVE
diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c
deleted file mode 100644
index e783a72d051f..000000000000
--- a/arch/riscv/kernel/elf_kexec.c
+++ /dev/null
@@ -1,485 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Load ELF vmlinux file for the kexec_file_load syscall.
- *
- * Copyright (C) 2021 Huawei Technologies Co, Ltd.
- *
- * Author: Liao Chang (liaochang1@huawei.com)
- *
- * Based on kexec-tools' kexec-elf-riscv.c, heavily modified
- * for kernel.
- */
-
-#define pr_fmt(fmt) "kexec_image: " fmt
-
-#include <linux/elf.h>
-#include <linux/kexec.h>
-#include <linux/slab.h>
-#include <linux/of.h>
-#include <linux/libfdt.h>
-#include <linux/types.h>
-#include <linux/memblock.h>
-#include <linux/vmalloc.h>
-#include <asm/setup.h>
-
-int arch_kimage_file_post_load_cleanup(struct kimage *image)
-{
- kvfree(image->arch.fdt);
- image->arch.fdt = NULL;
-
- vfree(image->elf_headers);
- image->elf_headers = NULL;
- image->elf_headers_sz = 0;
-
- return kexec_image_post_load_cleanup_default(image);
-}
-
-static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr,
- struct kexec_elf_info *elf_info, unsigned long old_pbase,
- unsigned long new_pbase)
-{
- int i;
- int ret = 0;
- size_t size;
- struct kexec_buf kbuf;
- const struct elf_phdr *phdr;
-
- kbuf.image = image;
-
- for (i = 0; i < ehdr->e_phnum; i++) {
- phdr = &elf_info->proghdrs[i];
- if (phdr->p_type != PT_LOAD)
- continue;
-
- size = phdr->p_filesz;
- if (size > phdr->p_memsz)
- size = phdr->p_memsz;
-
- kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset;
- kbuf.bufsz = size;
- kbuf.buf_align = phdr->p_align;
- kbuf.mem = phdr->p_paddr - old_pbase + new_pbase;
- kbuf.memsz = phdr->p_memsz;
- kbuf.top_down = false;
- ret = kexec_add_buffer(&kbuf);
- if (ret)
- break;
- }
-
- return ret;
-}
-
-/*
- * Go through the available phsyical memory regions and find one that hold
- * an image of the specified size.
- */
-static int elf_find_pbase(struct kimage *image, unsigned long kernel_len,
- struct elfhdr *ehdr, struct kexec_elf_info *elf_info,
- unsigned long *old_pbase, unsigned long *new_pbase)
-{
- int i;
- int ret;
- struct kexec_buf kbuf;
- const struct elf_phdr *phdr;
- unsigned long lowest_paddr = ULONG_MAX;
- unsigned long lowest_vaddr = ULONG_MAX;
-
- for (i = 0; i < ehdr->e_phnum; i++) {
- phdr = &elf_info->proghdrs[i];
- if (phdr->p_type != PT_LOAD)
- continue;
-
- if (lowest_paddr > phdr->p_paddr)
- lowest_paddr = phdr->p_paddr;
-
- if (lowest_vaddr > phdr->p_vaddr)
- lowest_vaddr = phdr->p_vaddr;
- }
-
- kbuf.image = image;
- kbuf.buf_min = lowest_paddr;
- kbuf.buf_max = ULONG_MAX;
-
- /*
- * Current riscv boot protocol requires 2MB alignment for
- * RV64 and 4MB alignment for RV32
- *
- */
- kbuf.buf_align = PMD_SIZE;
- kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
- kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE);
- kbuf.top_down = false;
- ret = arch_kexec_locate_mem_hole(&kbuf);
- if (!ret) {
- *old_pbase = lowest_paddr;
- *new_pbase = kbuf.mem;
- image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem;
- }
- return ret;
-}
-
-#ifdef CONFIG_CRASH_DUMP
-static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
-{
- unsigned int *nr_ranges = arg;
-
- (*nr_ranges)++;
- return 0;
-}
-
-static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
-{
- struct crash_mem *cmem = arg;
-
- cmem->ranges[cmem->nr_ranges].start = res->start;
- cmem->ranges[cmem->nr_ranges].end = res->end;
- cmem->nr_ranges++;
-
- return 0;
-}
-
-static int prepare_elf_headers(void **addr, unsigned long *sz)
-{
- struct crash_mem *cmem;
- unsigned int nr_ranges;
- int ret;
-
- nr_ranges = 1; /* For exclusion of crashkernel region */
- walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);
-
- cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL);
- if (!cmem)
- return -ENOMEM;
-
- cmem->max_nr_ranges = nr_ranges;
- cmem->nr_ranges = 0;
- ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback);
- if (ret)
- goto out;
-
- /* Exclude crashkernel region */
- ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
- if (!ret)
- ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
-
-out:
- kfree(cmem);
- return ret;
-}
-
-static char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
- unsigned long cmdline_len)
-{
- int elfcorehdr_strlen;
- char *cmdline_ptr;
-
- cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL);
- if (!cmdline_ptr)
- return NULL;
-
- elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ",
- image->elf_load_addr);
-
- if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) {
- pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n");
- kfree(cmdline_ptr);
- return NULL;
- }
-
- memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len);
- /* Ensure it's nul terminated */
- cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0';
- return cmdline_ptr;
-}
-#endif
-
-static void *elf_kexec_load(struct kimage *image, char *kernel_buf,
- unsigned long kernel_len, char *initrd,
- unsigned long initrd_len, char *cmdline,
- unsigned long cmdline_len)
-{
- int ret;
- void *fdt;
- unsigned long old_kernel_pbase = ULONG_MAX;
- unsigned long new_kernel_pbase = 0UL;
- unsigned long initrd_pbase = 0UL;
- unsigned long kernel_start;
- struct elfhdr ehdr;
- struct kexec_buf kbuf;
- struct kexec_elf_info elf_info;
- char *modified_cmdline = NULL;
-
- ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
- if (ret)
- return ERR_PTR(ret);
-
- ret = elf_find_pbase(image, kernel_len, &ehdr, &elf_info,
- &old_kernel_pbase, &new_kernel_pbase);
- if (ret)
- goto out;
- kernel_start = image->start;
-
- /* Add the kernel binary to the image */
- ret = riscv_kexec_elf_load(image, &ehdr, &elf_info,
- old_kernel_pbase, new_kernel_pbase);
- if (ret)
- goto out;
-
- kbuf.image = image;
- kbuf.buf_min = new_kernel_pbase + kernel_len;
- kbuf.buf_max = ULONG_MAX;
-
-#ifdef CONFIG_CRASH_DUMP
- /* Add elfcorehdr */
- if (image->type == KEXEC_TYPE_CRASH) {
- void *headers;
- unsigned long headers_sz;
- ret = prepare_elf_headers(&headers, &headers_sz);
- if (ret) {
- pr_err("Preparing elf core header failed\n");
- goto out;
- }
-
- kbuf.buffer = headers;
- kbuf.bufsz = headers_sz;
- kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
- kbuf.memsz = headers_sz;
- kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
- kbuf.top_down = true;
-
- ret = kexec_add_buffer(&kbuf);
- if (ret) {
- vfree(headers);
- goto out;
- }
- image->elf_headers = headers;
- image->elf_load_addr = kbuf.mem;
- image->elf_headers_sz = headers_sz;
-
- kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
-
- /* Setup cmdline for kdump kernel case */
- modified_cmdline = setup_kdump_cmdline(image, cmdline,
- cmdline_len);
- if (!modified_cmdline) {
- pr_err("Setting up cmdline for kdump kernel failed\n");
- ret = -EINVAL;
- goto out;
- }
- cmdline = modified_cmdline;
- }
-#endif
-
-#ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY
- /* Add purgatory to the image */
- kbuf.top_down = true;
- kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
- ret = kexec_load_purgatory(image, &kbuf);
- if (ret) {
- pr_err("Error loading purgatory ret=%d\n", ret);
- goto out;
- }
- kexec_dprintk("Loaded purgatory at 0x%lx\n", kbuf.mem);
-
- ret = kexec_purgatory_get_set_symbol(image, "riscv_kernel_entry",
- &kernel_start,
- sizeof(kernel_start), 0);
- if (ret)
- pr_err("Error update purgatory ret=%d\n", ret);
-#endif /* CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY */
-
- /* Add the initrd to the image */
- if (initrd != NULL) {
- kbuf.buffer = initrd;
- kbuf.bufsz = kbuf.memsz = initrd_len;
- kbuf.buf_align = PAGE_SIZE;
- kbuf.top_down = true;
- kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
- ret = kexec_add_buffer(&kbuf);
- if (ret)
- goto out;
- initrd_pbase = kbuf.mem;
- kexec_dprintk("Loaded initrd at 0x%lx\n", initrd_pbase);
- }
-
- /* Add the DTB to the image */
- fdt = of_kexec_alloc_and_setup_fdt(image, initrd_pbase,
- initrd_len, cmdline, 0);
- if (!fdt) {
- pr_err("Error setting up the new device tree.\n");
- ret = -EINVAL;
- goto out;
- }
-
- fdt_pack(fdt);
- kbuf.buffer = fdt;
- kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt);
- kbuf.buf_align = PAGE_SIZE;
- kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
- kbuf.top_down = true;
- ret = kexec_add_buffer(&kbuf);
- if (ret) {
- pr_err("Error add DTB kbuf ret=%d\n", ret);
- goto out_free_fdt;
- }
- /* Cache the fdt buffer address for memory cleanup */
- image->arch.fdt = fdt;
- kexec_dprintk("Loaded device tree at 0x%lx\n", kbuf.mem);
- goto out;
-
-out_free_fdt:
- kvfree(fdt);
-out:
- kfree(modified_cmdline);
- kexec_free_elf_info(&elf_info);
- return ret ? ERR_PTR(ret) : NULL;
-}
-
-#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1))
-#define RISCV_IMM_BITS 12
-#define RISCV_IMM_REACH (1LL << RISCV_IMM_BITS)
-#define RISCV_CONST_HIGH_PART(x) \
- (((x) + (RISCV_IMM_REACH >> 1)) & ~(RISCV_IMM_REACH - 1))
-#define RISCV_CONST_LOW_PART(x) ((x) - RISCV_CONST_HIGH_PART(x))
-
-#define ENCODE_ITYPE_IMM(x) \
- (RV_X(x, 0, 12) << 20)
-#define ENCODE_BTYPE_IMM(x) \
- ((RV_X(x, 1, 4) << 8) | (RV_X(x, 5, 6) << 25) | \
- (RV_X(x, 11, 1) << 7) | (RV_X(x, 12, 1) << 31))
-#define ENCODE_UTYPE_IMM(x) \
- (RV_X(x, 12, 20) << 12)
-#define ENCODE_JTYPE_IMM(x) \
- ((RV_X(x, 1, 10) << 21) | (RV_X(x, 11, 1) << 20) | \
- (RV_X(x, 12, 8) << 12) | (RV_X(x, 20, 1) << 31))
-#define ENCODE_CBTYPE_IMM(x) \
- ((RV_X(x, 1, 2) << 3) | (RV_X(x, 3, 2) << 10) | (RV_X(x, 5, 1) << 2) | \
- (RV_X(x, 6, 2) << 5) | (RV_X(x, 8, 1) << 12))
-#define ENCODE_CJTYPE_IMM(x) \
- ((RV_X(x, 1, 3) << 3) | (RV_X(x, 4, 1) << 11) | (RV_X(x, 5, 1) << 2) | \
- (RV_X(x, 6, 1) << 7) | (RV_X(x, 7, 1) << 6) | (RV_X(x, 8, 2) << 9) | \
- (RV_X(x, 10, 1) << 8) | (RV_X(x, 11, 1) << 12))
-#define ENCODE_UJTYPE_IMM(x) \
- (ENCODE_UTYPE_IMM(RISCV_CONST_HIGH_PART(x)) | \
- (ENCODE_ITYPE_IMM(RISCV_CONST_LOW_PART(x)) << 32))
-#define ENCODE_UITYPE_IMM(x) \
- (ENCODE_UTYPE_IMM(x) | (ENCODE_ITYPE_IMM(x) << 32))
-
-#define CLEAN_IMM(type, x) \
- ((~ENCODE_##type##_IMM((uint64_t)(-1))) & (x))
-
-int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
- Elf_Shdr *section,
- const Elf_Shdr *relsec,
- const Elf_Shdr *symtab)
-{
- const char *strtab, *name, *shstrtab;
- const Elf_Shdr *sechdrs;
- Elf64_Rela *relas;
- int i, r_type;
-
- /* String & section header string table */
- sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff;
- strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset;
- shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset;
-
- relas = (void *)pi->ehdr + relsec->sh_offset;
-
- for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) {
- const Elf_Sym *sym; /* symbol to relocate */
- unsigned long addr; /* final location after relocation */
- unsigned long val; /* relocated symbol value */
- unsigned long sec_base; /* relocated symbol value */
- void *loc; /* tmp location to modify */
-
- sym = (void *)pi->ehdr + symtab->sh_offset;
- sym += ELF64_R_SYM(relas[i].r_info);
-
- if (sym->st_name)
- name = strtab + sym->st_name;
- else
- name = shstrtab + sechdrs[sym->st_shndx].sh_name;
-
- loc = pi->purgatory_buf;
- loc += section->sh_offset;
- loc += relas[i].r_offset;
-
- if (sym->st_shndx == SHN_ABS)
- sec_base = 0;
- else if (sym->st_shndx >= pi->ehdr->e_shnum) {
- pr_err("Invalid section %d for symbol %s\n",
- sym->st_shndx, name);
- return -ENOEXEC;
- } else
- sec_base = pi->sechdrs[sym->st_shndx].sh_addr;
-
- val = sym->st_value;
- val += sec_base;
- val += relas[i].r_addend;
-
- addr = section->sh_addr + relas[i].r_offset;
-
- r_type = ELF64_R_TYPE(relas[i].r_info);
-
- switch (r_type) {
- case R_RISCV_BRANCH:
- *(u32 *)loc = CLEAN_IMM(BTYPE, *(u32 *)loc) |
- ENCODE_BTYPE_IMM(val - addr);
- break;
- case R_RISCV_JAL:
- *(u32 *)loc = CLEAN_IMM(JTYPE, *(u32 *)loc) |
- ENCODE_JTYPE_IMM(val - addr);
- break;
- /*
- * With no R_RISCV_PCREL_LO12_S, R_RISCV_PCREL_LO12_I
- * sym is expected to be next to R_RISCV_PCREL_HI20
- * in purgatory relsec. Handle it like R_RISCV_CALL
- * sym, instead of searching the whole relsec.
- */
- case R_RISCV_PCREL_HI20:
- case R_RISCV_CALL_PLT:
- case R_RISCV_CALL:
- *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) |
- ENCODE_UJTYPE_IMM(val - addr);
- break;
- case R_RISCV_RVC_BRANCH:
- *(u32 *)loc = CLEAN_IMM(CBTYPE, *(u32 *)loc) |
- ENCODE_CBTYPE_IMM(val - addr);
- break;
- case R_RISCV_RVC_JUMP:
- *(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) |
- ENCODE_CJTYPE_IMM(val - addr);
- break;
- case R_RISCV_ADD16:
- *(u16 *)loc += val;
- break;
- case R_RISCV_SUB16:
- *(u16 *)loc -= val;
- break;
- case R_RISCV_ADD32:
- *(u32 *)loc += val;
- break;
- case R_RISCV_SUB32:
- *(u32 *)loc -= val;
- break;
- /* It has been applied by R_RISCV_PCREL_HI20 sym */
- case R_RISCV_PCREL_LO12_I:
- case R_RISCV_ALIGN:
- case R_RISCV_RELAX:
- break;
- case R_RISCV_64:
- *(u64 *)loc = val;
- break;
- default:
- pr_err("Unknown rela relocation: %d\n", r_type);
- return -ENOEXEC;
- }
- }
- return 0;
-}
-
-const struct kexec_file_ops elf_kexec_ops = {
- .probe = kexec_elf_probe,
- .load = elf_kexec_load,
-};
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 0fb338000c6d..75656afa2d6b 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -401,9 +401,18 @@ SYM_FUNC_START(__switch_to)
REG_S s9, TASK_THREAD_S9_RA(a3)
REG_S s10, TASK_THREAD_S10_RA(a3)
REG_S s11, TASK_THREAD_S11_RA(a3)
+
+ /* save the user space access flag */
+ csrr s0, CSR_STATUS
+ REG_S s0, TASK_THREAD_SUM_RA(a3)
+
/* Save the kernel shadow call stack pointer */
scs_save_current
/* Restore context from next->thread */
+ REG_L s0, TASK_THREAD_SUM_RA(a4)
+ li s1, SR_SUM
+ and s0, s0, s1
+ csrs CSR_STATUS, s0
REG_L ra, TASK_THREAD_RA_RA(a4)
REG_L sp, TASK_THREAD_SP_RA(a4)
REG_L s0, TASK_THREAD_S0_RA(a4)
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index 674dcdfae7a1..8d18d6727f0f 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -8,98 +8,139 @@
#include <linux/ftrace.h>
#include <linux/uaccess.h>
#include <linux/memory.h>
+#include <linux/irqflags.h>
#include <linux/stop_machine.h>
#include <asm/cacheflush.h>
#include <asm/text-patching.h>
#ifdef CONFIG_DYNAMIC_FTRACE
-void ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex)
+void ftrace_arch_code_modify_prepare(void)
+ __acquires(&text_mutex)
{
mutex_lock(&text_mutex);
-
- /*
- * The code sequences we use for ftrace can't be patched while the
- * kernel is running, so we need to use stop_machine() to modify them
- * for now. This doesn't play nice with text_mutex, we use this flag
- * to elide the check.
- */
- riscv_patch_in_stop_machine = true;
}
-void ftrace_arch_code_modify_post_process(void) __releases(&text_mutex)
+void ftrace_arch_code_modify_post_process(void)
+ __releases(&text_mutex)
{
- riscv_patch_in_stop_machine = false;
mutex_unlock(&text_mutex);
}
-static int ftrace_check_current_call(unsigned long hook_pos,
- unsigned int *expected)
+unsigned long ftrace_call_adjust(unsigned long addr)
{
- unsigned int replaced[2];
- unsigned int nops[2] = {RISCV_INSN_NOP4, RISCV_INSN_NOP4};
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS))
+ return addr + 8 + MCOUNT_AUIPC_SIZE;
- /* we expect nops at the hook position */
- if (!expected)
- expected = nops;
+ return addr + MCOUNT_AUIPC_SIZE;
+}
- /*
- * Read the text we want to modify;
- * return must be -EFAULT on read error
- */
- if (copy_from_kernel_nofault(replaced, (void *)hook_pos,
- MCOUNT_INSN_SIZE))
- return -EFAULT;
+unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip)
+{
+ return fentry_ip - MCOUNT_AUIPC_SIZE;
+}
- /*
- * Make sure it is what we expect it to be;
- * return must be -EINVAL on failed comparison
- */
- if (memcmp(expected, replaced, sizeof(replaced))) {
- pr_err("%p: expected (%08x %08x) but got (%08x %08x)\n",
- (void *)hook_pos, expected[0], expected[1], replaced[0],
- replaced[1]);
- return -EINVAL;
+void arch_ftrace_update_code(int command)
+{
+ command |= FTRACE_MAY_SLEEP;
+ ftrace_modify_all_code(command);
+ flush_icache_all();
+}
+
+static int __ftrace_modify_call(unsigned long source, unsigned long target, bool validate)
+{
+ unsigned int call[2], offset;
+ unsigned int replaced[2];
+
+ offset = target - source;
+ call[1] = to_jalr_t0(offset);
+
+ if (validate) {
+ call[0] = to_auipc_t0(offset);
+ /*
+ * Read the text we want to modify;
+ * return must be -EFAULT on read error
+ */
+ if (copy_from_kernel_nofault(replaced, (void *)source, 2 * MCOUNT_INSN_SIZE))
+ return -EFAULT;
+
+ if (replaced[0] != call[0]) {
+ pr_err("%p: expected (%08x) but got (%08x)\n",
+ (void *)source, call[0], replaced[0]);
+ return -EINVAL;
+ }
}
+ /* Replace the jalr at once. Return -EPERM on write error. */
+ if (patch_insn_write((void *)(source + MCOUNT_AUIPC_SIZE), call + 1, MCOUNT_JALR_SIZE))
+ return -EPERM;
+
return 0;
}
-static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
- bool enable, bool ra)
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+static const struct ftrace_ops *riscv64_rec_get_ops(struct dyn_ftrace *rec)
{
- unsigned int call[2];
- unsigned int nops[2] = {RISCV_INSN_NOP4, RISCV_INSN_NOP4};
+ const struct ftrace_ops *ops = NULL;
- if (ra)
- make_call_ra(hook_pos, target, call);
- else
- make_call_t0(hook_pos, target, call);
+ if (rec->flags & FTRACE_FL_CALL_OPS_EN) {
+ ops = ftrace_find_unique_ops(rec);
+ WARN_ON_ONCE(!ops);
+ }
- /* Replace the auipc-jalr pair at once. Return -EPERM on write error. */
- if (patch_insn_write((void *)hook_pos, enable ? call : nops, MCOUNT_INSN_SIZE))
- return -EPERM;
+ if (!ops)
+ ops = &ftrace_list_ops;
- return 0;
+ return ops;
+}
+
+static int ftrace_rec_set_ops(const struct dyn_ftrace *rec, const struct ftrace_ops *ops)
+{
+ unsigned long literal = ALIGN_DOWN(rec->ip - 12, 8);
+
+ return patch_text_nosync((void *)literal, &ops, sizeof(ops));
}
+static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec)
+{
+ return ftrace_rec_set_ops(rec, &ftrace_nop_ops);
+}
+
+static int ftrace_rec_update_ops(struct dyn_ftrace *rec)
+{
+ return ftrace_rec_set_ops(rec, riscv64_rec_get_ops(rec));
+}
+#else
+static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) { return 0; }
+static int ftrace_rec_update_ops(struct dyn_ftrace *rec) { return 0; }
+#endif
+
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
- unsigned int call[2];
+ unsigned long distance, orig_addr, pc = rec->ip - MCOUNT_AUIPC_SIZE;
+ int ret;
- make_call_t0(rec->ip, addr, call);
+ ret = ftrace_rec_update_ops(rec);
+ if (ret)
+ return ret;
- if (patch_insn_write((void *)rec->ip, call, MCOUNT_INSN_SIZE))
- return -EPERM;
+ orig_addr = (unsigned long)&ftrace_caller;
+ distance = addr > orig_addr ? addr - orig_addr : orig_addr - addr;
+ if (distance > JALR_RANGE)
+ addr = FTRACE_ADDR;
- return 0;
+ return __ftrace_modify_call(pc, addr, false);
}
-int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
- unsigned long addr)
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
{
- unsigned int nops[2] = {RISCV_INSN_NOP4, RISCV_INSN_NOP4};
+ u32 nop4 = RISCV_INSN_NOP4;
+ int ret;
+
+ ret = ftrace_rec_set_nop_ops(rec);
+ if (ret)
+ return ret;
- if (patch_insn_write((void *)rec->ip, nops, MCOUNT_INSN_SIZE))
+ if (patch_insn_write((void *)rec->ip, &nop4, MCOUNT_NOP4_SIZE))
return -EPERM;
return 0;
@@ -114,75 +155,71 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
*/
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
{
- int out;
+ unsigned long pc = rec->ip - MCOUNT_AUIPC_SIZE;
+ unsigned int nops[2], offset;
+ int ret;
- mutex_lock(&text_mutex);
- out = ftrace_make_nop(mod, rec, MCOUNT_ADDR);
- mutex_unlock(&text_mutex);
+ guard(mutex)(&text_mutex);
- return out;
-}
+ ret = ftrace_rec_set_nop_ops(rec);
+ if (ret)
+ return ret;
-int ftrace_update_ftrace_func(ftrace_func_t func)
-{
- int ret = __ftrace_modify_call((unsigned long)&ftrace_call,
- (unsigned long)func, true, true);
+ offset = (unsigned long) &ftrace_caller - pc;
+ nops[0] = to_auipc_t0(offset);
+ nops[1] = RISCV_INSN_NOP4;
+
+ ret = patch_insn_write((void *)pc, nops, 2 * MCOUNT_INSN_SIZE);
return ret;
}
-struct ftrace_modify_param {
- int command;
- atomic_t cpu_count;
-};
-
-static int __ftrace_modify_code(void *data)
+ftrace_func_t ftrace_call_dest = ftrace_stub;
+int ftrace_update_ftrace_func(ftrace_func_t func)
{
- struct ftrace_modify_param *param = data;
-
- if (atomic_inc_return(&param->cpu_count) == num_online_cpus()) {
- ftrace_modify_all_code(param->command);
- /*
- * Make sure the patching store is effective *before* we
- * increment the counter which releases all waiting CPUs
- * by using the release variant of atomic increment. The
- * release pairs with the call to local_flush_icache_all()
- * on the waiting CPU.
- */
- atomic_inc_return_release(&param->cpu_count);
- } else {
- while (atomic_read(&param->cpu_count) <= num_online_cpus())
- cpu_relax();
-
- local_flush_icache_all();
- }
+ /*
+ * When using CALL_OPS, the function to call is associated with the
+ * call site, and we don't have a global function pointer to update.
+ */
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS))
+ return 0;
+ WRITE_ONCE(ftrace_call_dest, func);
+ /*
+ * The data fence ensure that the update to ftrace_call_dest happens
+ * before the write to function_trace_op later in the generic ftrace.
+ * If the sequence is not enforced, then an old ftrace_call_dest may
+ * race loading a new function_trace_op set in ftrace_modify_all_code
+ */
+ smp_wmb();
+ /*
+ * Updating ftrace dpes not take stop_machine path, so irqs should not
+ * be disabled.
+ */
+ WARN_ON(irqs_disabled());
+ smp_call_function(ftrace_sync_ipi, NULL, 1);
return 0;
}
-void arch_ftrace_update_code(int command)
+#else /* CONFIG_DYNAMIC_FTRACE */
+unsigned long ftrace_call_adjust(unsigned long addr)
{
- struct ftrace_modify_param param = { command, ATOMIC_INIT(0) };
-
- stop_machine(__ftrace_modify_code, &param, cpu_online_mask);
+ return addr;
}
-#endif
+#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
- unsigned int call[2];
- unsigned long caller = rec->ip;
+ unsigned long caller = rec->ip - MCOUNT_AUIPC_SIZE;
int ret;
- make_call_t0(caller, old_addr, call);
- ret = ftrace_check_current_call(caller, call);
-
+ ret = ftrace_rec_update_ops(rec);
if (ret)
return ret;
- return __ftrace_modify_call(caller, addr, true, false);
+ return __ftrace_modify_call(caller, FTRACE_ADDR, true);
}
#endif
@@ -210,7 +247,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
}
#ifdef CONFIG_DYNAMIC_FTRACE
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs)
{
@@ -231,19 +267,5 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
if (!function_graph_enter_regs(old, ip, frame_pointer, parent, fregs))
*parent = return_hooker;
}
-#else /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */
-extern void ftrace_graph_call(void);
-int ftrace_enable_ftrace_graph_caller(void)
-{
- return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
- (unsigned long)&prepare_ftrace_return, true, true);
-}
-
-int ftrace_disable_ftrace_graph_caller(void)
-{
- return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
- (unsigned long)&prepare_ftrace_return, false, true);
-}
-#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/riscv/kernel/kexec_elf.c b/arch/riscv/kernel/kexec_elf.c
new file mode 100644
index 000000000000..f4755d49b89e
--- /dev/null
+++ b/arch/riscv/kernel/kexec_elf.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Load ELF vmlinux file for the kexec_file_load syscall.
+ *
+ * Copyright (C) 2021 Huawei Technologies Co, Ltd.
+ *
+ * Author: Liao Chang (liaochang1@huawei.com)
+ *
+ * Based on kexec-tools' kexec-elf-riscv.c, heavily modified
+ * for kernel.
+ */
+
+#define pr_fmt(fmt) "kexec_image: " fmt
+
+#include <linux/elf.h>
+#include <linux/kexec.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/libfdt.h>
+#include <linux/types.h>
+#include <linux/memblock.h>
+#include <asm/setup.h>
+
+static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr,
+ struct kexec_elf_info *elf_info, unsigned long old_pbase,
+ unsigned long new_pbase)
+{
+ int i;
+ int ret = 0;
+ size_t size;
+ struct kexec_buf kbuf;
+ const struct elf_phdr *phdr;
+
+ kbuf.image = image;
+
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ phdr = &elf_info->proghdrs[i];
+ if (phdr->p_type != PT_LOAD)
+ continue;
+
+ size = phdr->p_filesz;
+ if (size > phdr->p_memsz)
+ size = phdr->p_memsz;
+
+ kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset;
+ kbuf.bufsz = size;
+ kbuf.buf_align = phdr->p_align;
+ kbuf.mem = phdr->p_paddr - old_pbase + new_pbase;
+ kbuf.memsz = phdr->p_memsz;
+ kbuf.top_down = false;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * Go through the available phsyical memory regions and find one that hold
+ * an image of the specified size.
+ */
+static int elf_find_pbase(struct kimage *image, unsigned long kernel_len,
+ struct elfhdr *ehdr, struct kexec_elf_info *elf_info,
+ unsigned long *old_pbase, unsigned long *new_pbase)
+{
+ int i;
+ int ret;
+ struct kexec_buf kbuf;
+ const struct elf_phdr *phdr;
+ unsigned long lowest_paddr = ULONG_MAX;
+ unsigned long lowest_vaddr = ULONG_MAX;
+
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ phdr = &elf_info->proghdrs[i];
+ if (phdr->p_type != PT_LOAD)
+ continue;
+
+ if (lowest_paddr > phdr->p_paddr)
+ lowest_paddr = phdr->p_paddr;
+
+ if (lowest_vaddr > phdr->p_vaddr)
+ lowest_vaddr = phdr->p_vaddr;
+ }
+
+ kbuf.image = image;
+ kbuf.buf_min = lowest_paddr;
+ kbuf.buf_max = ULONG_MAX;
+
+ /*
+ * Current riscv boot protocol requires 2MB alignment for
+ * RV64 and 4MB alignment for RV32
+ *
+ */
+ kbuf.buf_align = PMD_SIZE;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE);
+ kbuf.top_down = false;
+ ret = arch_kexec_locate_mem_hole(&kbuf);
+ if (!ret) {
+ *old_pbase = lowest_paddr;
+ *new_pbase = kbuf.mem;
+ image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem;
+ }
+ return ret;
+}
+
+static void *elf_kexec_load(struct kimage *image, char *kernel_buf,
+ unsigned long kernel_len, char *initrd,
+ unsigned long initrd_len, char *cmdline,
+ unsigned long cmdline_len)
+{
+ int ret;
+ unsigned long old_kernel_pbase = ULONG_MAX;
+ unsigned long new_kernel_pbase = 0UL;
+ struct elfhdr ehdr;
+ struct kexec_elf_info elf_info;
+
+ ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
+ if (ret)
+ return ERR_PTR(ret);
+
+ ret = elf_find_pbase(image, kernel_len, &ehdr, &elf_info,
+ &old_kernel_pbase, &new_kernel_pbase);
+ if (ret)
+ goto out;
+
+ /* Add the kernel binary to the image */
+ ret = riscv_kexec_elf_load(image, &ehdr, &elf_info,
+ old_kernel_pbase, new_kernel_pbase);
+ if (ret)
+ goto out;
+
+ ret = load_extra_segments(image, image->start, kernel_len,
+ initrd, initrd_len, cmdline, cmdline_len);
+out:
+ kexec_free_elf_info(&elf_info);
+ return ret ? ERR_PTR(ret) : NULL;
+}
+
+const struct kexec_file_ops elf_kexec_ops = {
+ .probe = kexec_elf_probe,
+ .load = elf_kexec_load,
+};
diff --git a/arch/riscv/kernel/kexec_image.c b/arch/riscv/kernel/kexec_image.c
new file mode 100644
index 000000000000..26a81774a78a
--- /dev/null
+++ b/arch/riscv/kernel/kexec_image.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RISC-V Kexec image loader
+ *
+ */
+
+#define pr_fmt(fmt) "kexec_file(Image): " fmt
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/kexec.h>
+#include <linux/pe.h>
+#include <linux/string.h>
+#include <asm/byteorder.h>
+#include <asm/image.h>
+
+static int image_probe(const char *kernel_buf, unsigned long kernel_len)
+{
+ const struct riscv_image_header *h = (const struct riscv_image_header *)kernel_buf;
+
+ if (!h || kernel_len < sizeof(*h))
+ return -EINVAL;
+
+ /* According to Documentation/riscv/boot-image-header.rst,
+ * use "magic2" field to check when version >= 0.2.
+ */
+
+ if (h->version >= RISCV_HEADER_VERSION &&
+ memcmp(&h->magic2, RISCV_IMAGE_MAGIC2, sizeof(h->magic2)))
+ return -EINVAL;
+
+ return 0;
+}
+
+static void *image_load(struct kimage *image,
+ char *kernel, unsigned long kernel_len,
+ char *initrd, unsigned long initrd_len,
+ char *cmdline, unsigned long cmdline_len)
+{
+ struct riscv_image_header *h;
+ u64 flags;
+ bool be_image, be_kernel;
+ struct kexec_buf kbuf;
+ int ret;
+
+ /* Check Image header */
+ h = (struct riscv_image_header *)kernel;
+ if (!h->image_size) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Check endianness */
+ flags = le64_to_cpu(h->flags);
+ be_image = riscv_image_flag_field(flags, RISCV_IMAGE_FLAG_BE);
+ be_kernel = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
+ if (be_image != be_kernel) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Load the kernel image */
+ kbuf.image = image;
+ kbuf.buf_min = 0;
+ kbuf.buf_max = ULONG_MAX;
+ kbuf.top_down = false;
+
+ kbuf.buffer = kernel;
+ kbuf.bufsz = kernel_len;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf.memsz = le64_to_cpu(h->image_size);
+ kbuf.buf_align = le64_to_cpu(h->text_offset);
+
+ ret = kexec_add_buffer(&kbuf);
+ if (ret) {
+ pr_err("Error add kernel image ret=%d\n", ret);
+ goto out;
+ }
+
+ image->start = kbuf.mem;
+
+ pr_info("Loaded kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ kbuf.mem, kbuf.bufsz, kbuf.memsz);
+
+ ret = load_extra_segments(image, kbuf.mem, kbuf.memsz,
+ initrd, initrd_len, cmdline, cmdline_len);
+
+out:
+ return ret ? ERR_PTR(ret) : NULL;
+}
+
+const struct kexec_file_ops image_kexec_ops = {
+ .probe = image_probe,
+ .load = image_load,
+};
diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c
index b0bf8c1722c0..e36104af2e24 100644
--- a/arch/riscv/kernel/machine_kexec_file.c
+++ b/arch/riscv/kernel/machine_kexec_file.c
@@ -7,8 +7,369 @@
* Author: Liao Chang (liaochang1@huawei.com)
*/
#include <linux/kexec.h>
+#include <linux/elf.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/libfdt.h>
+#include <linux/types.h>
+#include <linux/memblock.h>
+#include <linux/vmalloc.h>
+#include <asm/setup.h>
const struct kexec_file_ops * const kexec_file_loaders[] = {
&elf_kexec_ops,
+ &image_kexec_ops,
NULL
};
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+ kvfree(image->arch.fdt);
+ image->arch.fdt = NULL;
+
+ vfree(image->elf_headers);
+ image->elf_headers = NULL;
+ image->elf_headers_sz = 0;
+
+ return kexec_image_post_load_cleanup_default(image);
+}
+
+#ifdef CONFIG_CRASH_DUMP
+static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
+{
+ unsigned int *nr_ranges = arg;
+
+ (*nr_ranges)++;
+ return 0;
+}
+
+static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
+{
+ struct crash_mem *cmem = arg;
+
+ cmem->ranges[cmem->nr_ranges].start = res->start;
+ cmem->ranges[cmem->nr_ranges].end = res->end;
+ cmem->nr_ranges++;
+
+ return 0;
+}
+
+static int prepare_elf_headers(void **addr, unsigned long *sz)
+{
+ struct crash_mem *cmem;
+ unsigned int nr_ranges;
+ int ret;
+
+ nr_ranges = 1; /* For exclusion of crashkernel region */
+ walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);
+
+ cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL);
+ if (!cmem)
+ return -ENOMEM;
+
+ cmem->max_nr_ranges = nr_ranges;
+ cmem->nr_ranges = 0;
+ ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback);
+ if (ret)
+ goto out;
+
+ /* Exclude crashkernel region */
+ ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
+ if (!ret)
+ ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
+
+out:
+ kfree(cmem);
+ return ret;
+}
+
+static char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
+ unsigned long cmdline_len)
+{
+ int elfcorehdr_strlen;
+ char *cmdline_ptr;
+
+ cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL);
+ if (!cmdline_ptr)
+ return NULL;
+
+ elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ",
+ image->elf_load_addr);
+
+ if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) {
+ pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n");
+ kfree(cmdline_ptr);
+ return NULL;
+ }
+
+ memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len);
+ /* Ensure it's nul terminated */
+ cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0';
+ return cmdline_ptr;
+}
+#endif
+
+#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1))
+#define RISCV_IMM_BITS 12
+#define RISCV_IMM_REACH (1LL << RISCV_IMM_BITS)
+#define RISCV_CONST_HIGH_PART(x) \
+ (((x) + (RISCV_IMM_REACH >> 1)) & ~(RISCV_IMM_REACH - 1))
+#define RISCV_CONST_LOW_PART(x) ((x) - RISCV_CONST_HIGH_PART(x))
+
+#define ENCODE_ITYPE_IMM(x) \
+ (RV_X(x, 0, 12) << 20)
+#define ENCODE_BTYPE_IMM(x) \
+ ((RV_X(x, 1, 4) << 8) | (RV_X(x, 5, 6) << 25) | \
+ (RV_X(x, 11, 1) << 7) | (RV_X(x, 12, 1) << 31))
+#define ENCODE_UTYPE_IMM(x) \
+ (RV_X(x, 12, 20) << 12)
+#define ENCODE_JTYPE_IMM(x) \
+ ((RV_X(x, 1, 10) << 21) | (RV_X(x, 11, 1) << 20) | \
+ (RV_X(x, 12, 8) << 12) | (RV_X(x, 20, 1) << 31))
+#define ENCODE_CBTYPE_IMM(x) \
+ ((RV_X(x, 1, 2) << 3) | (RV_X(x, 3, 2) << 10) | (RV_X(x, 5, 1) << 2) | \
+ (RV_X(x, 6, 2) << 5) | (RV_X(x, 8, 1) << 12))
+#define ENCODE_CJTYPE_IMM(x) \
+ ((RV_X(x, 1, 3) << 3) | (RV_X(x, 4, 1) << 11) | (RV_X(x, 5, 1) << 2) | \
+ (RV_X(x, 6, 1) << 7) | (RV_X(x, 7, 1) << 6) | (RV_X(x, 8, 2) << 9) | \
+ (RV_X(x, 10, 1) << 8) | (RV_X(x, 11, 1) << 12))
+#define ENCODE_UJTYPE_IMM(x) \
+ (ENCODE_UTYPE_IMM(RISCV_CONST_HIGH_PART(x)) | \
+ (ENCODE_ITYPE_IMM(RISCV_CONST_LOW_PART(x)) << 32))
+#define ENCODE_UITYPE_IMM(x) \
+ (ENCODE_UTYPE_IMM(x) | (ENCODE_ITYPE_IMM(x) << 32))
+
+#define CLEAN_IMM(type, x) \
+ ((~ENCODE_##type##_IMM((uint64_t)(-1))) & (x))
+
+int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+ Elf_Shdr *section,
+ const Elf_Shdr *relsec,
+ const Elf_Shdr *symtab)
+{
+ const char *strtab, *name, *shstrtab;
+ const Elf_Shdr *sechdrs;
+ Elf64_Rela *relas;
+ int i, r_type;
+
+ /* String & section header string table */
+ sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff;
+ strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset;
+ shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset;
+
+ relas = (void *)pi->ehdr + relsec->sh_offset;
+
+ for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) {
+ const Elf_Sym *sym; /* symbol to relocate */
+ unsigned long addr; /* final location after relocation */
+ unsigned long val; /* relocated symbol value */
+ unsigned long sec_base; /* relocated symbol value */
+ void *loc; /* tmp location to modify */
+
+ sym = (void *)pi->ehdr + symtab->sh_offset;
+ sym += ELF64_R_SYM(relas[i].r_info);
+
+ if (sym->st_name)
+ name = strtab + sym->st_name;
+ else
+ name = shstrtab + sechdrs[sym->st_shndx].sh_name;
+
+ loc = pi->purgatory_buf;
+ loc += section->sh_offset;
+ loc += relas[i].r_offset;
+
+ if (sym->st_shndx == SHN_ABS)
+ sec_base = 0;
+ else if (sym->st_shndx >= pi->ehdr->e_shnum) {
+ pr_err("Invalid section %d for symbol %s\n",
+ sym->st_shndx, name);
+ return -ENOEXEC;
+ } else
+ sec_base = pi->sechdrs[sym->st_shndx].sh_addr;
+
+ val = sym->st_value;
+ val += sec_base;
+ val += relas[i].r_addend;
+
+ addr = section->sh_addr + relas[i].r_offset;
+
+ r_type = ELF64_R_TYPE(relas[i].r_info);
+
+ switch (r_type) {
+ case R_RISCV_BRANCH:
+ *(u32 *)loc = CLEAN_IMM(BTYPE, *(u32 *)loc) |
+ ENCODE_BTYPE_IMM(val - addr);
+ break;
+ case R_RISCV_JAL:
+ *(u32 *)loc = CLEAN_IMM(JTYPE, *(u32 *)loc) |
+ ENCODE_JTYPE_IMM(val - addr);
+ break;
+ /*
+ * With no R_RISCV_PCREL_LO12_S, R_RISCV_PCREL_LO12_I
+ * sym is expected to be next to R_RISCV_PCREL_HI20
+ * in purgatory relsec. Handle it like R_RISCV_CALL
+ * sym, instead of searching the whole relsec.
+ */
+ case R_RISCV_PCREL_HI20:
+ case R_RISCV_CALL_PLT:
+ case R_RISCV_CALL:
+ *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) |
+ ENCODE_UJTYPE_IMM(val - addr);
+ break;
+ case R_RISCV_RVC_BRANCH:
+ *(u32 *)loc = CLEAN_IMM(CBTYPE, *(u32 *)loc) |
+ ENCODE_CBTYPE_IMM(val - addr);
+ break;
+ case R_RISCV_RVC_JUMP:
+ *(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) |
+ ENCODE_CJTYPE_IMM(val - addr);
+ break;
+ case R_RISCV_ADD16:
+ *(u16 *)loc += val;
+ break;
+ case R_RISCV_SUB16:
+ *(u16 *)loc -= val;
+ break;
+ case R_RISCV_ADD32:
+ *(u32 *)loc += val;
+ break;
+ case R_RISCV_SUB32:
+ *(u32 *)loc -= val;
+ break;
+ /* It has been applied by R_RISCV_PCREL_HI20 sym */
+ case R_RISCV_PCREL_LO12_I:
+ case R_RISCV_ALIGN:
+ case R_RISCV_RELAX:
+ break;
+ case R_RISCV_64:
+ *(u64 *)loc = val;
+ break;
+ default:
+ pr_err("Unknown rela relocation: %d\n", r_type);
+ return -ENOEXEC;
+ }
+ }
+ return 0;
+}
+
+
+int load_extra_segments(struct kimage *image, unsigned long kernel_start,
+ unsigned long kernel_len, char *initrd,
+ unsigned long initrd_len, char *cmdline,
+ unsigned long cmdline_len)
+{
+ int ret;
+ void *fdt;
+ unsigned long initrd_pbase = 0UL;
+ struct kexec_buf kbuf;
+ char *modified_cmdline = NULL;
+
+ kbuf.image = image;
+ kbuf.buf_min = kernel_start + kernel_len;
+ kbuf.buf_max = ULONG_MAX;
+
+#ifdef CONFIG_CRASH_DUMP
+ /* Add elfcorehdr */
+ if (image->type == KEXEC_TYPE_CRASH) {
+ void *headers;
+ unsigned long headers_sz;
+ ret = prepare_elf_headers(&headers, &headers_sz);
+ if (ret) {
+ pr_err("Preparing elf core header failed\n");
+ goto out;
+ }
+
+ kbuf.buffer = headers;
+ kbuf.bufsz = headers_sz;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf.memsz = headers_sz;
+ kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
+ kbuf.top_down = true;
+
+ ret = kexec_add_buffer(&kbuf);
+ if (ret) {
+ vfree(headers);
+ goto out;
+ }
+ image->elf_headers = headers;
+ image->elf_load_addr = kbuf.mem;
+ image->elf_headers_sz = headers_sz;
+
+ kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
+
+ /* Setup cmdline for kdump kernel case */
+ modified_cmdline = setup_kdump_cmdline(image, cmdline,
+ cmdline_len);
+ if (!modified_cmdline) {
+ pr_err("Setting up cmdline for kdump kernel failed\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ cmdline = modified_cmdline;
+ }
+#endif
+
+#ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY
+ /* Add purgatory to the image */
+ kbuf.top_down = true;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ ret = kexec_load_purgatory(image, &kbuf);
+ if (ret) {
+ pr_err("Error loading purgatory ret=%d\n", ret);
+ goto out;
+ }
+ kexec_dprintk("Loaded purgatory at 0x%lx\n", kbuf.mem);
+
+ ret = kexec_purgatory_get_set_symbol(image, "riscv_kernel_entry",
+ &kernel_start,
+ sizeof(kernel_start), 0);
+ if (ret)
+ pr_err("Error update purgatory ret=%d\n", ret);
+#endif /* CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY */
+
+ /* Add the initrd to the image */
+ if (initrd != NULL) {
+ kbuf.buffer = initrd;
+ kbuf.bufsz = kbuf.memsz = initrd_len;
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.top_down = true;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ goto out;
+ initrd_pbase = kbuf.mem;
+ kexec_dprintk("Loaded initrd at 0x%lx\n", initrd_pbase);
+ }
+
+ /* Add the DTB to the image */
+ fdt = of_kexec_alloc_and_setup_fdt(image, initrd_pbase,
+ initrd_len, cmdline, 0);
+ if (!fdt) {
+ pr_err("Error setting up the new device tree.\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ fdt_pack(fdt);
+ kbuf.buffer = fdt;
+ kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt);
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf.top_down = true;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret) {
+ pr_err("Error add DTB kbuf ret=%d\n", ret);
+ goto out_free_fdt;
+ }
+ /* Cache the fdt buffer address for memory cleanup */
+ image->arch.fdt = fdt;
+ kexec_dprintk("Loaded device tree at 0x%lx\n", kbuf.mem);
+ goto out;
+
+out_free_fdt:
+ kvfree(fdt);
+out:
+ kfree(modified_cmdline);
+ return ret;
+}
diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S
index 745dd4c4a69c..48f6c4f7dca0 100644
--- a/arch/riscv/kernel/mcount-dyn.S
+++ b/arch/riscv/kernel/mcount-dyn.S
@@ -13,7 +13,6 @@
.text
-#define FENTRY_RA_OFFSET 8
#define ABI_SIZE_ON_STACK 80
#define ABI_A0 0
#define ABI_A1 8
@@ -56,16 +55,13 @@
addi sp, sp, ABI_SIZE_ON_STACK
.endm
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
-
/**
* SAVE_ABI_REGS - save regs against the ftrace_regs struct
*
* After the stack is established,
*
* 0(sp) stores the PC of the traced function which can be accessed
-* by &(fregs)->epc in tracing function. Note that the real
-* function entry address should be computed with -FENTRY_RA_OFFSET.
+* by &(fregs)->epc in tracing function.
*
* 8(sp) stores the function return address (i.e. parent IP) that
* can be accessed by &(fregs)->ra in tracing function.
@@ -86,17 +82,20 @@
* +++++++++
**/
.macro SAVE_ABI_REGS
- mv t4, sp // Save original SP in T4
addi sp, sp, -FREGS_SIZE_ON_STACK
-
REG_S t0, FREGS_EPC(sp)
REG_S x1, FREGS_RA(sp)
- REG_S t4, FREGS_SP(sp) // Put original SP on stack
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
REG_S x8, FREGS_S0(sp)
#endif
REG_S x6, FREGS_T1(sp)
-
+#ifdef CONFIG_CC_IS_CLANG
+ REG_S x7, FREGS_T2(sp)
+ REG_S x28, FREGS_T3(sp)
+ REG_S x29, FREGS_T4(sp)
+ REG_S x30, FREGS_T5(sp)
+ REG_S x31, FREGS_T6(sp)
+#endif
// save the arguments
REG_S x10, FREGS_A0(sp)
REG_S x11, FREGS_A1(sp)
@@ -106,16 +105,25 @@
REG_S x15, FREGS_A5(sp)
REG_S x16, FREGS_A6(sp)
REG_S x17, FREGS_A7(sp)
+ mv a0, sp
+ addi a0, a0, FREGS_SIZE_ON_STACK
+ REG_S a0, FREGS_SP(sp) // Put original SP on stack
.endm
- .macro RESTORE_ABI_REGS, all=0
+ .macro RESTORE_ABI_REGS
REG_L t0, FREGS_EPC(sp)
REG_L x1, FREGS_RA(sp)
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
REG_L x8, FREGS_S0(sp)
#endif
REG_L x6, FREGS_T1(sp)
-
+#ifdef CONFIG_CC_IS_CLANG
+ REG_L x7, FREGS_T2(sp)
+ REG_L x28, FREGS_T3(sp)
+ REG_L x29, FREGS_T4(sp)
+ REG_L x30, FREGS_T5(sp)
+ REG_L x31, FREGS_T6(sp)
+#endif
// restore the arguments
REG_L x10, FREGS_A0(sp)
REG_L x11, FREGS_A1(sp)
@@ -130,60 +138,71 @@
.endm
.macro PREPARE_ARGS
- addi a0, t0, -FENTRY_RA_OFFSET
+ addi a0, t0, -MCOUNT_JALR_SIZE // ip (callsite's jalr insn)
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+ mv a1, ra // parent_ip
+ REG_L a2, -16(t0) // op
+ REG_L ra, FTRACE_OPS_FUNC(a2) // op->func
+#else
la a1, function_trace_op
- REG_L a2, 0(a1)
- mv a1, ra
- mv a3, sp
+ REG_L a2, 0(a1) // op
+ mv a1, ra // parent_ip
+#endif
+ mv a3, sp // regs
.endm
-#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */
-
-#ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
SYM_FUNC_START(ftrace_caller)
- SAVE_ABI
-
- addi a0, t0, -FENTRY_RA_OFFSET
- la a1, function_trace_op
- REG_L a2, 0(a1)
- mv a1, ra
- mv a3, sp
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+ /*
+ * When CALL_OPS is enabled (2 or 4) nops [8B] are placed before the
+ * function entry, these are later overwritten with the pointer to the
+ * associated struct ftrace_ops.
+ *
+ * -8: &ftrace_ops of the associated tracer function.
+ *<ftrace enable>:
+ * 0: auipc t0/ra, 0x?
+ * 4: jalr t0/ra, ?(t0/ra)
+ *
+ * -8: &ftrace_nop_ops
+ *<ftrace disable>:
+ * 0: nop
+ * 4: nop
+ *
+ * t0 is set to ip+8 after the jalr is executed at the callsite,
+ * so we find the associated op at t0-16.
+ */
+ REG_L t1, -16(t0) // op Should be SZ_REG instead of 16
-SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
- call ftrace_stub
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- addi a0, sp, ABI_RA
- REG_L a1, ABI_T0(sp)
- addi a1, a1, -FENTRY_RA_OFFSET
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
- mv a2, s0
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ /*
+ * If the op has a direct call, handle it immediately without
+ * saving/restoring registers.
+ */
+ REG_L t1, FTRACE_OPS_DIRECT_CALL(t1)
+ bnez t1, ftrace_caller_direct
#endif
-SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
- call ftrace_stub
#endif
- RESTORE_ABI
- jr t0
-SYM_FUNC_END(ftrace_caller)
-
-#else /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */
-SYM_FUNC_START(ftrace_caller)
- mv t1, zero
SAVE_ABI_REGS
PREPARE_ARGS
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+ jalr ra
+#else
SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
- call ftrace_stub
-
+ REG_L ra, ftrace_call_dest
+ jalr ra, 0(ra)
+#endif
RESTORE_ABI_REGS
- bnez t1, .Ldirect
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ bnez t1, ftrace_caller_direct
+#endif
jr t0
-.Ldirect:
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+SYM_INNER_LABEL(ftrace_caller_direct, SYM_L_LOCAL)
jr t1
+#endif
SYM_FUNC_END(ftrace_caller)
-#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */
-
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
SYM_CODE_START(ftrace_stub_direct_tramp)
jr t0
diff --git a/arch/riscv/kernel/module-sections.c b/arch/riscv/kernel/module-sections.c
index 91d0b355ceef..75551ac6504c 100644
--- a/arch/riscv/kernel/module-sections.c
+++ b/arch/riscv/kernel/module-sections.c
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleloader.h>
+#include <linux/sort.h>
unsigned long module_emit_got_entry(struct module *mod, unsigned long val)
{
@@ -55,44 +56,70 @@ unsigned long module_emit_plt_entry(struct module *mod, unsigned long val)
return (unsigned long)&plt[i];
}
-static int is_rela_equal(const Elf_Rela *x, const Elf_Rela *y)
+#define cmp_3way(a, b) ((a) < (b) ? -1 : (a) > (b))
+
+static int cmp_rela(const void *a, const void *b)
{
- return x->r_info == y->r_info && x->r_addend == y->r_addend;
+ const Elf_Rela *x = a, *y = b;
+ int i;
+
+ /* sort by type, symbol index and addend */
+ i = cmp_3way(x->r_info, y->r_info);
+ if (i == 0)
+ i = cmp_3way(x->r_addend, y->r_addend);
+ return i;
}
static bool duplicate_rela(const Elf_Rela *rela, int idx)
{
- int i;
- for (i = 0; i < idx; i++) {
- if (is_rela_equal(&rela[i], &rela[idx]))
- return true;
- }
- return false;
+ /*
+ * Entries are sorted by type, symbol index and addend. That means
+ * that, if a duplicate entry exists, it must be in the preceding slot.
+ */
+ return idx > 0 && cmp_rela(rela + idx, rela + idx - 1) == 0;
}
-static void count_max_entries(Elf_Rela *relas, int num,
+static void count_max_entries(const Elf_Rela *relas, size_t num,
unsigned int *plts, unsigned int *gots)
{
- for (int i = 0; i < num; i++) {
+ for (size_t i = 0; i < num; i++) {
+ if (duplicate_rela(relas, i))
+ continue;
+
switch (ELF_R_TYPE(relas[i].r_info)) {
case R_RISCV_CALL_PLT:
case R_RISCV_PLT32:
- if (!duplicate_rela(relas, i))
- (*plts)++;
+ (*plts)++;
break;
case R_RISCV_GOT_HI20:
- if (!duplicate_rela(relas, i))
- (*gots)++;
+ (*gots)++;
break;
+ default:
+ unreachable();
}
}
}
+static bool rela_needs_plt_got_entry(const Elf_Rela *rela)
+{
+ switch (ELF_R_TYPE(rela->r_info)) {
+ case R_RISCV_CALL_PLT:
+ case R_RISCV_GOT_HI20:
+ case R_RISCV_PLT32:
+ return true;
+ default:
+ return false;
+ }
+}
+
int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
char *secstrings, struct module *mod)
{
+ size_t num_scratch_relas = 0;
unsigned int num_plts = 0;
unsigned int num_gots = 0;
+ Elf_Rela *scratch = NULL;
+ size_t scratch_size = 0;
int i;
/*
@@ -122,9 +149,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
/* Calculate the maxinum number of entries */
for (i = 0; i < ehdr->e_shnum; i++) {
+ size_t num_relas = sechdrs[i].sh_size / sizeof(Elf_Rela);
Elf_Rela *relas = (void *)ehdr + sechdrs[i].sh_offset;
- int num_rela = sechdrs[i].sh_size / sizeof(Elf_Rela);
Elf_Shdr *dst_sec = sechdrs + sechdrs[i].sh_info;
+ size_t scratch_size_needed;
if (sechdrs[i].sh_type != SHT_RELA)
continue;
@@ -133,7 +161,28 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
if (!(dst_sec->sh_flags & SHF_EXECINSTR))
continue;
- count_max_entries(relas, num_rela, &num_plts, &num_gots);
+ /*
+ * apply_relocate_add() relies on HI20 and LO12 relocation pairs being
+ * close together, so sort a copy of the section to avoid interfering.
+ */
+ scratch_size_needed = (num_scratch_relas + num_relas) * sizeof(*scratch);
+ if (scratch_size_needed > scratch_size) {
+ scratch_size = scratch_size_needed;
+ scratch = kvrealloc(scratch, scratch_size, GFP_KERNEL);
+ if (!scratch)
+ return -ENOMEM;
+ }
+
+ for (size_t j = 0; j < num_relas; j++)
+ if (rela_needs_plt_got_entry(&relas[j]))
+ scratch[num_scratch_relas++] = relas[j];
+ }
+
+ if (scratch) {
+ /* sort the accumulated PLT/GOT relocations so duplicates are adjacent */
+ sort(scratch, num_scratch_relas, sizeof(*scratch), cmp_rela, NULL);
+ count_max_entries(scratch, num_scratch_relas, &num_plts, &num_gots);
+ kvfree(scratch);
}
mod->arch.plt.shdr->sh_type = SHT_NOBITS;
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index bbf7ec6a75c0..a0a40889d79a 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -60,7 +60,7 @@ int get_unalign_ctl(struct task_struct *tsk, unsigned long adr)
if (!unaligned_ctl_available())
return -EINVAL;
- return put_user(tsk->thread.align_ctl, (unsigned long __user *)adr);
+ return put_user(tsk->thread.align_ctl, (unsigned int __user *)adr);
}
void __show_regs(struct pt_regs *regs)
diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
index 1989b8cade1b..53836a9235e3 100644
--- a/arch/riscv/kernel/sbi.c
+++ b/arch/riscv/kernel/sbi.c
@@ -299,6 +299,76 @@ static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask,
return 0;
}
+static bool sbi_fwft_supported;
+
+struct fwft_set_req {
+ u32 feature;
+ unsigned long value;
+ unsigned long flags;
+ atomic_t error;
+};
+
+static void cpu_sbi_fwft_set(void *arg)
+{
+ struct fwft_set_req *req = arg;
+ int ret;
+
+ ret = sbi_fwft_set(req->feature, req->value, req->flags);
+ if (ret)
+ atomic_set(&req->error, ret);
+}
+
+/**
+ * sbi_fwft_set() - Set a feature on the local hart
+ * @feature: The feature ID to be set
+ * @value: The feature value to be set
+ * @flags: FWFT feature set flags
+ *
+ * Return: 0 on success, appropriate linux error code otherwise.
+ */
+int sbi_fwft_set(u32 feature, unsigned long value, unsigned long flags)
+{
+ struct sbiret ret;
+
+ if (!sbi_fwft_supported)
+ return -EOPNOTSUPP;
+
+ ret = sbi_ecall(SBI_EXT_FWFT, SBI_EXT_FWFT_SET,
+ feature, value, flags, 0, 0, 0);
+
+ return sbi_err_map_linux_errno(ret.error);
+}
+
+/**
+ * sbi_fwft_set_cpumask() - Set a feature for the specified cpumask
+ * @mask: CPU mask of cpus that need the feature to be set
+ * @feature: The feature ID to be set
+ * @value: The feature value to be set
+ * @flags: FWFT feature set flags
+ *
+ * Return: 0 on success, appropriate linux error code otherwise.
+ */
+int sbi_fwft_set_cpumask(const cpumask_t *mask, u32 feature,
+ unsigned long value, unsigned long flags)
+{
+ struct fwft_set_req req = {
+ .feature = feature,
+ .value = value,
+ .flags = flags,
+ .error = ATOMIC_INIT(0),
+ };
+
+ if (!sbi_fwft_supported)
+ return -EOPNOTSUPP;
+
+ if (feature & SBI_FWFT_GLOBAL_FEATURE_BIT)
+ return -EINVAL;
+
+ on_each_cpu_mask(mask, cpu_sbi_fwft_set, &req, 1);
+
+ return atomic_read(&req.error);
+}
+
/**
* sbi_set_timer() - Program the timer for next timer event.
* @stime_value: The value after which next timer event should fire.
@@ -609,7 +679,7 @@ void __init sbi_init(void)
} else {
__sbi_rfence = __sbi_rfence_v01;
}
- if ((sbi_spec_version >= sbi_mk_version(0, 3)) &&
+ if (sbi_spec_version >= sbi_mk_version(0, 3) &&
sbi_probe_extension(SBI_EXT_SRST)) {
pr_info("SBI SRST extension detected\n");
pm_power_off = sbi_srst_power_off;
@@ -617,11 +687,16 @@ void __init sbi_init(void)
sbi_srst_reboot_nb.priority = 192;
register_restart_handler(&sbi_srst_reboot_nb);
}
- if ((sbi_spec_version >= sbi_mk_version(2, 0)) &&
- (sbi_probe_extension(SBI_EXT_DBCN) > 0)) {
+ if (sbi_spec_version >= sbi_mk_version(2, 0) &&
+ sbi_probe_extension(SBI_EXT_DBCN) > 0) {
pr_info("SBI DBCN extension detected\n");
sbi_debug_console_available = true;
}
+ if (sbi_spec_version >= sbi_mk_version(3, 0) &&
+ sbi_probe_extension(SBI_EXT_FWFT)) {
+ pr_info("SBI FWFT extension detected\n");
+ sbi_fwft_supported = true;
+ }
} else {
__sbi_set_timer = __sbi_set_timer_v01;
__sbi_send_ipi = __sbi_send_ipi_v01;
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index f7c9a1caa83e..14888e5ea19a 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -50,6 +50,7 @@ atomic_t hart_lottery __section(".sdata")
#endif
;
unsigned long boot_cpu_hartid;
+EXPORT_SYMBOL_GPL(boot_cpu_hartid);
/*
* Place kernel memory regions on the resource tree so that
diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
index 249aec8594a9..0b170e18a2be 100644
--- a/arch/riscv/kernel/sys_hwprobe.c
+++ b/arch/riscv/kernel/sys_hwprobe.c
@@ -15,6 +15,7 @@
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/vector.h>
+#include <asm/vendor_extensions/sifive_hwprobe.h>
#include <asm/vendor_extensions/thead_hwprobe.h>
#include <vdso/vsyscall.h>
@@ -96,6 +97,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
* presence in the hart_isa bitmap, are made.
*/
EXT_KEY(ZAAMO);
+ EXT_KEY(ZABHA);
EXT_KEY(ZACAS);
EXT_KEY(ZALRSC);
EXT_KEY(ZAWRS);
@@ -300,6 +302,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
pair->value = riscv_timebase;
break;
+ case RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0:
+ hwprobe_isa_vendor_ext_sifive_0(pair, cpus);
+ break;
+
case RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0:
hwprobe_isa_vendor_ext_thead_0(pair, cpus);
break;
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 9c83848797a7..80230de167de 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -6,6 +6,7 @@
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/irqflags.h>
#include <linux/randomize_kstack.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
@@ -151,7 +152,9 @@ asmlinkage __visible __trap_section void name(struct pt_regs *regs) \
{ \
if (user_mode(regs)) { \
irqentry_enter_from_user_mode(regs); \
+ local_irq_enable(); \
do_trap_error(regs, signo, code, regs->epc, "Oops - " str); \
+ local_irq_disable(); \
irqentry_exit_to_user_mode(regs); \
} else { \
irqentry_state_t state = irqentry_nmi_enter(regs); \
@@ -173,17 +176,14 @@ asmlinkage __visible __trap_section void do_trap_insn_illegal(struct pt_regs *re
if (user_mode(regs)) {
irqentry_enter_from_user_mode(regs);
-
local_irq_enable();
handled = riscv_v_first_use_handler(regs);
-
- local_irq_disable();
-
if (!handled)
do_trap_error(regs, SIGILL, ILL_ILLOPC, regs->epc,
"Oops - illegal instruction");
+ local_irq_disable();
irqentry_exit_to_user_mode(regs);
} else {
irqentry_state_t state = irqentry_nmi_enter(regs);
@@ -308,9 +308,11 @@ asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs)
{
if (user_mode(regs)) {
irqentry_enter_from_user_mode(regs);
+ local_irq_enable();
handle_break(regs);
+ local_irq_disable();
irqentry_exit_to_user_mode(regs);
} else {
irqentry_state_t state = irqentry_nmi_enter(regs);
diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
index 77c788660223..f760e4fcc052 100644
--- a/arch/riscv/kernel/traps_misaligned.c
+++ b/arch/riscv/kernel/traps_misaligned.c
@@ -16,6 +16,7 @@
#include <asm/entry-common.h>
#include <asm/hwprobe.h>
#include <asm/cpufeature.h>
+#include <asm/sbi.h>
#include <asm/vector.h>
#define INSN_MATCH_LB 0x3
@@ -368,9 +369,7 @@ static int handle_scalar_misaligned_load(struct pt_regs *regs)
perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
-#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
*this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED;
-#endif
if (!unaligned_enabled)
return -1;
@@ -462,7 +461,7 @@ static int handle_scalar_misaligned_load(struct pt_regs *regs)
}
if (!fp)
- SET_RD(insn, regs, val.data_ulong << shift >> shift);
+ SET_RD(insn, regs, (long)(val.data_ulong << shift) >> shift);
else if (len == 8)
set_f64_rd(insn, regs, val.data_u64);
else
@@ -626,6 +625,10 @@ bool __init check_vector_unaligned_access_emulated_all_cpus(void)
{
int cpu;
+ /*
+ * While being documented as very slow, schedule_on_each_cpu() is used since
+ * kernel_vector_begin() expects irqs to be enabled or it will panic()
+ */
schedule_on_each_cpu(check_vector_unaligned_access_emulated);
for_each_online_cpu(cpu)
@@ -642,11 +645,23 @@ bool __init check_vector_unaligned_access_emulated_all_cpus(void)
}
#endif
+static bool all_cpus_unaligned_scalar_access_emulated(void)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ if (per_cpu(misaligned_access_speed, cpu) !=
+ RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED)
+ return false;
+
+ return true;
+}
+
#ifdef CONFIG_RISCV_SCALAR_MISALIGNED
static bool unaligned_ctl __read_mostly;
-void check_unaligned_access_emulated(struct work_struct *work __always_unused)
+static void check_unaligned_access_emulated(void *arg __always_unused)
{
int cpu = smp_processor_id();
long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu);
@@ -657,6 +672,13 @@ void check_unaligned_access_emulated(struct work_struct *work __always_unused)
__asm__ __volatile__ (
" "REG_L" %[tmp], 1(%[ptr])\n"
: [tmp] "=r" (tmp_val) : [ptr] "r" (&tmp_var) : "memory");
+}
+
+static int cpu_online_check_unaligned_access_emulated(unsigned int cpu)
+{
+ long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu);
+
+ check_unaligned_access_emulated(NULL);
/*
* If unaligned_ctl is already set, this means that we detected that all
@@ -665,26 +687,23 @@ void check_unaligned_access_emulated(struct work_struct *work __always_unused)
*/
if (unlikely(unaligned_ctl && (*mas_ptr != RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED))) {
pr_crit("CPU misaligned accesses non homogeneous (expected all emulated)\n");
- while (true)
- cpu_relax();
+ return -EINVAL;
}
+
+ return 0;
}
bool __init check_unaligned_access_emulated_all_cpus(void)
{
- int cpu;
-
/*
* We can only support PR_UNALIGN controls if all CPUs have misaligned
* accesses emulated since tasks requesting such control can run on any
* CPU.
*/
- schedule_on_each_cpu(check_unaligned_access_emulated);
+ on_each_cpu(check_unaligned_access_emulated, NULL, 1);
- for_each_online_cpu(cpu)
- if (per_cpu(misaligned_access_speed, cpu)
- != RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED)
- return false;
+ if (!all_cpus_unaligned_scalar_access_emulated())
+ return false;
unaligned_ctl = true;
return true;
@@ -699,4 +718,73 @@ bool __init check_unaligned_access_emulated_all_cpus(void)
{
return false;
}
+static int cpu_online_check_unaligned_access_emulated(unsigned int cpu)
+{
+ return 0;
+}
+#endif
+
+static bool misaligned_traps_delegated;
+
+#ifdef CONFIG_RISCV_SBI
+
+static int cpu_online_sbi_unaligned_setup(unsigned int cpu)
+{
+ if (sbi_fwft_set(SBI_FWFT_MISALIGNED_EXC_DELEG, 1, 0) &&
+ misaligned_traps_delegated) {
+ pr_crit("Misaligned trap delegation non homogeneous (expected delegated)");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+void __init unaligned_access_init(void)
+{
+ int ret;
+
+ ret = sbi_fwft_set_online_cpus(SBI_FWFT_MISALIGNED_EXC_DELEG, 1, 0);
+ if (ret)
+ return;
+
+ misaligned_traps_delegated = true;
+ pr_info("SBI misaligned access exception delegation ok\n");
+ /*
+ * Note that we don't have to take any specific action here, if
+ * the delegation is successful, then
+ * check_unaligned_access_emulated() will verify that indeed the
+ * platform traps on misaligned accesses.
+ */
+}
+#else
+void __init unaligned_access_init(void) {}
+
+static int cpu_online_sbi_unaligned_setup(unsigned int cpu __always_unused)
+{
+ return 0;
+}
+
#endif
+
+int cpu_online_unaligned_access_init(unsigned int cpu)
+{
+ int ret;
+
+ ret = cpu_online_sbi_unaligned_setup(cpu);
+ if (ret)
+ return ret;
+
+ return cpu_online_check_unaligned_access_emulated(cpu);
+}
+
+bool misaligned_traps_can_delegate(void)
+{
+ /*
+ * Either we successfully requested misaligned traps delegation for all
+ * CPUs, or the SBI does not implement the FWFT extension but delegated
+ * the exception by default.
+ */
+ return misaligned_traps_delegated ||
+ all_cpus_unaligned_scalar_access_emulated();
+}
+EXPORT_SYMBOL_GPL(misaligned_traps_can_delegate);
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
index b8ba13819d05..ae2068425fbc 100644
--- a/arch/riscv/kernel/unaligned_access_speed.c
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -236,6 +236,11 @@ arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
static int riscv_online_cpu(unsigned int cpu)
{
+ int ret = cpu_online_unaligned_access_init(cpu);
+
+ if (ret)
+ return ret;
+
/* We are already set since the last check */
if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) {
goto exit;
@@ -248,7 +253,6 @@ static int riscv_online_cpu(unsigned int cpu)
{
static struct page *buf;
- check_unaligned_access_emulated(NULL);
buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
if (!buf) {
pr_warn("Allocation failure, not measuring misaligned performance\n");
@@ -439,6 +443,8 @@ static int __init check_unaligned_access_all_cpus(void)
{
int cpu;
+ unaligned_access_init();
+
if (unaligned_scalar_speed_param != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) {
pr_info("scalar unaligned access speed set to '%s' (%lu) by command line\n",
speed_str[unaligned_scalar_speed_param], unaligned_scalar_speed_param);
diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
index cc2895d1fbc2..3a8e038b10a2 100644
--- a/arch/riscv/kernel/vdso.c
+++ b/arch/riscv/kernel/vdso.c
@@ -136,7 +136,7 @@ static int __setup_additional_pages(struct mm_struct *mm,
ret =
_install_special_mapping(mm, vdso_base, vdso_text_len,
- (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
+ (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | VM_SEALED_SYSMAP),
vdso_info->cm);
if (IS_ERR(ret))
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index ad73607abc28..9ebb5e590f93 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -13,9 +13,17 @@ vdso-syms += flush_icache
vdso-syms += hwprobe
vdso-syms += sys_hwprobe
+ifdef CONFIG_VDSO_GETRANDOM
+vdso-syms += getrandom
+endif
+
# Files to link into the vdso
obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o
+ifdef CONFIG_VDSO_GETRANDOM
+obj-vdso += vgetrandom-chacha.o
+endif
+
ccflags-y := -fno-stack-protector
ccflags-y += -DDISABLE_BRANCH_PROFILING
ccflags-y += -fno-builtin
@@ -24,6 +32,10 @@ ifneq ($(c-gettimeofday-y),)
CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y)
endif
+ifneq ($(c-getrandom-y),)
+ CFLAGS_getrandom.o += -fPIC -include $(c-getrandom-y)
+endif
+
CFLAGS_hwprobe.o += -fPIC
# Build rules
@@ -38,6 +50,7 @@ endif
# Disable -pg to prevent insert call site
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS)
+CFLAGS_REMOVE_getrandom.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS)
CFLAGS_REMOVE_hwprobe.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS)
# Force dependency
@@ -47,7 +60,7 @@ $(obj)/vdso.o: $(obj)/vdso.so
$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
$(call if_changed,vdsold_and_check)
LDFLAGS_vdso.so.dbg = -shared -soname=linux-vdso.so.1 \
- --build-id=sha1 --hash-style=both --eh-frame-hdr
+ --build-id=sha1 --eh-frame-hdr
# strip rule for the .so file
$(obj)/%.so: OBJCOPYFLAGS := -S
diff --git a/arch/riscv/kernel/vdso/getrandom.c b/arch/riscv/kernel/vdso/getrandom.c
new file mode 100644
index 000000000000..f21922e8cebd
--- /dev/null
+++ b/arch/riscv/kernel/vdso/getrandom.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved.
+ */
+#include <linux/types.h>
+
+ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
+{
+ return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
+}
diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S
index 8e86965a8aae..c29ef12a63bb 100644
--- a/arch/riscv/kernel/vdso/vdso.lds.S
+++ b/arch/riscv/kernel/vdso/vdso.lds.S
@@ -30,7 +30,7 @@ SECTIONS
*(.data .data.* .gnu.linkonce.d.*)
*(.dynbss)
*(.bss .bss.* .gnu.linkonce.b.*)
- }
+ } :text
.note : { *(.note.*) } :text :note
@@ -80,6 +80,9 @@ VERSION
#ifndef COMPAT_VDSO
__vdso_riscv_hwprobe;
#endif
+#if defined(CONFIG_VDSO_GETRANDOM) && !defined(COMPAT_VDSO)
+ __vdso_getrandom;
+#endif
local: *;
};
}
diff --git a/arch/riscv/kernel/vdso/vgetrandom-chacha.S b/arch/riscv/kernel/vdso/vgetrandom-chacha.S
new file mode 100644
index 000000000000..5f0dad8f2373
--- /dev/null
+++ b/arch/riscv/kernel/vdso/vgetrandom-chacha.S
@@ -0,0 +1,249 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2025 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved.
+ *
+ * Based on arch/loongarch/vdso/vgetrandom-chacha.S.
+ */
+
+#include <asm/asm.h>
+#include <linux/linkage.h>
+
+.text
+
+.macro ROTRI rd rs imm
+ slliw t0, \rs, 32 - \imm
+ srliw \rd, \rs, \imm
+ or \rd, \rd, t0
+.endm
+
+.macro OP_4REG op d0 d1 d2 d3 s0 s1 s2 s3
+ \op \d0, \d0, \s0
+ \op \d1, \d1, \s1
+ \op \d2, \d2, \s2
+ \op \d3, \d3, \s3
+.endm
+
+/*
+ * a0: output bytes
+ * a1: 32-byte key input
+ * a2: 8-byte counter input/output
+ * a3: number of 64-byte blocks to write to output
+ */
+SYM_FUNC_START(__arch_chacha20_blocks_nostack)
+
+#define output a0
+#define key a1
+#define counter a2
+#define nblocks a3
+#define i a4
+#define state0 s0
+#define state1 s1
+#define state2 s2
+#define state3 s3
+#define state4 s4
+#define state5 s5
+#define state6 s6
+#define state7 s7
+#define state8 s8
+#define state9 s9
+#define state10 s10
+#define state11 s11
+#define state12 a5
+#define state13 a6
+#define state14 a7
+#define state15 t1
+#define cnt t2
+#define copy0 t3
+#define copy1 t4
+#define copy2 t5
+#define copy3 t6
+
+/* Packs to be used with OP_4REG */
+#define line0 state0, state1, state2, state3
+#define line1 state4, state5, state6, state7
+#define line2 state8, state9, state10, state11
+#define line3 state12, state13, state14, state15
+
+#define line1_perm state5, state6, state7, state4
+#define line2_perm state10, state11, state8, state9
+#define line3_perm state15, state12, state13, state14
+
+#define copy copy0, copy1, copy2, copy3
+
+#define _16 16, 16, 16, 16
+#define _20 20, 20, 20, 20
+#define _24 24, 24, 24, 24
+#define _25 25, 25, 25, 25
+
+ /*
+ * The ABI requires s0-s9 saved.
+ * This does not violate the stack-less requirement: no sensitive data
+ * is spilled onto the stack.
+ */
+ addi sp, sp, -12*SZREG
+ REG_S s0, (sp)
+ REG_S s1, SZREG(sp)
+ REG_S s2, 2*SZREG(sp)
+ REG_S s3, 3*SZREG(sp)
+ REG_S s4, 4*SZREG(sp)
+ REG_S s5, 5*SZREG(sp)
+ REG_S s6, 6*SZREG(sp)
+ REG_S s7, 7*SZREG(sp)
+ REG_S s8, 8*SZREG(sp)
+ REG_S s9, 9*SZREG(sp)
+ REG_S s10, 10*SZREG(sp)
+ REG_S s11, 11*SZREG(sp)
+
+ ld cnt, (counter)
+
+ li copy0, 0x61707865
+ li copy1, 0x3320646e
+ li copy2, 0x79622d32
+ li copy3, 0x6b206574
+
+.Lblock:
+ /* state[0,1,2,3] = "expand 32-byte k" */
+ mv state0, copy0
+ mv state1, copy1
+ mv state2, copy2
+ mv state3, copy3
+
+ /* state[4,5,..,11] = key */
+ lw state4, (key)
+ lw state5, 4(key)
+ lw state6, 8(key)
+ lw state7, 12(key)
+ lw state8, 16(key)
+ lw state9, 20(key)
+ lw state10, 24(key)
+ lw state11, 28(key)
+
+ /* state[12,13] = counter */
+ mv state12, cnt
+ srli state13, cnt, 32
+
+ /* state[14,15] = 0 */
+ mv state14, zero
+ mv state15, zero
+
+ li i, 10
+.Lpermute:
+ /* odd round */
+ OP_4REG addw line0, line1
+ OP_4REG xor line3, line0
+ OP_4REG ROTRI line3, _16
+
+ OP_4REG addw line2, line3
+ OP_4REG xor line1, line2
+ OP_4REG ROTRI line1, _20
+
+ OP_4REG addw line0, line1
+ OP_4REG xor line3, line0
+ OP_4REG ROTRI line3, _24
+
+ OP_4REG addw line2, line3
+ OP_4REG xor line1, line2
+ OP_4REG ROTRI line1, _25
+
+ /* even round */
+ OP_4REG addw line0, line1_perm
+ OP_4REG xor line3_perm, line0
+ OP_4REG ROTRI line3_perm, _16
+
+ OP_4REG addw line2_perm, line3_perm
+ OP_4REG xor line1_perm, line2_perm
+ OP_4REG ROTRI line1_perm, _20
+
+ OP_4REG addw line0, line1_perm
+ OP_4REG xor line3_perm, line0
+ OP_4REG ROTRI line3_perm, _24
+
+ OP_4REG addw line2_perm, line3_perm
+ OP_4REG xor line1_perm, line2_perm
+ OP_4REG ROTRI line1_perm, _25
+
+ addi i, i, -1
+ bnez i, .Lpermute
+
+ /* output[0,1,2,3] = copy[0,1,2,3] + state[0,1,2,3] */
+ OP_4REG addw line0, copy
+ sw state0, (output)
+ sw state1, 4(output)
+ sw state2, 8(output)
+ sw state3, 12(output)
+
+ /* from now on state[0,1,2,3] are scratch registers */
+
+ /* state[0,1,2,3] = lo(key) */
+ lw state0, (key)
+ lw state1, 4(key)
+ lw state2, 8(key)
+ lw state3, 12(key)
+
+ /* output[4,5,6,7] = state[0,1,2,3] + state[4,5,6,7] */
+ OP_4REG addw line1, line0
+ sw state4, 16(output)
+ sw state5, 20(output)
+ sw state6, 24(output)
+ sw state7, 28(output)
+
+ /* state[0,1,2,3] = hi(key) */
+ lw state0, 16(key)
+ lw state1, 20(key)
+ lw state2, 24(key)
+ lw state3, 28(key)
+
+ /* output[8,9,10,11] = tmp[0,1,2,3] + state[8,9,10,11] */
+ OP_4REG addw line2, line0
+ sw state8, 32(output)
+ sw state9, 36(output)
+ sw state10, 40(output)
+ sw state11, 44(output)
+
+ /* output[12,13,14,15] = state[12,13,14,15] + [cnt_lo, cnt_hi, 0, 0] */
+ addw state12, state12, cnt
+ srli state0, cnt, 32
+ addw state13, state13, state0
+ sw state12, 48(output)
+ sw state13, 52(output)
+ sw state14, 56(output)
+ sw state15, 60(output)
+
+ /* ++counter */
+ addi cnt, cnt, 1
+
+ /* output += 64 */
+ addi output, output, 64
+ /* --nblocks */
+ addi nblocks, nblocks, -1
+ bnez nblocks, .Lblock
+
+ /* counter = [cnt_lo, cnt_hi] */
+ sd cnt, (counter)
+
+ /* Zero out the potentially sensitive regs, in case nothing uses these
+ * again. As at now copy[0,1,2,3] just contains "expand 32-byte k" and
+ * state[0,...,11] are s0-s11 those we'll restore in the epilogue, we
+ * only need to zero state[12,...,15].
+ */
+ mv state12, zero
+ mv state13, zero
+ mv state14, zero
+ mv state15, zero
+
+ REG_L s0, (sp)
+ REG_L s1, SZREG(sp)
+ REG_L s2, 2*SZREG(sp)
+ REG_L s3, 3*SZREG(sp)
+ REG_L s4, 4*SZREG(sp)
+ REG_L s5, 5*SZREG(sp)
+ REG_L s6, 6*SZREG(sp)
+ REG_L s7, 7*SZREG(sp)
+ REG_L s8, 8*SZREG(sp)
+ REG_L s9, 9*SZREG(sp)
+ REG_L s10, 10*SZREG(sp)
+ REG_L s11, 11*SZREG(sp)
+ addi sp, sp, 12*SZREG
+
+ ret
+SYM_FUNC_END(__arch_chacha20_blocks_nostack)
diff --git a/arch/riscv/kernel/vendor_extensions.c b/arch/riscv/kernel/vendor_extensions.c
index 9feb7f67a0a3..92d8ff81f42c 100644
--- a/arch/riscv/kernel/vendor_extensions.c
+++ b/arch/riscv/kernel/vendor_extensions.c
@@ -6,6 +6,7 @@
#include <asm/vendorid_list.h>
#include <asm/vendor_extensions.h>
#include <asm/vendor_extensions/andes.h>
+#include <asm/vendor_extensions/sifive.h>
#include <asm/vendor_extensions/thead.h>
#include <linux/array_size.h>
@@ -15,6 +16,9 @@ struct riscv_isa_vendor_ext_data_list *riscv_isa_vendor_ext_list[] = {
#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES
&riscv_isa_vendor_ext_list_andes,
#endif
+#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE
+ &riscv_isa_vendor_ext_list_sifive,
+#endif
#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD
&riscv_isa_vendor_ext_list_thead,
#endif
@@ -45,6 +49,12 @@ bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsig
cpu_bmap = riscv_isa_vendor_ext_list_andes.per_hart_isa_bitmap;
break;
#endif
+ #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE
+ case SIFIVE_VENDOR_ID:
+ bmap = &riscv_isa_vendor_ext_list_sifive.all_harts_isa_bitmap;
+ cpu_bmap = riscv_isa_vendor_ext_list_sifive.per_hart_isa_bitmap;
+ break;
+ #endif
#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD
case THEAD_VENDOR_ID:
bmap = &riscv_isa_vendor_ext_list_thead.all_harts_isa_bitmap;
diff --git a/arch/riscv/kernel/vendor_extensions/Makefile b/arch/riscv/kernel/vendor_extensions/Makefile
index 866414c81a9f..a4eca96d1c8a 100644
--- a/arch/riscv/kernel/vendor_extensions/Makefile
+++ b/arch/riscv/kernel/vendor_extensions/Makefile
@@ -1,5 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_ANDES) += andes.o
+obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE) += sifive.o
+obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE) += sifive_hwprobe.o
obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead.o
obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead_hwprobe.o
diff --git a/arch/riscv/kernel/vendor_extensions/sifive.c b/arch/riscv/kernel/vendor_extensions/sifive.c
new file mode 100644
index 000000000000..8fcf67e8c07f
--- /dev/null
+++ b/arch/riscv/kernel/vendor_extensions/sifive.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <asm/cpufeature.h>
+#include <asm/vendor_extensions.h>
+#include <asm/vendor_extensions/sifive.h>
+
+#include <linux/array_size.h>
+#include <linux/types.h>
+
+/* All SiFive vendor extensions supported in Linux */
+static const struct riscv_isa_ext_data riscv_isa_vendor_ext_sifive[] = {
+ __RISCV_ISA_EXT_DATA(xsfvfnrclipxfqf, RISCV_ISA_VENDOR_EXT_XSFVFNRCLIPXFQF),
+ __RISCV_ISA_EXT_DATA(xsfvfwmaccqqq, RISCV_ISA_VENDOR_EXT_XSFVFWMACCQQQ),
+ __RISCV_ISA_EXT_DATA(xsfvqmaccdod, RISCV_ISA_VENDOR_EXT_XSFVQMACCDOD),
+ __RISCV_ISA_EXT_DATA(xsfvqmaccqoq, RISCV_ISA_VENDOR_EXT_XSFVQMACCQOQ),
+};
+
+struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_sifive = {
+ .ext_data_count = ARRAY_SIZE(riscv_isa_vendor_ext_sifive),
+ .ext_data = riscv_isa_vendor_ext_sifive,
+};
diff --git a/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c b/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c
new file mode 100644
index 000000000000..1f77f6309763
--- /dev/null
+++ b/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <asm/vendor_extensions/sifive.h>
+#include <asm/vendor_extensions/sifive_hwprobe.h>
+#include <asm/vendor_extensions/vendor_hwprobe.h>
+
+#include <linux/cpumask.h>
+#include <linux/types.h>
+
+#include <uapi/asm/hwprobe.h>
+#include <uapi/asm/vendor/sifive.h>
+
+void hwprobe_isa_vendor_ext_sifive_0(struct riscv_hwprobe *pair, const struct cpumask *cpus)
+{
+ VENDOR_EXTENSION_SUPPORTED(pair, cpus,
+ riscv_isa_vendor_ext_list_sifive.per_hart_isa_bitmap, {
+ VENDOR_EXT_KEY(XSFVQMACCDOD);
+ VENDOR_EXT_KEY(XSFVQMACCQOQ);
+ VENDOR_EXT_KEY(XSFVFNRCLIPXFQF);
+ VENDOR_EXT_KEY(XSFVFWMACCQQQ);
+ });
+}
diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
index 19afd1f23537..dad318185660 100644
--- a/arch/riscv/kvm/aia.c
+++ b/arch/riscv/kvm/aia.c
@@ -30,28 +30,6 @@ unsigned int kvm_riscv_aia_nr_hgei;
unsigned int kvm_riscv_aia_max_ids;
DEFINE_STATIC_KEY_FALSE(kvm_riscv_aia_available);
-static int aia_find_hgei(struct kvm_vcpu *owner)
-{
- int i, hgei;
- unsigned long flags;
- struct aia_hgei_control *hgctrl = get_cpu_ptr(&aia_hgei);
-
- raw_spin_lock_irqsave(&hgctrl->lock, flags);
-
- hgei = -1;
- for (i = 1; i <= kvm_riscv_aia_nr_hgei; i++) {
- if (hgctrl->owners[i] == owner) {
- hgei = i;
- break;
- }
- }
-
- raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
-
- put_cpu_ptr(&aia_hgei);
- return hgei;
-}
-
static inline unsigned long aia_hvictl_value(bool ext_irq_pending)
{
unsigned long hvictl;
@@ -95,7 +73,6 @@ void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu)
bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
{
- int hgei;
unsigned long seip;
if (!kvm_riscv_aia_available())
@@ -114,11 +91,7 @@ bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
if (!kvm_riscv_aia_initialized(vcpu->kvm) || !seip)
return false;
- hgei = aia_find_hgei(vcpu);
- if (hgei > 0)
- return !!(ncsr_read(CSR_HGEIP) & BIT(hgei));
-
- return false;
+ return kvm_riscv_vcpu_aia_imsic_has_interrupt(vcpu);
}
void kvm_riscv_vcpu_aia_update_hvip(struct kvm_vcpu *vcpu)
@@ -164,6 +137,9 @@ void kvm_riscv_vcpu_aia_load(struct kvm_vcpu *vcpu, int cpu)
csr_write(CSR_HVIPRIO2H, csr->hviprio2h);
#endif
}
+
+ if (kvm_riscv_aia_initialized(vcpu->kvm))
+ kvm_riscv_vcpu_aia_imsic_load(vcpu, cpu);
}
void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu)
@@ -174,6 +150,9 @@ void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu)
if (!kvm_riscv_aia_available())
return;
+ if (kvm_riscv_aia_initialized(vcpu->kvm))
+ kvm_riscv_vcpu_aia_imsic_put(vcpu);
+
if (kvm_riscv_nacl_available()) {
nsh = nacl_shmem();
csr->vsiselect = nacl_csr_read(nsh, CSR_VSISELECT);
@@ -472,22 +451,6 @@ void kvm_riscv_aia_free_hgei(int cpu, int hgei)
raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
}
-void kvm_riscv_aia_wakeon_hgei(struct kvm_vcpu *owner, bool enable)
-{
- int hgei;
-
- if (!kvm_riscv_aia_available())
- return;
-
- hgei = aia_find_hgei(owner);
- if (hgei > 0) {
- if (enable)
- csr_set(CSR_HGEIE, BIT(hgei));
- else
- csr_clear(CSR_HGEIE, BIT(hgei));
- }
-}
-
static irqreturn_t hgei_interrupt(int irq, void *dev_id)
{
int i;
diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c
index 29ef9c2133a9..2ff865943ebb 100644
--- a/arch/riscv/kvm/aia_imsic.c
+++ b/arch/riscv/kvm/aia_imsic.c
@@ -676,6 +676,48 @@ static void imsic_swfile_update(struct kvm_vcpu *vcpu,
imsic_swfile_extirq_update(vcpu);
}
+bool kvm_riscv_vcpu_aia_imsic_has_interrupt(struct kvm_vcpu *vcpu)
+{
+ struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+ unsigned long flags;
+ bool ret = false;
+
+ /*
+ * The IMSIC SW-file directly injects interrupt via hvip so
+ * only check for interrupt when IMSIC VS-file is being used.
+ */
+
+ read_lock_irqsave(&imsic->vsfile_lock, flags);
+ if (imsic->vsfile_cpu > -1)
+ ret = !!(csr_read(CSR_HGEIP) & BIT(imsic->vsfile_hgei));
+ read_unlock_irqrestore(&imsic->vsfile_lock, flags);
+
+ return ret;
+}
+
+void kvm_riscv_vcpu_aia_imsic_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ /*
+ * No need to explicitly clear HGEIE CSR bits because the
+ * hgei interrupt handler (aka hgei_interrupt()) will always
+ * clear it for us.
+ */
+}
+
+void kvm_riscv_vcpu_aia_imsic_put(struct kvm_vcpu *vcpu)
+{
+ struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+ unsigned long flags;
+
+ if (!kvm_vcpu_is_blocking(vcpu))
+ return;
+
+ read_lock_irqsave(&imsic->vsfile_lock, flags);
+ if (imsic->vsfile_cpu > -1)
+ csr_set(CSR_HGEIE, BIT(imsic->vsfile_hgei));
+ read_unlock_irqrestore(&imsic->vsfile_lock, flags);
+}
+
void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu)
{
unsigned long flags;
@@ -781,6 +823,9 @@ int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu)
* producers to the new IMSIC VS-file.
*/
+ /* Ensure HGEIE CSR bit is zero before using the new IMSIC VS-file */
+ csr_clear(CSR_HGEIE, BIT(new_vsfile_hgei));
+
/* Zero-out new IMSIC VS-file */
imsic_vsfile_local_clear(new_vsfile_hgei, imsic->nr_hw_eix);
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index e0a01af426ff..0462863206ca 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -207,16 +207,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
return kvm_riscv_vcpu_timer_pending(vcpu);
}
-void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
-{
- kvm_riscv_aia_wakeon_hgei(vcpu, true);
-}
-
-void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
-{
- kvm_riscv_aia_wakeon_hgei(vcpu, false);
-}
-
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) &&
diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c
index 5fbf3f94f1e8..b17fad091bab 100644
--- a/arch/riscv/kvm/vcpu_sbi_replace.c
+++ b/arch/riscv/kvm/vcpu_sbi_replace.c
@@ -103,7 +103,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_FENCE_I_SENT);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
- if (cp->a2 == 0 && cp->a3 == 0)
+ if ((cp->a2 == 0 && cp->a3 == 0) || cp->a3 == -1UL)
kvm_riscv_hfence_vvma_all(vcpu->kvm, hbase, hmask);
else
kvm_riscv_hfence_vvma_gva(vcpu->kvm, hbase, hmask,
@@ -111,7 +111,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_SENT);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
- if (cp->a2 == 0 && cp->a3 == 0)
+ if ((cp->a2 == 0 && cp->a3 == 0) || cp->a3 == -1UL)
kvm_riscv_hfence_vvma_asid_all(vcpu->kvm,
hbase, hmask, cp->a4);
else
@@ -127,9 +127,9 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID:
/*
* Until nested virtualization is implemented, the
- * SBI HFENCE calls should be treated as NOPs
+ * SBI HFENCE calls should return not supported
+ * hence fallthrough.
*/
- break;
default:
retdata->err_val = SBI_ERR_NOT_SUPPORTED;
}
diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c
index ff672fa71fcc..85a7262115e1 100644
--- a/arch/riscv/kvm/vcpu_timer.c
+++ b/arch/riscv/kvm/vcpu_timer.c
@@ -345,8 +345,24 @@ void kvm_riscv_vcpu_timer_save(struct kvm_vcpu *vcpu)
/*
* The vstimecmp CSRs are saved by kvm_riscv_vcpu_timer_sync()
* upon every VM exit so no need to save here.
+ *
+ * If VS-timer expires when no VCPU running on a host CPU then
+ * WFI executed by such host CPU will be effective NOP resulting
+ * in no power savings. This is because as-per RISC-V Privileged
+ * specificaiton: "WFI is also required to resume execution for
+ * locally enabled interrupts pending at any privilege level,
+ * regardless of the global interrupt enable at each privilege
+ * level."
+ *
+ * To address the above issue, vstimecmp CSR must be set to -1UL
+ * over here when VCPU is scheduled-out or exits to user space.
*/
+ csr_write(CSR_VSTIMECMP, -1UL);
+#if defined(CONFIG_32BIT)
+ csr_write(CSR_VSTIMECMPH, -1UL);
+#endif
+
/* timer should be enabled for the remaining operations */
if (unlikely(!t->init_done))
return;
diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c
index be38a93cedae..7bbdfc6d4552 100644
--- a/arch/riscv/lib/riscv_v_helpers.c
+++ b/arch/riscv/lib/riscv_v_helpers.c
@@ -16,8 +16,11 @@
#ifdef CONFIG_MMU
size_t riscv_v_usercopy_threshold = CONFIG_RISCV_ISA_V_UCOPY_THRESHOLD;
int __asm_vector_usercopy(void *dst, void *src, size_t n);
+int __asm_vector_usercopy_sum_enabled(void *dst, void *src, size_t n);
int fallback_scalar_usercopy(void *dst, void *src, size_t n);
-asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n)
+int fallback_scalar_usercopy_sum_enabled(void *dst, void *src, size_t n);
+asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n,
+ bool enable_sum)
{
size_t remain, copied;
@@ -26,7 +29,8 @@ asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n)
goto fallback;
kernel_vector_begin();
- remain = __asm_vector_usercopy(dst, src, n);
+ remain = enable_sum ? __asm_vector_usercopy(dst, src, n) :
+ __asm_vector_usercopy_sum_enabled(dst, src, n);
kernel_vector_end();
if (remain) {
@@ -40,6 +44,7 @@ asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n)
return remain;
fallback:
- return fallback_scalar_usercopy(dst, src, n);
+ return enable_sum ? fallback_scalar_usercopy(dst, src, n) :
+ fallback_scalar_usercopy_sum_enabled(dst, src, n);
}
#endif
diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index 6a9f116bb545..4efea1b3326c 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -17,14 +17,43 @@ SYM_FUNC_START(__asm_copy_to_user)
ALTERNATIVE("j fallback_scalar_usercopy", "nop", 0, RISCV_ISA_EXT_ZVE32X, CONFIG_RISCV_ISA_V)
REG_L t0, riscv_v_usercopy_threshold
bltu a2, t0, fallback_scalar_usercopy
- tail enter_vector_usercopy
+ li a3, 1
+ tail enter_vector_usercopy
#endif
-SYM_FUNC_START(fallback_scalar_usercopy)
+SYM_FUNC_END(__asm_copy_to_user)
+EXPORT_SYMBOL(__asm_copy_to_user)
+SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user)
+EXPORT_SYMBOL(__asm_copy_from_user)
+SYM_FUNC_START(fallback_scalar_usercopy)
/* Enable access to user memory */
- li t6, SR_SUM
- csrs CSR_STATUS, t6
+ li t6, SR_SUM
+ csrs CSR_STATUS, t6
+ mv t6, ra
+ call fallback_scalar_usercopy_sum_enabled
+
+ /* Disable access to user memory */
+ mv ra, t6
+ li t6, SR_SUM
+ csrc CSR_STATUS, t6
+ ret
+SYM_FUNC_END(fallback_scalar_usercopy)
+
+SYM_FUNC_START(__asm_copy_to_user_sum_enabled)
+#ifdef CONFIG_RISCV_ISA_V
+ ALTERNATIVE("j fallback_scalar_usercopy_sum_enabled", "nop", 0, RISCV_ISA_EXT_ZVE32X, CONFIG_RISCV_ISA_V)
+ REG_L t0, riscv_v_usercopy_threshold
+ bltu a2, t0, fallback_scalar_usercopy_sum_enabled
+ li a3, 0
+ tail enter_vector_usercopy
+#endif
+SYM_FUNC_END(__asm_copy_to_user_sum_enabled)
+SYM_FUNC_ALIAS(__asm_copy_from_user_sum_enabled, __asm_copy_to_user_sum_enabled)
+EXPORT_SYMBOL(__asm_copy_from_user_sum_enabled)
+EXPORT_SYMBOL(__asm_copy_to_user_sum_enabled)
+
+SYM_FUNC_START(fallback_scalar_usercopy_sum_enabled)
/*
* Save the terminal address which will be used to compute the number
* of bytes copied in case of a fixup exception.
@@ -178,23 +207,12 @@ SYM_FUNC_START(fallback_scalar_usercopy)
bltu a0, t0, 4b /* t0 - end of dst */
.Lout_copy_user:
- /* Disable access to user memory */
- csrc CSR_STATUS, t6
li a0, 0
ret
-
- /* Exception fixup code */
10:
- /* Disable access to user memory */
- csrc CSR_STATUS, t6
sub a0, t5, a0
ret
-SYM_FUNC_END(__asm_copy_to_user)
-SYM_FUNC_END(fallback_scalar_usercopy)
-EXPORT_SYMBOL(__asm_copy_to_user)
-SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user)
-EXPORT_SYMBOL(__asm_copy_from_user)
-
+SYM_FUNC_END(fallback_scalar_usercopy_sum_enabled)
SYM_FUNC_START(__clear_user)
diff --git a/arch/riscv/lib/uaccess_vector.S b/arch/riscv/lib/uaccess_vector.S
index 7c45f26de4f7..03b5560609a2 100644
--- a/arch/riscv/lib/uaccess_vector.S
+++ b/arch/riscv/lib/uaccess_vector.S
@@ -24,7 +24,18 @@ SYM_FUNC_START(__asm_vector_usercopy)
/* Enable access to user memory */
li t6, SR_SUM
csrs CSR_STATUS, t6
+ mv t6, ra
+ call __asm_vector_usercopy_sum_enabled
+
+ /* Disable access to user memory */
+ mv ra, t6
+ li t6, SR_SUM
+ csrc CSR_STATUS, t6
+ ret
+SYM_FUNC_END(__asm_vector_usercopy)
+
+SYM_FUNC_START(__asm_vector_usercopy_sum_enabled)
loop:
vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
fixup vle8.v vData, (pSrc), 10f
@@ -36,8 +47,6 @@ loop:
/* Exception fixup for vector load is shared with normal exit */
10:
- /* Disable access to user memory */
- csrc CSR_STATUS, t6
mv a0, iNum
ret
@@ -49,4 +58,4 @@ loop:
csrr t2, CSR_VSTART
sub iNum, iNum, t2
j 10b
-SYM_FUNC_END(__asm_vector_usercopy)
+SYM_FUNC_END(__asm_vector_usercopy_sum_enabled)
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index b8e96dfff19d..4ca5aafce22e 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -24,7 +24,20 @@ void flush_icache_all(void)
if (num_online_cpus() < 2)
return;
- else if (riscv_use_sbi_for_rfence())
+
+ /*
+ * Make sure all previous writes to the D$ are ordered before making
+ * the IPI. The RISC-V spec states that a hart must execute a data fence
+ * before triggering a remote fence.i in order to make the modification
+ * visable for remote harts.
+ *
+ * IPIs on RISC-V are triggered by MMIO writes to either CLINT or
+ * S-IMSIC, so the fence ensures previous data writes "happen before"
+ * the MMIO.
+ */
+ RISCV_FENCE(w, o);
+
+ if (riscv_use_sbi_for_rfence())
sbi_remote_fence_i(NULL);
else
on_each_cpu(ipi_remote_fence_i, NULL, 1);
@@ -101,6 +114,9 @@ EXPORT_SYMBOL_GPL(riscv_cbom_block_size);
unsigned int riscv_cboz_block_size;
EXPORT_SYMBOL_GPL(riscv_cboz_block_size);
+unsigned int riscv_cbop_block_size;
+EXPORT_SYMBOL_GPL(riscv_cbop_block_size);
+
static void __init cbo_get_block_size(struct device_node *node,
const char *name, u32 *block_size,
unsigned long *first_hartid)
@@ -125,8 +141,8 @@ static void __init cbo_get_block_size(struct device_node *node,
void __init riscv_init_cbo_blocksizes(void)
{
- unsigned long cbom_hartid, cboz_hartid;
- u32 cbom_block_size = 0, cboz_block_size = 0;
+ unsigned long cbom_hartid, cboz_hartid, cbop_hartid;
+ u32 cbom_block_size = 0, cboz_block_size = 0, cbop_block_size = 0;
struct device_node *node;
struct acpi_table_header *rhct;
acpi_status status;
@@ -138,13 +154,15 @@ void __init riscv_init_cbo_blocksizes(void)
&cbom_block_size, &cbom_hartid);
cbo_get_block_size(node, "riscv,cboz-block-size",
&cboz_block_size, &cboz_hartid);
+ cbo_get_block_size(node, "riscv,cbop-block-size",
+ &cbop_block_size, &cbop_hartid);
}
} else {
status = acpi_get_table(ACPI_SIG_RHCT, 0, &rhct);
if (ACPI_FAILURE(status))
return;
- acpi_get_cbo_block_size(rhct, &cbom_block_size, &cboz_block_size, NULL);
+ acpi_get_cbo_block_size(rhct, &cbom_block_size, &cboz_block_size, &cbop_block_size);
acpi_put_table((struct acpi_table_header *)rhct);
}
@@ -153,6 +171,9 @@ void __init riscv_init_cbo_blocksizes(void)
if (cboz_block_size)
riscv_cboz_block_size = cboz_block_size;
+
+ if (cbop_block_size)
+ riscv_cbop_block_size = cbop_block_size;
}
#ifdef CONFIG_SMP
diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c
index 4ae67324f992..8b6c0a112a8d 100644
--- a/arch/riscv/mm/pgtable.c
+++ b/arch/riscv/mm/pgtable.c
@@ -154,4 +154,14 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
flush_tlb_mm(vma->vm_mm);
return pmd;
}
+
+pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp)
+{
+ VM_WARN_ON_ONCE(!pud_present(*pudp));
+ pud_t old = pudp_establish(vma, address, pudp, pud_mkinvalid(*pudp));
+
+ flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE);
+ return old;
+}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index f9e27ba1df99..e737ba7949b1 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -7,6 +7,27 @@
#include <linux/mmu_notifier.h>
#include <asm/sbi.h>
#include <asm/mmu_context.h>
+#include <asm/cpufeature.h>
+
+#define has_svinval() riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL)
+
+static inline void local_sfence_inval_ir(void)
+{
+ asm volatile(SFENCE_INVAL_IR() ::: "memory");
+}
+
+static inline void local_sfence_w_inval(void)
+{
+ asm volatile(SFENCE_W_INVAL() ::: "memory");
+}
+
+static inline void local_sinval_vma(unsigned long vma, unsigned long asid)
+{
+ if (asid != FLUSH_TLB_NO_ASID)
+ asm volatile(SINVAL_VMA(%0, %1) : : "r" (vma), "r" (asid) : "memory");
+ else
+ asm volatile(SINVAL_VMA(%0, zero) : : "r" (vma) : "memory");
+}
/*
* Flush entire TLB if number of entries to be flushed is greater
@@ -27,6 +48,16 @@ static void local_flush_tlb_range_threshold_asid(unsigned long start,
return;
}
+ if (has_svinval()) {
+ local_sfence_w_inval();
+ for (i = 0; i < nr_ptes_in_range; ++i) {
+ local_sinval_vma(start, asid);
+ start += stride;
+ }
+ local_sfence_inval_ir();
+ return;
+ }
+
for (i = 0; i < nr_ptes_in_range; ++i) {
local_flush_tlb_page_asid(start, asid);
start += stride;
@@ -182,6 +213,13 @@ void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
__flush_tlb_range(vma->vm_mm, mm_cpumask(vma->vm_mm),
start, end - start, PMD_SIZE);
}
+
+void flush_pud_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+{
+ __flush_tlb_range(vma->vm_mm, mm_cpumask(vma->vm_mm),
+ start, end - start, PUD_SIZE);
+}
#endif
bool arch_tlbbatch_should_defer(struct mm_struct *mm)
diff --git a/arch/riscv/tools/relocs_check.sh b/arch/riscv/tools/relocs_check.sh
index baeb2e7b2290..742993e6a8cb 100755
--- a/arch/riscv/tools/relocs_check.sh
+++ b/arch/riscv/tools/relocs_check.sh
@@ -14,7 +14,9 @@ bad_relocs=$(
${srctree}/scripts/relocs_check.sh "$@" |
# These relocations are okay
# R_RISCV_RELATIVE
- grep -F -w -v 'R_RISCV_RELATIVE'
+ # R_RISCV_NONE
+ grep -F -w -v 'R_RISCV_RELATIVE
+R_RISCV_NONE'
)
if [ -z "$bad_relocs" ]; then
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index d229cbd2ba22..9b0d55be1239 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -38,6 +38,7 @@ static int s390_sha1_init(struct shash_desc *desc)
sctx->state[4] = SHA1_H4;
sctx->count = 0;
sctx->func = CPACF_KIMD_SHA_1;
+ sctx->first_message_part = 0;
return 0;
}
@@ -60,6 +61,7 @@ static int s390_sha1_import(struct shash_desc *desc, const void *in)
sctx->count = ictx->count;
memcpy(sctx->state, ictx->state, sizeof(ictx->state));
sctx->func = CPACF_KIMD_SHA_1;
+ sctx->first_message_part = 0;
return 0;
}
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index 33711a29618c..6cbbf5e8555f 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -32,6 +32,7 @@ static int sha512_init(struct shash_desc *desc)
ctx->count = 0;
ctx->sha512.count_hi = 0;
ctx->func = CPACF_KIMD_SHA_512;
+ ctx->first_message_part = 0;
return 0;
}
@@ -57,6 +58,7 @@ static int sha512_import(struct shash_desc *desc, const void *in)
memcpy(sctx->state, ictx->state, sizeof(ictx->state));
sctx->func = CPACF_KIMD_SHA_512;
+ sctx->first_message_part = 0;
return 0;
}
@@ -97,6 +99,7 @@ static int sha384_init(struct shash_desc *desc)
ctx->count = 0;
ctx->sha512.count_hi = 0;
ctx->func = CPACF_KIMD_SHA_512;
+ ctx->first_message_part = 0;
return 0;
}
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 1c661ac62ce8..6d8bc27a366e 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -915,7 +915,7 @@ static inline int pmd_protnone(pmd_t pmd)
}
#endif
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 62c0ab4a4b9d..0905fa99a31e 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -265,7 +265,7 @@ static __always_inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *r
addr = kernel_stack_pointer(regs) + n * sizeof(long);
if (!regs_within_kernel_stack(regs, addr))
return 0;
- return READ_ONCE_NOCHECK(addr);
+ return READ_ONCE_NOCHECK(*(unsigned long *)addr);
}
/**
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index db5f3a3faefb..ea5ed6654050 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -46,7 +46,7 @@ obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o wti.o
obj-y += diag/
-extra-y += vmlinux.lds
+always-$(KBUILD_BUILTIN) += vmlinux.lds
obj-$(CONFIG_SYSFS) += nospec-sysfs.o
CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index e23670e1949c..21c2e61fece4 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -319,7 +319,7 @@ enum prot_type {
PROT_TYPE_DAT = 3,
PROT_TYPE_IEP = 4,
/* Dummy value for passing an initialized value when code != PGM_PROTECTION */
- PROT_NONE,
+ PROT_TYPE_DUMMY,
};
static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
@@ -335,7 +335,7 @@ static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva,
switch (code) {
case PGM_PROTECTION:
switch (prot) {
- case PROT_NONE:
+ case PROT_TYPE_DUMMY:
/* We should never get here, acts like termination */
WARN_ON_ONCE(1);
break;
@@ -805,7 +805,7 @@ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
gpa = kvm_s390_real_to_abs(vcpu, ga);
if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) {
rc = PGM_ADDRESSING;
- prot = PROT_NONE;
+ prot = PROT_TYPE_DUMMY;
}
}
if (rc)
@@ -963,7 +963,7 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
if (rc == PGM_PROTECTION)
prot = PROT_TYPE_KEYC;
else
- prot = PROT_NONE;
+ prot = PROT_TYPE_DUMMY;
rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate);
}
out_unlock:
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 3829521450dd..e1ad05bfd28a 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -441,6 +441,8 @@ void do_secure_storage_access(struct pt_regs *regs)
if (rc)
BUG();
} else {
+ if (faulthandler_disabled())
+ return handle_fault_error_nolock(regs, 0);
mm = current->mm;
mmap_read_lock(mm);
vma = find_vma(mm, addr);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index c7f8313ba449..0c9a35782c83 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -566,7 +566,15 @@ static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target)
{
memcpy(plt, &bpf_plt, sizeof(*plt));
plt->ret = ret;
- plt->target = target;
+ /*
+ * (target == NULL) implies that the branch to this PLT entry was
+ * patched and became a no-op. However, some CPU could have jumped
+ * to this PLT entry before patching and may be still executing it.
+ *
+ * Since the intention in this case is to make the PLT entry a no-op,
+ * make the target point to the return label instead of NULL.
+ */
+ plt->target = target ?: ret;
}
/*
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 2fbee3887d13..d930416d4c90 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -54,6 +54,7 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
case PCI_ERS_RESULT_CAN_RECOVER:
case PCI_ERS_RESULT_RECOVERED:
case PCI_ERS_RESULT_NEED_RESET:
+ case PCI_ERS_RESULT_NONE:
return false;
default:
return true;
@@ -78,10 +79,6 @@ static bool is_driver_supported(struct pci_driver *driver)
return false;
if (!driver->err_handler->error_detected)
return false;
- if (!driver->err_handler->slot_reset)
- return false;
- if (!driver->err_handler->resume)
- return false;
return true;
}
@@ -106,6 +103,10 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
struct zpci_dev *zdev = to_zpci(pdev);
int rc;
+ /* The underlying device may have been disabled by the event */
+ if (!zdev_enabled(zdev))
+ return PCI_ERS_RESULT_NEED_RESET;
+
pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
rc = zpci_reset_load_store_blocked(zdev);
if (rc) {
@@ -114,16 +115,18 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
return PCI_ERS_RESULT_NEED_RESET;
}
- if (driver->err_handler->mmio_enabled) {
+ if (driver->err_handler->mmio_enabled)
ers_res = driver->err_handler->mmio_enabled(pdev);
- if (ers_result_indicates_abort(ers_res)) {
- pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
- pci_name(pdev));
- return ers_res;
- } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
- pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
- return ers_res;
- }
+ else
+ ers_res = PCI_ERS_RESULT_NONE;
+
+ if (ers_result_indicates_abort(ers_res)) {
+ pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
+ pci_name(pdev));
+ return ers_res;
+ } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
+ pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
+ return ers_res;
}
pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
@@ -150,7 +153,12 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
return ers_res;
}
pdev->error_state = pci_channel_io_normal;
- ers_res = driver->err_handler->slot_reset(pdev);
+
+ if (driver->err_handler->slot_reset)
+ ers_res = driver->err_handler->slot_reset(pdev);
+ else
+ ers_res = PCI_ERS_RESULT_NONE;
+
if (ers_result_indicates_abort(ers_res)) {
pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));
return ers_res;
@@ -214,7 +222,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
goto out_unlock;
}
- if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
+ if (ers_res != PCI_ERS_RESULT_NEED_RESET) {
ers_res = zpci_event_do_error_state_clear(pdev, driver);
if (ers_result_indicates_abort(ers_res)) {
status_str = "failed (abort on MMIO enable)";
@@ -225,6 +233,16 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
if (ers_res == PCI_ERS_RESULT_NEED_RESET)
ers_res = zpci_event_do_reset(pdev, driver);
+ /*
+ * ers_res can be PCI_ERS_RESULT_NONE either because the driver
+ * decided to return it, indicating that it abstains from voting
+ * on how to recover, or because it didn't implement the callback.
+ * Both cases assume, that if there is nothing else causing a
+ * disconnect, we recovered successfully.
+ */
+ if (ers_res == PCI_ERS_RESULT_NONE)
+ ers_res = PCI_ERS_RESULT_RECOVERED;
+
if (ers_res != PCI_ERS_RESULT_RECOVERED) {
pr_err("%s: Automatic recovery failed; operator intervention is required\n",
pci_name(pdev));
@@ -273,6 +291,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
struct pci_dev *pdev = NULL;
pci_ers_result_t ers_res;
+ u32 fh = 0;
+ int rc;
zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
ccdf->fid, ccdf->fh, ccdf->pec);
@@ -281,6 +301,15 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
if (zdev) {
mutex_lock(&zdev->state_lock);
+ rc = clp_refresh_fh(zdev->fid, &fh);
+ if (rc)
+ goto no_pdev;
+ if (!fh || ccdf->fh != fh) {
+ /* Ignore events with stale handles */
+ zpci_dbg(3, "err fid:%x, fh:%x (stale %x)\n",
+ ccdf->fid, fh, ccdf->fh);
+ goto no_pdev;
+ }
zpci_update_fh(zdev, ccdf->fh);
if (zdev->zbus->bus)
pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c
index 6f13557eecd6..a641e26f8fdf 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -825,6 +825,7 @@ static struct spi_board_info spi_bus[] = {
/* MSIOF0 */
static struct sh_msiof_spi_info msiof0_data = {
.num_chipselect = 1,
+ .mode = MSIOF_SPI_HOST,
};
static struct resource msiof0_resources[] = {
diff --git a/arch/sh/drivers/heartbeat.c b/arch/sh/drivers/heartbeat.c
index 24391b444b28..42103038a7d0 100644
--- a/arch/sh/drivers/heartbeat.c
+++ b/arch/sh/drivers/heartbeat.c
@@ -58,7 +58,7 @@ static inline void heartbeat_toggle_bit(struct heartbeat_data *hd,
static void heartbeat_timer(struct timer_list *t)
{
- struct heartbeat_data *hd = from_timer(hd, t, timer);
+ struct heartbeat_data *hd = timer_container_of(hd, t, timer);
static unsigned bit = 0, up = 1;
heartbeat_toggle_bit(hd, bit, hd->flags & HEARTBEAT_INVERTED);
diff --git a/arch/sh/drivers/pci/common.c b/arch/sh/drivers/pci/common.c
index 5442475d132e..9633b6147a05 100644
--- a/arch/sh/drivers/pci/common.c
+++ b/arch/sh/drivers/pci/common.c
@@ -88,7 +88,7 @@ int __init pci_is_66mhz_capable(struct pci_channel *hose,
static void pcibios_enable_err(struct timer_list *t)
{
- struct pci_channel *hose = from_timer(hose, t, err_timer);
+ struct pci_channel *hose = timer_container_of(hose, t, err_timer);
timer_delete(&hose->err_timer);
printk(KERN_DEBUG "PCI: re-enabling error IRQ.\n");
@@ -97,7 +97,7 @@ static void pcibios_enable_err(struct timer_list *t)
static void pcibios_enable_serr(struct timer_list *t)
{
- struct pci_channel *hose = from_timer(hose, t, serr_timer);
+ struct pci_channel *hose = timer_container_of(hose, t, serr_timer);
timer_delete(&hose->serr_timer);
printk(KERN_DEBUG "PCI: re-enabling system error IRQ.\n");
diff --git a/arch/sh/drivers/push-switch.c b/arch/sh/drivers/push-switch.c
index 2b51ad9d5586..443cc6fd26a8 100644
--- a/arch/sh/drivers/push-switch.c
+++ b/arch/sh/drivers/push-switch.c
@@ -25,7 +25,7 @@ static DEVICE_ATTR_RO(switch);
static void switch_timer(struct timer_list *t)
{
- struct push_switch *psw = from_timer(psw, t, debounce);
+ struct push_switch *psw = timer_container_of(psw, t, debounce);
schedule_work(&psw->work);
}
diff --git a/arch/sh/include/asm/cache.h b/arch/sh/include/asm/cache.h
index b38dbc975581..e7ac9c950275 100644
--- a/arch/sh/include/asm/cache.h
+++ b/arch/sh/include/asm/cache.h
@@ -22,7 +22,7 @@
#define __read_mostly __section(".data..read_mostly")
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct cache_info {
unsigned int ways; /* Number of cache ways */
unsigned int sets; /* Number of cache sets */
@@ -48,5 +48,5 @@ struct cache_info {
unsigned long flags;
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_SH_CACHE_H */
diff --git a/arch/sh/include/asm/dwarf.h b/arch/sh/include/asm/dwarf.h
index 571954474122..f46d18b84833 100644
--- a/arch/sh/include/asm/dwarf.h
+++ b/arch/sh/include/asm/dwarf.h
@@ -189,7 +189,7 @@
*/
#define DWARF_ARCH_RA_REG 17
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/compiler.h>
#include <linux/bug.h>
@@ -379,7 +379,7 @@ extern int module_dwarf_finalize(const Elf_Ehdr *, const Elf_Shdr *,
struct module *);
extern void module_dwarf_cleanup(struct module *);
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#define CFI_STARTPROC .cfi_startproc
#define CFI_ENDPROC .cfi_endproc
@@ -402,7 +402,7 @@ extern void module_dwarf_cleanup(struct module *);
#define CFI_REL_OFFSET CFI_IGNORE
#define CFI_UNDEFINED CFI_IGNORE
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
static inline void dwarf_unwinder_init(void)
{
}
diff --git a/arch/sh/include/asm/fpu.h b/arch/sh/include/asm/fpu.h
index 0379f4cce5ed..a086e38b70ee 100644
--- a/arch/sh/include/asm/fpu.h
+++ b/arch/sh/include/asm/fpu.h
@@ -2,7 +2,7 @@
#ifndef __ASM_SH_FPU_H
#define __ASM_SH_FPU_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/ptrace.h>
@@ -67,6 +67,6 @@ static inline void clear_fpu(struct task_struct *tsk, struct pt_regs *regs)
void float_raise(unsigned int flags);
int float_rounding_mode(void);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_SH_FPU_H */
diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h
index 1c10e1066390..d35781ab716e 100644
--- a/arch/sh/include/asm/ftrace.h
+++ b/arch/sh/include/asm/ftrace.h
@@ -7,7 +7,7 @@
#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
#define FTRACE_SYSCALL_MAX NR_syscalls
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern void mcount(void);
#define MCOUNT_ADDR ((unsigned long)(mcount))
@@ -35,10 +35,10 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* CONFIG_FUNCTION_TRACER */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/* arch/sh/kernel/return_address.c */
extern void *return_address(unsigned int);
@@ -53,6 +53,6 @@ static inline void arch_ftrace_nmi_enter(void) { }
static inline void arch_ftrace_nmi_exit(void) { }
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_SH_FTRACE_H */
diff --git a/arch/sh/include/asm/mmu.h b/arch/sh/include/asm/mmu.h
index 172e329fd92d..b9c9f91e6616 100644
--- a/arch/sh/include/asm/mmu.h
+++ b/arch/sh/include/asm/mmu.h
@@ -33,7 +33,7 @@
#define PMB_NO_ENTRY (-1)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/errno.h>
#include <linux/threads.h>
#include <asm/page.h>
@@ -102,6 +102,6 @@ pmb_remap(phys_addr_t phys, unsigned long size, pgprot_t prot)
return pmb_remap_caller(phys, size, prot, __builtin_return_address(0));
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __MMU_H */
diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h
index 3990cbd9aa04..def4205491ec 100644
--- a/arch/sh/include/asm/page.h
+++ b/arch/sh/include/asm/page.h
@@ -30,7 +30,7 @@
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT-PAGE_SHIFT)
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/uncached.h>
extern unsigned long shm_align_mask;
@@ -85,7 +85,7 @@ typedef struct page *pgtable_t;
#define pte_pgprot(x) __pgprot(pte_val(x) & PTE_FLAGS_MASK)
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/*
* __MEMORY_START and SIZE are the physical addresses and size of RAM.
@@ -126,10 +126,10 @@ typedef struct page *pgtable_t;
#define ___va(x) ((x)+PAGE_OFFSET)
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define __pa(x) ___pa((unsigned long)x)
#define __va(x) (void *)___va((unsigned long)x)
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#ifdef CONFIG_UNCACHED_MAPPING
#if defined(CONFIG_29BIT)
diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h
index 729f5c6225fb..10fa8f2bb8d1 100644
--- a/arch/sh/include/asm/pgtable.h
+++ b/arch/sh/include/asm/pgtable.h
@@ -17,7 +17,7 @@
#include <asm/page.h>
#include <asm/mmu.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/addrspace.h>
#include <asm/fixmap.h>
@@ -28,7 +28,7 @@
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/*
* Effective and physical address definitions, to aid with sign
diff --git a/arch/sh/include/asm/pgtable_32.h b/arch/sh/include/asm/pgtable_32.h
index db2e48366e0d..5f51af18997b 100644
--- a/arch/sh/include/asm/pgtable_32.h
+++ b/arch/sh/include/asm/pgtable_32.h
@@ -170,7 +170,7 @@ static inline unsigned long copy_ptea_attributes(unsigned long x)
(PTE_MASK | _PAGE_ACCESSED | _PAGE_CACHABLE | \
_PAGE_DIRTY | _PAGE_SPECIAL)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#if defined(CONFIG_X2TLB) /* SH-X2 TLB */
#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_CACHABLE | \
@@ -287,9 +287,9 @@ static inline unsigned long copy_ptea_attributes(unsigned long x)
__pgprot(0)
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* Certain architectures need to do special things when PTEs
@@ -470,7 +470,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
/* In both cases, we borrow bit 6 to store the exclusive marker in swap PTEs. */
#define _PAGE_SWP_EXCLUSIVE _PAGE_USER
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte.pte_low & _PAGE_SWP_EXCLUSIVE;
}
@@ -478,5 +478,5 @@ static inline int pte_swp_exclusive(pte_t pte)
PTE_BIT_FUNC(low, swp_mkexclusive, |= _PAGE_SWP_EXCLUSIVE);
PTE_BIT_FUNC(low, swp_clear_exclusive, &= ~_PAGE_SWP_EXCLUSIVE);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_SH_PGTABLE_32_H */
diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h
index 73fba7c922f9..2a0b5713ab80 100644
--- a/arch/sh/include/asm/processor.h
+++ b/arch/sh/include/asm/processor.h
@@ -5,7 +5,7 @@
#include <asm/cpu-features.h>
#include <asm/cache.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* CPU type and hardware bug flags. Kept separately for each CPU.
*
@@ -168,7 +168,7 @@ extern unsigned int instruction_size(unsigned int insn);
void select_idle_routine(void);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#include <asm/processor_32.h>
diff --git a/arch/sh/include/asm/smc37c93x.h b/arch/sh/include/asm/smc37c93x.h
index 891f2f8f2fd0..caf4cd8dd241 100644
--- a/arch/sh/include/asm/smc37c93x.h
+++ b/arch/sh/include/asm/smc37c93x.h
@@ -67,7 +67,7 @@
#define UART_DLL 0x0 /* Divisor Latch (LS) */
#define UART_DLM 0x2 /* Divisor Latch (MS) */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
typedef struct uart_reg {
volatile __u16 rbr;
volatile __u16 ier;
@@ -78,7 +78,7 @@ typedef struct uart_reg {
volatile __u16 msr;
volatile __u16 scr;
} uart_reg;
-#endif /* ! __ASSEMBLY__ */
+#endif /* ! __ASSEMBLER__ */
/* Alias for Write Only Register */
diff --git a/arch/sh/include/asm/suspend.h b/arch/sh/include/asm/suspend.h
index 47db17520261..0f991babc559 100644
--- a/arch/sh/include/asm/suspend.h
+++ b/arch/sh/include/asm/suspend.h
@@ -2,7 +2,7 @@
#ifndef _ASM_SH_SUSPEND_H
#define _ASM_SH_SUSPEND_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/notifier.h>
#include <asm/ptrace.h>
diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h
index 9f19a682d315..471db5173036 100644
--- a/arch/sh/include/asm/thread_info.h
+++ b/arch/sh/include/asm/thread_info.h
@@ -21,7 +21,7 @@
#define FAULT_CODE_PROT (1 << 3) /* protection fault */
#define FAULT_CODE_USER (1 << 4) /* user-mode access */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/processor.h>
struct thread_info {
@@ -49,7 +49,7 @@ struct thread_info {
/*
* macros/functions for gaining access to the thread information structure
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define INIT_THREAD_INFO(tsk) \
{ \
.task = &tsk, \
@@ -86,7 +86,7 @@ static inline struct thread_info *current_thread_info(void)
extern void init_thread_xstate(void);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/*
* Thread information flags
@@ -144,7 +144,7 @@ extern void init_thread_xstate(void);
*/
#define TS_USEDFPU 0x0002 /* FPU used by this task this quantum */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define TI_FLAG_FAULT_CODE_SHIFT 24
@@ -164,5 +164,5 @@ static inline unsigned int get_thread_fault_code(void)
return ti->flags >> TI_FLAG_FAULT_CODE_SHIFT;
}
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_SH_THREAD_INFO_H */
diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h
index ddf324bfb9a0..39df40d0ebc2 100644
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -2,7 +2,7 @@
#ifndef __ASM_SH_TLB_H
#define __ASM_SH_TLB_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/pagemap.h>
#include <asm-generic/tlb.h>
@@ -29,5 +29,5 @@ asmlinkage int handle_tlbmiss(struct pt_regs *regs, unsigned long error_code,
unsigned long address);
#endif /* CONFIG_MMU */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_SH_TLB_H */
diff --git a/arch/sh/include/asm/types.h b/arch/sh/include/asm/types.h
index 9b3fc923ee28..fec3e89df0b1 100644
--- a/arch/sh/include/asm/types.h
+++ b/arch/sh/include/asm/types.h
@@ -7,10 +7,10 @@
/*
* These aren't exported outside the kernel to avoid name space clashes
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
typedef u16 insn_size_t;
typedef u32 reg_size_t;
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_SH_TYPES_H */
diff --git a/arch/sh/include/mach-common/mach/romimage.h b/arch/sh/include/mach-common/mach/romimage.h
index 1915714263aa..22fb47ec9b15 100644
--- a/arch/sh/include/mach-common/mach/romimage.h
+++ b/arch/sh/include/mach-common/mach/romimage.h
@@ -1,12 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
/* do nothing here by default */
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
static inline void mmcif_update_progress(int nr)
{
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
diff --git a/arch/sh/include/mach-ecovec24/mach/romimage.h b/arch/sh/include/mach-ecovec24/mach/romimage.h
index 2da6ff326cbd..f93d494736c3 100644
--- a/arch/sh/include/mach-ecovec24/mach/romimage.h
+++ b/arch/sh/include/mach-ecovec24/mach/romimage.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
/* EcoVec board specific boot code:
* converts the "partner-jet-script.txt" script into assembly
@@ -22,7 +22,7 @@
1 : .long 0xa8000000
2 :
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
/* Ecovec board specific information:
*
@@ -45,4 +45,4 @@ static inline void mmcif_update_progress(int nr)
__raw_writeb(1 << (nr - 1), PGDR);
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
diff --git a/arch/sh/include/mach-kfr2r09/mach/romimage.h b/arch/sh/include/mach-kfr2r09/mach/romimage.h
index 209275872ff0..f68bb480d378 100644
--- a/arch/sh/include/mach-kfr2r09/mach/romimage.h
+++ b/arch/sh/include/mach-kfr2r09/mach/romimage.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
/* kfr2r09 board specific boot code:
* converts the "partner-jet-script.txt" script into assembly
@@ -22,10 +22,10 @@
1: .long 0xa8000000
2:
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
static inline void mmcif_update_progress(int nr)
{
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile
index 7b453592adaf..5ef123bc63f8 100644
--- a/arch/sh/kernel/Makefile
+++ b/arch/sh/kernel/Makefile
@@ -3,7 +3,7 @@
# Makefile for the Linux/SuperH kernel.
#
-extra-y := vmlinux.lds
+always-$(KBUILD_BUILTIN) := vmlinux.lds
ifdef CONFIG_FUNCTION_TRACER
# Do not profile debug and lowlevel utilities
diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c
index 49c4ffd782d6..a250fb1b9420 100644
--- a/arch/sh/kernel/kprobes.c
+++ b/arch/sh/kernel/kprobes.c
@@ -404,13 +404,10 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data)
{
- struct kprobe *p = NULL;
struct die_args *args = (struct die_args *)data;
int ret = NOTIFY_DONE;
- kprobe_opcode_t *addr = NULL;
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- addr = (kprobe_opcode_t *) (args->regs->pc);
if (val == DIE_TRAP &&
args->trapnr == (BREAKPOINT_INSTRUCTION & 0xff)) {
if (!kprobe_running()) {
@@ -421,7 +418,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
ret = NOTIFY_DONE;
}
} else {
- p = get_kprobe(addr);
if ((kcb->kprobe_status == KPROBE_HIT_SS) ||
(kcb->kprobe_status == KPROBE_REENTER)) {
if (post_kprobe_handler(args->regs))
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index 1454ebe91539..7c199c003ffe 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -348,7 +348,7 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & SRMMU_SWP_EXCLUSIVE;
}
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 4af03e3c161b..669cd02469a1 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -1023,7 +1023,7 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index 3453f330e363..36f2727e1445 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -9,7 +9,7 @@ asflags-y := -ansi
# Undefine sparc when processing vmlinux.lds - it is used
# And teach CPP we are doing $(BITS) builds (for this case)
CPPFLAGS_vmlinux.lds := -Usparc -m$(BITS)
-extra-y += vmlinux.lds
+always-$(KBUILD_BUILTIN) += vmlinux.lds
ifdef CONFIG_FUNCTION_TRACER
# Do not profile debug and lowlevel utilities
diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c
index e27afd233bf5..8fb2e7ca5015 100644
--- a/arch/sparc/kernel/viohs.c
+++ b/arch/sparc/kernel/viohs.c
@@ -804,7 +804,7 @@ EXPORT_SYMBOL(vio_port_up);
static void vio_port_timer(struct timer_list *t)
{
- struct vio_driver_state *vio = from_timer(vio, t, timer);
+ struct vio_driver_state *vio = timer_container_of(vio, t, timer);
vio_port_up(vio);
}
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 79509c7f39de..f08e8a7fac93 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -52,13 +52,7 @@ config NO_IOMEM
config UML_IOMEM_EMULATION
bool
select INDIRECT_IOMEM
- select HAS_IOPORT
select GENERIC_PCI_IOMAP
- select GENERIC_IOMAP
- select NO_GENERIC_PCI_IOPORT_MAP
-
-config NO_IOPORT_MAP
- def_bool !UML_IOMEM_EMULATION
config ISA
bool
diff --git a/arch/um/configs/i386_defconfig b/arch/um/configs/i386_defconfig
index 1ffa088739f4..29d9666eceae 100644
--- a/arch/um/configs/i386_defconfig
+++ b/arch/um/configs/i386_defconfig
@@ -52,13 +52,6 @@ CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
# CONFIG_IPV6 is not set
-CONFIG_UML_NET=y
-CONFIG_UML_NET_ETHERTAP=y
-CONFIG_UML_NET_TUNTAP=y
-CONFIG_UML_NET_SLIP=y
-CONFIG_UML_NET_DAEMON=y
-CONFIG_UML_NET_MCAST=y
-CONFIG_UML_NET_SLIRP=y
CONFIG_EXT4_FS=y
CONFIG_QUOTA=y
CONFIG_AUTOFS_FS=m
diff --git a/arch/um/configs/x86_64_defconfig b/arch/um/configs/x86_64_defconfig
index 03b10d3f6816..cf309c5406a2 100644
--- a/arch/um/configs/x86_64_defconfig
+++ b/arch/um/configs/x86_64_defconfig
@@ -51,13 +51,6 @@ CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
# CONFIG_IPV6 is not set
-CONFIG_UML_NET=y
-CONFIG_UML_NET_ETHERTAP=y
-CONFIG_UML_NET_TUNTAP=y
-CONFIG_UML_NET_SLIP=y
-CONFIG_UML_NET_DAEMON=y
-CONFIG_UML_NET_MCAST=y
-CONFIG_UML_NET_SLIRP=y
CONFIG_EXT4_FS=y
CONFIG_QUOTA=y
CONFIG_AUTOFS_FS=m
diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
index 9cb196070614..34085bfc6d41 100644
--- a/arch/um/drivers/Kconfig
+++ b/arch/um/drivers/Kconfig
@@ -124,206 +124,18 @@ endmenu
menu "UML Network Devices"
depends on NET
-# UML virtual driver
-config UML_NET
- bool "Virtual network device"
- help
- While the User-Mode port cannot directly talk to any physical
- hardware devices, this choice and the following transport options
- provide one or more virtual network devices through which the UML
- kernels can talk to each other, the host, and with the host's help,
- machines on the outside world.
-
- For more information, including explanations of the networking and
- sample configurations, see
- <http://user-mode-linux.sourceforge.net/old/networking.html>.
-
- If you'd like to be able to enable networking in the User-Mode
- linux environment, say Y; otherwise say N. Note that you must
- enable at least one of the following transport options to actually
- make use of UML networking.
-
-config UML_NET_ETHERTAP
- bool "Ethertap transport (obsolete)"
- depends on UML_NET
- help
- The Ethertap User-Mode Linux network transport allows a single
- running UML to exchange packets with its host over one of the
- host's Ethertap devices, such as /dev/tap0. Additional running
- UMLs can use additional Ethertap devices, one per running UML.
- While the UML believes it's on a (multi-device, broadcast) virtual
- Ethernet network, it's in fact communicating over a point-to-point
- link with the host.
-
- To use this, your host kernel must have support for Ethertap
- devices. Also, if your host kernel is 2.4.x, it must have
- CONFIG_NETLINK_DEV configured as Y or M.
-
- For more information, see
- <http://user-mode-linux.sourceforge.net/old/networking.html> That site
- has examples of the UML command line to use to enable Ethertap
- networking.
-
- NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
- migrate to UML_NET_VECTOR.
-
- If unsure, say N.
-
-config UML_NET_TUNTAP
- bool "TUN/TAP transport (obsolete)"
- depends on UML_NET
- help
- The UML TUN/TAP network transport allows a UML instance to exchange
- packets with the host over a TUN/TAP device. This option will only
- work with a 2.4 host, unless you've applied the TUN/TAP patch to
- your 2.2 host kernel.
-
- To use this transport, your host kernel must have support for TUN/TAP
- devices, either built-in or as a module.
-
- NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
- migrate to UML_NET_VECTOR.
-
- If unsure, say N.
-
-config UML_NET_SLIP
- bool "SLIP transport (obsolete)"
- depends on UML_NET
- help
- The slip User-Mode Linux network transport allows a running UML to
- network with its host over a point-to-point link. Unlike Ethertap,
- which can carry any Ethernet frame (and hence even non-IP packets),
- the slip transport can only carry IP packets.
-
- To use this, your host must support slip devices.
-
- For more information, see
- <http://user-mode-linux.sourceforge.net/old/networking.html>.
- has examples of the UML command line to use to enable slip
- networking, and details of a few quirks with it.
-
- NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
- migrate to UML_NET_VECTOR.
-
- If unsure, say N.
-
-config UML_NET_DAEMON
- bool "Daemon transport (obsolete)"
- depends on UML_NET
- help
- This User-Mode Linux network transport allows one or more running
- UMLs on a single host to communicate with each other, but not to
- the host.
-
- To use this form of networking, you'll need to run the UML
- networking daemon on the host.
-
- For more information, see
- <http://user-mode-linux.sourceforge.net/old/networking.html> That site
- has examples of the UML command line to use to enable Daemon
- networking.
-
- NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
- migrate to UML_NET_VECTOR.
-
- If unsure, say N.
-
-config UML_NET_DAEMON_DEFAULT_SOCK
- string "Default socket for daemon transport"
- default "/tmp/uml.ctl"
- depends on UML_NET_DAEMON
- help
- This option allows setting the default socket for the daemon
- transport, normally it defaults to /tmp/uml.ctl.
-
config UML_NET_VECTOR
bool "Vector I/O high performance network devices"
- depends on UML_NET
select MAY_HAVE_RUNTIME_DEPS
help
This User-Mode Linux network driver uses multi-message send
and receive functions. The host running the UML guest must have
a linux kernel version above 3.0 and a libc version > 2.13.
- This driver provides tap, raw, gre and l2tpv3 network transports
- with up to 4 times higher network throughput than the UML network
- drivers.
-
-config UML_NET_VDE
- bool "VDE transport (obsolete)"
- depends on UML_NET
- depends on !MODVERSIONS
- select MAY_HAVE_RUNTIME_DEPS
- help
- This User-Mode Linux network transport allows one or more running
- UMLs on a single host to communicate with each other and also
- with the rest of the world using Virtual Distributed Ethernet,
- an improved fork of uml_switch.
+ This driver provides tap, raw, gre and l2tpv3 network transports.
- You must have libvdeplug installed in order to build the vde
- transport into UML.
-
- To use this form of networking, you will need to run vde_switch
- on the host.
-
- For more information, see <http://wiki.virtualsquare.org/>
- That site has a good overview of what VDE is and also examples
- of the UML command line to use to enable VDE networking.
-
- NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
- migrate to UML_NET_VECTOR.
-
- If unsure, say N.
-
-config UML_NET_MCAST
- bool "Multicast transport (obsolete)"
- depends on UML_NET
- help
- This Multicast User-Mode Linux network transport allows multiple
- UMLs (even ones running on different host machines!) to talk to
- each other over a virtual ethernet network. However, it requires
- at least one UML with one of the other transports to act as a
- bridge if any of them need to be able to talk to their hosts or any
- other IP machines.
-
- To use this, your host kernel(s) must support IP Multicasting.
-
- For more information, see
- <http://user-mode-linux.sourceforge.net/old/networking.html> That site
- has examples of the UML command line to use to enable Multicast
- networking, and notes about the security of this approach.
-
- NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
- migrate to UML_NET_VECTOR.
-
- If unsure, say N.
-
-config UML_NET_SLIRP
- bool "SLiRP transport (obsolete)"
- depends on UML_NET
- help
- The SLiRP User-Mode Linux network transport allows a running UML
- to network by invoking a program that can handle SLIP encapsulated
- packets. This is commonly (but not limited to) the application
- known as SLiRP, a program that can re-socket IP packets back onto
- he host on which it is run. Only IP packets are supported,
- unlike other network transports that can handle all Ethernet
- frames. In general, slirp allows the UML the same IP connectivity
- to the outside world that the host user is permitted, and unlike
- other transports, SLiRP works without the need of root level
- privileges, setuid binaries, or SLIP devices on the host. This
- also means not every type of connection is possible, but most
- situations can be accommodated with carefully crafted slirp
- commands that can be passed along as part of the network device's
- setup string. The effect of this transport on the UML is similar
- that of a host behind a firewall that masquerades all network
- connections passing through it (but is less secure).
-
- NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
- migrate to UML_NET_VECTOR.
-
- If unsure, say N.
-
- Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp"
+ For more information, including explanations of the networking
+ and sample configurations, see
+ <file:Documentation/virt/uml/user_mode_linux_howto_v2.rst>.
endmenu
@@ -367,3 +179,11 @@ config UML_PCI_OVER_VIRTIO_DEVICE_ID
There's no official device ID assigned (yet), set the one you
wish to use for experimentation here. The default of -1 is
not valid and will cause the driver to fail at probe.
+
+config UML_PCI_OVER_VFIO
+ bool "Enable VFIO-based PCI passthrough"
+ select UML_PCI
+ help
+ This driver provides support for VFIO-based PCI passthrough.
+ Currently, only MSI-X capable devices are supported, and it
+ is assumed that drivers will use MSI-X.
diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
index 0a5820343ad3..6bf8cbf71d3c 100644
--- a/arch/um/drivers/Makefile
+++ b/arch/um/drivers/Makefile
@@ -6,12 +6,7 @@
# pcap is broken in 2.5 because kbuild doesn't allow pcap.a to be linked
# in to pcap.o
-slip-objs := slip_kern.o slip_user.o
-slirp-objs := slirp_kern.o slirp_user.o
-daemon-objs := daemon_kern.o daemon_user.o
vector-objs := vector_kern.o vector_user.o vector_transports.o
-umcast-objs := umcast_kern.o umcast_user.o
-net-objs := net_kern.o net_user.o
mconsole-objs := mconsole_kern.o mconsole_user.o
hostaudio-objs := hostaudio_kern.o
ubd-objs := ubd_kern.o ubd_user.o
@@ -19,13 +14,7 @@ port-objs := port_kern.o port_user.o
harddog-objs := harddog_kern.o
harddog-builtin-$(CONFIG_UML_WATCHDOG) := harddog_user.o harddog_user_exp.o
rtc-objs := rtc_kern.o rtc_user.o
-
-LDFLAGS_vde.o = $(shell $(CC) $(CFLAGS) -print-file-name=libvdeplug.a)
-
-targets := vde_kern.o vde_user.o
-
-$(obj)/vde.o: $(obj)/vde_kern.o $(obj)/vde_user.o
- $(LD) -r -dp -o $@ $^ $(ld_flags)
+vfio_uml-objs := vfio_kern.o vfio_user.o
#XXX: The call below does not work because the flags are added before the
# object name, so nothing from the library gets linked.
@@ -38,13 +27,7 @@ obj-y := stdio_console.o fd.o chan_kern.o chan_user.o line.o
obj-$(CONFIG_SSL) += ssl.o
obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o
-obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o
-obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o
-obj-$(CONFIG_UML_NET_DAEMON) += daemon.o
obj-$(CONFIG_UML_NET_VECTOR) += vector.o
-obj-$(CONFIG_UML_NET_VDE) += vde.o
-obj-$(CONFIG_UML_NET_MCAST) += umcast.o
-obj-$(CONFIG_UML_NET) += net.o
obj-$(CONFIG_MCONSOLE) += mconsole.o
obj-$(CONFIG_MMAPPER) += mmapper_kern.o
obj-$(CONFIG_BLK_DEV_UBD) += ubd.o
@@ -62,9 +45,10 @@ obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o
obj-$(CONFIG_UML_RTC) += rtc.o
obj-$(CONFIG_UML_PCI) += virt-pci.o
obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virtio_pcidev.o
+obj-$(CONFIG_UML_PCI_OVER_VFIO) += vfio_uml.o
# pcap_user.o must be added explicitly.
-USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o vde_user.o vector_user.o
+USER_OBJS := fd.o null.o pty.o tty.o xterm.o vector_user.o
CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH)
CFLAGS_xterm.o += '-DCONFIG_XTERM_CHAN_DEFAULT_EMULATOR="$(CONFIG_XTERM_CHAN_DEFAULT_EMULATOR)"'
diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index e78a99816c86..26442db7d608 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -212,7 +212,7 @@ int enable_chan(struct line *line)
* be permanently disabled. This is discovered in IRQ context, but
* the freeing of the IRQ must be done later.
*/
-static DEFINE_SPINLOCK(irqs_to_free_lock);
+static DEFINE_RAW_SPINLOCK(irqs_to_free_lock);
static LIST_HEAD(irqs_to_free);
void free_irqs(void)
@@ -222,9 +222,9 @@ void free_irqs(void)
struct list_head *ele;
unsigned long flags;
- spin_lock_irqsave(&irqs_to_free_lock, flags);
+ raw_spin_lock_irqsave(&irqs_to_free_lock, flags);
list_splice_init(&irqs_to_free, &list);
- spin_unlock_irqrestore(&irqs_to_free_lock, flags);
+ raw_spin_unlock_irqrestore(&irqs_to_free_lock, flags);
list_for_each(ele, &list) {
chan = list_entry(ele, struct chan, free_list);
@@ -246,9 +246,9 @@ static void close_one_chan(struct chan *chan, int delay_free_irq)
return;
if (delay_free_irq) {
- spin_lock_irqsave(&irqs_to_free_lock, flags);
+ raw_spin_lock_irqsave(&irqs_to_free_lock, flags);
list_add(&chan->free_list, &irqs_to_free);
- spin_unlock_irqrestore(&irqs_to_free_lock, flags);
+ raw_spin_unlock_irqrestore(&irqs_to_free_lock, flags);
} else {
if (chan->input && chan->enabled)
um_free_irq(chan->line->read_irq, chan);
diff --git a/arch/um/drivers/daemon.h b/arch/um/drivers/daemon.h
deleted file mode 100644
index 1509cc7eb907..000000000000
--- a/arch/um/drivers/daemon.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#ifndef __DAEMON_H__
-#define __DAEMON_H__
-
-#include <net_user.h>
-
-#define SWITCH_VERSION 3
-
-struct daemon_data {
- char *sock_type;
- char *ctl_sock;
- void *ctl_addr;
- void *data_addr;
- void *local_addr;
- int fd;
- int control;
- void *dev;
-};
-
-extern const struct net_user_info daemon_user_info;
-
-extern int daemon_user_write(int fd, void *buf, int len,
- struct daemon_data *pri);
-
-#endif
diff --git a/arch/um/drivers/daemon_kern.c b/arch/um/drivers/daemon_kern.c
deleted file mode 100644
index afde1e82c056..000000000000
--- a/arch/um/drivers/daemon_kern.c
+++ /dev/null
@@ -1,95 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Copyright (C) 2001 by various other people who didn't put their name here.
- */
-
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include <net_kern.h>
-#include "daemon.h"
-
-struct daemon_init {
- char *sock_type;
- char *ctl_sock;
-};
-
-static void daemon_init(struct net_device *dev, void *data)
-{
- struct uml_net_private *pri;
- struct daemon_data *dpri;
- struct daemon_init *init = data;
-
- pri = netdev_priv(dev);
- dpri = (struct daemon_data *) pri->user;
- dpri->sock_type = init->sock_type;
- dpri->ctl_sock = init->ctl_sock;
- dpri->fd = -1;
- dpri->control = -1;
- dpri->dev = dev;
- /* We will free this pointer. If it contains crap we're burned. */
- dpri->ctl_addr = NULL;
- dpri->data_addr = NULL;
- dpri->local_addr = NULL;
-
- printk("daemon backend (uml_switch version %d) - %s:%s",
- SWITCH_VERSION, dpri->sock_type, dpri->ctl_sock);
- printk("\n");
-}
-
-static int daemon_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- return net_recvfrom(fd, skb_mac_header(skb),
- skb->dev->mtu + ETH_HEADER_OTHER);
-}
-
-static int daemon_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- return daemon_user_write(fd, skb->data, skb->len,
- (struct daemon_data *) &lp->user);
-}
-
-static const struct net_kern_info daemon_kern_info = {
- .init = daemon_init,
- .protocol = eth_protocol,
- .read = daemon_read,
- .write = daemon_write,
-};
-
-static int daemon_setup(char *str, char **mac_out, void *data)
-{
- struct daemon_init *init = data;
- char *remain;
-
- *init = ((struct daemon_init)
- { .sock_type = "unix",
- .ctl_sock = CONFIG_UML_NET_DAEMON_DEFAULT_SOCK });
-
- remain = split_if_spec(str, mac_out, &init->sock_type, &init->ctl_sock,
- NULL);
- if (remain != NULL)
- printk(KERN_WARNING "daemon_setup : Ignoring data socket "
- "specification\n");
-
- return 1;
-}
-
-static struct transport daemon_transport = {
- .list = LIST_HEAD_INIT(daemon_transport.list),
- .name = "daemon",
- .setup = daemon_setup,
- .user = &daemon_user_info,
- .kern = &daemon_kern_info,
- .private_size = sizeof(struct daemon_data),
- .setup_size = sizeof(struct daemon_init),
-};
-
-static int register_daemon(void)
-{
- register_transport(&daemon_transport);
- return 0;
-}
-
-late_initcall(register_daemon);
diff --git a/arch/um/drivers/daemon_user.c b/arch/um/drivers/daemon_user.c
deleted file mode 100644
index 785baedc3555..000000000000
--- a/arch/um/drivers/daemon_user.c
+++ /dev/null
@@ -1,194 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 by various other people who didn't put their name here.
- */
-
-#include <stdint.h>
-#include <string.h>
-#include <unistd.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/time.h>
-#include <sys/un.h>
-#include "daemon.h"
-#include <net_user.h>
-#include <os.h>
-#include <um_malloc.h>
-
-enum request_type { REQ_NEW_CONTROL };
-
-#define SWITCH_MAGIC 0xfeedface
-
-struct request_v3 {
- uint32_t magic;
- uint32_t version;
- enum request_type type;
- struct sockaddr_un sock;
-};
-
-static struct sockaddr_un *new_addr(void *name, int len)
-{
- struct sockaddr_un *sun;
-
- sun = uml_kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL);
- if (sun == NULL) {
- printk(UM_KERN_ERR "new_addr: allocation of sockaddr_un "
- "failed\n");
- return NULL;
- }
- sun->sun_family = AF_UNIX;
- memcpy(sun->sun_path, name, len);
- return sun;
-}
-
-static int connect_to_switch(struct daemon_data *pri)
-{
- struct sockaddr_un *ctl_addr = pri->ctl_addr;
- struct sockaddr_un *local_addr = pri->local_addr;
- struct sockaddr_un *sun;
- struct request_v3 req;
- int fd, n, err;
-
- pri->control = socket(AF_UNIX, SOCK_STREAM, 0);
- if (pri->control < 0) {
- err = -errno;
- printk(UM_KERN_ERR "daemon_open : control socket failed, "
- "errno = %d\n", -err);
- return err;
- }
-
- if (connect(pri->control, (struct sockaddr *) ctl_addr,
- sizeof(*ctl_addr)) < 0) {
- err = -errno;
- printk(UM_KERN_ERR "daemon_open : control connect failed, "
- "errno = %d\n", -err);
- goto out;
- }
-
- fd = socket(AF_UNIX, SOCK_DGRAM, 0);
- if (fd < 0) {
- err = -errno;
- printk(UM_KERN_ERR "daemon_open : data socket failed, "
- "errno = %d\n", -err);
- goto out;
- }
- if (bind(fd, (struct sockaddr *) local_addr, sizeof(*local_addr)) < 0) {
- err = -errno;
- printk(UM_KERN_ERR "daemon_open : data bind failed, "
- "errno = %d\n", -err);
- goto out_close;
- }
-
- sun = uml_kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL);
- if (sun == NULL) {
- printk(UM_KERN_ERR "new_addr: allocation of sockaddr_un "
- "failed\n");
- err = -ENOMEM;
- goto out_close;
- }
-
- req.magic = SWITCH_MAGIC;
- req.version = SWITCH_VERSION;
- req.type = REQ_NEW_CONTROL;
- req.sock = *local_addr;
- n = write(pri->control, &req, sizeof(req));
- if (n != sizeof(req)) {
- printk(UM_KERN_ERR "daemon_open : control setup request "
- "failed, err = %d\n", -errno);
- err = -ENOTCONN;
- goto out_free;
- }
-
- n = read(pri->control, sun, sizeof(*sun));
- if (n != sizeof(*sun)) {
- printk(UM_KERN_ERR "daemon_open : read of data socket failed, "
- "err = %d\n", -errno);
- err = -ENOTCONN;
- goto out_free;
- }
-
- pri->data_addr = sun;
- return fd;
-
- out_free:
- kfree(sun);
- out_close:
- close(fd);
- out:
- close(pri->control);
- return err;
-}
-
-static int daemon_user_init(void *data, void *dev)
-{
- struct daemon_data *pri = data;
- struct timeval tv;
- struct {
- char zero;
- int pid;
- int usecs;
- } name;
-
- if (!strcmp(pri->sock_type, "unix"))
- pri->ctl_addr = new_addr(pri->ctl_sock,
- strlen(pri->ctl_sock) + 1);
- name.zero = 0;
- name.pid = os_getpid();
- gettimeofday(&tv, NULL);
- name.usecs = tv.tv_usec;
- pri->local_addr = new_addr(&name, sizeof(name));
- pri->dev = dev;
- pri->fd = connect_to_switch(pri);
- if (pri->fd < 0) {
- kfree(pri->local_addr);
- pri->local_addr = NULL;
- return pri->fd;
- }
-
- return 0;
-}
-
-static int daemon_open(void *data)
-{
- struct daemon_data *pri = data;
- return pri->fd;
-}
-
-static void daemon_remove(void *data)
-{
- struct daemon_data *pri = data;
-
- close(pri->fd);
- pri->fd = -1;
- close(pri->control);
- pri->control = -1;
-
- kfree(pri->data_addr);
- pri->data_addr = NULL;
- kfree(pri->ctl_addr);
- pri->ctl_addr = NULL;
- kfree(pri->local_addr);
- pri->local_addr = NULL;
-}
-
-int daemon_user_write(int fd, void *buf, int len, struct daemon_data *pri)
-{
- struct sockaddr_un *data_addr = pri->data_addr;
-
- return net_sendto(fd, buf, len, data_addr, sizeof(*data_addr));
-}
-
-const struct net_user_info daemon_user_info = {
- .init = daemon_user_init,
- .open = daemon_open,
- .close = NULL,
- .remove = daemon_remove,
- .add_address = NULL,
- .delete_address = NULL,
- .mtu = ETH_MAX_PACKET,
- .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER,
-};
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
deleted file mode 100644
index d5a9c5aabaec..000000000000
--- a/arch/um/drivers/net_kern.c
+++ /dev/null
@@ -1,889 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 by various other people who didn't put their name here.
- */
-
-#include <linux/memblock.h>
-#include <linux/etherdevice.h>
-#include <linux/ethtool.h>
-#include <linux/inetdevice.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/netdevice.h>
-#include <linux/platform_device.h>
-#include <linux/rtnetlink.h>
-#include <linux/skbuff.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <init.h>
-#include <irq_kern.h>
-#include <irq_user.h>
-#include "mconsole_kern.h"
-#include <net_kern.h>
-#include <net_user.h>
-
-#define DRIVER_NAME "uml-netdev"
-
-static DEFINE_SPINLOCK(opened_lock);
-static LIST_HEAD(opened);
-
-/*
- * The drop_skb is used when we can't allocate an skb. The
- * packet is read into drop_skb in order to get the data off the
- * connection to the host.
- * It is reallocated whenever a maximum packet size is seen which is
- * larger than any seen before. update_drop_skb is called from
- * eth_configure when a new interface is added.
- */
-static DEFINE_SPINLOCK(drop_lock);
-static struct sk_buff *drop_skb;
-static int drop_max;
-
-static int update_drop_skb(int max)
-{
- struct sk_buff *new;
- unsigned long flags;
- int err = 0;
-
- spin_lock_irqsave(&drop_lock, flags);
-
- if (max <= drop_max)
- goto out;
-
- err = -ENOMEM;
- new = dev_alloc_skb(max);
- if (new == NULL)
- goto out;
-
- skb_put(new, max);
-
- kfree_skb(drop_skb);
- drop_skb = new;
- drop_max = max;
- err = 0;
-out:
- spin_unlock_irqrestore(&drop_lock, flags);
-
- return err;
-}
-
-static int uml_net_rx(struct net_device *dev)
-{
- struct uml_net_private *lp = netdev_priv(dev);
- int pkt_len;
- struct sk_buff *skb;
-
- /* If we can't allocate memory, try again next round. */
- skb = dev_alloc_skb(lp->max_packet);
- if (skb == NULL) {
- drop_skb->dev = dev;
- /* Read a packet into drop_skb and don't do anything with it. */
- (*lp->read)(lp->fd, drop_skb, lp);
- dev->stats.rx_dropped++;
- return 0;
- }
-
- skb->dev = dev;
- skb_put(skb, lp->max_packet);
- skb_reset_mac_header(skb);
- pkt_len = (*lp->read)(lp->fd, skb, lp);
-
- if (pkt_len > 0) {
- skb_trim(skb, pkt_len);
- skb->protocol = (*lp->protocol)(skb);
-
- dev->stats.rx_bytes += skb->len;
- dev->stats.rx_packets++;
- netif_rx(skb);
- return pkt_len;
- }
-
- kfree_skb(skb);
- return pkt_len;
-}
-
-static void uml_dev_close(struct work_struct *work)
-{
- struct uml_net_private *lp =
- container_of(work, struct uml_net_private, work);
- dev_close(lp->dev);
-}
-
-static irqreturn_t uml_net_interrupt(int irq, void *dev_id)
-{
- struct net_device *dev = dev_id;
- struct uml_net_private *lp = netdev_priv(dev);
- int err;
-
- if (!netif_running(dev))
- return IRQ_NONE;
-
- spin_lock(&lp->lock);
- while ((err = uml_net_rx(dev)) > 0) ;
- if (err < 0) {
- printk(KERN_ERR
- "Device '%s' read returned %d, shutting it down\n",
- dev->name, err);
- /* dev_close can't be called in interrupt context, and takes
- * again lp->lock.
- * And dev_close() can be safely called multiple times on the
- * same device, since it tests for (dev->flags & IFF_UP). So
- * there's no harm in delaying the device shutdown.
- * Furthermore, the workqueue will not re-enqueue an already
- * enqueued work item. */
- schedule_work(&lp->work);
- goto out;
- }
-out:
- spin_unlock(&lp->lock);
- return IRQ_HANDLED;
-}
-
-static int uml_net_open(struct net_device *dev)
-{
- struct uml_net_private *lp = netdev_priv(dev);
- int err;
-
- if (lp->fd >= 0) {
- err = -ENXIO;
- goto out;
- }
-
- lp->fd = (*lp->open)(&lp->user);
- if (lp->fd < 0) {
- err = lp->fd;
- goto out;
- }
-
- err = um_request_irq(dev->irq, lp->fd, IRQ_READ, uml_net_interrupt,
- IRQF_SHARED, dev->name, dev);
- if (err < 0) {
- printk(KERN_ERR "uml_net_open: failed to get irq(%d)\n", err);
- err = -ENETUNREACH;
- goto out_close;
- }
-
- netif_start_queue(dev);
-
- /* clear buffer - it can happen that the host side of the interface
- * is full when we get here. In this case, new data is never queued,
- * SIGIOs never arrive, and the net never works.
- */
- while ((err = uml_net_rx(dev)) > 0) ;
-
- spin_lock(&opened_lock);
- list_add(&lp->list, &opened);
- spin_unlock(&opened_lock);
-
- return 0;
-out_close:
- if (lp->close != NULL) (*lp->close)(lp->fd, &lp->user);
- lp->fd = -1;
-out:
- return err;
-}
-
-static int uml_net_close(struct net_device *dev)
-{
- struct uml_net_private *lp = netdev_priv(dev);
-
- netif_stop_queue(dev);
-
- um_free_irq(dev->irq, dev);
- if (lp->close != NULL)
- (*lp->close)(lp->fd, &lp->user);
- lp->fd = -1;
-
- spin_lock(&opened_lock);
- list_del(&lp->list);
- spin_unlock(&opened_lock);
-
- return 0;
-}
-
-static netdev_tx_t uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
-{
- struct uml_net_private *lp = netdev_priv(dev);
- unsigned long flags;
- int len;
-
- netif_stop_queue(dev);
-
- spin_lock_irqsave(&lp->lock, flags);
-
- len = (*lp->write)(lp->fd, skb, lp);
- skb_tx_timestamp(skb);
-
- if (len == skb->len) {
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += skb->len;
- netif_trans_update(dev);
- netif_start_queue(dev);
-
- /* this is normally done in the interrupt when tx finishes */
- netif_wake_queue(dev);
- }
- else if (len == 0) {
- netif_start_queue(dev);
- dev->stats.tx_dropped++;
- }
- else {
- netif_start_queue(dev);
- printk(KERN_ERR "uml_net_start_xmit: failed(%d)\n", len);
- }
-
- spin_unlock_irqrestore(&lp->lock, flags);
-
- dev_consume_skb_any(skb);
-
- return NETDEV_TX_OK;
-}
-
-static void uml_net_set_multicast_list(struct net_device *dev)
-{
- return;
-}
-
-static void uml_net_tx_timeout(struct net_device *dev, unsigned int txqueue)
-{
- netif_trans_update(dev);
- netif_wake_queue(dev);
-}
-
-#ifdef CONFIG_NET_POLL_CONTROLLER
-static void uml_net_poll_controller(struct net_device *dev)
-{
- disable_irq(dev->irq);
- uml_net_interrupt(dev->irq, dev);
- enable_irq(dev->irq);
-}
-#endif
-
-static void uml_net_get_drvinfo(struct net_device *dev,
- struct ethtool_drvinfo *info)
-{
- strscpy(info->driver, DRIVER_NAME);
-}
-
-static const struct ethtool_ops uml_net_ethtool_ops = {
- .get_drvinfo = uml_net_get_drvinfo,
- .get_link = ethtool_op_get_link,
- .get_ts_info = ethtool_op_get_ts_info,
-};
-
-void uml_net_setup_etheraddr(struct net_device *dev, char *str)
-{
- u8 addr[ETH_ALEN];
- char *end;
- int i;
-
- if (str == NULL)
- goto random;
-
- for (i = 0; i < 6; i++) {
- addr[i] = simple_strtoul(str, &end, 16);
- if ((end == str) ||
- ((*end != ':') && (*end != ',') && (*end != '\0'))) {
- printk(KERN_ERR
- "setup_etheraddr: failed to parse '%s' "
- "as an ethernet address\n", str);
- goto random;
- }
- str = end + 1;
- }
- if (is_multicast_ether_addr(addr)) {
- printk(KERN_ERR
- "Attempt to assign a multicast ethernet address to a "
- "device disallowed\n");
- goto random;
- }
- if (!is_valid_ether_addr(addr)) {
- printk(KERN_ERR
- "Attempt to assign an invalid ethernet address to a "
- "device disallowed\n");
- goto random;
- }
- if (!is_local_ether_addr(addr)) {
- printk(KERN_WARNING
- "Warning: Assigning a globally valid ethernet "
- "address to a device\n");
- printk(KERN_WARNING "You should set the 2nd rightmost bit in "
- "the first byte of the MAC,\n");
- printk(KERN_WARNING "i.e. %02x:%02x:%02x:%02x:%02x:%02x\n",
- addr[0] | 0x02, addr[1], addr[2], addr[3], addr[4],
- addr[5]);
- }
- eth_hw_addr_set(dev, addr);
- return;
-
-random:
- printk(KERN_INFO
- "Choosing a random ethernet address for device %s\n", dev->name);
- eth_hw_addr_random(dev);
-}
-
-static DEFINE_SPINLOCK(devices_lock);
-static LIST_HEAD(devices);
-
-static struct platform_driver uml_net_driver = {
- .driver = {
- .name = DRIVER_NAME,
- },
-};
-
-static void net_device_release(struct device *dev)
-{
- struct uml_net *device = container_of(dev, struct uml_net, pdev.dev);
- struct net_device *netdev = device->dev;
- struct uml_net_private *lp = netdev_priv(netdev);
-
- if (lp->remove != NULL)
- (*lp->remove)(&lp->user);
- list_del(&device->list);
- kfree(device);
- free_netdev(netdev);
-}
-
-static const struct net_device_ops uml_netdev_ops = {
- .ndo_open = uml_net_open,
- .ndo_stop = uml_net_close,
- .ndo_start_xmit = uml_net_start_xmit,
- .ndo_set_rx_mode = uml_net_set_multicast_list,
- .ndo_tx_timeout = uml_net_tx_timeout,
- .ndo_set_mac_address = eth_mac_addr,
- .ndo_validate_addr = eth_validate_addr,
-#ifdef CONFIG_NET_POLL_CONTROLLER
- .ndo_poll_controller = uml_net_poll_controller,
-#endif
-};
-
-/*
- * Ensures that platform_driver_register is called only once by
- * eth_configure. Will be set in an initcall.
- */
-static int driver_registered;
-
-static void eth_configure(int n, void *init, char *mac,
- struct transport *transport, gfp_t gfp_mask)
-{
- struct uml_net *device;
- struct net_device *dev;
- struct uml_net_private *lp;
- int err, size;
-
- size = transport->private_size + sizeof(struct uml_net_private);
-
- device = kzalloc(sizeof(*device), gfp_mask);
- if (device == NULL) {
- printk(KERN_ERR "eth_configure failed to allocate struct "
- "uml_net\n");
- return;
- }
-
- dev = alloc_etherdev(size);
- if (dev == NULL) {
- printk(KERN_ERR "eth_configure: failed to allocate struct "
- "net_device for eth%d\n", n);
- goto out_free_device;
- }
-
- INIT_LIST_HEAD(&device->list);
- device->index = n;
-
- /* If this name ends up conflicting with an existing registered
- * netdevice, that is OK, register_netdev{,ice}() will notice this
- * and fail.
- */
- snprintf(dev->name, sizeof(dev->name), "eth%d", n);
-
- uml_net_setup_etheraddr(dev, mac);
-
- printk(KERN_INFO "Netdevice %d (%pM) : ", n, dev->dev_addr);
-
- lp = netdev_priv(dev);
- /* This points to the transport private data. It's still clear, but we
- * must memset it to 0 *now*. Let's help the drivers. */
- memset(lp, 0, size);
- INIT_WORK(&lp->work, uml_dev_close);
-
- /* sysfs register */
- if (!driver_registered) {
- platform_driver_register(&uml_net_driver);
- driver_registered = 1;
- }
- device->pdev.id = n;
- device->pdev.name = DRIVER_NAME;
- device->pdev.dev.release = net_device_release;
- dev_set_drvdata(&device->pdev.dev, device);
- if (platform_device_register(&device->pdev))
- goto out_free_netdev;
- SET_NETDEV_DEV(dev,&device->pdev.dev);
-
- device->dev = dev;
-
- /*
- * These just fill in a data structure, so there's no failure
- * to be worried about.
- */
- (*transport->kern->init)(dev, init);
-
- *lp = ((struct uml_net_private)
- { .list = LIST_HEAD_INIT(lp->list),
- .dev = dev,
- .fd = -1,
- .mac = { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0},
- .max_packet = transport->user->max_packet,
- .protocol = transport->kern->protocol,
- .open = transport->user->open,
- .close = transport->user->close,
- .remove = transport->user->remove,
- .read = transport->kern->read,
- .write = transport->kern->write,
- .add_address = transport->user->add_address,
- .delete_address = transport->user->delete_address });
-
- spin_lock_init(&lp->lock);
- memcpy(lp->mac, dev->dev_addr, sizeof(lp->mac));
-
- if ((transport->user->init != NULL) &&
- ((*transport->user->init)(&lp->user, dev) != 0))
- goto out_unregister;
-
- dev->mtu = transport->user->mtu;
- dev->netdev_ops = &uml_netdev_ops;
- dev->ethtool_ops = &uml_net_ethtool_ops;
- dev->watchdog_timeo = (HZ >> 1);
- dev->irq = UM_ETH_IRQ;
-
- err = update_drop_skb(lp->max_packet);
- if (err)
- goto out_undo_user_init;
-
- rtnl_lock();
- err = register_netdevice(dev);
- rtnl_unlock();
- if (err)
- goto out_undo_user_init;
-
- spin_lock(&devices_lock);
- list_add(&device->list, &devices);
- spin_unlock(&devices_lock);
-
- return;
-
-out_undo_user_init:
- if (transport->user->remove != NULL)
- (*transport->user->remove)(&lp->user);
-out_unregister:
- platform_device_unregister(&device->pdev);
- return; /* platform_device_unregister frees dev and device */
-out_free_netdev:
- free_netdev(dev);
-out_free_device:
- kfree(device);
-}
-
-static struct uml_net *find_device(int n)
-{
- struct uml_net *device;
- struct list_head *ele;
-
- spin_lock(&devices_lock);
- list_for_each(ele, &devices) {
- device = list_entry(ele, struct uml_net, list);
- if (device->index == n)
- goto out;
- }
- device = NULL;
- out:
- spin_unlock(&devices_lock);
- return device;
-}
-
-static int eth_parse(char *str, int *index_out, char **str_out,
- char **error_out)
-{
- char *end;
- int n, err = -EINVAL;
-
- n = simple_strtoul(str, &end, 0);
- if (end == str) {
- *error_out = "Bad device number";
- return err;
- }
-
- str = end;
- if (*str != '=') {
- *error_out = "Expected '=' after device number";
- return err;
- }
-
- str++;
- if (find_device(n)) {
- *error_out = "Device already configured";
- return err;
- }
-
- *index_out = n;
- *str_out = str;
- return 0;
-}
-
-struct eth_init {
- struct list_head list;
- char *init;
- int index;
-};
-
-static DEFINE_SPINLOCK(transports_lock);
-static LIST_HEAD(transports);
-
-/* Filled in during early boot */
-static LIST_HEAD(eth_cmd_line);
-
-static int check_transport(struct transport *transport, char *eth, int n,
- void **init_out, char **mac_out, gfp_t gfp_mask)
-{
- int len;
-
- len = strlen(transport->name);
- if (strncmp(eth, transport->name, len))
- return 0;
-
- eth += len;
- if (*eth == ',')
- eth++;
- else if (*eth != '\0')
- return 0;
-
- *init_out = kmalloc(transport->setup_size, gfp_mask);
- if (*init_out == NULL)
- return 1;
-
- if (!transport->setup(eth, mac_out, *init_out)) {
- kfree(*init_out);
- *init_out = NULL;
- }
- return 1;
-}
-
-void register_transport(struct transport *new)
-{
- struct list_head *ele, *next;
- struct eth_init *eth;
- void *init;
- char *mac = NULL;
- int match;
-
- spin_lock(&transports_lock);
- BUG_ON(!list_empty(&new->list));
- list_add(&new->list, &transports);
- spin_unlock(&transports_lock);
-
- list_for_each_safe(ele, next, &eth_cmd_line) {
- eth = list_entry(ele, struct eth_init, list);
- match = check_transport(new, eth->init, eth->index, &init,
- &mac, GFP_KERNEL);
- if (!match)
- continue;
- else if (init != NULL) {
- eth_configure(eth->index, init, mac, new, GFP_KERNEL);
- kfree(init);
- }
- list_del(&eth->list);
- }
-}
-
-static int eth_setup_common(char *str, int index)
-{
- struct list_head *ele;
- struct transport *transport;
- void *init;
- char *mac = NULL;
- int found = 0;
-
- spin_lock(&transports_lock);
- list_for_each(ele, &transports) {
- transport = list_entry(ele, struct transport, list);
- if (!check_transport(transport, str, index, &init,
- &mac, GFP_ATOMIC))
- continue;
- if (init != NULL) {
- eth_configure(index, init, mac, transport, GFP_ATOMIC);
- kfree(init);
- }
- found = 1;
- break;
- }
-
- spin_unlock(&transports_lock);
- return found;
-}
-
-static int __init eth_setup(char *str)
-{
- struct eth_init *new;
- char *error;
- int n, err;
-
- err = eth_parse(str, &n, &str, &error);
- if (err) {
- printk(KERN_ERR "eth_setup - Couldn't parse '%s' : %s\n",
- str, error);
- return 1;
- }
-
- new = memblock_alloc_or_panic(sizeof(*new), SMP_CACHE_BYTES);
-
- INIT_LIST_HEAD(&new->list);
- new->index = n;
- new->init = str;
-
- list_add_tail(&new->list, &eth_cmd_line);
- return 1;
-}
-
-__setup("eth", eth_setup);
-__uml_help(eth_setup,
-"eth[0-9]+=<transport>,<options>\n"
-" Configure a network device.\n\n"
-);
-
-static int net_config(char *str, char **error_out)
-{
- int n, err;
-
- err = eth_parse(str, &n, &str, error_out);
- if (err)
- return err;
-
- /* This string is broken up and the pieces used by the underlying
- * driver. So, it is freed only if eth_setup_common fails.
- */
- str = kstrdup(str, GFP_KERNEL);
- if (str == NULL) {
- *error_out = "net_config failed to strdup string";
- return -ENOMEM;
- }
- err = !eth_setup_common(str, n);
- if (err)
- kfree(str);
- return err;
-}
-
-static int net_id(char **str, int *start_out, int *end_out)
-{
- char *end;
- int n;
-
- n = simple_strtoul(*str, &end, 0);
- if ((*end != '\0') || (end == *str))
- return -1;
-
- *start_out = n;
- *end_out = n;
- *str = end;
- return n;
-}
-
-static int net_remove(int n, char **error_out)
-{
- struct uml_net *device;
- struct net_device *dev;
- struct uml_net_private *lp;
-
- device = find_device(n);
- if (device == NULL)
- return -ENODEV;
-
- dev = device->dev;
- lp = netdev_priv(dev);
- if (lp->fd > 0)
- return -EBUSY;
- unregister_netdev(dev);
- platform_device_unregister(&device->pdev);
-
- return 0;
-}
-
-static struct mc_device net_mc = {
- .list = LIST_HEAD_INIT(net_mc.list),
- .name = "eth",
- .config = net_config,
- .get_config = NULL,
- .id = net_id,
- .remove = net_remove,
-};
-
-#ifdef CONFIG_INET
-static int uml_inetaddr_event(struct notifier_block *this, unsigned long event,
- void *ptr)
-{
- struct in_ifaddr *ifa = ptr;
- struct net_device *dev = ifa->ifa_dev->dev;
- struct uml_net_private *lp;
- void (*proc)(unsigned char *, unsigned char *, void *);
- unsigned char addr_buf[4], netmask_buf[4];
-
- if (dev->netdev_ops->ndo_open != uml_net_open)
- return NOTIFY_DONE;
-
- lp = netdev_priv(dev);
-
- proc = NULL;
- switch (event) {
- case NETDEV_UP:
- proc = lp->add_address;
- break;
- case NETDEV_DOWN:
- proc = lp->delete_address;
- break;
- }
- if (proc != NULL) {
- memcpy(addr_buf, &ifa->ifa_address, sizeof(addr_buf));
- memcpy(netmask_buf, &ifa->ifa_mask, sizeof(netmask_buf));
- (*proc)(addr_buf, netmask_buf, &lp->user);
- }
- return NOTIFY_DONE;
-}
-
-/* uml_net_init shouldn't be called twice on two CPUs at the same time */
-static struct notifier_block uml_inetaddr_notifier = {
- .notifier_call = uml_inetaddr_event,
-};
-
-static void inet_register(void)
-{
- struct list_head *ele;
- struct uml_net_private *lp;
- struct in_device *ip;
- struct in_ifaddr *in;
-
- register_inetaddr_notifier(&uml_inetaddr_notifier);
-
- /* Devices may have been opened already, so the uml_inetaddr_notifier
- * didn't get a chance to run for them. This fakes it so that
- * addresses which have already been set up get handled properly.
- */
- spin_lock(&opened_lock);
- list_for_each(ele, &opened) {
- lp = list_entry(ele, struct uml_net_private, list);
- ip = lp->dev->ip_ptr;
- if (ip == NULL)
- continue;
- in = ip->ifa_list;
- while (in != NULL) {
- uml_inetaddr_event(NULL, NETDEV_UP, in);
- in = in->ifa_next;
- }
- }
- spin_unlock(&opened_lock);
-}
-#else
-static inline void inet_register(void)
-{
-}
-#endif
-
-static int uml_net_init(void)
-{
- mconsole_register_dev(&net_mc);
- inet_register();
- return 0;
-}
-
-__initcall(uml_net_init);
-
-static void close_devices(void)
-{
- struct list_head *ele;
- struct uml_net_private *lp;
-
- spin_lock(&opened_lock);
- list_for_each(ele, &opened) {
- lp = list_entry(ele, struct uml_net_private, list);
- um_free_irq(lp->dev->irq, lp->dev);
- if ((lp->close != NULL) && (lp->fd >= 0))
- (*lp->close)(lp->fd, &lp->user);
- if (lp->remove != NULL)
- (*lp->remove)(&lp->user);
- }
- spin_unlock(&opened_lock);
-}
-
-__uml_exitcall(close_devices);
-
-void iter_addresses(void *d, void (*cb)(unsigned char *, unsigned char *,
- void *),
- void *arg)
-{
- struct net_device *dev = d;
- struct in_device *ip = dev->ip_ptr;
- struct in_ifaddr *in;
- unsigned char address[4], netmask[4];
-
- if (ip == NULL) return;
- in = ip->ifa_list;
- while (in != NULL) {
- memcpy(address, &in->ifa_address, sizeof(address));
- memcpy(netmask, &in->ifa_mask, sizeof(netmask));
- (*cb)(address, netmask, arg);
- in = in->ifa_next;
- }
-}
-
-int dev_netmask(void *d, void *m)
-{
- struct net_device *dev = d;
- struct in_device *ip = dev->ip_ptr;
- struct in_ifaddr *in;
- __be32 *mask_out = m;
-
- if (ip == NULL)
- return 1;
-
- in = ip->ifa_list;
- if (in == NULL)
- return 1;
-
- *mask_out = in->ifa_mask;
- return 0;
-}
-
-void *get_output_buffer(int *len_out)
-{
- void *ret;
-
- ret = (void *) __get_free_pages(GFP_KERNEL, 0);
- if (ret) *len_out = PAGE_SIZE;
- else *len_out = 0;
- return ret;
-}
-
-void free_output_buffer(void *buffer)
-{
- free_pages((unsigned long) buffer, 0);
-}
-
-int tap_setup_common(char *str, char *type, char **dev_name, char **mac_out,
- char **gate_addr)
-{
- char *remain;
-
- remain = split_if_spec(str, dev_name, mac_out, gate_addr, NULL);
- if (remain != NULL) {
- printk(KERN_ERR "tap_setup_common - Extra garbage on "
- "specification : '%s'\n", remain);
- return 1;
- }
-
- return 0;
-}
-
-unsigned short eth_protocol(struct sk_buff *skb)
-{
- return eth_type_trans(skb, skb->dev);
-}
diff --git a/arch/um/drivers/net_user.c b/arch/um/drivers/net_user.c
deleted file mode 100644
index 4c9576452ab0..000000000000
--- a/arch/um/drivers/net_user.c
+++ /dev/null
@@ -1,271 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#include <stdio.h>
-#include <unistd.h>
-#include <stdarg.h>
-#include <errno.h>
-#include <stddef.h>
-#include <string.h>
-#include <sys/socket.h>
-#include <sys/wait.h>
-#include <net_user.h>
-#include <os.h>
-#include <um_malloc.h>
-
-int tap_open_common(void *dev, char *gate_addr)
-{
- int tap_addr[4];
-
- if (gate_addr == NULL)
- return 0;
- if (sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0],
- &tap_addr[1], &tap_addr[2], &tap_addr[3]) != 4) {
- printk(UM_KERN_ERR "Invalid tap IP address - '%s'\n",
- gate_addr);
- return -EINVAL;
- }
- return 0;
-}
-
-void tap_check_ips(char *gate_addr, unsigned char *eth_addr)
-{
- int tap_addr[4];
-
- if ((gate_addr != NULL) &&
- (sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0],
- &tap_addr[1], &tap_addr[2], &tap_addr[3]) == 4) &&
- (eth_addr[0] == tap_addr[0]) &&
- (eth_addr[1] == tap_addr[1]) &&
- (eth_addr[2] == tap_addr[2]) &&
- (eth_addr[3] == tap_addr[3])) {
- printk(UM_KERN_ERR "The tap IP address and the UML eth IP "
- "address must be different\n");
- }
-}
-
-/* Do reliable error handling as this fails frequently enough. */
-void read_output(int fd, char *output, int len)
-{
- int remain, ret, expected;
- char c;
- char *str;
-
- if (output == NULL) {
- output = &c;
- len = sizeof(c);
- }
-
- *output = '\0';
- ret = read(fd, &remain, sizeof(remain));
-
- if (ret != sizeof(remain)) {
- if (ret < 0)
- ret = -errno;
- expected = sizeof(remain);
- str = "length";
- goto err;
- }
-
- while (remain != 0) {
- expected = (remain < len) ? remain : len;
- ret = read(fd, output, expected);
- if (ret != expected) {
- if (ret < 0)
- ret = -errno;
- str = "data";
- goto err;
- }
- remain -= ret;
- }
-
- return;
-
-err:
- if (ret < 0)
- printk(UM_KERN_ERR "read_output - read of %s failed, "
- "errno = %d\n", str, -ret);
- else
- printk(UM_KERN_ERR "read_output - read of %s failed, read only "
- "%d of %d bytes\n", str, ret, expected);
-}
-
-int net_read(int fd, void *buf, int len)
-{
- int n;
-
- n = read(fd, buf, len);
-
- if ((n < 0) && (errno == EAGAIN))
- return 0;
- else if (n == 0)
- return -ENOTCONN;
- return n;
-}
-
-int net_recvfrom(int fd, void *buf, int len)
-{
- int n;
-
- CATCH_EINTR(n = recvfrom(fd, buf, len, 0, NULL, NULL));
- if (n < 0) {
- if (errno == EAGAIN)
- return 0;
- return -errno;
- }
- else if (n == 0)
- return -ENOTCONN;
- return n;
-}
-
-int net_write(int fd, void *buf, int len)
-{
- int n;
-
- n = write(fd, buf, len);
-
- if ((n < 0) && (errno == EAGAIN))
- return 0;
- else if (n == 0)
- return -ENOTCONN;
- return n;
-}
-
-int net_send(int fd, void *buf, int len)
-{
- int n;
-
- CATCH_EINTR(n = send(fd, buf, len, 0));
- if (n < 0) {
- if (errno == EAGAIN)
- return 0;
- return -errno;
- }
- else if (n == 0)
- return -ENOTCONN;
- return n;
-}
-
-int net_sendto(int fd, void *buf, int len, void *to, int sock_len)
-{
- int n;
-
- CATCH_EINTR(n = sendto(fd, buf, len, 0, (struct sockaddr *) to,
- sock_len));
- if (n < 0) {
- if (errno == EAGAIN)
- return 0;
- return -errno;
- }
- else if (n == 0)
- return -ENOTCONN;
- return n;
-}
-
-struct change_pre_exec_data {
- int close_me;
- int stdout_fd;
-};
-
-static void change_pre_exec(void *arg)
-{
- struct change_pre_exec_data *data = arg;
-
- close(data->close_me);
- dup2(data->stdout_fd, 1);
-}
-
-static int change_tramp(char **argv, char *output, int output_len)
-{
- int pid, fds[2], err;
- struct change_pre_exec_data pe_data;
-
- err = os_pipe(fds, 1, 0);
- if (err < 0) {
- printk(UM_KERN_ERR "change_tramp - pipe failed, err = %d\n",
- -err);
- return err;
- }
- pe_data.close_me = fds[0];
- pe_data.stdout_fd = fds[1];
- pid = run_helper(change_pre_exec, &pe_data, argv);
-
- if (pid > 0) /* Avoid hang as we won't get data in failure case. */
- read_output(fds[0], output, output_len);
-
- close(fds[0]);
- close(fds[1]);
-
- if (pid > 0)
- helper_wait(pid);
- return pid;
-}
-
-static void change(char *dev, char *what, unsigned char *addr,
- unsigned char *netmask)
-{
- char addr_buf[sizeof("255.255.255.255\0")];
- char netmask_buf[sizeof("255.255.255.255\0")];
- char version[sizeof("nnnnn\0")];
- char *argv[] = { "uml_net", version, what, dev, addr_buf,
- netmask_buf, NULL };
- char *output;
- int output_len, pid;
-
- sprintf(version, "%d", UML_NET_VERSION);
- sprintf(addr_buf, "%d.%d.%d.%d", addr[0], addr[1], addr[2], addr[3]);
- sprintf(netmask_buf, "%d.%d.%d.%d", netmask[0], netmask[1],
- netmask[2], netmask[3]);
-
- output_len = UM_KERN_PAGE_SIZE;
- output = uml_kmalloc(output_len, UM_GFP_KERNEL);
- if (output == NULL)
- printk(UM_KERN_ERR "change : failed to allocate output "
- "buffer\n");
-
- pid = change_tramp(argv, output, output_len);
- if (pid < 0) {
- kfree(output);
- return;
- }
-
- if (output != NULL) {
- printk("%s", output);
- kfree(output);
- }
-}
-
-void open_addr(unsigned char *addr, unsigned char *netmask, void *arg)
-{
- change(arg, "add", addr, netmask);
-}
-
-void close_addr(unsigned char *addr, unsigned char *netmask, void *arg)
-{
- change(arg, "del", addr, netmask);
-}
-
-char *split_if_spec(char *str, ...)
-{
- char **arg, *end, *ret = NULL;
- va_list ap;
-
- va_start(ap, str);
- while ((arg = va_arg(ap, char **)) != NULL) {
- if (*str == '\0')
- goto out;
- end = strchr(str, ',');
- if (end != str)
- *arg = str;
- if (end == NULL)
- goto out;
- *end++ = '\0';
- str = end;
- }
- ret = str;
-out:
- va_end(ap);
- return ret;
-}
diff --git a/arch/um/drivers/slip.h b/arch/um/drivers/slip.h
deleted file mode 100644
index 0f3b7ca99465..000000000000
--- a/arch/um/drivers/slip.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __UM_SLIP_H
-#define __UM_SLIP_H
-
-#include "slip_common.h"
-
-struct slip_data {
- void *dev;
- char name[sizeof("slnnnnn\0")];
- char *addr;
- char *gate_addr;
- int slave;
- struct slip_proto slip;
-};
-
-extern const struct net_user_info slip_user_info;
-
-extern int slip_user_read(int fd, void *buf, int len, struct slip_data *pri);
-extern int slip_user_write(int fd, void *buf, int len, struct slip_data *pri);
-
-#endif
diff --git a/arch/um/drivers/slip_common.c b/arch/um/drivers/slip_common.c
deleted file mode 100644
index 20fe4f42743d..000000000000
--- a/arch/um/drivers/slip_common.c
+++ /dev/null
@@ -1,55 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <string.h>
-#include "slip_common.h"
-#include <net_user.h>
-
-int slip_proto_read(int fd, void *buf, int len, struct slip_proto *slip)
-{
- int i, n, size, start;
-
- if(slip->more > 0){
- i = 0;
- while(i < slip->more){
- size = slip_unesc(slip->ibuf[i++], slip->ibuf,
- &slip->pos, &slip->esc);
- if(size){
- memcpy(buf, slip->ibuf, size);
- memmove(slip->ibuf, &slip->ibuf[i],
- slip->more - i);
- slip->more = slip->more - i;
- return size;
- }
- }
- slip->more = 0;
- }
-
- n = net_read(fd, &slip->ibuf[slip->pos],
- sizeof(slip->ibuf) - slip->pos);
- if(n <= 0)
- return n;
-
- start = slip->pos;
- for(i = 0; i < n; i++){
- size = slip_unesc(slip->ibuf[start + i], slip->ibuf,&slip->pos,
- &slip->esc);
- if(size){
- memcpy(buf, slip->ibuf, size);
- memmove(slip->ibuf, &slip->ibuf[start+i+1],
- n - (i + 1));
- slip->more = n - (i + 1);
- return size;
- }
- }
- return 0;
-}
-
-int slip_proto_write(int fd, void *buf, int len, struct slip_proto *slip)
-{
- int actual, n;
-
- actual = slip_esc(buf, slip->obuf, len);
- n = net_write(fd, slip->obuf, actual);
- if(n < 0)
- return n;
- else return len;
-}
diff --git a/arch/um/drivers/slip_common.h b/arch/um/drivers/slip_common.h
deleted file mode 100644
index d3798b5caf7f..000000000000
--- a/arch/um/drivers/slip_common.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __UM_SLIP_COMMON_H
-#define __UM_SLIP_COMMON_H
-
-#define BUF_SIZE 1500
- /* two bytes each for a (pathological) max packet of escaped chars + *
- * terminating END char + initial END char */
-#define ENC_BUF_SIZE (2 * BUF_SIZE + 2)
-
-/* SLIP protocol characters. */
-#define SLIP_END 0300 /* indicates end of frame */
-#define SLIP_ESC 0333 /* indicates byte stuffing */
-#define SLIP_ESC_END 0334 /* ESC ESC_END means END 'data' */
-#define SLIP_ESC_ESC 0335 /* ESC ESC_ESC means ESC 'data' */
-
-static inline int slip_unesc(unsigned char c, unsigned char *buf, int *pos,
- int *esc)
-{
- int ret;
-
- switch(c){
- case SLIP_END:
- *esc = 0;
- ret=*pos;
- *pos=0;
- return(ret);
- case SLIP_ESC:
- *esc = 1;
- return(0);
- case SLIP_ESC_ESC:
- if(*esc){
- *esc = 0;
- c = SLIP_ESC;
- }
- break;
- case SLIP_ESC_END:
- if(*esc){
- *esc = 0;
- c = SLIP_END;
- }
- break;
- }
- buf[(*pos)++] = c;
- return(0);
-}
-
-static inline int slip_esc(unsigned char *s, unsigned char *d, int len)
-{
- unsigned char *ptr = d;
- unsigned char c;
-
- /*
- * Send an initial END character to flush out any
- * data that may have accumulated in the receiver
- * due to line noise.
- */
-
- *ptr++ = SLIP_END;
-
- /*
- * For each byte in the packet, send the appropriate
- * character sequence, according to the SLIP protocol.
- */
-
- while (len-- > 0) {
- switch(c = *s++) {
- case SLIP_END:
- *ptr++ = SLIP_ESC;
- *ptr++ = SLIP_ESC_END;
- break;
- case SLIP_ESC:
- *ptr++ = SLIP_ESC;
- *ptr++ = SLIP_ESC_ESC;
- break;
- default:
- *ptr++ = c;
- break;
- }
- }
- *ptr++ = SLIP_END;
- return (ptr - d);
-}
-
-struct slip_proto {
- unsigned char ibuf[ENC_BUF_SIZE];
- unsigned char obuf[ENC_BUF_SIZE];
- int more; /* more data: do not read fd until ibuf has been drained */
- int pos;
- int esc;
-};
-
-static inline void slip_proto_init(struct slip_proto * slip)
-{
- memset(slip->ibuf, 0, sizeof(slip->ibuf));
- memset(slip->obuf, 0, sizeof(slip->obuf));
- slip->more = 0;
- slip->pos = 0;
- slip->esc = 0;
-}
-
-extern int slip_proto_read(int fd, void *buf, int len,
- struct slip_proto *slip);
-extern int slip_proto_write(int fd, void *buf, int len,
- struct slip_proto *slip);
-
-#endif
diff --git a/arch/um/drivers/slip_kern.c b/arch/um/drivers/slip_kern.c
deleted file mode 100644
index c58ccdcc16d6..000000000000
--- a/arch/um/drivers/slip_kern.c
+++ /dev/null
@@ -1,93 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#include <linux/if_arp.h>
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include <net_kern.h>
-#include "slip.h"
-
-struct slip_init {
- char *gate_addr;
-};
-
-static void slip_init(struct net_device *dev, void *data)
-{
- struct uml_net_private *private;
- struct slip_data *spri;
- struct slip_init *init = data;
-
- private = netdev_priv(dev);
- spri = (struct slip_data *) private->user;
-
- memset(spri->name, 0, sizeof(spri->name));
- spri->addr = NULL;
- spri->gate_addr = init->gate_addr;
- spri->slave = -1;
- spri->dev = dev;
-
- slip_proto_init(&spri->slip);
-
- dev->hard_header_len = 0;
- dev->header_ops = NULL;
- dev->addr_len = 0;
- dev->type = ARPHRD_SLIP;
- dev->tx_queue_len = 256;
- dev->flags = IFF_NOARP;
- printk("SLIP backend - SLIP IP = %s\n", spri->gate_addr);
-}
-
-static unsigned short slip_protocol(struct sk_buff *skbuff)
-{
- return htons(ETH_P_IP);
-}
-
-static int slip_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- return slip_user_read(fd, skb_mac_header(skb), skb->dev->mtu,
- (struct slip_data *) &lp->user);
-}
-
-static int slip_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- return slip_user_write(fd, skb->data, skb->len,
- (struct slip_data *) &lp->user);
-}
-
-static const struct net_kern_info slip_kern_info = {
- .init = slip_init,
- .protocol = slip_protocol,
- .read = slip_read,
- .write = slip_write,
-};
-
-static int slip_setup(char *str, char **mac_out, void *data)
-{
- struct slip_init *init = data;
-
- *init = ((struct slip_init) { .gate_addr = NULL });
-
- if (str[0] != '\0')
- init->gate_addr = str;
- return 1;
-}
-
-static struct transport slip_transport = {
- .list = LIST_HEAD_INIT(slip_transport.list),
- .name = "slip",
- .setup = slip_setup,
- .user = &slip_user_info,
- .kern = &slip_kern_info,
- .private_size = sizeof(struct slip_data),
- .setup_size = sizeof(struct slip_init),
-};
-
-static int register_slip(void)
-{
- register_transport(&slip_transport);
- return 0;
-}
-
-late_initcall(register_slip);
diff --git a/arch/um/drivers/slip_user.c b/arch/um/drivers/slip_user.c
deleted file mode 100644
index 7334019c9e60..000000000000
--- a/arch/um/drivers/slip_user.c
+++ /dev/null
@@ -1,252 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <string.h>
-#include <termios.h>
-#include <sys/wait.h>
-#include <net_user.h>
-#include <os.h>
-#include "slip.h"
-#include <um_malloc.h>
-
-static int slip_user_init(void *data, void *dev)
-{
- struct slip_data *pri = data;
-
- pri->dev = dev;
- return 0;
-}
-
-static int set_up_tty(int fd)
-{
- int i;
- struct termios tios;
-
- if (tcgetattr(fd, &tios) < 0) {
- printk(UM_KERN_ERR "could not get initial terminal "
- "attributes\n");
- return -1;
- }
-
- tios.c_cflag = CS8 | CREAD | HUPCL | CLOCAL;
- tios.c_iflag = IGNBRK | IGNPAR;
- tios.c_oflag = 0;
- tios.c_lflag = 0;
- for (i = 0; i < NCCS; i++)
- tios.c_cc[i] = 0;
- tios.c_cc[VMIN] = 1;
- tios.c_cc[VTIME] = 0;
-
- cfsetospeed(&tios, B38400);
- cfsetispeed(&tios, B38400);
-
- if (tcsetattr(fd, TCSAFLUSH, &tios) < 0) {
- printk(UM_KERN_ERR "failed to set terminal attributes\n");
- return -1;
- }
- return 0;
-}
-
-struct slip_pre_exec_data {
- int stdin_fd;
- int stdout_fd;
- int close_me;
-};
-
-static void slip_pre_exec(void *arg)
-{
- struct slip_pre_exec_data *data = arg;
-
- if (data->stdin_fd >= 0)
- dup2(data->stdin_fd, 0);
- dup2(data->stdout_fd, 1);
- if (data->close_me >= 0)
- close(data->close_me);
-}
-
-static int slip_tramp(char **argv, int fd)
-{
- struct slip_pre_exec_data pe_data;
- char *output;
- int pid, fds[2], err, output_len;
-
- err = os_pipe(fds, 1, 0);
- if (err < 0) {
- printk(UM_KERN_ERR "slip_tramp : pipe failed, err = %d\n",
- -err);
- goto out;
- }
-
- err = 0;
- pe_data.stdin_fd = fd;
- pe_data.stdout_fd = fds[1];
- pe_data.close_me = fds[0];
- err = run_helper(slip_pre_exec, &pe_data, argv);
- if (err < 0)
- goto out_close;
- pid = err;
-
- output_len = UM_KERN_PAGE_SIZE;
- output = uml_kmalloc(output_len, UM_GFP_KERNEL);
- if (output == NULL) {
- printk(UM_KERN_ERR "slip_tramp : failed to allocate output "
- "buffer\n");
- os_kill_process(pid, 1);
- err = -ENOMEM;
- goto out_close;
- }
-
- close(fds[1]);
- read_output(fds[0], output, output_len);
- printk("%s", output);
-
- err = helper_wait(pid);
- close(fds[0]);
-
- kfree(output);
- return err;
-
-out_close:
- close(fds[0]);
- close(fds[1]);
-out:
- return err;
-}
-
-static int slip_open(void *data)
-{
- struct slip_data *pri = data;
- char version_buf[sizeof("nnnnn\0")];
- char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")];
- char *argv[] = { "uml_net", version_buf, "slip", "up", gate_buf,
- NULL };
- int sfd, mfd, err;
-
- err = get_pty();
- if (err < 0) {
- printk(UM_KERN_ERR "slip-open : Failed to open pty, err = %d\n",
- -err);
- goto out;
- }
- mfd = err;
-
- err = open(ptsname(mfd), O_RDWR, 0);
- if (err < 0) {
- printk(UM_KERN_ERR "Couldn't open tty for slip line, "
- "err = %d\n", -err);
- goto out_close;
- }
- sfd = err;
-
- err = set_up_tty(sfd);
- if (err)
- goto out_close2;
-
- pri->slave = sfd;
- pri->slip.pos = 0;
- pri->slip.esc = 0;
- if (pri->gate_addr != NULL) {
- sprintf(version_buf, "%d", UML_NET_VERSION);
- strcpy(gate_buf, pri->gate_addr);
-
- err = slip_tramp(argv, sfd);
-
- if (err < 0) {
- printk(UM_KERN_ERR "slip_tramp failed - err = %d\n",
- -err);
- goto out_close2;
- }
- err = os_get_ifname(pri->slave, pri->name);
- if (err < 0) {
- printk(UM_KERN_ERR "get_ifname failed, err = %d\n",
- -err);
- goto out_close2;
- }
- iter_addresses(pri->dev, open_addr, pri->name);
- }
- else {
- err = os_set_slip(sfd);
- if (err < 0) {
- printk(UM_KERN_ERR "Failed to set slip discipline "
- "encapsulation - err = %d\n", -err);
- goto out_close2;
- }
- }
- return mfd;
-out_close2:
- close(sfd);
-out_close:
- close(mfd);
-out:
- return err;
-}
-
-static void slip_close(int fd, void *data)
-{
- struct slip_data *pri = data;
- char version_buf[sizeof("nnnnn\0")];
- char *argv[] = { "uml_net", version_buf, "slip", "down", pri->name,
- NULL };
- int err;
-
- if (pri->gate_addr != NULL)
- iter_addresses(pri->dev, close_addr, pri->name);
-
- sprintf(version_buf, "%d", UML_NET_VERSION);
-
- err = slip_tramp(argv, pri->slave);
-
- if (err != 0)
- printk(UM_KERN_ERR "slip_tramp failed - errno = %d\n", -err);
- close(fd);
- close(pri->slave);
- pri->slave = -1;
-}
-
-int slip_user_read(int fd, void *buf, int len, struct slip_data *pri)
-{
- return slip_proto_read(fd, buf, len, &pri->slip);
-}
-
-int slip_user_write(int fd, void *buf, int len, struct slip_data *pri)
-{
- return slip_proto_write(fd, buf, len, &pri->slip);
-}
-
-static void slip_add_addr(unsigned char *addr, unsigned char *netmask,
- void *data)
-{
- struct slip_data *pri = data;
-
- if (pri->slave < 0)
- return;
- open_addr(addr, netmask, pri->name);
-}
-
-static void slip_del_addr(unsigned char *addr, unsigned char *netmask,
- void *data)
-{
- struct slip_data *pri = data;
-
- if (pri->slave < 0)
- return;
- close_addr(addr, netmask, pri->name);
-}
-
-const struct net_user_info slip_user_info = {
- .init = slip_user_init,
- .open = slip_open,
- .close = slip_close,
- .remove = NULL,
- .add_address = slip_add_addr,
- .delete_address = slip_del_addr,
- .mtu = BUF_SIZE,
- .max_packet = BUF_SIZE,
-};
diff --git a/arch/um/drivers/slirp.h b/arch/um/drivers/slirp.h
deleted file mode 100644
index 4aef2b88249a..000000000000
--- a/arch/um/drivers/slirp.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __UM_SLIRP_H
-#define __UM_SLIRP_H
-
-#include "slip_common.h"
-
-#define SLIRP_MAX_ARGS 100
-/*
- * XXX this next definition is here because I don't understand why this
- * initializer doesn't work in slirp_kern.c:
- *
- * argv : { init->argv[ 0 ... SLIRP_MAX_ARGS-1 ] },
- *
- * or why I can't typecast like this:
- *
- * argv : (char* [SLIRP_MAX_ARGS])(init->argv),
- */
-struct arg_list_dummy_wrapper { char *argv[SLIRP_MAX_ARGS]; };
-
-struct slirp_data {
- void *dev;
- struct arg_list_dummy_wrapper argw;
- int pid;
- int slave;
- struct slip_proto slip;
-};
-
-extern const struct net_user_info slirp_user_info;
-
-extern int slirp_user_read(int fd, void *buf, int len, struct slirp_data *pri);
-extern int slirp_user_write(int fd, void *buf, int len,
- struct slirp_data *pri);
-
-#endif
diff --git a/arch/um/drivers/slirp_kern.c b/arch/um/drivers/slirp_kern.c
deleted file mode 100644
index 0a6151ee9572..000000000000
--- a/arch/um/drivers/slirp_kern.c
+++ /dev/null
@@ -1,120 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#include <linux/if_arp.h>
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include <linux/string.h>
-#include <net_kern.h>
-#include <net_user.h>
-#include "slirp.h"
-
-struct slirp_init {
- struct arg_list_dummy_wrapper argw; /* XXX should be simpler... */
-};
-
-static void slirp_init(struct net_device *dev, void *data)
-{
- struct uml_net_private *private;
- struct slirp_data *spri;
- struct slirp_init *init = data;
- int i;
-
- private = netdev_priv(dev);
- spri = (struct slirp_data *) private->user;
-
- spri->argw = init->argw;
- spri->pid = -1;
- spri->slave = -1;
- spri->dev = dev;
-
- slip_proto_init(&spri->slip);
-
- dev->hard_header_len = 0;
- dev->header_ops = NULL;
- dev->addr_len = 0;
- dev->type = ARPHRD_SLIP;
- dev->tx_queue_len = 256;
- dev->flags = IFF_NOARP;
- printk("SLIRP backend - command line:");
- for (i = 0; spri->argw.argv[i] != NULL; i++)
- printk(" '%s'",spri->argw.argv[i]);
- printk("\n");
-}
-
-static unsigned short slirp_protocol(struct sk_buff *skbuff)
-{
- return htons(ETH_P_IP);
-}
-
-static int slirp_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- return slirp_user_read(fd, skb_mac_header(skb), skb->dev->mtu,
- (struct slirp_data *) &lp->user);
-}
-
-static int slirp_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- return slirp_user_write(fd, skb->data, skb->len,
- (struct slirp_data *) &lp->user);
-}
-
-const struct net_kern_info slirp_kern_info = {
- .init = slirp_init,
- .protocol = slirp_protocol,
- .read = slirp_read,
- .write = slirp_write,
-};
-
-static int slirp_setup(char *str, char **mac_out, void *data)
-{
- struct slirp_init *init = data;
- int i=0;
-
- *init = ((struct slirp_init) { .argw = { { "slirp", NULL } } });
-
- str = split_if_spec(str, mac_out, NULL);
-
- if (str == NULL) /* no command line given after MAC addr */
- return 1;
-
- do {
- if (i >= SLIRP_MAX_ARGS - 1) {
- printk(KERN_WARNING "slirp_setup: truncating slirp "
- "arguments\n");
- break;
- }
- init->argw.argv[i++] = str;
- while(*str && *str!=',') {
- if (*str == '_')
- *str=' ';
- str++;
- }
- if (*str != ',')
- break;
- *str++ = '\0';
- } while (1);
-
- init->argw.argv[i] = NULL;
- return 1;
-}
-
-static struct transport slirp_transport = {
- .list = LIST_HEAD_INIT(slirp_transport.list),
- .name = "slirp",
- .setup = slirp_setup,
- .user = &slirp_user_info,
- .kern = &slirp_kern_info,
- .private_size = sizeof(struct slirp_data),
- .setup_size = sizeof(struct slirp_init),
-};
-
-static int register_slirp(void)
-{
- register_transport(&slirp_transport);
- return 0;
-}
-
-late_initcall(register_slirp);
diff --git a/arch/um/drivers/slirp_user.c b/arch/um/drivers/slirp_user.c
deleted file mode 100644
index 97228aa080cb..000000000000
--- a/arch/um/drivers/slirp_user.c
+++ /dev/null
@@ -1,124 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#include <unistd.h>
-#include <errno.h>
-#include <string.h>
-#include <sys/wait.h>
-#include <net_user.h>
-#include <os.h>
-#include "slirp.h"
-
-static int slirp_user_init(void *data, void *dev)
-{
- struct slirp_data *pri = data;
-
- pri->dev = dev;
- return 0;
-}
-
-struct slirp_pre_exec_data {
- int stdin_fd;
- int stdout_fd;
-};
-
-static void slirp_pre_exec(void *arg)
-{
- struct slirp_pre_exec_data *data = arg;
-
- if (data->stdin_fd != -1)
- dup2(data->stdin_fd, 0);
- if (data->stdout_fd != -1)
- dup2(data->stdout_fd, 1);
-}
-
-static int slirp_tramp(char **argv, int fd)
-{
- struct slirp_pre_exec_data pe_data;
- int pid;
-
- pe_data.stdin_fd = fd;
- pe_data.stdout_fd = fd;
- pid = run_helper(slirp_pre_exec, &pe_data, argv);
-
- return pid;
-}
-
-static int slirp_open(void *data)
-{
- struct slirp_data *pri = data;
- int fds[2], err;
-
- err = os_pipe(fds, 1, 1);
- if (err)
- return err;
-
- err = slirp_tramp(pri->argw.argv, fds[1]);
- if (err < 0) {
- printk(UM_KERN_ERR "slirp_tramp failed - errno = %d\n", -err);
- goto out;
- }
-
- pri->slave = fds[1];
- pri->slip.pos = 0;
- pri->slip.esc = 0;
- pri->pid = err;
-
- return fds[0];
-out:
- close(fds[0]);
- close(fds[1]);
- return err;
-}
-
-static void slirp_close(int fd, void *data)
-{
- struct slirp_data *pri = data;
- int err;
-
- close(fd);
- close(pri->slave);
-
- pri->slave = -1;
-
- if (pri->pid<1) {
- printk(UM_KERN_ERR "slirp_close: no child process to shut "
- "down\n");
- return;
- }
-
-#if 0
- if (kill(pri->pid, SIGHUP)<0) {
- printk(UM_KERN_ERR "slirp_close: sending hangup to %d failed "
- "(%d)\n", pri->pid, errno);
- }
-#endif
- err = helper_wait(pri->pid);
- if (err < 0)
- return;
-
- pri->pid = -1;
-}
-
-int slirp_user_read(int fd, void *buf, int len, struct slirp_data *pri)
-{
- return slip_proto_read(fd, buf, len, &pri->slip);
-}
-
-int slirp_user_write(int fd, void *buf, int len, struct slirp_data *pri)
-{
- return slip_proto_write(fd, buf, len, &pri->slip);
-}
-
-const struct net_user_info slirp_user_info = {
- .init = slirp_user_init,
- .open = slirp_open,
- .close = slirp_close,
- .remove = NULL,
- .add_address = NULL,
- .delete_address = NULL,
- .mtu = BUF_SIZE,
- .max_packet = BUF_SIZE,
-};
diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c
index c5e6545f6fcf..8e8a8bf518b6 100644
--- a/arch/um/drivers/ubd_user.c
+++ b/arch/um/drivers/ubd_user.c
@@ -41,7 +41,7 @@ int start_io_thread(struct os_helper_thread **td_out, int *fd_out)
*fd_out = fds[1];
err = os_set_fd_block(*fd_out, 0);
- err = os_set_fd_block(kernel_fd, 0);
+ err |= os_set_fd_block(kernel_fd, 0);
if (err) {
printk("start_io_thread - failed to set nonblocking I/O.\n");
goto out_close;
diff --git a/arch/um/drivers/umcast.h b/arch/um/drivers/umcast.h
deleted file mode 100644
index fe39bee1e3bd..000000000000
--- a/arch/um/drivers/umcast.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#ifndef __DRIVERS_UMCAST_H
-#define __DRIVERS_UMCAST_H
-
-#include <net_user.h>
-
-struct umcast_data {
- char *addr;
- unsigned short lport;
- unsigned short rport;
- void *listen_addr;
- void *remote_addr;
- int ttl;
- int unicast;
- void *dev;
-};
-
-extern const struct net_user_info umcast_user_info;
-
-extern int umcast_user_write(int fd, void *buf, int len,
- struct umcast_data *pri);
-
-#endif
diff --git a/arch/um/drivers/umcast_kern.c b/arch/um/drivers/umcast_kern.c
deleted file mode 100644
index 595a54f2b9c6..000000000000
--- a/arch/um/drivers/umcast_kern.c
+++ /dev/null
@@ -1,188 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * user-mode-linux networking multicast transport
- * Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org>
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- *
- * based on the existing uml-networking code, which is
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 by various other people who didn't put their name here.
- *
- */
-
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include "umcast.h"
-#include <net_kern.h>
-
-struct umcast_init {
- char *addr;
- int lport;
- int rport;
- int ttl;
- bool unicast;
-};
-
-static void umcast_init(struct net_device *dev, void *data)
-{
- struct uml_net_private *pri;
- struct umcast_data *dpri;
- struct umcast_init *init = data;
-
- pri = netdev_priv(dev);
- dpri = (struct umcast_data *) pri->user;
- dpri->addr = init->addr;
- dpri->lport = init->lport;
- dpri->rport = init->rport;
- dpri->unicast = init->unicast;
- dpri->ttl = init->ttl;
- dpri->dev = dev;
-
- if (dpri->unicast) {
- printk(KERN_INFO "ucast backend address: %s:%u listen port: "
- "%u\n", dpri->addr, dpri->rport, dpri->lport);
- } else {
- printk(KERN_INFO "mcast backend multicast address: %s:%u, "
- "TTL:%u\n", dpri->addr, dpri->lport, dpri->ttl);
- }
-}
-
-static int umcast_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- return net_recvfrom(fd, skb_mac_header(skb),
- skb->dev->mtu + ETH_HEADER_OTHER);
-}
-
-static int umcast_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- return umcast_user_write(fd, skb->data, skb->len,
- (struct umcast_data *) &lp->user);
-}
-
-static const struct net_kern_info umcast_kern_info = {
- .init = umcast_init,
- .protocol = eth_protocol,
- .read = umcast_read,
- .write = umcast_write,
-};
-
-static int mcast_setup(char *str, char **mac_out, void *data)
-{
- struct umcast_init *init = data;
- char *port_str = NULL, *ttl_str = NULL, *remain;
- char *last;
-
- *init = ((struct umcast_init)
- { .addr = "239.192.168.1",
- .lport = 1102,
- .ttl = 1 });
-
- remain = split_if_spec(str, mac_out, &init->addr, &port_str, &ttl_str,
- NULL);
- if (remain != NULL) {
- printk(KERN_ERR "mcast_setup - Extra garbage on "
- "specification : '%s'\n", remain);
- return 0;
- }
-
- if (port_str != NULL) {
- init->lport = simple_strtoul(port_str, &last, 10);
- if ((*last != '\0') || (last == port_str)) {
- printk(KERN_ERR "mcast_setup - Bad port : '%s'\n",
- port_str);
- return 0;
- }
- }
-
- if (ttl_str != NULL) {
- init->ttl = simple_strtoul(ttl_str, &last, 10);
- if ((*last != '\0') || (last == ttl_str)) {
- printk(KERN_ERR "mcast_setup - Bad ttl : '%s'\n",
- ttl_str);
- return 0;
- }
- }
-
- init->unicast = false;
- init->rport = init->lport;
-
- printk(KERN_INFO "Configured mcast device: %s:%u-%u\n", init->addr,
- init->lport, init->ttl);
-
- return 1;
-}
-
-static int ucast_setup(char *str, char **mac_out, void *data)
-{
- struct umcast_init *init = data;
- char *lport_str = NULL, *rport_str = NULL, *remain;
- char *last;
-
- *init = ((struct umcast_init)
- { .addr = "",
- .lport = 1102,
- .rport = 1102 });
-
- remain = split_if_spec(str, mac_out, &init->addr,
- &lport_str, &rport_str, NULL);
- if (remain != NULL) {
- printk(KERN_ERR "ucast_setup - Extra garbage on "
- "specification : '%s'\n", remain);
- return 0;
- }
-
- if (lport_str != NULL) {
- init->lport = simple_strtoul(lport_str, &last, 10);
- if ((*last != '\0') || (last == lport_str)) {
- printk(KERN_ERR "ucast_setup - Bad listen port : "
- "'%s'\n", lport_str);
- return 0;
- }
- }
-
- if (rport_str != NULL) {
- init->rport = simple_strtoul(rport_str, &last, 10);
- if ((*last != '\0') || (last == rport_str)) {
- printk(KERN_ERR "ucast_setup - Bad remote port : "
- "'%s'\n", rport_str);
- return 0;
- }
- }
-
- init->unicast = true;
-
- printk(KERN_INFO "Configured ucast device: :%u -> %s:%u\n",
- init->lport, init->addr, init->rport);
-
- return 1;
-}
-
-static struct transport mcast_transport = {
- .list = LIST_HEAD_INIT(mcast_transport.list),
- .name = "mcast",
- .setup = mcast_setup,
- .user = &umcast_user_info,
- .kern = &umcast_kern_info,
- .private_size = sizeof(struct umcast_data),
- .setup_size = sizeof(struct umcast_init),
-};
-
-static struct transport ucast_transport = {
- .list = LIST_HEAD_INIT(ucast_transport.list),
- .name = "ucast",
- .setup = ucast_setup,
- .user = &umcast_user_info,
- .kern = &umcast_kern_info,
- .private_size = sizeof(struct umcast_data),
- .setup_size = sizeof(struct umcast_init),
-};
-
-static int register_umcast(void)
-{
- register_transport(&mcast_transport);
- register_transport(&ucast_transport);
- return 0;
-}
-
-late_initcall(register_umcast);
diff --git a/arch/um/drivers/umcast_user.c b/arch/um/drivers/umcast_user.c
deleted file mode 100644
index b50b13cff04e..000000000000
--- a/arch/um/drivers/umcast_user.c
+++ /dev/null
@@ -1,184 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * user-mode-linux networking multicast transport
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org>
- *
- * based on the existing uml-networking code, which is
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 by various other people who didn't put their name here.
- *
- *
- */
-
-#include <unistd.h>
-#include <errno.h>
-#include <netinet/in.h>
-#include "umcast.h"
-#include <net_user.h>
-#include <um_malloc.h>
-
-static struct sockaddr_in *new_addr(char *addr, unsigned short port)
-{
- struct sockaddr_in *sin;
-
- sin = uml_kmalloc(sizeof(struct sockaddr_in), UM_GFP_KERNEL);
- if (sin == NULL) {
- printk(UM_KERN_ERR "new_addr: allocation of sockaddr_in "
- "failed\n");
- return NULL;
- }
- sin->sin_family = AF_INET;
- if (addr)
- sin->sin_addr.s_addr = in_aton(addr);
- else
- sin->sin_addr.s_addr = INADDR_ANY;
- sin->sin_port = htons(port);
- return sin;
-}
-
-static int umcast_user_init(void *data, void *dev)
-{
- struct umcast_data *pri = data;
-
- pri->remote_addr = new_addr(pri->addr, pri->rport);
- if (pri->unicast)
- pri->listen_addr = new_addr(NULL, pri->lport);
- else
- pri->listen_addr = pri->remote_addr;
- pri->dev = dev;
- return 0;
-}
-
-static void umcast_remove(void *data)
-{
- struct umcast_data *pri = data;
-
- kfree(pri->listen_addr);
- if (pri->unicast)
- kfree(pri->remote_addr);
- pri->listen_addr = pri->remote_addr = NULL;
-}
-
-static int umcast_open(void *data)
-{
- struct umcast_data *pri = data;
- struct sockaddr_in *lsin = pri->listen_addr;
- struct sockaddr_in *rsin = pri->remote_addr;
- struct ip_mreq mreq;
- int fd, yes = 1, err = -EINVAL;
-
-
- if ((!pri->unicast && lsin->sin_addr.s_addr == 0) ||
- (rsin->sin_addr.s_addr == 0) ||
- (lsin->sin_port == 0) || (rsin->sin_port == 0))
- goto out;
-
- fd = socket(AF_INET, SOCK_DGRAM, 0);
-
- if (fd < 0) {
- err = -errno;
- printk(UM_KERN_ERR "umcast_open : data socket failed, "
- "errno = %d\n", errno);
- goto out;
- }
-
- if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) {
- err = -errno;
- printk(UM_KERN_ERR "umcast_open: SO_REUSEADDR failed, "
- "errno = %d\n", errno);
- goto out_close;
- }
-
- if (!pri->unicast) {
- /* set ttl according to config */
- if (setsockopt(fd, SOL_IP, IP_MULTICAST_TTL, &pri->ttl,
- sizeof(pri->ttl)) < 0) {
- err = -errno;
- printk(UM_KERN_ERR "umcast_open: IP_MULTICAST_TTL "
- "failed, error = %d\n", errno);
- goto out_close;
- }
-
- /* set LOOP, so data does get fed back to local sockets */
- if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP,
- &yes, sizeof(yes)) < 0) {
- err = -errno;
- printk(UM_KERN_ERR "umcast_open: IP_MULTICAST_LOOP "
- "failed, error = %d\n", errno);
- goto out_close;
- }
- }
-
- /* bind socket to the address */
- if (bind(fd, (struct sockaddr *) lsin, sizeof(*lsin)) < 0) {
- err = -errno;
- printk(UM_KERN_ERR "umcast_open : data bind failed, "
- "errno = %d\n", errno);
- goto out_close;
- }
-
- if (!pri->unicast) {
- /* subscribe to the multicast group */
- mreq.imr_multiaddr.s_addr = lsin->sin_addr.s_addr;
- mreq.imr_interface.s_addr = 0;
- if (setsockopt(fd, SOL_IP, IP_ADD_MEMBERSHIP,
- &mreq, sizeof(mreq)) < 0) {
- err = -errno;
- printk(UM_KERN_ERR "umcast_open: IP_ADD_MEMBERSHIP "
- "failed, error = %d\n", errno);
- printk(UM_KERN_ERR "There appears not to be a "
- "multicast-capable network interface on the "
- "host.\n");
- printk(UM_KERN_ERR "eth0 should be configured in order "
- "to use the multicast transport.\n");
- goto out_close;
- }
- }
-
- return fd;
-
- out_close:
- close(fd);
- out:
- return err;
-}
-
-static void umcast_close(int fd, void *data)
-{
- struct umcast_data *pri = data;
-
- if (!pri->unicast) {
- struct ip_mreq mreq;
- struct sockaddr_in *lsin = pri->listen_addr;
-
- mreq.imr_multiaddr.s_addr = lsin->sin_addr.s_addr;
- mreq.imr_interface.s_addr = 0;
- if (setsockopt(fd, SOL_IP, IP_DROP_MEMBERSHIP,
- &mreq, sizeof(mreq)) < 0) {
- printk(UM_KERN_ERR "umcast_close: IP_DROP_MEMBERSHIP "
- "failed, error = %d\n", errno);
- }
- }
-
- close(fd);
-}
-
-int umcast_user_write(int fd, void *buf, int len, struct umcast_data *pri)
-{
- struct sockaddr_in *data_addr = pri->remote_addr;
-
- return net_sendto(fd, buf, len, data_addr, sizeof(*data_addr));
-}
-
-const struct net_user_info umcast_user_info = {
- .init = umcast_user_init,
- .open = umcast_open,
- .close = umcast_close,
- .remove = umcast_remove,
- .add_address = NULL,
- .delete_address = NULL,
- .mtu = ETH_MAX_PACKET,
- .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER,
-};
diff --git a/arch/um/drivers/vde.h b/arch/um/drivers/vde.h
deleted file mode 100644
index cab0379e6142..000000000000
--- a/arch/um/drivers/vde.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org).
- */
-
-#ifndef __UM_VDE_H__
-#define __UM_VDE_H__
-
-struct vde_data {
- char *vde_switch;
- char *descr;
- void *args;
- void *conn;
- void *dev;
-};
-
-struct vde_init {
- char *vde_switch;
- char *descr;
- int port;
- char *group;
- int mode;
-};
-
-extern const struct net_user_info vde_user_info;
-
-extern void vde_init_libstuff(struct vde_data *vpri, struct vde_init *init);
-
-extern int vde_user_read(void *conn, void *buf, int len);
-extern int vde_user_write(void *conn, void *buf, int len);
-
-#endif
diff --git a/arch/um/drivers/vde_kern.c b/arch/um/drivers/vde_kern.c
deleted file mode 100644
index bc6f22cbfb35..000000000000
--- a/arch/um/drivers/vde_kern.c
+++ /dev/null
@@ -1,129 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org).
- *
- * Transport usage:
- * ethN=vde,<vde_switch>,<mac addr>,<port>,<group>,<mode>,<description>
- *
- */
-
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include <net_kern.h>
-#include <net_user.h>
-#include "vde.h"
-
-static void vde_init(struct net_device *dev, void *data)
-{
- struct vde_init *init = data;
- struct uml_net_private *pri;
- struct vde_data *vpri;
-
- pri = netdev_priv(dev);
- vpri = (struct vde_data *) pri->user;
-
- vpri->vde_switch = init->vde_switch;
- vpri->descr = init->descr ? init->descr : "UML vde_transport";
- vpri->args = NULL;
- vpri->conn = NULL;
- vpri->dev = dev;
-
- printk("vde backend - %s, ", vpri->vde_switch ?
- vpri->vde_switch : "(default socket)");
-
- vde_init_libstuff(vpri, init);
-
- printk("\n");
-}
-
-static int vde_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- struct vde_data *pri = (struct vde_data *) &lp->user;
-
- if (pri->conn != NULL)
- return vde_user_read(pri->conn, skb_mac_header(skb),
- skb->dev->mtu + ETH_HEADER_OTHER);
-
- printk(KERN_ERR "vde_read - we have no VDECONN to read from");
- return -EBADF;
-}
-
-static int vde_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- struct vde_data *pri = (struct vde_data *) &lp->user;
-
- if (pri->conn != NULL)
- return vde_user_write((void *)pri->conn, skb->data,
- skb->len);
-
- printk(KERN_ERR "vde_write - we have no VDECONN to write to");
- return -EBADF;
-}
-
-static const struct net_kern_info vde_kern_info = {
- .init = vde_init,
- .protocol = eth_protocol,
- .read = vde_read,
- .write = vde_write,
-};
-
-static int vde_setup(char *str, char **mac_out, void *data)
-{
- struct vde_init *init = data;
- char *remain, *port_str = NULL, *mode_str = NULL, *last;
-
- *init = ((struct vde_init)
- { .vde_switch = NULL,
- .descr = NULL,
- .port = 0,
- .group = NULL,
- .mode = 0 });
-
- remain = split_if_spec(str, &init->vde_switch, mac_out, &port_str,
- &init->group, &mode_str, &init->descr, NULL);
-
- if (remain != NULL)
- printk(KERN_WARNING "vde_setup - Ignoring extra data :"
- "'%s'\n", remain);
-
- if (port_str != NULL) {
- init->port = simple_strtoul(port_str, &last, 10);
- if ((*last != '\0') || (last == port_str)) {
- printk(KERN_ERR "vde_setup - Bad port : '%s'\n",
- port_str);
- return 0;
- }
- }
-
- if (mode_str != NULL) {
- init->mode = simple_strtoul(mode_str, &last, 8);
- if ((*last != '\0') || (last == mode_str)) {
- printk(KERN_ERR "vde_setup - Bad mode : '%s'\n",
- mode_str);
- return 0;
- }
- }
-
- printk(KERN_INFO "Configured vde device: %s\n", init->vde_switch ?
- init->vde_switch : "(default socket)");
-
- return 1;
-}
-
-static struct transport vde_transport = {
- .list = LIST_HEAD_INIT(vde_transport.list),
- .name = "vde",
- .setup = vde_setup,
- .user = &vde_user_info,
- .kern = &vde_kern_info,
- .private_size = sizeof(struct vde_data),
- .setup_size = sizeof(struct vde_init),
-};
-
-static int register_vde(void)
-{
- register_transport(&vde_transport);
- return 0;
-}
-
-late_initcall(register_vde);
diff --git a/arch/um/drivers/vde_user.c b/arch/um/drivers/vde_user.c
deleted file mode 100644
index bc7dc4e1e486..000000000000
--- a/arch/um/drivers/vde_user.c
+++ /dev/null
@@ -1,125 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org).
- */
-
-#include <stddef.h>
-#include <errno.h>
-#include <libvdeplug.h>
-#include <net_user.h>
-#include <um_malloc.h>
-#include "vde.h"
-
-static int vde_user_init(void *data, void *dev)
-{
- struct vde_data *pri = data;
- VDECONN *conn = NULL;
- int err = -EINVAL;
-
- pri->dev = dev;
-
- conn = vde_open(pri->vde_switch, pri->descr, pri->args);
-
- if (conn == NULL) {
- err = -errno;
- printk(UM_KERN_ERR "vde_user_init: vde_open failed, "
- "errno = %d\n", errno);
- return err;
- }
-
- printk(UM_KERN_INFO "vde backend - connection opened\n");
-
- pri->conn = conn;
-
- return 0;
-}
-
-static int vde_user_open(void *data)
-{
- struct vde_data *pri = data;
-
- if (pri->conn != NULL)
- return vde_datafd(pri->conn);
-
- printk(UM_KERN_WARNING "vde_open - we have no VDECONN to open");
- return -EINVAL;
-}
-
-static void vde_remove(void *data)
-{
- struct vde_data *pri = data;
-
- if (pri->conn != NULL) {
- printk(UM_KERN_INFO "vde backend - closing connection\n");
- vde_close(pri->conn);
- pri->conn = NULL;
- kfree(pri->args);
- pri->args = NULL;
- return;
- }
-
- printk(UM_KERN_WARNING "vde_remove - we have no VDECONN to remove");
-}
-
-const struct net_user_info vde_user_info = {
- .init = vde_user_init,
- .open = vde_user_open,
- .close = NULL,
- .remove = vde_remove,
- .add_address = NULL,
- .delete_address = NULL,
- .mtu = ETH_MAX_PACKET,
- .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER,
-};
-
-void vde_init_libstuff(struct vde_data *vpri, struct vde_init *init)
-{
- struct vde_open_args *args;
-
- vpri->args = uml_kmalloc(sizeof(struct vde_open_args), UM_GFP_KERNEL);
- if (vpri->args == NULL) {
- printk(UM_KERN_ERR "vde_init_libstuff - vde_open_args "
- "allocation failed");
- return;
- }
-
- args = vpri->args;
-
- args->port = init->port;
- args->group = init->group;
- args->mode = init->mode ? init->mode : 0700;
-
- args->port ? printk("port %d", args->port) :
- printk("undefined port");
-}
-
-int vde_user_read(void *conn, void *buf, int len)
-{
- VDECONN *vconn = conn;
- int rv;
-
- if (vconn == NULL)
- return 0;
-
- rv = vde_recv(vconn, buf, len, 0);
- if (rv < 0) {
- if (errno == EAGAIN)
- return 0;
- return -errno;
- }
- else if (rv == 0)
- return -ENOTCONN;
-
- return rv;
-}
-
-int vde_user_write(void *conn, void *buf, int len)
-{
- VDECONN *vconn = conn;
-
- if (vconn == NULL)
- return 0;
-
- return vde_send(vconn, buf, len, 0);
-}
-
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index b97bb52dd562..9bbbddfe866b 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -8,6 +8,8 @@
* Copyright (C) 2001 by various other people who didn't put their name here.
*/
+#define pr_fmt(fmt) "uml-vector: " fmt
+
#include <linux/memblock.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
@@ -27,7 +29,6 @@
#include <init.h>
#include <irq_kern.h>
#include <irq_user.h>
-#include <net_kern.h>
#include <os.h>
#include "mconsole_kern.h"
#include "vector_user.h"
@@ -1533,13 +1534,47 @@ static const struct net_device_ops vector_netdev_ops = {
static void vector_timer_expire(struct timer_list *t)
{
- struct vector_private *vp = from_timer(vp, t, tl);
+ struct vector_private *vp = timer_container_of(vp, t, tl);
vp->estats.tx_kicks++;
napi_schedule(&vp->napi);
}
+static void vector_setup_etheraddr(struct net_device *dev, char *str)
+{
+ u8 addr[ETH_ALEN];
+
+ if (str == NULL)
+ goto random;
+
+ if (!mac_pton(str, addr)) {
+ netdev_err(dev,
+ "Failed to parse '%s' as an ethernet address\n", str);
+ goto random;
+ }
+ if (is_multicast_ether_addr(addr)) {
+ netdev_err(dev,
+ "Attempt to assign a multicast ethernet address to a device disallowed\n");
+ goto random;
+ }
+ if (!is_valid_ether_addr(addr)) {
+ netdev_err(dev,
+ "Attempt to assign an invalid ethernet address to a device disallowed\n");
+ goto random;
+ }
+ if (!is_local_ether_addr(addr)) {
+ netdev_warn(dev, "Warning: Assigning a globally valid ethernet address to a device\n");
+ netdev_warn(dev, "You should set the 2nd rightmost bit in the first byte of the MAC,\n");
+ netdev_warn(dev, "i.e. %02x:%02x:%02x:%02x:%02x:%02x\n",
+ addr[0] | 0x02, addr[1], addr[2], addr[3], addr[4], addr[5]);
+ }
+ eth_hw_addr_set(dev, addr);
+ return;
+random:
+ netdev_info(dev, "Choosing a random ethernet address\n");
+ eth_hw_addr_random(dev);
+}
static void vector_eth_configure(
int n,
@@ -1553,14 +1588,12 @@ static void vector_eth_configure(
device = kzalloc(sizeof(*device), GFP_KERNEL);
if (device == NULL) {
- printk(KERN_ERR "eth_configure failed to allocate struct "
- "vector_device\n");
+ pr_err("Failed to allocate struct vector_device for vec%d\n", n);
return;
}
dev = alloc_etherdev(sizeof(struct vector_private));
if (dev == NULL) {
- printk(KERN_ERR "eth_configure: failed to allocate struct "
- "net_device for vec%d\n", n);
+ pr_err("Failed to allocate struct net_device for vec%d\n", n);
goto out_free_device;
}
@@ -1574,7 +1607,7 @@ static void vector_eth_configure(
* and fail.
*/
snprintf(dev->name, sizeof(dev->name), "vec%d", n);
- uml_net_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac"));
+ vector_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac"));
vp = netdev_priv(dev);
/* sysfs register */
@@ -1592,35 +1625,19 @@ static void vector_eth_configure(
device->dev = dev;
- *vp = ((struct vector_private)
- {
- .list = LIST_HEAD_INIT(vp->list),
- .dev = dev,
- .unit = n,
- .options = get_transport_options(def),
- .rx_irq = 0,
- .tx_irq = 0,
- .parsed = def,
- .max_packet = get_mtu(def) + ETH_HEADER_OTHER,
- /* TODO - we need to calculate headroom so that ip header
- * is 16 byte aligned all the time
- */
- .headroom = get_headroom(def),
- .form_header = NULL,
- .verify_header = NULL,
- .header_rxbuffer = NULL,
- .header_txbuffer = NULL,
- .header_size = 0,
- .rx_header_size = 0,
- .rexmit_scheduled = false,
- .opened = false,
- .transport_data = NULL,
- .in_write_poll = false,
- .coalesce = 2,
- .req_size = get_req_size(def),
- .in_error = false,
- .bpf = NULL
- });
+ INIT_LIST_HEAD(&vp->list);
+ vp->dev = dev;
+ vp->unit = n;
+ vp->options = get_transport_options(def);
+ vp->parsed = def;
+ vp->max_packet = get_mtu(def) + ETH_HEADER_OTHER;
+ /*
+ * TODO - we need to calculate headroom so that ip header
+ * is 16 byte aligned all the time
+ */
+ vp->headroom = get_headroom(def);
+ vp->coalesce = 2;
+ vp->req_size = get_req_size(def);
dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST);
INIT_WORK(&vp->reset_tx, vector_reset_tx);
@@ -1690,8 +1707,7 @@ static int __init vector_setup(char *str)
err = vector_parse(str, &n, &str, &error);
if (err) {
- printk(KERN_ERR "vector_setup - Couldn't parse '%s' : %s\n",
- str, error);
+ pr_err("Couldn't parse '%s': %s\n", str, error);
return 1;
}
new = memblock_alloc_or_panic(sizeof(*new), SMP_CACHE_BYTES);
diff --git a/arch/um/drivers/vfio_kern.c b/arch/um/drivers/vfio_kern.c
new file mode 100644
index 000000000000..13b971a2bd43
--- /dev/null
+++ b/arch/um/drivers/vfio_kern.c
@@ -0,0 +1,656 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025 Ant Group
+ * Author: Tiwei Bie <tiwei.btw@antgroup.com>
+ */
+
+#define pr_fmt(fmt) "vfio-uml: " fmt
+
+#include <linux/module.h>
+#include <linux/logic_iomem.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/string.h>
+#include <linux/unaligned.h>
+#include <irq_kern.h>
+#include <init.h>
+#include <os.h>
+
+#include "virt-pci.h"
+#include "vfio_user.h"
+
+#define to_vdev(_pdev) container_of(_pdev, struct uml_vfio_device, pdev)
+
+struct uml_vfio_intr_ctx {
+ struct uml_vfio_device *dev;
+ int irq;
+};
+
+struct uml_vfio_device {
+ const char *name;
+ int group;
+
+ struct um_pci_device pdev;
+ struct uml_vfio_user_device udev;
+ struct uml_vfio_intr_ctx *intr_ctx;
+
+ int msix_cap;
+ int msix_bar;
+ int msix_offset;
+ int msix_size;
+ u32 *msix_data;
+
+ struct list_head list;
+};
+
+struct uml_vfio_group {
+ int id;
+ int fd;
+ int users;
+ struct list_head list;
+};
+
+static struct {
+ int fd;
+ int users;
+} uml_vfio_container = { .fd = -1 };
+static DEFINE_MUTEX(uml_vfio_container_mtx);
+
+static LIST_HEAD(uml_vfio_groups);
+static DEFINE_MUTEX(uml_vfio_groups_mtx);
+
+static LIST_HEAD(uml_vfio_devices);
+
+static int uml_vfio_set_container(int group_fd)
+{
+ int err;
+
+ guard(mutex)(&uml_vfio_container_mtx);
+
+ err = uml_vfio_user_set_container(uml_vfio_container.fd, group_fd);
+ if (err)
+ return err;
+
+ uml_vfio_container.users++;
+ if (uml_vfio_container.users > 1)
+ return 0;
+
+ err = uml_vfio_user_setup_iommu(uml_vfio_container.fd);
+ if (err) {
+ uml_vfio_user_unset_container(uml_vfio_container.fd, group_fd);
+ uml_vfio_container.users--;
+ }
+ return err;
+}
+
+static void uml_vfio_unset_container(int group_fd)
+{
+ guard(mutex)(&uml_vfio_container_mtx);
+
+ uml_vfio_user_unset_container(uml_vfio_container.fd, group_fd);
+ uml_vfio_container.users--;
+}
+
+static int uml_vfio_open_group(int group_id)
+{
+ struct uml_vfio_group *group;
+ int err;
+
+ guard(mutex)(&uml_vfio_groups_mtx);
+
+ list_for_each_entry(group, &uml_vfio_groups, list) {
+ if (group->id == group_id) {
+ group->users++;
+ return group->fd;
+ }
+ }
+
+ group = kzalloc(sizeof(*group), GFP_KERNEL);
+ if (!group)
+ return -ENOMEM;
+
+ group->fd = uml_vfio_user_open_group(group_id);
+ if (group->fd < 0) {
+ err = group->fd;
+ goto free_group;
+ }
+
+ err = uml_vfio_set_container(group->fd);
+ if (err)
+ goto close_group;
+
+ group->id = group_id;
+ group->users = 1;
+
+ list_add(&group->list, &uml_vfio_groups);
+
+ return group->fd;
+
+close_group:
+ os_close_file(group->fd);
+free_group:
+ kfree(group);
+ return err;
+}
+
+static int uml_vfio_release_group(int group_fd)
+{
+ struct uml_vfio_group *group;
+
+ guard(mutex)(&uml_vfio_groups_mtx);
+
+ list_for_each_entry(group, &uml_vfio_groups, list) {
+ if (group->fd == group_fd) {
+ group->users--;
+ if (group->users == 0) {
+ uml_vfio_unset_container(group_fd);
+ os_close_file(group_fd);
+ list_del(&group->list);
+ kfree(group);
+ }
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+static irqreturn_t uml_vfio_interrupt(int unused, void *opaque)
+{
+ struct uml_vfio_intr_ctx *ctx = opaque;
+ struct uml_vfio_device *dev = ctx->dev;
+ int index = ctx - dev->intr_ctx;
+ int irqfd = dev->udev.irqfd[index];
+ int irq = dev->msix_data[index];
+ uint64_t v;
+ int r;
+
+ do {
+ r = os_read_file(irqfd, &v, sizeof(v));
+ if (r == sizeof(v))
+ generic_handle_irq(irq);
+ } while (r == sizeof(v) || r == -EINTR);
+ WARN(r != -EAGAIN, "read returned %d\n", r);
+
+ return IRQ_HANDLED;
+}
+
+static int uml_vfio_activate_irq(struct uml_vfio_device *dev, int index)
+{
+ struct uml_vfio_intr_ctx *ctx = &dev->intr_ctx[index];
+ int err, irqfd;
+
+ if (ctx->irq >= 0)
+ return 0;
+
+ irqfd = uml_vfio_user_activate_irq(&dev->udev, index);
+ if (irqfd < 0)
+ return irqfd;
+
+ ctx->irq = um_request_irq(UM_IRQ_ALLOC, irqfd, IRQ_READ,
+ uml_vfio_interrupt, 0,
+ "vfio-uml", ctx);
+ if (ctx->irq < 0) {
+ err = ctx->irq;
+ goto deactivate;
+ }
+
+ err = add_sigio_fd(irqfd);
+ if (err)
+ goto free_irq;
+
+ return 0;
+
+free_irq:
+ um_free_irq(ctx->irq, ctx);
+ ctx->irq = -1;
+deactivate:
+ uml_vfio_user_deactivate_irq(&dev->udev, index);
+ return err;
+}
+
+static int uml_vfio_deactivate_irq(struct uml_vfio_device *dev, int index)
+{
+ struct uml_vfio_intr_ctx *ctx = &dev->intr_ctx[index];
+
+ if (ctx->irq >= 0) {
+ ignore_sigio_fd(dev->udev.irqfd[index]);
+ um_free_irq(ctx->irq, ctx);
+ uml_vfio_user_deactivate_irq(&dev->udev, index);
+ ctx->irq = -1;
+ }
+ return 0;
+}
+
+static int uml_vfio_update_msix_cap(struct uml_vfio_device *dev,
+ unsigned int offset, int size,
+ unsigned long val)
+{
+ /*
+ * Here, we handle only the operations we care about,
+ * ignoring the rest.
+ */
+ if (size == 2 && offset == dev->msix_cap + PCI_MSIX_FLAGS) {
+ switch (val & ~PCI_MSIX_FLAGS_QSIZE) {
+ case PCI_MSIX_FLAGS_ENABLE:
+ case 0:
+ return uml_vfio_user_update_irqs(&dev->udev);
+ }
+ }
+ return 0;
+}
+
+static int uml_vfio_update_msix_table(struct uml_vfio_device *dev,
+ unsigned int offset, int size,
+ unsigned long val)
+{
+ int index;
+
+ /*
+ * Here, we handle only the operations we care about,
+ * ignoring the rest.
+ */
+ offset -= dev->msix_offset + PCI_MSIX_ENTRY_DATA;
+
+ if (size != 4 || offset % PCI_MSIX_ENTRY_SIZE != 0)
+ return 0;
+
+ index = offset / PCI_MSIX_ENTRY_SIZE;
+ if (index >= dev->udev.irq_count)
+ return -EINVAL;
+
+ dev->msix_data[index] = val;
+
+ return val ? uml_vfio_activate_irq(dev, index) :
+ uml_vfio_deactivate_irq(dev, index);
+}
+
+static unsigned long __uml_vfio_cfgspace_read(struct uml_vfio_device *dev,
+ unsigned int offset, int size)
+{
+ u8 data[8];
+
+ memset(data, 0xff, sizeof(data));
+
+ if (uml_vfio_user_cfgspace_read(&dev->udev, offset, data, size))
+ return ULONG_MAX;
+
+ switch (size) {
+ case 1:
+ return data[0];
+ case 2:
+ return le16_to_cpup((void *)data);
+ case 4:
+ return le32_to_cpup((void *)data);
+#ifdef CONFIG_64BIT
+ case 8:
+ return le64_to_cpup((void *)data);
+#endif
+ default:
+ return ULONG_MAX;
+ }
+}
+
+static unsigned long uml_vfio_cfgspace_read(struct um_pci_device *pdev,
+ unsigned int offset, int size)
+{
+ struct uml_vfio_device *dev = to_vdev(pdev);
+
+ return __uml_vfio_cfgspace_read(dev, offset, size);
+}
+
+static void __uml_vfio_cfgspace_write(struct uml_vfio_device *dev,
+ unsigned int offset, int size,
+ unsigned long val)
+{
+ u8 data[8];
+
+ switch (size) {
+ case 1:
+ data[0] = (u8)val;
+ break;
+ case 2:
+ put_unaligned_le16(val, (void *)data);
+ break;
+ case 4:
+ put_unaligned_le32(val, (void *)data);
+ break;
+#ifdef CONFIG_64BIT
+ case 8:
+ put_unaligned_le64(val, (void *)data);
+ break;
+#endif
+ }
+
+ WARN_ON(uml_vfio_user_cfgspace_write(&dev->udev, offset, data, size));
+}
+
+static void uml_vfio_cfgspace_write(struct um_pci_device *pdev,
+ unsigned int offset, int size,
+ unsigned long val)
+{
+ struct uml_vfio_device *dev = to_vdev(pdev);
+
+ if (offset < dev->msix_cap + PCI_CAP_MSIX_SIZEOF &&
+ offset + size > dev->msix_cap)
+ WARN_ON(uml_vfio_update_msix_cap(dev, offset, size, val));
+
+ __uml_vfio_cfgspace_write(dev, offset, size, val);
+}
+
+static void uml_vfio_bar_copy_from(struct um_pci_device *pdev, int bar,
+ void *buffer, unsigned int offset, int size)
+{
+ struct uml_vfio_device *dev = to_vdev(pdev);
+
+ memset(buffer, 0xff, size);
+ uml_vfio_user_bar_read(&dev->udev, bar, offset, buffer, size);
+}
+
+static unsigned long uml_vfio_bar_read(struct um_pci_device *pdev, int bar,
+ unsigned int offset, int size)
+{
+ u8 data[8];
+
+ uml_vfio_bar_copy_from(pdev, bar, data, offset, size);
+
+ switch (size) {
+ case 1:
+ return data[0];
+ case 2:
+ return le16_to_cpup((void *)data);
+ case 4:
+ return le32_to_cpup((void *)data);
+#ifdef CONFIG_64BIT
+ case 8:
+ return le64_to_cpup((void *)data);
+#endif
+ default:
+ return ULONG_MAX;
+ }
+}
+
+static void uml_vfio_bar_copy_to(struct um_pci_device *pdev, int bar,
+ unsigned int offset, const void *buffer,
+ int size)
+{
+ struct uml_vfio_device *dev = to_vdev(pdev);
+
+ uml_vfio_user_bar_write(&dev->udev, bar, offset, buffer, size);
+}
+
+static void uml_vfio_bar_write(struct um_pci_device *pdev, int bar,
+ unsigned int offset, int size,
+ unsigned long val)
+{
+ struct uml_vfio_device *dev = to_vdev(pdev);
+ u8 data[8];
+
+ if (bar == dev->msix_bar && offset + size > dev->msix_offset &&
+ offset < dev->msix_offset + dev->msix_size)
+ WARN_ON(uml_vfio_update_msix_table(dev, offset, size, val));
+
+ switch (size) {
+ case 1:
+ data[0] = (u8)val;
+ break;
+ case 2:
+ put_unaligned_le16(val, (void *)data);
+ break;
+ case 4:
+ put_unaligned_le32(val, (void *)data);
+ break;
+#ifdef CONFIG_64BIT
+ case 8:
+ put_unaligned_le64(val, (void *)data);
+ break;
+#endif
+ }
+
+ uml_vfio_bar_copy_to(pdev, bar, offset, data, size);
+}
+
+static void uml_vfio_bar_set(struct um_pci_device *pdev, int bar,
+ unsigned int offset, u8 value, int size)
+{
+ struct uml_vfio_device *dev = to_vdev(pdev);
+ int i;
+
+ for (i = 0; i < size; i++)
+ uml_vfio_user_bar_write(&dev->udev, bar, offset + i, &value, 1);
+}
+
+static const struct um_pci_ops uml_vfio_um_pci_ops = {
+ .cfgspace_read = uml_vfio_cfgspace_read,
+ .cfgspace_write = uml_vfio_cfgspace_write,
+ .bar_read = uml_vfio_bar_read,
+ .bar_write = uml_vfio_bar_write,
+ .bar_copy_from = uml_vfio_bar_copy_from,
+ .bar_copy_to = uml_vfio_bar_copy_to,
+ .bar_set = uml_vfio_bar_set,
+};
+
+static u8 uml_vfio_find_capability(struct uml_vfio_device *dev, u8 cap)
+{
+ u8 id, pos;
+ u16 ent;
+ int ttl = 48; /* PCI_FIND_CAP_TTL */
+
+ pos = __uml_vfio_cfgspace_read(dev, PCI_CAPABILITY_LIST, sizeof(pos));
+
+ while (pos && ttl--) {
+ ent = __uml_vfio_cfgspace_read(dev, pos, sizeof(ent));
+
+ id = ent & 0xff;
+ if (id == 0xff)
+ break;
+ if (id == cap)
+ return pos;
+
+ pos = ent >> 8;
+ }
+
+ return 0;
+}
+
+static int uml_vfio_read_msix_table(struct uml_vfio_device *dev)
+{
+ unsigned int off;
+ u16 flags;
+ u32 tbl;
+
+ off = uml_vfio_find_capability(dev, PCI_CAP_ID_MSIX);
+ if (!off)
+ return -ENOTSUPP;
+
+ dev->msix_cap = off;
+
+ tbl = __uml_vfio_cfgspace_read(dev, off + PCI_MSIX_TABLE, sizeof(tbl));
+ flags = __uml_vfio_cfgspace_read(dev, off + PCI_MSIX_FLAGS, sizeof(flags));
+
+ dev->msix_bar = tbl & PCI_MSIX_TABLE_BIR;
+ dev->msix_offset = tbl & PCI_MSIX_TABLE_OFFSET;
+ dev->msix_size = ((flags & PCI_MSIX_FLAGS_QSIZE) + 1) * PCI_MSIX_ENTRY_SIZE;
+
+ dev->msix_data = kzalloc(dev->msix_size, GFP_KERNEL);
+ if (!dev->msix_data)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void uml_vfio_open_device(struct uml_vfio_device *dev)
+{
+ struct uml_vfio_intr_ctx *ctx;
+ int err, group_id, i;
+
+ group_id = uml_vfio_user_get_group_id(dev->name);
+ if (group_id < 0) {
+ pr_err("Failed to get group id (%s), error %d\n",
+ dev->name, group_id);
+ goto free_dev;
+ }
+
+ dev->group = uml_vfio_open_group(group_id);
+ if (dev->group < 0) {
+ pr_err("Failed to open group %d (%s), error %d\n",
+ group_id, dev->name, dev->group);
+ goto free_dev;
+ }
+
+ err = uml_vfio_user_setup_device(&dev->udev, dev->group, dev->name);
+ if (err) {
+ pr_err("Failed to setup device (%s), error %d\n",
+ dev->name, err);
+ goto release_group;
+ }
+
+ err = uml_vfio_read_msix_table(dev);
+ if (err) {
+ pr_err("Failed to read MSI-X table (%s), error %d\n",
+ dev->name, err);
+ goto teardown_udev;
+ }
+
+ dev->intr_ctx = kmalloc_array(dev->udev.irq_count,
+ sizeof(struct uml_vfio_intr_ctx),
+ GFP_KERNEL);
+ if (!dev->intr_ctx) {
+ pr_err("Failed to allocate interrupt context (%s)\n",
+ dev->name);
+ goto free_msix;
+ }
+
+ for (i = 0; i < dev->udev.irq_count; i++) {
+ ctx = &dev->intr_ctx[i];
+ ctx->dev = dev;
+ ctx->irq = -1;
+ }
+
+ dev->pdev.ops = &uml_vfio_um_pci_ops;
+
+ err = um_pci_device_register(&dev->pdev);
+ if (err) {
+ pr_err("Failed to register UM PCI device (%s), error %d\n",
+ dev->name, err);
+ goto free_intr_ctx;
+ }
+
+ return;
+
+free_intr_ctx:
+ kfree(dev->intr_ctx);
+free_msix:
+ kfree(dev->msix_data);
+teardown_udev:
+ uml_vfio_user_teardown_device(&dev->udev);
+release_group:
+ uml_vfio_release_group(dev->group);
+free_dev:
+ list_del(&dev->list);
+ kfree(dev->name);
+ kfree(dev);
+}
+
+static void uml_vfio_release_device(struct uml_vfio_device *dev)
+{
+ int i;
+
+ for (i = 0; i < dev->udev.irq_count; i++)
+ uml_vfio_deactivate_irq(dev, i);
+ uml_vfio_user_update_irqs(&dev->udev);
+
+ um_pci_device_unregister(&dev->pdev);
+ kfree(dev->intr_ctx);
+ kfree(dev->msix_data);
+ uml_vfio_user_teardown_device(&dev->udev);
+ uml_vfio_release_group(dev->group);
+ list_del(&dev->list);
+ kfree(dev->name);
+ kfree(dev);
+}
+
+static struct uml_vfio_device *uml_vfio_find_device(const char *device)
+{
+ struct uml_vfio_device *dev;
+
+ list_for_each_entry(dev, &uml_vfio_devices, list) {
+ if (!strcmp(dev->name, device))
+ return dev;
+ }
+ return NULL;
+}
+
+static int uml_vfio_cmdline_set(const char *device, const struct kernel_param *kp)
+{
+ struct uml_vfio_device *dev;
+ int fd;
+
+ if (uml_vfio_container.fd < 0) {
+ fd = uml_vfio_user_open_container();
+ if (fd < 0)
+ return fd;
+ uml_vfio_container.fd = fd;
+ }
+
+ if (uml_vfio_find_device(device))
+ return -EEXIST;
+
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ return -ENOMEM;
+
+ dev->name = kstrdup(device, GFP_KERNEL);
+ if (!dev->name) {
+ kfree(dev);
+ return -ENOMEM;
+ }
+
+ list_add_tail(&dev->list, &uml_vfio_devices);
+ return 0;
+}
+
+static int uml_vfio_cmdline_get(char *buffer, const struct kernel_param *kp)
+{
+ return 0;
+}
+
+static const struct kernel_param_ops uml_vfio_cmdline_param_ops = {
+ .set = uml_vfio_cmdline_set,
+ .get = uml_vfio_cmdline_get,
+};
+
+device_param_cb(device, &uml_vfio_cmdline_param_ops, NULL, 0400);
+__uml_help(uml_vfio_cmdline_param_ops,
+"vfio_uml.device=<domain:bus:slot.function>\n"
+" Pass through a PCI device to UML via VFIO. Currently, only MSI-X\n"
+" capable devices are supported, and it is assumed that drivers will\n"
+" use MSI-X. This parameter can be specified multiple times to pass\n"
+" through multiple PCI devices to UML.\n\n"
+);
+
+static int __init uml_vfio_init(void)
+{
+ struct uml_vfio_device *dev, *n;
+
+ sigio_broken();
+
+ /* If the opening fails, the device will be released. */
+ list_for_each_entry_safe(dev, n, &uml_vfio_devices, list)
+ uml_vfio_open_device(dev);
+
+ return 0;
+}
+late_initcall(uml_vfio_init);
+
+static void __exit uml_vfio_exit(void)
+{
+ struct uml_vfio_device *dev, *n;
+
+ list_for_each_entry_safe(dev, n, &uml_vfio_devices, list)
+ uml_vfio_release_device(dev);
+
+ if (uml_vfio_container.fd >= 0)
+ os_close_file(uml_vfio_container.fd);
+}
+module_exit(uml_vfio_exit);
diff --git a/arch/um/drivers/vfio_user.c b/arch/um/drivers/vfio_user.c
new file mode 100644
index 000000000000..6a45d8e14582
--- /dev/null
+++ b/arch/um/drivers/vfio_user.c
@@ -0,0 +1,327 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025 Ant Group
+ * Author: Tiwei Bie <tiwei.btw@antgroup.com>
+ */
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/eventfd.h>
+#include <linux/limits.h>
+#include <linux/vfio.h>
+#include <linux/pci_regs.h>
+#include <as-layout.h>
+#include <um_malloc.h>
+
+#include "vfio_user.h"
+
+int uml_vfio_user_open_container(void)
+{
+ int r, fd;
+
+ fd = open("/dev/vfio/vfio", O_RDWR);
+ if (fd < 0)
+ return -errno;
+
+ r = ioctl(fd, VFIO_GET_API_VERSION);
+ if (r != VFIO_API_VERSION) {
+ r = r < 0 ? -errno : -EINVAL;
+ goto error;
+ }
+
+ r = ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU);
+ if (r <= 0) {
+ r = r < 0 ? -errno : -EINVAL;
+ goto error;
+ }
+
+ return fd;
+
+error:
+ close(fd);
+ return r;
+}
+
+int uml_vfio_user_setup_iommu(int container)
+{
+ /*
+ * This is a bit tricky. See the big comment in
+ * vhost_user_set_mem_table() in virtio_uml.c.
+ */
+ unsigned long reserved = uml_reserved - uml_physmem;
+ struct vfio_iommu_type1_dma_map dma_map = {
+ .argsz = sizeof(dma_map),
+ .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
+ .vaddr = uml_reserved,
+ .iova = reserved,
+ .size = physmem_size - reserved,
+ };
+
+ if (ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU) < 0)
+ return -errno;
+
+ if (ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int uml_vfio_user_get_group_id(const char *device)
+{
+ char *path, *buf, *end;
+ const char *name;
+ int r;
+
+ path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL);
+ if (!path)
+ return -ENOMEM;
+
+ sprintf(path, "/sys/bus/pci/devices/%s/iommu_group", device);
+
+ buf = uml_kmalloc(PATH_MAX + 1, UM_GFP_KERNEL);
+ if (!buf) {
+ r = -ENOMEM;
+ goto free_path;
+ }
+
+ r = readlink(path, buf, PATH_MAX);
+ if (r < 0) {
+ r = -errno;
+ goto free_buf;
+ }
+ buf[r] = '\0';
+
+ name = basename(buf);
+
+ r = strtoul(name, &end, 10);
+ if (*end != '\0' || end == name) {
+ r = -EINVAL;
+ goto free_buf;
+ }
+
+free_buf:
+ kfree(buf);
+free_path:
+ kfree(path);
+ return r;
+}
+
+int uml_vfio_user_open_group(int group_id)
+{
+ char *path;
+ int fd;
+
+ path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL);
+ if (!path)
+ return -ENOMEM;
+
+ sprintf(path, "/dev/vfio/%d", group_id);
+
+ fd = open(path, O_RDWR);
+ if (fd < 0) {
+ fd = -errno;
+ goto out;
+ }
+
+out:
+ kfree(path);
+ return fd;
+}
+
+int uml_vfio_user_set_container(int container, int group)
+{
+ if (ioctl(group, VFIO_GROUP_SET_CONTAINER, &container) < 0)
+ return -errno;
+ return 0;
+}
+
+int uml_vfio_user_unset_container(int container, int group)
+{
+ if (ioctl(group, VFIO_GROUP_UNSET_CONTAINER, &container) < 0)
+ return -errno;
+ return 0;
+}
+
+static int vfio_set_irqs(int device, int start, int count, int *irqfd)
+{
+ struct vfio_irq_set *irq_set;
+ int argsz = sizeof(*irq_set) + sizeof(*irqfd) * count;
+ int err = 0;
+
+ irq_set = uml_kmalloc(argsz, UM_GFP_KERNEL);
+ if (!irq_set)
+ return -ENOMEM;
+
+ irq_set->argsz = argsz;
+ irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+ irq_set->start = start;
+ irq_set->count = count;
+ memcpy(irq_set->data, irqfd, sizeof(*irqfd) * count);
+
+ if (ioctl(device, VFIO_DEVICE_SET_IRQS, irq_set) < 0) {
+ err = -errno;
+ goto out;
+ }
+
+out:
+ kfree(irq_set);
+ return err;
+}
+
+int uml_vfio_user_setup_device(struct uml_vfio_user_device *dev,
+ int group, const char *device)
+{
+ struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
+ struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
+ int err, i;
+
+ dev->device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, device);
+ if (dev->device < 0)
+ return -errno;
+
+ if (ioctl(dev->device, VFIO_DEVICE_GET_INFO, &device_info) < 0) {
+ err = -errno;
+ goto close_device;
+ }
+
+ dev->num_regions = device_info.num_regions;
+ if (dev->num_regions > VFIO_PCI_CONFIG_REGION_INDEX + 1)
+ dev->num_regions = VFIO_PCI_CONFIG_REGION_INDEX + 1;
+
+ dev->region = uml_kmalloc(sizeof(*dev->region) * dev->num_regions,
+ UM_GFP_KERNEL);
+ if (!dev->region) {
+ err = -ENOMEM;
+ goto close_device;
+ }
+
+ for (i = 0; i < dev->num_regions; i++) {
+ struct vfio_region_info region = {
+ .argsz = sizeof(region),
+ .index = i,
+ };
+ if (ioctl(dev->device, VFIO_DEVICE_GET_REGION_INFO, &region) < 0) {
+ err = -errno;
+ goto free_region;
+ }
+ dev->region[i].size = region.size;
+ dev->region[i].offset = region.offset;
+ }
+
+ /* Only MSI-X is supported currently. */
+ irq_info.index = VFIO_PCI_MSIX_IRQ_INDEX;
+ if (ioctl(dev->device, VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0) {
+ err = -errno;
+ goto free_region;
+ }
+
+ dev->irq_count = irq_info.count;
+
+ dev->irqfd = uml_kmalloc(sizeof(int) * dev->irq_count, UM_GFP_KERNEL);
+ if (!dev->irqfd) {
+ err = -ENOMEM;
+ goto free_region;
+ }
+
+ memset(dev->irqfd, -1, sizeof(int) * dev->irq_count);
+
+ err = vfio_set_irqs(dev->device, 0, dev->irq_count, dev->irqfd);
+ if (err)
+ goto free_irqfd;
+
+ return 0;
+
+free_irqfd:
+ kfree(dev->irqfd);
+free_region:
+ kfree(dev->region);
+close_device:
+ close(dev->device);
+ return err;
+}
+
+void uml_vfio_user_teardown_device(struct uml_vfio_user_device *dev)
+{
+ kfree(dev->irqfd);
+ kfree(dev->region);
+ close(dev->device);
+}
+
+int uml_vfio_user_activate_irq(struct uml_vfio_user_device *dev, int index)
+{
+ int irqfd;
+
+ irqfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+ if (irqfd < 0)
+ return -errno;
+
+ dev->irqfd[index] = irqfd;
+ return irqfd;
+}
+
+void uml_vfio_user_deactivate_irq(struct uml_vfio_user_device *dev, int index)
+{
+ close(dev->irqfd[index]);
+ dev->irqfd[index] = -1;
+}
+
+int uml_vfio_user_update_irqs(struct uml_vfio_user_device *dev)
+{
+ return vfio_set_irqs(dev->device, 0, dev->irq_count, dev->irqfd);
+}
+
+static int vfio_region_read(struct uml_vfio_user_device *dev, unsigned int index,
+ uint64_t offset, void *buf, uint64_t size)
+{
+ if (index >= dev->num_regions || offset + size > dev->region[index].size)
+ return -EINVAL;
+
+ if (pread(dev->device, buf, size, dev->region[index].offset + offset) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int vfio_region_write(struct uml_vfio_user_device *dev, unsigned int index,
+ uint64_t offset, const void *buf, uint64_t size)
+{
+ if (index >= dev->num_regions || offset + size > dev->region[index].size)
+ return -EINVAL;
+
+ if (pwrite(dev->device, buf, size, dev->region[index].offset + offset) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int uml_vfio_user_cfgspace_read(struct uml_vfio_user_device *dev,
+ unsigned int offset, void *buf, int size)
+{
+ return vfio_region_read(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+ offset, buf, size);
+}
+
+int uml_vfio_user_cfgspace_write(struct uml_vfio_user_device *dev,
+ unsigned int offset, const void *buf, int size)
+{
+ return vfio_region_write(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+ offset, buf, size);
+}
+
+int uml_vfio_user_bar_read(struct uml_vfio_user_device *dev, int bar,
+ unsigned int offset, void *buf, int size)
+{
+ return vfio_region_read(dev, bar, offset, buf, size);
+}
+
+int uml_vfio_user_bar_write(struct uml_vfio_user_device *dev, int bar,
+ unsigned int offset, const void *buf, int size)
+{
+ return vfio_region_write(dev, bar, offset, buf, size);
+}
diff --git a/arch/um/drivers/vfio_user.h b/arch/um/drivers/vfio_user.h
new file mode 100644
index 000000000000..75535e05059b
--- /dev/null
+++ b/arch/um/drivers/vfio_user.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __UM_VFIO_USER_H
+#define __UM_VFIO_USER_H
+
+struct uml_vfio_user_device {
+ int device;
+
+ struct {
+ uint64_t size;
+ uint64_t offset;
+ } *region;
+ int num_regions;
+
+ int32_t *irqfd;
+ int irq_count;
+};
+
+int uml_vfio_user_open_container(void);
+int uml_vfio_user_setup_iommu(int container);
+
+int uml_vfio_user_get_group_id(const char *device);
+int uml_vfio_user_open_group(int group_id);
+int uml_vfio_user_set_container(int container, int group);
+int uml_vfio_user_unset_container(int container, int group);
+
+int uml_vfio_user_setup_device(struct uml_vfio_user_device *dev,
+ int group, const char *device);
+void uml_vfio_user_teardown_device(struct uml_vfio_user_device *dev);
+
+int uml_vfio_user_activate_irq(struct uml_vfio_user_device *dev, int index);
+void uml_vfio_user_deactivate_irq(struct uml_vfio_user_device *dev, int index);
+int uml_vfio_user_update_irqs(struct uml_vfio_user_device *dev);
+
+int uml_vfio_user_cfgspace_read(struct uml_vfio_user_device *dev,
+ unsigned int offset, void *buf, int size);
+int uml_vfio_user_cfgspace_write(struct uml_vfio_user_device *dev,
+ unsigned int offset, const void *buf, int size);
+
+int uml_vfio_user_bar_read(struct uml_vfio_user_device *dev, int bar,
+ unsigned int offset, void *buf, int size);
+int uml_vfio_user_bar_write(struct uml_vfio_user_device *dev, int bar,
+ unsigned int offset, const void *buf, int size);
+
+#endif /* __UM_VFIO_USER_H */
diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c
index b83b5a765d4e..0fe207ca4b72 100644
--- a/arch/um/drivers/virt-pci.c
+++ b/arch/um/drivers/virt-pci.c
@@ -538,11 +538,6 @@ void um_pci_platform_device_unregister(struct um_pci_device *dev)
static int __init um_pci_init(void)
{
- struct irq_domain_info inner_domain_info = {
- .size = MAX_MSI_VECTORS,
- .hwirq_max = MAX_MSI_VECTORS,
- .ops = &um_pci_inner_domain_ops,
- };
int err, i;
WARN_ON(logic_iomem_add_region(&virt_cfgspace_resource,
@@ -564,10 +559,10 @@ static int __init um_pci_init(void)
goto free;
}
- inner_domain_info.fwnode = um_pci_fwnode;
- um_pci_inner_domain = irq_domain_instantiate(&inner_domain_info);
- if (IS_ERR(um_pci_inner_domain)) {
- err = PTR_ERR(um_pci_inner_domain);
+ um_pci_inner_domain = irq_domain_create_linear(um_pci_fwnode, MAX_MSI_VECTORS,
+ &um_pci_inner_domain_ops, NULL);
+ if (!um_pci_inner_domain) {
+ err = -ENOMEM;
goto free;
}
@@ -602,7 +597,7 @@ static int __init um_pci_init(void)
return 0;
free:
- if (!IS_ERR_OR_NULL(um_pci_inner_domain))
+ if (um_pci_inner_domain)
irq_domain_remove(um_pci_inner_domain);
if (um_pci_fwnode)
irq_domain_free_fwnode(um_pci_fwnode);
diff --git a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c
index e4316c7981e8..d05918e422f9 100644
--- a/arch/um/drivers/xterm.c
+++ b/arch/um/drivers/xterm.c
@@ -81,7 +81,7 @@ __uml_setup("xterm=", xterm_setup,
" '<switch> command arg1 arg2 ...'.\n"
" The default values are 'xterm=" CONFIG_XTERM_CHAN_DEFAULT_EMULATOR
",-T,-e'.\n"
-" Values for gnome-terminal are 'xterm=gnome-terminal,-t,-x'.\n\n"
+" Values for gnome-terminal are 'xterm=gnome-terminal,-t,--'.\n\n"
);
static int xterm_open(int input, int output, int primary, void *d,
@@ -97,12 +97,9 @@ static int xterm_open(int input, int output, int primary, void *d,
if (access(argv[4], X_OK) < 0)
argv[4] = "port-helper";
- /*
- * Check that DISPLAY is set, this doesn't guarantee the xterm
- * will work but w/o it we can be pretty sure it won't.
- */
- if (getenv("DISPLAY") == NULL) {
- printk(UM_KERN_ERR "xterm_open: $DISPLAY not set.\n");
+ /* Ensure we are running on Xorg or Wayland. */
+ if (!getenv("DISPLAY") && !getenv("WAYLAND_DISPLAY")) {
+ printk(UM_KERN_ERR "xterm_open : neither $DISPLAY nor $WAYLAND_DISPLAY is set.\n");
return -ENODEV;
}
diff --git a/arch/um/include/asm/asm-prototypes.h b/arch/um/include/asm/asm-prototypes.h
index 5898a26daa0d..408b31d59127 100644
--- a/arch/um/include/asm/asm-prototypes.h
+++ b/arch/um/include/asm/asm-prototypes.h
@@ -1 +1,6 @@
#include <asm-generic/asm-prototypes.h>
+#include <asm/checksum.h>
+
+#ifdef CONFIG_UML_X86
+extern void cmpxchg8b_emu(void);
+#endif
diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h
index 749dfe8512e8..36dbedd1af48 100644
--- a/arch/um/include/asm/irq.h
+++ b/arch/um/include/asm/irq.h
@@ -13,17 +13,18 @@
#define TELNETD_IRQ 8
#define XTERM_IRQ 9
#define RANDOM_IRQ 10
+#define SIGCHLD_IRQ 11
#ifdef CONFIG_UML_NET_VECTOR
-#define VECTOR_BASE_IRQ (RANDOM_IRQ + 1)
+#define VECTOR_BASE_IRQ (SIGCHLD_IRQ + 1)
#define VECTOR_IRQ_SPACE 8
#define UM_FIRST_DYN_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ)
#else
-#define UM_FIRST_DYN_IRQ (RANDOM_IRQ + 1)
+#define UM_FIRST_DYN_IRQ (SIGCHLD_IRQ + 1)
#endif
diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h
index a3eaca41ff61..4d0e4239f3cc 100644
--- a/arch/um/include/asm/mmu.h
+++ b/arch/um/include/asm/mmu.h
@@ -6,11 +6,14 @@
#ifndef __ARCH_UM_MMU_H
#define __ARCH_UM_MMU_H
+#include "linux/types.h"
#include <mm_id.h>
typedef struct mm_context {
struct mm_id id;
+ struct list_head list;
+
/* Address range in need of a TLB sync */
unsigned long sync_tlb_range_from;
unsigned long sync_tlb_range_to;
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index ca2a519d53ab..24fdea6f88c3 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -314,7 +314,7 @@ extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr);
((swp_entry_t) { pte_val(pte_mkuptodate(pte)) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_get_bits(pte, _PAGE_SWP_EXCLUSIVE);
}
diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h
index 73f3a4792ed8..8ca66a1918c3 100644
--- a/arch/um/include/shared/common-offsets.h
+++ b/arch/um/include/shared/common-offsets.h
@@ -14,3 +14,7 @@ DEFINE(UM_THREAD_SIZE, THREAD_SIZE);
DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC);
DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC);
+
+DEFINE(UM_KERN_GDT_ENTRY_TLS_ENTRIES, GDT_ENTRY_TLS_ENTRIES);
+
+DEFINE(UM_SECCOMP_ARCH_NATIVE, SECCOMP_ARCH_NATIVE);
diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h
index 88835b52ae2b..746abc24a5d5 100644
--- a/arch/um/include/shared/irq_user.h
+++ b/arch/um/include/shared/irq_user.h
@@ -17,6 +17,8 @@ enum um_irq_type {
struct siginfo;
extern void sigio_handler(int sig, struct siginfo *unused_si,
struct uml_pt_regs *regs, void *mc);
+extern void sigchld_handler(int sig, struct siginfo *unused_si,
+ struct uml_pt_regs *regs, void *mc);
void sigio_run_timetravel_handlers(void);
extern void free_irq_by_fd(int fd);
extern void deactivate_fd(int fd, int irqnum);
diff --git a/arch/um/include/shared/net_kern.h b/arch/um/include/shared/net_kern.h
deleted file mode 100644
index 67b2e9a1f2e5..000000000000
--- a/arch/um/include/shared/net_kern.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#ifndef __UM_NET_KERN_H
-#define __UM_NET_KERN_H
-
-#include <linux/netdevice.h>
-#include <linux/platform_device.h>
-#include <linux/skbuff.h>
-#include <linux/socket.h>
-#include <linux/list.h>
-#include <linux/workqueue.h>
-
-struct uml_net {
- struct list_head list;
- struct net_device *dev;
- struct platform_device pdev;
- int index;
-};
-
-struct uml_net_private {
- struct list_head list;
- spinlock_t lock;
- struct net_device *dev;
- struct timer_list tl;
-
- struct work_struct work;
- int fd;
- unsigned char mac[ETH_ALEN];
- int max_packet;
- unsigned short (*protocol)(struct sk_buff *);
- int (*open)(void *);
- void (*close)(int, void *);
- void (*remove)(void *);
- int (*read)(int, struct sk_buff *skb, struct uml_net_private *);
- int (*write)(int, struct sk_buff *skb, struct uml_net_private *);
-
- void (*add_address)(unsigned char *, unsigned char *, void *);
- void (*delete_address)(unsigned char *, unsigned char *, void *);
- char user[];
-};
-
-struct net_kern_info {
- void (*init)(struct net_device *, void *);
- unsigned short (*protocol)(struct sk_buff *);
- int (*read)(int, struct sk_buff *skb, struct uml_net_private *);
- int (*write)(int, struct sk_buff *skb, struct uml_net_private *);
-};
-
-struct transport {
- struct list_head list;
- const char *name;
- int (* const setup)(char *, char **, void *);
- const struct net_user_info *user;
- const struct net_kern_info *kern;
- const int private_size;
- const int setup_size;
-};
-
-extern int tap_setup_common(char *str, char *type, char **dev_name,
- char **mac_out, char **gate_addr);
-extern void register_transport(struct transport *new);
-extern unsigned short eth_protocol(struct sk_buff *skb);
-extern void uml_net_setup_etheraddr(struct net_device *dev, char *str);
-
-
-#endif
diff --git a/arch/um/include/shared/net_user.h b/arch/um/include/shared/net_user.h
deleted file mode 100644
index ba92a4d93531..000000000000
--- a/arch/um/include/shared/net_user.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#ifndef __UM_NET_USER_H__
-#define __UM_NET_USER_H__
-
-#define ETH_ADDR_LEN (6)
-#define ETH_HEADER_ETHERTAP (16)
-#define ETH_HEADER_OTHER (26) /* 14 for ethernet + VLAN + MPLS for crazy people */
-#define ETH_MAX_PACKET (1500)
-
-#define UML_NET_VERSION (4)
-
-struct net_user_info {
- int (*init)(void *, void *);
- int (*open)(void *);
- void (*close)(int, void *);
- void (*remove)(void *);
- void (*add_address)(unsigned char *, unsigned char *, void *);
- void (*delete_address)(unsigned char *, unsigned char *, void *);
- int max_packet;
- int mtu;
-};
-
-extern void iter_addresses(void *d, void (*cb)(unsigned char *,
- unsigned char *, void *),
- void *arg);
-
-extern void *get_output_buffer(int *len_out);
-extern void free_output_buffer(void *buffer);
-
-extern int tap_open_common(void *dev, char *gate_addr);
-extern void tap_check_ips(char *gate_addr, unsigned char *eth_addr);
-
-extern void read_output(int fd, char *output_out, int len);
-
-extern int net_read(int fd, void *buf, int len);
-extern int net_recvfrom(int fd, void *buf, int len);
-extern int net_write(int fd, void *buf, int len);
-extern int net_send(int fd, void *buf, int len);
-extern int net_sendto(int fd, void *buf, int len, void *to, int sock_len);
-
-extern void open_addr(unsigned char *addr, unsigned char *netmask, void *arg);
-extern void close_addr(unsigned char *addr, unsigned char *netmask, void *arg);
-
-extern char *split_if_spec(char *str, ...);
-
-extern int dev_netmask(void *d, void *m);
-
-#endif
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 152a60080d5b..b35cc8ce333b 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -143,7 +143,6 @@ extern int os_access(const char *file, int mode);
extern int os_set_exec_close(int fd);
extern int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg);
extern int os_get_ifname(int fd, char *namebuf);
-extern int os_set_slip(int fd);
extern int os_mode_fd(int fd, int mode);
extern int os_seek_file(int fd, unsigned long long offset);
@@ -198,6 +197,7 @@ extern int create_mem_file(unsigned long long len);
extern void report_enomem(void);
/* process.c */
+pid_t os_reap_child(void);
extern void os_alarm_process(int pid);
extern void os_kill_process(int pid, int reap_child);
extern void os_kill_ptraced_process(int pid, int reap_child);
@@ -286,7 +286,7 @@ int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len);
/* skas/process.c */
extern int is_skas_winch(int pid, int fd, void *data);
-extern int start_userspace(unsigned long stub_stack);
+extern int start_userspace(struct mm_id *mm_id);
extern void userspace(struct uml_pt_regs *regs);
extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void));
extern void switch_threads(jmp_buf *me, jmp_buf *you);
diff --git a/arch/um/include/shared/skas/mm_id.h b/arch/um/include/shared/skas/mm_id.h
index 140388c282f6..89df9a55fbea 100644
--- a/arch/um/include/shared/skas/mm_id.h
+++ b/arch/um/include/shared/skas/mm_id.h
@@ -6,12 +6,21 @@
#ifndef __MM_ID_H
#define __MM_ID_H
+#define STUB_MAX_FDS 4
+
struct mm_id {
int pid;
unsigned long stack;
int syscall_data_len;
+
+ /* Only used with SECCOMP mode */
+ int sock;
+ int syscall_fd_num;
+ int syscall_fd_map[STUB_MAX_FDS];
};
void __switch_mm(struct mm_id *mm_idp);
+void notify_mm_kill(int pid);
+
#endif
diff --git a/arch/um/include/shared/skas/skas.h b/arch/um/include/shared/skas/skas.h
index 85c50122ab98..7d1de4cab551 100644
--- a/arch/um/include/shared/skas/skas.h
+++ b/arch/um/include/shared/skas/skas.h
@@ -8,6 +8,7 @@
#include <sysdep/ptrace.h>
+extern int using_seccomp;
extern int userspace_pid[];
extern void new_thread_handler(void);
diff --git a/arch/um/include/shared/skas/stub-data.h b/arch/um/include/shared/skas/stub-data.h
index 81a4cace032c..c261a77a32f6 100644
--- a/arch/um/include/shared/skas/stub-data.h
+++ b/arch/um/include/shared/skas/stub-data.h
@@ -11,8 +11,15 @@
#include <linux/compiler_types.h>
#include <as-layout.h>
#include <sysdep/tls.h>
+#include <sysdep/stub-data.h>
+#include <mm_id.h>
+
+#define FUTEX_IN_CHILD 0
+#define FUTEX_IN_KERN 1
struct stub_init_data {
+ int seccomp;
+
unsigned long stub_start;
int stub_code_fd;
@@ -20,7 +27,8 @@ struct stub_init_data {
int stub_data_fd;
unsigned long stub_data_offset;
- unsigned long segv_handler;
+ unsigned long signal_handler;
+ unsigned long signal_restorer;
};
#define STUB_NEXT_SYSCALL(s) \
@@ -52,6 +60,16 @@ struct stub_data {
/* 128 leaves enough room for additional fields in the struct */
struct stub_syscall syscall_data[(UM_KERN_PAGE_SIZE - 128) / sizeof(struct stub_syscall)] __aligned(16);
+ /* data shared with signal handler (only used in seccomp mode) */
+ short restart_wait;
+ unsigned int futex;
+ int signal;
+ unsigned short si_offset;
+ unsigned short mctx_offset;
+
+ /* seccomp architecture specific state restore */
+ struct stub_data_arch arch_data;
+
/* Stack for our signal handlers and for calling into . */
unsigned char sigstack[UM_KERN_PAGE_SIZE] __aligned(UM_KERN_PAGE_SIZE);
};
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index 4df1cd0d2017..b8f4e9281599 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -12,7 +12,7 @@ CPPFLAGS_vmlinux.lds := -DSTART=$(LDS_START) \
-DELF_ARCH=$(LDS_ELF_ARCH) \
-DELF_FORMAT=$(LDS_ELF_FORMAT) \
$(LDS_EXTRA)
-extra-y := vmlinux.lds
+always-$(KBUILD_BUILTIN) := vmlinux.lds
obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \
physmem.o process.o ptrace.o reboot.o sigio.o \
@@ -25,7 +25,6 @@ obj-$(CONFIG_GPROF) += gprof_syms.o
obj-$(CONFIG_OF) += dtb.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
-obj-$(CONFIG_GENERIC_PCI_IOMAP) += ioport.o
USER_OBJS := config.o
diff --git a/arch/um/kernel/ioport.c b/arch/um/kernel/ioport.c
deleted file mode 100644
index 7220615b3beb..000000000000
--- a/arch/um/kernel/ioport.c
+++ /dev/null
@@ -1,13 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2021 Intel Corporation
- * Author: Johannes Berg <johannes@sipsolutions.net>
- */
-#include <asm/iomap.h>
-#include <asm-generic/pci_iomap.h>
-
-void __iomem *__pci_ioport_map(struct pci_dev *dev, unsigned long port,
- unsigned int nr)
-{
- return NULL;
-}
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index abe8f30a521c..0dfaf96bb7da 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -690,3 +690,9 @@ void __init init_IRQ(void)
/* Initialize EPOLL Loop */
os_setup_epoll();
}
+
+void sigchld_handler(int sig, struct siginfo *unused_si,
+ struct uml_pt_regs *regs, void *mc)
+{
+ do_IRQ(SIGCHLD_IRQ, regs);
+}
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 0eb5a1d3ba70..849fafa4b54f 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -8,6 +8,7 @@
#include <linux/sched/signal.h>
#include <linux/slab.h>
+#include <shared/irq_kern.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/mmu_context.h>
@@ -19,6 +20,9 @@
/* Ensure the stub_data struct covers the allocated area */
static_assert(sizeof(struct stub_data) == STUB_DATA_PAGES * UM_KERN_PAGE_SIZE);
+spinlock_t mm_list_lock;
+struct list_head mm_list;
+
int init_new_context(struct task_struct *task, struct mm_struct *mm)
{
struct mm_id *new_id = &mm->context.id;
@@ -31,14 +35,14 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
new_id->stack = stack;
- block_signals_trace();
- new_id->pid = start_userspace(stack);
- unblock_signals_trace();
+ scoped_guard(spinlock_irqsave, &mm_list_lock) {
+ /* Insert into list, used for lookups when the child dies */
+ list_add(&mm->context.list, &mm_list);
+ }
- if (new_id->pid < 0) {
- ret = new_id->pid;
+ ret = start_userspace(new_id);
+ if (ret < 0)
goto out_free;
- }
/* Ensure the new MM is clean and nothing unwanted is mapped */
unmap(new_id, 0, STUB_START);
@@ -60,13 +64,82 @@ void destroy_context(struct mm_struct *mm)
* zero, resulting in a kill(0), which will result in the
* whole UML suddenly dying. Also, cover negative and
* 1 cases, since they shouldn't happen either.
+ *
+ * Negative cases happen if the child died unexpectedly.
*/
- if (mmu->id.pid < 2) {
+ if (mmu->id.pid >= 0 && mmu->id.pid < 2) {
printk(KERN_ERR "corrupt mm_context - pid = %d\n",
mmu->id.pid);
return;
}
- os_kill_ptraced_process(mmu->id.pid, 1);
+
+ if (mmu->id.pid > 0) {
+ os_kill_ptraced_process(mmu->id.pid, 1);
+ mmu->id.pid = -1;
+ }
+
+ if (using_seccomp && mmu->id.sock)
+ os_close_file(mmu->id.sock);
free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES));
+
+ guard(spinlock_irqsave)(&mm_list_lock);
+
+ list_del(&mm->context.list);
+}
+
+static irqreturn_t mm_sigchld_irq(int irq, void* dev)
+{
+ struct mm_context *mm_context;
+ pid_t pid;
+
+ guard(spinlock)(&mm_list_lock);
+
+ while ((pid = os_reap_child()) > 0) {
+ /*
+ * A child died, check if we have an MM with the PID. This is
+ * only relevant in SECCOMP mode (as ptrace will fail anyway).
+ *
+ * See wait_stub_done_seccomp for more details.
+ */
+ list_for_each_entry(mm_context, &mm_list, list) {
+ if (mm_context->id.pid == pid) {
+ struct stub_data *stub_data;
+ printk("Unexpectedly lost MM child! Affected tasks will segfault.");
+
+ /* Marks the MM as dead */
+ mm_context->id.pid = -1;
+
+ /*
+ * NOTE: If SMP is implemented, a futex_wake
+ * needs to be added here.
+ */
+ stub_data = (void *)mm_context->id.stack;
+ stub_data->futex = FUTEX_IN_KERN;
+
+ /*
+ * NOTE: Currently executing syscalls by
+ * affected tasks may finish normally.
+ */
+ break;
+ }
+ }
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int __init init_child_tracking(void)
+{
+ int err;
+
+ spin_lock_init(&mm_list_lock);
+ INIT_LIST_HEAD(&mm_list);
+
+ err = request_irq(SIGCHLD_IRQ, mm_sigchld_irq, 0, "SIGCHLD", NULL);
+ if (err < 0)
+ panic("Failed to register SIGCHLD IRQ: %d", err);
+
+ return 0;
}
+early_initcall(init_child_tracking)
diff --git a/arch/um/kernel/skas/stub.c b/arch/um/kernel/skas/stub.c
index 796fc266d3bb..67cab46a602c 100644
--- a/arch/um/kernel/skas/stub.c
+++ b/arch/um/kernel/skas/stub.c
@@ -5,21 +5,54 @@
#include <sysdep/stub.h>
-static __always_inline int syscall_handler(struct stub_data *d)
+#include <linux/futex.h>
+#include <sys/socket.h>
+#include <errno.h>
+
+/*
+ * Known security issues
+ *
+ * Userspace can jump to this address to execute *any* syscall that is
+ * permitted by the stub. As we will return afterwards, it can do
+ * whatever it likes, including:
+ * - Tricking the kernel into handing out the memory FD
+ * - Using this memory FD to read/write all physical memory
+ * - Running in parallel to the kernel processing a syscall
+ * (possibly creating data races?)
+ * - Blocking e.g. SIGALRM to avoid time based scheduling
+ *
+ * To avoid this, the permitted location for each syscall needs to be
+ * checked for in the SECCOMP filter (which is reasonably simple). Also,
+ * more care will need to go into considerations how the code might be
+ * tricked by using a prepared stack (or even modifying the stack from
+ * another thread in case SMP support is added).
+ *
+ * As for the SIGALRM, the best counter measure will be to check in the
+ * kernel that the process is reporting back the SIGALRM in a timely
+ * fashion.
+ */
+static __always_inline int syscall_handler(int fd_map[STUB_MAX_FDS])
{
+ struct stub_data *d = get_stub_data();
int i;
unsigned long res;
+ int fd;
for (i = 0; i < d->syscall_data_len; i++) {
struct stub_syscall *sc = &d->syscall_data[i];
switch (sc->syscall) {
case STUB_SYSCALL_MMAP:
+ if (fd_map)
+ fd = fd_map[sc->mem.fd];
+ else
+ fd = sc->mem.fd;
+
res = stub_syscall6(STUB_MMAP_NR,
sc->mem.addr, sc->mem.length,
sc->mem.prot,
MAP_SHARED | MAP_FIXED,
- sc->mem.fd, sc->mem.offset);
+ fd, sc->mem.offset);
if (res != sc->mem.addr) {
d->err = res;
d->syscall_data_len = i;
@@ -51,9 +84,98 @@ static __always_inline int syscall_handler(struct stub_data *d)
void __section(".__syscall_stub")
stub_syscall_handler(void)
{
+ syscall_handler(NULL);
+
+ trap_myself();
+}
+
+void __section(".__syscall_stub")
+stub_signal_interrupt(int sig, siginfo_t *info, void *p)
+{
struct stub_data *d = get_stub_data();
+ char rcv_data;
+ union {
+ char data[CMSG_SPACE(sizeof(int) * STUB_MAX_FDS)];
+ struct cmsghdr align;
+ } ctrl = {};
+ struct iovec iov = {
+ .iov_base = &rcv_data,
+ .iov_len = 1,
+ };
+ struct msghdr msghdr = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &ctrl,
+ .msg_controllen = sizeof(ctrl),
+ };
+ ucontext_t *uc = p;
+ struct cmsghdr *fd_msg;
+ int *fd_map;
+ int num_fds;
+ long res;
- syscall_handler(d);
+ d->signal = sig;
+ d->si_offset = (unsigned long)info - (unsigned long)&d->sigstack[0];
+ d->mctx_offset = (unsigned long)&uc->uc_mcontext - (unsigned long)&d->sigstack[0];
- trap_myself();
+restart_wait:
+ d->futex = FUTEX_IN_KERN;
+ do {
+ res = stub_syscall3(__NR_futex, (unsigned long)&d->futex,
+ FUTEX_WAKE, 1);
+ } while (res == -EINTR);
+
+ do {
+ res = stub_syscall4(__NR_futex, (unsigned long)&d->futex,
+ FUTEX_WAIT, FUTEX_IN_KERN, 0);
+ } while (res == -EINTR || d->futex == FUTEX_IN_KERN);
+
+ if (res < 0 && res != -EAGAIN)
+ stub_syscall1(__NR_exit_group, 1);
+
+ if (d->syscall_data_len) {
+ /* Read passed FDs (if any) */
+ do {
+ res = stub_syscall3(__NR_recvmsg, 0, (unsigned long)&msghdr, 0);
+ } while (res == -EINTR);
+
+ /* We should never have a receive error (other than -EAGAIN) */
+ if (res < 0 && res != -EAGAIN)
+ stub_syscall1(__NR_exit_group, 1);
+
+ /* Receive the FDs */
+ num_fds = 0;
+ fd_msg = msghdr.msg_control;
+ fd_map = (void *)&CMSG_DATA(fd_msg);
+ if (res == iov.iov_len && msghdr.msg_controllen > sizeof(struct cmsghdr))
+ num_fds = (fd_msg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+
+ /* Try running queued syscalls. */
+ res = syscall_handler(fd_map);
+
+ while (num_fds)
+ stub_syscall2(__NR_close, fd_map[--num_fds], 0);
+ } else {
+ res = 0;
+ }
+
+ if (res < 0 || d->restart_wait) {
+ /* Report SIGSYS if we restart. */
+ d->signal = SIGSYS;
+ d->restart_wait = 0;
+
+ goto restart_wait;
+ }
+
+ /* Restore arch dependent state that is not part of the mcontext */
+ stub_seccomp_restore_state(&d->arch_data);
+
+ /* Return so that the host modified mcontext is restored. */
+}
+
+void __section(".__syscall_stub")
+stub_signal_restorer(void)
+{
+ /* We must not have anything on the stack when doing rt_sigreturn */
+ stub_syscall0(__NR_rt_sigreturn);
}
diff --git a/arch/um/kernel/skas/stub_exe.c b/arch/um/kernel/skas/stub_exe.c
index 23c99b285e82..cbafaa684e66 100644
--- a/arch/um/kernel/skas/stub_exe.c
+++ b/arch/um/kernel/skas/stub_exe.c
@@ -1,8 +1,12 @@
#include <sys/ptrace.h>
#include <sys/prctl.h>
+#include <sys/fcntl.h>
#include <asm/unistd.h>
#include <sysdep/stub.h>
#include <stub-data.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <generated/asm-offsets.h>
void _start(void);
@@ -25,8 +29,6 @@ noinline static void real_init(void)
} sa = {
/* Need to set SA_RESTORER (but the handler never returns) */
.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO | 0x04000000,
- /* no need to mask any signals */
- .sa_mask = 0,
};
/* set a nice name */
@@ -35,13 +37,20 @@ noinline static void real_init(void)
/* Make sure this process dies if the kernel dies */
stub_syscall2(__NR_prctl, PR_SET_PDEATHSIG, SIGKILL);
+ /* Needed in SECCOMP mode (and safe to do anyway) */
+ stub_syscall5(__NR_prctl, PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+
/* read information from STDIN and close it */
res = stub_syscall3(__NR_read, 0,
(unsigned long)&init_data, sizeof(init_data));
if (res != sizeof(init_data))
stub_syscall1(__NR_exit, 10);
- stub_syscall1(__NR_close, 0);
+ /* In SECCOMP mode, FD 0 is a socket and is later used for FD passing */
+ if (!init_data.seccomp)
+ stub_syscall1(__NR_close, 0);
+ else
+ stub_syscall3(__NR_fcntl, 0, F_SETFL, O_NONBLOCK);
/* map stub code + data */
res = stub_syscall6(STUB_MMAP_NR,
@@ -59,22 +68,148 @@ noinline static void real_init(void)
if (res != init_data.stub_start + UM_KERN_PAGE_SIZE)
stub_syscall1(__NR_exit, 12);
+ /* In SECCOMP mode, we only need the signalling FD from now on */
+ if (init_data.seccomp) {
+ res = stub_syscall3(__NR_close_range, 1, ~0U, 0);
+ if (res != 0)
+ stub_syscall1(__NR_exit, 13);
+ }
+
/* setup signal stack inside stub data */
stack.ss_sp = (void *)init_data.stub_start + UM_KERN_PAGE_SIZE;
stub_syscall2(__NR_sigaltstack, (unsigned long)&stack, 0);
- /* register SIGSEGV handler */
- sa.sa_handler_ = (void *) init_data.segv_handler;
- res = stub_syscall4(__NR_rt_sigaction, SIGSEGV, (unsigned long)&sa, 0,
- sizeof(sa.sa_mask));
- if (res != 0)
- stub_syscall1(__NR_exit, 13);
+ /* register signal handlers */
+ sa.sa_handler_ = (void *) init_data.signal_handler;
+ sa.sa_restorer = (void *) init_data.signal_restorer;
+ if (!init_data.seccomp) {
+ /* In ptrace mode, the SIGSEGV handler never returns */
+ sa.sa_mask = 0;
+
+ res = stub_syscall4(__NR_rt_sigaction, SIGSEGV,
+ (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+ if (res != 0)
+ stub_syscall1(__NR_exit, 14);
+ } else {
+ /* SECCOMP mode uses rt_sigreturn, need to mask all signals */
+ sa.sa_mask = ~0ULL;
+
+ res = stub_syscall4(__NR_rt_sigaction, SIGSEGV,
+ (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+ if (res != 0)
+ stub_syscall1(__NR_exit, 15);
+
+ res = stub_syscall4(__NR_rt_sigaction, SIGSYS,
+ (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+ if (res != 0)
+ stub_syscall1(__NR_exit, 16);
+
+ res = stub_syscall4(__NR_rt_sigaction, SIGALRM,
+ (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+ if (res != 0)
+ stub_syscall1(__NR_exit, 17);
+
+ res = stub_syscall4(__NR_rt_sigaction, SIGTRAP,
+ (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+ if (res != 0)
+ stub_syscall1(__NR_exit, 18);
+
+ res = stub_syscall4(__NR_rt_sigaction, SIGILL,
+ (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+ if (res != 0)
+ stub_syscall1(__NR_exit, 19);
+
+ res = stub_syscall4(__NR_rt_sigaction, SIGFPE,
+ (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+ if (res != 0)
+ stub_syscall1(__NR_exit, 20);
+ }
+
+ /*
+ * If in seccomp mode, install the SECCOMP filter and trigger a syscall.
+ * Otherwise set PTRACE_TRACEME and do a SIGSTOP.
+ */
+ if (init_data.seccomp) {
+ struct sock_filter filter[] = {
+#if __BITS_PER_LONG > 32
+ /* [0] Load upper 32bit of instruction pointer from seccomp_data */
+ BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+ (offsetof(struct seccomp_data, instruction_pointer) + 4)),
+
+ /* [1] Jump forward 3 instructions if the upper address is not identical */
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (init_data.stub_start) >> 32, 0, 3),
+#endif
+ /* [2] Load lower 32bit of instruction pointer from seccomp_data */
+ BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+ (offsetof(struct seccomp_data, instruction_pointer))),
+
+ /* [3] Mask out lower bits */
+ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xfffff000),
+
+ /* [4] Jump to [6] if the lower bits are not on the expected page */
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (init_data.stub_start) & 0xfffff000, 1, 0),
+
+ /* [5] Trap call, allow */
+ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP),
+
+ /* [6,7] Check architecture */
+ BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+ offsetof(struct seccomp_data, arch)),
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,
+ UM_SECCOMP_ARCH_NATIVE, 1, 0),
+
+ /* [8] Kill (for architecture check) */
+ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
+
+ /* [9] Load syscall number */
+ BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+
+ /* [10-16] Check against permitted syscalls */
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_futex,
+ 7, 0),
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,__NR_recvmsg,
+ 6, 0),
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,__NR_close,
+ 5, 0),
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, STUB_MMAP_NR,
+ 4, 0),
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_munmap,
+ 3, 0),
+#ifdef __i386__
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_set_thread_area,
+ 2, 0),
+#else
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_arch_prctl,
+ 2, 0),
+#endif
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rt_sigreturn,
+ 1, 0),
+
+ /* [17] Not one of the permitted syscalls */
+ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
+
+ /* [18] Permitted call for the stub */
+ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = sizeof(filter) / sizeof(filter[0]),
+ .filter = filter,
+ };
+
+ if (stub_syscall3(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
+ SECCOMP_FILTER_FLAG_TSYNC,
+ (unsigned long)&prog) != 0)
+ stub_syscall1(__NR_exit, 21);
- stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0);
+ /* Fall through, the exit syscall will cause SIGSYS */
+ } else {
+ stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0);
- stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP);
+ stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP);
+ }
- stub_syscall1(__NR_exit, 14);
+ stub_syscall1(__NR_exit, 30);
__builtin_unreachable();
}
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 1394568c0210..ae0fa2173778 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -856,11 +856,16 @@ static struct clock_event_device timer_clockevent = {
static irqreturn_t um_timer(int irq, void *dev)
{
- if (get_current()->mm != NULL)
- {
- /* userspace - relay signal, results in correct userspace timers */
+ /*
+ * Interrupt the (possibly) running userspace process, technically this
+ * should only happen if userspace is currently executing.
+ * With infinite CPU time-travel, we can only get here when userspace
+ * is not executing. Do not notify there and avoid spurious scheduling.
+ */
+ if (time_travel_mode != TT_MODE_INFCPU &&
+ time_travel_mode != TT_MODE_EXTERNAL &&
+ get_current()->mm)
os_alarm_process(get_current()->mm->context.id.pid);
- }
(*timer_clockevent.event_handler)(&timer_clockevent);
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index ef2272e92a43..5b80a3a89c20 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -16,7 +16,122 @@
#include <kern_util.h>
#include <os.h>
#include <skas.h>
-#include <arch.h>
+
+/*
+ * NOTE: UML does not have exception tables. As such, this is almost a copy
+ * of the code in mm/memory.c, only adjusting the logic to simply check whether
+ * we are coming from the kernel instead of doing an additional lookup in the
+ * exception table.
+ * We can do this simplification because we never get here if the exception was
+ * fixable.
+ */
+static inline bool get_mmap_lock_carefully(struct mm_struct *mm, bool is_user)
+{
+ if (likely(mmap_read_trylock(mm)))
+ return true;
+
+ if (!is_user)
+ return false;
+
+ return !mmap_read_lock_killable(mm);
+}
+
+static inline bool mmap_upgrade_trylock(struct mm_struct *mm)
+{
+ /*
+ * We don't have this operation yet.
+ *
+ * It should be easy enough to do: it's basically a
+ * atomic_long_try_cmpxchg_acquire()
+ * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but
+ * it also needs the proper lockdep magic etc.
+ */
+ return false;
+}
+
+static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, bool is_user)
+{
+ mmap_read_unlock(mm);
+ if (!is_user)
+ return false;
+
+ return !mmap_write_lock_killable(mm);
+}
+
+/*
+ * Helper for page fault handling.
+ *
+ * This is kind of equivalend to "mmap_read_lock()" followed
+ * by "find_extend_vma()", except it's a lot more careful about
+ * the locking (and will drop the lock on failure).
+ *
+ * For example, if we have a kernel bug that causes a page
+ * fault, we don't want to just use mmap_read_lock() to get
+ * the mm lock, because that would deadlock if the bug were
+ * to happen while we're holding the mm lock for writing.
+ *
+ * So this checks the exception tables on kernel faults in
+ * order to only do this all for instructions that are actually
+ * expected to fault.
+ *
+ * We can also actually take the mm lock for writing if we
+ * need to extend the vma, which helps the VM layer a lot.
+ */
+static struct vm_area_struct *
+um_lock_mm_and_find_vma(struct mm_struct *mm,
+ unsigned long addr, bool is_user)
+{
+ struct vm_area_struct *vma;
+
+ if (!get_mmap_lock_carefully(mm, is_user))
+ return NULL;
+
+ vma = find_vma(mm, addr);
+ if (likely(vma && (vma->vm_start <= addr)))
+ return vma;
+
+ /*
+ * Well, dang. We might still be successful, but only
+ * if we can extend a vma to do so.
+ */
+ if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) {
+ mmap_read_unlock(mm);
+ return NULL;
+ }
+
+ /*
+ * We can try to upgrade the mmap lock atomically,
+ * in which case we can continue to use the vma
+ * we already looked up.
+ *
+ * Otherwise we'll have to drop the mmap lock and
+ * re-take it, and also look up the vma again,
+ * re-checking it.
+ */
+ if (!mmap_upgrade_trylock(mm)) {
+ if (!upgrade_mmap_lock_carefully(mm, is_user))
+ return NULL;
+
+ vma = find_vma(mm, addr);
+ if (!vma)
+ goto fail;
+ if (vma->vm_start <= addr)
+ goto success;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto fail;
+ }
+
+ if (expand_stack_locked(vma, addr))
+ goto fail;
+
+success:
+ mmap_write_downgrade(mm);
+ return vma;
+
+fail:
+ mmap_write_unlock(mm);
+ return NULL;
+}
/*
* Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by
@@ -44,21 +159,10 @@ int handle_page_fault(unsigned long address, unsigned long ip,
if (is_user)
flags |= FAULT_FLAG_USER;
retry:
- mmap_read_lock(mm);
- vma = find_vma(mm, address);
- if (!vma)
- goto out;
- if (vma->vm_start <= address)
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto out;
- if (is_user && !ARCH_IS_STACKGROW(address))
- goto out;
- vma = expand_stack(mm, address);
+ vma = um_lock_mm_and_find_vma(mm, address, is_user);
if (!vma)
goto out_nosemaphore;
-good_area:
*code_out = SEGV_ACCERR;
if (is_write) {
if (!(vma->vm_flags & VM_WRITE))
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile
index 049dfa5bc9c6..fae836713487 100644
--- a/arch/um/os-Linux/Makefile
+++ b/arch/um/os-Linux/Makefile
@@ -8,7 +8,7 @@ KCOV_INSTRUMENT := n
obj-y = execvp.o file.o helper.o irq.o main.o mem.o process.o \
registers.o sigio.o signal.o start_up.o time.o tty.o \
- umid.o user_syms.o util.o drivers/ skas/
+ umid.o user_syms.o util.o skas/
CFLAGS_signal.o += -Wframe-larger-than=4096
diff --git a/arch/um/os-Linux/drivers/Makefile b/arch/um/os-Linux/drivers/Makefile
deleted file mode 100644
index cf2d75bb1884..000000000000
--- a/arch/um/os-Linux/drivers/Makefile
+++ /dev/null
@@ -1,13 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com)
-#
-
-ethertap-objs := ethertap_kern.o ethertap_user.o
-tuntap-objs := tuntap_kern.o tuntap_user.o
-
-obj-y =
-obj-$(CONFIG_UML_NET_ETHERTAP) += ethertap.o
-obj-$(CONFIG_UML_NET_TUNTAP) += tuntap.o
-
-include $(srctree)/arch/um/scripts/Makefile.rules
diff --git a/arch/um/os-Linux/drivers/etap.h b/arch/um/os-Linux/drivers/etap.h
deleted file mode 100644
index a475259f90e1..000000000000
--- a/arch/um/os-Linux/drivers/etap.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#ifndef __DRIVERS_ETAP_H
-#define __DRIVERS_ETAP_H
-
-#include <net_user.h>
-
-struct ethertap_data {
- char *dev_name;
- char *gate_addr;
- int data_fd;
- int control_fd;
- void *dev;
-};
-
-extern const struct net_user_info ethertap_user_info;
-
-#endif
diff --git a/arch/um/os-Linux/drivers/ethertap_kern.c b/arch/um/os-Linux/drivers/ethertap_kern.c
deleted file mode 100644
index 5e5ee40680ce..000000000000
--- a/arch/um/os-Linux/drivers/ethertap_kern.c
+++ /dev/null
@@ -1,100 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Copyright (C) 2001 by various other people who didn't put their name here.
- */
-
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include "etap.h"
-#include <net_kern.h>
-
-struct ethertap_init {
- char *dev_name;
- char *gate_addr;
-};
-
-static void etap_init(struct net_device *dev, void *data)
-{
- struct uml_net_private *pri;
- struct ethertap_data *epri;
- struct ethertap_init *init = data;
-
- pri = netdev_priv(dev);
- epri = (struct ethertap_data *) pri->user;
- epri->dev_name = init->dev_name;
- epri->gate_addr = init->gate_addr;
- epri->data_fd = -1;
- epri->control_fd = -1;
- epri->dev = dev;
-
- printk(KERN_INFO "ethertap backend - %s", epri->dev_name);
- if (epri->gate_addr != NULL)
- printk(KERN_CONT ", IP = %s", epri->gate_addr);
- printk(KERN_CONT "\n");
-}
-
-static int etap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- int len;
-
- len = net_recvfrom(fd, skb_mac_header(skb),
- skb->dev->mtu + 2 + ETH_HEADER_ETHERTAP);
- if (len <= 0)
- return(len);
-
- skb_pull(skb, 2);
- len -= 2;
- return len;
-}
-
-static int etap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- skb_push(skb, 2);
- return net_send(fd, skb->data, skb->len);
-}
-
-const struct net_kern_info ethertap_kern_info = {
- .init = etap_init,
- .protocol = eth_protocol,
- .read = etap_read,
- .write = etap_write,
-};
-
-static int ethertap_setup(char *str, char **mac_out, void *data)
-{
- struct ethertap_init *init = data;
-
- *init = ((struct ethertap_init)
- { .dev_name = NULL,
- .gate_addr = NULL });
- if (tap_setup_common(str, "ethertap", &init->dev_name, mac_out,
- &init->gate_addr))
- return 0;
- if (init->dev_name == NULL) {
- printk(KERN_ERR "ethertap_setup : Missing tap device name\n");
- return 0;
- }
-
- return 1;
-}
-
-static struct transport ethertap_transport = {
- .list = LIST_HEAD_INIT(ethertap_transport.list),
- .name = "ethertap",
- .setup = ethertap_setup,
- .user = &ethertap_user_info,
- .kern = &ethertap_kern_info,
- .private_size = sizeof(struct ethertap_data),
- .setup_size = sizeof(struct ethertap_init),
-};
-
-static int register_ethertap(void)
-{
- register_transport(&ethertap_transport);
- return 0;
-}
-
-late_initcall(register_ethertap);
diff --git a/arch/um/os-Linux/drivers/ethertap_user.c b/arch/um/os-Linux/drivers/ethertap_user.c
deleted file mode 100644
index bdf215c0eca7..000000000000
--- a/arch/um/os-Linux/drivers/ethertap_user.c
+++ /dev/null
@@ -1,248 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 by various other people who didn't put their name here.
- */
-
-#include <stdio.h>
-#include <unistd.h>
-#include <errno.h>
-#include <string.h>
-#include <sys/socket.h>
-#include <sys/wait.h>
-#include "etap.h"
-#include <os.h>
-#include <net_user.h>
-#include <um_malloc.h>
-
-#define MAX_PACKET ETH_MAX_PACKET
-
-static int etap_user_init(void *data, void *dev)
-{
- struct ethertap_data *pri = data;
-
- pri->dev = dev;
- return 0;
-}
-
-struct addr_change {
- enum { ADD_ADDR, DEL_ADDR } what;
- unsigned char addr[4];
- unsigned char netmask[4];
-};
-
-static void etap_change(int op, unsigned char *addr, unsigned char *netmask,
- int fd)
-{
- struct addr_change change;
- char *output;
- int n;
-
- change.what = op;
- memcpy(change.addr, addr, sizeof(change.addr));
- memcpy(change.netmask, netmask, sizeof(change.netmask));
- CATCH_EINTR(n = write(fd, &change, sizeof(change)));
- if (n != sizeof(change)) {
- printk(UM_KERN_ERR "etap_change - request failed, err = %d\n",
- errno);
- return;
- }
-
- output = uml_kmalloc(UM_KERN_PAGE_SIZE, UM_GFP_KERNEL);
- if (output == NULL)
- printk(UM_KERN_ERR "etap_change : Failed to allocate output "
- "buffer\n");
- read_output(fd, output, UM_KERN_PAGE_SIZE);
- if (output != NULL) {
- printk("%s", output);
- kfree(output);
- }
-}
-
-static void etap_open_addr(unsigned char *addr, unsigned char *netmask,
- void *arg)
-{
- etap_change(ADD_ADDR, addr, netmask, *((int *) arg));
-}
-
-static void etap_close_addr(unsigned char *addr, unsigned char *netmask,
- void *arg)
-{
- etap_change(DEL_ADDR, addr, netmask, *((int *) arg));
-}
-
-struct etap_pre_exec_data {
- int control_remote;
- int control_me;
- int data_me;
-};
-
-static void etap_pre_exec(void *arg)
-{
- struct etap_pre_exec_data *data = arg;
-
- dup2(data->control_remote, 1);
- close(data->data_me);
- close(data->control_me);
-}
-
-static int etap_tramp(char *dev, char *gate, int control_me,
- int control_remote, int data_me, int data_remote)
-{
- struct etap_pre_exec_data pe_data;
- int pid, err, n;
- char version_buf[sizeof("nnnnn\0")];
- char data_fd_buf[sizeof("nnnnnn\0")];
- char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")];
- char *setup_args[] = { "uml_net", version_buf, "ethertap", dev,
- data_fd_buf, gate_buf, NULL };
- char *nosetup_args[] = { "uml_net", version_buf, "ethertap",
- dev, data_fd_buf, NULL };
- char **args, c;
-
- sprintf(data_fd_buf, "%d", data_remote);
- sprintf(version_buf, "%d", UML_NET_VERSION);
- if (gate != NULL) {
- strscpy(gate_buf, gate);
- args = setup_args;
- }
- else args = nosetup_args;
-
- err = 0;
- pe_data.control_remote = control_remote;
- pe_data.control_me = control_me;
- pe_data.data_me = data_me;
- pid = run_helper(etap_pre_exec, &pe_data, args);
-
- if (pid < 0)
- err = pid;
- close(data_remote);
- close(control_remote);
- CATCH_EINTR(n = read(control_me, &c, sizeof(c)));
- if (n != sizeof(c)) {
- err = -errno;
- printk(UM_KERN_ERR "etap_tramp : read of status failed, "
- "err = %d\n", -err);
- return err;
- }
- if (c != 1) {
- printk(UM_KERN_ERR "etap_tramp : uml_net failed\n");
- err = helper_wait(pid);
- }
- return err;
-}
-
-static int etap_open(void *data)
-{
- struct ethertap_data *pri = data;
- char *output;
- int data_fds[2], control_fds[2], err, output_len;
-
- err = tap_open_common(pri->dev, pri->gate_addr);
- if (err)
- return err;
-
- err = socketpair(AF_UNIX, SOCK_DGRAM, 0, data_fds);
- if (err) {
- err = -errno;
- printk(UM_KERN_ERR "etap_open - data socketpair failed - "
- "err = %d\n", errno);
- return err;
- }
-
- err = socketpair(AF_UNIX, SOCK_STREAM, 0, control_fds);
- if (err) {
- err = -errno;
- printk(UM_KERN_ERR "etap_open - control socketpair failed - "
- "err = %d\n", errno);
- goto out_close_data;
- }
-
- err = etap_tramp(pri->dev_name, pri->gate_addr, control_fds[0],
- control_fds[1], data_fds[0], data_fds[1]);
- output_len = UM_KERN_PAGE_SIZE;
- output = uml_kmalloc(output_len, UM_GFP_KERNEL);
- read_output(control_fds[0], output, output_len);
-
- if (output == NULL)
- printk(UM_KERN_ERR "etap_open : failed to allocate output "
- "buffer\n");
- else {
- printk("%s", output);
- kfree(output);
- }
-
- if (err < 0) {
- printk(UM_KERN_ERR "etap_tramp failed - err = %d\n", -err);
- goto out_close_control;
- }
-
- pri->data_fd = data_fds[0];
- pri->control_fd = control_fds[0];
- iter_addresses(pri->dev, etap_open_addr, &pri->control_fd);
- return data_fds[0];
-
-out_close_control:
- close(control_fds[0]);
- close(control_fds[1]);
-out_close_data:
- close(data_fds[0]);
- close(data_fds[1]);
- return err;
-}
-
-static void etap_close(int fd, void *data)
-{
- struct ethertap_data *pri = data;
-
- iter_addresses(pri->dev, etap_close_addr, &pri->control_fd);
- close(fd);
-
- if (shutdown(pri->data_fd, SHUT_RDWR) < 0)
- printk(UM_KERN_ERR "etap_close - shutdown data socket failed, "
- "errno = %d\n", errno);
-
- if (shutdown(pri->control_fd, SHUT_RDWR) < 0)
- printk(UM_KERN_ERR "etap_close - shutdown control socket "
- "failed, errno = %d\n", errno);
-
- close(pri->data_fd);
- pri->data_fd = -1;
- close(pri->control_fd);
- pri->control_fd = -1;
-}
-
-static void etap_add_addr(unsigned char *addr, unsigned char *netmask,
- void *data)
-{
- struct ethertap_data *pri = data;
-
- tap_check_ips(pri->gate_addr, addr);
- if (pri->control_fd == -1)
- return;
- etap_open_addr(addr, netmask, &pri->control_fd);
-}
-
-static void etap_del_addr(unsigned char *addr, unsigned char *netmask,
- void *data)
-{
- struct ethertap_data *pri = data;
-
- if (pri->control_fd == -1)
- return;
-
- etap_close_addr(addr, netmask, &pri->control_fd);
-}
-
-const struct net_user_info ethertap_user_info = {
- .init = etap_user_init,
- .open = etap_open,
- .close = etap_close,
- .remove = NULL,
- .add_address = etap_add_addr,
- .delete_address = etap_del_addr,
- .mtu = ETH_MAX_PACKET,
- .max_packet = ETH_MAX_PACKET + ETH_HEADER_ETHERTAP,
-};
diff --git a/arch/um/os-Linux/drivers/tuntap.h b/arch/um/os-Linux/drivers/tuntap.h
deleted file mode 100644
index e364e42abfc5..000000000000
--- a/arch/um/os-Linux/drivers/tuntap.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#ifndef __UM_TUNTAP_H
-#define __UM_TUNTAP_H
-
-#include <net_user.h>
-
-struct tuntap_data {
- char *dev_name;
- int fixed_config;
- char *gate_addr;
- int fd;
- void *dev;
-};
-
-extern const struct net_user_info tuntap_user_info;
-
-#endif
diff --git a/arch/um/os-Linux/drivers/tuntap_kern.c b/arch/um/os-Linux/drivers/tuntap_kern.c
deleted file mode 100644
index ff022d9cf0dd..000000000000
--- a/arch/um/os-Linux/drivers/tuntap_kern.c
+++ /dev/null
@@ -1,86 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#include <linux/netdevice.h>
-#include <linux/init.h>
-#include <linux/skbuff.h>
-#include <asm/errno.h>
-#include <net_kern.h>
-#include "tuntap.h"
-
-struct tuntap_init {
- char *dev_name;
- char *gate_addr;
-};
-
-static void tuntap_init(struct net_device *dev, void *data)
-{
- struct uml_net_private *pri;
- struct tuntap_data *tpri;
- struct tuntap_init *init = data;
-
- pri = netdev_priv(dev);
- tpri = (struct tuntap_data *) pri->user;
- tpri->dev_name = init->dev_name;
- tpri->fixed_config = (init->dev_name != NULL);
- tpri->gate_addr = init->gate_addr;
- tpri->fd = -1;
- tpri->dev = dev;
-
- printk(KERN_INFO "TUN/TAP backend - ");
- if (tpri->gate_addr != NULL)
- printk(KERN_CONT "IP = %s", tpri->gate_addr);
- printk(KERN_CONT "\n");
-}
-
-static int tuntap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- return net_read(fd, skb_mac_header(skb),
- skb->dev->mtu + ETH_HEADER_OTHER);
-}
-
-static int tuntap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- return net_write(fd, skb->data, skb->len);
-}
-
-const struct net_kern_info tuntap_kern_info = {
- .init = tuntap_init,
- .protocol = eth_protocol,
- .read = tuntap_read,
- .write = tuntap_write,
-};
-
-static int tuntap_setup(char *str, char **mac_out, void *data)
-{
- struct tuntap_init *init = data;
-
- *init = ((struct tuntap_init)
- { .dev_name = NULL,
- .gate_addr = NULL });
- if (tap_setup_common(str, "tuntap", &init->dev_name, mac_out,
- &init->gate_addr))
- return 0;
-
- return 1;
-}
-
-static struct transport tuntap_transport = {
- .list = LIST_HEAD_INIT(tuntap_transport.list),
- .name = "tuntap",
- .setup = tuntap_setup,
- .user = &tuntap_user_info,
- .kern = &tuntap_kern_info,
- .private_size = sizeof(struct tuntap_data),
- .setup_size = sizeof(struct tuntap_init),
-};
-
-static int register_tuntap(void)
-{
- register_transport(&tuntap_transport);
- return 0;
-}
-
-late_initcall(register_tuntap);
diff --git a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c
deleted file mode 100644
index 91f0e27ca3a6..000000000000
--- a/arch/um/os-Linux/drivers/tuntap_user.c
+++ /dev/null
@@ -1,215 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#include <stdio.h>
-#include <unistd.h>
-#include <errno.h>
-#include <string.h>
-#include <linux/if_tun.h>
-#include <net/if.h>
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <sys/wait.h>
-#include <sys/uio.h>
-#include <kern_util.h>
-#include <os.h>
-#include "tuntap.h"
-
-static int tuntap_user_init(void *data, void *dev)
-{
- struct tuntap_data *pri = data;
-
- pri->dev = dev;
- return 0;
-}
-
-static void tuntap_add_addr(unsigned char *addr, unsigned char *netmask,
- void *data)
-{
- struct tuntap_data *pri = data;
-
- tap_check_ips(pri->gate_addr, addr);
- if ((pri->fd == -1) || pri->fixed_config)
- return;
- open_addr(addr, netmask, pri->dev_name);
-}
-
-static void tuntap_del_addr(unsigned char *addr, unsigned char *netmask,
- void *data)
-{
- struct tuntap_data *pri = data;
-
- if ((pri->fd == -1) || pri->fixed_config)
- return;
- close_addr(addr, netmask, pri->dev_name);
-}
-
-struct tuntap_pre_exec_data {
- int stdout_fd;
- int close_me;
-};
-
-static void tuntap_pre_exec(void *arg)
-{
- struct tuntap_pre_exec_data *data = arg;
-
- dup2(data->stdout_fd, 1);
- close(data->close_me);
-}
-
-static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote,
- char *buffer, int buffer_len, int *used_out)
-{
- struct tuntap_pre_exec_data data;
- char version_buf[sizeof("nnnnn\0")];
- char *argv[] = { "uml_net", version_buf, "tuntap", "up", gate,
- NULL };
- char buf[CMSG_SPACE(sizeof(*fd_out))];
- struct msghdr msg;
- struct cmsghdr *cmsg;
- struct iovec iov;
- int pid, n, err;
-
- sprintf(version_buf, "%d", UML_NET_VERSION);
-
- data.stdout_fd = remote;
- data.close_me = me;
-
- pid = run_helper(tuntap_pre_exec, &data, argv);
-
- if (pid < 0)
- return pid;
-
- close(remote);
-
- msg.msg_name = NULL;
- msg.msg_namelen = 0;
- if (buffer != NULL) {
- iov = ((struct iovec) { buffer, buffer_len });
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
- }
- else {
- msg.msg_iov = NULL;
- msg.msg_iovlen = 0;
- }
- msg.msg_control = buf;
- msg.msg_controllen = sizeof(buf);
- msg.msg_flags = 0;
- n = recvmsg(me, &msg, 0);
- *used_out = n;
- if (n < 0) {
- err = -errno;
- printk(UM_KERN_ERR "tuntap_open_tramp : recvmsg failed - "
- "errno = %d\n", errno);
- return err;
- }
- helper_wait(pid);
-
- cmsg = CMSG_FIRSTHDR(&msg);
- if (cmsg == NULL) {
- printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a "
- "message\n");
- return -EINVAL;
- }
- if ((cmsg->cmsg_level != SOL_SOCKET) ||
- (cmsg->cmsg_type != SCM_RIGHTS)) {
- printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a "
- "descriptor\n");
- return -EINVAL;
- }
- *fd_out = ((int *) CMSG_DATA(cmsg))[0];
- os_set_exec_close(*fd_out);
- return 0;
-}
-
-static int tuntap_open(void *data)
-{
- struct ifreq ifr;
- struct tuntap_data *pri = data;
- char *output, *buffer;
- int err, fds[2], len, used;
-
- err = tap_open_common(pri->dev, pri->gate_addr);
- if (err < 0)
- return err;
-
- if (pri->fixed_config) {
- pri->fd = os_open_file("/dev/net/tun",
- of_cloexec(of_rdwr(OPENFLAGS())), 0);
- if (pri->fd < 0) {
- printk(UM_KERN_ERR "Failed to open /dev/net/tun, "
- "err = %d\n", -pri->fd);
- return pri->fd;
- }
- memset(&ifr, 0, sizeof(ifr));
- ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
- strscpy(ifr.ifr_name, pri->dev_name);
- if (ioctl(pri->fd, TUNSETIFF, &ifr) < 0) {
- err = -errno;
- printk(UM_KERN_ERR "TUNSETIFF failed, errno = %d\n",
- errno);
- close(pri->fd);
- return err;
- }
- }
- else {
- err = socketpair(AF_UNIX, SOCK_DGRAM, 0, fds);
- if (err) {
- err = -errno;
- printk(UM_KERN_ERR "tuntap_open : socketpair failed - "
- "errno = %d\n", errno);
- return err;
- }
-
- buffer = get_output_buffer(&len);
- if (buffer != NULL)
- len--;
- used = 0;
-
- err = tuntap_open_tramp(pri->gate_addr, &pri->fd, fds[0],
- fds[1], buffer, len, &used);
-
- output = buffer;
- if (err < 0) {
- printk("%s", output);
- free_output_buffer(buffer);
- printk(UM_KERN_ERR "tuntap_open_tramp failed - "
- "err = %d\n", -err);
- return err;
- }
-
- pri->dev_name = uml_strdup(buffer);
- output += IFNAMSIZ;
- printk("%s", output);
- free_output_buffer(buffer);
-
- close(fds[0]);
- iter_addresses(pri->dev, open_addr, pri->dev_name);
- }
-
- return pri->fd;
-}
-
-static void tuntap_close(int fd, void *data)
-{
- struct tuntap_data *pri = data;
-
- if (!pri->fixed_config)
- iter_addresses(pri->dev, close_addr, pri->dev_name);
- close(fd);
- pri->fd = -1;
-}
-
-const struct net_user_info tuntap_user_info = {
- .init = tuntap_user_init,
- .open = tuntap_open,
- .close = tuntap_close,
- .remove = NULL,
- .add_address = tuntap_add_addr,
- .delete_address = tuntap_del_addr,
- .mtu = ETH_MAX_PACKET,
- .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER,
-};
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index a0d01c68ce3e..617886d1fb1e 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -106,21 +106,6 @@ int os_get_ifname(int fd, char* namebuf)
return 0;
}
-int os_set_slip(int fd)
-{
- int disc, sencap;
-
- disc = N_SLIP;
- if (ioctl(fd, TIOCSETD, &disc) < 0)
- return -errno;
-
- sencap = 0;
- if (ioctl(fd, SIOCSIFENCAP, &sencap) < 0)
- return -errno;
-
- return 0;
-}
-
int os_mode_fd(int fd, int mode)
{
int err;
diff --git a/arch/um/os-Linux/internal.h b/arch/um/os-Linux/internal.h
index 317fca190c2b..5d8d3b0817a9 100644
--- a/arch/um/os-Linux/internal.h
+++ b/arch/um/os-Linux/internal.h
@@ -2,6 +2,9 @@
#ifndef __UM_OS_LINUX_INTERNAL_H
#define __UM_OS_LINUX_INTERNAL_H
+#include <mm_id.h>
+#include <stub-data.h>
+
/*
* elf_aux.c
*/
@@ -16,5 +19,5 @@ void check_tmpexec(void);
* skas/process.c
*/
void wait_stub_done(int pid);
-
+void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys);
#endif /* __UM_OS_LINUX_INTERNAL_H */
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index 184566edeee9..00b49e90d05f 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -18,17 +18,29 @@
#include <init.h>
#include <longjmp.h>
#include <os.h>
+#include <skas/skas.h>
void os_alarm_process(int pid)
{
+ if (pid <= 0)
+ return;
+
kill(pid, SIGALRM);
}
void os_kill_process(int pid, int reap_child)
{
+ if (pid <= 0)
+ return;
+
+ /* Block signals until child is reaped */
+ block_signals();
+
kill(pid, SIGKILL);
if (reap_child)
CATCH_EINTR(waitpid(pid, NULL, __WALL));
+
+ unblock_signals();
}
/* Kill off a ptraced child by all means available. kill it normally first,
@@ -38,11 +50,27 @@ void os_kill_process(int pid, int reap_child)
void os_kill_ptraced_process(int pid, int reap_child)
{
+ if (pid <= 0)
+ return;
+
+ /* Block signals until child is reaped */
+ block_signals();
+
kill(pid, SIGKILL);
ptrace(PTRACE_KILL, pid);
ptrace(PTRACE_CONT, pid);
if (reap_child)
CATCH_EINTR(waitpid(pid, NULL, __WALL));
+
+ unblock_signals();
+}
+
+pid_t os_reap_child(void)
+{
+ int status;
+
+ /* Try to reap a child */
+ return waitpid(-1, &status, WNOHANG);
}
/* Don't use the glibc version, which caches the result in TLS. It misses some
@@ -151,6 +179,9 @@ void init_new_thread_signals(void)
set_handler(SIGBUS);
signal(SIGHUP, SIG_IGN);
set_handler(SIGIO);
+ /* We (currently) only use the child reaper IRQ in seccomp mode */
+ if (using_seccomp)
+ set_handler(SIGCHLD);
signal(SIGWINCH, SIG_IGN);
}
diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c
index d7ca148807b2..bfba2cbc9478 100644
--- a/arch/um/os-Linux/registers.c
+++ b/arch/um/os-Linux/registers.c
@@ -14,8 +14,8 @@
/* This is set once at boot time and not changed thereafter */
-static unsigned long exec_regs[MAX_REG_NR];
-static unsigned long *exec_fp_regs;
+unsigned long exec_regs[MAX_REG_NR];
+unsigned long *exec_fp_regs;
int init_pid_registers(int pid)
{
diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c
index a05a6ecee756..6de145f8fe3d 100644
--- a/arch/um/os-Linux/sigio.c
+++ b/arch/um/os-Linux/sigio.c
@@ -12,6 +12,7 @@
#include <signal.h>
#include <string.h>
#include <sys/epoll.h>
+#include <asm/unistd.h>
#include <kern_util.h>
#include <init.h>
#include <os.h>
@@ -46,7 +47,7 @@ static void *write_sigio_thread(void *unused)
__func__, errno);
}
- CATCH_EINTR(r = tgkill(pid, pid, SIGIO));
+ CATCH_EINTR(r = syscall(__NR_tgkill, pid, pid, SIGIO));
if (r < 0)
printk(UM_KERN_ERR "%s: tgkill failed, errno = %d\n",
__func__, errno);
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index e71e5b4878d1..11f07f498270 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -29,6 +29,7 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *, void *mc) =
[SIGBUS] = relay_signal,
[SIGSEGV] = segv_handler,
[SIGIO] = sigio_handler,
+ [SIGCHLD] = sigchld_handler,
};
static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
@@ -44,7 +45,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
}
/* enable signals if sig isn't IRQ signal */
- if ((sig != SIGIO) && (sig != SIGWINCH))
+ if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGCHLD))
unblock_signals_trace();
(*sig_info[sig])(sig, si, &r, mc);
@@ -64,6 +65,9 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
#define SIGALRM_BIT 1
#define SIGALRM_MASK (1 << SIGALRM_BIT)
+#define SIGCHLD_BIT 2
+#define SIGCHLD_MASK (1 << SIGCHLD_BIT)
+
int signals_enabled;
#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT)
static int signals_blocked, signals_blocked_pending;
@@ -102,6 +106,11 @@ static void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
return;
}
+ if (!enabled && (sig == SIGCHLD)) {
+ signals_pending |= SIGCHLD_MASK;
+ return;
+ }
+
block_signals_trace();
sig_handler_common(sig, si, mc);
@@ -181,6 +190,8 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = {
[SIGIO] = sig_handler,
[SIGWINCH] = sig_handler,
+ /* SIGCHLD is only actually registered in seccomp mode. */
+ [SIGCHLD] = sig_handler,
[SIGALRM] = timer_alarm_handler,
[SIGUSR1] = sigusr1_handler,
@@ -309,6 +320,12 @@ void unblock_signals(void)
if (save_pending & SIGIO_MASK)
sig_handler_common(SIGIO, NULL, NULL);
+ if (save_pending & SIGCHLD_MASK) {
+ struct uml_pt_regs regs = {};
+
+ sigchld_handler(SIGCHLD, NULL, &regs, NULL);
+ }
+
/* Do not reenter the handler */
if ((save_pending & SIGALRM_MASK) && (!(signals_active & SIGALRM_MASK)))
diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c
index d7f1814b0e5a..8b9921ac3ef8 100644
--- a/arch/um/os-Linux/skas/mem.c
+++ b/arch/um/os-Linux/skas/mem.c
@@ -43,6 +43,16 @@ void syscall_stub_dump_error(struct mm_id *mm_idp)
print_hex_dump(UM_KERN_ERR, " syscall data: ", 0,
16, 4, sc, sizeof(*sc), 0);
+
+ if (using_seccomp) {
+ printk(UM_KERN_ERR "%s: FD map num: %d", __func__,
+ mm_idp->syscall_fd_num);
+ print_hex_dump(UM_KERN_ERR,
+ " FD map: ", 0, 16,
+ sizeof(mm_idp->syscall_fd_map[0]),
+ mm_idp->syscall_fd_map,
+ sizeof(mm_idp->syscall_fd_map), 0);
+ }
}
static inline unsigned long *check_init_stack(struct mm_id * mm_idp,
@@ -80,27 +90,32 @@ static inline long do_syscall_stub(struct mm_id *mm_idp)
int n, i;
int err, pid = mm_idp->pid;
- n = ptrace_setregs(pid, syscall_regs);
- if (n < 0) {
- printk(UM_KERN_ERR "Registers - \n");
- for (i = 0; i < MAX_REG_NR; i++)
- printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]);
- panic("%s : PTRACE_SETREGS failed, errno = %d\n",
- __func__, -n);
- }
-
/* Inform process how much we have filled in. */
proc_data->syscall_data_len = mm_idp->syscall_data_len;
- err = ptrace(PTRACE_CONT, pid, 0, 0);
- if (err)
- panic("Failed to continue stub, pid = %d, errno = %d\n", pid,
- errno);
-
- wait_stub_done(pid);
+ if (using_seccomp) {
+ proc_data->restart_wait = 1;
+ wait_stub_done_seccomp(mm_idp, 0, 1);
+ } else {
+ n = ptrace_setregs(pid, syscall_regs);
+ if (n < 0) {
+ printk(UM_KERN_ERR "Registers -\n");
+ for (i = 0; i < MAX_REG_NR; i++)
+ printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]);
+ panic("%s : PTRACE_SETREGS failed, errno = %d\n",
+ __func__, -n);
+ }
+
+ err = ptrace(PTRACE_CONT, pid, 0, 0);
+ if (err)
+ panic("Failed to continue stub, pid = %d, errno = %d\n",
+ pid, errno);
+
+ wait_stub_done(pid);
+ }
/*
- * proc_data->err will be non-zero if there was an (unexpected) error.
+ * proc_data->err will be negative if there was an (unexpected) error.
* In that case, syscall_data_len points to the last executed syscall,
* otherwise it will be zero (but we do not need to rely on that).
*/
@@ -113,6 +128,9 @@ static inline long do_syscall_stub(struct mm_id *mm_idp)
mm_idp->syscall_data_len = 0;
}
+ if (using_seccomp)
+ mm_idp->syscall_fd_num = 0;
+
return mm_idp->syscall_data_len;
}
@@ -175,6 +193,44 @@ static struct stub_syscall *syscall_stub_get_previous(struct mm_id *mm_idp,
return NULL;
}
+static int get_stub_fd(struct mm_id *mm_idp, int fd)
+{
+ int i;
+
+ /* Find an FD slot (or flush and use first) */
+ if (!using_seccomp)
+ return fd;
+
+ /* Already crashed, value does not matter */
+ if (mm_idp->syscall_data_len < 0)
+ return 0;
+
+ /* Find existing FD in map if we can allocate another syscall */
+ if (mm_idp->syscall_data_len <
+ ARRAY_SIZE(((struct stub_data *)NULL)->syscall_data)) {
+ for (i = 0; i < mm_idp->syscall_fd_num; i++) {
+ if (mm_idp->syscall_fd_map[i] == fd)
+ return i;
+ }
+
+ if (mm_idp->syscall_fd_num < STUB_MAX_FDS) {
+ i = mm_idp->syscall_fd_num;
+ mm_idp->syscall_fd_map[i] = fd;
+
+ mm_idp->syscall_fd_num++;
+
+ return i;
+ }
+ }
+
+ /* FD map full or no syscall space available, continue after flush */
+ do_syscall_stub(mm_idp);
+ mm_idp->syscall_fd_map[0] = fd;
+ mm_idp->syscall_fd_num = 1;
+
+ return 0;
+}
+
int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot,
int phys_fd, unsigned long long offset)
{
@@ -182,12 +238,21 @@ int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot,
/* Compress with previous syscall if that is possible */
sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MMAP, virt);
- if (sc && sc->mem.prot == prot && sc->mem.fd == phys_fd &&
+ if (sc && sc->mem.prot == prot &&
sc->mem.offset == MMAP_OFFSET(offset - sc->mem.length)) {
- sc->mem.length += len;
- return 0;
+ int prev_fd = sc->mem.fd;
+
+ if (using_seccomp)
+ prev_fd = mm_idp->syscall_fd_map[sc->mem.fd];
+
+ if (phys_fd == prev_fd) {
+ sc->mem.length += len;
+ return 0;
+ }
}
+ phys_fd = get_stub_fd(mm_idp, phys_fd);
+
sc = syscall_stub_alloc(mm_idp);
sc->syscall = STUB_SYSCALL_MMAP;
sc->mem.addr = virt;
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index ae2aea062f06..e42ffac23e3c 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
+ * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
* Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
* Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
*/
@@ -15,6 +16,7 @@
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/stat.h>
+#include <sys/socket.h>
#include <asm/unistd.h>
#include <as-layout.h>
#include <init.h>
@@ -25,8 +27,11 @@
#include <registers.h>
#include <skas.h>
#include <sysdep/stub.h>
+#include <sysdep/mcontext.h>
+#include <linux/futex.h>
#include <linux/threads.h>
#include <timetravel.h>
+#include <asm-generic/rwonce.h>
#include "../internal.h"
int is_skas_winch(int pid, int fd, void *data)
@@ -142,6 +147,105 @@ bad_wait:
fatal_sigsegv();
}
+void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys)
+{
+ struct stub_data *data = (void *)mm_idp->stack;
+ int ret;
+
+ do {
+ const char byte = 0;
+ struct iovec iov = {
+ .iov_base = (void *)&byte,
+ .iov_len = sizeof(byte),
+ };
+ union {
+ char data[CMSG_SPACE(sizeof(mm_idp->syscall_fd_map))];
+ struct cmsghdr align;
+ } ctrl;
+ struct msghdr msgh = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+
+ if (!running) {
+ if (mm_idp->syscall_fd_num) {
+ unsigned int fds_size =
+ sizeof(int) * mm_idp->syscall_fd_num;
+ struct cmsghdr *cmsg;
+
+ msgh.msg_control = ctrl.data;
+ msgh.msg_controllen = CMSG_SPACE(fds_size);
+ cmsg = CMSG_FIRSTHDR(&msgh);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(fds_size);
+ memcpy(CMSG_DATA(cmsg), mm_idp->syscall_fd_map,
+ fds_size);
+
+ CATCH_EINTR(syscall(__NR_sendmsg, mm_idp->sock,
+ &msgh, 0));
+ }
+
+ data->signal = 0;
+ data->futex = FUTEX_IN_CHILD;
+ CATCH_EINTR(syscall(__NR_futex, &data->futex,
+ FUTEX_WAKE, 1, NULL, NULL, 0));
+ }
+
+ do {
+ /*
+ * We need to check whether the child is still alive
+ * before and after the FUTEX_WAIT call. Before, in
+ * case it just died but we still updated data->futex
+ * to FUTEX_IN_CHILD. And after, in case it died while
+ * we were waiting (and SIGCHLD woke us up, see the
+ * IRQ handler in mmu.c).
+ *
+ * Either way, if PID is negative, then we have no
+ * choice but to kill the task.
+ */
+ if (__READ_ONCE(mm_idp->pid) < 0)
+ goto out_kill;
+
+ ret = syscall(__NR_futex, &data->futex,
+ FUTEX_WAIT, FUTEX_IN_CHILD,
+ NULL, NULL, 0);
+ if (ret < 0 && errno != EINTR && errno != EAGAIN) {
+ printk(UM_KERN_ERR "%s : FUTEX_WAIT failed, errno = %d\n",
+ __func__, errno);
+ goto out_kill;
+ }
+ } while (data->futex == FUTEX_IN_CHILD);
+
+ if (__READ_ONCE(mm_idp->pid) < 0)
+ goto out_kill;
+
+ running = 0;
+
+ /* We may receive a SIGALRM before SIGSYS, iterate again. */
+ } while (wait_sigsys && data->signal == SIGALRM);
+
+ if (data->mctx_offset > sizeof(data->sigstack) - sizeof(mcontext_t)) {
+ printk(UM_KERN_ERR "%s : invalid mcontext offset", __func__);
+ goto out_kill;
+ }
+
+ if (wait_sigsys && data->signal != SIGSYS) {
+ printk(UM_KERN_ERR "%s : expected SIGSYS but got %d",
+ __func__, data->signal);
+ goto out_kill;
+ }
+
+ return;
+
+out_kill:
+ printk(UM_KERN_ERR "%s : failed to wait for stub, pid = %d, errno = %d\n",
+ __func__, mm_idp->pid, errno);
+ /* This is not true inside start_userspace */
+ if (current_mm_id() == mm_idp)
+ fatal_sigsegv();
+}
+
extern unsigned long current_stub_stack(void);
static void get_skas_faultinfo(int pid, struct faultinfo *fi)
@@ -163,12 +267,6 @@ static void get_skas_faultinfo(int pid, struct faultinfo *fi)
memcpy(fi, (void *)current_stub_stack(), sizeof(*fi));
}
-static void handle_segv(int pid, struct uml_pt_regs *regs)
-{
- get_skas_faultinfo(pid, &regs->faultinfo);
- segv(regs->faultinfo, 0, 1, NULL, NULL);
-}
-
static void handle_trap(int pid, struct uml_pt_regs *regs)
{
if ((UPT_IP(regs) >= STUB_START) && (UPT_IP(regs) < STUB_END))
@@ -181,29 +279,48 @@ extern char __syscall_stub_start[];
static int stub_exe_fd;
+struct tramp_data {
+ struct stub_data *stub_data;
+ /* 0 is inherited, 1 is the kernel side */
+ int sockpair[2];
+};
+
#ifndef CLOSE_RANGE_CLOEXEC
#define CLOSE_RANGE_CLOEXEC (1U << 2)
#endif
-static int userspace_tramp(void *stack)
+static int userspace_tramp(void *data)
{
+ struct tramp_data *tramp_data = data;
char *const argv[] = { "uml-userspace", NULL };
- int pipe_fds[2];
unsigned long long offset;
struct stub_init_data init_data = {
+ .seccomp = using_seccomp,
.stub_start = STUB_START,
- .segv_handler = STUB_CODE +
- (unsigned long) stub_segv_handler -
- (unsigned long) __syscall_stub_start,
};
struct iomem_region *iomem;
int ret;
+ if (using_seccomp) {
+ init_data.signal_handler = STUB_CODE +
+ (unsigned long) stub_signal_interrupt -
+ (unsigned long) __syscall_stub_start;
+ init_data.signal_restorer = STUB_CODE +
+ (unsigned long) stub_signal_restorer -
+ (unsigned long) __syscall_stub_start;
+ } else {
+ init_data.signal_handler = STUB_CODE +
+ (unsigned long) stub_segv_handler -
+ (unsigned long) __syscall_stub_start;
+ init_data.signal_restorer = 0;
+ }
+
init_data.stub_code_fd = phys_mapping(uml_to_phys(__syscall_stub_start),
&offset);
init_data.stub_code_offset = MMAP_OFFSET(offset);
- init_data.stub_data_fd = phys_mapping(uml_to_phys(stack), &offset);
+ init_data.stub_data_fd = phys_mapping(uml_to_phys(tramp_data->stub_data),
+ &offset);
init_data.stub_data_offset = MMAP_OFFSET(offset);
/*
@@ -214,20 +331,21 @@ static int userspace_tramp(void *stack)
syscall(__NR_close_range, 0, ~0U, CLOSE_RANGE_CLOEXEC);
fcntl(init_data.stub_data_fd, F_SETFD, 0);
- for (iomem = iomem_regions; iomem; iomem = iomem->next)
- fcntl(iomem->fd, F_SETFD, 0);
- /* Create a pipe for init_data (no CLOEXEC) and dup2 to STDIN */
- if (pipe(pipe_fds))
- exit(2);
+ /* In SECCOMP mode, these FDs are passed when needed */
+ if (!using_seccomp) {
+ for (iomem = iomem_regions; iomem; iomem = iomem->next)
+ fcntl(iomem->fd, F_SETFD, 0);
+ }
- if (dup2(pipe_fds[0], 0) < 0)
+ /* dup2 signaling FD/socket to STDIN */
+ if (dup2(tramp_data->sockpair[0], 0) < 0)
exit(3);
- close(pipe_fds[0]);
+ close(tramp_data->sockpair[0]);
/* Write init_data and close write side */
- ret = write(pipe_fds[1], &init_data, sizeof(init_data));
- close(pipe_fds[1]);
+ ret = write(tramp_data->sockpair[1], &init_data, sizeof(init_data));
+ close(tramp_data->sockpair[1]);
if (ret != sizeof(init_data))
exit(4);
@@ -315,11 +433,12 @@ static int __init init_stub_exe_fd(void)
}
__initcall(init_stub_exe_fd);
+int using_seccomp;
int userspace_pid[NR_CPUS];
/**
* start_userspace() - prepare a new userspace process
- * @stub_stack: pointer to the stub stack.
+ * @mm_id: The corresponding struct mm_id
*
* Setups a new temporary stack page that is used while userspace_tramp() runs
* Clones the kernel process into a new userspace process, with FDs only.
@@ -328,11 +447,15 @@ int userspace_pid[NR_CPUS];
* when negative: an error number.
* FIXME: can PIDs become negative?!
*/
-int start_userspace(unsigned long stub_stack)
+int start_userspace(struct mm_id *mm_id)
{
+ struct stub_data *proc_data = (void *)mm_id->stack;
+ struct tramp_data tramp_data = {
+ .stub_data = proc_data,
+ };
void *stack;
unsigned long sp;
- int pid, status, n, err;
+ int status, n, err;
/* setup a temporary stack page */
stack = mmap(NULL, UM_KERN_PAGE_SIZE,
@@ -348,40 +471,55 @@ int start_userspace(unsigned long stub_stack)
/* set stack pointer to the end of the stack page, so it can grow downwards */
sp = (unsigned long)stack + UM_KERN_PAGE_SIZE;
- /* clone into new userspace process */
- pid = clone(userspace_tramp, (void *) sp,
+ /* socket pair for init data and SECCOMP FD passing (no CLOEXEC here) */
+ if (socketpair(AF_UNIX, SOCK_STREAM, 0, tramp_data.sockpair)) {
+ err = -errno;
+ printk(UM_KERN_ERR "%s : socketpair failed, errno = %d\n",
+ __func__, errno);
+ return err;
+ }
+
+ if (using_seccomp)
+ proc_data->futex = FUTEX_IN_CHILD;
+
+ mm_id->pid = clone(userspace_tramp, (void *) sp,
CLONE_VFORK | CLONE_VM | SIGCHLD,
- (void *)stub_stack);
- if (pid < 0) {
+ (void *)&tramp_data);
+ if (mm_id->pid < 0) {
err = -errno;
printk(UM_KERN_ERR "%s : clone failed, errno = %d\n",
__func__, errno);
- return err;
+ goto out_close;
}
- do {
- CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL));
- if (n < 0) {
+ if (using_seccomp) {
+ wait_stub_done_seccomp(mm_id, 1, 1);
+ } else {
+ do {
+ CATCH_EINTR(n = waitpid(mm_id->pid, &status,
+ WUNTRACED | __WALL));
+ if (n < 0) {
+ err = -errno;
+ printk(UM_KERN_ERR "%s : wait failed, errno = %d\n",
+ __func__, errno);
+ goto out_kill;
+ }
+ } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM));
+
+ if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) {
+ err = -EINVAL;
+ printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n",
+ __func__, status);
+ goto out_kill;
+ }
+
+ if (ptrace(PTRACE_SETOPTIONS, mm_id->pid, NULL,
+ (void *) PTRACE_O_TRACESYSGOOD) < 0) {
err = -errno;
- printk(UM_KERN_ERR "%s : wait failed, errno = %d\n",
+ printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n",
__func__, errno);
goto out_kill;
}
- } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM));
-
- if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) {
- err = -EINVAL;
- printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n",
- __func__, status);
- goto out_kill;
- }
-
- if (ptrace(PTRACE_SETOPTIONS, pid, NULL,
- (void *) PTRACE_O_TRACESYSGOOD) < 0) {
- err = -errno;
- printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n",
- __func__, errno);
- goto out_kill;
}
if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) {
@@ -391,10 +529,22 @@ int start_userspace(unsigned long stub_stack)
goto out_kill;
}
- return pid;
+ close(tramp_data.sockpair[0]);
+ if (using_seccomp)
+ mm_id->sock = tramp_data.sockpair[1];
+ else
+ close(tramp_data.sockpair[1]);
+
+ return 0;
+
+out_kill:
+ os_kill_ptraced_process(mm_id->pid, 1);
+out_close:
+ close(tramp_data.sockpair[0]);
+ close(tramp_data.sockpair[1]);
+
+ mm_id->pid = -1;
- out_kill:
- os_kill_ptraced_process(pid, 1);
return err;
}
@@ -404,7 +554,9 @@ extern unsigned long tt_extra_sched_jiffies;
void userspace(struct uml_pt_regs *regs)
{
int err, status, op, pid = userspace_pid[0];
- siginfo_t si;
+ siginfo_t si_ptrace;
+ siginfo_t *si;
+ int sig;
/* Handle any immediate reschedules or signals */
interrupt_end();
@@ -437,103 +589,177 @@ void userspace(struct uml_pt_regs *regs)
current_mm_sync();
- /* Flush out any pending syscalls */
- err = syscall_stub_flush(current_mm_id());
- if (err) {
- if (err == -ENOMEM)
- report_enomem();
+ if (using_seccomp) {
+ struct mm_id *mm_id = current_mm_id();
+ struct stub_data *proc_data = (void *) mm_id->stack;
+ int ret;
- printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d",
- __func__, -err);
- fatal_sigsegv();
- }
+ ret = set_stub_state(regs, proc_data, singlestepping());
+ if (ret) {
+ printk(UM_KERN_ERR "%s - failed to set regs: %d",
+ __func__, ret);
+ fatal_sigsegv();
+ }
- /*
- * This can legitimately fail if the process loads a
- * bogus value into a segment register. It will
- * segfault and PTRACE_GETREGS will read that value
- * out of the process. However, PTRACE_SETREGS will
- * fail. In this case, there is nothing to do but
- * just kill the process.
- */
- if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) {
- printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n",
- __func__, errno);
- fatal_sigsegv();
- }
+ /* Must have been reset by the syscall caller */
+ if (proc_data->restart_wait != 0)
+ panic("Programming error: Flag to only run syscalls in child was not cleared!");
- if (put_fp_registers(pid, regs->fp)) {
- printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n",
- __func__, errno);
- fatal_sigsegv();
- }
+ /* Mark pending syscalls for flushing */
+ proc_data->syscall_data_len = mm_id->syscall_data_len;
- if (singlestepping())
- op = PTRACE_SYSEMU_SINGLESTEP;
- else
- op = PTRACE_SYSEMU;
+ wait_stub_done_seccomp(mm_id, 0, 0);
- if (ptrace(op, pid, 0, 0)) {
- printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n",
- __func__, op, errno);
- fatal_sigsegv();
- }
+ sig = proc_data->signal;
- CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
- if (err < 0) {
- printk(UM_KERN_ERR "%s - wait failed, errno = %d\n",
- __func__, errno);
- fatal_sigsegv();
- }
+ if (sig == SIGTRAP && proc_data->err != 0) {
+ printk(UM_KERN_ERR "%s - Error flushing stub syscalls",
+ __func__);
+ syscall_stub_dump_error(mm_id);
+ mm_id->syscall_data_len = proc_data->err;
+ fatal_sigsegv();
+ }
- regs->is_user = 1;
- if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) {
- printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n",
- __func__, errno);
- fatal_sigsegv();
- }
+ mm_id->syscall_data_len = 0;
+ mm_id->syscall_fd_num = 0;
- if (get_fp_registers(pid, regs->fp)) {
- printk(UM_KERN_ERR "%s - get_fp_registers failed, errno = %d\n",
- __func__, errno);
- fatal_sigsegv();
- }
+ ret = get_stub_state(regs, proc_data, NULL);
+ if (ret) {
+ printk(UM_KERN_ERR "%s - failed to get regs: %d",
+ __func__, ret);
+ fatal_sigsegv();
+ }
- UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */
+ if (proc_data->si_offset > sizeof(proc_data->sigstack) - sizeof(*si))
+ panic("%s - Invalid siginfo offset from child",
+ __func__);
+ si = (void *)&proc_data->sigstack[proc_data->si_offset];
+
+ regs->is_user = 1;
+
+ /* Fill in ORIG_RAX and extract fault information */
+ PT_SYSCALL_NR(regs->gp) = si->si_syscall;
+ if (sig == SIGSEGV) {
+ mcontext_t *mcontext = (void *)&proc_data->sigstack[proc_data->mctx_offset];
- if (WIFSTOPPED(status)) {
- int sig = WSTOPSIG(status);
+ GET_FAULTINFO_FROM_MC(regs->faultinfo, mcontext);
+ }
+ } else {
+ /* Flush out any pending syscalls */
+ err = syscall_stub_flush(current_mm_id());
+ if (err) {
+ if (err == -ENOMEM)
+ report_enomem();
+
+ printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d",
+ __func__, -err);
+ fatal_sigsegv();
+ }
- /* These signal handlers need the si argument.
- * The SIGIO and SIGALARM handlers which constitute the
- * majority of invocations, do not use it.
+ /*
+ * This can legitimately fail if the process loads a
+ * bogus value into a segment register. It will
+ * segfault and PTRACE_GETREGS will read that value
+ * out of the process. However, PTRACE_SETREGS will
+ * fail. In this case, there is nothing to do but
+ * just kill the process.
*/
- switch (sig) {
- case SIGSEGV:
- case SIGTRAP:
- case SIGILL:
- case SIGBUS:
- case SIGFPE:
- case SIGWINCH:
- ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si);
- break;
+ if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) {
+ printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n",
+ __func__, errno);
+ fatal_sigsegv();
}
- switch (sig) {
- case SIGSEGV:
- if (PTRACE_FULL_FAULTINFO) {
+ if (put_fp_registers(pid, regs->fp)) {
+ printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n",
+ __func__, errno);
+ fatal_sigsegv();
+ }
+
+ if (singlestepping())
+ op = PTRACE_SYSEMU_SINGLESTEP;
+ else
+ op = PTRACE_SYSEMU;
+
+ if (ptrace(op, pid, 0, 0)) {
+ printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n",
+ __func__, op, errno);
+ fatal_sigsegv();
+ }
+
+ CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
+ if (err < 0) {
+ printk(UM_KERN_ERR "%s - wait failed, errno = %d\n",
+ __func__, errno);
+ fatal_sigsegv();
+ }
+
+ regs->is_user = 1;
+ if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) {
+ printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n",
+ __func__, errno);
+ fatal_sigsegv();
+ }
+
+ if (get_fp_registers(pid, regs->fp)) {
+ printk(UM_KERN_ERR "%s - get_fp_registers failed, errno = %d\n",
+ __func__, errno);
+ fatal_sigsegv();
+ }
+
+ if (WIFSTOPPED(status)) {
+ sig = WSTOPSIG(status);
+
+ /*
+ * These signal handlers need the si argument
+ * and SIGSEGV needs the faultinfo.
+ * The SIGIO and SIGALARM handlers which constitute
+ * the majority of invocations, do not use it.
+ */
+ switch (sig) {
+ case SIGSEGV:
get_skas_faultinfo(pid,
&regs->faultinfo);
- (*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si,
- regs, NULL);
+ fallthrough;
+ case SIGTRAP:
+ case SIGILL:
+ case SIGBUS:
+ case SIGFPE:
+ case SIGWINCH:
+ ptrace(PTRACE_GETSIGINFO, pid, 0,
+ (struct siginfo *)&si_ptrace);
+ si = &si_ptrace;
+ break;
+ default:
+ si = NULL;
+ break;
}
- else handle_segv(pid, regs);
+ } else {
+ sig = 0;
+ }
+ }
+
+ UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */
+
+ if (sig) {
+ switch (sig) {
+ case SIGSEGV:
+ if (using_seccomp || PTRACE_FULL_FAULTINFO)
+ (*sig_info[SIGSEGV])(SIGSEGV,
+ (struct siginfo *)si,
+ regs, NULL);
+ else
+ segv(regs->faultinfo, 0, 1, NULL, NULL);
+
+ break;
+ case SIGSYS:
+ handle_syscall(regs);
break;
case SIGTRAP + 0x80:
handle_trap(pid, regs);
break;
case SIGTRAP:
- relay_signal(SIGTRAP, (struct siginfo *)&si, regs, NULL);
+ relay_signal(SIGTRAP, (struct siginfo *)si, regs, NULL);
break;
case SIGALRM:
break;
@@ -543,7 +769,7 @@ void userspace(struct uml_pt_regs *regs)
case SIGFPE:
case SIGWINCH:
block_signals_trace();
- (*sig_info[sig])(sig, (struct siginfo *)&si, regs, NULL);
+ (*sig_info[sig])(sig, (struct siginfo *)si, regs, NULL);
unblock_signals_trace();
break;
default:
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index 93fc82c01aba..a827c2e01aa5 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
+ * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
* Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
*/
@@ -24,6 +25,13 @@
#include <kern_util.h>
#include <mem_user.h>
#include <ptrace_user.h>
+#include <stdbool.h>
+#include <stub-data.h>
+#include <sys/prctl.h>
+#include <linux/seccomp.h>
+#include <linux/filter.h>
+#include <sysdep/mcontext.h>
+#include <sysdep/stub.h>
#include <registers.h>
#include <skas.h>
#include "internal.h"
@@ -224,6 +232,140 @@ static void __init check_ptrace(void)
check_sysemu();
}
+extern unsigned long host_fp_size;
+extern unsigned long exec_regs[MAX_REG_NR];
+extern unsigned long *exec_fp_regs;
+
+__initdata static struct stub_data *seccomp_test_stub_data;
+
+static void __init sigsys_handler(int sig, siginfo_t *info, void *p)
+{
+ ucontext_t *uc = p;
+
+ /* Stow away the location of the mcontext in the stack */
+ seccomp_test_stub_data->mctx_offset = (unsigned long)&uc->uc_mcontext -
+ (unsigned long)&seccomp_test_stub_data->sigstack[0];
+
+ /* Prevent libc from clearing memory (mctx_offset in particular) */
+ syscall(__NR_exit, 0);
+}
+
+static int __init seccomp_helper(void *data)
+{
+ static struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_clock_nanosleep, 1, 0),
+ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP),
+ };
+ static struct sock_fprog prog = {
+ .len = ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ struct sigaction sa;
+
+ /* close_range is needed for the stub */
+ if (stub_syscall3(__NR_close_range, 1, ~0U, 0))
+ exit(1);
+
+ set_sigstack(seccomp_test_stub_data->sigstack,
+ sizeof(seccomp_test_stub_data->sigstack));
+
+ sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
+ sa.sa_sigaction = (void *) sigsys_handler;
+ sa.sa_restorer = NULL;
+ if (sigaction(SIGSYS, &sa, NULL) < 0)
+ exit(2);
+
+ prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ if (syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
+ SECCOMP_FILTER_FLAG_TSYNC, &prog) != 0)
+ exit(3);
+
+ sleep(0);
+
+ /* Never reached. */
+ _exit(4);
+}
+
+static bool __init init_seccomp(void)
+{
+ int pid;
+ int status;
+ int n;
+ unsigned long sp;
+
+ /*
+ * We check that we can install a seccomp filter and then exit(0)
+ * from a trapped syscall.
+ *
+ * Note that we cannot verify that no seccomp filter already exists
+ * for a syscall that results in the process/thread to be killed.
+ */
+
+ os_info("Checking that seccomp filters can be installed...");
+
+ seccomp_test_stub_data = mmap(0, sizeof(*seccomp_test_stub_data),
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANON, 0, 0);
+
+ /* Use the syscall data area as stack, we just need something */
+ sp = (unsigned long)&seccomp_test_stub_data->syscall_data +
+ sizeof(seccomp_test_stub_data->syscall_data) -
+ sizeof(void *);
+ pid = clone(seccomp_helper, (void *)sp, CLONE_VFORK | CLONE_VM, NULL);
+
+ if (pid < 0)
+ fatal_perror("check_seccomp : clone failed");
+
+ CATCH_EINTR(n = waitpid(pid, &status, __WCLONE));
+ if (n < 0)
+ fatal_perror("check_seccomp : waitpid failed");
+
+ if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
+ struct uml_pt_regs *regs;
+ unsigned long fp_size;
+ int r;
+
+ /* Fill in the host_fp_size from the mcontext. */
+ regs = calloc(1, sizeof(struct uml_pt_regs));
+ get_stub_state(regs, seccomp_test_stub_data, &fp_size);
+ host_fp_size = fp_size;
+ free(regs);
+
+ /* Repeat with the correct size */
+ regs = calloc(1, sizeof(struct uml_pt_regs) + host_fp_size);
+ r = get_stub_state(regs, seccomp_test_stub_data, NULL);
+
+ /* Store as the default startup registers */
+ exec_fp_regs = malloc(host_fp_size);
+ memcpy(exec_regs, regs->gp, sizeof(exec_regs));
+ memcpy(exec_fp_regs, regs->fp, host_fp_size);
+
+ munmap(seccomp_test_stub_data, sizeof(*seccomp_test_stub_data));
+
+ free(regs);
+
+ if (r) {
+ os_info("failed to fetch registers: %d\n", r);
+ return false;
+ }
+
+ os_info("OK\n");
+ return true;
+ }
+
+ if (WIFEXITED(status) && WEXITSTATUS(status) == 2)
+ os_info("missing\n");
+ else
+ os_info("error\n");
+
+ munmap(seccomp_test_stub_data, sizeof(*seccomp_test_stub_data));
+ return false;
+}
+
+
static void __init check_coredump_limit(void)
{
struct rlimit lim;
@@ -278,6 +420,44 @@ void __init get_host_cpu_features(
}
}
+static int seccomp_config __initdata;
+
+static int __init uml_seccomp_config(char *line, int *add)
+{
+ *add = 0;
+
+ if (strcmp(line, "off") == 0)
+ seccomp_config = 0;
+ else if (strcmp(line, "auto") == 0)
+ seccomp_config = 1;
+ else if (strcmp(line, "on") == 0)
+ seccomp_config = 2;
+ else
+ fatal("Invalid seccomp option '%s', expected on/auto/off\n",
+ line);
+
+ return 0;
+}
+
+__uml_setup("seccomp=", uml_seccomp_config,
+"seccomp=<on/auto/off>\n"
+" Configure whether or not SECCOMP is used. With SECCOMP, userspace\n"
+" processes work collaboratively with the kernel instead of being\n"
+" traced using ptrace. All syscalls from the application are caught and\n"
+" redirected using a signal. This signal handler in turn is permitted to\n"
+" do the selected set of syscalls to communicate with the UML kernel and\n"
+" do the required memory management.\n"
+"\n"
+" This method is overall faster than the ptrace based userspace, primarily\n"
+" because it reduces the number of context switches for (minor) page faults.\n"
+"\n"
+" However, the SECCOMP filter is not (yet) restrictive enough to prevent\n"
+" userspace from reading and writing all physical memory. Userspace\n"
+" processes could also trick the stub into disabling SIGALRM which\n"
+" prevents it from being interrupted for scheduling purposes.\n"
+"\n"
+" This is insecure and should only be used with a trusted userspace\n\n"
+);
void __init os_early_checks(void)
{
@@ -286,13 +466,24 @@ void __init os_early_checks(void)
/* Print out the core dump limits early */
check_coredump_limit();
- check_ptrace();
-
/* Need to check this early because mmapping happens before the
* kernel is running.
*/
check_tmpexec();
+ if (seccomp_config) {
+ if (init_seccomp()) {
+ using_seccomp = 1;
+ return;
+ }
+
+ if (seccomp_config == 2)
+ fatal("SECCOMP userspace requested but not functional!\n");
+ }
+
+ using_seccomp = 0;
+ check_ptrace();
+
pid = start_ptraced_child();
if (init_pid_registers(pid))
fatal("Failed to initialize default registers");
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 340e5468980e..8bed9030ad47 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -89,7 +89,7 @@ config X86
select ARCH_HAS_DMA_OPS if GART_IOMMU || XEN
select ARCH_HAS_EARLY_DEBUG if KGDB
select ARCH_HAS_ELF_RANDOMIZE
- select ARCH_HAS_EXECMEM_ROX if X86_64
+ select ARCH_HAS_EXECMEM_ROX if X86_64 && STRICT_MODULE_RWX
select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
@@ -147,7 +147,7 @@ config X86
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select ARCH_WANTS_NO_INSTR
select ARCH_WANT_GENERAL_HUGETLB
- select ARCH_WANT_HUGE_PMD_SHARE
+ select ARCH_WANT_HUGE_PMD_SHARE if X86_64
select ARCH_WANT_LD_ORPHAN_WARN
select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP if X86_64
select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP if X86_64
@@ -2695,6 +2695,15 @@ config MITIGATION_ITS
disabled, mitigation cannot be enabled via cmdline.
See <file:Documentation/admin-guide/hw-vuln/indirect-target-selection.rst>
+config MITIGATION_TSA
+ bool "Mitigate Transient Scheduler Attacks"
+ depends on CPU_SUP_AMD
+ default y
+ help
+ Enable mitigation for Transient Scheduler Attacks. TSA is a hardware
+ security vulnerability on AMD CPUs which can lead to forwarding of
+ invalid info to subsequent instructions and thus can affect their
+ timing and thereby cause a leakage.
endif
config ARCH_HAS_ADD_PAGES
diff --git a/arch/x86/coco/sev/Makefile b/arch/x86/coco/sev/Makefile
index db3255b979bd..342d79f0ab6a 100644
--- a/arch/x86/coco/sev/Makefile
+++ b/arch/x86/coco/sev/Makefile
@@ -5,5 +5,6 @@ obj-y += core.o sev-nmi.o vc-handle.o
# Clang 14 and older may fail to respect __no_sanitize_undefined when inlining
UBSAN_SANITIZE_sev-nmi.o := n
-# GCC may fail to respect __no_sanitize_address when inlining
+# GCC may fail to respect __no_sanitize_address or __no_kcsan when inlining
KASAN_SANITIZE_sev-nmi.o := n
+KCSAN_SANITIZE_sev-nmi.o := n
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index b6db4e0b936b..7543a8b52c67 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -88,7 +88,7 @@ static const char * const sev_status_feat_names[] = {
*/
static u64 snp_tsc_scale __ro_after_init;
static u64 snp_tsc_offset __ro_after_init;
-static u64 snp_tsc_freq_khz __ro_after_init;
+static unsigned long snp_tsc_freq_khz __ro_after_init;
DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
@@ -2167,15 +2167,31 @@ static unsigned long securetsc_get_tsc_khz(void)
void __init snp_secure_tsc_init(void)
{
- unsigned long long tsc_freq_mhz;
+ struct snp_secrets_page *secrets;
+ unsigned long tsc_freq_mhz;
+ void *mem;
if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
return;
+ mem = early_memremap_encrypted(sev_secrets_pa, PAGE_SIZE);
+ if (!mem) {
+ pr_err("Unable to get TSC_FACTOR: failed to map the SNP secrets page.\n");
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC);
+ }
+
+ secrets = (__force struct snp_secrets_page *)mem;
+
setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
rdmsrq(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);
- snp_tsc_freq_khz = (unsigned long)(tsc_freq_mhz * 1000);
+
+ /* Extract the GUEST TSC MHZ from BIT[17:0], rest is reserved space */
+ tsc_freq_mhz &= GENMASK_ULL(17, 0);
+
+ snp_tsc_freq_khz = SNP_SCALE_TSC_FREQ(tsc_freq_mhz * 1000, secrets->tsc_factor);
x86_platform.calibrate_cpu = securetsc_get_tsc_khz;
x86_platform.calibrate_tsc = securetsc_get_tsc_khz;
+
+ early_memunmap(mem, PAGE_SIZE);
}
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
index 175958b02f2b..8e9a0cc20a4a 100644
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -36,20 +36,20 @@ EXPORT_SYMBOL_GPL(write_ibpb);
/*
* Define the VERW operand that is disguised as entry code so that
- * it can be referenced with KPTI enabled. This ensure VERW can be
+ * it can be referenced with KPTI enabled. This ensures VERW can be
* used late in exit-to-user path after page tables are switched.
*/
.pushsection .entry.text, "ax"
.align L1_CACHE_BYTES, 0xcc
-SYM_CODE_START_NOALIGN(mds_verw_sel)
+SYM_CODE_START_NOALIGN(x86_verw_sel)
UNWIND_HINT_UNDEFINED
ANNOTATE_NOENDBR
.word __KERNEL_DS
.align L1_CACHE_BYTES, 0xcc
-SYM_CODE_END(mds_verw_sel);
+SYM_CODE_END(x86_verw_sel);
/* For KVM */
-EXPORT_SYMBOL_GPL(mds_verw_sel);
+EXPORT_SYMBOL_GPL(x86_verw_sel);
.popsection
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 466283326630..c2fb729c270e 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2826,7 +2826,7 @@ static void intel_pmu_read_event(struct perf_event *event)
* If the PEBS counters snapshotting is enabled,
* the topdown event is available in PEBS records.
*/
- if (is_topdown_event(event) && !is_pebs_counter_event_group(event))
+ if (is_topdown_count(event) && !is_pebs_counter_event_group(event))
static_call(intel_pmu_update_topdown_event)(event, NULL);
else
intel_pmu_drain_pebs_buffer();
@@ -2900,6 +2900,7 @@ static void intel_pmu_config_acr(int idx, u64 mask, u32 reload)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int msr_b, msr_c;
+ int msr_offset;
if (!mask && !cpuc->acr_cfg_b[idx])
return;
@@ -2907,19 +2908,20 @@ static void intel_pmu_config_acr(int idx, u64 mask, u32 reload)
if (idx < INTEL_PMC_IDX_FIXED) {
msr_b = MSR_IA32_PMC_V6_GP0_CFG_B;
msr_c = MSR_IA32_PMC_V6_GP0_CFG_C;
+ msr_offset = x86_pmu.addr_offset(idx, false);
} else {
msr_b = MSR_IA32_PMC_V6_FX0_CFG_B;
msr_c = MSR_IA32_PMC_V6_FX0_CFG_C;
- idx -= INTEL_PMC_IDX_FIXED;
+ msr_offset = x86_pmu.addr_offset(idx - INTEL_PMC_IDX_FIXED, false);
}
if (cpuc->acr_cfg_b[idx] != mask) {
- wrmsrl(msr_b + x86_pmu.addr_offset(idx, false), mask);
+ wrmsrl(msr_b + msr_offset, mask);
cpuc->acr_cfg_b[idx] = mask;
}
/* Only need to update the reload value when there is a valid config value. */
if (mask && cpuc->acr_cfg_c[idx] != reload) {
- wrmsrl(msr_c + x86_pmu.addr_offset(idx, false), reload);
+ wrmsrl(msr_c + msr_offset, reload);
cpuc->acr_cfg_c[idx] = reload;
}
}
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 3d1d3547095a..afdbda2dd7b7 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -34,6 +34,7 @@
#include <linux/syscore_ops.h>
#include <clocksource/hyperv_timer.h>
#include <linux/highmem.h>
+#include <linux/export.h>
void *hv_hypercall_pg;
EXPORT_SYMBOL_GPL(hv_hypercall_pg);
diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c
index 31f0d29cbc5e..090f5ac9f492 100644
--- a/arch/x86/hyperv/irqdomain.c
+++ b/arch/x86/hyperv/irqdomain.c
@@ -10,6 +10,7 @@
#include <linux/pci.h>
#include <linux/irq.h>
+#include <linux/export.h>
#include <asm/mshyperv.h>
static int hv_map_interrupt(union hv_device_id device_id, bool level,
@@ -46,7 +47,7 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level,
if (nr_bank < 0) {
local_irq_restore(flags);
pr_err("%s: unable to generate VP set\n", __func__);
- return EINVAL;
+ return -EINVAL;
}
intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET;
@@ -66,7 +67,7 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level,
if (!hv_result_success(status))
hv_status_err(status, "\n");
- return hv_result(status);
+ return hv_result_to_errno(status);
}
static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry)
@@ -88,7 +89,10 @@ static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry)
status = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, input, NULL);
local_irq_restore(flags);
- return hv_result(status);
+ if (!hv_result_success(status))
+ hv_status_err(status, "\n");
+
+ return hv_result_to_errno(status);
}
#ifdef CONFIG_PCI_MSI
@@ -169,13 +173,34 @@ static union hv_device_id hv_build_pci_dev_id(struct pci_dev *dev)
return dev_id;
}
-static int hv_map_msi_interrupt(struct pci_dev *dev, int cpu, int vector,
- struct hv_interrupt_entry *entry)
+/**
+ * hv_map_msi_interrupt() - "Map" the MSI IRQ in the hypervisor.
+ * @data: Describes the IRQ
+ * @out_entry: Hypervisor (MSI) interrupt entry (can be NULL)
+ *
+ * Map the IRQ in the hypervisor by issuing a MAP_DEVICE_INTERRUPT hypercall.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int hv_map_msi_interrupt(struct irq_data *data,
+ struct hv_interrupt_entry *out_entry)
{
- union hv_device_id device_id = hv_build_pci_dev_id(dev);
+ struct irq_cfg *cfg = irqd_cfg(data);
+ struct hv_interrupt_entry dummy;
+ union hv_device_id device_id;
+ struct msi_desc *msidesc;
+ struct pci_dev *dev;
+ int cpu;
- return hv_map_interrupt(device_id, false, cpu, vector, entry);
+ msidesc = irq_data_get_msi_desc(data);
+ dev = msi_desc_to_pci_dev(msidesc);
+ device_id = hv_build_pci_dev_id(dev);
+ cpu = cpumask_first(irq_data_get_effective_affinity_mask(data));
+
+ return hv_map_interrupt(device_id, false, cpu, cfg->vector,
+ out_entry ? out_entry : &dummy);
}
+EXPORT_SYMBOL_GPL(hv_map_msi_interrupt);
static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi_msg *msg)
{
@@ -188,13 +213,11 @@ static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi
static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry);
static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
+ struct hv_interrupt_entry *stored_entry;
+ struct irq_cfg *cfg = irqd_cfg(data);
struct msi_desc *msidesc;
struct pci_dev *dev;
- struct hv_interrupt_entry out_entry, *stored_entry;
- struct irq_cfg *cfg = irqd_cfg(data);
- const cpumask_t *affinity;
- int cpu;
- u64 status;
+ int ret;
msidesc = irq_data_get_msi_desc(data);
dev = msi_desc_to_pci_dev(msidesc);
@@ -204,9 +227,6 @@ static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
return;
}
- affinity = irq_data_get_effective_affinity_mask(data);
- cpu = cpumask_first_and(affinity, cpu_online_mask);
-
if (data->chip_data) {
/*
* This interrupt is already mapped. Let's unmap first.
@@ -219,14 +239,12 @@ static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
stored_entry = data->chip_data;
data->chip_data = NULL;
- status = hv_unmap_msi_interrupt(dev, stored_entry);
+ ret = hv_unmap_msi_interrupt(dev, stored_entry);
kfree(stored_entry);
- if (status != HV_STATUS_SUCCESS) {
- hv_status_debug(status, "failed to unmap\n");
+ if (ret)
return;
- }
}
stored_entry = kzalloc(sizeof(*stored_entry), GFP_ATOMIC);
@@ -235,15 +253,14 @@ static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
return;
}
- status = hv_map_msi_interrupt(dev, cpu, cfg->vector, &out_entry);
- if (status != HV_STATUS_SUCCESS) {
+ ret = hv_map_msi_interrupt(data, stored_entry);
+ if (ret) {
kfree(stored_entry);
return;
}
- *stored_entry = out_entry;
data->chip_data = stored_entry;
- entry_to_msi_msg(&out_entry, msg);
+ entry_to_msi_msg(data->chip_data, msg);
return;
}
@@ -257,7 +274,6 @@ static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd)
{
struct hv_interrupt_entry old_entry;
struct msi_msg msg;
- u64 status;
if (!irqd->chip_data) {
pr_debug("%s: no chip data\n!", __func__);
@@ -270,10 +286,7 @@ static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd)
kfree(irqd->chip_data);
irqd->chip_data = NULL;
- status = hv_unmap_msi_interrupt(dev, &old_entry);
-
- if (status != HV_STATUS_SUCCESS)
- hv_status_err(status, "\n");
+ (void)hv_unmap_msi_interrupt(dev, &old_entry);
}
static void hv_msi_free_irq(struct irq_domain *domain,
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index e93a2f488ff7..ade6c665c97e 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -10,6 +10,7 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/cpu.h>
+#include <linux/export.h>
#include <asm/svm.h>
#include <asm/sev.h>
#include <asm/io.h>
diff --git a/arch/x86/hyperv/nested.c b/arch/x86/hyperv/nested.c
index 1083dc8646f9..8ccbb7c4fc27 100644
--- a/arch/x86/hyperv/nested.c
+++ b/arch/x86/hyperv/nested.c
@@ -11,6 +11,7 @@
#include <linux/types.h>
+#include <linux/export.h>
#include <hyperv/hvhdk.h>
#include <asm/mshyperv.h>
#include <asm/tlbflush.h>
diff --git a/arch/x86/include/asm/amd/fch.h b/arch/x86/include/asm/amd/fch.h
deleted file mode 100644
index 2cf5153edbc2..000000000000
--- a/arch/x86/include/asm/amd/fch.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_AMD_FCH_H_
-#define _ASM_X86_AMD_FCH_H_
-
-#define FCH_PM_BASE 0xFED80300
-
-/* Register offsets from PM base: */
-#define FCH_PM_DECODEEN 0x00
-#define FCH_PM_DECODEEN_SMBUS0SEL GENMASK(20, 19)
-#define FCH_PM_SCRATCH 0x80
-#define FCH_PM_S5_RESET_STATUS 0xC0
-
-#endif /* _ASM_X86_AMD_FCH_H_ */
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index ee176236c2be..286d509f9363 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -456,6 +456,7 @@
#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */
#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */
#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */
+#define X86_FEATURE_VERW_CLEAR (20*32+ 5) /* The memory form of VERW mitigates TSA */
#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */
#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */
#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */
@@ -487,6 +488,9 @@
#define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */
#define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */
#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */
+#define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */
+#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */
+#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */
/*
* BUG word(s)
@@ -542,5 +546,5 @@
#define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */
#define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */
#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */
-
+#define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 363110e6b2e3..a2c1f2d24b64 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -9,6 +9,14 @@
#include <asm/cpufeature.h>
#include <asm/msr.h>
+/*
+ * Define bits that are always set to 1 in DR7, only bit 10 is
+ * architecturally reserved to '1'.
+ *
+ * This is also the init/reset value for DR7.
+ */
+#define DR7_FIXED_1 0x00000400
+
DECLARE_PER_CPU(unsigned long, cpu_dr7);
#ifndef CONFIG_PARAVIRT_XXL
@@ -100,8 +108,8 @@ static __always_inline void native_set_debugreg(int regno, unsigned long value)
static inline void hw_breakpoint_disable(void)
{
- /* Zero the control register for HW Breakpoint */
- set_debugreg(0UL, 7);
+ /* Reset the control register for HW Breakpoint */
+ set_debugreg(DR7_FIXED_1, 7);
/* Zero-out the individual HW breakpoint address registers */
set_debugreg(0UL, 0);
@@ -125,9 +133,12 @@ static __always_inline unsigned long local_db_save(void)
return 0;
get_debugreg(dr7, 7);
- dr7 &= ~0x400; /* architecturally set bit */
+
+ /* Architecturally set bit */
+ dr7 &= ~DR7_FIXED_1;
if (dr7)
- set_debugreg(0, 7);
+ set_debugreg(DR7_FIXED_1, 7);
+
/*
* Ensure the compiler doesn't lower the above statements into
* the critical section; disabling breakpoints late would not
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 9a9b21b78905..b30e5474c18e 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -44,13 +44,13 @@ static __always_inline void native_irq_enable(void)
static __always_inline void native_safe_halt(void)
{
- mds_idle_clear_cpu_buffers();
+ x86_idle_clear_cpu_buffers();
asm volatile("sti; hlt": : :"memory");
}
static __always_inline void native_halt(void)
{
- mds_idle_clear_cpu_buffers();
+ x86_idle_clear_cpu_buffers();
asm volatile("hlt": : :"memory");
}
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b4a391929cdb..f7af967aa16f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -31,6 +31,7 @@
#include <asm/apic.h>
#include <asm/pvclock-abi.h>
+#include <asm/debugreg.h>
#include <asm/desc.h>
#include <asm/mtrr.h>
#include <asm/msr-index.h>
@@ -249,7 +250,6 @@ enum x86_intercept_stage;
#define DR7_BP_EN_MASK 0x000000ff
#define DR7_GE (1 << 9)
#define DR7_GD (1 << 13)
-#define DR7_FIXED_1 0x00000400
#define DR7_VOLATILE 0xffff2bff
#define KVM_GUESTDBG_VALID_MASK \
@@ -700,8 +700,13 @@ struct kvm_vcpu_hv {
struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS];
- /* Preallocated buffer for handling hypercalls passing sparse vCPU set */
+ /*
+ * Preallocated buffers for handling hypercalls that pass sparse vCPU
+ * sets (for high vCPU counts, they're too large to comfortably fit on
+ * the stack).
+ */
u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS];
+ DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
struct hv_vp_assist_page vp_assist_page;
@@ -764,6 +769,7 @@ enum kvm_only_cpuid_leafs {
CPUID_8000_0022_EAX,
CPUID_7_2_EDX,
CPUID_24_0_EBX,
+ CPUID_8000_0021_ECX,
NR_KVM_CPU_CAPS,
NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index e988bac0a4a1..3c2de4ce3b10 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -5,12 +5,20 @@
#include <asm-generic/module.h>
#include <asm/orc_types.h>
+struct its_array {
+#ifdef CONFIG_MITIGATION_ITS
+ void **pages;
+ int num;
+#endif
+};
+
struct mod_arch_specific {
#ifdef CONFIG_UNWINDER_ORC
unsigned int num_orcs;
int *orc_unwind_ip;
struct orc_entry *orc_unwind;
#endif
+ struct its_array its_pages;
};
#endif /* _ASM_X86_MODULE_H */
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index e1752ba47e67..abc4659f5809 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -112,12 +112,6 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
return hv_status;
}
-/* Hypercall to the L0 hypervisor */
-static inline u64 hv_do_nested_hypercall(u64 control, void *input, void *output)
-{
- return hv_do_hypercall(control | HV_HYPERCALL_NESTED, input, output);
-}
-
/* Fast hypercall with 8 bytes of input and no output */
static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1)
{
@@ -165,13 +159,6 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
return _hv_do_fast_hypercall8(control, input1);
}
-static inline u64 hv_do_fast_nested_hypercall8(u16 code, u64 input1)
-{
- u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED;
-
- return _hv_do_fast_hypercall8(control, input1);
-}
-
/* Fast hypercall with 16 bytes of input */
static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2)
{
@@ -223,13 +210,6 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
return _hv_do_fast_hypercall16(control, input1, input2);
}
-static inline u64 hv_do_fast_nested_hypercall16(u16 code, u64 input1, u64 input2)
-{
- u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED;
-
- return _hv_do_fast_hypercall16(control, input1, input2);
-}
-
extern struct hv_vp_assist_page **hv_vp_assist_page;
static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
@@ -262,6 +242,8 @@ static inline void hv_apic_init(void) {}
struct irq_domain *hv_create_pci_msi_domain(void);
+int hv_map_msi_interrupt(struct irq_data *data,
+ struct hv_interrupt_entry *out_entry);
int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
struct hv_interrupt_entry *entry);
int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b7dded3c8113..5cfb5d74dd5f 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -628,6 +628,7 @@
#define MSR_AMD64_OSVW_STATUS 0xc0010141
#define MSR_AMD_PPIN_CTL 0xc00102f0
#define MSR_AMD_PPIN 0xc00102f1
+#define MSR_AMD64_CPUID_FN_7 0xc0011002
#define MSR_AMD64_CPUID_FN_1 0xc0011004
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index dd2b129b0418..6ca6516c7492 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -43,8 +43,6 @@ static __always_inline void __monitorx(const void *eax, u32 ecx, u32 edx)
static __always_inline void __mwait(u32 eax, u32 ecx)
{
- mds_idle_clear_cpu_buffers();
-
/*
* Use the instruction mnemonic with implicit operands, as the LLVM
* assembler fails to assemble the mnemonic with explicit operands:
@@ -80,7 +78,7 @@ static __always_inline void __mwait(u32 eax, u32 ecx)
*/
static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx)
{
- /* No MDS buffer clear as this is AMD/HYGON only */
+ /* No need for TSA buffer clearing on AMD */
/* "mwaitx %eax, %ebx, %ecx" */
asm volatile(".byte 0x0f, 0x01, 0xfb"
@@ -98,7 +96,6 @@ static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx)
*/
static __always_inline void __sti_mwait(u32 eax, u32 ecx)
{
- mds_idle_clear_cpu_buffers();
asm volatile("sti; mwait" :: "a" (eax), "c" (ecx));
}
@@ -115,21 +112,29 @@ static __always_inline void __sti_mwait(u32 eax, u32 ecx)
*/
static __always_inline void mwait_idle_with_hints(u32 eax, u32 ecx)
{
+ if (need_resched())
+ return;
+
+ x86_idle_clear_cpu_buffers();
+
if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) {
const void *addr = &current_thread_info()->flags;
alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr));
__monitor(addr, 0, 0);
- if (!need_resched()) {
- if (ecx & 1) {
- __mwait(eax, ecx);
- } else {
- __sti_mwait(eax, ecx);
- raw_local_irq_disable();
- }
+ if (need_resched())
+ goto out;
+
+ if (ecx & 1) {
+ __mwait(eax, ecx);
+ } else {
+ __sti_mwait(eax, ecx);
+ raw_local_irq_disable();
}
}
+
+out:
current_clr_polling();
}
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 20d754b98f3f..10f261678749 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -302,25 +302,31 @@
.endm
/*
- * Macro to execute VERW instruction that mitigate transient data sampling
- * attacks such as MDS. On affected systems a microcode update overloaded VERW
- * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
- *
+ * Macro to execute VERW insns that mitigate transient data sampling
+ * attacks such as MDS or TSA. On affected systems a microcode update
+ * overloaded VERW insns to also clear the CPU buffers. VERW clobbers
+ * CFLAGS.ZF.
* Note: Only the memory operand variant of VERW clears the CPU buffers.
*/
-.macro CLEAR_CPU_BUFFERS
+.macro __CLEAR_CPU_BUFFERS feature
#ifdef CONFIG_X86_64
- ALTERNATIVE "", "verw mds_verw_sel(%rip)", X86_FEATURE_CLEAR_CPU_BUF
+ ALTERNATIVE "", "verw x86_verw_sel(%rip)", \feature
#else
/*
* In 32bit mode, the memory operand must be a %cs reference. The data
* segments may not be usable (vm86 mode), and the stack segment may not
* be flat (ESPFIX32).
*/
- ALTERNATIVE "", "verw %cs:mds_verw_sel", X86_FEATURE_CLEAR_CPU_BUF
+ ALTERNATIVE "", "verw %cs:x86_verw_sel", \feature
#endif
.endm
+#define CLEAR_CPU_BUFFERS \
+ __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF
+
+#define VM_CLEAR_CPU_BUFFERS \
+ __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF_VM
+
#ifdef CONFIG_X86_64
.macro CLEAR_BRANCH_HISTORY
ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
@@ -567,24 +573,24 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
DECLARE_STATIC_KEY_FALSE(switch_vcpu_ibpb);
-DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
+DECLARE_STATIC_KEY_FALSE(cpu_buf_idle_clear);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
DECLARE_STATIC_KEY_FALSE(cpu_buf_vm_clear);
-extern u16 mds_verw_sel;
+extern u16 x86_verw_sel;
#include <asm/segment.h>
/**
- * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
+ * x86_clear_cpu_buffers - Buffer clearing support for different x86 CPU vulns
*
* This uses the otherwise unused and obsolete VERW instruction in
* combination with microcode which triggers a CPU buffer flush when the
* instruction is executed.
*/
-static __always_inline void mds_clear_cpu_buffers(void)
+static __always_inline void x86_clear_cpu_buffers(void)
{
static const u16 ds = __KERNEL_DS;
@@ -601,14 +607,15 @@ static __always_inline void mds_clear_cpu_buffers(void)
}
/**
- * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
+ * x86_idle_clear_cpu_buffers - Buffer clearing support in idle for the MDS
+ * and TSA vulnerabilities.
*
* Clear CPU buffers if the corresponding static key is enabled
*/
-static __always_inline void mds_idle_clear_cpu_buffers(void)
+static __always_inline void x86_idle_clear_cpu_buffers(void)
{
- if (static_branch_likely(&mds_idle_clear))
- mds_clear_cpu_buffers();
+ if (static_branch_likely(&cpu_buf_idle_clear))
+ x86_clear_cpu_buffers();
}
#endif /* __ASSEMBLER__ */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 774430c3abff..97954c936c54 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1561,7 +1561,7 @@ static inline pte_t pte_swp_mkexclusive(pte_t pte)
return pte_set_flags(pte, _PAGE_SWP_EXCLUSIVE);
}
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_flags(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 58e028d42e41..a631f7d7c0c0 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -223,6 +223,18 @@ struct snp_tsc_info_resp {
u8 rsvd2[100];
} __packed;
+/*
+ * Obtain the mean TSC frequency by decreasing the nominal TSC frequency with
+ * TSC_FACTOR as documented in the SNP Firmware ABI specification:
+ *
+ * GUEST_TSC_FREQ * (1 - (TSC_FACTOR * 0.00001))
+ *
+ * which is equivalent to:
+ *
+ * GUEST_TSC_FREQ -= (GUEST_TSC_FREQ * TSC_FACTOR) / 100000;
+ */
+#define SNP_SCALE_TSC_FREQ(freq, factor) ((freq) - (freq) * (factor) / 100000)
+
struct snp_guest_req {
void *req_buf;
size_t req_sz;
@@ -282,8 +294,11 @@ struct snp_secrets_page {
u8 svsm_guest_vmpl;
u8 rsvd3[3];
+ /* The percentage decrease from nominal to mean TSC frequency. */
+ u32 tsc_factor;
+
/* Remainder of page */
- u8 rsvd4[3744];
+ u8 rsvd4[3740];
} __packed;
struct snp_msg_desc {
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index 2f3820342598..8bc074c8d7c6 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -72,6 +72,7 @@
#define TDVMCALL_MAP_GPA 0x10001
#define TDVMCALL_GET_QUOTE 0x10002
#define TDVMCALL_REPORT_FATAL_ERROR 0x10003
+#define TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT 0x10004ULL
/*
* TDG.VP.VMCALL Status Codes (returned in R10)
@@ -80,6 +81,7 @@
#define TDVMCALL_STATUS_RETRY 0x0000000000000001ULL
#define TDVMCALL_STATUS_INVALID_OPERAND 0x8000000000000000ULL
#define TDVMCALL_STATUS_ALIGN_ERROR 0x8000000000000002ULL
+#define TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED 0x8000000000000003ULL
/*
* Bitmasks of exposed registers (with VMM).
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index e770c4fc47f4..8727c7e21dd1 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -24,4 +24,26 @@ int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
+/*
+ * To prevent immediate repeat of single step trap on return from SIGTRAP
+ * handler if the trap flag (TF) is set without an external debugger attached,
+ * clear the software event flag in the augmented SS, ensuring no single-step
+ * trap is pending upon ERETU completion.
+ *
+ * Note, this function should be called in sigreturn() before the original
+ * state is restored to make sure the TF is read from the entry frame.
+ */
+static __always_inline void prevent_single_step_upon_eretu(struct pt_regs *regs)
+{
+ /*
+ * If the trap flag (TF) is set, i.e., the sigreturn() SYSCALL instruction
+ * is being single-stepped, do not clear the software event flag in the
+ * augmented SS, thus a debugger won't skip over the following instruction.
+ */
+#ifdef CONFIG_X86_FRED
+ if (!(regs->flags & X86_EFLAGS_TF))
+ regs->fred_ss.swevent = 0;
+#endif
+}
+
#endif /* _ASM_X86_SIGHANDLING_H */
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 8b19294600c4..7ddef3a69866 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -106,7 +106,7 @@ void tdx_init(void);
typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args);
-static inline u64 sc_retry(sc_func_t func, u64 fn,
+static __always_inline u64 sc_retry(sc_func_t func, u64 fn,
struct tdx_module_args *args)
{
int retry = RDRAND_RETRY_LOOPS;
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 0454d5e60e5d..721b408d9a67 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -44,16 +44,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_after_save,
TP_ARGS(fpu)
);
-DEFINE_EVENT(x86_fpu, x86_fpu_before_restore,
- TP_PROTO(struct fpu *fpu),
- TP_ARGS(fpu)
-);
-
-DEFINE_EVENT(x86_fpu, x86_fpu_after_restore,
- TP_PROTO(struct fpu *fpu),
- TP_ARGS(fpu)
-);
-
DEFINE_EVENT(x86_fpu, x86_fpu_regs_activated,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
@@ -64,11 +54,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_regs_deactivated,
TP_ARGS(fpu)
);
-DEFINE_EVENT(x86_fpu, x86_fpu_init_state,
- TP_PROTO(struct fpu *fpu),
- TP_ARGS(fpu)
-);
-
DEFINE_EVENT(x86_fpu, x86_fpu_dropped,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h
index 0007ba077c0c..41da492dfb01 100644
--- a/arch/x86/include/uapi/asm/debugreg.h
+++ b/arch/x86/include/uapi/asm/debugreg.h
@@ -15,7 +15,26 @@
which debugging register was responsible for the trap. The other bits
are either reserved or not of interest to us. */
-/* Define reserved bits in DR6 which are always set to 1 */
+/*
+ * Define bits in DR6 which are set to 1 by default.
+ *
+ * This is also the DR6 architectural value following Power-up, Reset or INIT.
+ *
+ * Note, with the introduction of Bus Lock Detection (BLD) and Restricted
+ * Transactional Memory (RTM), the DR6 register has been modified:
+ *
+ * 1) BLD flag (bit 11) is no longer reserved to 1 if the CPU supports
+ * Bus Lock Detection. The assertion of a bus lock could clear it.
+ *
+ * 2) RTM flag (bit 16) is no longer reserved to 1 if the CPU supports
+ * restricted transactional memory. #DB occurred inside an RTM region
+ * could clear it.
+ *
+ * Apparently, DR6.BLD and DR6.RTM are active low bits.
+ *
+ * As a result, DR6_RESERVED is an incorrect name now, but it is kept for
+ * compatibility.
+ */
#define DR6_RESERVED (0xFFFF0FF0)
#define DR_TRAP0 (0x1) /* db0 */
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 6f3499507c5e..0f15d683817d 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -965,7 +965,13 @@ struct kvm_tdx_cmd {
struct kvm_tdx_capabilities {
__u64 supported_attrs;
__u64 supported_xfam;
- __u64 reserved[254];
+
+ __u64 kernel_tdvmcallinfo_1_r11;
+ __u64 user_tdvmcallinfo_1_r11;
+ __u64 kernel_tdvmcallinfo_1_r12;
+ __u64 user_tdvmcallinfo_1_r12;
+
+ __u64 reserved[250];
/* Configurable CPUID bits for userspace */
struct kvm_cpuid2 cpuid;
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 99a783fd4691..0d2a6d953be9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -3,7 +3,7 @@
# Makefile for the linux kernel.
#
-extra-y += vmlinux.lds
+always-$(KBUILD_BUILTIN) += vmlinux.lds
CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index ecfe7b497cad..ea1d984166cd 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -116,6 +116,24 @@ static struct module *its_mod;
#endif
static void *its_page;
static unsigned int its_offset;
+struct its_array its_pages;
+
+static void *__its_alloc(struct its_array *pages)
+{
+ void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE);
+ if (!page)
+ return NULL;
+
+ void *tmp = krealloc(pages->pages, (pages->num+1) * sizeof(void *),
+ GFP_KERNEL);
+ if (!tmp)
+ return NULL;
+
+ pages->pages = tmp;
+ pages->pages[pages->num++] = page;
+
+ return no_free_ptr(page);
+}
/* Initialize a thunk with the "jmp *reg; int3" instructions. */
static void *its_init_thunk(void *thunk, int reg)
@@ -151,6 +169,21 @@ static void *its_init_thunk(void *thunk, int reg)
return thunk + offset;
}
+static void its_pages_protect(struct its_array *pages)
+{
+ for (int i = 0; i < pages->num; i++) {
+ void *page = pages->pages[i];
+ execmem_restore_rox(page, PAGE_SIZE);
+ }
+}
+
+static void its_fini_core(void)
+{
+ if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
+ its_pages_protect(&its_pages);
+ kfree(its_pages.pages);
+}
+
#ifdef CONFIG_MODULES
void its_init_mod(struct module *mod)
{
@@ -173,10 +206,8 @@ void its_fini_mod(struct module *mod)
its_page = NULL;
mutex_unlock(&text_mutex);
- for (int i = 0; i < mod->its_num_pages; i++) {
- void *page = mod->its_page_array[i];
- execmem_restore_rox(page, PAGE_SIZE);
- }
+ if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
+ its_pages_protect(&mod->arch.its_pages);
}
void its_free_mod(struct module *mod)
@@ -184,37 +215,33 @@ void its_free_mod(struct module *mod)
if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS))
return;
- for (int i = 0; i < mod->its_num_pages; i++) {
- void *page = mod->its_page_array[i];
+ for (int i = 0; i < mod->arch.its_pages.num; i++) {
+ void *page = mod->arch.its_pages.pages[i];
execmem_free(page);
}
- kfree(mod->its_page_array);
+ kfree(mod->arch.its_pages.pages);
}
#endif /* CONFIG_MODULES */
static void *its_alloc(void)
{
- void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE);
-
- if (!page)
- return NULL;
+ struct its_array *pages = &its_pages;
+ void *page;
#ifdef CONFIG_MODULES
- if (its_mod) {
- void *tmp = krealloc(its_mod->its_page_array,
- (its_mod->its_num_pages+1) * sizeof(void *),
- GFP_KERNEL);
- if (!tmp)
- return NULL;
+ if (its_mod)
+ pages = &its_mod->arch.its_pages;
+#endif
- its_mod->its_page_array = tmp;
- its_mod->its_page_array[its_mod->its_num_pages++] = page;
+ page = __its_alloc(pages);
+ if (!page)
+ return NULL;
- execmem_make_temp_rw(page, PAGE_SIZE);
- }
-#endif /* CONFIG_MODULES */
+ execmem_make_temp_rw(page, PAGE_SIZE);
+ if (pages == &its_pages)
+ set_memory_x((unsigned long)page, 1);
- return no_free_ptr(page);
+ return page;
}
static void *its_allocate_thunk(int reg)
@@ -268,7 +295,9 @@ u8 *its_static_thunk(int reg)
return thunk;
}
-#endif
+#else
+static inline void its_fini_core(void) {}
+#endif /* CONFIG_MITIGATION_ITS */
/*
* Nomenclature for variable names to simplify and clarify this code and ease
@@ -2338,6 +2367,8 @@ void __init alternative_instructions(void)
apply_retpolines(__retpoline_sites, __retpoline_sites_end);
apply_returns(__return_sites, __return_sites_end);
+ its_fini_core();
+
/*
* Adjust all CALL instructions to point to func()-10, including
* those in .altinstr_replacement.
@@ -3107,6 +3138,6 @@ void __ref smp_text_poke_batch_add(void *addr, const void *opcode, size_t len, c
*/
void __ref smp_text_poke_single(void *addr, const void *opcode, size_t len, const void *emulate)
{
- __smp_text_poke_batch_add(addr, opcode, len, emulate);
+ smp_text_poke_batch_add(addr, opcode, len, emulate);
smp_text_poke_batch_finish();
}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 93da466dfe2c..329ee185d8cc 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -9,7 +9,7 @@
#include <linux/sched/clock.h>
#include <linux/random.h>
#include <linux/topology.h>
-#include <asm/amd/fch.h>
+#include <linux/platform_data/x86/amd-fch.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cacheinfo.h>
@@ -31,7 +31,7 @@
#include "cpu.h"
-u16 invlpgb_count_max __ro_after_init;
+u16 invlpgb_count_max __ro_after_init = 1;
static inline int rdmsrq_amd_safe(unsigned msr, u64 *p)
{
@@ -377,6 +377,47 @@ static void bsp_determine_snp(struct cpuinfo_x86 *c)
#endif
}
+#define ZEN_MODEL_STEP_UCODE(fam, model, step, ucode) \
+ X86_MATCH_VFM_STEPS(VFM_MAKE(X86_VENDOR_AMD, fam, model), \
+ step, step, ucode)
+
+static const struct x86_cpu_id amd_tsa_microcode[] = {
+ ZEN_MODEL_STEP_UCODE(0x19, 0x01, 0x1, 0x0a0011d7),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x01, 0x2, 0x0a00123b),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x08, 0x2, 0x0a00820d),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x11, 0x1, 0x0a10114c),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x11, 0x2, 0x0a10124c),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x18, 0x1, 0x0a108109),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x21, 0x0, 0x0a20102e),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x21, 0x2, 0x0a201211),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x44, 0x1, 0x0a404108),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x50, 0x0, 0x0a500012),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x61, 0x2, 0x0a60120a),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x74, 0x1, 0x0a704108),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x75, 0x2, 0x0a705208),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x78, 0x0, 0x0a708008),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x7c, 0x0, 0x0a70c008),
+ ZEN_MODEL_STEP_UCODE(0x19, 0xa0, 0x2, 0x0aa00216),
+ {},
+};
+
+static void tsa_init(struct cpuinfo_x86 *c)
+{
+ if (cpu_has(c, X86_FEATURE_HYPERVISOR))
+ return;
+
+ if (cpu_has(c, X86_FEATURE_ZEN3) ||
+ cpu_has(c, X86_FEATURE_ZEN4)) {
+ if (x86_match_min_microcode_rev(amd_tsa_microcode))
+ setup_force_cpu_cap(X86_FEATURE_VERW_CLEAR);
+ else
+ pr_debug("%s: current revision: 0x%x\n", __func__, c->microcode);
+ } else {
+ setup_force_cpu_cap(X86_FEATURE_TSA_SQ_NO);
+ setup_force_cpu_cap(X86_FEATURE_TSA_L1_NO);
+ }
+}
+
static void bsp_init_amd(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
@@ -489,6 +530,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
}
bsp_determine_snp(c);
+
+ tsa_init(c);
+
return;
warn:
@@ -930,6 +974,16 @@ static void init_amd_zen2(struct cpuinfo_x86 *c)
init_spectral_chicken(c);
fix_erratum_1386(c);
zen2_zenbleed_check(c);
+
+ /* Disable RDSEED on AMD Cyan Skillfish because of an error. */
+ if (c->x86_model == 0x47 && c->x86_stepping == 0x0) {
+ clear_cpu_cap(c, X86_FEATURE_RDSEED);
+ msr_clear_bit(MSR_AMD64_CPUID_FN_7, 18);
+ pr_emerg("RDSEED is not reliable on this platform; disabling.\n");
+ }
+
+ /* Correct misconfigured CPUID on some clients. */
+ clear_cpu_cap(c, X86_FEATURE_INVLPGB);
}
static void init_amd_zen3(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 7f94e6a5497d..f4d3abb12317 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -94,6 +94,8 @@ static void __init bhi_apply_mitigation(void);
static void __init its_select_mitigation(void);
static void __init its_update_mitigation(void);
static void __init its_apply_mitigation(void);
+static void __init tsa_select_mitigation(void);
+static void __init tsa_apply_mitigation(void);
/* The base value of the SPEC_CTRL MSR without task-specific bits set */
u64 x86_spec_ctrl_base;
@@ -169,9 +171,9 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
DEFINE_STATIC_KEY_FALSE(switch_vcpu_ibpb);
EXPORT_SYMBOL_GPL(switch_vcpu_ibpb);
-/* Control MDS CPU buffer clear before idling (halt, mwait) */
-DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
-EXPORT_SYMBOL_GPL(mds_idle_clear);
+/* Control CPU buffer clear before idling (halt, mwait) */
+DEFINE_STATIC_KEY_FALSE(cpu_buf_idle_clear);
+EXPORT_SYMBOL_GPL(cpu_buf_idle_clear);
/*
* Controls whether l1d flush based mitigations are enabled,
@@ -225,6 +227,7 @@ void __init cpu_select_mitigations(void)
gds_select_mitigation();
its_select_mitigation();
bhi_select_mitigation();
+ tsa_select_mitigation();
/*
* After mitigations are selected, some may need to update their
@@ -272,6 +275,7 @@ void __init cpu_select_mitigations(void)
gds_apply_mitigation();
its_apply_mitigation();
bhi_apply_mitigation();
+ tsa_apply_mitigation();
}
/*
@@ -637,7 +641,7 @@ static void __init mmio_apply_mitigation(void)
* is required irrespective of SMT state.
*/
if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO))
- static_branch_enable(&mds_idle_clear);
+ static_branch_enable(&cpu_buf_idle_clear);
if (mmio_nosmt || cpu_mitigations_auto_nosmt())
cpu_smt_disable(false);
@@ -1488,6 +1492,94 @@ static void __init its_apply_mitigation(void)
}
#undef pr_fmt
+#define pr_fmt(fmt) "Transient Scheduler Attacks: " fmt
+
+enum tsa_mitigations {
+ TSA_MITIGATION_NONE,
+ TSA_MITIGATION_AUTO,
+ TSA_MITIGATION_UCODE_NEEDED,
+ TSA_MITIGATION_USER_KERNEL,
+ TSA_MITIGATION_VM,
+ TSA_MITIGATION_FULL,
+};
+
+static const char * const tsa_strings[] = {
+ [TSA_MITIGATION_NONE] = "Vulnerable",
+ [TSA_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode",
+ [TSA_MITIGATION_USER_KERNEL] = "Mitigation: Clear CPU buffers: user/kernel boundary",
+ [TSA_MITIGATION_VM] = "Mitigation: Clear CPU buffers: VM",
+ [TSA_MITIGATION_FULL] = "Mitigation: Clear CPU buffers",
+};
+
+static enum tsa_mitigations tsa_mitigation __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_TSA) ? TSA_MITIGATION_AUTO : TSA_MITIGATION_NONE;
+
+static int __init tsa_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!strcmp(str, "off"))
+ tsa_mitigation = TSA_MITIGATION_NONE;
+ else if (!strcmp(str, "on"))
+ tsa_mitigation = TSA_MITIGATION_FULL;
+ else if (!strcmp(str, "user"))
+ tsa_mitigation = TSA_MITIGATION_USER_KERNEL;
+ else if (!strcmp(str, "vm"))
+ tsa_mitigation = TSA_MITIGATION_VM;
+ else
+ pr_err("Ignoring unknown tsa=%s option.\n", str);
+
+ return 0;
+}
+early_param("tsa", tsa_parse_cmdline);
+
+static void __init tsa_select_mitigation(void)
+{
+ if (cpu_mitigations_off() || !boot_cpu_has_bug(X86_BUG_TSA)) {
+ tsa_mitigation = TSA_MITIGATION_NONE;
+ return;
+ }
+
+ if (tsa_mitigation == TSA_MITIGATION_NONE)
+ return;
+
+ if (!boot_cpu_has(X86_FEATURE_VERW_CLEAR)) {
+ tsa_mitigation = TSA_MITIGATION_UCODE_NEEDED;
+ goto out;
+ }
+
+ if (tsa_mitigation == TSA_MITIGATION_AUTO)
+ tsa_mitigation = TSA_MITIGATION_FULL;
+
+ /*
+ * No need to set verw_clear_cpu_buf_mitigation_selected - it
+ * doesn't fit all cases here and it is not needed because this
+ * is the only VERW-based mitigation on AMD.
+ */
+out:
+ pr_info("%s\n", tsa_strings[tsa_mitigation]);
+}
+
+static void __init tsa_apply_mitigation(void)
+{
+ switch (tsa_mitigation) {
+ case TSA_MITIGATION_USER_KERNEL:
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
+ break;
+ case TSA_MITIGATION_VM:
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM);
+ break;
+ case TSA_MITIGATION_FULL:
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM);
+ break;
+ default:
+ break;
+ }
+}
+
+#undef pr_fmt
#define pr_fmt(fmt) "Spectre V2 : " fmt
static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init =
@@ -2249,10 +2341,10 @@ static void update_mds_branch_idle(void)
return;
if (sched_smt_active()) {
- static_branch_enable(&mds_idle_clear);
+ static_branch_enable(&cpu_buf_idle_clear);
} else if (mmio_mitigation == MMIO_MITIGATION_OFF ||
(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) {
- static_branch_disable(&mds_idle_clear);
+ static_branch_disable(&cpu_buf_idle_clear);
}
}
@@ -2316,6 +2408,25 @@ void cpu_bugs_smt_update(void)
break;
}
+ switch (tsa_mitigation) {
+ case TSA_MITIGATION_USER_KERNEL:
+ case TSA_MITIGATION_VM:
+ case TSA_MITIGATION_AUTO:
+ case TSA_MITIGATION_FULL:
+ /*
+ * TSA-SQ can potentially lead to info leakage between
+ * SMT threads.
+ */
+ if (sched_smt_active())
+ static_branch_enable(&cpu_buf_idle_clear);
+ else
+ static_branch_disable(&cpu_buf_idle_clear);
+ break;
+ case TSA_MITIGATION_NONE:
+ case TSA_MITIGATION_UCODE_NEEDED:
+ break;
+ }
+
mutex_unlock(&spec_ctrl_mutex);
}
@@ -3265,6 +3376,11 @@ static ssize_t gds_show_state(char *buf)
return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]);
}
+static ssize_t tsa_show_state(char *buf)
+{
+ return sysfs_emit(buf, "%s\n", tsa_strings[tsa_mitigation]);
+}
+
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
char *buf, unsigned int bug)
{
@@ -3328,6 +3444,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_ITS:
return its_show_state(buf);
+ case X86_BUG_TSA:
+ return tsa_show_state(buf);
+
default:
break;
}
@@ -3414,6 +3533,11 @@ ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_att
{
return cpu_show_common(dev, attr, buf, X86_BUG_ITS);
}
+
+ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_TSA);
+}
#endif
void __warn_thunk(void)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 8feb8fd2957a..fb50c1dd53ef 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1233,6 +1233,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
#define ITS BIT(8)
/* CPU is affected by Indirect Target Selection, but guest-host isolation is not affected */
#define ITS_NATIVE_ONLY BIT(9)
+/* CPU is affected by Transient Scheduler Attacks */
+#define TSA BIT(10)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS),
@@ -1280,7 +1282,7 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_AMD(0x16, RETBLEED),
VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO),
VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO),
- VULNBL_AMD(0x19, SRSO),
+ VULNBL_AMD(0x19, SRSO | TSA),
VULNBL_AMD(0x1a, SRSO),
{}
};
@@ -1530,6 +1532,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_ITS_NATIVE_ONLY);
}
+ if (c->x86_vendor == X86_VENDOR_AMD) {
+ if (!cpu_has(c, X86_FEATURE_TSA_SQ_NO) ||
+ !cpu_has(c, X86_FEATURE_TSA_L1_NO)) {
+ if (cpu_matches(cpu_vuln_blacklist, TSA) ||
+ /* Enable bug on Zen guests to allow for live migration. */
+ (cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_ZEN)))
+ setup_force_cpu_bug(X86_BUG_TSA);
+ }
+ }
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
@@ -2243,20 +2255,16 @@ EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
#endif
#endif
-/*
- * Clear all 6 debug registers:
- */
-static void clear_all_debug_regs(void)
+static void initialize_debug_regs(void)
{
- int i;
-
- for (i = 0; i < 8; i++) {
- /* Ignore db4, db5 */
- if ((i == 4) || (i == 5))
- continue;
-
- set_debugreg(0, i);
- }
+ /* Control register first -- to make sure everything is disabled. */
+ set_debugreg(DR7_FIXED_1, 7);
+ set_debugreg(DR6_RESERVED, 6);
+ /* dr5 and dr4 don't exist */
+ set_debugreg(0, 3);
+ set_debugreg(0, 2);
+ set_debugreg(0, 1);
+ set_debugreg(0, 0);
}
#ifdef CONFIG_KGDB
@@ -2417,7 +2425,7 @@ void cpu_init(void)
load_mm_ldt(&init_mm);
- clear_all_debug_regs();
+ initialize_debug_regs();
dbg_restore_debug_regs();
doublefault_init_cpu_tss();
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 9d852c3b2cb5..5c4eb28c3ac9 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -350,7 +350,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
struct thresh_restart {
struct threshold_block *b;
- int reset;
int set_lvt_off;
int lvt_off;
u16 old_limit;
@@ -432,13 +431,13 @@ static void threshold_restart_bank(void *_tr)
rdmsr(tr->b->address, lo, hi);
- if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
- tr->reset = 1; /* limit cannot be lower than err count */
-
- if (tr->reset) { /* reset err count and overflow bit */
- hi =
- (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
- (THRESHOLD_MAX - tr->b->threshold_limit);
+ /*
+ * Reset error count and overflow bit.
+ * This is done during init or after handling an interrupt.
+ */
+ if (hi & MASK_OVERFLOW_HI || tr->set_lvt_off) {
+ hi &= ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI);
+ hi |= THRESHOLD_MAX - tr->b->threshold_limit;
} else if (tr->old_limit) { /* change limit w/o reset */
int new_count = (hi & THRESHOLD_MAX) +
(tr->old_limit - tr->b->threshold_limit);
@@ -1113,13 +1112,20 @@ static const char *get_name(unsigned int cpu, unsigned int bank, struct threshol
}
bank_type = smca_get_bank_type(cpu, bank);
- if (bank_type >= N_SMCA_BANK_TYPES)
- return NULL;
if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) {
if (b->block < ARRAY_SIZE(smca_umc_block_names))
return smca_umc_block_names[b->block];
- return NULL;
+ }
+
+ if (b && b->block) {
+ snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_block_%u", b->block);
+ return buf_mcatype;
+ }
+
+ if (bank_type >= N_SMCA_BANK_TYPES) {
+ snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_bank_%u", bank);
+ return buf_mcatype;
}
if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index e9b3c5d4a52e..4da4eab56c81 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1740,6 +1740,11 @@ static void mc_poll_banks_default(void)
void (*mc_poll_banks)(void) = mc_poll_banks_default;
+static bool should_enable_timer(unsigned long iv)
+{
+ return !mca_cfg.ignore_ce && iv;
+}
+
static void mce_timer_fn(struct timer_list *t)
{
struct timer_list *cpu_t = this_cpu_ptr(&mce_timer);
@@ -1763,7 +1768,7 @@ static void mce_timer_fn(struct timer_list *t)
if (mce_get_storm_mode()) {
__start_timer(t, HZ);
- } else {
+ } else if (should_enable_timer(iv)) {
__this_cpu_write(mce_next_interval, iv);
__start_timer(t, iv);
}
@@ -2156,11 +2161,10 @@ static void mce_start_timer(struct timer_list *t)
{
unsigned long iv = check_interval * HZ;
- if (mca_cfg.ignore_ce || !iv)
- return;
-
- this_cpu_write(mce_next_interval, iv);
- __start_timer(t, iv);
+ if (should_enable_timer(iv)) {
+ this_cpu_write(mce_next_interval, iv);
+ __start_timer(t, iv);
+ }
}
static void __mcheck_cpu_setup_timer(void)
@@ -2801,15 +2805,9 @@ static int mce_cpu_dead(unsigned int cpu)
static int mce_cpu_online(unsigned int cpu)
{
struct timer_list *t = this_cpu_ptr(&mce_timer);
- int ret;
mce_device_create(cpu);
-
- ret = mce_threshold_create_device(cpu);
- if (ret) {
- mce_device_remove(cpu);
- return ret;
- }
+ mce_threshold_create_device(cpu);
mce_reenable_cpu();
mce_start_timer(t);
return 0;
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index efcf21e9552e..9b149b9c4109 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -478,6 +478,7 @@ void mce_intel_feature_init(struct cpuinfo_x86 *c)
void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
intel_clear_lmce();
+ cmci_clear();
}
bool intel_filter_mce(struct mce *m)
diff --git a/arch/x86/kernel/cpu/microcode/amd_shas.c b/arch/x86/kernel/cpu/microcode/amd_shas.c
index 2a1655b1fdd8..1fd349cfc802 100644
--- a/arch/x86/kernel/cpu/microcode/amd_shas.c
+++ b/arch/x86/kernel/cpu/microcode/amd_shas.c
@@ -231,6 +231,13 @@ static const struct patch_digest phashes[] = {
0x0d,0x5b,0x65,0x34,0x69,0xb2,0x62,0x21,
}
},
+ { 0xa0011d7, {
+ 0x35,0x07,0xcd,0x40,0x94,0xbc,0x81,0x6b,
+ 0xfc,0x61,0x56,0x1a,0xe2,0xdb,0x96,0x12,
+ 0x1c,0x1c,0x31,0xb1,0x02,0x6f,0xe5,0xd2,
+ 0xfe,0x1b,0x04,0x03,0x2c,0x8f,0x4c,0x36,
+ }
+ },
{ 0xa001223, {
0xfb,0x32,0x5f,0xc6,0x83,0x4f,0x8c,0xb8,
0xa4,0x05,0xf9,0x71,0x53,0x01,0x16,0xc4,
@@ -294,6 +301,13 @@ static const struct patch_digest phashes[] = {
0xc0,0xcd,0x33,0xf2,0x8d,0xf9,0xef,0x59,
}
},
+ { 0xa00123b, {
+ 0xef,0xa1,0x1e,0x71,0xf1,0xc3,0x2c,0xe2,
+ 0xc3,0xef,0x69,0x41,0x7a,0x54,0xca,0xc3,
+ 0x8f,0x62,0x84,0xee,0xc2,0x39,0xd9,0x28,
+ 0x95,0xa7,0x12,0x49,0x1e,0x30,0x71,0x72,
+ }
+ },
{ 0xa00820c, {
0xa8,0x0c,0x81,0xc0,0xa6,0x00,0xe7,0xf3,
0x5f,0x65,0xd3,0xb9,0x6f,0xea,0x93,0x63,
@@ -301,6 +315,13 @@ static const struct patch_digest phashes[] = {
0xe1,0x3b,0x8d,0xb2,0xf8,0x22,0x03,0xe2,
}
},
+ { 0xa00820d, {
+ 0xf9,0x2a,0xc0,0xf4,0x9e,0xa4,0x87,0xa4,
+ 0x7d,0x87,0x00,0xfd,0xab,0xda,0x19,0xca,
+ 0x26,0x51,0x32,0xc1,0x57,0x91,0xdf,0xc1,
+ 0x05,0xeb,0x01,0x7c,0x5a,0x95,0x21,0xb7,
+ }
+ },
{ 0xa10113e, {
0x05,0x3c,0x66,0xd7,0xa9,0x5a,0x33,0x10,
0x1b,0xf8,0x9c,0x8f,0xed,0xfc,0xa7,0xa0,
@@ -322,6 +343,13 @@ static const struct patch_digest phashes[] = {
0xf1,0x5e,0xb0,0xde,0xb4,0x98,0xae,0xc4,
}
},
+ { 0xa10114c, {
+ 0x9e,0xb6,0xa2,0xd9,0x87,0x38,0xc5,0x64,
+ 0xd8,0x88,0xfa,0x78,0x98,0xf9,0x6f,0x74,
+ 0x39,0x90,0x1b,0xa5,0xcf,0x5e,0xb4,0x2a,
+ 0x02,0xff,0xd4,0x8c,0x71,0x8b,0xe2,0xc0,
+ }
+ },
{ 0xa10123e, {
0x03,0xb9,0x2c,0x76,0x48,0x93,0xc9,0x18,
0xfb,0x56,0xfd,0xf7,0xe2,0x1d,0xca,0x4d,
@@ -343,6 +371,13 @@ static const struct patch_digest phashes[] = {
0x1b,0x7d,0x64,0x9d,0x4b,0x53,0x13,0x75,
}
},
+ { 0xa10124c, {
+ 0x29,0xea,0xf1,0x2c,0xb2,0xe4,0xef,0x90,
+ 0xa4,0xcd,0x1d,0x86,0x97,0x17,0x61,0x46,
+ 0xfc,0x22,0xcb,0x57,0x75,0x19,0xc8,0xcc,
+ 0x0c,0xf5,0xbc,0xac,0x81,0x9d,0x9a,0xd2,
+ }
+ },
{ 0xa108108, {
0xed,0xc2,0xec,0xa1,0x15,0xc6,0x65,0xe9,
0xd0,0xef,0x39,0xaa,0x7f,0x55,0x06,0xc6,
@@ -350,6 +385,13 @@ static const struct patch_digest phashes[] = {
0x28,0x1e,0x9c,0x59,0x69,0x99,0x4d,0x16,
}
},
+ { 0xa108109, {
+ 0x85,0xb4,0xbd,0x7c,0x49,0xa7,0xbd,0xfa,
+ 0x49,0x36,0x80,0x81,0xc5,0xb7,0x39,0x1b,
+ 0x9a,0xaa,0x50,0xde,0x9b,0xe9,0x32,0x35,
+ 0x42,0x7e,0x51,0x4f,0x52,0x2c,0x28,0x59,
+ }
+ },
{ 0xa20102d, {
0xf9,0x6e,0xf2,0x32,0xd3,0x0f,0x5f,0x11,
0x59,0xa1,0xfe,0xcc,0xcd,0x9b,0x42,0x89,
@@ -357,6 +399,13 @@ static const struct patch_digest phashes[] = {
0x8c,0xe9,0x19,0x3e,0xcc,0x3f,0x7b,0xb4,
}
},
+ { 0xa20102e, {
+ 0xbe,0x1f,0x32,0x04,0x0d,0x3c,0x9c,0xdd,
+ 0xe1,0xa4,0xbf,0x76,0x3a,0xec,0xc2,0xf6,
+ 0x11,0x00,0xa7,0xaf,0x0f,0xe5,0x02,0xc5,
+ 0x54,0x3a,0x1f,0x8c,0x16,0xb5,0xff,0xbe,
+ }
+ },
{ 0xa201210, {
0xe8,0x6d,0x51,0x6a,0x8e,0x72,0xf3,0xfe,
0x6e,0x16,0xbc,0x62,0x59,0x40,0x17,0xe9,
@@ -364,6 +413,13 @@ static const struct patch_digest phashes[] = {
0xf7,0x55,0xf0,0x13,0xbb,0x22,0xf6,0x41,
}
},
+ { 0xa201211, {
+ 0x69,0xa1,0x17,0xec,0xd0,0xf6,0x6c,0x95,
+ 0xe2,0x1e,0xc5,0x59,0x1a,0x52,0x0a,0x27,
+ 0xc4,0xed,0xd5,0x59,0x1f,0xbf,0x00,0xff,
+ 0x08,0x88,0xb5,0xe1,0x12,0xb6,0xcc,0x27,
+ }
+ },
{ 0xa404107, {
0xbb,0x04,0x4e,0x47,0xdd,0x5e,0x26,0x45,
0x1a,0xc9,0x56,0x24,0xa4,0x4c,0x82,0xb0,
@@ -371,6 +427,13 @@ static const struct patch_digest phashes[] = {
0x13,0xbc,0xc5,0x25,0xe4,0xc5,0xc3,0x99,
}
},
+ { 0xa404108, {
+ 0x69,0x67,0x43,0x06,0xf8,0x0c,0x62,0xdc,
+ 0xa4,0x21,0x30,0x4f,0x0f,0x21,0x2c,0xcb,
+ 0xcc,0x37,0xf1,0x1c,0xc3,0xf8,0x2f,0x19,
+ 0xdf,0x53,0x53,0x46,0xb1,0x15,0xea,0x00,
+ }
+ },
{ 0xa500011, {
0x23,0x3d,0x70,0x7d,0x03,0xc3,0xc4,0xf4,
0x2b,0x82,0xc6,0x05,0xda,0x80,0x0a,0xf1,
@@ -378,6 +441,13 @@ static const struct patch_digest phashes[] = {
0x11,0x5e,0x96,0x7e,0x71,0xe9,0xfc,0x74,
}
},
+ { 0xa500012, {
+ 0xeb,0x74,0x0d,0x47,0xa1,0x8e,0x09,0xe4,
+ 0x93,0x4c,0xad,0x03,0x32,0x4c,0x38,0x16,
+ 0x10,0x39,0xdd,0x06,0xaa,0xce,0xd6,0x0f,
+ 0x62,0x83,0x9d,0x8e,0x64,0x55,0xbe,0x63,
+ }
+ },
{ 0xa601209, {
0x66,0x48,0xd4,0x09,0x05,0xcb,0x29,0x32,
0x66,0xb7,0x9a,0x76,0xcd,0x11,0xf3,0x30,
@@ -385,6 +455,13 @@ static const struct patch_digest phashes[] = {
0xe8,0x73,0xe2,0xd6,0xdb,0xd2,0x77,0x1d,
}
},
+ { 0xa60120a, {
+ 0x0c,0x8b,0x3d,0xfd,0x52,0x52,0x85,0x7d,
+ 0x20,0x3a,0xe1,0x7e,0xa4,0x21,0x3b,0x7b,
+ 0x17,0x86,0xae,0xac,0x13,0xb8,0x63,0x9d,
+ 0x06,0x01,0xd0,0xa0,0x51,0x9a,0x91,0x2c,
+ }
+ },
{ 0xa704107, {
0xf3,0xc6,0x58,0x26,0xee,0xac,0x3f,0xd6,
0xce,0xa1,0x72,0x47,0x3b,0xba,0x2b,0x93,
@@ -392,6 +469,13 @@ static const struct patch_digest phashes[] = {
0x64,0x39,0x71,0x8c,0xce,0xe7,0x41,0x39,
}
},
+ { 0xa704108, {
+ 0xd7,0x55,0x15,0x2b,0xfe,0xc4,0xbc,0x93,
+ 0xec,0x91,0xa0,0xae,0x45,0xb7,0xc3,0x98,
+ 0x4e,0xff,0x61,0x77,0x88,0xc2,0x70,0x49,
+ 0xe0,0x3a,0x1d,0x84,0x38,0x52,0xbf,0x5a,
+ }
+ },
{ 0xa705206, {
0x8d,0xc0,0x76,0xbd,0x58,0x9f,0x8f,0xa4,
0x12,0x9d,0x21,0xfb,0x48,0x21,0xbc,0xe7,
@@ -399,6 +483,13 @@ static const struct patch_digest phashes[] = {
0x03,0x35,0xe9,0xbe,0xfb,0x06,0xdf,0xfc,
}
},
+ { 0xa705208, {
+ 0x30,0x1d,0x55,0x24,0xbc,0x6b,0x5a,0x19,
+ 0x0c,0x7d,0x1d,0x74,0xaa,0xd1,0xeb,0xd2,
+ 0x16,0x62,0xf7,0x5b,0xe1,0x1f,0x18,0x11,
+ 0x5c,0xf0,0x94,0x90,0x26,0xec,0x69,0xff,
+ }
+ },
{ 0xa708007, {
0x6b,0x76,0xcc,0x78,0xc5,0x8a,0xa3,0xe3,
0x32,0x2d,0x79,0xe4,0xc3,0x80,0xdb,0xb2,
@@ -406,6 +497,13 @@ static const struct patch_digest phashes[] = {
0xdf,0x92,0x73,0x84,0x87,0x3c,0x73,0x93,
}
},
+ { 0xa708008, {
+ 0x08,0x6e,0xf0,0x22,0x4b,0x8e,0xc4,0x46,
+ 0x58,0x34,0xe6,0x47,0xa2,0x28,0xfd,0xab,
+ 0x22,0x3d,0xdd,0xd8,0x52,0x9e,0x1d,0x16,
+ 0xfa,0x01,0x68,0x14,0x79,0x3e,0xe8,0x6b,
+ }
+ },
{ 0xa70c005, {
0x88,0x5d,0xfb,0x79,0x64,0xd8,0x46,0x3b,
0x4a,0x83,0x8e,0x77,0x7e,0xcf,0xb3,0x0f,
@@ -413,6 +511,13 @@ static const struct patch_digest phashes[] = {
0xee,0x49,0xac,0xe1,0x8b,0x13,0xc5,0x13,
}
},
+ { 0xa70c008, {
+ 0x0f,0xdb,0x37,0xa1,0x10,0xaf,0xd4,0x21,
+ 0x94,0x0d,0xa4,0xa2,0xe9,0x86,0x6c,0x0e,
+ 0x85,0x7c,0x36,0x30,0xa3,0x3a,0x78,0x66,
+ 0x18,0x10,0x60,0x0d,0x78,0x3d,0x44,0xd0,
+ }
+ },
{ 0xaa00116, {
0xe8,0x4c,0x2c,0x88,0xa1,0xac,0x24,0x63,
0x65,0xe5,0xaa,0x2d,0x16,0xa9,0xc3,0xf5,
@@ -441,4 +546,11 @@ static const struct patch_digest phashes[] = {
0x68,0x2f,0x46,0xee,0xfe,0xc6,0x6d,0xef,
}
},
+ { 0xaa00216, {
+ 0x79,0xfb,0x5b,0x9f,0xb6,0xe6,0xa8,0xf5,
+ 0x4e,0x7c,0x4f,0x8e,0x1d,0xad,0xd0,0x08,
+ 0xc2,0x43,0x7c,0x8b,0xe6,0xdb,0xd0,0xd2,
+ 0xe8,0x39,0x26,0xc1,0xe5,0x5a,0x48,0xf1,
+ }
+ },
};
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 7109cbfcad4f..187d527ef73b 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -498,6 +498,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
struct rdt_hw_mon_domain *hw_dom;
struct rdt_domain_hdr *hdr;
struct rdt_mon_domain *d;
+ struct cacheinfo *ci;
int err;
lockdep_assert_held(&domain_list_lock);
@@ -525,12 +526,13 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
d = &hw_dom->d_resctrl;
d->hdr.id = id;
d->hdr.type = RESCTRL_MON_DOMAIN;
- d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
- if (!d->ci) {
+ ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
+ if (!ci) {
pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name);
mon_domain_free(hw_dom);
return;
}
+ d->ci_id = ci->id;
cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
arch_mon_domain_online(r, d);
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index dbf6d71bdf18..b4a1f6732a3a 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -50,6 +50,8 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 },
{ X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 },
{ X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
+ { X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 },
+ { X86_FEATURE_TSA_L1_NO, CPUID_ECX, 2, 0x80000021, 0 },
{ X86_FEATURE_AMD_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 },
{ X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
{ X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 },
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 6290dd120f5e..ff40f09ad911 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -33,8 +33,9 @@ void io_bitmap_share(struct task_struct *tsk)
set_tsk_thread_flag(tsk, TIF_IO_BITMAP);
}
-static void task_update_io_bitmap(struct task_struct *tsk)
+static void task_update_io_bitmap(void)
{
+ struct task_struct *tsk = current;
struct thread_struct *t = &tsk->thread;
if (t->iopl_emul == 3 || t->io_bitmap) {
@@ -54,7 +55,12 @@ void io_bitmap_exit(struct task_struct *tsk)
struct io_bitmap *iobm = tsk->thread.io_bitmap;
tsk->thread.io_bitmap = NULL;
- task_update_io_bitmap(tsk);
+ /*
+ * Don't touch the TSS when invoked on a failed fork(). TSS
+ * reflects the state of @current and not the state of @tsk.
+ */
+ if (tsk == current)
+ task_update_io_bitmap();
if (iobm && refcount_dec_and_test(&iobm->refcnt))
kfree(iobm);
}
@@ -192,8 +198,7 @@ SYSCALL_DEFINE1(iopl, unsigned int, level)
}
t->iopl_emul = level;
- task_update_io_bitmap(current);
-
+ task_update_io_bitmap();
return 0;
}
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 102641fd2172..8b1a9733d13e 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -385,7 +385,7 @@ static void kgdb_disable_hw_debug(struct pt_regs *regs)
struct perf_event *bp;
/* Disable hardware debugging while we are in kgdb: */
- set_debugreg(0UL, 7);
+ set_debugreg(DR7_FIXED_1, 7);
for (i = 0; i < HBP_NUM; i++) {
if (!breakinfo[i].enabled)
continue;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c1d2dac72b9c..a838be04f874 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -176,6 +176,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
frame->ret_addr = (unsigned long) ret_from_fork_asm;
p->thread.sp = (unsigned long) fork_frame;
p->thread.io_bitmap = NULL;
+ clear_tsk_thread_flag(p, TIF_IO_BITMAP);
p->thread.iopl_warn = 0;
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
@@ -464,6 +465,11 @@ void native_tss_update_io_bitmap(void)
} else {
struct io_bitmap *iobm = t->io_bitmap;
+ if (WARN_ON_ONCE(!iobm)) {
+ clear_thread_flag(TIF_IO_BITMAP);
+ native_tss_invalidate_io_bitmap();
+ }
+
/*
* Only copy bitmap data when the sequence number differs. The
* update time is accounted to the incoming task.
@@ -901,16 +907,24 @@ static __init bool prefer_mwait_c1_over_halt(void)
*/
static __cpuidle void mwait_idle(void)
{
+ if (need_resched())
+ return;
+
+ x86_idle_clear_cpu_buffers();
+
if (!current_set_polling_and_test()) {
const void *addr = &current_thread_info()->flags;
alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr));
__monitor(addr, 0, 0);
- if (!need_resched()) {
- __sti_mwait(0, 0);
- raw_local_irq_disable();
- }
+ if (need_resched())
+ goto out;
+
+ __sti_mwait(0, 0);
+ raw_local_irq_disable();
}
+
+out:
__current_clr_polling();
}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a10e180cbf23..3ef15c2f152f 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -93,7 +93,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
/* Only print out debug registers if they are in their non-default state. */
if ((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
- (d6 == DR6_RESERVED) && (d7 == 0x400))
+ (d6 == DR6_RESERVED) && (d7 == DR7_FIXED_1))
return;
printk("%sDR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 8d6cf25127aa..b972bf72fb8b 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -133,7 +133,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
/* Only print out debug registers if they are in their non-default state. */
if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
- (d6 == DR6_RESERVED) && (d7 == 0x400))) {
+ (d6 == DR6_RESERVED) && (d7 == DR7_FIXED_1))) {
printk("%sDR0: %016lx DR1: %016lx DR2: %016lx\n",
log_lvl, d0, d1, d2);
printk("%sDR3: %016lx DR6: %016lx DR7: %016lx\n",
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 98123ff10506..42bbc42bd350 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -152,6 +152,8 @@ SYSCALL32_DEFINE0(sigreturn)
struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
sigset_t set;
+ prevent_single_step_upon_eretu(regs);
+
if (!access_ok(frame, sizeof(*frame)))
goto badframe;
if (__get_user(set.sig[0], &frame->sc.oldmask)
@@ -175,6 +177,8 @@ SYSCALL32_DEFINE0(rt_sigreturn)
struct rt_sigframe_ia32 __user *frame;
sigset_t set;
+ prevent_single_step_upon_eretu(regs);
+
frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4);
if (!access_ok(frame, sizeof(*frame)))
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index ee9453891901..d483b585c6c6 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -250,6 +250,8 @@ SYSCALL_DEFINE0(rt_sigreturn)
sigset_t set;
unsigned long uc_flags;
+ prevent_single_step_upon_eretu(regs);
+
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
if (!access_ok(frame, sizeof(*frame)))
goto badframe;
@@ -366,6 +368,8 @@ COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn)
sigset_t set;
unsigned long uc_flags;
+ prevent_single_step_upon_eretu(regs);
+
frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
if (!access_ok(frame, sizeof(*frame)))
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 18266cc3d98c..b014e6d229f9 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -299,3 +299,27 @@ struct smp_ops smp_ops = {
.send_call_func_single_ipi = native_send_call_func_single_ipi,
};
EXPORT_SYMBOL_GPL(smp_ops);
+
+int arch_cpu_rescan_dead_smt_siblings(void)
+{
+ enum cpuhp_smt_control old = cpu_smt_control;
+ int ret;
+
+ /*
+ * If SMT has been disabled and SMT siblings are in HLT, bring them back
+ * online and offline them again so that they end up in MWAIT proper.
+ *
+ * Called with hotplug enabled.
+ */
+ if (old != CPU_SMT_DISABLED && old != CPU_SMT_FORCE_DISABLED)
+ return 0;
+
+ ret = cpuhp_smt_enable();
+ if (ret)
+ return ret;
+
+ ret = cpuhp_smt_disable(old);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(arch_cpu_rescan_dead_smt_siblings);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index fc78c2325fd2..58ede3fa6a75 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1244,6 +1244,10 @@ void play_dead_common(void)
local_irq_disable();
}
+/*
+ * We need to flush the caches before going to sleep, lest we have
+ * dirty data in our caches when we come back up.
+ */
void __noreturn mwait_play_dead(unsigned int eax_hint)
{
struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead);
@@ -1290,50 +1294,6 @@ void __noreturn mwait_play_dead(unsigned int eax_hint)
}
/*
- * We need to flush the caches before going to sleep, lest we have
- * dirty data in our caches when we come back up.
- */
-static inline void mwait_play_dead_cpuid_hint(void)
-{
- unsigned int eax, ebx, ecx, edx;
- unsigned int highest_cstate = 0;
- unsigned int highest_subcstate = 0;
- int i;
-
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
- return;
- if (!this_cpu_has(X86_FEATURE_MWAIT))
- return;
- if (!this_cpu_has(X86_FEATURE_CLFLUSH))
- return;
-
- eax = CPUID_LEAF_MWAIT;
- ecx = 0;
- native_cpuid(&eax, &ebx, &ecx, &edx);
-
- /*
- * eax will be 0 if EDX enumeration is not valid.
- * Initialized below to cstate, sub_cstate value when EDX is valid.
- */
- if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
- eax = 0;
- } else {
- edx >>= MWAIT_SUBSTATE_SIZE;
- for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
- if (edx & MWAIT_SUBSTATE_MASK) {
- highest_cstate = i;
- highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
- }
- }
- eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
- (highest_subcstate - 1);
- }
-
- mwait_play_dead(eax);
-}
-
-/*
* Kick all "offline" CPUs out of mwait on kexec(). See comment in
* mwait_play_dead().
*/
@@ -1383,9 +1343,9 @@ void native_play_dead(void)
play_dead_common();
tboot_shutdown(TB_SHUTDOWN_WFS);
- mwait_play_dead_cpuid_hint();
- if (cpuidle_play_dead())
- hlt_play_dead();
+ /* Below returns only on error. */
+ cpuidle_play_dead();
+ hlt_play_dead();
}
#else /* ... !CONFIG_HOTPLUG_CPU */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c5c897a86418..36354b470590 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -1022,24 +1022,32 @@ static bool is_sysenter_singlestep(struct pt_regs *regs)
#endif
}
-static __always_inline unsigned long debug_read_clear_dr6(void)
+static __always_inline unsigned long debug_read_reset_dr6(void)
{
unsigned long dr6;
+ get_debugreg(dr6, 6);
+ dr6 ^= DR6_RESERVED; /* Flip to positive polarity */
+
/*
* The Intel SDM says:
*
- * Certain debug exceptions may clear bits 0-3. The remaining
- * contents of the DR6 register are never cleared by the
- * processor. To avoid confusion in identifying debug
- * exceptions, debug handlers should clear the register before
- * returning to the interrupted task.
+ * Certain debug exceptions may clear bits 0-3 of DR6.
+ *
+ * BLD induced #DB clears DR6.BLD and any other debug
+ * exception doesn't modify DR6.BLD.
*
- * Keep it simple: clear DR6 immediately.
+ * RTM induced #DB clears DR6.RTM and any other debug
+ * exception sets DR6.RTM.
+ *
+ * To avoid confusion in identifying debug exceptions,
+ * debug handlers should set DR6.BLD and DR6.RTM, and
+ * clear other DR6 bits before returning.
+ *
+ * Keep it simple: write DR6 with its architectural reset
+ * value 0xFFFF0FF0, defined as DR6_RESERVED, immediately.
*/
- get_debugreg(dr6, 6);
set_debugreg(DR6_RESERVED, 6);
- dr6 ^= DR6_RESERVED; /* Flip to positive polarity */
return dr6;
}
@@ -1239,13 +1247,13 @@ out:
/* IST stack entry */
DEFINE_IDTENTRY_DEBUG(exc_debug)
{
- exc_debug_kernel(regs, debug_read_clear_dr6());
+ exc_debug_kernel(regs, debug_read_reset_dr6());
}
/* User entry, runs on regular task stack */
DEFINE_IDTENTRY_DEBUG_USER(exc_debug)
{
- exc_debug_user(regs, debug_read_clear_dr6());
+ exc_debug_user(regs, debug_read_reset_dr6());
}
#ifdef CONFIG_X86_FRED
@@ -1264,7 +1272,7 @@ DEFINE_FREDENTRY_DEBUG(exc_debug)
{
/*
* FRED #DB stores DR6 on the stack in the format which
- * debug_read_clear_dr6() returns for the IDT entry points.
+ * debug_read_reset_dr6() returns for the IDT entry points.
*/
unsigned long dr6 = fred_event_data(regs);
@@ -1279,7 +1287,7 @@ DEFINE_FREDENTRY_DEBUG(exc_debug)
/* 32 bit does not have separate entry points. */
DEFINE_IDTENTRY_RAW(exc_debug)
{
- unsigned long dr6 = debug_read_clear_dr6();
+ unsigned long dr6 = debug_read_reset_dr6();
if (user_mode(regs))
exc_debug_user(regs, dr6);
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b2d006756e02..f84bc0569c9c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -1165,6 +1165,8 @@ void kvm_set_cpu_caps(void)
*/
SYNTHESIZED_F(LFENCE_RDTSC),
/* SmmPgCfgLock */
+ /* 4: Resv */
+ SYNTHESIZED_F(VERW_CLEAR),
F(NULL_SEL_CLR_BASE),
/* UpperAddressIgnore */
F(AUTOIBRS),
@@ -1179,6 +1181,11 @@ void kvm_set_cpu_caps(void)
F(SRSO_USER_KERNEL_NO),
);
+ kvm_cpu_cap_init(CPUID_8000_0021_ECX,
+ SYNTHESIZED_F(TSA_SQ_NO),
+ SYNTHESIZED_F(TSA_L1_NO),
+ );
+
kvm_cpu_cap_init(CPUID_8000_0022_EAX,
F(PERFMON_V2),
);
@@ -1748,8 +1755,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
break;
case 0x80000021:
- entry->ebx = entry->ecx = entry->edx = 0;
+ entry->ebx = entry->edx = 0;
cpuid_entry_override(entry, CPUID_8000_0021_EAX);
+ cpuid_entry_override(entry, CPUID_8000_0021_ECX);
break;
/* AMD Extended Performance Monitoring and Debug */
case 0x80000022: {
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 24f0318c50d7..ee27064dd72f 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1979,6 +1979,9 @@ int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
if (entries[i] == KVM_HV_TLB_FLUSHALL_ENTRY)
goto out_flush_all;
+ if (is_noncanonical_invlpg_address(entries[i], vcpu))
+ continue;
+
/*
* Lower 12 bits of 'address' encode the number of additional
* pages to flush.
@@ -2001,11 +2004,11 @@ out_flush_all:
static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
{
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+ unsigned long *vcpu_mask = hv_vcpu->vcpu_mask;
u64 *sparse_banks = hv_vcpu->sparse_banks;
struct kvm *kvm = vcpu->kvm;
struct hv_tlb_flush_ex flush_ex;
struct hv_tlb_flush flush;
- DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
/*
* Normally, there can be no more than 'KVM_HV_TLB_FLUSH_FIFO_SIZE'
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index cbc84c6abc2e..4e06e2e89a8f 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4896,12 +4896,16 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
{
u64 error_code = PFERR_GUEST_FINAL_MASK;
u8 level = PG_LEVEL_4K;
+ u64 direct_bits;
u64 end;
int r;
if (!vcpu->kvm->arch.pre_fault_allowed)
return -EOPNOTSUPP;
+ if (kvm_is_gfn_alias(vcpu->kvm, gpa_to_gfn(range->gpa)))
+ return -EINVAL;
+
/*
* reload is efficient when called repeatedly, so we can do it on
* every iteration.
@@ -4910,15 +4914,18 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
if (r)
return r;
+ direct_bits = 0;
if (kvm_arch_has_private_mem(vcpu->kvm) &&
kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(range->gpa)))
error_code |= PFERR_PRIVATE_ACCESS;
+ else
+ direct_bits = gfn_to_gpa(kvm_gfn_direct_bits(vcpu->kvm));
/*
* Shadow paging uses GVA for kvm page fault, so restrict to
* two-dimensional paging.
*/
- r = kvm_tdp_map_page(vcpu, range->gpa, error_code, &level);
+ r = kvm_tdp_map_page(vcpu, range->gpa | direct_bits, error_code, &level);
if (r < 0)
return r;
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index fde0ae986003..c53b92379e6e 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -52,6 +52,10 @@
/* CPUID level 0x80000022 (EAX) */
#define KVM_X86_FEATURE_PERFMON_V2 KVM_X86_FEATURE(CPUID_8000_0022_EAX, 0)
+/* CPUID level 0x80000021 (ECX) */
+#define KVM_X86_FEATURE_TSA_SQ_NO KVM_X86_FEATURE(CPUID_8000_0021_ECX, 1)
+#define KVM_X86_FEATURE_TSA_L1_NO KVM_X86_FEATURE(CPUID_8000_0021_ECX, 2)
+
struct cpuid_reg {
u32 function;
u32 index;
@@ -82,6 +86,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_8000_0022_EAX] = {0x80000022, 0, CPUID_EAX},
[CPUID_7_2_EDX] = { 7, 2, CPUID_EDX},
[CPUID_24_0_EBX] = { 0x24, 0, CPUID_EBX},
+ [CPUID_8000_0021_ECX] = {0x80000021, 0, CPUID_ECX},
};
/*
@@ -121,6 +126,8 @@ static __always_inline u32 __feature_translate(int x86_feature)
KVM_X86_TRANSLATE_FEATURE(PERFMON_V2);
KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL);
KVM_X86_TRANSLATE_FEATURE(BHI_CTRL);
+ KVM_X86_TRANSLATE_FEATURE(TSA_SQ_NO);
+ KVM_X86_TRANSLATE_FEATURE(TSA_L1_NO);
default:
return x86_feature;
}
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 5a69b657dae9..b201f77fcd49 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1971,6 +1971,10 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src)
struct kvm_vcpu *src_vcpu;
unsigned long i;
+ if (src->created_vcpus != atomic_read(&src->online_vcpus) ||
+ dst->created_vcpus != atomic_read(&dst->online_vcpus))
+ return -EBUSY;
+
if (!sev_es_guest(src))
return 0;
@@ -2871,6 +2875,33 @@ void __init sev_set_cpu_caps(void)
}
}
+static bool is_sev_snp_initialized(void)
+{
+ struct sev_user_data_snp_status *status;
+ struct sev_data_snp_addr buf;
+ bool initialized = false;
+ int ret, error = 0;
+
+ status = snp_alloc_firmware_page(GFP_KERNEL | __GFP_ZERO);
+ if (!status)
+ return false;
+
+ buf.address = __psp_pa(status);
+ ret = sev_do_cmd(SEV_CMD_SNP_PLATFORM_STATUS, &buf, &error);
+ if (ret) {
+ pr_err("SEV: SNP_PLATFORM_STATUS failed ret=%d, fw_error=%d (%#x)\n",
+ ret, error, error);
+ goto out;
+ }
+
+ initialized = !!status->state;
+
+out:
+ snp_free_firmware_page(status);
+
+ return initialized;
+}
+
void __init sev_hardware_setup(void)
{
unsigned int eax, ebx, ecx, edx, sev_asid_count, sev_es_asid_count;
@@ -2975,6 +3006,14 @@ void __init sev_hardware_setup(void)
sev_snp_supported = sev_snp_enabled && cc_platform_has(CC_ATTR_HOST_SEV_SNP);
out:
+ if (sev_enabled) {
+ init_args.probe = true;
+ if (sev_platform_init(&init_args))
+ sev_supported = sev_es_supported = sev_snp_supported = false;
+ else if (sev_snp_supported)
+ sev_snp_supported = is_sev_snp_initialized();
+ }
+
if (boot_cpu_has(X86_FEATURE_SEV))
pr_info("SEV %s (ASIDs %u - %u)\n",
sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" :
@@ -3001,15 +3040,6 @@ out:
sev_supported_vmsa_features = 0;
if (sev_es_debug_swap_enabled)
sev_supported_vmsa_features |= SVM_SEV_FEAT_DEBUG_SWAP;
-
- if (!sev_enabled)
- return;
-
- /*
- * Do both SNP and SEV initialization at KVM module load.
- */
- init_args.probe = true;
- sev_platform_init(&init_args);
}
void sev_hardware_unsetup(void)
@@ -4419,8 +4449,12 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
* the VMSA will be NULL if this vCPU is the destination for intrahost
* migration, and will be copied later.
*/
- if (svm->sev_es.vmsa && !svm->sev_es.snp_has_guest_vmsa)
- svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
+ if (!svm->sev_es.snp_has_guest_vmsa) {
+ if (svm->sev_es.vmsa)
+ svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
+ else
+ svm->vmcb->control.vmsa_pa = INVALID_PAGE;
+ }
if (cpu_feature_enabled(X86_FEATURE_ALLOWED_SEV_FEATURES))
svm->vmcb->control.allowed_sev_features = sev->vmsa_features |
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index 0c61153b275f..235c4af6b692 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -169,6 +169,9 @@ SYM_FUNC_START(__svm_vcpu_run)
#endif
mov VCPU_RDI(%_ASM_DI), %_ASM_DI
+ /* Clobbers EFLAGS.ZF */
+ VM_CLEAR_CPU_BUFFERS
+
/* Enter guest mode */
3: vmrun %_ASM_AX
4:
@@ -335,6 +338,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
mov SVM_current_vmcb(%rdi), %rax
mov KVM_VMCB_pa(%rax), %rax
+ /* Clobbers EFLAGS.ZF */
+ VM_CLEAR_CPU_BUFFERS
+
/* Enter guest mode */
1: vmrun %rax
2:
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index b952bc673271..ec79aacc446f 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -173,6 +173,8 @@ static void td_init_cpuid_entry2(struct kvm_cpuid_entry2 *entry, unsigned char i
tdx_clear_unsupported_cpuid(entry);
}
+#define TDVMCALLINFO_SETUP_EVENT_NOTIFY_INTERRUPT BIT(1)
+
static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf,
struct kvm_tdx_capabilities *caps)
{
@@ -188,6 +190,9 @@ static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf,
caps->cpuid.nent = td_conf->num_cpuid_config;
+ caps->user_tdvmcallinfo_1_r11 =
+ TDVMCALLINFO_SETUP_EVENT_NOTIFY_INTERRUPT;
+
for (i = 0; i < td_conf->num_cpuid_config; i++)
td_init_cpuid_entry2(&caps->cpuid.entries[i], i);
@@ -1212,11 +1217,13 @@ static int tdx_map_gpa(struct kvm_vcpu *vcpu)
/*
* Converting TDVMCALL_MAP_GPA to KVM_HC_MAP_GPA_RANGE requires
* userspace to enable KVM_CAP_EXIT_HYPERCALL with KVM_HC_MAP_GPA_RANGE
- * bit set. If not, the error code is not defined in GHCI for TDX, use
- * TDVMCALL_STATUS_INVALID_OPERAND for this case.
+ * bit set. This is a base call so it should always be supported, but
+ * KVM has no way to ensure that userspace implements the GHCI correctly.
+ * So if KVM_HC_MAP_GPA_RANGE does not cause a VMEXIT, return an error
+ * to the guest.
*/
if (!user_exit_on_hypercall(vcpu->kvm, KVM_HC_MAP_GPA_RANGE)) {
- ret = TDVMCALL_STATUS_INVALID_OPERAND;
+ ret = TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED;
goto error;
}
@@ -1449,20 +1456,106 @@ error:
return 1;
}
+static int tdx_complete_get_td_vm_call_info(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_tdx *tdx = to_tdx(vcpu);
+
+ tdvmcall_set_return_code(vcpu, vcpu->run->tdx.get_tdvmcall_info.ret);
+
+ /*
+ * For now, there is no TDVMCALL beyond GHCI base API supported by KVM
+ * directly without the support from userspace, just set the value
+ * returned from userspace.
+ */
+ tdx->vp_enter_args.r11 = vcpu->run->tdx.get_tdvmcall_info.r11;
+ tdx->vp_enter_args.r12 = vcpu->run->tdx.get_tdvmcall_info.r12;
+ tdx->vp_enter_args.r13 = vcpu->run->tdx.get_tdvmcall_info.r13;
+ tdx->vp_enter_args.r14 = vcpu->run->tdx.get_tdvmcall_info.r14;
+
+ return 1;
+}
+
static int tdx_get_td_vm_call_info(struct kvm_vcpu *vcpu)
{
struct vcpu_tdx *tdx = to_tdx(vcpu);
- if (tdx->vp_enter_args.r12)
- tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND);
- else {
+ switch (tdx->vp_enter_args.r12) {
+ case 0:
tdx->vp_enter_args.r11 = 0;
+ tdx->vp_enter_args.r12 = 0;
tdx->vp_enter_args.r13 = 0;
tdx->vp_enter_args.r14 = 0;
+ tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_SUCCESS);
+ return 1;
+ case 1:
+ vcpu->run->tdx.get_tdvmcall_info.leaf = tdx->vp_enter_args.r12;
+ vcpu->run->exit_reason = KVM_EXIT_TDX;
+ vcpu->run->tdx.flags = 0;
+ vcpu->run->tdx.nr = TDVMCALL_GET_TD_VM_CALL_INFO;
+ vcpu->run->tdx.get_tdvmcall_info.ret = TDVMCALL_STATUS_SUCCESS;
+ vcpu->run->tdx.get_tdvmcall_info.r11 = 0;
+ vcpu->run->tdx.get_tdvmcall_info.r12 = 0;
+ vcpu->run->tdx.get_tdvmcall_info.r13 = 0;
+ vcpu->run->tdx.get_tdvmcall_info.r14 = 0;
+ vcpu->arch.complete_userspace_io = tdx_complete_get_td_vm_call_info;
+ return 0;
+ default:
+ tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND);
+ return 1;
}
+}
+
+static int tdx_complete_simple(struct kvm_vcpu *vcpu)
+{
+ tdvmcall_set_return_code(vcpu, vcpu->run->tdx.unknown.ret);
return 1;
}
+static int tdx_get_quote(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_tdx *tdx = to_tdx(vcpu);
+ u64 gpa = tdx->vp_enter_args.r12;
+ u64 size = tdx->vp_enter_args.r13;
+
+ /* The gpa of buffer must have shared bit set. */
+ if (vt_is_tdx_private_gpa(vcpu->kvm, gpa)) {
+ tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND);
+ return 1;
+ }
+
+ vcpu->run->exit_reason = KVM_EXIT_TDX;
+ vcpu->run->tdx.flags = 0;
+ vcpu->run->tdx.nr = TDVMCALL_GET_QUOTE;
+ vcpu->run->tdx.get_quote.ret = TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED;
+ vcpu->run->tdx.get_quote.gpa = gpa & ~gfn_to_gpa(kvm_gfn_direct_bits(tdx->vcpu.kvm));
+ vcpu->run->tdx.get_quote.size = size;
+
+ vcpu->arch.complete_userspace_io = tdx_complete_simple;
+
+ return 0;
+}
+
+static int tdx_setup_event_notify_interrupt(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_tdx *tdx = to_tdx(vcpu);
+ u64 vector = tdx->vp_enter_args.r12;
+
+ if (vector < 32 || vector > 255) {
+ tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND);
+ return 1;
+ }
+
+ vcpu->run->exit_reason = KVM_EXIT_TDX;
+ vcpu->run->tdx.flags = 0;
+ vcpu->run->tdx.nr = TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT;
+ vcpu->run->tdx.setup_event_notify.ret = TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED;
+ vcpu->run->tdx.setup_event_notify.vector = vector;
+
+ vcpu->arch.complete_userspace_io = tdx_complete_simple;
+
+ return 0;
+}
+
static int handle_tdvmcall(struct kvm_vcpu *vcpu)
{
switch (tdvmcall_leaf(vcpu)) {
@@ -1472,11 +1565,15 @@ static int handle_tdvmcall(struct kvm_vcpu *vcpu)
return tdx_report_fatal_error(vcpu);
case TDVMCALL_GET_TD_VM_CALL_INFO:
return tdx_get_td_vm_call_info(vcpu);
+ case TDVMCALL_GET_QUOTE:
+ return tdx_get_quote(vcpu);
+ case TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT:
+ return tdx_setup_event_notify_interrupt(vcpu);
default:
break;
}
- tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND);
+ tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED);
return 1;
}
@@ -2172,25 +2269,26 @@ static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
const struct tdx_sys_info_td_conf *td_conf = &tdx_sysinfo->td_conf;
struct kvm_tdx_capabilities __user *user_caps;
struct kvm_tdx_capabilities *caps = NULL;
+ u32 nr_user_entries;
int ret = 0;
/* flags is reserved for future use */
if (cmd->flags)
return -EINVAL;
- caps = kmalloc(sizeof(*caps) +
+ caps = kzalloc(sizeof(*caps) +
sizeof(struct kvm_cpuid_entry2) * td_conf->num_cpuid_config,
GFP_KERNEL);
if (!caps)
return -ENOMEM;
user_caps = u64_to_user_ptr(cmd->data);
- if (copy_from_user(caps, user_caps, sizeof(*caps))) {
+ if (get_user(nr_user_entries, &user_caps->cpuid.nent)) {
ret = -EFAULT;
goto out;
}
- if (caps->cpuid.nent < td_conf->num_cpuid_config) {
+ if (nr_user_entries < td_conf->num_cpuid_config) {
ret = -E2BIG;
goto out;
}
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 4953846cb30d..191a9ed0da22 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7291,7 +7291,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
vmx_l1d_flush(vcpu);
else if (static_branch_unlikely(&cpu_buf_vm_clear) &&
kvm_arch_has_assigned_device(vcpu->kvm))
- mds_clear_cpu_buffers();
+ x86_clear_cpu_buffers();
vmx_disable_fb_clear(vmx);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b58a74c1722d..93636f77c42d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3258,9 +3258,11 @@ int kvm_guest_time_update(struct kvm_vcpu *v)
/* With all the info we got, fill in the values */
- if (kvm_caps.has_tsc_control)
+ if (kvm_caps.has_tsc_control) {
tgt_tsc_khz = kvm_scale_tsc(tgt_tsc_khz,
v->arch.l1_tsc_scaling_ratio);
+ tgt_tsc_khz = tgt_tsc_khz ? : 1;
+ }
if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
@@ -6186,6 +6188,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
u32 user_tsc_khz;
r = -EINVAL;
+
+ if (vcpu->arch.guest_tsc_protected)
+ goto out;
+
user_tsc_khz = (u32)arg;
if (kvm_caps.has_tsc_control &&
@@ -11035,7 +11041,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (unlikely(vcpu->arch.switch_db_regs &&
!(vcpu->arch.switch_db_regs & KVM_DEBUGREG_AUTO_SWITCH))) {
- set_debugreg(0, 7);
+ set_debugreg(DR7_FIXED_1, 7);
set_debugreg(vcpu->arch.eff_db[0], 0);
set_debugreg(vcpu->arch.eff_db[1], 1);
set_debugreg(vcpu->arch.eff_db[2], 2);
@@ -11044,7 +11050,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
kvm_x86_call(set_dr6)(vcpu, vcpu->arch.dr6);
} else if (unlikely(hw_breakpoint_active())) {
- set_debugreg(0, 7);
+ set_debugreg(DR7_FIXED_1, 7);
}
vcpu->arch.host_debugctl = get_debugctlmsr();
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 38b33cdd4232..d6b2a665b499 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -1526,7 +1526,7 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
if (kvm_read_guest_virt(vcpu, (gva_t)sched_poll.ports, ports,
sched_poll.nr_ports * sizeof(*ports), &e)) {
*r = -EFAULT;
- return true;
+ goto out;
}
for (i = 0; i < sched_poll.nr_ports; i++) {
@@ -1571,7 +1571,8 @@ out:
static void cancel_evtchn_poll(struct timer_list *t)
{
- struct kvm_vcpu *vcpu = from_timer(vcpu, t, arch.xen.poll_timer);
+ struct kvm_vcpu *vcpu = timer_container_of(vcpu, t,
+ arch.xen.poll_timer);
kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
kvm_vcpu_kick(vcpu);
@@ -1970,8 +1971,19 @@ int kvm_xen_setup_evtchn(struct kvm *kvm,
{
struct kvm_vcpu *vcpu;
- if (ue->u.xen_evtchn.port >= max_evtchn_port(kvm))
- return -EINVAL;
+ /*
+ * Don't check for the port being within range of max_evtchn_port().
+ * Userspace can configure what ever targets it likes; events just won't
+ * be delivered if/while the target is invalid, just like userspace can
+ * configure MSIs which target non-existent APICs.
+ *
+ * This allow on Live Migration and Live Update, the IRQ routing table
+ * can be restored *independently* of other things like creating vCPUs,
+ * without imposing an ordering dependency on userspace. In this
+ * particular case, the problematic ordering would be with setting the
+ * Xen 'long mode' flag, which changes max_evtchn_port() to allow 4096
+ * instead of 1024 event channels.
+ */
/* We only support 2 level event channels for now */
if (ue->u.xen_evtchn.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 607d6a2e66e2..8a34fff6ab2b 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -30,7 +30,6 @@
#include <linux/initrd.h>
#include <linux/cpumask.h>
#include <linux/gfp.h>
-#include <linux/execmem.h>
#include <asm/asm.h>
#include <asm/bios_ebda.h>
@@ -749,8 +748,6 @@ void mark_rodata_ro(void)
pr_info("Write protecting kernel text and read-only data: %luk\n",
size >> 10);
- execmem_cache_make_ro();
-
kernel_set_to_readonly = 1;
#ifdef CONFIG_CPA_DEBUG
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ee66fae9ebcc..fdb6cab524f0 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -34,7 +34,6 @@
#include <linux/gfp.h>
#include <linux/kcore.h>
#include <linux/bootmem_info.h>
-#include <linux/execmem.h>
#include <asm/processor.h>
#include <asm/bios_ebda.h>
@@ -1392,8 +1391,6 @@ void mark_rodata_ro(void)
(end - start) >> 10);
set_memory_ro(start, (end - start) >> PAGE_SHIFT);
- execmem_cache_make_ro();
-
kernel_set_to_readonly = 1;
/*
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 46edc11726b7..8834c76f91c9 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -1257,6 +1257,9 @@ static int collapse_pmd_page(pmd_t *pmd, unsigned long addr,
pgprot_t pgprot;
int i = 0;
+ if (!cpu_feature_enabled(X86_FEATURE_PSE))
+ return 0;
+
addr &= PMD_MASK;
pte = pte_offset_kernel(pmd, addr);
first = *pte;
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 190299834011..c0c40b67524e 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -98,6 +98,11 @@ void __init pti_check_boottime_disable(void)
return;
setup_force_cpu_cap(X86_FEATURE_PTI);
+
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
+ pr_debug("PTI enabled, disabling INVLPGB\n");
+ setup_clear_cpu_cap(X86_FEATURE_INVLPGB);
+ }
}
static int __init pti_parse_cmdline(char *arg)
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index 4933fb337983..c1efd5b0d198 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -8,13 +8,13 @@ obj-$(CONFIG_PCI_OLPC) += olpc.o
obj-$(CONFIG_PCI_XEN) += xen.o
obj-y += fixup.o
-obj-$(CONFIG_X86_INTEL_CE) += ce4100.o
obj-$(CONFIG_ACPI) += acpi.o
obj-y += legacy.o irq.o
-obj-$(CONFIG_X86_NUMACHIP) += numachip.o
+obj-$(CONFIG_X86_INTEL_CE) += ce4100.o
+obj-$(CONFIG_X86_INTEL_MID) += intel_mid.o
-obj-$(CONFIG_X86_INTEL_MID) += intel_mid_pci.o
+obj-$(CONFIG_X86_NUMACHIP) += numachip.o
obj-y += common.o early.o
obj-y += bus_numa.o
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid.c
index b433b1753016..b433b1753016 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid.c
diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c
index a7c23f2a58c9..a2294c1649f6 100644
--- a/arch/x86/power/hibernate.c
+++ b/arch/x86/power/hibernate.c
@@ -192,7 +192,8 @@ out:
int arch_resume_nosmt(void)
{
- int ret = 0;
+ int ret;
+
/*
* We reached this while coming out of hibernation. This means
* that SMT siblings are sleeping in hlt, as mwait is not safe
@@ -206,18 +207,10 @@ int arch_resume_nosmt(void)
* Called with hotplug disabled.
*/
cpu_hotplug_enable();
- if (cpu_smt_control == CPU_SMT_DISABLED ||
- cpu_smt_control == CPU_SMT_FORCE_DISABLED) {
- enum cpuhp_smt_control old = cpu_smt_control;
-
- ret = cpuhp_smt_enable();
- if (ret)
- goto out;
- ret = cpuhp_smt_disable(old);
- if (ret)
- goto out;
- }
-out:
+
+ ret = arch_cpu_rescan_dead_smt_siblings();
+
cpu_hotplug_disable();
+
return ret;
}
diff --git a/arch/x86/um/asm/checksum.h b/arch/x86/um/asm/checksum.h
index b07824500363..ddc144657efa 100644
--- a/arch/x86/um/asm/checksum.h
+++ b/arch/x86/um/asm/checksum.h
@@ -20,6 +20,9 @@
*/
extern __wsum csum_partial(const void *buff, int len, __wsum sum);
+/* Do not call this directly. Declared for export type visibility. */
+extern __visible __wsum csum_partial_copy_generic(const void *src, void *dst, int len);
+
/**
* csum_fold - Fold and invert a 32bit checksum.
* sum: 32bit unfolded sum
diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h
index 478710384b34..e222d2ae28fd 100644
--- a/arch/x86/um/asm/processor.h
+++ b/arch/x86/um/asm/processor.h
@@ -21,10 +21,10 @@
#include <asm/user.h>
-/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-static __always_inline void rep_nop(void)
+/* PAUSE is a good thing to insert into busy-wait loops. */
+static __always_inline void native_pause(void)
{
- __asm__ __volatile__("rep;nop": : :"memory");
+ __asm__ __volatile__("pause": : :"memory");
}
static __always_inline void cpu_relax(void)
@@ -33,7 +33,7 @@ static __always_inline void cpu_relax(void)
time_travel_mode == TT_MODE_EXTERNAL)
time_travel_ndelay(1);
else
- rep_nop();
+ native_pause();
}
#define task_pt_regs(t) (&(t)->thread.regs)
diff --git a/arch/x86/um/os-Linux/mcontext.c b/arch/x86/um/os-Linux/mcontext.c
index 37decaa74761..a21403df6663 100644
--- a/arch/x86/um/os-Linux/mcontext.c
+++ b/arch/x86/um/os-Linux/mcontext.c
@@ -1,7 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
-#include <sys/ucontext.h>
#define __FRAME_OFFSETS
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <sys/ucontext.h>
#include <asm/ptrace.h>
+#include <asm/sigcontext.h>
#include <sysdep/ptrace.h>
#include <sysdep/mcontext.h>
#include <arch.h>
@@ -18,6 +21,10 @@ void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc)
COPY2(UESP, ESP); /* sic */
COPY(EBX); COPY(EDX); COPY(ECX); COPY(EAX);
COPY(EIP); COPY_SEG_CPL3(CS); COPY(EFL); COPY_SEG_CPL3(SS);
+#undef COPY2
+#undef COPY
+#undef COPY_SEG
+#undef COPY_SEG_CPL3
#else
#define COPY2(X,Y) regs->gp[X/sizeof(unsigned long)] = mc->gregs[REG_##Y]
#define COPY(X) regs->gp[X/sizeof(unsigned long)] = mc->gregs[REG_##X]
@@ -29,6 +36,8 @@ void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc)
COPY2(EFLAGS, EFL);
COPY2(CS, CSGSFS);
regs->gp[SS / sizeof(unsigned long)] = mc->gregs[REG_CSGSFS] >> 48;
+#undef COPY2
+#undef COPY
#endif
}
@@ -42,3 +51,210 @@ void mc_set_rip(void *_mc, void *target)
mc->gregs[REG_RIP] = (unsigned long)target;
#endif
}
+
+/* Same thing, but the copy macros are turned around. */
+void get_mc_from_regs(struct uml_pt_regs *regs, mcontext_t *mc, int single_stepping)
+{
+#ifdef __i386__
+#define COPY2(X,Y) mc->gregs[REG_##Y] = regs->gp[X]
+#define COPY(X) mc->gregs[REG_##X] = regs->gp[X]
+#define COPY_SEG(X) mc->gregs[REG_##X] = regs->gp[X] & 0xffff;
+#define COPY_SEG_CPL3(X) mc->gregs[REG_##X] = (regs->gp[X] & 0xffff) | 3;
+ COPY_SEG(GS); COPY_SEG(FS); COPY_SEG(ES); COPY_SEG(DS);
+ COPY(EDI); COPY(ESI); COPY(EBP);
+ COPY2(UESP, ESP); /* sic */
+ COPY(EBX); COPY(EDX); COPY(ECX); COPY(EAX);
+ COPY(EIP); COPY_SEG_CPL3(CS); COPY(EFL); COPY_SEG_CPL3(SS);
+#else
+#define COPY2(X,Y) mc->gregs[REG_##Y] = regs->gp[X/sizeof(unsigned long)]
+#define COPY(X) mc->gregs[REG_##X] = regs->gp[X/sizeof(unsigned long)]
+ COPY(R8); COPY(R9); COPY(R10); COPY(R11);
+ COPY(R12); COPY(R13); COPY(R14); COPY(R15);
+ COPY(RDI); COPY(RSI); COPY(RBP); COPY(RBX);
+ COPY(RDX); COPY(RAX); COPY(RCX); COPY(RSP);
+ COPY(RIP);
+ COPY2(EFLAGS, EFL);
+ mc->gregs[REG_CSGSFS] = mc->gregs[REG_CSGSFS] & 0xffffffffffffl;
+ mc->gregs[REG_CSGSFS] |= (regs->gp[SS / sizeof(unsigned long)] & 0xffff) << 48;
+#endif
+
+ if (single_stepping)
+ mc->gregs[REG_EFL] |= X86_EFLAGS_TF;
+ else
+ mc->gregs[REG_EFL] &= ~X86_EFLAGS_TF;
+}
+
+#ifdef CONFIG_X86_32
+struct _xstate_64 {
+ struct _fpstate_64 fpstate;
+ struct _header xstate_hdr;
+ struct _ymmh_state ymmh;
+ /* New processor state extensions go here: */
+};
+
+/* Not quite the right structures as these contain more information */
+int um_i387_from_fxsr(struct _fpstate_32 *i387,
+ const struct _fpstate_64 *fxsave);
+int um_fxsr_from_i387(struct _fpstate_64 *fxsave,
+ const struct _fpstate_32 *from);
+#else
+#define _xstate_64 _xstate
+#endif
+
+static struct _fpstate *get_fpstate(struct stub_data *data,
+ mcontext_t *mcontext,
+ int *fp_size)
+{
+ struct _fpstate *res;
+
+ /* Assume floating point registers are on the same page */
+ res = (void *)(((unsigned long)mcontext->fpregs &
+ (UM_KERN_PAGE_SIZE - 1)) +
+ (unsigned long)&data->sigstack[0]);
+
+ if ((void *)res + sizeof(struct _fpstate) >
+ (void *)data->sigstack + sizeof(data->sigstack))
+ return NULL;
+
+ if (res->sw_reserved.magic1 != FP_XSTATE_MAGIC1) {
+ *fp_size = sizeof(struct _fpstate);
+ } else {
+ char *magic2_addr;
+
+ magic2_addr = (void *)res;
+ magic2_addr += res->sw_reserved.extended_size;
+ magic2_addr -= FP_XSTATE_MAGIC2_SIZE;
+
+ /* We still need to be within our stack */
+ if ((void *)magic2_addr >
+ (void *)data->sigstack + sizeof(data->sigstack))
+ return NULL;
+
+ /* If we do not read MAGIC2, then we did something wrong */
+ if (*(__u32 *)magic2_addr != FP_XSTATE_MAGIC2)
+ return NULL;
+
+ /* Remove MAGIC2 from the size, we do not save/restore it */
+ *fp_size = res->sw_reserved.extended_size -
+ FP_XSTATE_MAGIC2_SIZE;
+ }
+
+ return res;
+}
+
+int get_stub_state(struct uml_pt_regs *regs, struct stub_data *data,
+ unsigned long *fp_size_out)
+{
+ mcontext_t *mcontext;
+ struct _fpstate *fpstate_stub;
+ struct _xstate_64 *xstate_stub;
+ int fp_size, xstate_size;
+
+ /* mctx_offset is verified by wait_stub_done_seccomp */
+ mcontext = (void *)&data->sigstack[data->mctx_offset];
+
+ get_regs_from_mc(regs, mcontext);
+
+ fpstate_stub = get_fpstate(data, mcontext, &fp_size);
+ if (!fpstate_stub)
+ return -EINVAL;
+
+#ifdef CONFIG_X86_32
+ xstate_stub = (void *)&fpstate_stub->_fxsr_env;
+ xstate_size = fp_size - offsetof(struct _fpstate_32, _fxsr_env);
+#else
+ xstate_stub = (void *)fpstate_stub;
+ xstate_size = fp_size;
+#endif
+
+ if (fp_size_out)
+ *fp_size_out = xstate_size;
+
+ if (xstate_size > host_fp_size)
+ return -ENOSPC;
+
+ memcpy(&regs->fp, xstate_stub, xstate_size);
+
+ /* We do not need to read the x86_64 FS_BASE/GS_BASE registers as
+ * we do not permit userspace to set them directly.
+ */
+
+#ifdef CONFIG_X86_32
+ /* Read the i387 legacy FP registers */
+ if (um_fxsr_from_i387((void *)&regs->fp, fpstate_stub))
+ return -EINVAL;
+#endif
+
+ return 0;
+}
+
+/* Copied because we cannot include regset.h here. */
+struct task_struct;
+struct user_regset;
+struct membuf {
+ void *p;
+ size_t left;
+};
+
+int fpregs_legacy_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to);
+
+int set_stub_state(struct uml_pt_regs *regs, struct stub_data *data,
+ int single_stepping)
+{
+ mcontext_t *mcontext;
+ struct _fpstate *fpstate_stub;
+ struct _xstate_64 *xstate_stub;
+ int fp_size, xstate_size;
+
+ /* mctx_offset is verified by wait_stub_done_seccomp */
+ mcontext = (void *)&data->sigstack[data->mctx_offset];
+
+ if ((unsigned long)mcontext < (unsigned long)data->sigstack ||
+ (unsigned long)mcontext >
+ (unsigned long) data->sigstack +
+ sizeof(data->sigstack) - sizeof(*mcontext))
+ return -EINVAL;
+
+ get_mc_from_regs(regs, mcontext, single_stepping);
+
+ fpstate_stub = get_fpstate(data, mcontext, &fp_size);
+ if (!fpstate_stub)
+ return -EINVAL;
+
+#ifdef CONFIG_X86_32
+ xstate_stub = (void *)&fpstate_stub->_fxsr_env;
+ xstate_size = fp_size - offsetof(struct _fpstate_32, _fxsr_env);
+#else
+ xstate_stub = (void *)fpstate_stub;
+ xstate_size = fp_size;
+#endif
+
+ memcpy(xstate_stub, &regs->fp, xstate_size);
+
+#ifdef __i386__
+ /*
+ * On x86, the GDT entries are updated by arch_set_tls.
+ */
+
+ /* Store the i387 legacy FP registers which the host will use */
+ if (um_i387_from_fxsr(fpstate_stub, (void *)&regs->fp))
+ return -EINVAL;
+#else
+ /*
+ * On x86_64, we need to sync the FS_BASE/GS_BASE registers using the
+ * arch specific data.
+ */
+ if (data->arch_data.fs_base != regs->gp[FS_BASE / sizeof(unsigned long)]) {
+ data->arch_data.fs_base = regs->gp[FS_BASE / sizeof(unsigned long)];
+ data->arch_data.sync |= STUB_SYNC_FS_BASE;
+ }
+ if (data->arch_data.gs_base != regs->gp[GS_BASE / sizeof(unsigned long)]) {
+ data->arch_data.gs_base = regs->gp[GS_BASE / sizeof(unsigned long)];
+ data->arch_data.sync |= STUB_SYNC_GS_BASE;
+ }
+#endif
+
+ return 0;
+}
diff --git a/arch/x86/um/ptrace.c b/arch/x86/um/ptrace.c
index 57c504fd5626..fae8aabad10f 100644
--- a/arch/x86/um/ptrace.c
+++ b/arch/x86/um/ptrace.c
@@ -25,7 +25,8 @@ static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
return tmp;
}
-static inline unsigned long twd_fxsr_to_i387(struct user_fxsr_struct *fxsave)
+static inline unsigned long
+twd_fxsr_to_i387(const struct user_fxsr_struct *fxsave)
{
struct _fpxreg *st = NULL;
unsigned long twd = (unsigned long) fxsave->twd;
@@ -69,12 +70,16 @@ static inline unsigned long twd_fxsr_to_i387(struct user_fxsr_struct *fxsave)
return ret;
}
-/* Get/set the old 32bit i387 registers (pre-FPX) */
-static int fpregs_legacy_get(struct task_struct *target,
- const struct user_regset *regset,
- struct membuf to)
+/*
+ * Get/set the old 32bit i387 registers (pre-FPX)
+ *
+ * We provide simple wrappers for mcontext.c, they are only defined locally
+ * because mcontext.c is userspace facing and needs to a different definition
+ * of the structures.
+ */
+static int _um_i387_from_fxsr(struct membuf to,
+ const struct user_fxsr_struct *fxsave)
{
- struct user_fxsr_struct *fxsave = (void *)target->thread.regs.regs.fp;
int i;
membuf_store(&to, (unsigned long)fxsave->cwd | 0xffff0000ul);
@@ -91,23 +96,36 @@ static int fpregs_legacy_get(struct task_struct *target,
return 0;
}
-static int fpregs_legacy_set(struct task_struct *target,
+int um_i387_from_fxsr(struct user_i387_struct *i387,
+ const struct user_fxsr_struct *fxsave);
+
+int um_i387_from_fxsr(struct user_i387_struct *i387,
+ const struct user_fxsr_struct *fxsave)
+{
+ struct membuf to = {
+ .p = i387,
+ .left = sizeof(*i387),
+ };
+
+ return _um_i387_from_fxsr(to, fxsave);
+}
+
+static int fpregs_legacy_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
+ struct membuf to)
{
struct user_fxsr_struct *fxsave = (void *)target->thread.regs.regs.fp;
- const struct user_i387_struct *from;
- struct user_i387_struct buf;
- int i;
- if (ubuf) {
- if (copy_from_user(&buf, ubuf, sizeof(buf)))
- return -EFAULT;
- from = &buf;
- } else {
- from = kbuf;
- }
+ return _um_i387_from_fxsr(to, fxsave);
+}
+
+int um_fxsr_from_i387(struct user_fxsr_struct *fxsave,
+ const struct user_i387_struct *from);
+
+int um_fxsr_from_i387(struct user_fxsr_struct *fxsave,
+ const struct user_i387_struct *from)
+{
+ int i;
fxsave->cwd = (unsigned short)(from->cwd & 0xffff);
fxsave->swd = (unsigned short)(from->swd & 0xffff);
@@ -125,6 +143,26 @@ static int fpregs_legacy_set(struct task_struct *target,
return 0;
}
+
+static int fpregs_legacy_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ struct user_fxsr_struct *fxsave = (void *)target->thread.regs.regs.fp;
+ const struct user_i387_struct *from;
+ struct user_i387_struct buf;
+
+ if (ubuf) {
+ if (copy_from_user(&buf, ubuf, sizeof(buf)))
+ return -EFAULT;
+ from = &buf;
+ } else {
+ from = kbuf;
+ }
+
+ return um_fxsr_from_i387(fxsave, from);
+}
#endif
static int genregs_get(struct task_struct *target,
diff --git a/arch/x86/um/shared/sysdep/kernel-offsets.h b/arch/x86/um/shared/sysdep/kernel-offsets.h
index 48de3a71f845..6fd1ed400399 100644
--- a/arch/x86/um/shared/sysdep/kernel-offsets.h
+++ b/arch/x86/um/shared/sysdep/kernel-offsets.h
@@ -4,7 +4,9 @@
#include <linux/elf.h>
#include <linux/crypto.h>
#include <linux/kbuild.h>
+#include <linux/audit.h>
#include <asm/mman.h>
+#include <asm/seccomp.h>
/* workaround for a warning with -Wmissing-prototypes */
void foo(void);
diff --git a/arch/x86/um/shared/sysdep/mcontext.h b/arch/x86/um/shared/sysdep/mcontext.h
index b724c54da316..6fe490cc5b98 100644
--- a/arch/x86/um/shared/sysdep/mcontext.h
+++ b/arch/x86/um/shared/sysdep/mcontext.h
@@ -6,7 +6,16 @@
#ifndef __SYS_SIGCONTEXT_X86_H
#define __SYS_SIGCONTEXT_X86_H
+#include <stub-data.h>
+
extern void get_regs_from_mc(struct uml_pt_regs *, mcontext_t *);
+extern void get_mc_from_regs(struct uml_pt_regs *regs, mcontext_t *mc,
+ int single_stepping);
+
+extern int get_stub_state(struct uml_pt_regs *regs, struct stub_data *data,
+ unsigned long *fp_size_out);
+extern int set_stub_state(struct uml_pt_regs *regs, struct stub_data *data,
+ int single_stepping);
#ifdef __i386__
diff --git a/arch/x86/um/shared/sysdep/stub-data.h b/arch/x86/um/shared/sysdep/stub-data.h
new file mode 100644
index 000000000000..82b1b7f8ac3d
--- /dev/null
+++ b/arch/x86/um/shared/sysdep/stub-data.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARCH_STUB_DATA_H
+#define __ARCH_STUB_DATA_H
+
+#ifdef __i386__
+#include <generated/asm-offsets.h>
+#include <asm/ldt.h>
+
+struct stub_data_arch {
+ int sync;
+ struct user_desc tls[UM_KERN_GDT_ENTRY_TLS_ENTRIES];
+};
+#else
+#define STUB_SYNC_FS_BASE (1 << 0)
+#define STUB_SYNC_GS_BASE (1 << 1)
+struct stub_data_arch {
+ int sync;
+ unsigned long fs_base;
+ unsigned long gs_base;
+};
+#endif
+
+#endif /* __ARCH_STUB_DATA_H */
diff --git a/arch/x86/um/shared/sysdep/stub.h b/arch/x86/um/shared/sysdep/stub.h
index dc89f4423454..4fa58f5b4fca 100644
--- a/arch/x86/um/shared/sysdep/stub.h
+++ b/arch/x86/um/shared/sysdep/stub.h
@@ -13,3 +13,5 @@
extern void stub_segv_handler(int, siginfo_t *, void *);
extern void stub_syscall_handler(void);
+extern void stub_signal_interrupt(int, siginfo_t *, void *);
+extern void stub_signal_restorer(void);
diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h
index 390988132c0a..df568fc3ceb4 100644
--- a/arch/x86/um/shared/sysdep/stub_32.h
+++ b/arch/x86/um/shared/sysdep/stub_32.h
@@ -131,4 +131,17 @@ static __always_inline void *get_stub_data(void)
"call *%%eax ;" \
:: "i" ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE), \
"i" (&fn))
+
+static __always_inline void
+stub_seccomp_restore_state(struct stub_data_arch *arch)
+{
+ for (int i = 0; i < sizeof(arch->tls) / sizeof(arch->tls[0]); i++) {
+ if (arch->sync & (1 << i))
+ stub_syscall1(__NR_set_thread_area,
+ (unsigned long) &arch->tls[i]);
+ }
+
+ arch->sync = 0;
+}
+
#endif
diff --git a/arch/x86/um/shared/sysdep/stub_64.h b/arch/x86/um/shared/sysdep/stub_64.h
index 294affbec742..9cfd31afa769 100644
--- a/arch/x86/um/shared/sysdep/stub_64.h
+++ b/arch/x86/um/shared/sysdep/stub_64.h
@@ -10,6 +10,7 @@
#include <sysdep/ptrace_user.h>
#include <generated/asm-offsets.h>
#include <linux/stddef.h>
+#include <asm/prctl.h>
#define STUB_MMAP_NR __NR_mmap
#define MMAP_OFFSET(o) (o)
@@ -134,4 +135,20 @@ static __always_inline void *get_stub_data(void)
"call *%%rax ;" \
:: "i" ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE), \
"i" (&fn))
+
+static __always_inline void
+stub_seccomp_restore_state(struct stub_data_arch *arch)
+{
+ /*
+ * We could use _writefsbase_u64/_writegsbase_u64 if the host reports
+ * support in the hwcaps (HWCAP2_FSGSBASE).
+ */
+ if (arch->sync & STUB_SYNC_FS_BASE)
+ stub_syscall2(__NR_arch_prctl, ARCH_SET_FS, arch->fs_base);
+ if (arch->sync & STUB_SYNC_GS_BASE)
+ stub_syscall2(__NR_arch_prctl, ARCH_SET_GS, arch->gs_base);
+
+ arch->sync = 0;
+}
+
#endif
diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c
index fbb129023080..cb3f17627d16 100644
--- a/arch/x86/um/tls_32.c
+++ b/arch/x86/um/tls_32.c
@@ -12,6 +12,7 @@
#include <skas.h>
#include <sysdep/tls.h>
#include <asm/desc.h>
+#include <stub-data.h>
/*
* If needed we can detect when it's uninitialized.
@@ -21,14 +22,25 @@
static int host_supports_tls = -1;
int host_gdt_entry_tls_min;
-static int do_set_thread_area(struct user_desc *info)
+static int do_set_thread_area(struct task_struct* task, struct user_desc *info)
{
int ret;
- u32 cpu;
- cpu = get_cpu();
- ret = os_set_thread_area(info, userspace_pid[cpu]);
- put_cpu();
+ if (info->entry_number < host_gdt_entry_tls_min ||
+ info->entry_number >= host_gdt_entry_tls_min + GDT_ENTRY_TLS_ENTRIES)
+ return -EINVAL;
+
+ if (using_seccomp) {
+ int idx = info->entry_number - host_gdt_entry_tls_min;
+ struct stub_data *data = (void *)task->mm->context.id.stack;
+
+ data->arch_data.tls[idx] = *info;
+ data->arch_data.sync |= BIT(idx);
+
+ return 0;
+ }
+
+ ret = os_set_thread_area(info, task->mm->context.id.pid);
if (ret)
printk(KERN_ERR "PTRACE_SET_THREAD_AREA failed, err = %d, "
@@ -97,7 +109,7 @@ static int load_TLS(int flags, struct task_struct *to)
if (!(flags & O_FORCE) && curr->flushed)
continue;
- ret = do_set_thread_area(&curr->tls);
+ ret = do_set_thread_area(current, &curr->tls);
if (ret)
goto out;
@@ -275,7 +287,7 @@ SYSCALL_DEFINE1(set_thread_area, struct user_desc __user *, user_desc)
return -EFAULT;
}
- ret = do_set_thread_area(&info);
+ ret = do_set_thread_area(current, &info);
if (ret)
return ret;
return set_tls_entry(current, &info, idx, 1);
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index 2457d13c3f9e..c7a9a087ccaf 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -75,8 +75,9 @@ static inline void seamcall_err_ret(u64 fn, u64 err,
args->r9, args->r10, args->r11);
}
-static inline int sc_retry_prerr(sc_func_t func, sc_err_func_t err_func,
- u64 fn, struct tdx_module_args *args)
+static __always_inline int sc_retry_prerr(sc_func_t func,
+ sc_err_func_t err_func,
+ u64 fn, struct tdx_module_args *args)
{
u64 sret = sc_retry(func, fn, args);
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index cb1725c40e36..d62aa1c316fc 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -349,7 +349,7 @@ ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile
index f28b8e3d717e..d3ef0407401f 100644
--- a/arch/xtensa/kernel/Makefile
+++ b/arch/xtensa/kernel/Makefile
@@ -3,7 +3,7 @@
# Makefile for the Linux/Xtensa kernel.
#
-extra-y := vmlinux.lds
+always-$(KBUILD_BUILTIN) := vmlinux.lds
obj-y := head.o align.o coprocessor.o entry.o irq.o platform.o process.o \
ptrace.o setup.o signal.o stacktrace.o syscall.o time.o traps.o \
diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c
index c6d8c62695e1..f0a63b2f85cc 100644
--- a/arch/xtensa/platforms/iss/network.c
+++ b/arch/xtensa/platforms/iss/network.c
@@ -338,7 +338,7 @@ static int iss_net_poll(struct iss_net_private *lp)
static void iss_net_timer(struct timer_list *t)
{
- struct iss_net_private *lp = from_timer(lp, t, timer);
+ struct iss_net_private *lp = timer_container_of(lp, t, timer);
iss_net_poll(lp);
mod_timer(&lp->timer, jiffies + lp->timer_val);