summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/arc/Kconfig5
-rw-r--r--arch/arc/include/asm/cacheflush.h43
-rw-r--r--arch/arc/include/asm/entry-arcv2.h32
-rw-r--r--arch/arc/include/asm/entry-compact.h87
-rw-r--r--arch/arc/include/asm/entry.h110
-rw-r--r--arch/arc/include/asm/hugepage.h7
-rw-r--r--arch/arc/include/asm/ptrace.h14
-rw-r--r--arch/arc/kernel/setup.c4
-rw-r--r--arch/arc/kernel/signal.c6
-rw-r--r--arch/arc/mm/cache.c136
-rw-r--r--arch/arc/mm/mmap.c21
-rw-r--r--arch/arc/mm/tlb.c16
-rw-r--r--arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts4
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts4
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx7s.dtsi8
-rw-r--r--arch/arm/boot/dts/nxp/mxs/imx28-xea.dts1
-rw-r--r--arch/arm/boot/dts/rockchip/rk3128.dtsi2
-rw-r--r--arch/arm/boot/dts/rockchip/rk322x.dtsi6
-rw-r--r--arch/arm/boot/dts/ti/omap/am33xx.dtsi1
-rw-r--r--arch/arm/boot/dts/ti/omap/dra7.dtsi2
-rw-r--r--arch/arm/include/asm/topology.h1
-rw-r--r--arch/arm/kernel/perf_event_v6.c28
-rw-r--r--arch/arm/kernel/perf_event_v7.c50
-rw-r--r--arch/arm/kernel/perf_event_xscale.c44
-rw-r--r--arch/arm/mach-imx/mmdc.c7
-rw-r--r--arch/arm/mach-omap2/id.c5
-rw-r--r--arch/arm/mach-sunxi/mc_smp.c8
-rw-r--r--arch/arm/tools/syscall.tbl2
-rw-r--r--arch/arm64/Kconfig4
-rw-r--r--arch/arm64/Makefile2
-rw-r--r--arch/arm64/boot/Makefile2
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi3
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts3
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi5
-rw-r--r--arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi8
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mq.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi11
-rw-r--r--arch/arm64/boot/dts/freescale/imx8ulp.dtsi6
-rw-r--r--arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/imx93.dtsi10
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts2
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts2
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts12
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7986a.dtsi24
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8173-evb.dts4
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8183-evb.dts4
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi8
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi96
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8183.dtsi242
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8186.dtsi44
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi2
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8195.dtsi6
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3328.dtsi2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi3
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts4
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399.dtsi6
-rw-r--r--arch/arm64/boot/dts/rockchip/rk356x.dtsi2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi4
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588s.dtsi1
-rwxr-xr-xarch/arm64/boot/install.sh3
-rw-r--r--arch/arm64/include/asm/assembler.h2
-rw-r--r--arch/arm64/include/asm/cache.h6
-rw-r--r--arch/arm64/include/asm/cpufeature.h6
-rw-r--r--arch/arm64/include/asm/fpsimdmacros.h8
-rw-r--r--arch/arm64/include/asm/kernel-pgtable.h27
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h7
-rw-r--r--arch/arm64/include/asm/kvm_pgtable.h2
-rw-r--r--arch/arm64/include/asm/memory.h7
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h2
-rw-r--r--arch/arm64/include/asm/pgtable.h6
-rw-r--r--arch/arm64/include/asm/processor.h3
-rw-r--r--arch/arm64/include/asm/simd.h11
-rw-r--r--arch/arm64/include/asm/stacktrace/common.h19
-rw-r--r--arch/arm64/include/asm/stacktrace/nvhe.h2
-rw-r--r--arch/arm64/include/asm/syscall_wrapper.h4
-rw-r--r--arch/arm64/include/asm/sysreg.h25
-rw-r--r--arch/arm64/include/asm/thread_info.h1
-rw-r--r--arch/arm64/include/asm/tlb.h15
-rw-r--r--arch/arm64/include/asm/tlbflush.h100
-rw-r--r--arch/arm64/include/asm/topology.h1
-rw-r--r--arch/arm64/include/asm/unistd.h2
-rw-r--r--arch/arm64/include/asm/unistd32.h4
-rw-r--r--arch/arm64/kernel/cpufeature.c160
-rw-r--r--arch/arm64/kernel/cpuinfo.c5
-rw-r--r--arch/arm64/kernel/fpsimd.c169
-rw-r--r--arch/arm64/kernel/head.S2
-rw-r--r--arch/arm64/kernel/idreg-override.c153
-rw-r--r--arch/arm64/kernel/irq.c7
-rw-r--r--arch/arm64/kernel/kaslr.c7
-rw-r--r--arch/arm64/kernel/pi/Makefile1
-rw-r--r--arch/arm64/kernel/smp.c12
-rw-r--r--arch/arm64/kernel/stacktrace.c146
-rw-r--r--arch/arm64/kernel/topology.c26
-rw-r--r--arch/arm64/kernel/vdso32/Makefile8
-rw-r--r--arch/arm64/kvm/arm.c2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/pkvm.c2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/tlb.c61
-rw-r--r--arch/arm64/kvm/hyp/vhe/tlb.c13
-rw-r--r--arch/arm64/kvm/pmu-emul.c8
-rw-r--r--arch/arm64/kvm/sys_regs.c4
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c47
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio-v3.c4
-rw-r--r--arch/arm64/kvm/vgic/vgic-v4.c4
-rw-r--r--arch/arm64/kvm/vgic/vgic.h1
-rw-r--r--arch/arm64/lib/copy_page.S11
-rw-r--r--arch/arm64/mm/fault.c2
-rw-r--r--arch/arm64/mm/mmu.c6
-rw-r--r--arch/arm64/tools/cpucaps2
-rw-r--r--arch/arm64/tools/sysreg325
-rw-r--r--arch/loongarch/Makefile2
-rw-r--r--arch/loongarch/include/asm/efi.h2
-rw-r--r--arch/loongarch/include/asm/elf.h2
-rw-r--r--arch/loongarch/include/asm/loongarch.h5
-rw-r--r--arch/loongarch/kernel/stacktrace.c2
-rw-r--r--arch/loongarch/kernel/unwind.c1
-rw-r--r--arch/loongarch/kernel/unwind_prologue.c2
-rw-r--r--arch/loongarch/net/bpf_jit.c18
-rw-r--r--arch/m68k/configs/amiga_defconfig2
-rw-r--r--arch/m68k/configs/apollo_defconfig2
-rw-r--r--arch/m68k/configs/atari_defconfig2
-rw-r--r--arch/m68k/configs/bvme6000_defconfig2
-rw-r--r--arch/m68k/configs/hp300_defconfig2
-rw-r--r--arch/m68k/configs/mac_defconfig5
-rw-r--r--arch/m68k/configs/multi_defconfig5
-rw-r--r--arch/m68k/configs/mvme147_defconfig2
-rw-r--r--arch/m68k/configs/mvme16x_defconfig2
-rw-r--r--arch/m68k/configs/q40_defconfig2
-rw-r--r--arch/m68k/configs/sun3_defconfig2
-rw-r--r--arch/m68k/configs/sun3x_defconfig2
-rw-r--r--arch/m68k/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/microblaze/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/mips/Kconfig2
-rw-r--r--arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi3
-rw-r--r--arch/mips/boot/dts/loongson/ls7a-pch.dtsi3
-rw-r--r--arch/mips/include/asm/mach-loongson64/boot_param.h9
-rw-r--r--arch/mips/kernel/process.c25
-rw-r--r--arch/mips/kernel/smp.c4
-rw-r--r--arch/mips/kernel/syscalls/syscall_n32.tbl2
-rw-r--r--arch/mips/kernel/syscalls/syscall_n64.tbl2
-rw-r--r--arch/mips/kernel/syscalls/syscall_o32.tbl2
-rw-r--r--arch/mips/loongson64/env.c10
-rw-r--r--arch/mips/loongson64/init.c47
-rw-r--r--arch/parisc/include/asm/bug.h2
-rw-r--r--arch/parisc/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/powerpc/Kconfig1
-rw-r--r--arch/powerpc/Kconfig.debug1
-rw-r--r--arch/powerpc/Makefile25
-rw-r--r--arch/powerpc/boot/dts/fsl/t1023si-post.dtsi79
-rw-r--r--arch/powerpc/boot/dts/fsl/t1040si-post.dtsi71
-rw-r--r--arch/powerpc/configs/ppc64_defconfig1
-rw-r--r--arch/powerpc/configs/ps3_defconfig1
-rw-r--r--arch/powerpc/configs/skiroot_defconfig1
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h10
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush.h9
-rw-r--r--arch/powerpc/include/asm/ftrace.h2
-rw-r--r--arch/powerpc/include/asm/hvcall.h20
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h10
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h1
-rw-r--r--arch/powerpc/include/asm/linkage.h3
-rw-r--r--arch/powerpc/include/asm/mmu.h4
-rw-r--r--arch/powerpc/include/asm/mmzone.h8
-rw-r--r--arch/powerpc/include/asm/papr-sysparm.h17
-rw-r--r--arch/powerpc/include/asm/paravirt.h33
-rw-r--r--arch/powerpc/include/asm/ppc-pci.h5
-rw-r--r--arch/powerpc/include/asm/ps3.h6
-rw-r--r--arch/powerpc/include/asm/reg.h1
-rw-r--r--arch/powerpc/include/asm/reg_a2.h154
-rw-r--r--arch/powerpc/include/asm/rtas.h91
-rw-r--r--arch/powerpc/include/uapi/asm/papr-miscdev.h9
-rw-r--r--arch/powerpc/include/uapi/asm/papr-sysparm.h58
-rw-r--r--arch/powerpc/include/uapi/asm/papr-vpd.h22
-rw-r--r--arch/powerpc/kernel/cpu_specs_book3s_64.h15
-rw-r--r--arch/powerpc/kernel/cputable.c4
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S1
-rw-r--r--arch/powerpc/kernel/rtas.c207
-rw-r--r--arch/powerpc/kernel/rtas_pci.c8
-rw-r--r--arch/powerpc/kernel/smp.c124
-rw-r--r--arch/powerpc/kernel/swsusp_64.c2
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/powerpc/kernel/trace/ftrace_entry.S6
-rw-r--r--arch/powerpc/kernel/traps.c2
-rw-r--r--arch/powerpc/kernel/udbg_16550.c1
-rw-r--r--arch/powerpc/kernel/vdso/Makefile2
-rw-r--r--arch/powerpc/kexec/core.c1
-rw-r--r--arch/powerpc/kvm/book3s.c4
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_radix.c7
-rw-r--r--arch/powerpc/kvm/book3s_hv.c72
-rw-r--r--arch/powerpc/kvm/book3s_hv_nested.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv_nestedv2.c29
-rw-r--r--arch/powerpc/kvm/book3s_pr.c1
-rw-r--r--arch/powerpc/kvm/emulate_loadstore.c21
-rw-r--r--arch/powerpc/lib/Makefile2
-rw-r--r--arch/powerpc/lib/sstep.c14
-rw-r--r--arch/powerpc/mm/book3s64/hash_utils.c7
-rw-r--r--arch/powerpc/mm/book3s64/pgtable.c2
-rw-r--r--arch/powerpc/mm/book3s64/pkeys.c3
-rw-r--r--arch/powerpc/mm/fault.c2
-rw-r--r--arch/powerpc/mm/init-common.c5
-rw-r--r--arch/powerpc/mm/mmu_decl.h5
-rw-r--r--arch/powerpc/perf/core-book3s.c2
-rw-r--r--arch/powerpc/perf/hv-gpci.c3
-rw-r--r--arch/powerpc/perf/imc-pmu.c6
-rw-r--r--arch/powerpc/platforms/44x/Kconfig1
-rw-r--r--arch/powerpc/platforms/44x/idle.c2
-rw-r--r--arch/powerpc/platforms/512x/mpc5121_ads_cpld.c2
-rw-r--r--arch/powerpc/platforms/512x/pdm360ng.c2
-rw-r--r--arch/powerpc/platforms/83xx/suspend.c5
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_rdb.c2
-rw-r--r--arch/powerpc/platforms/86xx/Kconfig7
-rw-r--r--arch/powerpc/platforms/pasemi/setup.c2
-rw-r--r--arch/powerpc/platforms/powermac/smp.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal-irqchip.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-powercap.c6
-rw-r--r--arch/powerpc/platforms/powernv/opal-prd.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-xscom.c5
-rw-r--r--arch/powerpc/platforms/powernv/subcore.c3
-rw-r--r--arch/powerpc/platforms/ps3/Kconfig12
-rw-r--r--arch/powerpc/platforms/ps3/Makefile2
-rw-r--r--arch/powerpc/platforms/ps3/device-init.c1
-rw-r--r--arch/powerpc/platforms/ps3/gelic_udbg.c1
-rw-r--r--arch/powerpc/platforms/pseries/Makefile1
-rw-r--r--arch/powerpc/platforms/pseries/eeh_pseries.c18
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c16
-rw-r--r--arch/powerpc/platforms/pseries/papr-sysparm.c205
-rw-r--r--arch/powerpc/platforms/pseries/papr-vpd.c541
-rw-r--r--arch/powerpc/platforms/pseries/pseries.h1
-rw-r--r--arch/powerpc/platforms/pseries/suspend.c1
-rw-r--r--arch/powerpc/platforms/pseries/vas.c51
-rw-r--r--arch/powerpc/platforms/pseries/vas.h2
-rw-r--r--arch/powerpc/sysdev/grackle.c19
-rw-r--r--arch/powerpc/sysdev/xics/icp-native.c2
-rw-r--r--arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts7
-rw-r--r--arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts7
-rw-r--r--arch/riscv/boot/dts/microchip/mpfs-polarberry.dts7
-rw-r--r--arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts7
-rw-r--r--arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts7
-rw-r--r--arch/riscv/boot/dts/microchip/mpfs.dtsi1
-rw-r--r--arch/riscv/boot/dts/sophgo/cv1800b.dtsi1
-rw-r--r--arch/riscv/errata/andes/errata.c20
-rw-r--r--arch/riscv/include/asm/syscall_wrapper.h5
-rw-r--r--arch/riscv/include/asm/topology.h1
-rw-r--r--arch/riscv/kernel/head.S2
-rw-r--r--arch/riscv/kernel/module.c114
-rw-r--r--arch/riscv/kernel/sys_riscv.c2
-rw-r--r--arch/riscv/kernel/tests/module_test/test_uleb128.S8
-rw-r--r--arch/riscv/kernel/traps_misaligned.c6
-rw-r--r--arch/riscv/kvm/aia_imsic.c13
-rw-r--r--arch/riscv/mm/fault.c2
-rw-r--r--arch/s390/Kconfig2
-rw-r--r--arch/s390/configs/debug_defconfig10
-rw-r--r--arch/s390/configs/defconfig9
-rw-r--r--arch/s390/configs/zfcpdump_defconfig3
-rw-r--r--arch/s390/include/asm/fpu/api.h2
-rw-r--r--arch/s390/include/asm/syscall_wrapper.h13
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/s390/kvm/vsie.c4
-rw-r--r--arch/s390/mm/fault.c3
-rw-r--r--arch/s390/mm/pgtable.c2
-rw-r--r--arch/sh/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/sparc/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/x86/Kconfig10
-rw-r--r--arch/x86/Kconfig.cpu6
-rw-r--r--arch/x86/boot/compressed/Makefile2
-rw-r--r--arch/x86/boot/compressed/ident_map_64.c5
-rw-r--r--arch/x86/boot/compressed/idt_64.c1
-rw-r--r--arch/x86/boot/compressed/idt_handlers_64.S1
-rw-r--r--arch/x86/boot/compressed/mem.c2
-rw-r--r--arch/x86/boot/compressed/misc.h1
-rw-r--r--arch/x86/boot/pm.c7
-rw-r--r--arch/x86/boot/string.c2
-rw-r--r--arch/x86/coco/tdx/tdx.c3
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S2
-rw-r--r--arch/x86/crypto/aesni-intel_avx-x86_64.S2
-rw-r--r--arch/x86/crypto/crc32c-pcl-intel-asm_64.S2
-rw-r--r--arch/x86/crypto/sha512-avx-asm.S2
-rw-r--r--arch/x86/crypto/sha512-ssse3-asm.S2
-rw-r--r--arch/x86/entry/calling.h12
-rw-r--r--arch/x86/entry/common.c93
-rw-r--r--arch/x86/entry/entry_64.S33
-rw-r--r--arch/x86/entry/entry_64_compat.S77
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl2
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl2
-rw-r--r--arch/x86/events/amd/brs.c2
-rw-r--r--arch/x86/events/amd/core.c4
-rw-r--r--arch/x86/events/amd/ibs.c3
-rw-r--r--arch/x86/events/core.c4
-rw-r--r--arch/x86/events/intel/core.c154
-rw-r--r--arch/x86/events/intel/cstate.c158
-rw-r--r--arch/x86/events/intel/ds.c4
-rw-r--r--arch/x86/events/intel/lbr.c85
-rw-r--r--arch/x86/events/intel/uncore.c12
-rw-r--r--arch/x86/events/intel/uncore.h10
-rw-r--r--arch/x86/events/intel/uncore_discovery.c5
-rw-r--r--arch/x86/events/intel/uncore_discovery.h2
-rw-r--r--arch/x86/events/intel/uncore_nhmex.c2
-rw-r--r--arch/x86/events/intel/uncore_snbep.c208
-rw-r--r--arch/x86/events/perf_event.h12
-rw-r--r--arch/x86/events/perf_event_flags.h2
-rw-r--r--arch/x86/hyperv/hv_apic.c2
-rw-r--r--arch/x86/hyperv/irqdomain.c2
-rw-r--r--arch/x86/hyperv/ivm.c2
-rw-r--r--arch/x86/include/asm/alternative.h30
-rw-r--r--arch/x86/include/asm/amd_nb.h2
-rw-r--r--arch/x86/include/asm/apic.h2
-rw-r--r--arch/x86/include/asm/apicdef.h276
-rw-r--r--arch/x86/include/asm/barrier.h18
-rw-r--r--arch/x86/include/asm/cpufeatures.h8
-rw-r--r--arch/x86/include/asm/desc_defs.h78
-rw-r--r--arch/x86/include/asm/elf.h2
-rw-r--r--arch/x86/include/asm/extable_fixup_types.h2
-rw-r--r--arch/x86/include/asm/fpu/types.h2
-rw-r--r--arch/x86/include/asm/ia32.h18
-rw-r--r--arch/x86/include/asm/idtentry.h4
-rw-r--r--arch/x86/include/asm/io.h8
-rw-r--r--arch/x86/include/asm/iosf_mbi.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h2
-rw-r--r--arch/x86/include/asm/mce.h4
-rw-r--r--arch/x86/include/asm/msr-index.h5
-rw-r--r--arch/x86/include/asm/mwait.h20
-rw-r--r--arch/x86/include/asm/nospec-branch.h4
-rw-r--r--arch/x86/include/asm/paravirt.h79
-rw-r--r--arch/x86/include/asm/paravirt_types.h87
-rw-r--r--arch/x86/include/asm/perf_event.h4
-rw-r--r--arch/x86/include/asm/pgtable_64.h2
-rw-r--r--arch/x86/include/asm/processor.h18
-rw-r--r--arch/x86/include/asm/proto.h4
-rw-r--r--arch/x86/include/asm/qspinlock_paravirt.h4
-rw-r--r--arch/x86/include/asm/setup.h2
-rw-r--r--arch/x86/include/asm/syscall_wrapper.h34
-rw-r--r--arch/x86/include/asm/text-patching.h12
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h2
-rw-r--r--arch/x86/include/asm/vdso/gettimeofday.h2
-rw-r--r--arch/x86/include/asm/xen/interface_64.h2
-rw-r--r--arch/x86/include/uapi/asm/amd_hsmp.h2
-rw-r--r--arch/x86/kernel/acpi/boot.c2
-rw-r--r--arch/x86/kernel/alternative.c150
-rw-r--r--arch/x86/kernel/amd_gart_64.c2
-rw-r--r--arch/x86/kernel/apic/Makefile2
-rw-r--r--arch/x86/kernel/apic/apic.c2
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c2
-rw-r--r--arch/x86/kernel/apic/apic_noop.c1
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c2
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c1
-rw-r--r--arch/x86/kernel/apic/io_apic.c2
-rw-r--r--arch/x86/kernel/apic/probe_32.c1
-rw-r--r--arch/x86/kernel/apic/vector.c4
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c1
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c1
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c1
-rw-r--r--arch/x86/kernel/apm_32.c2
-rw-r--r--arch/x86/kernel/callthunks.c17
-rw-r--r--arch/x86/kernel/cpu/amd.c271
-rw-r--r--arch/x86/kernel/cpu/common.c57
-rw-r--r--arch/x86/kernel/cpu/hygon.c3
-rw-r--r--arch/x86/kernel/cpu/intel_epb.c2
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c80
-rw-r--r--arch/x86/kernel/cpu/mce/core.c72
-rw-r--r--arch/x86/kernel/cpu/mce/inject.c1
-rw-r--r--arch/x86/kernel/cpu/mce/intel.c304
-rw-r--r--arch/x86/kernel/cpu/mce/internal.h66
-rw-r--r--arch/x86/kernel/cpu/mce/threshold.c115
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c20
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c14
-rw-r--r--arch/x86/kernel/cpu/sgx/ioctl.c2
-rw-r--r--arch/x86/kernel/fpu/core.c2
-rw-r--r--arch/x86/kernel/head64.c6
-rw-r--r--arch/x86/kernel/head_64.S53
-rw-r--r--arch/x86/kernel/hpet.c4
-rw-r--r--arch/x86/kernel/idt.c2
-rw-r--r--arch/x86/kernel/kprobes/core.c3
-rw-r--r--arch/x86/kernel/kvm.c6
-rw-r--r--arch/x86/kernel/kvmclock.c2
-rw-r--r--arch/x86/kernel/ldt.c6
-rw-r--r--arch/x86/kernel/module.c20
-rw-r--r--arch/x86/kernel/paravirt.c54
-rw-r--r--arch/x86/kernel/process.c2
-rw-r--r--arch/x86/kernel/setup.c4
-rw-r--r--arch/x86/kernel/setup_percpu.c4
-rw-r--r--arch/x86/kernel/sev-shared.c2
-rw-r--r--arch/x86/kernel/sev.c11
-rw-r--r--arch/x86/kernel/smpboot.c1
-rw-r--r--arch/x86/kernel/vmlinux.lds.S13
-rw-r--r--arch/x86/kvm/cpuid.c2
-rw-r--r--arch/x86/kvm/debugfs.c1
-rw-r--r--arch/x86/kvm/hyperv.c2
-rw-r--r--arch/x86/kvm/mmu/mmu.c4
-rw-r--r--arch/x86/kvm/mmu/tdp_iter.c2
-rw-r--r--arch/x86/kvm/svm/sev.c19
-rw-r--r--arch/x86/kvm/svm/svm.c11
-rw-r--r--arch/x86/kvm/svm/svm.h2
-rw-r--r--arch/x86/kvm/vmx/nested.c2
-rw-r--r--arch/x86/kvm/vmx/vmx.c2
-rw-r--r--arch/x86/kvm/x86.c15
-rw-r--r--arch/x86/kvm/xen.c2
-rw-r--r--arch/x86/lib/csum-partial_64.c105
-rw-r--r--arch/x86/lib/delay.c2
-rw-r--r--arch/x86/lib/misc.c2
-rw-r--r--arch/x86/mm/fault.c2
-rw-r--r--arch/x86/mm/init_64.c6
-rw-r--r--arch/x86/mm/mem_encrypt_amd.c11
-rw-r--r--arch/x86/mm/pat/memtype.c2
-rw-r--r--arch/x86/mm/pat/set_memory.c4
-rw-r--r--arch/x86/mm/pti.c2
-rw-r--r--arch/x86/mm/tlb.c2
-rw-r--r--arch/x86/net/bpf_jit_comp.c48
-rw-r--r--arch/x86/net/bpf_jit_comp32.c2
-rw-r--r--arch/x86/platform/intel-quark/imr_selftest.c2
-rw-r--r--arch/x86/platform/pvh/head.S9
-rw-r--r--arch/x86/platform/uv/uv_irq.c2
-rw-r--r--arch/x86/platform/uv/uv_nmi.c2
-rw-r--r--arch/x86/platform/uv/uv_time.c2
-rw-r--r--arch/x86/realmode/init.c2
-rw-r--r--arch/x86/realmode/rm/reboot.S3
-rw-r--r--arch/x86/tools/Makefile2
-rw-r--r--arch/x86/tools/chkobjdump.awk34
-rw-r--r--arch/x86/tools/objdump_reformat.awk6
-rw-r--r--arch/x86/tools/relocs.c2
-rw-r--r--arch/x86/xen/Kconfig1
-rw-r--r--arch/x86/xen/enlighten_pv.c2
-rw-r--r--arch/x86/xen/irq.c2
-rw-r--r--arch/x86/xen/mmu_pv.c2
-rw-r--r--arch/x86/xen/xen-asm.S2
-rw-r--r--arch/xtensa/kernel/syscalls/syscall.tbl2
433 files changed, 5129 insertions, 3495 deletions
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 18c842ca6c32..186e785f5b56 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -496,3 +496,5 @@
564 common futex_wake sys_futex_wake
565 common futex_wait sys_futex_wait
566 common futex_requeue sys_futex_requeue
+567 common statmount sys_statmount
+568 common listmount sys_listmount
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 3162db540ee9..1b0483c51cc1 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -49,7 +49,6 @@ config ARC
select OF
select OF_EARLY_FLATTREE
select PCI_SYSCALL if PCI
- select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING
select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32
select TRACE_IRQFLAGS_SUPPORT
@@ -232,10 +231,6 @@ config ARC_CACHE_PAGES
Note that Global I/D ENABLE + Per Page DISABLE works but corollary
Global DISABLE + Per Page ENABLE won't work
-config ARC_CACHE_VIPT_ALIASING
- bool "Support VIPT Aliasing D$"
- depends on ARC_HAS_DCACHE && ISA_ARCOMPACT
-
endif #ARC_CACHE
config ARC_HAS_ICCM
diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h
index bd5b1a9a0544..563af3e75f01 100644
--- a/arch/arc/include/asm/cacheflush.h
+++ b/arch/arc/include/asm/cacheflush.h
@@ -44,31 +44,10 @@ void dma_cache_wback(phys_addr_t start, unsigned long sz);
#define flush_cache_dup_mm(mm) /* called on fork (VIVT only) */
-#ifndef CONFIG_ARC_CACHE_VIPT_ALIASING
-
#define flush_cache_mm(mm) /* called on munmap/exit */
#define flush_cache_range(mm, u_vstart, u_vend)
#define flush_cache_page(vma, u_vaddr, pfn) /* PF handling/COW-break */
-#else /* VIPT aliasing dcache */
-
-/* To clear out stale userspace mappings */
-void flush_cache_mm(struct mm_struct *mm);
-void flush_cache_range(struct vm_area_struct *vma,
- unsigned long start,unsigned long end);
-void flush_cache_page(struct vm_area_struct *vma,
- unsigned long user_addr, unsigned long page);
-
-/*
- * To make sure that userspace mapping is flushed to memory before
- * get_user_pages() uses a kernel mapping to access the page
- */
-#define ARCH_HAS_FLUSH_ANON_PAGE
-void flush_anon_page(struct vm_area_struct *vma,
- struct page *page, unsigned long u_vaddr);
-
-#endif /* CONFIG_ARC_CACHE_VIPT_ALIASING */
-
/*
* A new pagecache page has PG_arch_1 clear - thus dcache dirty by default
* This works around some PIO based drivers which don't call flush_dcache_page
@@ -76,28 +55,6 @@ void flush_anon_page(struct vm_area_struct *vma,
*/
#define PG_dc_clean PG_arch_1
-#define CACHE_COLORS_NUM 4
-#define CACHE_COLORS_MSK (CACHE_COLORS_NUM - 1)
-#define CACHE_COLOR(addr) (((unsigned long)(addr) >> (PAGE_SHIFT)) & CACHE_COLORS_MSK)
-
-/*
- * Simple wrapper over config option
- * Bootup code ensures that hardware matches kernel configuration
- */
-static inline int cache_is_vipt_aliasing(void)
-{
- return IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
-}
-
-/*
- * checks if two addresses (after page aligning) index into same cache set
- */
-#define addr_not_cache_congruent(addr1, addr2) \
-({ \
- cache_is_vipt_aliasing() ? \
- (CACHE_COLOR(addr1) != CACHE_COLOR(addr2)) : 0; \
-})
-
#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
do { \
memcpy(dst, src, len); \
diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h
index 4d13320e0c1b..3802a2daaf86 100644
--- a/arch/arc/include/asm/entry-arcv2.h
+++ b/arch/arc/include/asm/entry-arcv2.h
@@ -291,4 +291,36 @@
/* M = 8-1 N = 8 */
.endm
+.macro SAVE_ABI_CALLEE_REGS
+ push r13
+ push r14
+ push r15
+ push r16
+ push r17
+ push r18
+ push r19
+ push r20
+ push r21
+ push r22
+ push r23
+ push r24
+ push r25
+.endm
+
+.macro RESTORE_ABI_CALLEE_REGS
+ pop r25
+ pop r24
+ pop r23
+ pop r22
+ pop r21
+ pop r20
+ pop r19
+ pop r18
+ pop r17
+ pop r16
+ pop r15
+ pop r14
+ pop r13
+.endm
+
#endif
diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h
index a0e760eb35a8..92c3e9f13252 100644
--- a/arch/arc/include/asm/entry-compact.h
+++ b/arch/arc/include/asm/entry-compact.h
@@ -33,6 +33,91 @@
#include <asm/irqflags-compact.h>
#include <asm/thread_info.h> /* For THREAD_SIZE */
+/* Note on the LD/ST addr modes with addr reg wback
+ *
+ * LD.a same as LD.aw
+ *
+ * LD.a reg1, [reg2, x] => Pre Incr
+ * Eff Addr for load = [reg2 + x]
+ *
+ * LD.ab reg1, [reg2, x] => Post Incr
+ * Eff Addr for load = [reg2]
+ */
+
+.macro PUSHAX aux
+ lr r9, [\aux]
+ push r9
+.endm
+
+.macro POPAX aux
+ pop r9
+ sr r9, [\aux]
+.endm
+
+.macro SAVE_R0_TO_R12
+ push r0
+ push r1
+ push r2
+ push r3
+ push r4
+ push r5
+ push r6
+ push r7
+ push r8
+ push r9
+ push r10
+ push r11
+ push r12
+.endm
+
+.macro RESTORE_R12_TO_R0
+ pop r12
+ pop r11
+ pop r10
+ pop r9
+ pop r8
+ pop r7
+ pop r6
+ pop r5
+ pop r4
+ pop r3
+ pop r2
+ pop r1
+ pop r0
+.endm
+
+.macro SAVE_ABI_CALLEE_REGS
+ push r13
+ push r14
+ push r15
+ push r16
+ push r17
+ push r18
+ push r19
+ push r20
+ push r21
+ push r22
+ push r23
+ push r24
+ push r25
+.endm
+
+.macro RESTORE_ABI_CALLEE_REGS
+ pop r25
+ pop r24
+ pop r23
+ pop r22
+ pop r21
+ pop r20
+ pop r19
+ pop r18
+ pop r17
+ pop r16
+ pop r15
+ pop r14
+ pop r13
+.endm
+
/*--------------------------------------------------------------
* Switch to Kernel Mode stack if SP points to User Mode stack
*
@@ -235,7 +320,7 @@
SWITCH_TO_KERNEL_STK
- PUSH 0x003\LVL\()abcd /* Dummy ECR */
+ st.a 0x003\LVL\()abcd, [sp, -4] /* Dummy ECR */
sub sp, sp, 8 /* skip orig_r0 (not needed)
skip pt_regs->sp, already saved above */
diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h
index 49c2e090cb5c..cf1ba376e992 100644
--- a/arch/arc/include/asm/entry.h
+++ b/arch/arc/include/asm/entry.h
@@ -21,114 +21,12 @@
#include <asm/entry-arcv2.h>
#endif
-/* Note on the LD/ST addr modes with addr reg wback
- *
- * LD.a same as LD.aw
- *
- * LD.a reg1, [reg2, x] => Pre Incr
- * Eff Addr for load = [reg2 + x]
- *
- * LD.ab reg1, [reg2, x] => Post Incr
- * Eff Addr for load = [reg2]
- */
-
-.macro PUSH reg
- st.a \reg, [sp, -4]
-.endm
-
-.macro PUSHAX aux
- lr r9, [\aux]
- PUSH r9
-.endm
-
-.macro POP reg
- ld.ab \reg, [sp, 4]
-.endm
-
-.macro POPAX aux
- POP r9
- sr r9, [\aux]
-.endm
-
-/*--------------------------------------------------------------
- * Helpers to save/restore Scratch Regs:
- * used by Interrupt/Exception Prologue/Epilogue
- *-------------------------------------------------------------*/
-.macro SAVE_R0_TO_R12
- PUSH r0
- PUSH r1
- PUSH r2
- PUSH r3
- PUSH r4
- PUSH r5
- PUSH r6
- PUSH r7
- PUSH r8
- PUSH r9
- PUSH r10
- PUSH r11
- PUSH r12
-.endm
-
-.macro RESTORE_R12_TO_R0
- POP r12
- POP r11
- POP r10
- POP r9
- POP r8
- POP r7
- POP r6
- POP r5
- POP r4
- POP r3
- POP r2
- POP r1
- POP r0
-
-.endm
-
-/*--------------------------------------------------------------
- * Helpers to save/restore callee-saved regs:
- * used by several macros below
- *-------------------------------------------------------------*/
-.macro SAVE_R13_TO_R25
- PUSH r13
- PUSH r14
- PUSH r15
- PUSH r16
- PUSH r17
- PUSH r18
- PUSH r19
- PUSH r20
- PUSH r21
- PUSH r22
- PUSH r23
- PUSH r24
- PUSH r25
-.endm
-
-.macro RESTORE_R25_TO_R13
- POP r25
- POP r24
- POP r23
- POP r22
- POP r21
- POP r20
- POP r19
- POP r18
- POP r17
- POP r16
- POP r15
- POP r14
- POP r13
-.endm
-
/*
* save user mode callee regs as struct callee_regs
* - needed by fork/do_signal/unaligned-access-emulation.
*/
.macro SAVE_CALLEE_SAVED_USER
- SAVE_R13_TO_R25
+ SAVE_ABI_CALLEE_REGS
.endm
/*
@@ -136,18 +34,18 @@
* - could have been changed by ptrace tracer or unaligned-access fixup
*/
.macro RESTORE_CALLEE_SAVED_USER
- RESTORE_R25_TO_R13
+ RESTORE_ABI_CALLEE_REGS
.endm
/*
* save/restore kernel mode callee regs at the time of context switch
*/
.macro SAVE_CALLEE_SAVED_KERNEL
- SAVE_R13_TO_R25
+ SAVE_ABI_CALLEE_REGS
.endm
.macro RESTORE_CALLEE_SAVED_KERNEL
- RESTORE_R25_TO_R13
+ RESTORE_ABI_CALLEE_REGS
.endm
/*--------------------------------------------------------------
diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h
index ef8d4166370c..8a2441670a8f 100644
--- a/arch/arc/include/asm/hugepage.h
+++ b/arch/arc/include/asm/hugepage.h
@@ -10,6 +10,13 @@
#include <linux/types.h>
#include <asm-generic/pgtable-nopmd.h>
+/*
+ * Hugetlb definitions.
+ */
+#define HPAGE_SHIFT PMD_SHIFT
+#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT)
+#define HPAGE_MASK (~(HPAGE_SIZE - 1))
+
static inline pte_t pmd_pte(pmd_t pmd)
{
return __pte(pmd_val(pmd));
diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
index 4a2b30fb5a98..00b9318e551e 100644
--- a/arch/arc/include/asm/ptrace.h
+++ b/arch/arc/include/asm/ptrace.h
@@ -54,6 +54,10 @@ struct pt_regs {
ecr_reg ecr;
};
+struct callee_regs {
+ unsigned long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13;
+};
+
#define MAX_REG_OFFSET offsetof(struct pt_regs, ecr)
#else
@@ -92,16 +96,14 @@ struct pt_regs {
unsigned long status32;
};
-#define MAX_REG_OFFSET offsetof(struct pt_regs, status32)
-
-#endif
-
-/* Callee saved registers - need to be saved only when you are scheduled out */
-
struct callee_regs {
unsigned long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13;
};
+#define MAX_REG_OFFSET offsetof(struct pt_regs, status32)
+
+#endif
+
#define instruction_pointer(regs) ((regs)->ret)
#define profile_pc(regs) instruction_pointer(regs)
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 4dcf8589b708..d08a5092c2b4 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -153,7 +153,7 @@ static int arcv2_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len)
{
int n = 0;
#ifdef CONFIG_ISA_ARCV2
- const char *release, *cpu_nm, *isa_nm = "ARCv2";
+ const char *release = "", *cpu_nm = "HS38", *isa_nm = "ARCv2";
int dual_issue = 0, dual_enb = 0, mpy_opt, present;
int bpu_full, bpu_cache, bpu_pred, bpu_ret_stk;
char mpy_nm[16], lpb_nm[32];
@@ -172,8 +172,6 @@ static int arcv2_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len)
* releases only update it.
*/
- cpu_nm = "HS38";
-
if (info->arcver > 0x50 && info->arcver <= 0x53) {
release = arc_hs_rel[info->arcver - 0x51].str;
} else {
diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c
index 0b3bb529d246..8f6f4a542964 100644
--- a/arch/arc/kernel/signal.c
+++ b/arch/arc/kernel/signal.c
@@ -62,7 +62,7 @@ struct rt_sigframe {
unsigned int sigret_magic;
};
-static int save_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs)
+static int save_arcv2_regs(struct sigcontext __user *mctx, struct pt_regs *regs)
{
int err = 0;
#ifndef CONFIG_ISA_ARCOMPACT
@@ -75,12 +75,12 @@ static int save_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs)
#else
v2abi.r58 = v2abi.r59 = 0;
#endif
- err = __copy_to_user(&mctx->v2abi, &v2abi, sizeof(v2abi));
+ err = __copy_to_user(&mctx->v2abi, (void const *)&v2abi, sizeof(v2abi));
#endif
return err;
}
-static int restore_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs)
+static int restore_arcv2_regs(struct sigcontext __user *mctx, struct pt_regs *regs)
{
int err = 0;
#ifndef CONFIG_ISA_ARCOMPACT
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index f7e05c146637..9106ceac323c 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -145,10 +145,9 @@ dc_chk:
p_dc->sz_k = 1 << (dbcr.sz - 1);
n += scnprintf(buf + n, len - n,
- "D-Cache\t\t: %uK, %dway/set, %uB Line, %s%s%s\n",
+ "D-Cache\t\t: %uK, %dway/set, %uB Line, %s%s\n",
p_dc->sz_k, assoc, p_dc->line_len,
vipt ? "VIPT" : "PIPT",
- p_dc->colors > 1 ? " aliasing" : "",
IS_USED_CFG(CONFIG_ARC_HAS_DCACHE));
slc_chk:
@@ -703,51 +702,10 @@ static inline void arc_slc_enable(void)
* Exported APIs
*/
-/*
- * Handle cache congruency of kernel and userspace mappings of page when kernel
- * writes-to/reads-from
- *
- * The idea is to defer flushing of kernel mapping after a WRITE, possible if:
- * -dcache is NOT aliasing, hence any U/K-mappings of page are congruent
- * -U-mapping doesn't exist yet for page (finalised in update_mmu_cache)
- * -In SMP, if hardware caches are coherent
- *
- * There's a corollary case, where kernel READs from a userspace mapped page.
- * If the U-mapping is not congruent to K-mapping, former needs flushing.
- */
void flush_dcache_folio(struct folio *folio)
{
- struct address_space *mapping;
-
- if (!cache_is_vipt_aliasing()) {
- clear_bit(PG_dc_clean, &folio->flags);
- return;
- }
-
- /* don't handle anon pages here */
- mapping = folio_flush_mapping(folio);
- if (!mapping)
- return;
-
- /*
- * pagecache page, file not yet mapped to userspace
- * Make a note that K-mapping is dirty
- */
- if (!mapping_mapped(mapping)) {
- clear_bit(PG_dc_clean, &folio->flags);
- } else if (folio_mapped(folio)) {
- /* kernel reading from page with U-mapping */
- phys_addr_t paddr = (unsigned long)folio_address(folio);
- unsigned long vaddr = folio_pos(folio);
-
- /*
- * vaddr is not actually the virtual address, but is
- * congruent to every user mapping.
- */
- if (addr_not_cache_congruent(paddr, vaddr))
- __flush_dcache_pages(paddr, vaddr,
- folio_nr_pages(folio));
- }
+ clear_bit(PG_dc_clean, &folio->flags);
+ return;
}
EXPORT_SYMBOL(flush_dcache_folio);
@@ -921,44 +879,6 @@ noinline void flush_cache_all(void)
}
-#ifdef CONFIG_ARC_CACHE_VIPT_ALIASING
-
-void flush_cache_mm(struct mm_struct *mm)
-{
- flush_cache_all();
-}
-
-void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr,
- unsigned long pfn)
-{
- phys_addr_t paddr = pfn << PAGE_SHIFT;
-
- u_vaddr &= PAGE_MASK;
-
- __flush_dcache_pages(paddr, u_vaddr, 1);
-
- if (vma->vm_flags & VM_EXEC)
- __inv_icache_pages(paddr, u_vaddr, 1);
-}
-
-void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end)
-{
- flush_cache_all();
-}
-
-void flush_anon_page(struct vm_area_struct *vma, struct page *page,
- unsigned long u_vaddr)
-{
- /* TBD: do we really need to clear the kernel mapping */
- __flush_dcache_pages((phys_addr_t)page_address(page), u_vaddr, 1);
- __flush_dcache_pages((phys_addr_t)page_address(page),
- (phys_addr_t)page_address(page), 1);
-
-}
-
-#endif
-
void copy_user_highpage(struct page *to, struct page *from,
unsigned long u_vaddr, struct vm_area_struct *vma)
{
@@ -966,46 +886,11 @@ void copy_user_highpage(struct page *to, struct page *from,
struct folio *dst = page_folio(to);
void *kfrom = kmap_atomic(from);
void *kto = kmap_atomic(to);
- int clean_src_k_mappings = 0;
-
- /*
- * If SRC page was already mapped in userspace AND it's U-mapping is
- * not congruent with K-mapping, sync former to physical page so that
- * K-mapping in memcpy below, sees the right data
- *
- * Note that while @u_vaddr refers to DST page's userspace vaddr, it is
- * equally valid for SRC page as well
- *
- * For !VIPT cache, all of this gets compiled out as
- * addr_not_cache_congruent() is 0
- */
- if (page_mapcount(from) && addr_not_cache_congruent(kfrom, u_vaddr)) {
- __flush_dcache_pages((unsigned long)kfrom, u_vaddr, 1);
- clean_src_k_mappings = 1;
- }
copy_page(kto, kfrom);
- /*
- * Mark DST page K-mapping as dirty for a later finalization by
- * update_mmu_cache(). Although the finalization could have been done
- * here as well (given that both vaddr/paddr are available).
- * But update_mmu_cache() already has code to do that for other
- * non copied user pages (e.g. read faults which wire in pagecache page
- * directly).
- */
clear_bit(PG_dc_clean, &dst->flags);
-
- /*
- * if SRC was already usermapped and non-congruent to kernel mapping
- * sync the kernel mapping back to physical page
- */
- if (clean_src_k_mappings) {
- __flush_dcache_pages((unsigned long)kfrom,
- (unsigned long)kfrom, 1);
- } else {
- clear_bit(PG_dc_clean, &src->flags);
- }
+ clear_bit(PG_dc_clean, &src->flags);
kunmap_atomic(kto);
kunmap_atomic(kfrom);
@@ -1140,17 +1025,8 @@ static noinline void __init arc_cache_init_master(void)
dc->line_len, L1_CACHE_BYTES);
/* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */
- if (is_isa_arcompact()) {
- int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
-
- if (dc->colors > 1) {
- if (!handled)
- panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
- if (CACHE_COLORS_NUM != dc->colors)
- panic("CACHE_COLORS_NUM not optimized for config\n");
- } else if (handled && dc->colors == 1) {
- panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
- }
+ if (is_isa_arcompact() && dc->colors > 1) {
+ panic("Aliasing VIPT cache not supported\n");
}
}
diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c
index fce5fa2b4f52..3c1c7ae73292 100644
--- a/arch/arc/mm/mmap.c
+++ b/arch/arc/mm/mmap.c
@@ -14,10 +14,6 @@
#include <asm/cacheflush.h>
-#define COLOUR_ALIGN(addr, pgoff) \
- ((((addr) + SHMLBA - 1) & ~(SHMLBA - 1)) + \
- (((pgoff) << PAGE_SHIFT) & (SHMLBA - 1)))
-
/*
* Ensure that shared mappings are correctly aligned to
* avoid aliasing issues with VIPT caches.
@@ -31,21 +27,13 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- int do_align = 0;
- int aliasing = cache_is_vipt_aliasing();
struct vm_unmapped_area_info info;
/*
- * We only need to do colour alignment if D cache aliases.
- */
- if (aliasing)
- do_align = filp || (flags & MAP_SHARED);
-
- /*
* We enforce the MAP_FIXED case.
*/
if (flags & MAP_FIXED) {
- if (aliasing && flags & MAP_SHARED &&
+ if (flags & MAP_SHARED &&
(addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1))
return -EINVAL;
return addr;
@@ -55,10 +43,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
return -ENOMEM;
if (addr) {
- if (do_align)
- addr = COLOUR_ALIGN(addr, pgoff);
- else
- addr = PAGE_ALIGN(addr);
+ addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
@@ -70,7 +55,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
info.length = len;
info.low_limit = mm->mmap_base;
info.high_limit = TASK_SIZE;
- info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
+ info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
return vm_unmapped_area(&info);
}
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index e536b2dcd4b0..ad702b49aeb3 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -478,21 +478,15 @@ void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma,
create_tlb(vma, vaddr, ptep);
- if (page == ZERO_PAGE(0)) {
+ if (page == ZERO_PAGE(0))
return;
- }
/*
- * Exec page : Independent of aliasing/page-color considerations,
- * since icache doesn't snoop dcache on ARC, any dirty
- * K-mapping of a code page needs to be wback+inv so that
- * icache fetch by userspace sees code correctly.
- * !EXEC page: If K-mapping is NOT congruent to U-mapping, flush it
- * so userspace sees the right data.
- * (Avoids the flush for Non-exec + congruent mapping case)
+ * For executable pages, since icache doesn't snoop dcache, any
+ * dirty K-mapping of a code page needs to be wback+inv so that
+ * icache fetch by userspace sees code correctly.
*/
- if ((vma->vm_flags & VM_EXEC) ||
- addr_not_cache_congruent(paddr, vaddr)) {
+ if (vma->vm_flags & VM_EXEC) {
struct folio *folio = page_folio(page);
int dirty = !test_and_set_bit(PG_dc_clean, &folio->flags);
if (dirty) {
diff --git a/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts b/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts
index 1ab8184302db..5a2869a18bd5 100644
--- a/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts
+++ b/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts
@@ -36,9 +36,7 @@
gpios = <&gpio 42 GPIO_ACTIVE_HIGH>;
};
-&leds {
- /delete-node/ led_act;
-};
+/delete-node/ &led_act;
&pm {
/delete-property/ system-power-controller;
diff --git a/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts b/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts
index a3f247c722b4..0342a79ccd5d 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts
@@ -37,9 +37,9 @@
&clks {
assigned-clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>,
- <&clks IMX6QDL_CLK_LDB_DI1_SEL>;
+ <&clks IMX6QDL_CLK_LDB_DI1_SEL>, <&clks IMX6QDL_CLK_ENET_REF_SEL>;
assigned-clock-parents = <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>,
- <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>;
+ <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, <&clk50m_phy>;
};
&hdmi {
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi
index 4ffe99ed55ca..07dcecbe485d 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi
@@ -121,6 +121,8 @@
max-speed = <100>;
interrupt-parent = <&gpio5>;
interrupts = <6 IRQ_TYPE_LEVEL_LOW>;
+ clocks = <&clks IMX6UL_CLK_ENET_REF>;
+ clock-names = "rmii-ref";
};
};
};
diff --git a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi
index 29b8fd03567a..5387da8a2a0a 100644
--- a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi
@@ -454,7 +454,7 @@
};
gpt1: timer@302d0000 {
- compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt";
+ compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt";
reg = <0x302d0000 0x10000>;
interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks IMX7D_GPT1_ROOT_CLK>,
@@ -463,7 +463,7 @@
};
gpt2: timer@302e0000 {
- compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt";
+ compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt";
reg = <0x302e0000 0x10000>;
interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks IMX7D_GPT2_ROOT_CLK>,
@@ -473,7 +473,7 @@
};
gpt3: timer@302f0000 {
- compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt";
+ compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt";
reg = <0x302f0000 0x10000>;
interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks IMX7D_GPT3_ROOT_CLK>,
@@ -483,7 +483,7 @@
};
gpt4: timer@30300000 {
- compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt";
+ compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt";
reg = <0x30300000 0x10000>;
interrupts = <GIC_SPI 52 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks IMX7D_GPT4_ROOT_CLK>,
diff --git a/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts b/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts
index a400c108f66a..6c5e6856648a 100644
--- a/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts
+++ b/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts
@@ -8,6 +8,7 @@
#include "imx28-lwe.dtsi"
/ {
+ model = "Liebherr XEA board";
compatible = "lwn,imx28-xea", "fsl,imx28";
};
diff --git a/arch/arm/boot/dts/rockchip/rk3128.dtsi b/arch/arm/boot/dts/rockchip/rk3128.dtsi
index 7bf557c99561..01edf244ddee 100644
--- a/arch/arm/boot/dts/rockchip/rk3128.dtsi
+++ b/arch/arm/boot/dts/rockchip/rk3128.dtsi
@@ -848,7 +848,7 @@
};
sdmmc_pwren: sdmmc-pwren {
- rockchip,pins = <1 RK_PB6 1 &pcfg_pull_default>;
+ rockchip,pins = <1 RK_PB6 RK_FUNC_GPIO &pcfg_pull_default>;
};
sdmmc_bus4: sdmmc-bus4 {
diff --git a/arch/arm/boot/dts/rockchip/rk322x.dtsi b/arch/arm/boot/dts/rockchip/rk322x.dtsi
index ffc16d6b97e1..a721744cbfd1 100644
--- a/arch/arm/boot/dts/rockchip/rk322x.dtsi
+++ b/arch/arm/boot/dts/rockchip/rk322x.dtsi
@@ -215,9 +215,9 @@
power-domain@RK3228_PD_VOP {
reg = <RK3228_PD_VOP>;
- clocks =<&cru ACLK_VOP>,
- <&cru DCLK_VOP>,
- <&cru HCLK_VOP>;
+ clocks = <&cru ACLK_VOP>,
+ <&cru DCLK_VOP>,
+ <&cru HCLK_VOP>;
pm_qos = <&qos_vop>;
#power-domain-cells = <0>;
};
diff --git a/arch/arm/boot/dts/ti/omap/am33xx.dtsi b/arch/arm/boot/dts/ti/omap/am33xx.dtsi
index 1a2cd5baf402..5b9e01a8aa5d 100644
--- a/arch/arm/boot/dts/ti/omap/am33xx.dtsi
+++ b/arch/arm/boot/dts/ti/omap/am33xx.dtsi
@@ -359,6 +359,7 @@
<SYSC_IDLE_NO>,
<SYSC_IDLE_SMART>,
<SYSC_IDLE_SMART_WKUP>;
+ ti,sysc-delay-us = <2>;
clocks = <&l3s_clkctrl AM3_L3S_USB_OTG_HS_CLKCTRL 0>;
clock-names = "fck";
#address-cells = <1>;
diff --git a/arch/arm/boot/dts/ti/omap/dra7.dtsi b/arch/arm/boot/dts/ti/omap/dra7.dtsi
index 3f3e52e3b375..6509c742fb58 100644
--- a/arch/arm/boot/dts/ti/omap/dra7.dtsi
+++ b/arch/arm/boot/dts/ti/omap/dra7.dtsi
@@ -147,7 +147,7 @@
l3-noc@44000000 {
compatible = "ti,dra7-l3-noc";
- reg = <0x44000000 0x1000>,
+ reg = <0x44000000 0x1000000>,
<0x45000000 0x1000>;
interrupts-extended = <&crossbar_mpu GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
<&wakeupgen GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
index c7d2510e5a78..853c4f81ba4a 100644
--- a/arch/arm/include/asm/topology.h
+++ b/arch/arm/include/asm/topology.h
@@ -13,6 +13,7 @@
#define arch_set_freq_scale topology_set_freq_scale
#define arch_scale_freq_capacity topology_get_freq_scale
#define arch_scale_freq_invariant topology_scale_freq_invariant
+#define arch_scale_freq_ref topology_get_freq_ref
#endif
/* Replace task scheduler's default cpu-invariant accounting */
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index 1ae99deeec54..8fc080c9e4fb 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -268,10 +268,8 @@ static inline void armv6pmu_write_counter(struct perf_event *event, u64 value)
static void armv6pmu_enable_event(struct perf_event *event)
{
- unsigned long val, mask, evt, flags;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ unsigned long val, mask, evt;
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
int idx = hwc->idx;
if (ARMV6_CYCLE_COUNTER == idx) {
@@ -294,12 +292,10 @@ static void armv6pmu_enable_event(struct perf_event *event)
* Mask out the current event and set the counter to count the event
* that we're interested in.
*/
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = armv6_pmcr_read();
val &= ~mask;
val |= evt;
armv6_pmcr_write(val);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static irqreturn_t
@@ -362,26 +358,20 @@ armv6pmu_handle_irq(struct arm_pmu *cpu_pmu)
static void armv6pmu_start(struct arm_pmu *cpu_pmu)
{
- unsigned long flags, val;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+ unsigned long val;
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = armv6_pmcr_read();
val |= ARMV6_PMCR_ENABLE;
armv6_pmcr_write(val);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void armv6pmu_stop(struct arm_pmu *cpu_pmu)
{
- unsigned long flags, val;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+ unsigned long val;
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = armv6_pmcr_read();
val &= ~ARMV6_PMCR_ENABLE;
armv6_pmcr_write(val);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static int
@@ -419,10 +409,8 @@ static void armv6pmu_clear_event_idx(struct pmu_hw_events *cpuc,
static void armv6pmu_disable_event(struct perf_event *event)
{
- unsigned long val, mask, evt, flags;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ unsigned long val, mask, evt;
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
int idx = hwc->idx;
if (ARMV6_CYCLE_COUNTER == idx) {
@@ -444,20 +432,16 @@ static void armv6pmu_disable_event(struct perf_event *event)
* of ETM bus signal assertion cycles. The external reporting should
* be disabled and so this should never increment.
*/
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = armv6_pmcr_read();
val &= ~mask;
val |= evt;
armv6_pmcr_write(val);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void armv6mpcore_pmu_disable_event(struct perf_event *event)
{
- unsigned long val, mask, flags, evt = 0;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ unsigned long val, mask, evt = 0;
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
int idx = hwc->idx;
if (ARMV6_CYCLE_COUNTER == idx) {
@@ -475,12 +459,10 @@ static void armv6mpcore_pmu_disable_event(struct perf_event *event)
* Unlike UP ARMv6, we don't have a way of stopping the counters. We
* simply disable the interrupt reporting.
*/
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = armv6_pmcr_read();
val &= ~mask;
val |= evt;
armv6_pmcr_write(val);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static int armv6_map_event(struct perf_event *event)
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index eb2190477da1..a3322e2b3ea4 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -870,10 +870,8 @@ static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu)
static void armv7pmu_enable_event(struct perf_event *event)
{
- unsigned long flags;
struct hw_perf_event *hwc = &event->hw;
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
int idx = hwc->idx;
if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
@@ -886,7 +884,6 @@ static void armv7pmu_enable_event(struct perf_event *event)
* Enable counter and interrupt, and set the counter to count
* the event that we're interested in.
*/
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
/*
* Disable counter
@@ -910,16 +907,12 @@ static void armv7pmu_enable_event(struct perf_event *event)
* Enable counter
*/
armv7_pmnc_enable_counter(idx);
-
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void armv7pmu_disable_event(struct perf_event *event)
{
- unsigned long flags;
struct hw_perf_event *hwc = &event->hw;
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
int idx = hwc->idx;
if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
@@ -931,7 +924,6 @@ static void armv7pmu_disable_event(struct perf_event *event)
/*
* Disable counter and interrupt
*/
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
/*
* Disable counter
@@ -942,8 +934,6 @@ static void armv7pmu_disable_event(struct perf_event *event)
* Disable interrupt for this counter
*/
armv7_pmnc_disable_intens(idx);
-
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static irqreturn_t armv7pmu_handle_irq(struct arm_pmu *cpu_pmu)
@@ -1009,24 +999,14 @@ static irqreturn_t armv7pmu_handle_irq(struct arm_pmu *cpu_pmu)
static void armv7pmu_start(struct arm_pmu *cpu_pmu)
{
- unsigned long flags;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
-
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
/* Enable all counters */
armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void armv7pmu_stop(struct arm_pmu *cpu_pmu)
{
- unsigned long flags;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
-
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
/* Disable all counters */
armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc,
@@ -1072,8 +1052,10 @@ static int armv7pmu_set_event_filter(struct hw_perf_event *event,
{
unsigned long config_base = 0;
- if (attr->exclude_idle)
- return -EPERM;
+ if (attr->exclude_idle) {
+ pr_debug("ARM performance counters do not support mode exclusion\n");
+ return -EOPNOTSUPP;
+ }
if (attr->exclude_user)
config_base |= ARMV7_EXCLUDE_USER;
if (attr->exclude_kernel)
@@ -1492,14 +1474,10 @@ static void krait_clearpmu(u32 config_base)
static void krait_pmu_disable_event(struct perf_event *event)
{
- unsigned long flags;
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
/* Disable counter and interrupt */
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
/* Disable counter */
armv7_pmnc_disable_counter(idx);
@@ -1512,23 +1490,17 @@ static void krait_pmu_disable_event(struct perf_event *event)
/* Disable interrupt for this counter */
armv7_pmnc_disable_intens(idx);
-
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void krait_pmu_enable_event(struct perf_event *event)
{
- unsigned long flags;
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
/*
* Enable counter and interrupt, and set the counter to count
* the event that we're interested in.
*/
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
/* Disable counter */
armv7_pmnc_disable_counter(idx);
@@ -1548,8 +1520,6 @@ static void krait_pmu_enable_event(struct perf_event *event)
/* Enable counter */
armv7_pmnc_enable_counter(idx);
-
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void krait_pmu_reset(void *info)
@@ -1825,14 +1795,10 @@ static void scorpion_clearpmu(u32 config_base)
static void scorpion_pmu_disable_event(struct perf_event *event)
{
- unsigned long flags;
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
/* Disable counter and interrupt */
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
/* Disable counter */
armv7_pmnc_disable_counter(idx);
@@ -1845,23 +1811,17 @@ static void scorpion_pmu_disable_event(struct perf_event *event)
/* Disable interrupt for this counter */
armv7_pmnc_disable_intens(idx);
-
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void scorpion_pmu_enable_event(struct perf_event *event)
{
- unsigned long flags;
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
/*
* Enable counter and interrupt, and set the counter to count
* the event that we're interested in.
*/
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
/* Disable counter */
armv7_pmnc_disable_counter(idx);
@@ -1881,8 +1841,6 @@ static void scorpion_pmu_enable_event(struct perf_event *event)
/* Enable counter */
armv7_pmnc_enable_counter(idx);
-
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void scorpion_pmu_reset(void *info)
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index f6cdcacfb96d..7a2ba1c689a7 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -203,10 +203,8 @@ xscale1pmu_handle_irq(struct arm_pmu *cpu_pmu)
static void xscale1pmu_enable_event(struct perf_event *event)
{
- unsigned long val, mask, evt, flags;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ unsigned long val, mask, evt;
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
int idx = hwc->idx;
switch (idx) {
@@ -229,20 +227,16 @@ static void xscale1pmu_enable_event(struct perf_event *event)
return;
}
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = xscale1pmu_read_pmnc();
val &= ~mask;
val |= evt;
xscale1pmu_write_pmnc(val);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void xscale1pmu_disable_event(struct perf_event *event)
{
- unsigned long val, mask, evt, flags;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ unsigned long val, mask, evt;
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
int idx = hwc->idx;
switch (idx) {
@@ -263,12 +257,10 @@ static void xscale1pmu_disable_event(struct perf_event *event)
return;
}
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = xscale1pmu_read_pmnc();
val &= ~mask;
val |= evt;
xscale1pmu_write_pmnc(val);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static int
@@ -300,26 +292,20 @@ static void xscalepmu_clear_event_idx(struct pmu_hw_events *cpuc,
static void xscale1pmu_start(struct arm_pmu *cpu_pmu)
{
- unsigned long flags, val;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+ unsigned long val;
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = xscale1pmu_read_pmnc();
val |= XSCALE_PMU_ENABLE;
xscale1pmu_write_pmnc(val);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void xscale1pmu_stop(struct arm_pmu *cpu_pmu)
{
- unsigned long flags, val;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+ unsigned long val;
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = xscale1pmu_read_pmnc();
val &= ~XSCALE_PMU_ENABLE;
xscale1pmu_write_pmnc(val);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static inline u64 xscale1pmu_read_counter(struct perf_event *event)
@@ -549,10 +535,8 @@ xscale2pmu_handle_irq(struct arm_pmu *cpu_pmu)
static void xscale2pmu_enable_event(struct perf_event *event)
{
- unsigned long flags, ien, evtsel;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ unsigned long ien, evtsel;
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
int idx = hwc->idx;
ien = xscale2pmu_read_int_enable();
@@ -587,18 +571,14 @@ static void xscale2pmu_enable_event(struct perf_event *event)
return;
}
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
xscale2pmu_write_event_select(evtsel);
xscale2pmu_write_int_enable(ien);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void xscale2pmu_disable_event(struct perf_event *event)
{
- unsigned long flags, ien, evtsel, of_flags;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ unsigned long ien, evtsel, of_flags;
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
int idx = hwc->idx;
ien = xscale2pmu_read_int_enable();
@@ -638,11 +618,9 @@ static void xscale2pmu_disable_event(struct perf_event *event)
return;
}
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
xscale2pmu_write_event_select(evtsel);
xscale2pmu_write_int_enable(ien);
xscale2pmu_write_overflow_flags(of_flags);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static int
@@ -663,26 +641,20 @@ out:
static void xscale2pmu_start(struct arm_pmu *cpu_pmu)
{
- unsigned long flags, val;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+ unsigned long val;
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
val |= XSCALE_PMU_ENABLE;
xscale2pmu_write_pmnc(val);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void xscale2pmu_stop(struct arm_pmu *cpu_pmu)
{
- unsigned long flags, val;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+ unsigned long val;
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = xscale2pmu_read_pmnc();
val &= ~XSCALE_PMU_ENABLE;
xscale2pmu_write_pmnc(val);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static inline u64 xscale2pmu_read_counter(struct perf_event *event)
diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c
index 2157493b78a9..df69af932375 100644
--- a/arch/arm/mach-imx/mmdc.c
+++ b/arch/arm/mach-imx/mmdc.c
@@ -501,6 +501,10 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b
name = devm_kasprintf(&pdev->dev,
GFP_KERNEL, "mmdc%d", ret);
+ if (!name) {
+ ret = -ENOMEM;
+ goto pmu_release_id;
+ }
pmu_mmdc->mmdc_ipg_clk = mmdc_ipg_clk;
pmu_mmdc->devtype_data = (struct fsl_mmdc_devtype_data *)of_id->data;
@@ -523,9 +527,10 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b
pmu_register_err:
pr_warn("MMDC Perf PMU failed (%d), disabled\n", ret);
- ida_simple_remove(&mmdc_ida, pmu_mmdc->id);
cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node);
hrtimer_cancel(&pmu_mmdc->hrtimer);
+pmu_release_id:
+ ida_simple_remove(&mmdc_ida, pmu_mmdc->id);
pmu_free:
kfree(pmu_mmdc);
return ret;
diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c
index 98999aa8cc0c..7f387706368a 100644
--- a/arch/arm/mach-omap2/id.c
+++ b/arch/arm/mach-omap2/id.c
@@ -793,11 +793,16 @@ void __init omap_soc_device_init(void)
soc_dev_attr->machine = soc_name;
soc_dev_attr->family = omap_get_family();
+ if (!soc_dev_attr->family) {
+ kfree(soc_dev_attr);
+ return;
+ }
soc_dev_attr->revision = soc_rev;
soc_dev_attr->custom_attr_group = omap_soc_groups[0];
soc_dev = soc_device_register(soc_dev_attr);
if (IS_ERR(soc_dev)) {
+ kfree(soc_dev_attr->family);
kfree(soc_dev_attr);
return;
}
diff --git a/arch/arm/mach-sunxi/mc_smp.c b/arch/arm/mach-sunxi/mc_smp.c
index cb63921232a6..277f6aa8e6c2 100644
--- a/arch/arm/mach-sunxi/mc_smp.c
+++ b/arch/arm/mach-sunxi/mc_smp.c
@@ -803,16 +803,16 @@ static int __init sunxi_mc_smp_init(void)
for (i = 0; i < ARRAY_SIZE(sunxi_mc_smp_data); i++) {
ret = of_property_match_string(node, "enable-method",
sunxi_mc_smp_data[i].enable_method);
- if (!ret)
+ if (ret >= 0)
break;
}
- is_a83t = sunxi_mc_smp_data[i].is_a83t;
-
of_node_put(node);
- if (ret)
+ if (ret < 0)
return -ENODEV;
+ is_a83t = sunxi_mc_smp_data[i].is_a83t;
+
if (!sunxi_mc_smp_cpu_table_init())
return -EINVAL;
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 584f9528c996..d6a324dbff2e 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -470,3 +470,5 @@
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait
456 common futex_requeue sys_futex_requeue
+457 common statmount sys_statmount
+458 common listmount sys_listmount
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 442539fd06fe..8f6cf1221b6a 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -155,7 +155,7 @@ config ARM64
select HAVE_MOVE_PUD
select HAVE_PCI
select HAVE_ACPI_APEI if (ACPI && EFI)
- select HAVE_ALIGNED_STRUCT_PAGE if SLUB
+ select HAVE_ALIGNED_STRUCT_PAGE
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_BITREVERSE
select HAVE_ARCH_COMPILER_H
@@ -1550,7 +1550,7 @@ config ARCH_FORCE_MAX_ORDER
Don't change if unsure.
config UNMAP_KERNEL_AT_EL0
- bool "Unmap kernel when running in userspace (aka \"KAISER\")" if EXPERT
+ bool "Unmap kernel when running in userspace (KPTI)" if EXPERT
default y
help
Speculation attacks against some high-performance processors can
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 9a2d3723cd0f..47ecc4cff9d2 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -200,7 +200,7 @@ endif
endif
vdso-install-y += arch/arm64/kernel/vdso/vdso.so.dbg
-vdso-install-$(CONFIG_COMPAT_VDSO) += arch/arm64/kernel/vdso32/vdso.so.dbg:vdso32.so
+vdso-install-$(CONFIG_COMPAT_VDSO) += arch/arm64/kernel/vdso32/vdso32.so.dbg
include $(srctree)/scripts/Makefile.defconf
diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile
index 1761f5972443..a5a787371117 100644
--- a/arch/arm64/boot/Makefile
+++ b/arch/arm64/boot/Makefile
@@ -44,7 +44,7 @@ EFI_ZBOOT_BFD_TARGET := elf64-littleaarch64
EFI_ZBOOT_MACH_TYPE := ARM64
EFI_ZBOOT_FORWARD_CFI := $(CONFIG_ARM64_BTI_KERNEL)
-EFI_ZBOOT_OBJCOPY_FLAGS = --add-symbol zboot_code_size=0x$(shell \
+EFI_ZBOOT_OBJCOPY_FLAGS = --add-symbol zboot_code_size=0x$$( \
$(NM) vmlinux|grep _kernel_codesize|cut -d' ' -f1)
include $(srctree)/drivers/firmware/efi/libstub/Makefile.zboot
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi
index 15290e6892fc..fc7315b94406 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi
@@ -68,10 +68,7 @@
&emac0 {
pinctrl-names = "default";
pinctrl-0 = <&ext_rgmii_pins>;
- phy-mode = "rgmii";
phy-handle = <&ext_rgmii_phy>;
- allwinner,rx-delay-ps = <3100>;
- allwinner,tx-delay-ps = <700>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts
index d83852e72f06..b5d713926a34 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts
@@ -13,6 +13,9 @@
};
&emac0 {
+ allwinner,rx-delay-ps = <3100>;
+ allwinner,tx-delay-ps = <700>;
+ phy-mode = "rgmii";
phy-supply = <&reg_dcdce>;
};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts b/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts
index 00fe28caac93..b3b1b8692125 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts
@@ -13,6 +13,8 @@
};
&emac0 {
+ allwinner,tx-delay-ps = <700>;
+ phy-mode = "rgmii-rxid";
phy-supply = <&reg_dldo1>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi b/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi
index 5ce5fbf2b38e..f69b0c17560a 100644
--- a/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi
@@ -82,12 +82,9 @@
pinctrl-0 = <&pinctrl_wifi_pdn>;
gpio = <&lsio_gpio1 28 GPIO_ACTIVE_HIGH>;
enable-active-high;
+ regulator-always-on;
regulator-name = "wifi_pwrdn_fake_regulator";
regulator-settling-time-us = <100>;
-
- regulator-state-mem {
- regulator-off-in-suspend;
- };
};
reg_pcie_switch: regulator-pcie-switch {
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
index ce66d30a4839..b0bb77150adc 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
@@ -149,7 +149,7 @@ dma_subsys: bus@5a000000 {
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_LCD_0_PWM_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
- #pwm-cells = <2>;
+ #pwm-cells = <3>;
power-domains = <&pd IMX_SC_R_LCD_0_PWM_0>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
index 49ad3413db94..7e510b21bbac 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
@@ -29,7 +29,7 @@ lsio_subsys: bus@5d000000 {
<&pwm0_lpcg 1>;
assigned-clocks = <&clk IMX_SC_R_PWM_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
- #pwm-cells = <2>;
+ #pwm-cells = <3>;
interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
status = "disabled";
};
@@ -42,7 +42,7 @@ lsio_subsys: bus@5d000000 {
<&pwm1_lpcg 1>;
assigned-clocks = <&clk IMX_SC_R_PWM_1 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
- #pwm-cells = <2>;
+ #pwm-cells = <3>;
interrupts = <GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>;
status = "disabled";
};
@@ -55,7 +55,7 @@ lsio_subsys: bus@5d000000 {
<&pwm2_lpcg 1>;
assigned-clocks = <&clk IMX_SC_R_PWM_2 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
- #pwm-cells = <2>;
+ #pwm-cells = <3>;
interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>;
status = "disabled";
};
@@ -68,7 +68,7 @@ lsio_subsys: bus@5d000000 {
<&pwm3_lpcg 1>;
assigned-clocks = <&clk IMX_SC_R_PWM_3 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
- #pwm-cells = <2>;
+ #pwm-cells = <3>;
interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
index c9a610ba4836..1264da6012f9 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
@@ -2072,6 +2072,7 @@
phys = <&usb3_phy0>, <&usb3_phy0>;
phy-names = "usb2-phy", "usb3-phy";
snps,gfladj-refclk-lpm-sel-quirk;
+ snps,parkmode-disable-ss-quirk;
};
};
@@ -2114,6 +2115,7 @@
phys = <&usb3_phy1>, <&usb3_phy1>;
phy-names = "usb2-phy", "usb3-phy";
snps,gfladj-refclk-lpm-sel-quirk;
+ snps,parkmode-disable-ss-quirk;
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
index 4b1ce9fc1758..c6dc3ba0d43b 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
@@ -1649,6 +1649,7 @@
phys = <&usb3_phy0>, <&usb3_phy0>;
phy-names = "usb2-phy", "usb3-phy";
power-domains = <&pgc_otg1>;
+ snps,parkmode-disable-ss-quirk;
status = "disabled";
};
@@ -1680,6 +1681,7 @@
phys = <&usb3_phy1>, <&usb3_phy1>;
phy-names = "usb2-phy", "usb3-phy";
power-domains = <&pgc_otg2>;
+ snps,parkmode-disable-ss-quirk;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
index 01539df335f8..8439dd6b3935 100644
--- a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
@@ -96,6 +96,17 @@
status = "okay";
};
+&edma3 {
+ power-domains = <&pd IMX_SC_R_DMA_1_CH0>,
+ <&pd IMX_SC_R_DMA_1_CH1>,
+ <&pd IMX_SC_R_DMA_1_CH2>,
+ <&pd IMX_SC_R_DMA_1_CH3>,
+ <&pd IMX_SC_R_DMA_1_CH4>,
+ <&pd IMX_SC_R_DMA_1_CH5>,
+ <&pd IMX_SC_R_DMA_1_CH6>,
+ <&pd IMX_SC_R_DMA_1_CH7>;
+};
+
&flexcan1 {
fsl,clk-source = /bits/ 8 <1>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi
index f22c1ac391c9..c4a0082f30d3 100644
--- a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi
@@ -483,7 +483,7 @@
};
};
- gpioe: gpio@2d000080 {
+ gpioe: gpio@2d000000 {
compatible = "fsl,imx8ulp-gpio";
reg = <0x2d000000 0x1000>;
gpio-controller;
@@ -498,7 +498,7 @@
gpio-ranges = <&iomuxc1 0 32 24>;
};
- gpiof: gpio@2d010080 {
+ gpiof: gpio@2d010000 {
compatible = "fsl,imx8ulp-gpio";
reg = <0x2d010000 0x1000>;
gpio-controller;
@@ -534,7 +534,7 @@
};
};
- gpiod: gpio@2e200080 {
+ gpiod: gpio@2e200000 {
compatible = "fsl,imx8ulp-gpio";
reg = <0x2e200000 0x1000>;
gpio-controller;
diff --git a/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts b/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts
index f06139bdff97..3c5c67ebee5d 100644
--- a/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts
+++ b/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts
@@ -577,7 +577,7 @@
fsl,pins = <
MX93_PAD_UART2_TXD__LPUART2_TX 0x31e
MX93_PAD_UART2_RXD__LPUART2_RX 0x31e
- MX93_PAD_SAI1_TXD0__LPUART2_RTS_B 0x31e
+ MX93_PAD_SAI1_TXD0__LPUART2_RTS_B 0x51e
>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi
index ceccf4766440..34c0540276d1 100644
--- a/arch/arm64/boot/dts/freescale/imx93.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx93.dtsi
@@ -417,7 +417,7 @@
compatible = "fsl,imx93-src-slice";
reg = <0x44462400 0x400>, <0x44465800 0x400>;
#power-domain-cells = <0>;
- clocks = <&clk IMX93_CLK_MEDIA_AXI>,
+ clocks = <&clk IMX93_CLK_NIC_MEDIA_GATE>,
<&clk IMX93_CLK_MEDIA_APB>;
};
};
@@ -957,7 +957,7 @@
};
};
- gpio2: gpio@43810080 {
+ gpio2: gpio@43810000 {
compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio";
reg = <0x43810000 0x1000>;
gpio-controller;
@@ -972,7 +972,7 @@
gpio-ranges = <&iomuxc 0 4 30>;
};
- gpio3: gpio@43820080 {
+ gpio3: gpio@43820000 {
compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio";
reg = <0x43820000 0x1000>;
gpio-controller;
@@ -988,7 +988,7 @@
<&iomuxc 26 34 2>, <&iomuxc 28 0 4>;
};
- gpio4: gpio@43830080 {
+ gpio4: gpio@43830000 {
compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio";
reg = <0x43830000 0x1000>;
gpio-controller;
@@ -1003,7 +1003,7 @@
gpio-ranges = <&iomuxc 0 38 28>, <&iomuxc 28 36 2>;
};
- gpio1: gpio@47400080 {
+ gpio1: gpio@47400000 {
compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio";
reg = <0x47400000 0x1000>;
gpio-controller;
diff --git a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts
index 3b7a176b7904..c46682150e50 100644
--- a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts
+++ b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts
@@ -73,7 +73,7 @@
};
};
- memory {
+ memory@40000000 {
reg = <0 0x40000000 0 0x40000000>;
};
diff --git a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts
index a885a3fbe456..2dc1bdc74e21 100644
--- a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts
+++ b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts
@@ -55,7 +55,7 @@
};
};
- memory {
+ memory@40000000 {
reg = <0 0x40000000 0 0x20000000>;
};
diff --git a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts
index af4a4309bda4..b876e501216b 100644
--- a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts
+++ b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts
@@ -126,6 +126,7 @@
compatible = "sff,sfp";
i2c-bus = <&i2c_sfp1>;
los-gpios = <&pio 46 GPIO_ACTIVE_HIGH>;
+ maximum-power-milliwatt = <3000>;
mod-def0-gpios = <&pio 49 GPIO_ACTIVE_LOW>;
tx-disable-gpios = <&pio 20 GPIO_ACTIVE_HIGH>;
tx-fault-gpios = <&pio 7 GPIO_ACTIVE_HIGH>;
@@ -137,6 +138,7 @@
i2c-bus = <&i2c_sfp2>;
los-gpios = <&pio 31 GPIO_ACTIVE_HIGH>;
mod-def0-gpios = <&pio 47 GPIO_ACTIVE_LOW>;
+ maximum-power-milliwatt = <3000>;
tx-disable-gpios = <&pio 15 GPIO_ACTIVE_HIGH>;
tx-fault-gpios = <&pio 48 GPIO_ACTIVE_HIGH>;
};
@@ -150,16 +152,16 @@
trip = <&cpu_trip_active_high>;
};
- cpu-active-low {
+ cpu-active-med {
/* active: set fan to cooling level 1 */
cooling-device = <&fan 1 1>;
- trip = <&cpu_trip_active_low>;
+ trip = <&cpu_trip_active_med>;
};
- cpu-passive {
- /* passive: set fan to cooling level 0 */
+ cpu-active-low {
+ /* active: set fan to cooling level 0 */
cooling-device = <&fan 0 0>;
- trip = <&cpu_trip_passive>;
+ trip = <&cpu_trip_active_low>;
};
};
};
diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
index 24eda00e320d..fc751e049953 100644
--- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
@@ -374,6 +374,10 @@
reg = <0 0x11230000 0 0x1000>,
<0 0x11c20000 0 0x1000>;
interrupts = <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>;
+ assigned-clocks = <&topckgen CLK_TOP_EMMC_416M_SEL>,
+ <&topckgen CLK_TOP_EMMC_250M_SEL>;
+ assigned-clock-parents = <&apmixedsys CLK_APMIXED_MPLL>,
+ <&topckgen CLK_TOP_NET1PLL_D5_D2>;
clocks = <&topckgen CLK_TOP_EMMC_416M_SEL>,
<&infracfg CLK_INFRA_MSDC_HCK_CK>,
<&infracfg CLK_INFRA_MSDC_CK>,
@@ -610,22 +614,34 @@
thermal-sensors = <&thermal 0>;
trips {
+ cpu_trip_crit: crit {
+ temperature = <125000>;
+ hysteresis = <2000>;
+ type = "critical";
+ };
+
+ cpu_trip_hot: hot {
+ temperature = <120000>;
+ hysteresis = <2000>;
+ type = "hot";
+ };
+
cpu_trip_active_high: active-high {
temperature = <115000>;
hysteresis = <2000>;
type = "active";
};
- cpu_trip_active_low: active-low {
+ cpu_trip_active_med: active-med {
temperature = <85000>;
hysteresis = <2000>;
type = "active";
};
- cpu_trip_passive: passive {
- temperature = <40000>;
+ cpu_trip_active_low: active-low {
+ temperature = <60000>;
hysteresis = <2000>;
- type = "passive";
+ type = "active";
};
};
};
diff --git a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
index 5122963d8743..d258c80213b2 100644
--- a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
@@ -44,7 +44,7 @@
id-gpio = <&pio 16 GPIO_ACTIVE_HIGH>;
};
- usb_p1_vbus: regulator@0 {
+ usb_p1_vbus: regulator-usb-p1 {
compatible = "regulator-fixed";
regulator-name = "usb_vbus";
regulator-min-microvolt = <5000000>;
@@ -53,7 +53,7 @@
enable-active-high;
};
- usb_p0_vbus: regulator@1 {
+ usb_p0_vbus: regulator-usb-p0 {
compatible = "regulator-fixed";
regulator-name = "vbus";
regulator-min-microvolt = <5000000>;
diff --git a/arch/arm64/boot/dts/mediatek/mt8183-evb.dts b/arch/arm64/boot/dts/mediatek/mt8183-evb.dts
index ce336a48c897..77f9ab94c00b 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183-evb.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8183-evb.dts
@@ -31,14 +31,14 @@
#address-cells = <2>;
#size-cells = <2>;
ranges;
- scp_mem_reserved: scp_mem_region {
+ scp_mem_reserved: memory@50000000 {
compatible = "shared-dma-pool";
reg = <0 0x50000000 0 0x2900000>;
no-map;
};
};
- ntc@0 {
+ thermal-sensor {
compatible = "murata,ncp03wf104";
pullup-uv = <1800000>;
pullup-ohm = <390000>;
diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi
index bf97b60ae4d1..820260348de9 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi
@@ -91,6 +91,8 @@
&dsi0 {
status = "okay";
+ /delete-property/#size-cells;
+ /delete-property/#address-cells;
/delete-node/panel@0;
ports {
port {
@@ -441,20 +443,20 @@
};
touchscreen_pins: touchscreen-pins {
- touch_int_odl {
+ touch-int-odl {
pinmux = <PINMUX_GPIO155__FUNC_GPIO155>;
input-enable;
bias-pull-up;
};
- touch_rst_l {
+ touch-rst-l {
pinmux = <PINMUX_GPIO156__FUNC_GPIO156>;
output-high;
};
};
trackpad_pins: trackpad-pins {
- trackpad_int {
+ trackpad-int {
pinmux = <PINMUX_GPIO7__FUNC_GPIO7>;
input-enable;
bias-disable; /* pulled externally */
diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi
index bf7de35ffcbc..7881a27be029 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi
@@ -116,7 +116,7 @@
#size-cells = <2>;
ranges;
- scp_mem_reserved: scp_mem_region {
+ scp_mem_reserved: memory@50000000 {
compatible = "shared-dma-pool";
reg = <0 0x50000000 0 0x2900000>;
no-map;
@@ -460,7 +460,7 @@
&pio {
aud_pins_default: audiopins {
- pins_bus {
+ pins-bus {
pinmux = <PINMUX_GPIO97__FUNC_I2S2_MCK>,
<PINMUX_GPIO98__FUNC_I2S2_BCK>,
<PINMUX_GPIO101__FUNC_I2S2_LRCK>,
@@ -482,7 +482,7 @@
};
aud_pins_tdm_out_on: audiotdmouton {
- pins_bus {
+ pins-bus {
pinmux = <PINMUX_GPIO169__FUNC_TDM_BCK_2ND>,
<PINMUX_GPIO170__FUNC_TDM_LRCK_2ND>,
<PINMUX_GPIO171__FUNC_TDM_DATA0_2ND>,
@@ -494,7 +494,7 @@
};
aud_pins_tdm_out_off: audiotdmoutoff {
- pins_bus {
+ pins-bus {
pinmux = <PINMUX_GPIO169__FUNC_GPIO169>,
<PINMUX_GPIO170__FUNC_GPIO170>,
<PINMUX_GPIO171__FUNC_GPIO171>,
@@ -508,13 +508,13 @@
};
bt_pins: bt-pins {
- pins_bt_en {
+ pins-bt-en {
pinmux = <PINMUX_GPIO120__FUNC_GPIO120>;
output-low;
};
};
- ec_ap_int_odl: ec_ap_int_odl {
+ ec_ap_int_odl: ec-ap-int-odl {
pins1 {
pinmux = <PINMUX_GPIO151__FUNC_GPIO151>;
input-enable;
@@ -522,7 +522,7 @@
};
};
- h1_int_od_l: h1_int_od_l {
+ h1_int_od_l: h1-int-od-l {
pins1 {
pinmux = <PINMUX_GPIO153__FUNC_GPIO153>;
input-enable;
@@ -530,7 +530,7 @@
};
i2c0_pins: i2c0 {
- pins_bus {
+ pins-bus {
pinmux = <PINMUX_GPIO82__FUNC_SDA0>,
<PINMUX_GPIO83__FUNC_SCL0>;
mediatek,pull-up-adv = <3>;
@@ -539,7 +539,7 @@
};
i2c1_pins: i2c1 {
- pins_bus {
+ pins-bus {
pinmux = <PINMUX_GPIO81__FUNC_SDA1>,
<PINMUX_GPIO84__FUNC_SCL1>;
mediatek,pull-up-adv = <3>;
@@ -548,7 +548,7 @@
};
i2c2_pins: i2c2 {
- pins_bus {
+ pins-bus {
pinmux = <PINMUX_GPIO103__FUNC_SCL2>,
<PINMUX_GPIO104__FUNC_SDA2>;
bias-disable;
@@ -557,7 +557,7 @@
};
i2c3_pins: i2c3 {
- pins_bus {
+ pins-bus {
pinmux = <PINMUX_GPIO50__FUNC_SCL3>,
<PINMUX_GPIO51__FUNC_SDA3>;
mediatek,pull-up-adv = <3>;
@@ -566,7 +566,7 @@
};
i2c4_pins: i2c4 {
- pins_bus {
+ pins-bus {
pinmux = <PINMUX_GPIO105__FUNC_SCL4>,
<PINMUX_GPIO106__FUNC_SDA4>;
bias-disable;
@@ -575,7 +575,7 @@
};
i2c5_pins: i2c5 {
- pins_bus {
+ pins-bus {
pinmux = <PINMUX_GPIO48__FUNC_SCL5>,
<PINMUX_GPIO49__FUNC_SDA5>;
mediatek,pull-up-adv = <3>;
@@ -584,7 +584,7 @@
};
i2c6_pins: i2c6 {
- pins_bus {
+ pins-bus {
pinmux = <PINMUX_GPIO11__FUNC_SCL6>,
<PINMUX_GPIO12__FUNC_SDA6>;
bias-disable;
@@ -592,7 +592,7 @@
};
mmc0_pins_default: mmc0-pins-default {
- pins_cmd_dat {
+ pins-cmd-dat {
pinmux = <PINMUX_GPIO123__FUNC_MSDC0_DAT0>,
<PINMUX_GPIO128__FUNC_MSDC0_DAT1>,
<PINMUX_GPIO125__FUNC_MSDC0_DAT2>,
@@ -607,13 +607,13 @@
mediatek,pull-up-adv = <01>;
};
- pins_clk {
+ pins-clk {
pinmux = <PINMUX_GPIO124__FUNC_MSDC0_CLK>;
drive-strength = <MTK_DRIVE_14mA>;
mediatek,pull-down-adv = <10>;
};
- pins_rst {
+ pins-rst {
pinmux = <PINMUX_GPIO133__FUNC_MSDC0_RSTB>;
drive-strength = <MTK_DRIVE_14mA>;
mediatek,pull-down-adv = <01>;
@@ -621,7 +621,7 @@
};
mmc0_pins_uhs: mmc0-pins-uhs {
- pins_cmd_dat {
+ pins-cmd-dat {
pinmux = <PINMUX_GPIO123__FUNC_MSDC0_DAT0>,
<PINMUX_GPIO128__FUNC_MSDC0_DAT1>,
<PINMUX_GPIO125__FUNC_MSDC0_DAT2>,
@@ -636,19 +636,19 @@
mediatek,pull-up-adv = <01>;
};
- pins_clk {
+ pins-clk {
pinmux = <PINMUX_GPIO124__FUNC_MSDC0_CLK>;
drive-strength = <MTK_DRIVE_14mA>;
mediatek,pull-down-adv = <10>;
};
- pins_ds {
+ pins-ds {
pinmux = <PINMUX_GPIO131__FUNC_MSDC0_DSL>;
drive-strength = <MTK_DRIVE_14mA>;
mediatek,pull-down-adv = <10>;
};
- pins_rst {
+ pins-rst {
pinmux = <PINMUX_GPIO133__FUNC_MSDC0_RSTB>;
drive-strength = <MTK_DRIVE_14mA>;
mediatek,pull-up-adv = <01>;
@@ -656,7 +656,7 @@
};
mmc1_pins_default: mmc1-pins-default {
- pins_cmd_dat {
+ pins-cmd-dat {
pinmux = <PINMUX_GPIO31__FUNC_MSDC1_CMD>,
<PINMUX_GPIO32__FUNC_MSDC1_DAT0>,
<PINMUX_GPIO34__FUNC_MSDC1_DAT1>,
@@ -666,7 +666,7 @@
mediatek,pull-up-adv = <10>;
};
- pins_clk {
+ pins-clk {
pinmux = <PINMUX_GPIO29__FUNC_MSDC1_CLK>;
input-enable;
mediatek,pull-down-adv = <10>;
@@ -674,7 +674,7 @@
};
mmc1_pins_uhs: mmc1-pins-uhs {
- pins_cmd_dat {
+ pins-cmd-dat {
pinmux = <PINMUX_GPIO31__FUNC_MSDC1_CMD>,
<PINMUX_GPIO32__FUNC_MSDC1_DAT0>,
<PINMUX_GPIO34__FUNC_MSDC1_DAT1>,
@@ -685,7 +685,7 @@
mediatek,pull-up-adv = <10>;
};
- pins_clk {
+ pins-clk {
pinmux = <PINMUX_GPIO29__FUNC_MSDC1_CLK>;
drive-strength = <MTK_DRIVE_8mA>;
mediatek,pull-down-adv = <10>;
@@ -693,15 +693,15 @@
};
};
- panel_pins_default: panel_pins_default {
- panel_reset {
+ panel_pins_default: panel-pins-default {
+ panel-reset {
pinmux = <PINMUX_GPIO45__FUNC_GPIO45>;
output-low;
bias-pull-up;
};
};
- pwm0_pin_default: pwm0_pin_default {
+ pwm0_pin_default: pwm0-pin-default {
pins1 {
pinmux = <PINMUX_GPIO176__FUNC_GPIO176>;
output-high;
@@ -713,14 +713,14 @@
};
scp_pins: scp {
- pins_scp_uart {
+ pins-scp-uart {
pinmux = <PINMUX_GPIO110__FUNC_TP_URXD1_AO>,
<PINMUX_GPIO112__FUNC_TP_UTXD1_AO>;
};
};
spi0_pins: spi0 {
- pins_spi {
+ pins-spi {
pinmux = <PINMUX_GPIO85__FUNC_SPI0_MI>,
<PINMUX_GPIO86__FUNC_GPIO86>,
<PINMUX_GPIO87__FUNC_SPI0_MO>,
@@ -730,7 +730,7 @@
};
spi1_pins: spi1 {
- pins_spi {
+ pins-spi {
pinmux = <PINMUX_GPIO161__FUNC_SPI1_A_MI>,
<PINMUX_GPIO162__FUNC_SPI1_A_CSB>,
<PINMUX_GPIO163__FUNC_SPI1_A_MO>,
@@ -740,20 +740,20 @@
};
spi2_pins: spi2 {
- pins_spi {
+ pins-spi {
pinmux = <PINMUX_GPIO0__FUNC_SPI2_CSB>,
<PINMUX_GPIO1__FUNC_SPI2_MO>,
<PINMUX_GPIO2__FUNC_SPI2_CLK>;
bias-disable;
};
- pins_spi_mi {
+ pins-spi-mi {
pinmux = <PINMUX_GPIO94__FUNC_SPI2_MI>;
mediatek,pull-down-adv = <00>;
};
};
spi3_pins: spi3 {
- pins_spi {
+ pins-spi {
pinmux = <PINMUX_GPIO21__FUNC_SPI3_MI>,
<PINMUX_GPIO22__FUNC_SPI3_CSB>,
<PINMUX_GPIO23__FUNC_SPI3_MO>,
@@ -763,7 +763,7 @@
};
spi4_pins: spi4 {
- pins_spi {
+ pins-spi {
pinmux = <PINMUX_GPIO17__FUNC_SPI4_MI>,
<PINMUX_GPIO18__FUNC_SPI4_CSB>,
<PINMUX_GPIO19__FUNC_SPI4_MO>,
@@ -773,7 +773,7 @@
};
spi5_pins: spi5 {
- pins_spi {
+ pins-spi {
pinmux = <PINMUX_GPIO13__FUNC_SPI5_MI>,
<PINMUX_GPIO14__FUNC_SPI5_CSB>,
<PINMUX_GPIO15__FUNC_SPI5_MO>,
@@ -783,63 +783,63 @@
};
uart0_pins_default: uart0-pins-default {
- pins_rx {
+ pins-rx {
pinmux = <PINMUX_GPIO95__FUNC_URXD0>;
input-enable;
bias-pull-up;
};
- pins_tx {
+ pins-tx {
pinmux = <PINMUX_GPIO96__FUNC_UTXD0>;
};
};
uart1_pins_default: uart1-pins-default {
- pins_rx {
+ pins-rx {
pinmux = <PINMUX_GPIO121__FUNC_URXD1>;
input-enable;
bias-pull-up;
};
- pins_tx {
+ pins-tx {
pinmux = <PINMUX_GPIO115__FUNC_UTXD1>;
};
- pins_rts {
+ pins-rts {
pinmux = <PINMUX_GPIO47__FUNC_URTS1>;
output-enable;
};
- pins_cts {
+ pins-cts {
pinmux = <PINMUX_GPIO46__FUNC_UCTS1>;
input-enable;
};
};
uart1_pins_sleep: uart1-pins-sleep {
- pins_rx {
+ pins-rx {
pinmux = <PINMUX_GPIO121__FUNC_GPIO121>;
input-enable;
bias-pull-up;
};
- pins_tx {
+ pins-tx {
pinmux = <PINMUX_GPIO115__FUNC_UTXD1>;
};
- pins_rts {
+ pins-rts {
pinmux = <PINMUX_GPIO47__FUNC_URTS1>;
output-enable;
};
- pins_cts {
+ pins-cts {
pinmux = <PINMUX_GPIO46__FUNC_UCTS1>;
input-enable;
};
};
wifi_pins_pwrseq: wifi-pins-pwrseq {
- pins_wifi_enable {
+ pins-wifi-enable {
pinmux = <PINMUX_GPIO119__FUNC_GPIO119>;
output-low;
};
};
wifi_pins_wakeup: wifi-pins-wakeup {
- pins_wifi_wakeup {
+ pins-wifi-wakeup {
pinmux = <PINMUX_GPIO113__FUNC_GPIO113>;
input-enable;
};
diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
index 5169779d01df..976dc968b3ca 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
@@ -1210,127 +1210,6 @@
nvmem-cell-names = "calibration-data";
};
- thermal_zones: thermal-zones {
- cpu_thermal: cpu-thermal {
- polling-delay-passive = <100>;
- polling-delay = <500>;
- thermal-sensors = <&thermal 0>;
- sustainable-power = <5000>;
-
- trips {
- threshold: trip-point0 {
- temperature = <68000>;
- hysteresis = <2000>;
- type = "passive";
- };
-
- target: trip-point1 {
- temperature = <80000>;
- hysteresis = <2000>;
- type = "passive";
- };
-
- cpu_crit: cpu-crit {
- temperature = <115000>;
- hysteresis = <2000>;
- type = "critical";
- };
- };
-
- cooling-maps {
- map0 {
- trip = <&target>;
- cooling-device = <&cpu0
- THERMAL_NO_LIMIT
- THERMAL_NO_LIMIT>,
- <&cpu1
- THERMAL_NO_LIMIT
- THERMAL_NO_LIMIT>,
- <&cpu2
- THERMAL_NO_LIMIT
- THERMAL_NO_LIMIT>,
- <&cpu3
- THERMAL_NO_LIMIT
- THERMAL_NO_LIMIT>;
- contribution = <3072>;
- };
- map1 {
- trip = <&target>;
- cooling-device = <&cpu4
- THERMAL_NO_LIMIT
- THERMAL_NO_LIMIT>,
- <&cpu5
- THERMAL_NO_LIMIT
- THERMAL_NO_LIMIT>,
- <&cpu6
- THERMAL_NO_LIMIT
- THERMAL_NO_LIMIT>,
- <&cpu7
- THERMAL_NO_LIMIT
- THERMAL_NO_LIMIT>;
- contribution = <1024>;
- };
- };
- };
-
- /* The tzts1 ~ tzts6 don't need to polling */
- /* The tzts1 ~ tzts6 don't need to thermal throttle */
-
- tzts1: tzts1 {
- polling-delay-passive = <0>;
- polling-delay = <0>;
- thermal-sensors = <&thermal 1>;
- sustainable-power = <5000>;
- trips {};
- cooling-maps {};
- };
-
- tzts2: tzts2 {
- polling-delay-passive = <0>;
- polling-delay = <0>;
- thermal-sensors = <&thermal 2>;
- sustainable-power = <5000>;
- trips {};
- cooling-maps {};
- };
-
- tzts3: tzts3 {
- polling-delay-passive = <0>;
- polling-delay = <0>;
- thermal-sensors = <&thermal 3>;
- sustainable-power = <5000>;
- trips {};
- cooling-maps {};
- };
-
- tzts4: tzts4 {
- polling-delay-passive = <0>;
- polling-delay = <0>;
- thermal-sensors = <&thermal 4>;
- sustainable-power = <5000>;
- trips {};
- cooling-maps {};
- };
-
- tzts5: tzts5 {
- polling-delay-passive = <0>;
- polling-delay = <0>;
- thermal-sensors = <&thermal 5>;
- sustainable-power = <5000>;
- trips {};
- cooling-maps {};
- };
-
- tztsABB: tztsABB {
- polling-delay-passive = <0>;
- polling-delay = <0>;
- thermal-sensors = <&thermal 6>;
- sustainable-power = <5000>;
- trips {};
- cooling-maps {};
- };
- };
-
pwm0: pwm@1100e000 {
compatible = "mediatek,mt8183-disp-pwm";
reg = <0 0x1100e000 0 0x1000>;
@@ -2105,4 +1984,125 @@
power-domains = <&spm MT8183_POWER_DOMAIN_CAM>;
};
};
+
+ thermal_zones: thermal-zones {
+ cpu_thermal: cpu-thermal {
+ polling-delay-passive = <100>;
+ polling-delay = <500>;
+ thermal-sensors = <&thermal 0>;
+ sustainable-power = <5000>;
+
+ trips {
+ threshold: trip-point0 {
+ temperature = <68000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ target: trip-point1 {
+ temperature = <80000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ cpu_crit: cpu-crit {
+ temperature = <115000>;
+ hysteresis = <2000>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&target>;
+ cooling-device = <&cpu0
+ THERMAL_NO_LIMIT
+ THERMAL_NO_LIMIT>,
+ <&cpu1
+ THERMAL_NO_LIMIT
+ THERMAL_NO_LIMIT>,
+ <&cpu2
+ THERMAL_NO_LIMIT
+ THERMAL_NO_LIMIT>,
+ <&cpu3
+ THERMAL_NO_LIMIT
+ THERMAL_NO_LIMIT>;
+ contribution = <3072>;
+ };
+ map1 {
+ trip = <&target>;
+ cooling-device = <&cpu4
+ THERMAL_NO_LIMIT
+ THERMAL_NO_LIMIT>,
+ <&cpu5
+ THERMAL_NO_LIMIT
+ THERMAL_NO_LIMIT>,
+ <&cpu6
+ THERMAL_NO_LIMIT
+ THERMAL_NO_LIMIT>,
+ <&cpu7
+ THERMAL_NO_LIMIT
+ THERMAL_NO_LIMIT>;
+ contribution = <1024>;
+ };
+ };
+ };
+
+ /* The tzts1 ~ tzts6 don't need to polling */
+ /* The tzts1 ~ tzts6 don't need to thermal throttle */
+
+ tzts1: tzts1 {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+ thermal-sensors = <&thermal 1>;
+ sustainable-power = <5000>;
+ trips {};
+ cooling-maps {};
+ };
+
+ tzts2: tzts2 {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+ thermal-sensors = <&thermal 2>;
+ sustainable-power = <5000>;
+ trips {};
+ cooling-maps {};
+ };
+
+ tzts3: tzts3 {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+ thermal-sensors = <&thermal 3>;
+ sustainable-power = <5000>;
+ trips {};
+ cooling-maps {};
+ };
+
+ tzts4: tzts4 {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+ thermal-sensors = <&thermal 4>;
+ sustainable-power = <5000>;
+ trips {};
+ cooling-maps {};
+ };
+
+ tzts5: tzts5 {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+ thermal-sensors = <&thermal 5>;
+ sustainable-power = <5000>;
+ trips {};
+ cooling-maps {};
+ };
+
+ tztsABB: tztsABB {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+ thermal-sensors = <&thermal 6>;
+ sustainable-power = <5000>;
+ trips {};
+ cooling-maps {};
+ };
+ };
};
diff --git a/arch/arm64/boot/dts/mediatek/mt8186.dtsi b/arch/arm64/boot/dts/mediatek/mt8186.dtsi
index f04ae70c470a..df0c04f2ba1d 100644
--- a/arch/arm64/boot/dts/mediatek/mt8186.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8186.dtsi
@@ -924,7 +924,8 @@
reg = <MT8186_POWER_DOMAIN_CSIRX_TOP>;
clocks = <&topckgen CLK_TOP_SENINF>,
<&topckgen CLK_TOP_SENINF1>;
- clock-names = "csirx_top0", "csirx_top1";
+ clock-names = "subsys-csirx-top0",
+ "subsys-csirx-top1";
#power-domain-cells = <0>;
};
@@ -942,7 +943,8 @@
reg = <MT8186_POWER_DOMAIN_ADSP_AO>;
clocks = <&topckgen CLK_TOP_AUDIODSP>,
<&topckgen CLK_TOP_ADSP_BUS>;
- clock-names = "audioadsp", "adsp_bus";
+ clock-names = "audioadsp",
+ "subsys-adsp-bus";
#address-cells = <1>;
#size-cells = <0>;
#power-domain-cells = <1>;
@@ -975,8 +977,11 @@
<&mmsys CLK_MM_SMI_COMMON>,
<&mmsys CLK_MM_SMI_GALS>,
<&mmsys CLK_MM_SMI_IOMMU>;
- clock-names = "disp", "mdp", "smi_infra", "smi_common",
- "smi_gals", "smi_iommu";
+ clock-names = "disp", "mdp",
+ "subsys-smi-infra",
+ "subsys-smi-common",
+ "subsys-smi-gals",
+ "subsys-smi-iommu";
mediatek,infracfg = <&infracfg_ao>;
#address-cells = <1>;
#size-cells = <0>;
@@ -993,15 +998,17 @@
power-domain@MT8186_POWER_DOMAIN_CAM {
reg = <MT8186_POWER_DOMAIN_CAM>;
- clocks = <&topckgen CLK_TOP_CAM>,
- <&topckgen CLK_TOP_SENINF>,
+ clocks = <&topckgen CLK_TOP_SENINF>,
<&topckgen CLK_TOP_SENINF1>,
<&topckgen CLK_TOP_SENINF2>,
<&topckgen CLK_TOP_SENINF3>,
+ <&camsys CLK_CAM2MM_GALS>,
<&topckgen CLK_TOP_CAMTM>,
- <&camsys CLK_CAM2MM_GALS>;
- clock-names = "cam-top", "cam0", "cam1", "cam2",
- "cam3", "cam-tm", "gals";
+ <&topckgen CLK_TOP_CAM>;
+ clock-names = "cam0", "cam1", "cam2",
+ "cam3", "gals",
+ "subsys-cam-tm",
+ "subsys-cam-top";
mediatek,infracfg = <&infracfg_ao>;
#address-cells = <1>;
#size-cells = <0>;
@@ -1020,9 +1027,9 @@
power-domain@MT8186_POWER_DOMAIN_IMG {
reg = <MT8186_POWER_DOMAIN_IMG>;
- clocks = <&topckgen CLK_TOP_IMG1>,
- <&imgsys1 CLK_IMG1_GALS_IMG1>;
- clock-names = "img-top", "gals";
+ clocks = <&imgsys1 CLK_IMG1_GALS_IMG1>,
+ <&topckgen CLK_TOP_IMG1>;
+ clock-names = "gals", "subsys-img-top";
mediatek,infracfg = <&infracfg_ao>;
#address-cells = <1>;
#size-cells = <0>;
@@ -1041,8 +1048,11 @@
<&ipesys CLK_IPE_LARB20>,
<&ipesys CLK_IPE_SMI_SUBCOM>,
<&ipesys CLK_IPE_GALS_IPE>;
- clock-names = "ipe-top", "ipe-larb0", "ipe-larb1",
- "ipe-smi", "ipe-gals";
+ clock-names = "subsys-ipe-top",
+ "subsys-ipe-larb0",
+ "subsys-ipe-larb1",
+ "subsys-ipe-smi",
+ "subsys-ipe-gals";
mediatek,infracfg = <&infracfg_ao>;
#power-domain-cells = <0>;
};
@@ -1061,7 +1071,9 @@
clocks = <&topckgen CLK_TOP_WPE>,
<&wpesys CLK_WPE_SMI_LARB8_CK_EN>,
<&wpesys CLK_WPE_SMI_LARB8_PCLK_EN>;
- clock-names = "wpe0", "larb-ck", "larb-pclk";
+ clock-names = "wpe0",
+ "subsys-larb-ck",
+ "subsys-larb-pclk";
mediatek,infracfg = <&infracfg_ao>;
#power-domain-cells = <0>;
};
@@ -1656,7 +1668,7 @@
#address-cells = <1>;
#size-cells = <1>;
- gpu_speedbin: gpu-speed-bin@59c {
+ gpu_speedbin: gpu-speedbin@59c {
reg = <0x59c 0x4>;
bits = <0 3>;
};
diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi
index dd5b89b73190..5a7cab489ff3 100644
--- a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi
@@ -389,7 +389,7 @@
pinctrl-0 = <&i2c7_pins>;
pmic@34 {
- #interrupt-cells = <1>;
+ #interrupt-cells = <2>;
compatible = "mediatek,mt6360";
reg = <0x34>;
interrupt-controller;
diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi
index 54c674c45b49..e0ac2e9f5b72 100644
--- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi
@@ -627,6 +627,8 @@
power-domain@MT8195_POWER_DOMAIN_VENC_CORE1 {
reg = <MT8195_POWER_DOMAIN_VENC_CORE1>;
+ clocks = <&vencsys_core1 CLK_VENC_CORE1_LARB>;
+ clock-names = "venc1-larb";
mediatek,infracfg = <&infracfg_ao>;
#power-domain-cells = <0>;
};
@@ -689,6 +691,8 @@
power-domain@MT8195_POWER_DOMAIN_VENC {
reg = <MT8195_POWER_DOMAIN_VENC>;
+ clocks = <&vencsys CLK_VENC_LARB>;
+ clock-names = "venc0-larb";
mediatek,infracfg = <&infracfg_ao>;
#power-domain-cells = <0>;
};
@@ -2665,7 +2669,7 @@
reg = <0 0x1b010000 0 0x1000>;
mediatek,larb-id = <20>;
mediatek,smi = <&smi_common_vpp>;
- clocks = <&vencsys_core1 CLK_VENC_CORE1_LARB>,
+ clocks = <&vencsys_core1 CLK_VENC_CORE1_VENC>,
<&vencsys_core1 CLK_VENC_CORE1_GALS>,
<&vppsys0 CLK_VPP0_GALS_VDO0_VDO1_VENCSYS_CORE1>;
clock-names = "apb", "smi", "gals";
diff --git a/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts b/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts
index 70b465f7c6a7..00ac59a873e8 100644
--- a/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts
@@ -238,6 +238,7 @@
mt6360: pmic@34 {
compatible = "mediatek,mt6360";
reg = <0x34>;
+ interrupt-parent = <&pio>;
interrupts = <128 IRQ_TYPE_EDGE_FALLING>;
interrupt-names = "IRQB";
interrupt-controller;
diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts
index de0a1f2af983..7d4c5324c61b 100644
--- a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts
+++ b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts
@@ -86,7 +86,7 @@
sgtl5000_clk: sgtl5000-oscillator {
compatible = "fixed-clock";
#clock-cells = <0>;
- clock-frequency = <24576000>;
+ clock-frequency = <24576000>;
};
dc_12v: dc-12v-regulator {
diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
index e729e7a22b23..cc8209795c3e 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
@@ -668,7 +668,7 @@
vdec: video-codec@ff360000 {
compatible = "rockchip,rk3328-vdec", "rockchip,rk3399-vdec";
- reg = <0x0 0xff360000 0x0 0x400>;
+ reg = <0x0 0xff360000 0x0 0x480>;
interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cru ACLK_RKVDEC>, <&cru HCLK_RKVDEC>,
<&cru SCLK_VDEC_CABAC>, <&cru SCLK_VDEC_CORE>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi
index 5c1929d41cc0..cacbad35cfc8 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi
@@ -509,8 +509,7 @@ ap_i2c_tp: &i2c5 {
&pci_rootport {
mvl_wifi: wifi@0,0 {
compatible = "pci1b4b,2b42";
- reg = <0x83010000 0x0 0x00000000 0x0 0x00100000
- 0x83010000 0x0 0x00100000 0x0 0x00100000>;
+ reg = <0x0000 0x0 0x0 0x0 0x0>;
interrupt-parent = <&gpio0>;
interrupts = <8 IRQ_TYPE_LEVEL_LOW>;
pinctrl-names = "default";
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts
index 853e88455e75..9e4b12ed62cb 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts
@@ -34,8 +34,8 @@
&pci_rootport {
wifi@0,0 {
compatible = "qcom,ath10k";
- reg = <0x00010000 0x0 0x00000000 0x0 0x00000000>,
- <0x03010010 0x0 0x00000000 0x0 0x00200000>;
+ reg = <0x00000000 0x0 0x00000000 0x0 0x00000000>,
+ <0x03000010 0x0 0x00000000 0x0 0x00200000>;
qcom,ath10k-calibration-variant = "GO_DUMO";
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
index c9bf1d5c3a42..789fd0dcc88b 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
@@ -489,6 +489,7 @@ ap_i2c_audio: &i2c8 {
#address-cells = <3>;
#size-cells = <2>;
ranges;
+ device_type = "pci";
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
index faf02e59d6c7..da0dfb237f85 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
@@ -1109,7 +1109,9 @@
power-domain@RK3399_PD_VDU {
reg = <RK3399_PD_VDU>;
clocks = <&cru ACLK_VDU>,
- <&cru HCLK_VDU>;
+ <&cru HCLK_VDU>,
+ <&cru SCLK_VDU_CA>,
+ <&cru SCLK_VDU_CORE>;
pm_qos = <&qos_video_m1_r>,
<&qos_video_m1_w>;
#power-domain-cells = <0>;
@@ -1384,7 +1386,7 @@
vdec: video-codec@ff660000 {
compatible = "rockchip,rk3399-vdec";
- reg = <0x0 0xff660000 0x0 0x400>;
+ reg = <0x0 0xff660000 0x0 0x480>;
interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH 0>;
clocks = <&cru ACLK_VDU>, <&cru HCLK_VDU>,
<&cru SCLK_VDU_CA>, <&cru SCLK_VDU_CORE>;
diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi
index 0964761e3ce9..c19c0f1b3778 100644
--- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi
@@ -977,7 +977,7 @@
<GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 71 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "sys", "pmc", "msi", "legacy", "err";
+ interrupt-names = "sys", "pmc", "msg", "legacy", "err";
bus-range = <0x0 0xf>;
clocks = <&cru ACLK_PCIE20_MST>, <&cru ACLK_PCIE20_SLV>,
<&cru ACLK_PCIE20_DBI>, <&cru PCLK_PCIE20>,
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi
index 9570b34aca2e..d88c0e852356 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi
@@ -235,13 +235,13 @@
&pinctrl {
fan {
fan_int: fan-int {
- rockchip,pins = <0 RK_PA4 RK_FUNC_GPIO &pcfg_pull_none>;
+ rockchip,pins = <0 RK_PA4 RK_FUNC_GPIO &pcfg_pull_up>;
};
};
hym8563 {
hym8563_int: hym8563-int {
- rockchip,pins = <0 RK_PB0 RK_FUNC_GPIO &pcfg_pull_none>;
+ rockchip,pins = <0 RK_PB0 RK_FUNC_GPIO &pcfg_pull_up>;
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts
index 8f399c4317bd..e3a839a12dc6 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts
@@ -38,7 +38,7 @@
leds {
compatible = "gpio-leds";
pinctrl-names = "default";
- pinctrl-0 =<&leds_gpio>;
+ pinctrl-0 = <&leds_gpio>;
led-1 {
gpios = <&gpio1 RK_PA2 GPIO_ACTIVE_HIGH>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi
index 63151d9d2377..30db12c4fc82 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi
@@ -369,7 +369,7 @@
emmc_data_strobe: emmc-data-strobe {
rockchip,pins =
/* emmc_data_strobe */
- <2 RK_PA2 1 &pcfg_pull_none>;
+ <2 RK_PA2 1 &pcfg_pull_down>;
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
index 7064c0e9179f..8aa0499f9b03 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
@@ -1362,7 +1362,6 @@
<GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH 0>,
<GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH 0>,
<GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH 0>;
- interrupt-names = "ch0", "ch1", "ch2", "ch3";
rockchip,pmu = <&pmu1grf>;
};
diff --git a/arch/arm64/boot/install.sh b/arch/arm64/boot/install.sh
index 7399d706967a..9b7a09808a3d 100755
--- a/arch/arm64/boot/install.sh
+++ b/arch/arm64/boot/install.sh
@@ -17,7 +17,8 @@
# $3 - kernel map file
# $4 - default install path (blank if root directory)
-if [ "$(basename $2)" = "Image.gz" ]; then
+if [ "$(basename $2)" = "Image.gz" ] || [ "$(basename $2)" = "vmlinuz.efi" ]
+then
# Compressed install
echo "Installing compressed kernel"
base=vmlinuz
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 376a980f2bad..7b1975bf4b90 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -12,7 +12,7 @@
#ifndef __ASM_ASSEMBLER_H
#define __ASM_ASSEMBLER_H
-#include <asm-generic/export.h>
+#include <linux/export.h>
#include <asm/alternative.h>
#include <asm/asm-bug.h>
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index ceb368d33bf4..06a4670bdb0b 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -58,7 +58,6 @@ static inline unsigned int arch_slab_minalign(void)
#define CTR_L1IP(ctr) SYS_FIELD_GET(CTR_EL0, L1Ip, ctr)
#define ICACHEF_ALIASING 0
-#define ICACHEF_VPIPT 1
extern unsigned long __icache_flags;
/*
@@ -70,11 +69,6 @@ static inline int icache_is_aliasing(void)
return test_bit(ICACHEF_ALIASING, &__icache_flags);
}
-static __always_inline int icache_is_vpipt(void)
-{
- return test_bit(ICACHEF_VPIPT, &__icache_flags);
-}
-
static inline u32 cache_type_cwg(void)
{
return SYS_FIELD_GET(CTR_EL0, CWG, read_cpuid_cachetype());
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index f6d416fe49b0..21c824edf8ce 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -617,6 +617,7 @@ static inline bool id_aa64pfr1_mte(u64 pfr1)
return val >= ID_AA64PFR1_EL1_MTE_MTE2;
}
+void __init setup_boot_cpu_features(void);
void __init setup_system_features(void);
void __init setup_user_features(void);
@@ -819,6 +820,11 @@ static inline bool system_supports_tlb_range(void)
return alternative_has_cap_unlikely(ARM64_HAS_TLB_RANGE);
}
+static inline bool system_supports_lpa2(void)
+{
+ return cpus_have_final_cap(ARM64_HAS_LPA2);
+}
+
int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
bool try_emulate_mrs(struct pt_regs *regs, u32 isn);
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index cdf6a35e3994..cda81d009c9b 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -242,14 +242,6 @@
| (\nx << 5)
.endm
-/*
- * Zero the entire ZA array
- * ZERO ZA
- */
-.macro zero_za
- .inst 0xc00800ff
-.endm
-
.macro __for from:req, to:req
.if (\from) == (\to)
_for__body %\from
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 85d26143faa5..83ddb14b95a5 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -37,27 +37,12 @@
/*
- * If KASLR is enabled, then an offset K is added to the kernel address
- * space. The bottom 21 bits of this offset are zero to guarantee 2MB
- * alignment for PA and VA.
- *
- * For each pagetable level of the swapper, we know that the shift will
- * be larger than 21 (for the 4KB granule case we use section maps thus
- * the smallest shift is actually 30) thus there is the possibility that
- * KASLR can increase the number of pagetable entries by 1, so we make
- * room for this extra entry.
- *
- * Note KASLR cannot increase the number of required entries for a level
- * by more than one because it increments both the virtual start and end
- * addresses equally (the extra entry comes from the case where the end
- * address is just pushed over a boundary and the start address isn't).
+ * A relocatable kernel may execute from an address that differs from the one at
+ * which it was linked. In the worst case, its runtime placement may intersect
+ * with two adjacent PGDIR entries, which means that an additional page table
+ * may be needed at each subordinate level.
*/
-
-#ifdef CONFIG_RANDOMIZE_BASE
-#define EARLY_KASLR (1)
-#else
-#define EARLY_KASLR (0)
-#endif
+#define EXTRA_PAGE __is_defined(CONFIG_RELOCATABLE)
#define SPAN_NR_ENTRIES(vstart, vend, shift) \
((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1)
@@ -83,7 +68,7 @@
+ EARLY_PGDS((vstart), (vend), add) /* each PGDIR needs a next level page table */ \
+ EARLY_PUDS((vstart), (vend), add) /* each PUD needs a next level page table */ \
+ EARLY_PMDS((vstart), (vend), add)) /* each PMD needs a next level page table */
-#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end, EARLY_KASLR))
+#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end, EXTRA_PAGE))
/* the initial ID map may need two extra pages if it needs to be extended */
#if VA_BITS < 48
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 49e0d4b36bd0..e3e793d0ec30 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -244,13 +244,6 @@ static inline size_t __invalidate_icache_max_range(void)
static inline void __invalidate_icache_guest_page(void *va, size_t size)
{
/*
- * VPIPT I-cache maintenance must be done from EL2. See comment in the
- * nVHE flavor of __kvm_tlb_flush_vmid_ipa().
- */
- if (icache_is_vpipt() && read_sysreg(CurrentEL) != CurrentEL_EL2)
- return;
-
- /*
* Blow the whole I-cache if it is aliasing (i.e. VIPT) or the
* invalidation range exceeds our arbitrary limit on invadations by
* cache line.
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index d3e354bb8351..10068500d601 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -25,6 +25,8 @@
#define KVM_PGTABLE_MIN_BLOCK_LEVEL 2U
#endif
+#define kvm_lpa2_is_enabled() false
+
static inline u64 kvm_get_parange(u64 mmfr0)
{
u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 0f139cb4467b..d82305ab420f 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -208,6 +208,7 @@
#include <linux/types.h>
#include <asm/boot.h>
#include <asm/bug.h>
+#include <asm/sections.h>
#if VA_BITS > 48
extern u64 vabits_actual;
@@ -219,15 +220,12 @@ extern s64 memstart_addr;
/* PHYS_OFFSET - the physical address of the start of memory. */
#define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; })
-/* the virtual base of the kernel image */
-extern u64 kimage_vaddr;
-
/* the offset between the kernel virtual and physical mappings */
extern u64 kimage_voffset;
static inline unsigned long kaslr_offset(void)
{
- return kimage_vaddr - KIMAGE_VADDR;
+ return (u64)&_text - KIMAGE_VADDR;
}
#ifdef CONFIG_RANDOMIZE_BASE
@@ -433,6 +431,5 @@ void dump_mem_limit(void);
#define INIT_MEMBLOCK_MEMORY_REGIONS (INIT_MEMBLOCK_REGIONS * 8)
#endif
-#include <asm-generic/memory_model.h>
#endif /* __ASM_MEMORY_H */
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index e9624f6326dd..483dbfa39c4c 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -71,6 +71,8 @@ extern bool arm64_use_ng_mappings;
#define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0)
#define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0)
+#define lpa2_is_enabled() false
+
/*
* If we have userspace only BTI we don't want to mark kernel pages
* guarded even if the system does support BTI.
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index b19a8aee684c..79ce70fbb751 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -834,6 +834,12 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
+ /*
+ * If we end up clearing hw dirtiness for a sw-dirty PTE, set hardware
+ * dirtiness again.
+ */
+ if (pte_sw_dirty(pte))
+ pte = pte_mkdirty(pte);
return pte;
}
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index e5bc54522e71..5b0a04810b23 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -167,6 +167,9 @@ struct thread_struct {
unsigned long fault_address; /* fault info */
unsigned long fault_code; /* ESR_EL1 value */
struct debug_info debug; /* debugging */
+
+ struct user_fpsimd_state kernel_fpsimd_state;
+ unsigned int kernel_fpsimd_cpu;
#ifdef CONFIG_ARM64_PTR_AUTH
struct ptrauth_keys_user keys_user;
#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index 6a75d7ecdcaa..8e86c9e70e48 100644
--- a/arch/arm64/include/asm/simd.h
+++ b/arch/arm64/include/asm/simd.h
@@ -12,8 +12,6 @@
#include <linux/preempt.h>
#include <linux/types.h>
-DECLARE_PER_CPU(bool, fpsimd_context_busy);
-
#ifdef CONFIG_KERNEL_MODE_NEON
/*
@@ -28,17 +26,10 @@ static __must_check inline bool may_use_simd(void)
/*
* We must make sure that the SVE has been initialized properly
* before using the SIMD in kernel.
- * fpsimd_context_busy is only set while preemption is disabled,
- * and is clear whenever preemption is enabled. Since
- * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy
- * cannot change under our feet -- if it's set we cannot be
- * migrated, and if it's clear we cannot be migrated to a CPU
- * where it is set.
*/
return !WARN_ON(!system_capabilities_finalized()) &&
system_supports_fpsimd() &&
- !in_hardirq() && !irqs_disabled() && !in_nmi() &&
- !this_cpu_read(fpsimd_context_busy);
+ !in_hardirq() && !irqs_disabled() && !in_nmi();
}
#else /* ! CONFIG_KERNEL_MODE_NEON */
diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h
index 508f734de46e..f63dc654e545 100644
--- a/arch/arm64/include/asm/stacktrace/common.h
+++ b/arch/arm64/include/asm/stacktrace/common.h
@@ -9,7 +9,6 @@
#ifndef __ASM_STACKTRACE_COMMON_H
#define __ASM_STACKTRACE_COMMON_H
-#include <linux/kprobes.h>
#include <linux/types.h>
struct stack_info {
@@ -23,12 +22,6 @@ struct stack_info {
* @fp: The fp value in the frame record (or the real fp)
* @pc: The lr value in the frame record (or the real lr)
*
- * @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
- * associated with the most recently encountered replacement lr
- * value.
- *
- * @task: The task being unwound.
- *
* @stack: The stack currently being unwound.
* @stacks: An array of stacks which can be unwound.
* @nr_stacks: The number of stacks in @stacks.
@@ -36,10 +29,6 @@ struct stack_info {
struct unwind_state {
unsigned long fp;
unsigned long pc;
-#ifdef CONFIG_KRETPROBES
- struct llist_node *kr_cur;
-#endif
- struct task_struct *task;
struct stack_info stack;
struct stack_info *stacks;
@@ -66,14 +55,8 @@ static inline bool stackinfo_on_stack(const struct stack_info *info,
return true;
}
-static inline void unwind_init_common(struct unwind_state *state,
- struct task_struct *task)
+static inline void unwind_init_common(struct unwind_state *state)
{
- state->task = task;
-#ifdef CONFIG_KRETPROBES
- state->kr_cur = NULL;
-#endif
-
state->stack = stackinfo_get_unknown();
}
diff --git a/arch/arm64/include/asm/stacktrace/nvhe.h b/arch/arm64/include/asm/stacktrace/nvhe.h
index 25ab83a315a7..44759281d0d4 100644
--- a/arch/arm64/include/asm/stacktrace/nvhe.h
+++ b/arch/arm64/include/asm/stacktrace/nvhe.h
@@ -31,7 +31,7 @@ static inline void kvm_nvhe_unwind_init(struct unwind_state *state,
unsigned long fp,
unsigned long pc)
{
- unwind_init_common(state, NULL);
+ unwind_init_common(state);
state->fp = fp;
state->pc = pc;
diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h
index d977713ec0ba..abb57bc54305 100644
--- a/arch/arm64/include/asm/syscall_wrapper.h
+++ b/arch/arm64/include/asm/syscall_wrapper.h
@@ -44,9 +44,6 @@
return sys_ni_syscall(); \
}
-#define COMPAT_SYS_NI(name) \
- SYSCALL_ALIAS(__arm64_compat_sys_##name, sys_ni_posix_timers);
-
#endif /* CONFIG_COMPAT */
#define __SYSCALL_DEFINEx(x, name, ...) \
@@ -81,6 +78,5 @@
}
asmlinkage long __arm64_sys_ni_syscall(const struct pt_regs *__unused);
-#define SYS_NI(name) SYSCALL_ALIAS(__arm64_sys_##name, sys_ni_posix_timers);
#endif /* __ASM_SYSCALL_WRAPPER_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 5e65f51c10d2..c3b19b376c86 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -645,6 +645,7 @@
#define OP_AT_S1E0W sys_insn(AT_Op0, 0, AT_CRn, 8, 3)
#define OP_AT_S1E1RP sys_insn(AT_Op0, 0, AT_CRn, 9, 0)
#define OP_AT_S1E1WP sys_insn(AT_Op0, 0, AT_CRn, 9, 1)
+#define OP_AT_S1E1A sys_insn(AT_Op0, 0, AT_CRn, 9, 2)
#define OP_AT_S1E2R sys_insn(AT_Op0, 4, AT_CRn, 8, 0)
#define OP_AT_S1E2W sys_insn(AT_Op0, 4, AT_CRn, 8, 1)
#define OP_AT_S12E1R sys_insn(AT_Op0, 4, AT_CRn, 8, 4)
@@ -781,10 +782,16 @@
#define OP_TLBI_VMALLS12E1NXS sys_insn(1, 4, 9, 7, 6)
/* Misc instructions */
+#define OP_GCSPUSHX sys_insn(1, 0, 7, 7, 4)
+#define OP_GCSPOPCX sys_insn(1, 0, 7, 7, 5)
+#define OP_GCSPOPX sys_insn(1, 0, 7, 7, 6)
+#define OP_GCSPUSHM sys_insn(1, 3, 7, 7, 0)
+
#define OP_BRB_IALL sys_insn(1, 1, 7, 2, 4)
#define OP_BRB_INJ sys_insn(1, 1, 7, 2, 5)
#define OP_CFP_RCTX sys_insn(1, 3, 7, 3, 4)
#define OP_DVP_RCTX sys_insn(1, 3, 7, 3, 5)
+#define OP_COSP_RCTX sys_insn(1, 3, 7, 3, 6)
#define OP_CPP_RCTX sys_insn(1, 3, 7, 3, 7)
/* Common SCTLR_ELx flags. */
@@ -871,10 +878,12 @@
/* id_aa64mmfr0 */
#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 0x0
+#define ID_AA64MMFR0_EL1_TGRAN4_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT
#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX 0x7
#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN 0x0
#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX 0x7
#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN 0x1
+#define ID_AA64MMFR0_EL1_TGRAN16_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT
#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX 0xf
#define ARM64_MIN_PARANGE_BITS 32
@@ -882,6 +891,7 @@
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT 0x0
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE 0x1
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN 0x2
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2 0x3
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX 0x7
#ifdef CONFIG_ARM64_PA_BITS_52
@@ -892,11 +902,13 @@
#if defined(CONFIG_ARM64_4K_PAGES)
#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN4_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX
#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT
#elif defined(CONFIG_ARM64_16K_PAGES)
#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN16_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX
#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT
@@ -1039,6 +1051,19 @@
#define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4))
+/*
+ * Permission Overlay Extension (POE) permission encodings.
+ */
+#define POE_NONE UL(0x0)
+#define POE_R UL(0x1)
+#define POE_X UL(0x2)
+#define POE_RX UL(0x3)
+#define POE_W UL(0x4)
+#define POE_RW UL(0x5)
+#define POE_XW UL(0x6)
+#define POE_RXW UL(0x7)
+#define POE_MASK UL(0xf)
+
#define ARM64_FEATURE_FIELD_BITS 4
/* Defined for compatibility only, do not add new users. */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 553d1bc559c6..e72a3bf9e563 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -80,6 +80,7 @@ void arch_setup_new_exec(void);
#define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */
#define TIF_SME 27 /* SME in use */
#define TIF_SME_VL_INHERIT 28 /* Inherit SME vl_onexec across exec */
+#define TIF_KERNEL_FPSTATE 29 /* Task is in a kernel mode FPSIMD section */
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 846c563689a8..0150deb332af 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -22,15 +22,15 @@ static void tlb_flush(struct mmu_gather *tlb);
#include <asm-generic/tlb.h>
/*
- * get the tlbi levels in arm64. Default value is 0 if more than one
- * of cleared_* is set or neither is set.
- * Arm64 doesn't support p4ds now.
+ * get the tlbi levels in arm64. Default value is TLBI_TTL_UNKNOWN if more than
+ * one of cleared_* is set or neither is set - this elides the level hinting to
+ * the hardware.
*/
static inline int tlb_get_level(struct mmu_gather *tlb)
{
/* The TTL field is only valid for the leaf entry. */
if (tlb->freed_tables)
- return 0;
+ return TLBI_TTL_UNKNOWN;
if (tlb->cleared_ptes && !(tlb->cleared_pmds ||
tlb->cleared_puds ||
@@ -47,7 +47,12 @@ static inline int tlb_get_level(struct mmu_gather *tlb)
tlb->cleared_p4ds))
return 1;
- return 0;
+ if (tlb->cleared_p4ds && !(tlb->cleared_ptes ||
+ tlb->cleared_pmds ||
+ tlb->cleared_puds))
+ return 0;
+
+ return TLBI_TTL_UNKNOWN;
}
static inline void tlb_flush(struct mmu_gather *tlb)
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index bb2c2833a987..1deb5d789c2e 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -94,19 +94,22 @@ static inline unsigned long get_trans_granule(void)
* When ARMv8.4-TTL exists, TLBI operations take an additional hint for
* the level at which the invalidation must take place. If the level is
* wrong, no invalidation may take place. In the case where the level
- * cannot be easily determined, a 0 value for the level parameter will
- * perform a non-hinted invalidation.
+ * cannot be easily determined, the value TLBI_TTL_UNKNOWN will perform
+ * a non-hinted invalidation. Any provided level outside the hint range
+ * will also cause fall-back to non-hinted invalidation.
*
* For Stage-2 invalidation, use the level values provided to that effect
* in asm/stage2_pgtable.h.
*/
#define TLBI_TTL_MASK GENMASK_ULL(47, 44)
+#define TLBI_TTL_UNKNOWN INT_MAX
+
#define __tlbi_level(op, addr, level) do { \
u64 arg = addr; \
\
if (alternative_has_cap_unlikely(ARM64_HAS_ARMv8_4_TTL) && \
- level) { \
+ level >= 0 && level <= 3) { \
u64 ttl = level & 3; \
ttl |= get_trans_granule() << 2; \
arg &= ~TLBI_TTL_MASK; \
@@ -122,28 +125,34 @@ static inline unsigned long get_trans_granule(void)
} while (0)
/*
- * This macro creates a properly formatted VA operand for the TLB RANGE.
- * The value bit assignments are:
+ * This macro creates a properly formatted VA operand for the TLB RANGE. The
+ * value bit assignments are:
*
* +----------+------+-------+-------+-------+----------------------+
* | ASID | TG | SCALE | NUM | TTL | BADDR |
* +-----------------+-------+-------+-------+----------------------+
* |63 48|47 46|45 44|43 39|38 37|36 0|
*
- * The address range is determined by below formula:
- * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE)
+ * The address range is determined by below formula: [BADDR, BADDR + (NUM + 1) *
+ * 2^(5*SCALE + 1) * PAGESIZE)
+ *
+ * Note that the first argument, baddr, is pre-shifted; If LPA2 is in use, BADDR
+ * holds addr[52:16]. Else BADDR holds page number. See for example ARM DDI
+ * 0487J.a section C5.5.60 "TLBI VAE1IS, TLBI VAE1ISNXS, TLB Invalidate by VA,
+ * EL1, Inner Shareable".
*
*/
-#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl) \
- ({ \
- unsigned long __ta = (addr) >> PAGE_SHIFT; \
- __ta &= GENMASK_ULL(36, 0); \
- __ta |= (unsigned long)(ttl) << 37; \
- __ta |= (unsigned long)(num) << 39; \
- __ta |= (unsigned long)(scale) << 44; \
- __ta |= get_trans_granule() << 46; \
- __ta |= (unsigned long)(asid) << 48; \
- __ta; \
+#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl) \
+ ({ \
+ unsigned long __ta = (baddr); \
+ unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0; \
+ __ta &= GENMASK_ULL(36, 0); \
+ __ta |= __ttl << 37; \
+ __ta |= (unsigned long)(num) << 39; \
+ __ta |= (unsigned long)(scale) << 44; \
+ __ta |= get_trans_granule() << 46; \
+ __ta |= (unsigned long)(asid) << 48; \
+ __ta; \
})
/* These macros are used by the TLBI RANGE feature. */
@@ -216,12 +225,16 @@ static inline unsigned long get_trans_granule(void)
* CPUs, ensuring that any walk-cache entries associated with the
* translation are also invalidated.
*
- * __flush_tlb_range(vma, start, end, stride, last_level)
+ * __flush_tlb_range(vma, start, end, stride, last_level, tlb_level)
* Invalidate the virtual-address range '[start, end)' on all
* CPUs for the user address space corresponding to 'vma->mm'.
* The invalidation operations are issued at a granularity
* determined by 'stride' and only affect any walk-cache entries
- * if 'last_level' is equal to false.
+ * if 'last_level' is equal to false. tlb_level is the level at
+ * which the invalidation must take place. If the level is wrong,
+ * no invalidation may take place. In the case where the level
+ * cannot be easily determined, the value TLBI_TTL_UNKNOWN will
+ * perform a non-hinted invalidation.
*
*
* Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented
@@ -345,34 +358,44 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
* @tlb_level: Translation Table level hint, if known
* @tlbi_user: If 'true', call an additional __tlbi_user()
* (typically for user ASIDs). 'flase' for IPA instructions
+ * @lpa2: If 'true', the lpa2 scheme is used as set out below
*
* When the CPU does not support TLB range operations, flush the TLB
* entries one by one at the granularity of 'stride'. If the TLB
* range ops are supported, then:
*
- * 1. If 'pages' is odd, flush the first page through non-range
- * operations;
+ * 1. If FEAT_LPA2 is in use, the start address of a range operation must be
+ * 64KB aligned, so flush pages one by one until the alignment is reached
+ * using the non-range operations. This step is skipped if LPA2 is not in
+ * use.
+ *
+ * 2. The minimum range granularity is decided by 'scale', so multiple range
+ * TLBI operations may be required. Start from scale = 3, flush the largest
+ * possible number of pages ((num+1)*2^(5*scale+1)) that fit into the
+ * requested range, then decrement scale and continue until one or zero pages
+ * are left. We must start from highest scale to ensure 64KB start alignment
+ * is maintained in the LPA2 case.
*
- * 2. For remaining pages: the minimum range granularity is decided
- * by 'scale', so multiple range TLBI operations may be required.
- * Start from scale = 0, flush the corresponding number of pages
- * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
- * until no pages left.
+ * 3. If there is 1 page remaining, flush it through non-range operations. Range
+ * operations can only span an even number of pages. We save this for last to
+ * ensure 64KB start alignment is maintained for the LPA2 case.
*
* Note that certain ranges can be represented by either num = 31 and
* scale or num = 0 and scale + 1. The loop below favours the latter
* since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
*/
#define __flush_tlb_range_op(op, start, pages, stride, \
- asid, tlb_level, tlbi_user) \
+ asid, tlb_level, tlbi_user, lpa2) \
do { \
int num = 0; \
- int scale = 0; \
+ int scale = 3; \
+ int shift = lpa2 ? 16 : PAGE_SHIFT; \
unsigned long addr; \
\
while (pages > 0) { \
if (!system_supports_tlb_range() || \
- pages % 2 == 1) { \
+ pages == 1 || \
+ (lpa2 && start != ALIGN(start, SZ_64K))) { \
addr = __TLBI_VADDR(start, asid); \
__tlbi_level(op, addr, tlb_level); \
if (tlbi_user) \
@@ -384,20 +407,20 @@ do { \
\
num = __TLBI_RANGE_NUM(pages, scale); \
if (num >= 0) { \
- addr = __TLBI_VADDR_RANGE(start, asid, scale, \
- num, tlb_level); \
+ addr = __TLBI_VADDR_RANGE(start >> shift, asid, \
+ scale, num, tlb_level); \
__tlbi(r##op, addr); \
if (tlbi_user) \
__tlbi_user(r##op, addr); \
start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
pages -= __TLBI_RANGE_PAGES(num, scale); \
} \
- scale++; \
+ scale--; \
} \
} while (0)
#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
- __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
+ __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled());
static inline void __flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
@@ -427,9 +450,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
asid = ASID(vma->vm_mm);
if (last_level)
- __flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true);
+ __flush_tlb_range_op(vale1is, start, pages, stride, asid,
+ tlb_level, true, lpa2_is_enabled());
else
- __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
+ __flush_tlb_range_op(vae1is, start, pages, stride, asid,
+ tlb_level, true, lpa2_is_enabled());
dsb(ish);
mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
@@ -441,9 +466,10 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
/*
* We cannot use leaf-only invalidation here, since we may be invalidating
* table entries as part of collapsing hugepages or moving page tables.
- * Set the tlb_level to 0 because we can not get enough information here.
+ * Set the tlb_level to TLBI_TTL_UNKNOWN because we can not get enough
+ * information here.
*/
- __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
+ __flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN);
}
static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index 9fab663dd2de..a323b109b9c4 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -23,6 +23,7 @@ void update_freq_counters_refs(void);
#define arch_set_freq_scale topology_set_freq_scale
#define arch_scale_freq_capacity topology_get_freq_scale
#define arch_scale_freq_invariant topology_scale_freq_invariant
+#define arch_scale_freq_ref topology_get_freq_ref
#ifdef CONFIG_ACPI_CPPC_LIB
#define arch_init_invariance_cppc topology_init_cpu_capacity_cppc
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 531effca5f1f..b63f870debaf 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -39,7 +39,7 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
-#define __NR_compat_syscalls 457
+#define __NR_compat_syscalls 459
#endif
#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 9f7c1bf99526..8a191423c316 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -919,6 +919,10 @@ __SYSCALL(__NR_futex_wake, sys_futex_wake)
__SYSCALL(__NR_futex_wait, sys_futex_wait)
#define __NR_futex_requeue 456
__SYSCALL(__NR_futex_requeue, sys_futex_requeue)
+#define __NR_statmount 457
+__SYSCALL(__NR_statmount, sys_statmount)
+#define __NR_listmount 458
+__SYSCALL(__NR_listmount, sys_listmount)
/*
* Please add new compat syscalls above this comment and update
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 91d2d6714969..01a4c1d7fc09 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1081,25 +1081,6 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid);
-
- /*
- * Initialize the indirect array of CPU capabilities pointers before we
- * handle the boot CPU below.
- */
- init_cpucap_indirect_list();
-
- /*
- * Detect broken pseudo-NMI. Must be called _before_ the call to
- * setup_boot_cpu_capabilities() since it interacts with
- * can_use_gic_priorities().
- */
- detect_system_supports_pseudo_nmi();
-
- /*
- * Detect and enable early CPU capabilities based on the boot CPU,
- * after we have initialised the CPU feature infrastructure.
- */
- setup_boot_cpu_capabilities();
}
static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
@@ -1584,16 +1565,6 @@ static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry,
return has_sre;
}
-static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int __unused)
-{
- u32 midr = read_cpuid_id();
-
- /* Cavium ThunderX pass 1.x and 2.x */
- return midr_is_cpu_model_range(midr, MIDR_THUNDERX,
- MIDR_CPU_VAR_REV(0, 0),
- MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK));
-}
-
static bool has_cache_idc(const struct arm64_cpu_capabilities *entry,
int scope)
{
@@ -1768,6 +1739,39 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
return !meltdown_safe;
}
+#if defined(ID_AA64MMFR0_EL1_TGRAN_LPA2) && defined(ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2)
+static bool has_lpa2_at_stage1(u64 mmfr0)
+{
+ unsigned int tgran;
+
+ tgran = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_EL1_TGRAN_SHIFT);
+ return tgran == ID_AA64MMFR0_EL1_TGRAN_LPA2;
+}
+
+static bool has_lpa2_at_stage2(u64 mmfr0)
+{
+ unsigned int tgran;
+
+ tgran = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_EL1_TGRAN_2_SHIFT);
+ return tgran == ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2;
+}
+
+static bool has_lpa2(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ u64 mmfr0;
+
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ return has_lpa2_at_stage1(mmfr0) && has_lpa2_at_stage2(mmfr0);
+}
+#else
+static bool has_lpa2(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ return false;
+}
+#endif
+
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
#define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT))
@@ -1840,7 +1844,7 @@ static int __init __kpti_install_ng_mappings(void *__unused)
static void __init kpti_install_ng_mappings(void)
{
/* Check whether KPTI is going to be used */
- if (!cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0))
+ if (!arm64_kernel_unmapped_at_el0())
return;
/*
@@ -2326,12 +2330,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
},
#endif /* CONFIG_ARM64_LSE_ATOMICS */
{
- .desc = "Software prefetching using PRFM",
- .capability = ARM64_HAS_NO_HW_PREFETCH,
- .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
- .matches = has_no_hw_prefetch,
- },
- {
.desc = "Virtualization Host Extensions",
.capability = ARM64_HAS_VIRT_HOST_EXTN,
.type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
@@ -2735,6 +2733,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, EVT, IMP)
},
+ {
+ .desc = "52-bit Virtual Addressing for KVM (LPA2)",
+ .capability = ARM64_HAS_LPA2,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_lpa2,
+ },
{},
};
@@ -3275,14 +3279,6 @@ void check_local_cpu_capabilities(void)
verify_local_cpu_capabilities();
}
-static void __init setup_boot_cpu_capabilities(void)
-{
- /* Detect capabilities with either SCOPE_BOOT_CPU or SCOPE_LOCAL_CPU */
- update_cpu_capabilities(SCOPE_BOOT_CPU | SCOPE_LOCAL_CPU);
- /* Enable the SCOPE_BOOT_CPU capabilities alone right away */
- enable_cpu_capabilities(SCOPE_BOOT_CPU);
-}
-
bool this_cpu_has_cap(unsigned int n)
{
if (!WARN_ON(preemptible()) && n < ARM64_NCAPS) {
@@ -3338,37 +3334,52 @@ unsigned long cpu_get_elf_hwcap2(void)
return elf_hwcap[1];
}
-void __init setup_system_features(void)
+static void __init setup_boot_cpu_capabilities(void)
{
- int i;
/*
- * The system-wide safe feature feature register values have been
- * finalized. Finalize and log the available system capabilities.
+ * The boot CPU's feature register values have been recorded. Detect
+ * boot cpucaps and local cpucaps for the boot CPU, then enable and
+ * patch alternatives for the available boot cpucaps.
*/
- update_cpu_capabilities(SCOPE_SYSTEM);
- if (IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) &&
- !cpus_have_cap(ARM64_HAS_PAN))
- pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
+ update_cpu_capabilities(SCOPE_BOOT_CPU | SCOPE_LOCAL_CPU);
+ enable_cpu_capabilities(SCOPE_BOOT_CPU);
+ apply_boot_alternatives();
+}
+void __init setup_boot_cpu_features(void)
+{
/*
- * Enable all the available capabilities which have not been enabled
- * already.
+ * Initialize the indirect array of CPU capabilities pointers before we
+ * handle the boot CPU.
*/
- enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU);
+ init_cpucap_indirect_list();
- kpti_install_ng_mappings();
+ /*
+ * Detect broken pseudo-NMI. Must be called _before_ the call to
+ * setup_boot_cpu_capabilities() since it interacts with
+ * can_use_gic_priorities().
+ */
+ detect_system_supports_pseudo_nmi();
- sve_setup();
- sme_setup();
+ setup_boot_cpu_capabilities();
+}
+static void __init setup_system_capabilities(void)
+{
/*
- * Check for sane CTR_EL0.CWG value.
+ * The system-wide safe feature register values have been finalized.
+ * Detect, enable, and patch alternatives for the available system
+ * cpucaps.
*/
- if (!cache_type_cwg())
- pr_warn("No Cache Writeback Granule information, assuming %d\n",
- ARCH_DMA_MINALIGN);
+ update_cpu_capabilities(SCOPE_SYSTEM);
+ enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU);
+ apply_alternatives_all();
- for (i = 0; i < ARM64_NCAPS; i++) {
+ /*
+ * Log any cpucaps with a cpumask as these aren't logged by
+ * update_cpu_capabilities().
+ */
+ for (int i = 0; i < ARM64_NCAPS; i++) {
const struct arm64_cpu_capabilities *caps = cpucap_ptrs[i];
if (caps && caps->cpus && caps->desc &&
@@ -3376,6 +3387,29 @@ void __init setup_system_features(void)
pr_info("detected: %s on CPU%*pbl\n",
caps->desc, cpumask_pr_args(caps->cpus));
}
+
+ /*
+ * TTBR0 PAN doesn't have its own cpucap, so log it manually.
+ */
+ if (system_uses_ttbr0_pan())
+ pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
+}
+
+void __init setup_system_features(void)
+{
+ setup_system_capabilities();
+
+ kpti_install_ng_mappings();
+
+ sve_setup();
+ sme_setup();
+
+ /*
+ * Check for sane CTR_EL0.CWG value.
+ */
+ if (!cache_type_cwg())
+ pr_warn("No Cache Writeback Granule information, assuming %d\n",
+ ARCH_DMA_MINALIGN);
}
void __init setup_user_features(void)
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index a257da7b56fe..47043c0d95ec 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -36,8 +36,6 @@ static struct cpuinfo_arm64 boot_cpu_data;
static inline const char *icache_policy_str(int l1ip)
{
switch (l1ip) {
- case CTR_EL0_L1Ip_VPIPT:
- return "VPIPT";
case CTR_EL0_L1Ip_VIPT:
return "VIPT";
case CTR_EL0_L1Ip_PIPT:
@@ -388,9 +386,6 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
switch (l1ip) {
case CTR_EL0_L1Ip_PIPT:
break;
- case CTR_EL0_L1Ip_VPIPT:
- set_bit(ICACHEF_VPIPT, &__icache_flags);
- break;
case CTR_EL0_L1Ip_VIPT:
default:
/* Assume aliasing */
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 1559c706d32d..505f389be3e0 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -85,13 +85,13 @@
* softirq kicks in. Upon vcpu_put(), KVM will save the vcpu FP state and
* flag the register state as invalid.
*
- * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may
- * save the task's FPSIMD context back to task_struct from softirq context.
- * To prevent this from racing with the manipulation of the task's FPSIMD state
- * from task context and thereby corrupting the state, it is necessary to
- * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE
- * flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to
- * run but prevent them to use FPSIMD.
+ * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may be
+ * called from softirq context, which will save the task's FPSIMD context back
+ * to task_struct. To prevent this from racing with the manipulation of the
+ * task's FPSIMD state from task context and thereby corrupting the state, it
+ * is necessary to protect any manipulation of a task's fpsimd_state or
+ * TIF_FOREIGN_FPSTATE flag with get_cpu_fpsimd_context(), which will suspend
+ * softirq servicing entirely until put_cpu_fpsimd_context() is called.
*
* For a certain task, the sequence may look something like this:
* - the task gets scheduled in; if both the task's fpsimd_cpu field
@@ -209,27 +209,14 @@ static inline void sme_free(struct task_struct *t) { }
#endif
-DEFINE_PER_CPU(bool, fpsimd_context_busy);
-EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy);
-
static void fpsimd_bind_task_to_cpu(void);
-static void __get_cpu_fpsimd_context(void)
-{
- bool busy = __this_cpu_xchg(fpsimd_context_busy, true);
-
- WARN_ON(busy);
-}
-
/*
* Claim ownership of the CPU FPSIMD context for use by the calling context.
*
* The caller may freely manipulate the FPSIMD context metadata until
* put_cpu_fpsimd_context() is called.
*
- * The double-underscore version must only be called if you know the task
- * can't be preempted.
- *
* On RT kernels local_bh_disable() is not sufficient because it only
* serializes soft interrupt related sections via a local lock, but stays
* preemptible. Disabling preemption is the right choice here as bottom
@@ -242,14 +229,6 @@ static void get_cpu_fpsimd_context(void)
local_bh_disable();
else
preempt_disable();
- __get_cpu_fpsimd_context();
-}
-
-static void __put_cpu_fpsimd_context(void)
-{
- bool busy = __this_cpu_xchg(fpsimd_context_busy, false);
-
- WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */
}
/*
@@ -261,18 +240,12 @@ static void __put_cpu_fpsimd_context(void)
*/
static void put_cpu_fpsimd_context(void)
{
- __put_cpu_fpsimd_context();
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
local_bh_enable();
else
preempt_enable();
}
-static bool have_cpu_fpsimd_context(void)
-{
- return !preemptible() && __this_cpu_read(fpsimd_context_busy);
-}
-
unsigned int task_get_vl(const struct task_struct *task, enum vec_type type)
{
return task->thread.vl[type];
@@ -383,7 +356,8 @@ static void task_fpsimd_load(void)
bool restore_ffr;
WARN_ON(!system_supports_fpsimd());
- WARN_ON(!have_cpu_fpsimd_context());
+ WARN_ON(preemptible());
+ WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE));
if (system_supports_sve() || system_supports_sme()) {
switch (current->thread.fp_type) {
@@ -406,7 +380,7 @@ static void task_fpsimd_load(void)
default:
/*
* This indicates either a bug in
- * fpsimd_save() or memory corruption, we
+ * fpsimd_save_user_state() or memory corruption, we
* should always record an explicit format
* when we save. We always at least have the
* memory allocated for FPSMID registers so
@@ -457,7 +431,7 @@ static void task_fpsimd_load(void)
* than via current, if we are saving KVM state then it will have
* ensured that the type of registers to save is set in last->to_save.
*/
-static void fpsimd_save(void)
+static void fpsimd_save_user_state(void)
{
struct cpu_fp_state const *last =
this_cpu_ptr(&fpsimd_last_state);
@@ -467,7 +441,7 @@ static void fpsimd_save(void)
unsigned int vl;
WARN_ON(!system_supports_fpsimd());
- WARN_ON(!have_cpu_fpsimd_context());
+ WARN_ON(preemptible());
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
return;
@@ -888,7 +862,7 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
if (task == current) {
get_cpu_fpsimd_context();
- fpsimd_save();
+ fpsimd_save_user_state();
}
fpsimd_flush_task_state(task);
@@ -1171,7 +1145,7 @@ void __init sve_setup(void)
unsigned long b;
int max_bit;
- if (!cpus_have_cap(ARM64_SVE))
+ if (!system_supports_sve())
return;
/*
@@ -1301,7 +1275,7 @@ void __init sme_setup(void)
struct vl_info *info = &vl_info[ARM64_VEC_SME];
int min_bit, max_bit;
- if (!cpus_have_cap(ARM64_SME))
+ if (!system_supports_sme())
return;
/*
@@ -1500,6 +1474,34 @@ void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs)
current);
}
+static void fpsimd_load_kernel_state(struct task_struct *task)
+{
+ struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);
+
+ /*
+ * Elide the load if this CPU holds the most recent kernel mode
+ * FPSIMD context of the current task.
+ */
+ if (last->st == &task->thread.kernel_fpsimd_state &&
+ task->thread.kernel_fpsimd_cpu == smp_processor_id())
+ return;
+
+ fpsimd_load_state(&task->thread.kernel_fpsimd_state);
+}
+
+static void fpsimd_save_kernel_state(struct task_struct *task)
+{
+ struct cpu_fp_state cpu_fp_state = {
+ .st = &task->thread.kernel_fpsimd_state,
+ .to_save = FP_STATE_FPSIMD,
+ };
+
+ fpsimd_save_state(&task->thread.kernel_fpsimd_state);
+ fpsimd_bind_state_to_cpu(&cpu_fp_state);
+
+ task->thread.kernel_fpsimd_cpu = smp_processor_id();
+}
+
void fpsimd_thread_switch(struct task_struct *next)
{
bool wrong_task, wrong_cpu;
@@ -1507,24 +1509,31 @@ void fpsimd_thread_switch(struct task_struct *next)
if (!system_supports_fpsimd())
return;
- __get_cpu_fpsimd_context();
+ WARN_ON_ONCE(!irqs_disabled());
/* Save unsaved fpsimd state, if any: */
- fpsimd_save();
-
- /*
- * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
- * state. For kernel threads, FPSIMD registers are never loaded
- * and wrong_task and wrong_cpu will always be true.
- */
- wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
- &next->thread.uw.fpsimd_state;
- wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
+ if (test_thread_flag(TIF_KERNEL_FPSTATE))
+ fpsimd_save_kernel_state(current);
+ else
+ fpsimd_save_user_state();
- update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
- wrong_task || wrong_cpu);
+ if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) {
+ fpsimd_load_kernel_state(next);
+ set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
+ } else {
+ /*
+ * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
+ * state. For kernel threads, FPSIMD registers are never
+ * loaded with user mode FPSIMD state and so wrong_task and
+ * wrong_cpu will always be true.
+ */
+ wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
+ &next->thread.uw.fpsimd_state;
+ wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
- __put_cpu_fpsimd_context();
+ update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
+ wrong_task || wrong_cpu);
+ }
}
static void fpsimd_flush_thread_vl(enum vec_type type)
@@ -1614,7 +1623,7 @@ void fpsimd_preserve_current_state(void)
return;
get_cpu_fpsimd_context();
- fpsimd_save();
+ fpsimd_save_user_state();
put_cpu_fpsimd_context();
}
@@ -1826,13 +1835,15 @@ static void fpsimd_flush_cpu_state(void)
*/
void fpsimd_save_and_flush_cpu_state(void)
{
+ unsigned long flags;
+
if (!system_supports_fpsimd())
return;
WARN_ON(preemptible());
- __get_cpu_fpsimd_context();
- fpsimd_save();
+ local_irq_save(flags);
+ fpsimd_save_user_state();
fpsimd_flush_cpu_state();
- __put_cpu_fpsimd_context();
+ local_irq_restore(flags);
}
#ifdef CONFIG_KERNEL_MODE_NEON
@@ -1864,10 +1875,37 @@ void kernel_neon_begin(void)
get_cpu_fpsimd_context();
/* Save unsaved fpsimd state, if any: */
- fpsimd_save();
+ if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
+ BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
+ fpsimd_save_kernel_state(current);
+ } else {
+ fpsimd_save_user_state();
+
+ /*
+ * Set the thread flag so that the kernel mode FPSIMD state
+ * will be context switched along with the rest of the task
+ * state.
+ *
+ * On non-PREEMPT_RT, softirqs may interrupt task level kernel
+ * mode FPSIMD, but the task will not be preemptible so setting
+ * TIF_KERNEL_FPSTATE for those would be both wrong (as it
+ * would mark the task context FPSIMD state as requiring a
+ * context switch) and unnecessary.
+ *
+ * On PREEMPT_RT, softirqs are serviced from a separate thread,
+ * which is scheduled as usual, and this guarantees that these
+ * softirqs are not interrupting use of the FPSIMD in kernel
+ * mode in task context. So in this case, setting the flag here
+ * is always appropriate.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
+ set_thread_flag(TIF_KERNEL_FPSTATE);
+ }
/* Invalidate any task state remaining in the fpsimd regs: */
fpsimd_flush_cpu_state();
+
+ put_cpu_fpsimd_context();
}
EXPORT_SYMBOL_GPL(kernel_neon_begin);
@@ -1885,7 +1923,16 @@ void kernel_neon_end(void)
if (!system_supports_fpsimd())
return;
- put_cpu_fpsimd_context();
+ /*
+ * If we are returning from a nested use of kernel mode FPSIMD, restore
+ * the task context kernel mode FPSIMD state. This can only happen when
+ * running in softirq context on non-PREEMPT_RT.
+ */
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
+ test_thread_flag(TIF_KERNEL_FPSTATE))
+ fpsimd_load_kernel_state(current);
+ else
+ clear_thread_flag(TIF_KERNEL_FPSTATE);
}
EXPORT_SYMBOL_GPL(kernel_neon_end);
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 7b236994f0e1..cab7f91949d8 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -482,7 +482,7 @@ SYM_FUNC_START_LOCAL(__primary_switched)
str_l x21, __fdt_pointer, x5 // Save FDT pointer
- ldr_l x4, kimage_vaddr // Save the offset between
+ adrp x4, _text // Save the offset between
sub x4, x4, x0 // the kernel virtual and
str_l x4, kimage_voffset, x5 // physical mappings
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index 3addc09f8746..e30fd9e32ef3 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -21,14 +21,25 @@
static u64 __boot_status __initdata;
+// temporary __prel64 related definitions
+// to be removed when this code is moved under pi/
+
+#define __prel64_initconst __initconst
+
+#define PREL64(type, name) union { type *name; }
+
+#define prel64_pointer(__d) (__d)
+
+typedef bool filter_t(u64 val);
+
struct ftr_set_desc {
char name[FTR_DESC_NAME_LEN];
- struct arm64_ftr_override *override;
+ PREL64(struct arm64_ftr_override, override);
struct {
char name[FTR_DESC_FIELD_LEN];
u8 shift;
u8 width;
- bool (*filter)(u64 val);
+ PREL64(filter_t, filter);
} fields[];
};
@@ -46,7 +57,7 @@ static bool __init mmfr1_vh_filter(u64 val)
val == 0);
}
-static const struct ftr_set_desc mmfr1 __initconst = {
+static const struct ftr_set_desc mmfr1 __prel64_initconst = {
.name = "id_aa64mmfr1",
.override = &id_aa64mmfr1_override,
.fields = {
@@ -70,7 +81,7 @@ static bool __init pfr0_sve_filter(u64 val)
return true;
}
-static const struct ftr_set_desc pfr0 __initconst = {
+static const struct ftr_set_desc pfr0 __prel64_initconst = {
.name = "id_aa64pfr0",
.override = &id_aa64pfr0_override,
.fields = {
@@ -94,7 +105,7 @@ static bool __init pfr1_sme_filter(u64 val)
return true;
}
-static const struct ftr_set_desc pfr1 __initconst = {
+static const struct ftr_set_desc pfr1 __prel64_initconst = {
.name = "id_aa64pfr1",
.override = &id_aa64pfr1_override,
.fields = {
@@ -105,7 +116,7 @@ static const struct ftr_set_desc pfr1 __initconst = {
},
};
-static const struct ftr_set_desc isar1 __initconst = {
+static const struct ftr_set_desc isar1 __prel64_initconst = {
.name = "id_aa64isar1",
.override = &id_aa64isar1_override,
.fields = {
@@ -117,7 +128,7 @@ static const struct ftr_set_desc isar1 __initconst = {
},
};
-static const struct ftr_set_desc isar2 __initconst = {
+static const struct ftr_set_desc isar2 __prel64_initconst = {
.name = "id_aa64isar2",
.override = &id_aa64isar2_override,
.fields = {
@@ -128,7 +139,7 @@ static const struct ftr_set_desc isar2 __initconst = {
},
};
-static const struct ftr_set_desc smfr0 __initconst = {
+static const struct ftr_set_desc smfr0 __prel64_initconst = {
.name = "id_aa64smfr0",
.override = &id_aa64smfr0_override,
.fields = {
@@ -149,7 +160,7 @@ static bool __init hvhe_filter(u64 val)
ID_AA64MMFR1_EL1_VH_SHIFT));
}
-static const struct ftr_set_desc sw_features __initconst = {
+static const struct ftr_set_desc sw_features __prel64_initconst = {
.name = "arm64_sw",
.override = &arm64_sw_feature_override,
.fields = {
@@ -159,22 +170,23 @@ static const struct ftr_set_desc sw_features __initconst = {
},
};
-static const struct ftr_set_desc * const regs[] __initconst = {
- &mmfr1,
- &pfr0,
- &pfr1,
- &isar1,
- &isar2,
- &smfr0,
- &sw_features,
+static const
+PREL64(const struct ftr_set_desc, reg) regs[] __prel64_initconst = {
+ { &mmfr1 },
+ { &pfr0 },
+ { &pfr1 },
+ { &isar1 },
+ { &isar2 },
+ { &smfr0 },
+ { &sw_features },
};
static const struct {
char alias[FTR_ALIAS_NAME_LEN];
char feature[FTR_ALIAS_OPTION_LEN];
} aliases[] __initconst = {
- { "kvm-arm.mode=nvhe", "id_aa64mmfr1.vh=0" },
- { "kvm-arm.mode=protected", "id_aa64mmfr1.vh=0" },
+ { "kvm_arm.mode=nvhe", "id_aa64mmfr1.vh=0" },
+ { "kvm_arm.mode=protected", "id_aa64mmfr1.vh=0" },
{ "arm64.nosve", "id_aa64pfr0.sve=0" },
{ "arm64.nosme", "id_aa64pfr1.sme=0" },
{ "arm64.nobti", "id_aa64pfr1.bt=0" },
@@ -187,45 +199,61 @@ static const struct {
{ "nokaslr", "arm64_sw.nokaslr=1" },
};
-static int __init parse_nokaslr(char *unused)
+static int __init parse_hexdigit(const char *p, u64 *v)
{
- /* nokaslr param handling is done by early cpufeature code */
+ // skip "0x" if it comes next
+ if (p[0] == '0' && tolower(p[1]) == 'x')
+ p += 2;
+
+ // check whether the RHS is a single hex digit
+ if (!isxdigit(p[0]) || (p[1] && !isspace(p[1])))
+ return -EINVAL;
+
+ *v = tolower(*p) - (isdigit(*p) ? '0' : 'a' - 10);
return 0;
}
-early_param("nokaslr", parse_nokaslr);
-static int __init find_field(const char *cmdline,
+static int __init find_field(const char *cmdline, char *opt, int len,
const struct ftr_set_desc *reg, int f, u64 *v)
{
- char opt[FTR_DESC_NAME_LEN + FTR_DESC_FIELD_LEN + 2];
- int len;
+ int flen = strlen(reg->fields[f].name);
- len = snprintf(opt, ARRAY_SIZE(opt), "%s.%s=",
- reg->name, reg->fields[f].name);
+ // append '<fieldname>=' to obtain '<name>.<fieldname>='
+ memcpy(opt + len, reg->fields[f].name, flen);
+ len += flen;
+ opt[len++] = '=';
- if (!parameqn(cmdline, opt, len))
+ if (memcmp(cmdline, opt, len))
return -1;
- return kstrtou64(cmdline + len, 0, v);
+ return parse_hexdigit(cmdline + len, v);
}
static void __init match_options(const char *cmdline)
{
+ char opt[FTR_DESC_NAME_LEN + FTR_DESC_FIELD_LEN + 2];
int i;
for (i = 0; i < ARRAY_SIZE(regs); i++) {
+ const struct ftr_set_desc *reg = prel64_pointer(regs[i].reg);
+ struct arm64_ftr_override *override;
+ int len = strlen(reg->name);
int f;
- if (!regs[i]->override)
- continue;
+ override = prel64_pointer(reg->override);
- for (f = 0; strlen(regs[i]->fields[f].name); f++) {
- u64 shift = regs[i]->fields[f].shift;
- u64 width = regs[i]->fields[f].width ?: 4;
+ // set opt[] to '<name>.'
+ memcpy(opt, reg->name, len);
+ opt[len++] = '.';
+
+ for (f = 0; reg->fields[f].name[0] != '\0'; f++) {
+ u64 shift = reg->fields[f].shift;
+ u64 width = reg->fields[f].width ?: 4;
u64 mask = GENMASK_ULL(shift + width - 1, shift);
+ bool (*filter)(u64 val);
u64 v;
- if (find_field(cmdline, regs[i], f, &v))
+ if (find_field(cmdline, opt, len, reg, f, &v))
continue;
/*
@@ -233,16 +261,16 @@ static void __init match_options(const char *cmdline)
* it by setting the value to the all-ones while
* clearing the mask... Yes, this is fragile.
*/
- if (regs[i]->fields[f].filter &&
- !regs[i]->fields[f].filter(v)) {
- regs[i]->override->val |= mask;
- regs[i]->override->mask &= ~mask;
+ filter = prel64_pointer(reg->fields[f].filter);
+ if (filter && !filter(v)) {
+ override->val |= mask;
+ override->mask &= ~mask;
continue;
}
- regs[i]->override->val &= ~mask;
- regs[i]->override->val |= (v << shift) & mask;
- regs[i]->override->mask |= mask;
+ override->val &= ~mask;
+ override->val |= (v << shift) & mask;
+ override->mask |= mask;
return;
}
@@ -258,23 +286,29 @@ static __init void __parse_cmdline(const char *cmdline, bool parse_aliases)
cmdline = skip_spaces(cmdline);
- for (len = 0; cmdline[len] && !isspace(cmdline[len]); len++);
- if (!len)
+ /* terminate on "--" appearing on the command line by itself */
+ if (cmdline[0] == '-' && cmdline[1] == '-' && isspace(cmdline[2]))
return;
- len = min(len, ARRAY_SIZE(buf) - 1);
- memcpy(buf, cmdline, len);
- buf[len] = '\0';
-
- if (strcmp(buf, "--") == 0)
+ for (len = 0; cmdline[len] && !isspace(cmdline[len]); len++) {
+ if (len >= sizeof(buf) - 1)
+ break;
+ if (cmdline[len] == '-')
+ buf[len] = '_';
+ else
+ buf[len] = cmdline[len];
+ }
+ if (!len)
return;
+ buf[len] = 0;
+
cmdline += len;
match_options(buf);
for (i = 0; parse_aliases && i < ARRAY_SIZE(aliases); i++)
- if (parameq(buf, aliases[i].alias))
+ if (!memcmp(buf, aliases[i].alias, len + 1))
__parse_cmdline(aliases[i].feature, false);
} while (1);
}
@@ -316,13 +350,16 @@ void init_feature_override(u64 boot_status);
asmlinkage void __init init_feature_override(u64 boot_status)
{
+ struct arm64_ftr_override *override;
+ const struct ftr_set_desc *reg;
int i;
for (i = 0; i < ARRAY_SIZE(regs); i++) {
- if (regs[i]->override) {
- regs[i]->override->val = 0;
- regs[i]->override->mask = 0;
- }
+ reg = prel64_pointer(regs[i].reg);
+ override = prel64_pointer(reg->override);
+
+ override->val = 0;
+ override->mask = 0;
}
__boot_status = boot_status;
@@ -330,9 +367,9 @@ asmlinkage void __init init_feature_override(u64 boot_status)
parse_cmdline();
for (i = 0; i < ARRAY_SIZE(regs); i++) {
- if (regs[i]->override)
- dcache_clean_inval_poc((unsigned long)regs[i]->override,
- (unsigned long)regs[i]->override +
- sizeof(*regs[i]->override));
+ reg = prel64_pointer(regs[i].reg);
+ override = prel64_pointer(reg->override);
+ dcache_clean_inval_poc((unsigned long)override,
+ (unsigned long)(override + 1));
}
}
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 6ad5c6ef5329..85087e2df564 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -22,6 +22,7 @@
#include <linux/vmalloc.h>
#include <asm/daifflags.h>
#include <asm/exception.h>
+#include <asm/numa.h>
#include <asm/softirq_stack.h>
#include <asm/stacktrace.h>
#include <asm/vmap_stack.h>
@@ -47,17 +48,17 @@ static void init_irq_scs(void)
for_each_possible_cpu(cpu)
per_cpu(irq_shadow_call_stack_ptr, cpu) =
- scs_alloc(cpu_to_node(cpu));
+ scs_alloc(early_cpu_to_node(cpu));
}
#ifdef CONFIG_VMAP_STACK
-static void init_irq_stacks(void)
+static void __init init_irq_stacks(void)
{
int cpu;
unsigned long *p;
for_each_possible_cpu(cpu) {
- p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, cpu_to_node(cpu));
+ p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, early_cpu_to_node(cpu));
per_cpu(irq_stack_ptr, cpu) = p;
}
}
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 94a269cd1f07..12c7f3c8ba76 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -36,3 +36,10 @@ void __init kaslr_init(void)
pr_info("KASLR enabled\n");
__kaslr_is_enabled = true;
}
+
+static int __init parse_nokaslr(char *unused)
+{
+ /* nokaslr param handling is done by early cpufeature code */
+ return 0;
+}
+early_param("nokaslr", parse_nokaslr);
diff --git a/arch/arm64/kernel/pi/Makefile b/arch/arm64/kernel/pi/Makefile
index 4c0ea3cd4ea4..c844a0546d7f 100644
--- a/arch/arm64/kernel/pi/Makefile
+++ b/arch/arm64/kernel/pi/Makefile
@@ -3,6 +3,7 @@
KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
-Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \
+ $(DISABLE_LATENT_ENTROPY_PLUGIN) \
$(call cc-option,-mbranch-protection=none) \
-I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \
-include $(srctree)/include/linux/hidden.h \
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index defbab84e9e5..4ced34f62dab 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -439,9 +439,8 @@ static void __init hyp_mode_check(void)
void __init smp_cpus_done(unsigned int max_cpus)
{
pr_info("SMP: Total of %d processors activated.\n", num_online_cpus());
- setup_system_features();
hyp_mode_check();
- apply_alternatives_all();
+ setup_system_features();
setup_user_features();
mark_linear_text_alias_ro();
}
@@ -454,14 +453,9 @@ void __init smp_prepare_boot_cpu(void)
* freed shortly, so we must move over to the runtime per-cpu area.
*/
set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
- cpuinfo_store_boot_cpu();
- /*
- * We now know enough about the boot CPU to apply the
- * alternatives that cannot wait until interrupt handling
- * and/or scheduling is enabled.
- */
- apply_boot_alternatives();
+ cpuinfo_store_boot_cpu();
+ setup_boot_cpu_features();
/* Conditionally switch to GIC PMR for interrupt masking */
if (system_uses_irq_prio_masking())
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 17f66a74c745..7f88028a00c0 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -8,6 +8,7 @@
#include <linux/efi.h>
#include <linux/export.h>
#include <linux/ftrace.h>
+#include <linux/kprobes.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
@@ -19,6 +20,31 @@
#include <asm/stacktrace.h>
/*
+ * Kernel unwind state
+ *
+ * @common: Common unwind state.
+ * @task: The task being unwound.
+ * @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
+ * associated with the most recently encountered replacement lr
+ * value.
+ */
+struct kunwind_state {
+ struct unwind_state common;
+ struct task_struct *task;
+#ifdef CONFIG_KRETPROBES
+ struct llist_node *kr_cur;
+#endif
+};
+
+static __always_inline void
+kunwind_init(struct kunwind_state *state,
+ struct task_struct *task)
+{
+ unwind_init_common(&state->common);
+ state->task = task;
+}
+
+/*
* Start an unwind from a pt_regs.
*
* The unwind will begin at the PC within the regs.
@@ -26,13 +52,13 @@
* The regs must be on a stack currently owned by the calling task.
*/
static __always_inline void
-unwind_init_from_regs(struct unwind_state *state,
- struct pt_regs *regs)
+kunwind_init_from_regs(struct kunwind_state *state,
+ struct pt_regs *regs)
{
- unwind_init_common(state, current);
+ kunwind_init(state, current);
- state->fp = regs->regs[29];
- state->pc = regs->pc;
+ state->common.fp = regs->regs[29];
+ state->common.pc = regs->pc;
}
/*
@@ -44,12 +70,12 @@ unwind_init_from_regs(struct unwind_state *state,
* The function which invokes this must be noinline.
*/
static __always_inline void
-unwind_init_from_caller(struct unwind_state *state)
+kunwind_init_from_caller(struct kunwind_state *state)
{
- unwind_init_common(state, current);
+ kunwind_init(state, current);
- state->fp = (unsigned long)__builtin_frame_address(1);
- state->pc = (unsigned long)__builtin_return_address(0);
+ state->common.fp = (unsigned long)__builtin_frame_address(1);
+ state->common.pc = (unsigned long)__builtin_return_address(0);
}
/*
@@ -63,35 +89,38 @@ unwind_init_from_caller(struct unwind_state *state)
* call this for the current task.
*/
static __always_inline void
-unwind_init_from_task(struct unwind_state *state,
- struct task_struct *task)
+kunwind_init_from_task(struct kunwind_state *state,
+ struct task_struct *task)
{
- unwind_init_common(state, task);
+ kunwind_init(state, task);
- state->fp = thread_saved_fp(task);
- state->pc = thread_saved_pc(task);
+ state->common.fp = thread_saved_fp(task);
+ state->common.pc = thread_saved_pc(task);
}
static __always_inline int
-unwind_recover_return_address(struct unwind_state *state)
+kunwind_recover_return_address(struct kunwind_state *state)
{
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
if (state->task->ret_stack &&
- (state->pc == (unsigned long)return_to_handler)) {
+ (state->common.pc == (unsigned long)return_to_handler)) {
unsigned long orig_pc;
- orig_pc = ftrace_graph_ret_addr(state->task, NULL, state->pc,
- (void *)state->fp);
- if (WARN_ON_ONCE(state->pc == orig_pc))
+ orig_pc = ftrace_graph_ret_addr(state->task, NULL,
+ state->common.pc,
+ (void *)state->common.fp);
+ if (WARN_ON_ONCE(state->common.pc == orig_pc))
return -EINVAL;
- state->pc = orig_pc;
+ state->common.pc = orig_pc;
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
#ifdef CONFIG_KRETPROBES
- if (is_kretprobe_trampoline(state->pc)) {
- state->pc = kretprobe_find_ret_addr(state->task,
- (void *)state->fp,
- &state->kr_cur);
+ if (is_kretprobe_trampoline(state->common.pc)) {
+ unsigned long orig_pc;
+ orig_pc = kretprobe_find_ret_addr(state->task,
+ (void *)state->common.fp,
+ &state->kr_cur);
+ state->common.pc = orig_pc;
}
#endif /* CONFIG_KRETPROBES */
@@ -106,38 +135,40 @@ unwind_recover_return_address(struct unwind_state *state)
* and the location (but not the fp value) of B.
*/
static __always_inline int
-unwind_next(struct unwind_state *state)
+kunwind_next(struct kunwind_state *state)
{
struct task_struct *tsk = state->task;
- unsigned long fp = state->fp;
+ unsigned long fp = state->common.fp;
int err;
/* Final frame; nothing to unwind */
if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
return -ENOENT;
- err = unwind_next_frame_record(state);
+ err = unwind_next_frame_record(&state->common);
if (err)
return err;
- state->pc = ptrauth_strip_kernel_insn_pac(state->pc);
+ state->common.pc = ptrauth_strip_kernel_insn_pac(state->common.pc);
- return unwind_recover_return_address(state);
+ return kunwind_recover_return_address(state);
}
+typedef bool (*kunwind_consume_fn)(const struct kunwind_state *state, void *cookie);
+
static __always_inline void
-unwind(struct unwind_state *state, stack_trace_consume_fn consume_entry,
- void *cookie)
+do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state,
+ void *cookie)
{
- if (unwind_recover_return_address(state))
+ if (kunwind_recover_return_address(state))
return;
while (1) {
int ret;
- if (!consume_entry(cookie, state->pc))
+ if (!consume_state(state, cookie))
break;
- ret = unwind_next(state);
+ ret = kunwind_next(state);
if (ret < 0)
break;
}
@@ -172,9 +203,10 @@ unwind(struct unwind_state *state, stack_trace_consume_fn consume_entry,
: stackinfo_get_unknown(); \
})
-noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
- void *cookie, struct task_struct *task,
- struct pt_regs *regs)
+static __always_inline void
+kunwind_stack_walk(kunwind_consume_fn consume_state,
+ void *cookie, struct task_struct *task,
+ struct pt_regs *regs)
{
struct stack_info stacks[] = {
stackinfo_get_task(task),
@@ -190,22 +222,48 @@ noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
STACKINFO_EFI,
#endif
};
- struct unwind_state state = {
- .stacks = stacks,
- .nr_stacks = ARRAY_SIZE(stacks),
+ struct kunwind_state state = {
+ .common = {
+ .stacks = stacks,
+ .nr_stacks = ARRAY_SIZE(stacks),
+ },
};
if (regs) {
if (task != current)
return;
- unwind_init_from_regs(&state, regs);
+ kunwind_init_from_regs(&state, regs);
} else if (task == current) {
- unwind_init_from_caller(&state);
+ kunwind_init_from_caller(&state);
} else {
- unwind_init_from_task(&state, task);
+ kunwind_init_from_task(&state, task);
}
- unwind(&state, consume_entry, cookie);
+ do_kunwind(&state, consume_state, cookie);
+}
+
+struct kunwind_consume_entry_data {
+ stack_trace_consume_fn consume_entry;
+ void *cookie;
+};
+
+static bool
+arch_kunwind_consume_entry(const struct kunwind_state *state, void *cookie)
+{
+ struct kunwind_consume_entry_data *data = cookie;
+ return data->consume_entry(data->cookie, state->common.pc);
+}
+
+noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
+ void *cookie, struct task_struct *task,
+ struct pt_regs *regs)
+{
+ struct kunwind_consume_entry_data data = {
+ .consume_entry = consume_entry,
+ .cookie = cookie,
+ };
+
+ kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, regs);
}
static bool dump_backtrace_entry(void *arg, unsigned long where)
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 817d788cd866..1a2c72f3e7f8 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -82,7 +82,12 @@ int __init parse_acpi_topology(void)
#undef pr_fmt
#define pr_fmt(fmt) "AMU: " fmt
-static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale);
+/*
+ * Ensure that amu_scale_freq_tick() will return SCHED_CAPACITY_SCALE until
+ * the CPU capacity and its associated frequency have been correctly
+ * initialized.
+ */
+static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale) = 1UL << (2 * SCHED_CAPACITY_SHIFT);
static DEFINE_PER_CPU(u64, arch_const_cycles_prev);
static DEFINE_PER_CPU(u64, arch_core_cycles_prev);
static cpumask_var_t amu_fie_cpus;
@@ -112,14 +117,14 @@ static inline bool freq_counters_valid(int cpu)
return true;
}
-static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate)
+void freq_inv_set_max_ratio(int cpu, u64 max_rate)
{
- u64 ratio;
+ u64 ratio, ref_rate = arch_timer_get_rate();
if (unlikely(!max_rate || !ref_rate)) {
- pr_debug("CPU%d: invalid maximum or reference frequency.\n",
+ WARN_ONCE(1, "CPU%d: invalid maximum or reference frequency.\n",
cpu);
- return -EINVAL;
+ return;
}
/*
@@ -139,12 +144,10 @@ static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate)
ratio = div64_u64(ratio, max_rate);
if (!ratio) {
WARN_ONCE(1, "Reference frequency too low.\n");
- return -EINVAL;
+ return;
}
- per_cpu(arch_max_freq_scale, cpu) = (unsigned long)ratio;
-
- return 0;
+ WRITE_ONCE(per_cpu(arch_max_freq_scale, cpu), (unsigned long)ratio);
}
static void amu_scale_freq_tick(void)
@@ -195,10 +198,7 @@ static void amu_fie_setup(const struct cpumask *cpus)
return;
for_each_cpu(cpu, cpus) {
- if (!freq_counters_valid(cpu) ||
- freq_inv_set_max_ratio(cpu,
- cpufreq_get_hw_max_freq(cpu) * 1000ULL,
- arch_timer_get_rate()))
+ if (!freq_counters_valid(cpu))
return;
}
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index 1f911a76c5af..2266fcdff78a 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -118,7 +118,7 @@ endif
VDSO_CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
# Build rules
-targets := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) vdso.so vdso.so.dbg vdso.so.raw
+targets := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) vdso.so vdso32.so.dbg vdso.so.raw
c-obj-vdso := $(addprefix $(obj)/, $(c-obj-vdso))
c-obj-vdso-gettimeofday := $(addprefix $(obj)/, $(c-obj-vdso-gettimeofday))
asm-obj-vdso := $(addprefix $(obj)/, $(asm-obj-vdso))
@@ -127,15 +127,15 @@ obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso)
targets += vdso.lds
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
-include/generated/vdso32-offsets.h: $(obj)/vdso.so.dbg FORCE
+include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE
$(call if_changed,vdsosym)
# Strip rule for vdso.so
$(obj)/vdso.so: OBJCOPYFLAGS := -S
-$(obj)/vdso.so: $(obj)/vdso.so.dbg FORCE
+$(obj)/vdso.so: $(obj)/vdso32.so.dbg FORCE
$(call if_changed,objcopy)
-$(obj)/vdso.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE
+$(obj)/vdso32.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE
$(call if_changed,vdsomunge)
# Link rule for the .so file, .lds has to be first
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index e5f75f1f1085..4796104c4471 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -410,7 +410,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
kvm_timer_vcpu_terminate(vcpu);
kvm_pmu_vcpu_destroy(vcpu);
-
+ kvm_vgic_vcpu_destroy(vcpu);
kvm_arm_vcpu_destroy(vcpu);
}
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 9d23a51d7f75..b29f15418c0a 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -12,7 +12,7 @@
#include <nvhe/pkvm.h>
#include <nvhe/trap_handler.h>
-/* Used by icache_is_vpipt(). */
+/* Used by icache_is_aliasing(). */
unsigned long __icache_flags;
/* Used by kvm_get_vttbr(). */
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index 1b265713d6be..a60fb13e2192 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -105,28 +105,6 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
dsb(ish);
isb();
- /*
- * If the host is running at EL1 and we have a VPIPT I-cache,
- * then we must perform I-cache maintenance at EL2 in order for
- * it to have an effect on the guest. Since the guest cannot hit
- * I-cache lines allocated with a different VMID, we don't need
- * to worry about junk out of guest reset (we nuke the I-cache on
- * VMID rollover), but we do need to be careful when remapping
- * executable pages for the same guest. This can happen when KSM
- * takes a CoW fault on an executable page, copies the page into
- * a page that was previously mapped in the guest and then needs
- * to invalidate the guest view of the I-cache for that page
- * from EL1. To solve this, we invalidate the entire I-cache when
- * unmapping a page from a guest if we have a VPIPT I-cache but
- * the host is running at EL1. As above, we could do better if
- * we had the VA.
- *
- * The moral of this story is: if you have a VPIPT I-cache, then
- * you should be running with VHE enabled.
- */
- if (icache_is_vpipt())
- icache_inval_all_pou();
-
__tlb_switch_to_host(&cxt);
}
@@ -157,28 +135,6 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
dsb(nsh);
isb();
- /*
- * If the host is running at EL1 and we have a VPIPT I-cache,
- * then we must perform I-cache maintenance at EL2 in order for
- * it to have an effect on the guest. Since the guest cannot hit
- * I-cache lines allocated with a different VMID, we don't need
- * to worry about junk out of guest reset (we nuke the I-cache on
- * VMID rollover), but we do need to be careful when remapping
- * executable pages for the same guest. This can happen when KSM
- * takes a CoW fault on an executable page, copies the page into
- * a page that was previously mapped in the guest and then needs
- * to invalidate the guest view of the I-cache for that page
- * from EL1. To solve this, we invalidate the entire I-cache when
- * unmapping a page from a guest if we have a VPIPT I-cache but
- * the host is running at EL1. As above, we could do better if
- * we had the VA.
- *
- * The moral of this story is: if you have a VPIPT I-cache, then
- * you should be running with VHE enabled.
- */
- if (icache_is_vpipt())
- icache_inval_all_pou();
-
__tlb_switch_to_host(&cxt);
}
@@ -205,10 +161,6 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
dsb(ish);
isb();
- /* See the comment in __kvm_tlb_flush_vmid_ipa() */
- if (icache_is_vpipt())
- icache_inval_all_pou();
-
__tlb_switch_to_host(&cxt);
}
@@ -246,18 +198,5 @@ void __kvm_flush_vm_context(void)
/* Same remark as in __tlb_switch_to_guest() */
dsb(ish);
__tlbi(alle1is);
-
- /*
- * VIPT and PIPT caches are not affected by VMID, so no maintenance
- * is necessary across a VMID rollover.
- *
- * VPIPT caches constrain lookup and maintenance to the active VMID,
- * so we need to invalidate lines with a stale VMID to avoid an ABA
- * race after multiple rollovers.
- *
- */
- if (icache_is_vpipt())
- asm volatile("ic ialluis");
-
dsb(ish);
}
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index b636b4111dbf..b32e2940df7d 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -216,18 +216,5 @@ void __kvm_flush_vm_context(void)
{
dsb(ishst);
__tlbi(alle1is);
-
- /*
- * VIPT and PIPT caches are not affected by VMID, so no maintenance
- * is necessary across a VMID rollover.
- *
- * VPIPT caches constrain lookup and maintenance to the active VMID,
- * so we need to invalidate lines with a stale VMID to avoid an ABA
- * race after multiple rollovers.
- *
- */
- if (icache_is_vpipt())
- asm volatile("ic ialluis");
-
dsb(ish);
}
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index fe99b3dab6ce..3d9467ff73bc 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -267,9 +267,8 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
{
- u64 val = kvm_vcpu_read_pmcr(vcpu) >> ARMV8_PMU_PMCR_N_SHIFT;
+ u64 val = FIELD_GET(ARMV8_PMU_PMCR_N, kvm_vcpu_read_pmcr(vcpu));
- val &= ARMV8_PMU_PMCR_N_MASK;
if (val == 0)
return BIT(ARMV8_PMU_CYCLE_IDX);
else
@@ -1136,8 +1135,7 @@ u8 kvm_arm_pmu_get_pmuver_limit(void)
*/
u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu)
{
- u64 pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0) &
- ~(ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT);
+ u64 pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0);
- return pmcr | ((u64)vcpu->kvm->arch.pmcr_n << ARMV8_PMU_PMCR_N_SHIFT);
+ return u64_replace_bits(pmcr, vcpu->kvm->arch.pmcr_n, ARMV8_PMU_PMCR_N);
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 4735e1b37fb3..ff45d688bd7d 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -877,7 +877,7 @@ static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx)
u64 pmcr, val;
pmcr = kvm_vcpu_read_pmcr(vcpu);
- val = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
+ val = FIELD_GET(ARMV8_PMU_PMCR_N, pmcr);
if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX) {
kvm_inject_undefined(vcpu);
return false;
@@ -1143,7 +1143,7 @@ static int get_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
u64 val)
{
- u8 new_n = (val >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
+ u8 new_n = FIELD_GET(ARMV8_PMU_PMCR_N, val);
struct kvm *kvm = vcpu->kvm;
mutex_lock(&kvm->arch.config_lock);
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index c8c3cb812783..e949e1d0fd9f 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -368,7 +368,7 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
vgic_v4_teardown(kvm);
}
-void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
+static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -379,29 +379,39 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
vgic_flush_pending_lpis(vcpu);
INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
- vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
+ if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
+ vgic_unregister_redist_iodev(vcpu);
+ vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
+ }
}
-static void __kvm_vgic_destroy(struct kvm *kvm)
+void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ mutex_lock(&kvm->slots_lock);
+ __kvm_vgic_vcpu_destroy(vcpu);
+ mutex_unlock(&kvm->slots_lock);
+}
+
+void kvm_vgic_destroy(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
unsigned long i;
- lockdep_assert_held(&kvm->arch.config_lock);
+ mutex_lock(&kvm->slots_lock);
vgic_debug_destroy(kvm);
kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_vgic_vcpu_destroy(vcpu);
+ __kvm_vgic_vcpu_destroy(vcpu);
+
+ mutex_lock(&kvm->arch.config_lock);
kvm_vgic_dist_destroy(kvm);
-}
-void kvm_vgic_destroy(struct kvm *kvm)
-{
- mutex_lock(&kvm->arch.config_lock);
- __kvm_vgic_destroy(kvm);
mutex_unlock(&kvm->arch.config_lock);
+ mutex_unlock(&kvm->slots_lock);
}
/**
@@ -469,25 +479,26 @@ int kvm_vgic_map_resources(struct kvm *kvm)
type = VGIC_V3;
}
- if (ret) {
- __kvm_vgic_destroy(kvm);
+ if (ret)
goto out;
- }
+
dist->ready = true;
dist_base = dist->vgic_dist_base;
mutex_unlock(&kvm->arch.config_lock);
ret = vgic_register_dist_iodev(kvm, dist_base, type);
- if (ret) {
+ if (ret)
kvm_err("Unable to register VGIC dist MMIO regions\n");
- kvm_vgic_destroy(kvm);
- }
- mutex_unlock(&kvm->slots_lock);
- return ret;
+ goto out_slots;
out:
mutex_unlock(&kvm->arch.config_lock);
+out_slots:
mutex_unlock(&kvm->slots_lock);
+
+ if (ret)
+ kvm_vgic_destroy(kvm);
+
return ret;
}
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index 89117ba2528a..a764b0ab8bf9 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -820,7 +820,7 @@ out_unlock:
return ret;
}
-static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
+void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
{
struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
@@ -833,6 +833,8 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm)
unsigned long c;
int ret = 0;
+ lockdep_assert_held(&kvm->slots_lock);
+
kvm_for_each_vcpu(c, vcpu, kvm) {
ret = vgic_register_redist_iodev(vcpu);
if (ret)
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index 339a55194b2c..74a67ad87f29 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -436,6 +436,10 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
if (ret)
goto out;
+ /* Silently exit if the vLPI is already mapped */
+ if (irq->hw)
+ goto out;
+
/*
* Emit the mapping request. If it fails, the ITS probably
* isn't v4 compatible, so let's silently bail out. Holding
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 0ab09b0d4440..8d134569d0a1 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -241,6 +241,7 @@ int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq);
int vgic_v3_save_pending_tables(struct kvm *kvm);
int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count);
int vgic_register_redist_iodev(struct kvm_vcpu *vcpu);
+void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu);
bool vgic_v3_check_base(struct kvm *kvm);
void vgic_v3_load(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index c336d2ffdec5..6a56d7cf309d 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -18,13 +18,6 @@
* x1 - src
*/
SYM_FUNC_START(__pi_copy_page)
-alternative_if ARM64_HAS_NO_HW_PREFETCH
- // Prefetch three cache lines ahead.
- prfm pldl1strm, [x1, #128]
- prfm pldl1strm, [x1, #256]
- prfm pldl1strm, [x1, #384]
-alternative_else_nop_endif
-
ldp x2, x3, [x1]
ldp x4, x5, [x1, #16]
ldp x6, x7, [x1, #32]
@@ -39,10 +32,6 @@ alternative_else_nop_endif
1:
tst x0, #(PAGE_SIZE - 1)
-alternative_if ARM64_HAS_NO_HW_PREFETCH
- prfm pldl1strm, [x1, #384]
-alternative_else_nop_endif
-
stnp x2, x3, [x0, #-256]
ldp x2, x3, [x1]
stnp x4, x5, [x0, #16 - 256]
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 460d799e1296..55f6455a8284 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -607,6 +607,8 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
goto done;
}
count_vm_vma_lock_event(VMA_LOCK_RETRY);
+ if (fault & VM_FAULT_MAJOR)
+ mm_flags |= FAULT_FLAG_TRIED;
/* Quick path to respond to signals */
if (fault_signal_pending(fault, regs)) {
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 15f6347d23b6..1ac7467d34c9 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -52,9 +52,6 @@ u64 vabits_actual __ro_after_init = VA_BITS_MIN;
EXPORT_SYMBOL(vabits_actual);
#endif
-u64 kimage_vaddr __ro_after_init = (u64)&_text;
-EXPORT_SYMBOL(kimage_vaddr);
-
u64 kimage_voffset __ro_after_init;
EXPORT_SYMBOL(kimage_voffset);
@@ -674,6 +671,9 @@ static int __init map_entry_trampoline(void)
{
int i;
+ if (!arm64_kernel_unmapped_at_el0())
+ return 0;
+
pgprot_t prot = kernel_exec_prot();
phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start);
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index b98c38288a9d..1e07d74d7a6c 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -37,10 +37,10 @@ HAS_GIC_PRIO_MASKING
HAS_GIC_PRIO_RELAXED_SYNC
HAS_HCX
HAS_LDAPR
+HAS_LPA2
HAS_LSE_ATOMICS
HAS_MOPS
HAS_NESTED_VIRT
-HAS_NO_HW_PREFETCH
HAS_PAN
HAS_S1PIE
HAS_RAS_EXTN
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 96cbeeab4eec..4c9b67934367 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -1002,6 +1002,27 @@ UnsignedEnum 3:0 BT
EndEnum
EndSysreg
+Sysreg ID_AA64PFR2_EL1 3 0 0 4 2
+Res0 63:36
+UnsignedEnum 35:32 FPMR
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Res0 31:12
+UnsignedEnum 11:8 MTEFAR
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 7:4 MTESTOREONLY
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 3:0 MTEPERM
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+EndSysreg
+
Sysreg ID_AA64ZFR0_EL1 3 0 0 4 4
Res0 63:60
UnsignedEnum 59:56 F64MM
@@ -1058,7 +1079,11 @@ UnsignedEnum 63 FA64
0b0 NI
0b1 IMP
EndEnum
-Res0 62:60
+Res0 62:61
+UnsignedEnum 60 LUTv2
+ 0b0 NI
+ 0b1 IMP
+EndEnum
UnsignedEnum 59:56 SMEver
0b0000 SME
0b0001 SME2
@@ -1086,7 +1111,14 @@ UnsignedEnum 42 F16F16
0b0 NI
0b1 IMP
EndEnum
-Res0 41:40
+UnsignedEnum 41 F8F16
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+UnsignedEnum 40 F8F32
+ 0b0 NI
+ 0b1 IMP
+EndEnum
UnsignedEnum 39:36 I8I32
0b0000 NI
0b1111 IMP
@@ -1107,7 +1139,49 @@ UnsignedEnum 32 F32F32
0b0 NI
0b1 IMP
EndEnum
-Res0 31:0
+Res0 31
+UnsignedEnum 30 SF8FMA
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+UnsignedEnum 29 SF8DP4
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+UnsignedEnum 28 SF8DP2
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+Res0 27:0
+EndSysreg
+
+Sysreg ID_AA64FPFR0_EL1 3 0 0 4 7
+Res0 63:32
+UnsignedEnum 31 F8CVT
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+UnsignedEnum 30 F8FMA
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+UnsignedEnum 29 F8DP4
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+UnsignedEnum 28 F8DP2
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+Res0 27:2
+UnsignedEnum 1 F8E4M3
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+UnsignedEnum 0 F8E5M2
+ 0b0 NI
+ 0b1 IMP
+EndEnum
EndSysreg
Sysreg ID_AA64DFR0_EL1 3 0 0 5 0
@@ -1115,7 +1189,10 @@ Enum 63:60 HPMN0
0b0000 UNPREDICTABLE
0b0001 DEF
EndEnum
-Res0 59:56
+UnsignedEnum 59:56 ExtTrcBuff
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
UnsignedEnum 55:52 BRBE
0b0000 NI
0b0001 IMP
@@ -1327,6 +1404,7 @@ UnsignedEnum 11:8 API
0b0011 PAuth2
0b0100 FPAC
0b0101 FPACCOMBINE
+ 0b0110 PAuth_LR
EndEnum
UnsignedEnum 7:4 APA
0b0000 NI
@@ -1335,6 +1413,7 @@ UnsignedEnum 7:4 APA
0b0011 PAuth2
0b0100 FPAC
0b0101 FPACCOMBINE
+ 0b0110 PAuth_LR
EndEnum
UnsignedEnum 3:0 DPB
0b0000 NI
@@ -1344,7 +1423,14 @@ EndEnum
EndSysreg
Sysreg ID_AA64ISAR2_EL1 3 0 0 6 2
-Res0 63:56
+UnsignedEnum 63:60 ATS1A
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 59:56 LUT
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
UnsignedEnum 55:52 CSSC
0b0000 NI
0b0001 IMP
@@ -1353,7 +1439,19 @@ UnsignedEnum 51:48 RPRFM
0b0000 NI
0b0001 IMP
EndEnum
-Res0 47:32
+Res0 47:44
+UnsignedEnum 43:40 PRFMSLC
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 39:36 SYSINSTR_128
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 35:32 SYSREG_128
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
UnsignedEnum 31:28 CLRBHB
0b0000 NI
0b0001 IMP
@@ -1377,6 +1475,7 @@ UnsignedEnum 15:12 APA3
0b0011 PAuth2
0b0100 FPAC
0b0101 FPACCOMBINE
+ 0b0110 PAuth_LR
EndEnum
UnsignedEnum 11:8 GPA3
0b0000 NI
@@ -1392,6 +1491,23 @@ UnsignedEnum 3:0 WFxT
EndEnum
EndSysreg
+Sysreg ID_AA64ISAR3_EL1 3 0 0 6 3
+Res0 63:12
+UnsignedEnum 11:8 TLBIW
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 7:4 FAMINMAX
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 3:0 CPA
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0010 CPA2
+EndEnum
+EndSysreg
+
Sysreg ID_AA64MMFR0_EL1 3 0 0 7 0
UnsignedEnum 63:60 ECV
0b0000 NI
@@ -1680,7 +1796,8 @@ Field 63 TIDCP
Field 62 SPINTMASK
Field 61 NMI
Field 60 EnTP2
-Res0 59:58
+Field 59 TCSO
+Field 58 TCSO0
Field 57 EPAN
Field 56 EnALS
Field 55 EnAS0
@@ -1709,7 +1826,7 @@ EndEnum
Field 37 ITFSB
Field 36 BT1
Field 35 BT0
-Res0 34
+Field 34 EnFPM
Field 33 MSCEn
Field 32 CMOW
Field 31 EnIA
@@ -1747,7 +1864,8 @@ Field 0 M
EndSysreg
SysregFields CPACR_ELx
-Res0 63:29
+Res0 63:30
+Field 29 E0POE
Field 28 TTA
Res0 27:26
Field 25:24 SMEN
@@ -1790,6 +1908,41 @@ Sysreg SMCR_EL1 3 0 1 2 6
Fields SMCR_ELx
EndSysreg
+SysregFields GCSCR_ELx
+Res0 63:10
+Field 9 STREn
+Field 8 PUSHMEn
+Res0 7
+Field 6 EXLOCKEN
+Field 5 RVCHKEN
+Res0 4:1
+Field 0 PCRSEL
+EndSysregFields
+
+Sysreg GCSCR_EL1 3 0 2 5 0
+Fields GCSCR_ELx
+EndSysreg
+
+SysregFields GCSPR_ELx
+Field 63:3 PTR
+Res0 2:0
+EndSysregFields
+
+Sysreg GCSPR_EL1 3 0 2 5 1
+Fields GCSPR_ELx
+EndSysreg
+
+Sysreg GCSCRE0_EL1 3 0 2 5 2
+Res0 63:11
+Field 10 nTR
+Field 9 STREn
+Field 8 PUSHMEn
+Res0 7:6
+Field 5 RVCHKEN
+Res0 4:1
+Field 0 PCRSEL
+EndSysreg
+
Sysreg ALLINT 3 0 4 3 0
Res0 63:14
Field 13 ALLINT
@@ -1933,10 +2086,18 @@ Sysreg CONTEXTIDR_EL1 3 0 13 0 1
Fields CONTEXTIDR_ELx
EndSysreg
+Sysreg RCWSMASK_EL1 3 0 13 0 3
+Field 63:0 RCWSMASK
+EndSysreg
+
Sysreg TPIDR_EL1 3 0 13 0 4
Field 63:0 ThreadID
EndSysreg
+Sysreg RCWMASK_EL1 3 0 13 0 6
+Field 63:0 RCWMASK
+EndSysreg
+
Sysreg SCXTNUM_EL1 3 0 13 0 7
Field 63:0 SoftwareContextNumber
EndSysreg
@@ -2004,9 +2165,10 @@ Field 27:24 CWG
Field 23:20 ERG
Field 19:16 DminLine
Enum 15:14 L1Ip
- 0b00 VPIPT
+ # This was named as VPIPT in the ARM but now documented as reserved
+ 0b00 RESERVED_VPIPT
# This is named as AIVIVT in the ARM but documented as reserved
- 0b01 RESERVED
+ 0b01 RESERVED_AIVIVT
0b10 VIPT
0b11 PIPT
EndEnum
@@ -2020,12 +2182,39 @@ Field 4 DZP
Field 3:0 BS
EndSysreg
+Sysreg GCSPR_EL0 3 3 2 5 1
+Fields GCSPR_ELx
+EndSysreg
+
Sysreg SVCR 3 3 4 2 2
Res0 63:2
Field 1 ZA
Field 0 SM
EndSysreg
+Sysreg FPMR 3 3 4 4 2
+Res0 63:38
+Field 37:32 LSCALE2
+Field 31:24 NSCALE
+Res0 23
+Field 22:16 LSCALE
+Field 15 OSC
+Field 14 OSM
+Res0 13:9
+UnsignedEnum 8:6 F8D
+ 0b000 E5M2
+ 0b001 E4M3
+EndEnum
+UnsignedEnum 5:3 F8S2
+ 0b000 E5M2
+ 0b001 E4M3
+EndEnum
+UnsignedEnum 2:0 F8S1
+ 0b000 E5M2
+ 0b001 E4M3
+EndEnum
+EndSysreg
+
SysregFields HFGxTR_EL2
Field 63 nAMAIR2_EL1
Field 62 nMAIR2_EL1
@@ -2102,7 +2291,9 @@ Fields HFGxTR_EL2
EndSysreg
Sysreg HFGITR_EL2 3 4 1 1 6
-Res0 63:61
+Res0 63
+Field 62 ATS1E1A
+Res0 61
Field 60 COSPRCTX
Field 59 nGCSEPP
Field 58 nGCSSTR_EL1
@@ -2295,12 +2486,57 @@ Field 1 DBGBVRn_EL1
Field 0 DBGBCRn_EL1
EndSysreg
+Sysreg HAFGRTR_EL2 3 4 3 1 6
+Res0 63:50
+Field 49 AMEVTYPER115_EL0
+Field 48 AMEVCNTR115_EL0
+Field 47 AMEVTYPER114_EL0
+Field 46 AMEVCNTR114_EL0
+Field 45 AMEVTYPER113_EL0
+Field 44 AMEVCNTR113_EL0
+Field 43 AMEVTYPER112_EL0
+Field 42 AMEVCNTR112_EL0
+Field 41 AMEVTYPER111_EL0
+Field 40 AMEVCNTR111_EL0
+Field 39 AMEVTYPER110_EL0
+Field 38 AMEVCNTR110_EL0
+Field 37 AMEVTYPER19_EL0
+Field 36 AMEVCNTR19_EL0
+Field 35 AMEVTYPER18_EL0
+Field 34 AMEVCNTR18_EL0
+Field 33 AMEVTYPER17_EL0
+Field 32 AMEVCNTR17_EL0
+Field 31 AMEVTYPER16_EL0
+Field 30 AMEVCNTR16_EL0
+Field 29 AMEVTYPER15_EL0
+Field 28 AMEVCNTR15_EL0
+Field 27 AMEVTYPER14_EL0
+Field 26 AMEVCNTR14_EL0
+Field 25 AMEVTYPER13_EL0
+Field 24 AMEVCNTR13_EL0
+Field 23 AMEVTYPER12_EL0
+Field 22 AMEVCNTR12_EL0
+Field 21 AMEVTYPER11_EL0
+Field 20 AMEVCNTR11_EL0
+Field 19 AMEVTYPER10_EL0
+Field 18 AMEVCNTR10_EL0
+Field 17 AMCNTEN1
+Res0 16:5
+Field 4 AMEVCNTR03_EL0
+Field 3 AMEVCNTR02_EL0
+Field 2 AMEVCNTR01_EL0
+Field 1 AMEVCNTR00_EL0
+Field 0 AMCNTEN0
+EndSysreg
+
Sysreg ZCR_EL2 3 4 1 2 0
Fields ZCR_ELx
EndSysreg
Sysreg HCRX_EL2 3 4 1 2 2
-Res0 63:23
+Res0 63:25
+Field 24 PACMEn
+Field 23 EnFPM
Field 22 GCSEn
Field 21 EnIDCP128
Field 20 EnSDERR
@@ -2348,6 +2584,14 @@ Sysreg SMCR_EL2 3 4 1 2 6
Fields SMCR_ELx
EndSysreg
+Sysreg GCSCR_EL2 3 4 2 5 0
+Fields GCSCR_ELx
+EndSysreg
+
+Sysreg GCSPR_EL2 3 4 2 5 1
+Fields GCSPR_ELx
+EndSysreg
+
Sysreg DACR32_EL2 3 4 3 0 0
Res0 63:32
Field 31:30 D15
@@ -2407,6 +2651,14 @@ Sysreg SMCR_EL12 3 5 1 2 6
Fields SMCR_ELx
EndSysreg
+Sysreg GCSCR_EL12 3 5 2 5 0
+Fields GCSCR_ELx
+EndSysreg
+
+Sysreg GCSPR_EL12 3 5 2 5 1
+Fields GCSPR_ELx
+EndSysreg
+
Sysreg FAR_EL12 3 5 6 0 0
Field 63:0 ADDR
EndSysreg
@@ -2471,6 +2723,33 @@ Field 1 PIE
Field 0 PnCH
EndSysreg
+SysregFields MAIR2_ELx
+Field 63:56 Attr7
+Field 55:48 Attr6
+Field 47:40 Attr5
+Field 39:32 Attr4
+Field 31:24 Attr3
+Field 23:16 Attr2
+Field 15:8 Attr1
+Field 7:0 Attr0
+EndSysregFields
+
+Sysreg MAIR2_EL1 3 0 10 2 1
+Fields MAIR2_ELx
+EndSysreg
+
+Sysreg MAIR2_EL2 3 4 10 1 1
+Fields MAIR2_ELx
+EndSysreg
+
+Sysreg AMAIR2_EL1 3 0 10 3 1
+Field 63:0 ImpDef
+EndSysreg
+
+Sysreg AMAIR2_EL2 3 4 10 3 1
+Field 63:0 ImpDef
+EndSysreg
+
SysregFields PIRx_ELx
Field 63:60 Perm15
Field 59:56 Perm14
@@ -2510,6 +2789,26 @@ Sysreg PIR_EL2 3 4 10 2 3
Fields PIRx_ELx
EndSysreg
+Sysreg POR_EL0 3 3 10 2 4
+Fields PIRx_ELx
+EndSysreg
+
+Sysreg POR_EL1 3 0 10 2 4
+Fields PIRx_ELx
+EndSysreg
+
+Sysreg POR_EL12 3 5 10 2 4
+Fields PIRx_ELx
+EndSysreg
+
+Sysreg S2POR_EL1 3 0 10 2 5
+Fields PIRx_ELx
+EndSysreg
+
+Sysreg S2PIR_EL2 3 4 10 2 5
+Fields PIRx_ELx
+EndSysreg
+
Sysreg LORSA_EL1 3 0 10 4 0
Res0 63:52
Field 51:16 SA
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 204b94b2e6aa..4ba8d67ddb09 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -83,7 +83,7 @@ endif
ifeq ($(CONFIG_RELOCATABLE),y)
KBUILD_CFLAGS_KERNEL += -fPIE
-LDFLAGS_vmlinux += -static -pie --no-dynamic-linker -z notext
+LDFLAGS_vmlinux += -static -pie --no-dynamic-linker -z notext $(call ld-option, --apply-dynamic-relocs)
endif
cflags-y += $(call cc-option, -mno-check-zero-division)
diff --git a/arch/loongarch/include/asm/efi.h b/arch/loongarch/include/asm/efi.h
index 091897d40b03..91d81f9730ab 100644
--- a/arch/loongarch/include/asm/efi.h
+++ b/arch/loongarch/include/asm/efi.h
@@ -32,6 +32,6 @@ static inline unsigned long efi_get_kimg_min_align(void)
#define EFI_KIMG_PREFERRED_ADDRESS PHYSADDR(VMLINUX_LOAD_ADDRESS)
-unsigned long kernel_entry_address(void);
+unsigned long kernel_entry_address(unsigned long kernel_addr);
#endif /* _ASM_LOONGARCH_EFI_H */
diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h
index b9a4ab54285c..9b16a3b8e706 100644
--- a/arch/loongarch/include/asm/elf.h
+++ b/arch/loongarch/include/asm/elf.h
@@ -293,7 +293,7 @@ extern const char *__elf_platform;
#define ELF_PLAT_INIT(_r, load_addr) do { \
_r->regs[1] = _r->regs[2] = _r->regs[3] = _r->regs[4] = 0; \
_r->regs[5] = _r->regs[6] = _r->regs[7] = _r->regs[8] = 0; \
- _r->regs[9] = _r->regs[10] = _r->regs[11] = _r->regs[12] = 0; \
+ _r->regs[9] = _r->regs[10] /* syscall n */ = _r->regs[12] = 0; \
_r->regs[13] = _r->regs[14] = _r->regs[15] = _r->regs[16] = 0; \
_r->regs[17] = _r->regs[18] = _r->regs[19] = _r->regs[20] = 0; \
_r->regs[21] = _r->regs[22] = _r->regs[23] = _r->regs[24] = 0; \
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 9b4957cefa8a..46366e783c84 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -1098,12 +1098,11 @@
static __always_inline u64 drdtime(void)
{
- int rID = 0;
u64 val = 0;
__asm__ __volatile__(
- "rdtime.d %0, %1 \n\t"
- : "=r"(val), "=r"(rID)
+ "rdtime.d %0, $zero\n\t"
+ : "=r"(val)
:
);
return val;
diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c
index 92270f14db94..f623feb2129f 100644
--- a/arch/loongarch/kernel/stacktrace.c
+++ b/arch/loongarch/kernel/stacktrace.c
@@ -32,7 +32,7 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
}
for (unwind_start(&state, task, regs);
- !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) {
+ !unwind_done(&state); unwind_next_frame(&state)) {
addr = unwind_get_return_address(&state);
if (!addr || !consume_entry(cookie, addr))
break;
diff --git a/arch/loongarch/kernel/unwind.c b/arch/loongarch/kernel/unwind.c
index ba324ba76fa1..a463d6961344 100644
--- a/arch/loongarch/kernel/unwind.c
+++ b/arch/loongarch/kernel/unwind.c
@@ -28,6 +28,5 @@ bool default_next_frame(struct unwind_state *state)
} while (!get_stack_info(state->sp, state->task, info));
- state->error = true;
return false;
}
diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c
index 55afc27320e1..929ae240280a 100644
--- a/arch/loongarch/kernel/unwind_prologue.c
+++ b/arch/loongarch/kernel/unwind_prologue.c
@@ -227,7 +227,7 @@ static bool next_frame(struct unwind_state *state)
} while (!get_stack_info(state->sp, state->task, info));
out:
- state->error = true;
+ state->stack_info.type = STACK_TYPE_UNKNOWN;
return false;
}
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 169ff8b3915e..4fcd6cd6da23 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -480,10 +480,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
case 8:
move_reg(ctx, t1, src);
emit_insn(ctx, extwb, dst, t1);
+ emit_zext_32(ctx, dst, is32);
break;
case 16:
move_reg(ctx, t1, src);
emit_insn(ctx, extwh, dst, t1);
+ emit_zext_32(ctx, dst, is32);
break;
case 32:
emit_insn(ctx, addw, dst, src, LOONGARCH_GPR_ZERO);
@@ -772,8 +774,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
break;
case 32:
emit_insn(ctx, revb2w, dst, dst);
- /* zero-extend 32 bits into 64 bits */
- emit_zext_32(ctx, dst, is32);
+ /* clear the upper 32 bits */
+ emit_zext_32(ctx, dst, true);
break;
case 64:
emit_insn(ctx, revbd, dst, dst);
@@ -911,8 +913,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
/* function return */
case BPF_JMP | BPF_EXIT:
- emit_sext_32(ctx, regmap[BPF_REG_0], true);
-
if (i == ctx->prog->len - 1)
break;
@@ -988,14 +988,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
}
break;
case BPF_DW:
- if (is_signed_imm12(off)) {
- emit_insn(ctx, ldd, dst, src, off);
- } else if (is_signed_imm14(off)) {
- emit_insn(ctx, ldptrd, dst, src, off);
- } else {
- move_imm(ctx, t1, off, is32);
- emit_insn(ctx, ldxd, dst, src, t1);
- }
+ move_imm(ctx, t1, off, is32);
+ emit_insn(ctx, ldxd, dst, src, t1);
break;
}
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 7e6b74b6eecd..b4d71fea558f 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -453,6 +453,7 @@ CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
+CONFIG_BCACHEFS_FS=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_AUTOFS_FS=m
@@ -550,7 +551,6 @@ CONFIG_NLS_MAC_TURKISH=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 0b403e2efcd5..682d8cd3dd3c 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -410,6 +410,7 @@ CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
+CONFIG_BCACHEFS_FS=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_AUTOFS_FS=m
@@ -507,7 +508,6 @@ CONFIG_NLS_MAC_TURKISH=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 57aac3f4b001..15259ced8463 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -430,6 +430,7 @@ CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
+CONFIG_BCACHEFS_FS=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_AUTOFS_FS=m
@@ -527,7 +528,6 @@ CONFIG_NLS_MAC_TURKISH=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 3c160636a2e9..7395c12caef6 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -402,6 +402,7 @@ CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
+CONFIG_BCACHEFS_FS=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_AUTOFS_FS=m
@@ -499,7 +500,6 @@ CONFIG_NLS_MAC_TURKISH=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 23cf07c49d14..92506bc7f78d 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -412,6 +412,7 @@ CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
+CONFIG_BCACHEFS_FS=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_AUTOFS_FS=m
@@ -509,7 +510,6 @@ CONFIG_NLS_MAC_TURKISH=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 619a0d93ce5b..144bc8c0d8b5 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -269,9 +269,6 @@ CONFIG_RDS_TCP=m
CONFIG_L2TP=m
CONFIG_BRIDGE=m
CONFIG_ATALK=m
-CONFIG_DEV_APPLETALK=m
-CONFIG_IPDDP=m
-CONFIG_IPDDP_ENCAP=y
CONFIG_6LOWPAN=m
CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
CONFIG_6LOWPAN_GHC_UDP=m
@@ -432,6 +429,7 @@ CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
+CONFIG_BCACHEFS_FS=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_AUTOFS_FS=m
@@ -529,7 +527,6 @@ CONFIG_NLS_MAC_TURKISH=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index d9430bc2b2de..07594c729497 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -289,9 +289,6 @@ CONFIG_RDS_TCP=m
CONFIG_L2TP=m
CONFIG_BRIDGE=m
CONFIG_ATALK=m
-CONFIG_DEV_APPLETALK=m
-CONFIG_IPDDP=m
-CONFIG_IPDDP_ENCAP=y
CONFIG_6LOWPAN=m
CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
CONFIG_6LOWPAN_GHC_UDP=m
@@ -518,6 +515,7 @@ CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
+CONFIG_BCACHEFS_FS=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_AUTOFS_FS=m
@@ -615,7 +613,6 @@ CONFIG_NLS_MAC_TURKISH=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index eb6132f29bf5..c34de6c1de20 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -401,6 +401,7 @@ CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
+CONFIG_BCACHEFS_FS=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_AUTOFS_FS=m
@@ -498,7 +499,6 @@ CONFIG_NLS_MAC_TURKISH=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index d0bad674cbb7..83bc029d1f33 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -402,6 +402,7 @@ CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
+CONFIG_BCACHEFS_FS=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_AUTOFS_FS=m
@@ -499,7 +500,6 @@ CONFIG_NLS_MAC_TURKISH=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index dad6bcfcaeed..4f551dac2ed7 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -419,6 +419,7 @@ CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
+CONFIG_BCACHEFS_FS=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_AUTOFS_FS=m
@@ -516,7 +517,6 @@ CONFIG_NLS_MAC_TURKISH=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index eb1b489b3139..b1bf01182bd3 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -400,6 +400,7 @@ CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
+CONFIG_BCACHEFS_FS=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_AUTOFS_FS=m
@@ -497,7 +498,6 @@ CONFIG_NLS_MAC_TURKISH=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index 939589826546..5c9a3f71f036 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -400,6 +400,7 @@ CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
+CONFIG_BCACHEFS_FS=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_AUTOFS_FS=m
@@ -497,7 +498,6 @@ CONFIG_NLS_MAC_TURKISH=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 7a4b780e82cb..37db1a810b67 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -456,3 +456,5 @@
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait
456 common futex_requeue sys_futex_requeue
+457 common statmount sys_statmount
+458 common listmount sys_listmount
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 5b6a0b02b7de..07fff5ad1c9c 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -462,3 +462,5 @@
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait
456 common futex_requeue sys_futex_requeue
+457 common statmount sys_statmount
+458 common listmount sys_listmount
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 76db82542519..797ae590ebdb 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -460,6 +460,7 @@ config MACH_LOONGSON2EF
config MACH_LOONGSON64
bool "Loongson 64-bit family of machines"
+ select ARCH_DMA_DEFAULT_COHERENT
select ARCH_SPARSEMEM_ENABLE
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
@@ -1251,6 +1252,7 @@ config CPU_LOONGSON64
select CPU_SUPPORTS_MSA
select CPU_DIEI_BROKEN if !LOONGSON3_ENHANCEMENT
select CPU_MIPSR2_IRQ_VI
+ select DMA_NONCOHERENT
select WEAK_ORDERING
select WEAK_REORDERING_BEYOND_LLSC
select MIPS_ASID_BITS_VARIABLE
diff --git a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi
index f878f47e4501..ee3e2153dd13 100644
--- a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi
+++ b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi
@@ -130,8 +130,7 @@
compatible = "pci0014,7a03.0",
"pci0014,7a03",
"pciclass0c0320",
- "pciclass0c03",
- "loongson, pci-gmac";
+ "pciclass0c03";
reg = <0x1800 0x0 0x0 0x0 0x0>;
interrupts = <12 IRQ_TYPE_LEVEL_LOW>,
diff --git a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi
index 7c69e8245c2f..cce9428afc41 100644
--- a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi
+++ b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi
@@ -193,8 +193,7 @@
compatible = "pci0014,7a03.0",
"pci0014,7a03",
"pciclass020000",
- "pciclass0200",
- "loongson, pci-gmac";
+ "pciclass0200";
reg = <0x1800 0x0 0x0 0x0 0x0>;
interrupts = <12 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/mips/include/asm/mach-loongson64/boot_param.h b/arch/mips/include/asm/mach-loongson64/boot_param.h
index 035b1a69e2d0..e007edd6b60a 100644
--- a/arch/mips/include/asm/mach-loongson64/boot_param.h
+++ b/arch/mips/include/asm/mach-loongson64/boot_param.h
@@ -14,7 +14,11 @@
#define ADAPTER_ROM 8
#define ACPI_TABLE 9
#define SMBIOS_TABLE 10
-#define MAX_MEMORY_TYPE 11
+#define UMA_VIDEO_RAM 11
+#define VUMA_VIDEO_RAM 12
+#define MAX_MEMORY_TYPE 13
+
+#define MEM_SIZE_IS_IN_BYTES (1 << 31)
#define LOONGSON3_BOOT_MEM_MAP_MAX 128
struct efi_memory_map_loongson {
@@ -117,7 +121,8 @@ struct irq_source_routing_table {
u64 pci_io_start_addr;
u64 pci_io_end_addr;
u64 pci_config_addr;
- u32 dma_mask_bits;
+ u16 dma_mask_bits;
+ u16 dma_noncoherent;
} __packed;
struct interface_info {
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 5387ed0a5186..b630604c577f 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -121,6 +121,19 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
/* Put the stack after the struct pt_regs. */
childksp = (unsigned long) childregs;
p->thread.cp0_status = (read_c0_status() & ~(ST0_CU2|ST0_CU1)) | ST0_KERNEL_CUMASK;
+
+ /*
+ * New tasks lose permission to use the fpu. This accelerates context
+ * switching for most programs since they don't use the fpu.
+ */
+ clear_tsk_thread_flag(p, TIF_USEDFPU);
+ clear_tsk_thread_flag(p, TIF_USEDMSA);
+ clear_tsk_thread_flag(p, TIF_MSA_CTX_LIVE);
+
+#ifdef CONFIG_MIPS_MT_FPAFF
+ clear_tsk_thread_flag(p, TIF_FPUBOUND);
+#endif /* CONFIG_MIPS_MT_FPAFF */
+
if (unlikely(args->fn)) {
/* kernel thread */
unsigned long status = p->thread.cp0_status;
@@ -149,20 +162,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
p->thread.reg29 = (unsigned long) childregs;
p->thread.reg31 = (unsigned long) ret_from_fork;
- /*
- * New tasks lose permission to use the fpu. This accelerates context
- * switching for most programs since they don't use the fpu.
- */
childregs->cp0_status &= ~(ST0_CU2|ST0_CU1);
- clear_tsk_thread_flag(p, TIF_USEDFPU);
- clear_tsk_thread_flag(p, TIF_USEDMSA);
- clear_tsk_thread_flag(p, TIF_MSA_CTX_LIVE);
-
-#ifdef CONFIG_MIPS_MT_FPAFF
- clear_tsk_thread_flag(p, TIF_FPUBOUND);
-#endif /* CONFIG_MIPS_MT_FPAFF */
-
#ifdef CONFIG_MIPS_FP_SUPPORT
atomic_set(&p->thread.bd_emu_frame, BD_EMUFRAME_NONE);
#endif
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 8fbef537fb88..82e2e051b416 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -351,10 +351,11 @@ early_initcall(mips_smp_ipi_init);
*/
asmlinkage void start_secondary(void)
{
- unsigned int cpu;
+ unsigned int cpu = raw_smp_processor_id();
cpu_probe();
per_cpu_trap_init(false);
+ rcutree_report_cpu_starting(cpu);
mips_clockevent_init();
mp_ops->init_secondary();
cpu_report();
@@ -366,7 +367,6 @@ asmlinkage void start_secondary(void)
*/
calibrate_delay();
- cpu = smp_processor_id();
cpu_data[cpu].udelay_val = loops_per_jiffy;
set_cpu_sibling_map(cpu);
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index a842b41c8e06..134ea054b1c7 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -395,3 +395,5 @@
454 n32 futex_wake sys_futex_wake
455 n32 futex_wait sys_futex_wait
456 n32 futex_requeue sys_futex_requeue
+457 n32 statmount sys_statmount
+458 n32 listmount sys_listmount
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index 116ff501bf92..959a21664703 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -371,3 +371,5 @@
454 n64 futex_wake sys_futex_wake
455 n64 futex_wait sys_futex_wait
456 n64 futex_requeue sys_futex_requeue
+457 n64 statmount sys_statmount
+458 n64 listmount sys_listmount
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 525cc54bc63b..e55bc1d4bf0f 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -444,3 +444,5 @@
454 o32 futex_wake sys_futex_wake
455 o32 futex_wait sys_futex_wait
456 o32 futex_requeue sys_futex_requeue
+457 o32 statmount sys_statmount
+458 o32 listmount sys_listmount
diff --git a/arch/mips/loongson64/env.c b/arch/mips/loongson64/env.c
index c961e2999f15..ef3750a6ffac 100644
--- a/arch/mips/loongson64/env.c
+++ b/arch/mips/loongson64/env.c
@@ -13,6 +13,8 @@
* Copyright (C) 2009 Lemote Inc.
* Author: Wu Zhangjin, wuzhangjin@gmail.com
*/
+
+#include <linux/dma-map-ops.h>
#include <linux/export.h>
#include <linux/pci_ids.h>
#include <asm/bootinfo.h>
@@ -147,8 +149,14 @@ void __init prom_lefi_init_env(void)
loongson_sysconf.dma_mask_bits = eirq_source->dma_mask_bits;
if (loongson_sysconf.dma_mask_bits < 32 ||
- loongson_sysconf.dma_mask_bits > 64)
+ loongson_sysconf.dma_mask_bits > 64) {
loongson_sysconf.dma_mask_bits = 32;
+ dma_default_coherent = true;
+ } else {
+ dma_default_coherent = !eirq_source->dma_noncoherent;
+ }
+
+ pr_info("Firmware: Coherent DMA: %s\n", dma_default_coherent ? "on" : "off");
loongson_sysconf.restart_addr = boot_p->reset_system.ResetWarm;
loongson_sysconf.poweroff_addr = boot_p->reset_system.Shutdown;
diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c
index ee8de1735b7c..f25caa6aa9d3 100644
--- a/arch/mips/loongson64/init.c
+++ b/arch/mips/loongson64/init.c
@@ -49,8 +49,7 @@ void virtual_early_config(void)
void __init szmem(unsigned int node)
{
u32 i, mem_type;
- static unsigned long num_physpages;
- u64 node_id, node_psize, start_pfn, end_pfn, mem_start, mem_size;
+ phys_addr_t node_id, mem_start, mem_size;
/* Otherwise come from DTB */
if (loongson_sysconf.fw_interface != LOONGSON_LEFI)
@@ -64,30 +63,46 @@ void __init szmem(unsigned int node)
mem_type = loongson_memmap->map[i].mem_type;
mem_size = loongson_memmap->map[i].mem_size;
- mem_start = loongson_memmap->map[i].mem_start;
+
+ /* Memory size comes in MB if MEM_SIZE_IS_IN_BYTES not set */
+ if (mem_size & MEM_SIZE_IS_IN_BYTES)
+ mem_size &= ~MEM_SIZE_IS_IN_BYTES;
+ else
+ mem_size = mem_size << 20;
+
+ mem_start = (node_id << 44) | loongson_memmap->map[i].mem_start;
switch (mem_type) {
case SYSTEM_RAM_LOW:
case SYSTEM_RAM_HIGH:
- start_pfn = ((node_id << 44) + mem_start) >> PAGE_SHIFT;
- node_psize = (mem_size << 20) >> PAGE_SHIFT;
- end_pfn = start_pfn + node_psize;
- num_physpages += node_psize;
- pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n",
- (u32)node_id, mem_type, mem_start, mem_size);
- pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n",
- start_pfn, end_pfn, num_physpages);
- memblock_add_node(PFN_PHYS(start_pfn),
- PFN_PHYS(node_psize), node,
+ case UMA_VIDEO_RAM:
+ pr_info("Node %d, mem_type:%d\t[%pa], %pa bytes usable\n",
+ (u32)node_id, mem_type, &mem_start, &mem_size);
+ memblock_add_node(mem_start, mem_size, node,
MEMBLOCK_NONE);
break;
case SYSTEM_RAM_RESERVED:
- pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n",
- (u32)node_id, mem_type, mem_start, mem_size);
- memblock_reserve(((node_id << 44) + mem_start), mem_size << 20);
+ case VIDEO_ROM:
+ case ADAPTER_ROM:
+ case ACPI_TABLE:
+ case SMBIOS_TABLE:
+ pr_info("Node %d, mem_type:%d\t[%pa], %pa bytes reserved\n",
+ (u32)node_id, mem_type, &mem_start, &mem_size);
+ memblock_reserve(mem_start, mem_size);
+ break;
+ /* We should not reserve VUMA_VIDEO_RAM as it overlaps with MMIO */
+ case VUMA_VIDEO_RAM:
+ default:
+ pr_info("Node %d, mem_type:%d\t[%pa], %pa bytes unhandled\n",
+ (u32)node_id, mem_type, &mem_start, &mem_size);
break;
}
}
+
+ /* Reserve vgabios if it comes from firmware */
+ if (loongson_sysconf.vgabios_addr)
+ memblock_reserve(virt_to_phys((void *)loongson_sysconf.vgabios_addr),
+ SZ_256K);
}
#ifndef CONFIG_NUMA
diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h
index 1641ff9a8b83..833555f74ffa 100644
--- a/arch/parisc/include/asm/bug.h
+++ b/arch/parisc/include/asm/bug.h
@@ -71,7 +71,7 @@
asm volatile("\n" \
"1:\t" PARISC_BUG_BREAK_ASM "\n" \
"\t.pushsection __bug_table,\"a\"\n" \
- "\t.align %2\n" \
+ "\t.align 4\n" \
"2:\t" __BUG_REL(1b) "\n" \
"\t.short %0\n" \
"\t.blockz %1-4-2\n" \
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index a47798fed54e..9c84470c31c7 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -455,3 +455,5 @@
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait
456 common futex_requeue sys_futex_requeue
+457 common statmount sys_statmount
+458 common listmount sys_listmount
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 52d7e3fad553..414b978b8010 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -189,6 +189,7 @@ config PPC
select EDAC_ATOMIC_SCRUB
select EDAC_SUPPORT
select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if ARCH_USING_PATCHABLE_FUNCTION_ENTRY
+ select FUNCTION_ALIGNMENT_4B
select GENERIC_ATOMIC64 if PPC32
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
select GENERIC_CMOS_UPDATE
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index ea4033abc07d..8c80b154e814 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -271,7 +271,6 @@ config PPC_EARLY_DEBUG_USBGECKO
config PPC_EARLY_DEBUG_PS3GELIC
bool "Early debugging through the PS3 Ethernet port"
depends on PPC_PS3
- select PS3GELIC_UDBG
help
Select this to enable early debugging for the PlayStation3 via
UDP broadcasts sent out through the Ethernet port.
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index f19dbaa1d541..051247027da0 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -10,15 +10,26 @@
# Rewritten by Cort Dougan and Paul Mackerras
#
+ifdef cross_compiling
+ ifeq ($(CROSS_COMPILE),)
+ # Auto detect cross compiler prefix.
+ # Look for: (powerpc(64(le)?)?)(-unknown)?-linux(-gnu)?-
+ CC_ARCHES := powerpc powerpc64 powerpc64le
+ CC_SUFFIXES := linux linux-gnu unknown-linux-gnu
+ CROSS_COMPILE := $(call cc-cross-prefix, $(foreach a,$(CC_ARCHES), \
+ $(foreach s,$(CC_SUFFIXES),$(a)-$(s)-)))
+ endif
+endif
+
HAS_BIARCH := $(call cc-option-yn, -m32)
# Set default 32 bits cross compilers for vdso and boot wrapper
CROSS32_COMPILE ?=
# If we're on a ppc/ppc64/ppc64le machine use that defconfig, otherwise just use
-# ppc64_defconfig because we have nothing better to go on.
+# ppc64le_defconfig because we have nothing better to go on.
uname := $(shell uname -m)
-KBUILD_DEFCONFIG := $(if $(filter ppc%,$(uname)),$(uname),ppc64)_defconfig
+KBUILD_DEFCONFIG := $(if $(filter ppc%,$(uname)),$(uname),ppc64le)_defconfig
new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi)
@@ -161,7 +172,7 @@ CFLAGS-y += $(CONFIG_TUNE_CPU)
asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1)
-KBUILD_CPPFLAGS += -I $(srctree)/arch/$(ARCH) $(asinstr)
+KBUILD_CPPFLAGS += -I $(srctree)/arch/powerpc $(asinstr)
KBUILD_AFLAGS += $(AFLAGS-y)
KBUILD_CFLAGS += $(call cc-option,-msoft-float)
KBUILD_CFLAGS += $(CFLAGS-y)
@@ -232,7 +243,7 @@ BOOT_TARGETS2 := zImage% dtbImage% treeImage.% cuImage.% simpleImage.% uImage.%
PHONY += $(BOOT_TARGETS1) $(BOOT_TARGETS2)
-boot := arch/$(ARCH)/boot
+boot := arch/powerpc/boot
$(BOOT_TARGETS1): vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
@@ -336,7 +347,7 @@ PHONY += $(generated_configs)
define archhelp
echo '* zImage - Build default images selected by kernel config'
- echo ' zImage.* - Compressed kernel image (arch/$(ARCH)/boot/zImage.*)'
+ echo ' zImage.* - Compressed kernel image (arch/powerpc/boot/zImage.*)'
echo ' uImage - U-Boot native image format'
echo ' cuImage.<dt> - Backwards compatible U-Boot image for older'
echo ' versions which do not support device trees'
@@ -347,12 +358,12 @@ define archhelp
echo ' (your) ~/bin/$(INSTALLKERNEL) or'
echo ' (distribution) /sbin/$(INSTALLKERNEL) or'
echo ' install to $$(INSTALL_PATH) and run lilo'
- echo ' *_defconfig - Select default config from arch/$(ARCH)/configs'
+ echo ' *_defconfig - Select default config from arch/powerpc/configs'
echo ''
echo ' Targets with <dt> embed a device tree blob inside the image'
echo ' These targets support board with firmware that does not'
echo ' support passing a device tree directly. Replace <dt> with the'
- echo ' name of a dts file from the arch/$(ARCH)/boot/dts/ directory'
+ echo ' name of a dts file from the arch/powerpc/boot/dts/ directory'
echo ' (minus the .dts extension).'
echo
$(foreach cfg,$(generated_configs),
diff --git a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi
index d552044c5afc..aa5152ca8120 100644
--- a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi
@@ -367,45 +367,46 @@
reg = <0xf0000 0x1000>;
interrupts = <18 2 0 0>;
fsl,tmu-range = <0xb0000 0xa0026 0x80048 0x30061>;
- fsl,tmu-calibration = <0x00000000 0x0000000f
- 0x00000001 0x00000017
- 0x00000002 0x0000001e
- 0x00000003 0x00000026
- 0x00000004 0x0000002e
- 0x00000005 0x00000035
- 0x00000006 0x0000003d
- 0x00000007 0x00000044
- 0x00000008 0x0000004c
- 0x00000009 0x00000053
- 0x0000000a 0x0000005b
- 0x0000000b 0x00000064
-
- 0x00010000 0x00000011
- 0x00010001 0x0000001c
- 0x00010002 0x00000024
- 0x00010003 0x0000002b
- 0x00010004 0x00000034
- 0x00010005 0x00000039
- 0x00010006 0x00000042
- 0x00010007 0x0000004c
- 0x00010008 0x00000051
- 0x00010009 0x0000005a
- 0x0001000a 0x00000063
-
- 0x00020000 0x00000013
- 0x00020001 0x00000019
- 0x00020002 0x00000024
- 0x00020003 0x0000002c
- 0x00020004 0x00000035
- 0x00020005 0x0000003d
- 0x00020006 0x00000046
- 0x00020007 0x00000050
- 0x00020008 0x00000059
-
- 0x00030000 0x00000002
- 0x00030001 0x0000000d
- 0x00030002 0x00000019
- 0x00030003 0x00000024>;
+ fsl,tmu-calibration =
+ <0x00000000 0x0000000f>,
+ <0x00000001 0x00000017>,
+ <0x00000002 0x0000001e>,
+ <0x00000003 0x00000026>,
+ <0x00000004 0x0000002e>,
+ <0x00000005 0x00000035>,
+ <0x00000006 0x0000003d>,
+ <0x00000007 0x00000044>,
+ <0x00000008 0x0000004c>,
+ <0x00000009 0x00000053>,
+ <0x0000000a 0x0000005b>,
+ <0x0000000b 0x00000064>,
+
+ <0x00010000 0x00000011>,
+ <0x00010001 0x0000001c>,
+ <0x00010002 0x00000024>,
+ <0x00010003 0x0000002b>,
+ <0x00010004 0x00000034>,
+ <0x00010005 0x00000039>,
+ <0x00010006 0x00000042>,
+ <0x00010007 0x0000004c>,
+ <0x00010008 0x00000051>,
+ <0x00010009 0x0000005a>,
+ <0x0001000a 0x00000063>,
+
+ <0x00020000 0x00000013>,
+ <0x00020001 0x00000019>,
+ <0x00020002 0x00000024>,
+ <0x00020003 0x0000002c>,
+ <0x00020004 0x00000035>,
+ <0x00020005 0x0000003d>,
+ <0x00020006 0x00000046>,
+ <0x00020007 0x00000050>,
+ <0x00020008 0x00000059>,
+
+ <0x00030000 0x00000002>,
+ <0x00030001 0x0000000d>,
+ <0x00030002 0x00000019>,
+ <0x00030003 0x00000024>;
#thermal-sensor-cells = <1>;
};
diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
index ad0ab33336b8..776788623204 100644
--- a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
@@ -447,41 +447,42 @@
reg = <0xf0000 0x1000>;
interrupts = <18 2 0 0>;
fsl,tmu-range = <0xa0000 0x90026 0x8004a 0x1006a>;
- fsl,tmu-calibration = <0x00000000 0x00000025
- 0x00000001 0x00000028
- 0x00000002 0x0000002d
- 0x00000003 0x00000031
- 0x00000004 0x00000036
- 0x00000005 0x0000003a
- 0x00000006 0x00000040
- 0x00000007 0x00000044
- 0x00000008 0x0000004a
- 0x00000009 0x0000004f
- 0x0000000a 0x00000054
-
- 0x00010000 0x0000000d
- 0x00010001 0x00000013
- 0x00010002 0x00000019
- 0x00010003 0x0000001f
- 0x00010004 0x00000025
- 0x00010005 0x0000002d
- 0x00010006 0x00000033
- 0x00010007 0x00000043
- 0x00010008 0x0000004b
- 0x00010009 0x00000053
-
- 0x00020000 0x00000010
- 0x00020001 0x00000017
- 0x00020002 0x0000001f
- 0x00020003 0x00000029
- 0x00020004 0x00000031
- 0x00020005 0x0000003c
- 0x00020006 0x00000042
- 0x00020007 0x0000004d
- 0x00020008 0x00000056
-
- 0x00030000 0x00000012
- 0x00030001 0x0000001d>;
+ fsl,tmu-calibration =
+ <0x00000000 0x00000025>,
+ <0x00000001 0x00000028>,
+ <0x00000002 0x0000002d>,
+ <0x00000003 0x00000031>,
+ <0x00000004 0x00000036>,
+ <0x00000005 0x0000003a>,
+ <0x00000006 0x00000040>,
+ <0x00000007 0x00000044>,
+ <0x00000008 0x0000004a>,
+ <0x00000009 0x0000004f>,
+ <0x0000000a 0x00000054>,
+
+ <0x00010000 0x0000000d>,
+ <0x00010001 0x00000013>,
+ <0x00010002 0x00000019>,
+ <0x00010003 0x0000001f>,
+ <0x00010004 0x00000025>,
+ <0x00010005 0x0000002d>,
+ <0x00010006 0x00000033>,
+ <0x00010007 0x00000043>,
+ <0x00010008 0x0000004b>,
+ <0x00010009 0x00000053>,
+
+ <0x00020000 0x00000010>,
+ <0x00020001 0x00000017>,
+ <0x00020002 0x0000001f>,
+ <0x00020003 0x00000029>,
+ <0x00020004 0x00000031>,
+ <0x00020005 0x0000003c>,
+ <0x00020006 0x00000042>,
+ <0x00020007 0x0000004d>,
+ <0x00020008 0x00000056>,
+
+ <0x00030000 0x00000012>,
+ <0x00030001 0x0000001d>;
#thermal-sensor-cells = <1>;
};
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 6e7b9e8fd225..544a65fda77b 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -92,6 +92,7 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_MEM_SOFT_DIRTY=y
CONFIG_ZONE_DEVICE=y
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
index 2b175ddf82f0..aa8bb0208bcc 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -24,6 +24,7 @@ CONFIG_PS3_VRAM=m
CONFIG_PS3_LPM=m
# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
CONFIG_KEXEC=y
+# CONFIG_PPC64_BIG_ENDIAN_ELF_ABI_V2 is not set
CONFIG_PPC_4K_PAGES=y
CONFIG_SCHED_SMT=y
CONFIG_PM=y
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
index 8d3eacb50d56..9d44e6630908 100644
--- a/arch/powerpc/configs/skiroot_defconfig
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -301,7 +301,6 @@ CONFIG_WQ_WATCHDOG=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
CONFIG_BUG_ON_DATA_CORRUPTION=y
-CONFIG_DEBUG_CREDENTIALS=y
# CONFIG_FTRACE is not set
CONFIG_XMON=y
# CONFIG_RUNTIME_TESTING_MENU is not set
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index cb77eddca54b..927d585652bc 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -17,12 +17,6 @@
#define _PAGE_EXEC 0x00001 /* execute permission */
#define _PAGE_WRITE 0x00002 /* write access allowed */
#define _PAGE_READ 0x00004 /* read access allowed */
-#define _PAGE_NA _PAGE_PRIVILEGED
-#define _PAGE_NAX _PAGE_EXEC
-#define _PAGE_RO _PAGE_READ
-#define _PAGE_ROX (_PAGE_READ | _PAGE_EXEC)
-#define _PAGE_RW (_PAGE_READ | _PAGE_WRITE)
-#define _PAGE_RWX (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
#define _PAGE_PRIVILEGED 0x00008 /* kernel access only */
#define _PAGE_SAO 0x00010 /* Strong access order */
#define _PAGE_NON_IDEMPOTENT 0x00020 /* non idempotent memory */
@@ -532,8 +526,8 @@ static inline bool pte_user(pte_t pte)
static inline bool pte_access_permitted(pte_t pte, bool write)
{
/*
- * _PAGE_READ is needed for any access and will be
- * cleared for PROT_NONE
+ * _PAGE_READ is needed for any access and will be cleared for
+ * PROT_NONE. Execute-only mapping via PROT_EXEC also returns false.
*/
if (!pte_present(pte) || !pte_user(pte) || !pte_read(pte))
return false;
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index 1950c1b825b4..fd642b729775 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -158,11 +158,6 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
*/
}
-static inline bool __pte_protnone(unsigned long pte)
-{
- return (pte & (pgprot_val(PAGE_NONE) | _PAGE_RWX)) == pgprot_val(PAGE_NONE);
-}
-
static inline bool __pte_flags_need_flush(unsigned long oldval,
unsigned long newval)
{
@@ -179,8 +174,8 @@ static inline bool __pte_flags_need_flush(unsigned long oldval,
/*
* We do not expect kernel mappings or non-PTEs or not-present PTEs.
*/
- VM_WARN_ON_ONCE(!__pte_protnone(oldval) && oldval & _PAGE_PRIVILEGED);
- VM_WARN_ON_ONCE(!__pte_protnone(newval) && newval & _PAGE_PRIVILEGED);
+ VM_WARN_ON_ONCE(oldval & _PAGE_PRIVILEGED);
+ VM_WARN_ON_ONCE(newval & _PAGE_PRIVILEGED);
VM_WARN_ON_ONCE(!(oldval & _PAGE_PTE));
VM_WARN_ON_ONCE(!(newval & _PAGE_PTE));
VM_WARN_ON_ONCE(!(oldval & _PAGE_PRESENT));
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index 9e5a39b6a311..1ebd2ca97f12 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -25,7 +25,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY))
addr += MCOUNT_INSN_SIZE;
- return addr;
+ return addr;
}
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index ddb99e982917..a41e542ba94d 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -349,7 +349,16 @@
#define H_GET_ENERGY_SCALE_INFO 0x450
#define H_PKS_SIGNED_UPDATE 0x454
#define H_WATCHDOG 0x45C
-#define MAX_HCALL_OPCODE H_WATCHDOG
+#define H_GUEST_GET_CAPABILITIES 0x460
+#define H_GUEST_SET_CAPABILITIES 0x464
+#define H_GUEST_CREATE 0x470
+#define H_GUEST_CREATE_VCPU 0x474
+#define H_GUEST_GET_STATE 0x478
+#define H_GUEST_SET_STATE 0x47C
+#define H_GUEST_RUN_VCPU 0x480
+#define H_GUEST_COPY_MEMORY 0x484
+#define H_GUEST_DELETE 0x488
+#define MAX_HCALL_OPCODE H_GUEST_DELETE
/* Scope args for H_SCM_UNBIND_ALL */
#define H_UNBIND_SCOPE_ALL (0x1)
@@ -393,15 +402,6 @@
#define H_ENTER_NESTED 0xF804
#define H_TLB_INVALIDATE 0xF808
#define H_COPY_TOFROM_GUEST 0xF80C
-#define H_GUEST_GET_CAPABILITIES 0x460
-#define H_GUEST_SET_CAPABILITIES 0x464
-#define H_GUEST_CREATE 0x470
-#define H_GUEST_CREATE_VCPU 0x474
-#define H_GUEST_GET_STATE 0x478
-#define H_GUEST_SET_STATE 0x47C
-#define H_GUEST_RUN_VCPU 0x480
-#define H_GUEST_COPY_MEMORY 0x484
-#define H_GUEST_DELETE 0x488
/* Flags for H_SVM_PAGE_IN */
#define H_PAGE_IN_SHARED 0x1
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 4f527d09c92b..3e1e2a698c9e 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -302,6 +302,7 @@ void kvmhv_nested_exit(void);
void kvmhv_vm_nested_init(struct kvm *kvm);
long kvmhv_set_partition_table(struct kvm_vcpu *vcpu);
long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu);
+void kvmhv_flush_lpid(u64 lpid);
void kvmhv_set_ptbl_entry(u64 lpid, u64 dw0, u64 dw1);
void kvmhv_release_all_nested(struct kvm *kvm);
long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
@@ -593,13 +594,17 @@ static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \
KVMPPC_BOOK3S_VCORE_ACCESSOR(vtb, 64, KVMPPC_GSID_VTB)
-KVMPPC_BOOK3S_VCORE_ACCESSOR(tb_offset, 64, KVMPPC_GSID_TB_OFFSET)
KVMPPC_BOOK3S_VCORE_ACCESSOR_GET(arch_compat, 32, KVMPPC_GSID_LOGICAL_PVR)
KVMPPC_BOOK3S_VCORE_ACCESSOR_GET(lpcr, 64, KVMPPC_GSID_LPCR)
+KVMPPC_BOOK3S_VCORE_ACCESSOR_SET(tb_offset, 64, KVMPPC_GSID_TB_OFFSET)
+
+static inline u64 kvmppc_get_tb_offset(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.vcore->tb_offset;
+}
static inline u64 kvmppc_get_dec_expires(struct kvm_vcpu *vcpu)
{
- WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_TB_OFFSET) < 0);
WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_DEC_EXPIRY_TB) < 0);
return vcpu->arch.dec_expires;
}
@@ -607,7 +612,6 @@ static inline u64 kvmppc_get_dec_expires(struct kvm_vcpu *vcpu)
static inline void kvmppc_set_dec_expires(struct kvm_vcpu *vcpu, u64 val)
{
vcpu->arch.dec_expires = val;
- WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_TB_OFFSET) < 0);
kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_DEC_EXPIRY_TB);
}
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 2477021bff54..d8729ec81ca0 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -682,6 +682,7 @@ void kvmhv_nestedv2_vcpu_free(struct kvm_vcpu *vcpu, struct kvmhv_nestedv2_io *i
int kvmhv_nestedv2_flush_vcpu(struct kvm_vcpu *vcpu, u64 time_limit);
int kvmhv_nestedv2_set_ptbl_entry(unsigned long lpid, u64 dw0, u64 dw1);
int kvmhv_nestedv2_parse_output(struct kvm_vcpu *vcpu);
+int kvmhv_nestedv2_set_vpa(struct kvm_vcpu *vcpu, unsigned long vpa);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
diff --git a/arch/powerpc/include/asm/linkage.h b/arch/powerpc/include/asm/linkage.h
index b88d1d2cf304..b71b9582e754 100644
--- a/arch/powerpc/include/asm/linkage.h
+++ b/arch/powerpc/include/asm/linkage.h
@@ -4,9 +4,6 @@
#include <asm/types.h>
-#define __ALIGN .align 2
-#define __ALIGN_STR ".align 2"
-
#ifdef CONFIG_PPC64_ELF_ABI_V1
#define cond_syscall(x) \
asm ("\t.weak " #x "\n\t.set " #x ", sys_ni_syscall\n" \
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 52cc25864a1b..d8b7e246a32f 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -412,5 +412,9 @@ extern void *abatron_pteptrs[2];
#include <asm/nohash/mmu.h>
#endif
+#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
+#define __HAVE_ARCH_RESERVED_KERNEL_PAGES
+#endif
+
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_MMU_H_ */
diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h
index 4c6c6dbd182f..da827d2d0866 100644
--- a/arch/powerpc/include/asm/mmzone.h
+++ b/arch/powerpc/include/asm/mmzone.h
@@ -42,14 +42,6 @@ u64 memory_hotplug_max(void);
#else
#define memory_hotplug_max() memblock_end_of_DRAM()
#endif /* CONFIG_NUMA */
-#ifdef CONFIG_FA_DUMP
-#define __HAVE_ARCH_RESERVED_KERNEL_PAGES
-#endif
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-extern int create_section_mapping(unsigned long start, unsigned long end,
- int nid, pgprot_t prot);
-#endif
#endif /* __KERNEL__ */
#endif /* _ASM_MMZONE_H_ */
diff --git a/arch/powerpc/include/asm/papr-sysparm.h b/arch/powerpc/include/asm/papr-sysparm.h
index f5fdbd8ae9db..0dbbff59101d 100644
--- a/arch/powerpc/include/asm/papr-sysparm.h
+++ b/arch/powerpc/include/asm/papr-sysparm.h
@@ -2,8 +2,10 @@
#ifndef _ASM_POWERPC_PAPR_SYSPARM_H
#define _ASM_POWERPC_PAPR_SYSPARM_H
+#include <uapi/asm/papr-sysparm.h>
+
typedef struct {
- const u32 token;
+ u32 token;
} papr_sysparm_t;
#define mk_papr_sysparm(x_) ((papr_sysparm_t){ .token = x_, })
@@ -20,11 +22,14 @@ typedef struct {
#define PAPR_SYSPARM_TLB_BLOCK_INVALIDATE_ATTRS mk_papr_sysparm(50)
#define PAPR_SYSPARM_LPAR_NAME mk_papr_sysparm(55)
-enum {
- PAPR_SYSPARM_MAX_INPUT = 1024,
- PAPR_SYSPARM_MAX_OUTPUT = 4000,
-};
-
+/**
+ * struct papr_sysparm_buf - RTAS work area layout for system parameter functions.
+ *
+ * This is the memory layout of the buffers passed to/from
+ * ibm,get-system-parameter and ibm,set-system-parameter. It is
+ * distinct from the papr_sysparm_io_block structure that is passed
+ * between user space and the kernel.
+ */
struct papr_sysparm_buf {
__be16 len;
char val[PAPR_SYSPARM_MAX_OUTPUT];
diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
index ac4279208d63..b78b82d66057 100644
--- a/arch/powerpc/include/asm/paravirt.h
+++ b/arch/powerpc/include/asm/paravirt.h
@@ -76,6 +76,17 @@ static inline bool is_vcpu_idle(int vcpu)
{
return lppaca_of(vcpu).idle;
}
+
+static inline bool vcpu_is_dispatched(int vcpu)
+{
+ /*
+ * This is the yield_count. An "odd" value (low bit on) means that
+ * the processor is yielded (either because of an OS yield or a
+ * hypervisor preempt). An even value implies that the processor is
+ * currently executing.
+ */
+ return (!(yield_count_of(vcpu) & 1));
+}
#else
static inline bool is_shared_processor(void)
{
@@ -109,6 +120,10 @@ static inline bool is_vcpu_idle(int vcpu)
{
return false;
}
+static inline bool vcpu_is_dispatched(int vcpu)
+{
+ return true;
+}
#endif
#define vcpu_is_preempted vcpu_is_preempted
@@ -134,12 +149,12 @@ static inline bool vcpu_is_preempted(int cpu)
* If the hypervisor has dispatched the target CPU on a physical
* processor, then the target CPU is definitely not preempted.
*/
- if (!(yield_count_of(cpu) & 1))
+ if (vcpu_is_dispatched(cpu))
return false;
/*
- * If the target CPU has yielded to Hypervisor but OS has not
- * requested idle then the target CPU is definitely preempted.
+ * if the target CPU is not dispatched and the guest OS
+ * has not marked the CPU idle, then it is hypervisor preempted.
*/
if (!is_vcpu_idle(cpu))
return true;
@@ -166,7 +181,7 @@ static inline bool vcpu_is_preempted(int cpu)
/*
* The PowerVM hypervisor dispatches VMs on a whole core
- * basis. So we know that a thread sibling of the local CPU
+ * basis. So we know that a thread sibling of the executing CPU
* cannot have been preempted by the hypervisor, even if it
* has called H_CONFER, which will set the yield bit.
*/
@@ -174,15 +189,17 @@ static inline bool vcpu_is_preempted(int cpu)
return false;
/*
- * If any of the threads of the target CPU's core are not
- * preempted or ceded, then consider target CPU to be
- * non-preempted.
+ * The specific target CPU was marked by guest OS as idle, but
+ * then also check all other cpus in the core for PowerVM
+ * because it does core scheduling and one of the vcpu
+ * of the core getting preempted by hypervisor implies
+ * other vcpus can also be considered preempted.
*/
first_cpu = cpu_first_thread_sibling(cpu);
for (i = first_cpu; i < first_cpu + threads_per_core; i++) {
if (i == cpu)
continue;
- if (!(yield_count_of(i) & 1))
+ if (vcpu_is_dispatched(i))
return false;
if (!is_vcpu_idle(i))
return true;
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index d9fcff575027..ce2b1b5eebdd 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -35,6 +35,9 @@ extern void init_pci_config_tokens (void);
extern unsigned long get_phb_buid (struct device_node *);
extern int rtas_setup_phb(struct pci_controller *phb);
+int rtas_pci_dn_read_config(struct pci_dn *pdn, int where, int size, u32 *val);
+int rtas_pci_dn_write_config(struct pci_dn *pdn, int where, int size, u32 val);
+
#ifdef CONFIG_EEH
void eeh_addr_cache_insert_dev(struct pci_dev *dev);
@@ -44,8 +47,6 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity);
int eeh_pci_enable(struct eeh_pe *pe, int function);
int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed);
void eeh_save_bars(struct eeh_dev *edev);
-int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
-int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
void eeh_pe_state_mark(struct eeh_pe *pe, int state);
void eeh_pe_mark_isolated(struct eeh_pe *pe);
void eeh_pe_state_clear(struct eeh_pe *pe, int state, bool include_passed);
diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h
index a5f36546a052..d13d8fdc3411 100644
--- a/arch/powerpc/include/asm/ps3.h
+++ b/arch/powerpc/include/asm/ps3.h
@@ -514,4 +514,10 @@ u64 ps3_get_spe_id(void *arg);
void ps3_early_mm_init(void);
+#ifdef CONFIG_PPC_EARLY_DEBUG_PS3GELIC
+void udbg_shutdown_ps3gelic(void);
+#else
+static inline void udbg_shutdown_ps3gelic(void) {}
+#endif
+
#endif
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 4ae4ab9090a2..7fd09f25452d 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1361,6 +1361,7 @@
#define PVR_POWER8E 0x004B
#define PVR_POWER8NVL 0x004C
#define PVR_POWER8 0x004D
+#define PVR_HX_C2000 0x0066
#define PVR_POWER9 0x004E
#define PVR_POWER10 0x0080
#define PVR_BE 0x0070
diff --git a/arch/powerpc/include/asm/reg_a2.h b/arch/powerpc/include/asm/reg_a2.h
deleted file mode 100644
index 74fba29e9491..000000000000
--- a/arch/powerpc/include/asm/reg_a2.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Register definitions specific to the A2 core
- *
- * Copyright (C) 2008 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
- */
-
-#ifndef __ASM_POWERPC_REG_A2_H__
-#define __ASM_POWERPC_REG_A2_H__
-
-#include <asm/asm-const.h>
-
-#define SPRN_TENSR 0x1b5
-#define SPRN_TENS 0x1b6 /* Thread ENable Set */
-#define SPRN_TENC 0x1b7 /* Thread ENable Clear */
-
-#define SPRN_A2_CCR0 0x3f0 /* Core Configuration Register 0 */
-#define SPRN_A2_CCR1 0x3f1 /* Core Configuration Register 1 */
-#define SPRN_A2_CCR2 0x3f2 /* Core Configuration Register 2 */
-#define SPRN_MMUCR0 0x3fc /* MMU Control Register 0 */
-#define SPRN_MMUCR1 0x3fd /* MMU Control Register 1 */
-#define SPRN_MMUCR2 0x3fe /* MMU Control Register 2 */
-#define SPRN_MMUCR3 0x3ff /* MMU Control Register 3 */
-
-#define SPRN_IAR 0x372
-
-#define SPRN_IUCR0 0x3f3
-#define IUCR0_ICBI_ACK 0x1000
-
-#define SPRN_XUCR0 0x3f6 /* Execution Unit Config Register 0 */
-
-#define A2_IERAT_SIZE 16
-#define A2_DERAT_SIZE 32
-
-/* A2 MMUCR0 bits */
-#define MMUCR0_ECL 0x80000000 /* Extended Class for TLB fills */
-#define MMUCR0_TID_NZ 0x40000000 /* TID is non-zero */
-#define MMUCR0_TS 0x10000000 /* Translation space for TLB fills */
-#define MMUCR0_TGS 0x20000000 /* Guest space for TLB fills */
-#define MMUCR0_TLBSEL 0x0c000000 /* TLB or ERAT target for TLB fills */
-#define MMUCR0_TLBSEL_U 0x00000000 /* TLBSEL = UTLB */
-#define MMUCR0_TLBSEL_I 0x08000000 /* TLBSEL = I-ERAT */
-#define MMUCR0_TLBSEL_D 0x0c000000 /* TLBSEL = D-ERAT */
-#define MMUCR0_LOCKSRSH 0x02000000 /* Use TLB lock on tlbsx. */
-#define MMUCR0_TID_MASK 0x000000ff /* TID field */
-
-/* A2 MMUCR1 bits */
-#define MMUCR1_IRRE 0x80000000 /* I-ERAT round robin enable */
-#define MMUCR1_DRRE 0x40000000 /* D-ERAT round robin enable */
-#define MMUCR1_REE 0x20000000 /* Reference Exception Enable*/
-#define MMUCR1_CEE 0x10000000 /* Change exception enable */
-#define MMUCR1_CSINV_ALL 0x00000000 /* Inval ERAT on all CS evts */
-#define MMUCR1_CSINV_NISYNC 0x04000000 /* Inval ERAT on all ex isync*/
-#define MMUCR1_CSINV_NEVER 0x0c000000 /* Don't inval ERAT on CS */
-#define MMUCR1_ICTID 0x00080000 /* IERAT class field as TID */
-#define MMUCR1_ITTID 0x00040000 /* IERAT thdid field as TID */
-#define MMUCR1_DCTID 0x00020000 /* DERAT class field as TID */
-#define MMUCR1_DTTID 0x00010000 /* DERAT thdid field as TID */
-#define MMUCR1_DCCD 0x00008000 /* DERAT class ignore */
-#define MMUCR1_TLBWE_BINV 0x00004000 /* back invalidate on tlbwe */
-
-/* A2 MMUCR2 bits */
-#define MMUCR2_PSSEL_SHIFT 4
-
-/* A2 MMUCR3 bits */
-#define MMUCR3_THID 0x0000000f /* Thread ID */
-
-/* *** ERAT TLB bits definitions */
-#define TLB0_EPN_MASK ASM_CONST(0xfffffffffffff000)
-#define TLB0_CLASS_MASK ASM_CONST(0x0000000000000c00)
-#define TLB0_CLASS_00 ASM_CONST(0x0000000000000000)
-#define TLB0_CLASS_01 ASM_CONST(0x0000000000000400)
-#define TLB0_CLASS_10 ASM_CONST(0x0000000000000800)
-#define TLB0_CLASS_11 ASM_CONST(0x0000000000000c00)
-#define TLB0_V ASM_CONST(0x0000000000000200)
-#define TLB0_X ASM_CONST(0x0000000000000100)
-#define TLB0_SIZE_MASK ASM_CONST(0x00000000000000f0)
-#define TLB0_SIZE_4K ASM_CONST(0x0000000000000010)
-#define TLB0_SIZE_64K ASM_CONST(0x0000000000000030)
-#define TLB0_SIZE_1M ASM_CONST(0x0000000000000050)
-#define TLB0_SIZE_16M ASM_CONST(0x0000000000000070)
-#define TLB0_SIZE_1G ASM_CONST(0x00000000000000a0)
-#define TLB0_THDID_MASK ASM_CONST(0x000000000000000f)
-#define TLB0_THDID_0 ASM_CONST(0x0000000000000001)
-#define TLB0_THDID_1 ASM_CONST(0x0000000000000002)
-#define TLB0_THDID_2 ASM_CONST(0x0000000000000004)
-#define TLB0_THDID_3 ASM_CONST(0x0000000000000008)
-#define TLB0_THDID_ALL ASM_CONST(0x000000000000000f)
-
-#define TLB1_RESVATTR ASM_CONST(0x00f0000000000000)
-#define TLB1_U0 ASM_CONST(0x0008000000000000)
-#define TLB1_U1 ASM_CONST(0x0004000000000000)
-#define TLB1_U2 ASM_CONST(0x0002000000000000)
-#define TLB1_U3 ASM_CONST(0x0001000000000000)
-#define TLB1_R ASM_CONST(0x0000800000000000)
-#define TLB1_C ASM_CONST(0x0000400000000000)
-#define TLB1_RPN_MASK ASM_CONST(0x000003fffffff000)
-#define TLB1_W ASM_CONST(0x0000000000000800)
-#define TLB1_I ASM_CONST(0x0000000000000400)
-#define TLB1_M ASM_CONST(0x0000000000000200)
-#define TLB1_G ASM_CONST(0x0000000000000100)
-#define TLB1_E ASM_CONST(0x0000000000000080)
-#define TLB1_VF ASM_CONST(0x0000000000000040)
-#define TLB1_UX ASM_CONST(0x0000000000000020)
-#define TLB1_SX ASM_CONST(0x0000000000000010)
-#define TLB1_UW ASM_CONST(0x0000000000000008)
-#define TLB1_SW ASM_CONST(0x0000000000000004)
-#define TLB1_UR ASM_CONST(0x0000000000000002)
-#define TLB1_SR ASM_CONST(0x0000000000000001)
-
-/* A2 erativax attributes definitions */
-#define ERATIVAX_RS_IS_ALL 0x000
-#define ERATIVAX_RS_IS_TID 0x040
-#define ERATIVAX_RS_IS_CLASS 0x080
-#define ERATIVAX_RS_IS_FULLMATCH 0x0c0
-#define ERATIVAX_CLASS_00 0x000
-#define ERATIVAX_CLASS_01 0x010
-#define ERATIVAX_CLASS_10 0x020
-#define ERATIVAX_CLASS_11 0x030
-#define ERATIVAX_PSIZE_4K (TLB_PSIZE_4K >> 1)
-#define ERATIVAX_PSIZE_64K (TLB_PSIZE_64K >> 1)
-#define ERATIVAX_PSIZE_1M (TLB_PSIZE_1M >> 1)
-#define ERATIVAX_PSIZE_16M (TLB_PSIZE_16M >> 1)
-#define ERATIVAX_PSIZE_1G (TLB_PSIZE_1G >> 1)
-
-/* A2 eratilx attributes definitions */
-#define ERATILX_T_ALL 0
-#define ERATILX_T_TID 1
-#define ERATILX_T_TGS 2
-#define ERATILX_T_FULLMATCH 3
-#define ERATILX_T_CLASS0 4
-#define ERATILX_T_CLASS1 5
-#define ERATILX_T_CLASS2 6
-#define ERATILX_T_CLASS3 7
-
-/* XUCR0 bits */
-#define XUCR0_TRACE_UM_T0 0x40000000 /* Thread 0 */
-#define XUCR0_TRACE_UM_T1 0x20000000 /* Thread 1 */
-#define XUCR0_TRACE_UM_T2 0x10000000 /* Thread 2 */
-#define XUCR0_TRACE_UM_T3 0x08000000 /* Thread 3 */
-
-/* A2 CCR0 register */
-#define A2_CCR0_PME_DISABLED 0x00000000
-#define A2_CCR0_PME_SLEEP 0x40000000
-#define A2_CCR0_PME_RVW 0x80000000
-#define A2_CCR0_PME_DISABLED2 0xc0000000
-
-/* A2 CCR2 register */
-#define A2_CCR2_ERAT_ONLY_MODE 0x00000001
-#define A2_CCR2_ENABLE_ICSWX 0x00000002
-#define A2_CCR2_ENABLE_PC 0x20000000
-#define A2_CCR2_ENABLE_TRACE 0x40000000
-
-#endif /* __ASM_POWERPC_REG_A2_H__ */
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index c697c3c74694..9bb2210c8d44 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -3,6 +3,7 @@
#define _POWERPC_RTAS_H
#ifdef __KERNEL__
+#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <asm/page.h>
#include <asm/rtas-types.h>
@@ -201,12 +202,25 @@ typedef struct {
/* Memory set aside for sys_rtas to use with calls that need a work area. */
#define RTAS_USER_REGION_SIZE (64 * 1024)
-/* RTAS return status codes */
-#define RTAS_HARDWARE_ERROR -1 /* Hardware Error */
-#define RTAS_BUSY -2 /* RTAS Busy */
-#define RTAS_INVALID_PARAMETER -3 /* Invalid indicator/domain/sensor etc. */
-#define RTAS_EXTENDED_DELAY_MIN 9900
-#define RTAS_EXTENDED_DELAY_MAX 9905
+/*
+ * Common RTAS function return values, derived from the table "RTAS
+ * Status Word Values" in PAPR+ v2.13 7.2.8: "Return Codes". If a
+ * function can return a value in this table then generally it has the
+ * meaning listed here. More extended commentary in the documentation
+ * for rtas_call().
+ *
+ * RTAS functions may use negative and positive numbers not in this
+ * set for function-specific error and success conditions,
+ * respectively.
+ */
+#define RTAS_SUCCESS 0 /* Success. */
+#define RTAS_HARDWARE_ERROR -1 /* Hardware or other unspecified error. */
+#define RTAS_BUSY -2 /* Retry immediately. */
+#define RTAS_INVALID_PARAMETER -3 /* Invalid indicator/domain/sensor etc. */
+#define RTAS_UNEXPECTED_STATE_CHANGE -7 /* Seems limited to EEH and slot reset. */
+#define RTAS_EXTENDED_DELAY_MIN 9900 /* Retry after delaying for ~1ms. */
+#define RTAS_EXTENDED_DELAY_MAX 9905 /* Retry after delaying for ~100s. */
+#define RTAS_ML_ISOLATION_ERROR -9000 /* Multi-level isolation error. */
/* statuses specific to ibm,suspend-me */
#define RTAS_SUSPEND_ABORTED 9000 /* Suspension aborted */
@@ -268,7 +282,7 @@ typedef struct {
#define RTAS_TYPE_DEALLOC 0xE3
#define RTAS_TYPE_DUMP 0xE4
#define RTAS_TYPE_HOTPLUG 0xE5
-/* I don't add PowerMGM events right now, this is a different topic */
+/* I don't add PowerMGM events right now, this is a different topic */
#define RTAS_TYPE_PMGM_POWER_SW_ON 0x60
#define RTAS_TYPE_PMGM_POWER_SW_OFF 0x61
#define RTAS_TYPE_PMGM_LID_OPEN 0x62
@@ -408,44 +422,41 @@ static inline bool rtas_function_implemented(const rtas_fn_handle_t handle)
{
return rtas_function_token(handle) != RTAS_UNKNOWN_SERVICE;
}
-extern int rtas_token(const char *service);
-extern int rtas_service_present(const char *service);
-extern int rtas_call(int token, int, int, int *, ...);
+int rtas_token(const char *service);
+int rtas_call(int token, int nargs, int nret, int *outputs, ...);
void rtas_call_unlocked(struct rtas_args *args, int token, int nargs,
int nret, ...);
-extern void __noreturn rtas_restart(char *cmd);
-extern void rtas_power_off(void);
-extern void __noreturn rtas_halt(void);
-extern void rtas_os_term(char *str);
+void __noreturn rtas_restart(char *cmd);
+void rtas_power_off(void);
+void __noreturn rtas_halt(void);
+void rtas_os_term(char *str);
void rtas_activate_firmware(void);
-extern int rtas_get_sensor(int sensor, int index, int *state);
-extern int rtas_get_sensor_fast(int sensor, int index, int *state);
-extern int rtas_get_power_level(int powerdomain, int *level);
-extern int rtas_set_power_level(int powerdomain, int level, int *setlevel);
-extern bool rtas_indicator_present(int token, int *maxindex);
-extern int rtas_set_indicator(int indicator, int index, int new_value);
-extern int rtas_set_indicator_fast(int indicator, int index, int new_value);
-extern void rtas_progress(char *s, unsigned short hex);
+int rtas_get_sensor(int sensor, int index, int *state);
+int rtas_get_sensor_fast(int sensor, int index, int *state);
+int rtas_get_power_level(int powerdomain, int *level);
+int rtas_set_power_level(int powerdomain, int level, int *setlevel);
+bool rtas_indicator_present(int token, int *maxindex);
+int rtas_set_indicator(int indicator, int index, int new_value);
+int rtas_set_indicator_fast(int indicator, int index, int new_value);
+void rtas_progress(char *s, unsigned short hex);
int rtas_ibm_suspend_me(int *fw_status);
int rtas_error_rc(int rtas_rc);
struct rtc_time;
-extern time64_t rtas_get_boot_time(void);
-extern void rtas_get_rtc_time(struct rtc_time *rtc_time);
-extern int rtas_set_rtc_time(struct rtc_time *rtc_time);
+time64_t rtas_get_boot_time(void);
+void rtas_get_rtc_time(struct rtc_time *rtc_time);
+int rtas_set_rtc_time(struct rtc_time *rtc_time);
-extern unsigned int rtas_busy_delay_time(int status);
+unsigned int rtas_busy_delay_time(int status);
bool rtas_busy_delay(int status);
-extern int early_init_dt_scan_rtas(unsigned long node,
- const char *uname, int depth, void *data);
+int early_init_dt_scan_rtas(unsigned long node, const char *uname, int depth, void *data);
-extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal);
+void pSeries_log_error(char *buf, unsigned int err_type, int fatal);
#ifdef CONFIG_PPC_PSERIES
extern time64_t last_rtas_event;
-extern int clobbering_unread_rtas_event(void);
-extern void post_mobility_fixup(void);
+int clobbering_unread_rtas_event(void);
int rtas_syscall_dispatch_ibm_suspend_me(u64 handle);
#else
static inline int clobbering_unread_rtas_event(void) { return 0; }
@@ -456,14 +467,14 @@ static inline int rtas_syscall_dispatch_ibm_suspend_me(u64 handle)
#endif
#ifdef CONFIG_PPC_RTAS_DAEMON
-extern void rtas_cancel_event_scan(void);
+void rtas_cancel_event_scan(void);
#else
static inline void rtas_cancel_event_scan(void) { }
#endif
/* Error types logged. */
#define ERR_FLAG_ALREADY_LOGGED 0x0
-#define ERR_FLAG_BOOT 0x1 /* log was pulled from NVRAM on boot */
+#define ERR_FLAG_BOOT 0x1 /* log was pulled from NVRAM on boot */
#define ERR_TYPE_RTAS_LOG 0x2 /* from rtas event-scan */
#define ERR_TYPE_KERNEL_PANIC 0x4 /* from die()/panic() */
#define ERR_TYPE_KERNEL_PANIC_GZ 0x8 /* ditto, compressed */
@@ -473,7 +484,7 @@ static inline void rtas_cancel_event_scan(void) { }
(ERR_TYPE_RTAS_LOG | ERR_TYPE_KERNEL_PANIC | ERR_TYPE_KERNEL_PANIC_GZ)
#define RTAS_DEBUG KERN_DEBUG "RTAS: "
-
+
#define RTAS_ERROR_LOG_MAX 2048
/*
@@ -481,7 +492,7 @@ static inline void rtas_cancel_event_scan(void) { }
* for all rtas calls that require an error buffer argument.
* This includes 'check-exception' and 'rtas-last-error'.
*/
-extern int rtas_get_error_log_max(void);
+int rtas_get_error_log_max(void);
/* Event Scan Parameters */
#define EVENT_SCAN_ALL_EVENTS 0xf0000000
@@ -502,6 +513,8 @@ extern char rtas_data_buf[RTAS_DATA_BUF_SIZE];
/* RMO buffer reserved for user-space RTAS use */
extern unsigned long rtas_rmo_buf;
+extern struct mutex rtas_ibm_get_vpd_lock;
+
#define GLOBAL_INTERRUPT_QUEUE 9005
/**
@@ -520,8 +533,8 @@ static inline u32 rtas_config_addr(int busno, int devfn, int reg)
(devfn << 8) | (reg & 0xff);
}
-extern void rtas_give_timebase(void);
-extern void rtas_take_timebase(void);
+void rtas_give_timebase(void);
+void rtas_take_timebase(void);
#ifdef CONFIG_PPC_RTAS
static inline int page_is_rtas_user_buf(unsigned long pfn)
@@ -534,7 +547,7 @@ static inline int page_is_rtas_user_buf(unsigned long pfn)
/* Not the best place to put pSeries_coalesce_init, will be fixed when we
* move some of the rtas suspend-me stuff to pseries */
-extern void pSeries_coalesce_init(void);
+void pSeries_coalesce_init(void);
void rtas_initialize(void);
#else
static inline int page_is_rtas_user_buf(unsigned long pfn) { return 0;}
@@ -542,8 +555,6 @@ static inline void pSeries_coalesce_init(void) { }
static inline void rtas_initialize(void) { }
#endif
-extern int call_rtas(const char *, int, int, unsigned long *, ...);
-
#ifdef CONFIG_HV_PERF_CTRS
void read_24x7_sys_info(void);
#else
diff --git a/arch/powerpc/include/uapi/asm/papr-miscdev.h b/arch/powerpc/include/uapi/asm/papr-miscdev.h
new file mode 100644
index 000000000000..49a2a270b7f3
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-miscdev.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_MISCDEV_H_
+#define _UAPI_PAPR_MISCDEV_H_
+
+enum {
+ PAPR_MISCDEV_IOC_ID = 0xb2,
+};
+
+#endif /* _UAPI_PAPR_MISCDEV_H_ */
diff --git a/arch/powerpc/include/uapi/asm/papr-sysparm.h b/arch/powerpc/include/uapi/asm/papr-sysparm.h
new file mode 100644
index 000000000000..9f9a0f267ea5
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-sysparm.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_SYSPARM_H_
+#define _UAPI_PAPR_SYSPARM_H_
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+#include <asm/papr-miscdev.h>
+
+enum {
+ PAPR_SYSPARM_MAX_INPUT = 1024,
+ PAPR_SYSPARM_MAX_OUTPUT = 4000,
+};
+
+struct papr_sysparm_io_block {
+ __u32 parameter;
+ __u16 length;
+ char data[PAPR_SYSPARM_MAX_OUTPUT];
+};
+
+/**
+ * PAPR_SYSPARM_IOC_GET - Retrieve the value of a PAPR system parameter.
+ *
+ * Uses _IOWR because of one corner case: Retrieving the value of the
+ * "OS Service Entitlement Status" parameter (60) requires the caller
+ * to supply input data (a date string) in the buffer passed to
+ * firmware. So the @length and @data of the incoming
+ * papr_sysparm_io_block are always used to initialize the work area
+ * supplied to ibm,get-system-parameter. No other parameters are known
+ * to parameterize the result this way, and callers are encouraged
+ * (but not required) to zero-initialize @length and @data in the
+ * common case.
+ *
+ * On error the contents of the ioblock are indeterminate.
+ *
+ * Return:
+ * 0: Success; @length is the length of valid data in @data, not to exceed @PAPR_SYSPARM_MAX_OUTPUT.
+ * -EIO: Platform error. (-1)
+ * -EINVAL: Incorrect data length or format. (-9999)
+ * -EPERM: The calling partition is not allowed to access this parameter. (-9002)
+ * -EOPNOTSUPP: Parameter not supported on this platform (-3)
+ */
+#define PAPR_SYSPARM_IOC_GET _IOWR(PAPR_MISCDEV_IOC_ID, 1, struct papr_sysparm_io_block)
+
+/**
+ * PAPR_SYSPARM_IOC_SET - Update the value of a PAPR system parameter.
+ *
+ * The contents of the ioblock are unchanged regardless of success.
+ *
+ * Return:
+ * 0: Success; the parameter has been updated.
+ * -EIO: Platform error. (-1)
+ * -EINVAL: Incorrect data length or format. (-9999)
+ * -EPERM: The calling partition is not allowed to access this parameter. (-9002)
+ * -EOPNOTSUPP: Parameter not supported on this platform (-3)
+ */
+#define PAPR_SYSPARM_IOC_SET _IOW(PAPR_MISCDEV_IOC_ID, 2, struct papr_sysparm_io_block)
+
+#endif /* _UAPI_PAPR_SYSPARM_H_ */
diff --git a/arch/powerpc/include/uapi/asm/papr-vpd.h b/arch/powerpc/include/uapi/asm/papr-vpd.h
new file mode 100644
index 000000000000..1c88e87cb420
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-vpd.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_VPD_H_
+#define _UAPI_PAPR_VPD_H_
+
+#include <asm/ioctl.h>
+#include <asm/papr-miscdev.h>
+
+struct papr_location_code {
+ /*
+ * PAPR+ v2.13 12.3.2.4 Converged Location Code Rules - Length
+ * Restrictions. 79 characters plus nul.
+ */
+ char str[80];
+};
+
+/*
+ * ioctl for /dev/papr-vpd. Returns a VPD handle fd corresponding to
+ * the location code.
+ */
+#define PAPR_VPD_IOC_CREATE_HANDLE _IOW(PAPR_MISCDEV_IOC_ID, 0, struct papr_location_code)
+
+#endif /* _UAPI_PAPR_VPD_H_ */
diff --git a/arch/powerpc/kernel/cpu_specs_book3s_64.h b/arch/powerpc/kernel/cpu_specs_book3s_64.h
index c370c1b804a9..3ff9757df4c0 100644
--- a/arch/powerpc/kernel/cpu_specs_book3s_64.h
+++ b/arch/powerpc/kernel/cpu_specs_book3s_64.h
@@ -238,6 +238,21 @@ static struct cpu_spec cpu_specs[] __initdata = {
.machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
+ { /* 2.07-compliant processor, HeXin C2000 processor */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00660000,
+ .cpu_name = "HX-C2000",
+ .cpu_features = CPU_FTRS_POWER8,
+ .cpu_user_features = COMMON_USER_POWER8,
+ .cpu_user_features2 = COMMON_USER2_POWER8,
+ .mmu_features = MMU_FTRS_POWER8,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_power8,
+ .cpu_restore = __restore_cpu_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
+ .platform = "power8",
+ },
{ /* 3.00-compliant processor, i.e. Power9 "architected" mode */
.pvr_mask = 0xffffffff,
.pvr_value = 0x0f000005,
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index e97a0fd0ae90..6f6801da9dc1 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -20,9 +20,9 @@
#include <asm/setup.h>
#include <asm/cpu_setup.h>
-static struct cpu_spec the_cpu_spec __read_mostly;
+static struct cpu_spec the_cpu_spec __ro_after_init;
-struct cpu_spec* cur_cpu_spec __read_mostly = NULL;
+struct cpu_spec *cur_cpu_spec __ro_after_init = NULL;
EXPORT_SYMBOL(cur_cpu_spec);
/* The platform string corresponding to the real PVR */
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 7ab4c8c0f1ab..dcf0591ad3c2 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -14,7 +14,6 @@
#include <asm/cputable.h>
#include <asm/setup.h>
#include <asm/thread_info.h>
-#include <asm/reg_a2.h>
#include <asm/exception-64e.h>
#include <asm/bug.h>
#include <asm/irqflags.h>
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index eddc031c4b95..7e793b503e29 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -18,6 +18,7 @@
#include <linux/kernel.h>
#include <linux/lockdep.h>
#include <linux/memblock.h>
+#include <linux/mutex.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/reboot.h>
@@ -70,14 +71,33 @@ struct rtas_filter {
* ppc64le, and we want to keep it that way. It does
* not make sense for this to be set when @filter
* is NULL.
+ * @lock: Pointer to an optional dedicated per-function mutex. This
+ * should be set for functions that require multiple calls in
+ * sequence to complete a single operation, and such sequences
+ * will disrupt each other if allowed to interleave. Users of
+ * this function are required to hold the associated lock for
+ * the duration of the call sequence. Add an explanatory
+ * comment to the function table entry if setting this member.
*/
struct rtas_function {
s32 token;
const bool banned_for_syscall_on_le:1;
const char * const name;
const struct rtas_filter *filter;
+ struct mutex *lock;
};
+/*
+ * Per-function locks for sequence-based RTAS functions.
+ */
+static DEFINE_MUTEX(rtas_ibm_activate_firmware_lock);
+static DEFINE_MUTEX(rtas_ibm_get_dynamic_sensor_state_lock);
+static DEFINE_MUTEX(rtas_ibm_get_indices_lock);
+static DEFINE_MUTEX(rtas_ibm_lpar_perftools_lock);
+static DEFINE_MUTEX(rtas_ibm_physical_attestation_lock);
+static DEFINE_MUTEX(rtas_ibm_set_dynamic_indicator_lock);
+DEFINE_MUTEX(rtas_ibm_get_vpd_lock);
+
static struct rtas_function rtas_function_table[] __ro_after_init = {
[RTAS_FNIDX__CHECK_EXCEPTION] = {
.name = "check-exception",
@@ -125,6 +145,13 @@ static struct rtas_function rtas_function_table[] __ro_after_init = {
.buf_idx1 = -1, .size_idx1 = -1,
.buf_idx2 = -1, .size_idx2 = -1,
},
+ /*
+ * PAPR+ as of v2.13 doesn't explicitly impose any
+ * restriction, but this typically requires multiple
+ * calls before success, and there's no reason to
+ * allow sequences to interleave.
+ */
+ .lock = &rtas_ibm_activate_firmware_lock,
},
[RTAS_FNIDX__IBM_CBE_START_PTCAL] = {
.name = "ibm,cbe-start-ptcal",
@@ -196,6 +223,13 @@ static struct rtas_function rtas_function_table[] __ro_after_init = {
.buf_idx1 = 1, .size_idx1 = -1,
.buf_idx2 = -1, .size_idx2 = -1,
},
+ /*
+ * PAPR+ v2.13 R1–7.3.19–3 is explicit that the OS
+ * must not call ibm,get-dynamic-sensor-state with
+ * different inputs until a non-retry status has been
+ * returned.
+ */
+ .lock = &rtas_ibm_get_dynamic_sensor_state_lock,
},
[RTAS_FNIDX__IBM_GET_INDICES] = {
.name = "ibm,get-indices",
@@ -203,6 +237,12 @@ static struct rtas_function rtas_function_table[] __ro_after_init = {
.buf_idx1 = 2, .size_idx1 = 3,
.buf_idx2 = -1, .size_idx2 = -1,
},
+ /*
+ * PAPR+ v2.13 R1–7.3.17–2 says that the OS must not
+ * interleave ibm,get-indices call sequences with
+ * different inputs.
+ */
+ .lock = &rtas_ibm_get_indices_lock,
},
[RTAS_FNIDX__IBM_GET_RIO_TOPOLOGY] = {
.name = "ibm,get-rio-topology",
@@ -220,6 +260,11 @@ static struct rtas_function rtas_function_table[] __ro_after_init = {
.buf_idx1 = 0, .size_idx1 = -1,
.buf_idx2 = 1, .size_idx2 = 2,
},
+ /*
+ * PAPR+ v2.13 R1–7.3.20–4 indicates that sequences
+ * should not be allowed to interleave.
+ */
+ .lock = &rtas_ibm_get_vpd_lock,
},
[RTAS_FNIDX__IBM_GET_XIVE] = {
.name = "ibm,get-xive",
@@ -239,6 +284,11 @@ static struct rtas_function rtas_function_table[] __ro_after_init = {
.buf_idx1 = 2, .size_idx1 = 3,
.buf_idx2 = -1, .size_idx2 = -1,
},
+ /*
+ * PAPR+ v2.13 R1–7.3.26–6 says the OS should allow
+ * only one call sequence in progress at a time.
+ */
+ .lock = &rtas_ibm_lpar_perftools_lock,
},
[RTAS_FNIDX__IBM_MANAGE_FLASH_IMAGE] = {
.name = "ibm,manage-flash-image",
@@ -277,6 +327,14 @@ static struct rtas_function rtas_function_table[] __ro_after_init = {
.buf_idx1 = 0, .size_idx1 = 1,
.buf_idx2 = -1, .size_idx2 = -1,
},
+ /*
+ * This follows a sequence-based pattern similar to
+ * ibm,get-vpd et al. Since PAPR+ restricts
+ * interleaving call sequences for other functions of
+ * this style, assume the restriction applies here,
+ * even though it's not explicit in the spec.
+ */
+ .lock = &rtas_ibm_physical_attestation_lock,
},
[RTAS_FNIDX__IBM_PLATFORM_DUMP] = {
.name = "ibm,platform-dump",
@@ -284,6 +342,13 @@ static struct rtas_function rtas_function_table[] __ro_after_init = {
.buf_idx1 = 4, .size_idx1 = 5,
.buf_idx2 = -1, .size_idx2 = -1,
},
+ /*
+ * PAPR+ v2.13 7.3.3.4.1 indicates that concurrent
+ * sequences of ibm,platform-dump are allowed if they
+ * are operating on different dump tags. So leave the
+ * lock pointer unset for now. This may need
+ * reconsideration if kernel-internal users appear.
+ */
},
[RTAS_FNIDX__IBM_POWER_OFF_UPS] = {
.name = "ibm,power-off-ups",
@@ -326,6 +391,12 @@ static struct rtas_function rtas_function_table[] __ro_after_init = {
.buf_idx1 = 2, .size_idx1 = -1,
.buf_idx2 = -1, .size_idx2 = -1,
},
+ /*
+ * PAPR+ v2.13 R1–7.3.18–3 says the OS must not call
+ * this function with different inputs until a
+ * non-retry status has been returned.
+ */
+ .lock = &rtas_ibm_set_dynamic_indicator_lock,
},
[RTAS_FNIDX__IBM_SET_EEH_OPTION] = {
.name = "ibm,set-eeh-option",
@@ -454,6 +525,11 @@ static struct rtas_function rtas_function_table[] __ro_after_init = {
},
};
+#define for_each_rtas_function(funcp) \
+ for (funcp = &rtas_function_table[0]; \
+ funcp < &rtas_function_table[ARRAY_SIZE(rtas_function_table)]; \
+ ++funcp)
+
/*
* Nearly all RTAS calls need to be serialized. All uses of the
* default rtas_args block must hold rtas_lock.
@@ -525,10 +601,10 @@ static DEFINE_XARRAY(rtas_token_to_function_xarray);
static int __init rtas_token_to_function_xarray_init(void)
{
+ const struct rtas_function *func;
int err = 0;
- for (size_t i = 0; i < ARRAY_SIZE(rtas_function_table); ++i) {
- const struct rtas_function *func = &rtas_function_table[i];
+ for_each_rtas_function(func) {
const s32 token = func->token;
if (token == RTAS_UNKNOWN_SERVICE)
@@ -544,6 +620,21 @@ static int __init rtas_token_to_function_xarray_init(void)
}
arch_initcall(rtas_token_to_function_xarray_init);
+/*
+ * For use by sys_rtas(), where the token value is provided by user
+ * space and we don't want to warn on failed lookups.
+ */
+static const struct rtas_function *rtas_token_to_function_untrusted(s32 token)
+{
+ return xa_load(&rtas_token_to_function_xarray, token);
+}
+
+/*
+ * Reverse lookup for deriving the function descriptor from a
+ * known-good token value in contexts where the former is not already
+ * available. @token must be valid, e.g. derived from the result of a
+ * prior lookup against the function table.
+ */
static const struct rtas_function *rtas_token_to_function(s32 token)
{
const struct rtas_function *func;
@@ -551,12 +642,22 @@ static const struct rtas_function *rtas_token_to_function(s32 token)
if (WARN_ONCE(token < 0, "invalid token %d", token))
return NULL;
- func = xa_load(&rtas_token_to_function_xarray, token);
-
- if (WARN_ONCE(!func, "unexpected failed lookup for token %d", token))
- return NULL;
+ func = rtas_token_to_function_untrusted(token);
+ if (func)
+ return func;
+ /*
+ * Fall back to linear scan in case the reverse mapping hasn't
+ * been initialized yet.
+ */
+ if (xa_empty(&rtas_token_to_function_xarray)) {
+ for_each_rtas_function(func) {
+ if (func->token == token)
+ return func;
+ }
+ }
- return func;
+ WARN_ONCE(true, "unexpected failed lookup for token %d", token);
+ return NULL;
}
/* This is here deliberately so it's only used in this file */
@@ -570,28 +671,25 @@ static void __do_enter_rtas(struct rtas_args *args)
static void __do_enter_rtas_trace(struct rtas_args *args)
{
- const char *name = NULL;
+ const struct rtas_function *func = rtas_token_to_function(be32_to_cpu(args->token));
- if (args == &rtas_args)
- lockdep_assert_held(&rtas_lock);
/*
- * If the tracepoints that consume the function name aren't
- * active, avoid the lookup.
+ * If there is a per-function lock, it must be held by the
+ * caller.
*/
- if ((trace_rtas_input_enabled() || trace_rtas_output_enabled())) {
- const s32 token = be32_to_cpu(args->token);
- const struct rtas_function *func = rtas_token_to_function(token);
+ if (func->lock)
+ lockdep_assert_held(func->lock);
- name = func->name;
- }
+ if (args == &rtas_args)
+ lockdep_assert_held(&rtas_lock);
- trace_rtas_input(args, name);
+ trace_rtas_input(args, func->name);
trace_rtas_ll_entry(args);
__do_enter_rtas(args);
trace_rtas_ll_exit(args);
- trace_rtas_output(args, name);
+ trace_rtas_output(args, func->name);
}
static void do_enter_rtas(struct rtas_args *args)
@@ -670,7 +768,7 @@ static void call_rtas_display_status_delay(char c)
static int pending_newline = 0; /* did last write end with unprinted newline? */
static int width = 16;
- if (c == '\n') {
+ if (c == '\n') {
while (width-- > 0)
call_rtas_display_status(' ');
width = 16;
@@ -680,7 +778,7 @@ static void call_rtas_display_status_delay(char c)
if (pending_newline) {
call_rtas_display_status('\r');
call_rtas_display_status('\n');
- }
+ }
pending_newline = 0;
if (width--) {
call_rtas_display_status(c);
@@ -820,7 +918,7 @@ void rtas_progress(char *s, unsigned short hex)
else
rtas_call(display_character, 1, 1, NULL, '\r');
}
-
+
if (row_width)
width = row_width[current_line];
else
@@ -840,9 +938,9 @@ void rtas_progress(char *s, unsigned short hex)
spin_unlock(&progress_lock);
return;
}
-
+
/* RTAS wants CR-LF, not just LF */
-
+
if (*os == '\n') {
rtas_call(display_character, 1, 1, NULL, '\r');
rtas_call(display_character, 1, 1, NULL, '\n');
@@ -852,7 +950,7 @@ void rtas_progress(char *s, unsigned short hex)
*/
rtas_call(display_character, 1, 1, NULL, *os);
}
-
+
if (row_width)
width = row_width[current_line];
else
@@ -861,15 +959,15 @@ void rtas_progress(char *s, unsigned short hex)
width--;
rtas_call(display_character, 1, 1, NULL, *os);
}
-
+
os++;
-
+
/* if we overwrite the screen length */
if (width <= 0)
while ((*os != 0) && (*os != '\n') && (*os != '\r'))
os++;
}
-
+
spin_unlock(&progress_lock);
}
EXPORT_SYMBOL_GPL(rtas_progress); /* needed by rtas_flash module */
@@ -900,11 +998,6 @@ int rtas_token(const char *service)
}
EXPORT_SYMBOL_GPL(rtas_token);
-int rtas_service_present(const char *service)
-{
- return rtas_token(service) != RTAS_UNKNOWN_SERVICE;
-}
-
#ifdef CONFIG_RTAS_ERROR_LOGGING
static u32 rtas_error_log_max __ro_after_init = RTAS_ERROR_LOG_MAX;
@@ -1638,10 +1731,14 @@ void rtas_activate_firmware(void)
return;
}
+ mutex_lock(&rtas_ibm_activate_firmware_lock);
+
do {
fwrc = rtas_call(token, 0, 1, NULL);
} while (rtas_busy_delay(fwrc));
+ mutex_unlock(&rtas_ibm_activate_firmware_lock);
+
if (fwrc)
pr_err("ibm,activate-firmware failed (%i)\n", fwrc);
}
@@ -1713,24 +1810,18 @@ static bool in_rmo_buf(u32 base, u32 end)
end < (rtas_rmo_buf + RTAS_USER_REGION_SIZE);
}
-static bool block_rtas_call(int token, int nargs,
+static bool block_rtas_call(const struct rtas_function *func, int nargs,
struct rtas_args *args)
{
- const struct rtas_function *func;
const struct rtas_filter *f;
- const bool is_platform_dump = token == rtas_function_token(RTAS_FN_IBM_PLATFORM_DUMP);
- const bool is_config_conn = token == rtas_function_token(RTAS_FN_IBM_CONFIGURE_CONNECTOR);
+ const bool is_platform_dump =
+ func == &rtas_function_table[RTAS_FNIDX__IBM_PLATFORM_DUMP];
+ const bool is_config_conn =
+ func == &rtas_function_table[RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR];
u32 base, size, end;
/*
- * If this token doesn't correspond to a function the kernel
- * understands, you're not allowed to call it.
- */
- func = rtas_token_to_function(token);
- if (!func)
- goto err;
- /*
- * And only functions with filters attached are allowed.
+ * Only functions with filters attached are allowed.
*/
f = func->filter;
if (!f)
@@ -1787,14 +1878,15 @@ static bool block_rtas_call(int token, int nargs,
return false;
err:
pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n");
- pr_err_ratelimited("sys_rtas: token=0x%x, nargs=%d (called by %s)\n",
- token, nargs, current->comm);
+ pr_err_ratelimited("sys_rtas: %s nargs=%d (called by %s)\n",
+ func->name, nargs, current->comm);
return true;
}
/* We assume to be passed big endian arguments */
SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
{
+ const struct rtas_function *func;
struct pin_cookie cookie;
struct rtas_args args;
unsigned long flags;
@@ -1824,13 +1916,18 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
nargs * sizeof(rtas_arg_t)) != 0)
return -EFAULT;
- if (token == RTAS_UNKNOWN_SERVICE)
+ /*
+ * If this token doesn't correspond to a function the kernel
+ * understands, you're not allowed to call it.
+ */
+ func = rtas_token_to_function_untrusted(token);
+ if (!func)
return -EINVAL;
args.rets = &args.args[nargs];
memset(args.rets, 0, nret * sizeof(rtas_arg_t));
- if (block_rtas_call(token, nargs, &args))
+ if (block_rtas_call(func, nargs, &args))
return -EINVAL;
if (token_is_restricted_errinjct(token)) {
@@ -1863,6 +1960,15 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
buff_copy = get_errorlog_buffer();
+ /*
+ * If this function has a mutex assigned to it, we must
+ * acquire it to avoid interleaving with any kernel-based uses
+ * of the same function. Kernel-based sequences acquire the
+ * appropriate mutex explicitly.
+ */
+ if (func->lock)
+ mutex_lock(func->lock);
+
raw_spin_lock_irqsave(&rtas_lock, flags);
cookie = lockdep_pin_lock(&rtas_lock);
@@ -1878,6 +1984,9 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
lockdep_unpin_lock(&rtas_lock, cookie);
raw_spin_unlock_irqrestore(&rtas_lock, flags);
+ if (func->lock)
+ mutex_unlock(func->lock);
+
if (buff_copy) {
if (errbuf)
log_error(errbuf, ERR_TYPE_RTAS_LOG, 0);
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index e1fdc7473b72..fccf96e897f6 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -43,7 +43,7 @@ static inline int config_access_valid(struct pci_dn *dn, int where)
return 0;
}
-int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
+int rtas_pci_dn_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
{
int returnval = -1;
unsigned long buid, addr;
@@ -87,7 +87,7 @@ static int rtas_pci_read_config(struct pci_bus *bus,
pdn = pci_get_pdn_by_devfn(bus, devfn);
/* Validity of pdn is checked in here */
- ret = rtas_read_config(pdn, where, size, val);
+ ret = rtas_pci_dn_read_config(pdn, where, size, val);
if (*val == EEH_IO_ERROR_VALUE(size) &&
eeh_dev_check_failure(pdn_to_eeh_dev(pdn)))
return PCIBIOS_DEVICE_NOT_FOUND;
@@ -95,7 +95,7 @@ static int rtas_pci_read_config(struct pci_bus *bus,
return ret;
}
-int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val)
+int rtas_pci_dn_write_config(struct pci_dn *pdn, int where, int size, u32 val)
{
unsigned long buid, addr;
int ret;
@@ -134,7 +134,7 @@ static int rtas_pci_write_config(struct pci_bus *bus,
pdn = pci_get_pdn_by_devfn(bus, devfn);
/* Validity of pdn is checked in here. */
- return rtas_write_config(pdn, where, size, val);
+ return rtas_pci_dn_write_config(pdn, where, size, val);
}
static struct pci_ops rtas_pci_ops = {
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ab691c89d787..693334c20d07 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -77,10 +77,10 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 };
#endif
struct task_struct *secondary_current;
-bool has_big_cores;
-bool coregroup_enabled;
-bool thread_group_shares_l2;
-bool thread_group_shares_l3;
+bool has_big_cores __ro_after_init;
+bool coregroup_enabled __ro_after_init;
+bool thread_group_shares_l2 __ro_after_init;
+bool thread_group_shares_l3 __ro_after_init;
DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
@@ -93,15 +93,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
EXPORT_SYMBOL_GPL(has_big_cores);
-enum {
-#ifdef CONFIG_SCHED_SMT
- smt_idx,
-#endif
- cache_idx,
- mc_idx,
- die_idx,
-};
-
#define MAX_THREAD_LIST_SIZE 8
#define THREAD_GROUP_SHARE_L1 1
#define THREAD_GROUP_SHARE_L2_L3 2
@@ -987,7 +978,7 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property)
return 0;
}
-static bool shared_caches;
+static bool shared_caches __ro_after_init;
#ifdef CONFIG_SCHED_SMT
/* cpumask of CPUs with asymmetric SMT dependency */
@@ -1004,6 +995,13 @@ static int powerpc_smt_flags(void)
#endif
/*
+ * On shared processor LPARs scheduled on a big core (which has two or more
+ * independent thread groups per core), prefer lower numbered CPUs, so
+ * that workload consolidates to lesser number of cores.
+ */
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(splpar_asym_pack);
+
+/*
* P9 has a slightly odd architecture where pairs of cores share an L2 cache.
* This topology makes it *much* cheaper to migrate tasks between adjacent cores
* since the migrated task remains cache hot. We want to take advantage of this
@@ -1011,9 +1009,20 @@ static int powerpc_smt_flags(void)
*/
static int powerpc_shared_cache_flags(void)
{
+ if (static_branch_unlikely(&splpar_asym_pack))
+ return SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING;
+
return SD_SHARE_PKG_RESOURCES;
}
+static int powerpc_shared_proc_flags(void)
+{
+ if (static_branch_unlikely(&splpar_asym_pack))
+ return SD_ASYM_PACKING;
+
+ return 0;
+}
+
/*
* We can't just pass cpu_l2_cache_mask() directly because
* returns a non-const pointer and the compiler barfs on that.
@@ -1037,6 +1046,10 @@ static struct cpumask *cpu_coregroup_mask(int cpu)
static bool has_coregroup_support(void)
{
+ /* Coregroup identification not available on shared systems */
+ if (is_shared_processor())
+ return 0;
+
return coregroup_enabled;
}
@@ -1045,16 +1058,6 @@ static const struct cpumask *cpu_mc_mask(int cpu)
return cpu_coregroup_mask(cpu);
}
-static struct sched_domain_topology_level powerpc_topology[] = {
-#ifdef CONFIG_SCHED_SMT
- { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
-#endif
- { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
- { cpu_mc_mask, SD_INIT_NAME(MC) },
- { cpu_cpu_mask, SD_INIT_NAME(PKG) },
- { NULL, },
-};
-
static int __init init_big_cores(void)
{
int cpu;
@@ -1682,43 +1685,45 @@ void start_secondary(void *unused)
BUG();
}
-static void __init fixup_topology(void)
+static struct sched_domain_topology_level powerpc_topology[6];
+
+static void __init build_sched_topology(void)
{
- int i;
+ int i = 0;
+
+ if (is_shared_processor() && has_big_cores)
+ static_branch_enable(&splpar_asym_pack);
#ifdef CONFIG_SCHED_SMT
if (has_big_cores) {
pr_info("Big cores detected but using small core scheduling\n");
- powerpc_topology[smt_idx].mask = smallcore_smt_mask;
+ powerpc_topology[i++] = (struct sched_domain_topology_level){
+ smallcore_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT)
+ };
+ } else {
+ powerpc_topology[i++] = (struct sched_domain_topology_level){
+ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT)
+ };
}
#endif
+ if (shared_caches) {
+ powerpc_topology[i++] = (struct sched_domain_topology_level){
+ shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE)
+ };
+ }
+ if (has_coregroup_support()) {
+ powerpc_topology[i++] = (struct sched_domain_topology_level){
+ cpu_mc_mask, powerpc_shared_proc_flags, SD_INIT_NAME(MC)
+ };
+ }
+ powerpc_topology[i++] = (struct sched_domain_topology_level){
+ cpu_cpu_mask, powerpc_shared_proc_flags, SD_INIT_NAME(PKG)
+ };
- if (!has_coregroup_support())
- powerpc_topology[mc_idx].mask = powerpc_topology[cache_idx].mask;
-
- /*
- * Try to consolidate topology levels here instead of
- * allowing scheduler to degenerate.
- * - Dont consolidate if masks are different.
- * - Dont consolidate if sd_flags exists and are different.
- */
- for (i = 1; i <= die_idx; i++) {
- if (powerpc_topology[i].mask != powerpc_topology[i - 1].mask)
- continue;
-
- if (powerpc_topology[i].sd_flags && powerpc_topology[i - 1].sd_flags &&
- powerpc_topology[i].sd_flags != powerpc_topology[i - 1].sd_flags)
- continue;
-
- if (!powerpc_topology[i - 1].sd_flags)
- powerpc_topology[i - 1].sd_flags = powerpc_topology[i].sd_flags;
+ /* There must be one trailing NULL entry left. */
+ BUG_ON(i >= ARRAY_SIZE(powerpc_topology) - 1);
- powerpc_topology[i].mask = powerpc_topology[i + 1].mask;
- powerpc_topology[i].sd_flags = powerpc_topology[i + 1].sd_flags;
-#ifdef CONFIG_SCHED_DEBUG
- powerpc_topology[i].name = powerpc_topology[i + 1].name;
-#endif
- }
+ set_sched_topology(powerpc_topology);
}
void __init smp_cpus_done(unsigned int max_cpus)
@@ -1733,9 +1738,20 @@ void __init smp_cpus_done(unsigned int max_cpus)
smp_ops->bringup_done();
dump_numa_cpu_topology();
+ build_sched_topology();
+}
- fixup_topology();
- set_sched_topology(powerpc_topology);
+/*
+ * For asym packing, by default lower numbered CPU has higher priority.
+ * On shared processors, pack to lower numbered core. However avoid moving
+ * between thread_groups within the same core.
+ */
+int arch_asym_cpu_priority(int cpu)
+{
+ if (static_branch_unlikely(&splpar_asym_pack))
+ return -cpu / threads_per_core;
+
+ return -cpu;
}
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/powerpc/kernel/swsusp_64.c b/arch/powerpc/kernel/swsusp_64.c
index 16ee3baaf09a..50fa8fc9ef95 100644
--- a/arch/powerpc/kernel/swsusp_64.c
+++ b/arch/powerpc/kernel/swsusp_64.c
@@ -11,6 +11,8 @@
#include <linux/interrupt.h>
#include <linux/nmi.h>
+void do_after_copyback(void);
+
void do_after_copyback(void)
{
iommu_restore();
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 7fab411378f2..6988ecbc316e 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -543,3 +543,5 @@
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait
456 common futex_requeue sys_futex_requeue
+457 common statmount sys_statmount
+458 common listmount sys_listmount
diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S
index 90701885762c..76dbe9fd2c0f 100644
--- a/arch/powerpc/kernel/trace/ftrace_entry.S
+++ b/arch/powerpc/kernel/trace/ftrace_entry.S
@@ -62,7 +62,7 @@
.endif
/* Save previous stack pointer (r1) */
- addi r8, r1, SWITCH_FRAME_SIZE
+ addi r8, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE
PPC_STL r8, GPR1(r1)
.if \allregs == 1
@@ -162,7 +162,6 @@ _GLOBAL(ftrace_regs_caller)
.globl ftrace_regs_call
ftrace_regs_call:
bl ftrace_stub
- nop
ftrace_regs_exit 1
_GLOBAL(ftrace_caller)
@@ -171,7 +170,6 @@ _GLOBAL(ftrace_caller)
.globl ftrace_call
ftrace_call:
bl ftrace_stub
- nop
ftrace_regs_exit 0
_GLOBAL(ftrace_stub)
@@ -182,7 +180,7 @@ ftrace_no_trace:
mflr r3
mtctr r3
REST_GPR(3, r1)
- addi r1, r1, SWITCH_FRAME_SIZE
+ addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE
mtlr r0
bctr
#endif
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 5ea2014aff90..11e062b47d3f 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1439,10 +1439,12 @@ static int emulate_instruction(struct pt_regs *regs)
return -EINVAL;
}
+#ifdef CONFIG_GENERIC_BUG
int is_valid_bugaddr(unsigned long addr)
{
return is_kernel_addr(addr);
}
+#endif
#ifdef CONFIG_MATH_EMULATION
static int emulate_math(struct pt_regs *regs)
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
index 74ddf836f7a2..a0467e528b70 100644
--- a/arch/powerpc/kernel/udbg_16550.c
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -7,7 +7,6 @@
#include <linux/types.h>
#include <asm/udbg.h>
#include <asm/io.h>
-#include <asm/reg_a2.h>
#include <asm/early_ioremap.h>
extern u8 real_readb(volatile u8 __iomem *addr);
diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile
index 0c7d82c270c3..1b93655c2857 100644
--- a/arch/powerpc/kernel/vdso/Makefile
+++ b/arch/powerpc/kernel/vdso/Makefile
@@ -71,7 +71,7 @@ AS64FLAGS := -D__VDSO64__
targets += vdso32.lds
CPPFLAGS_vdso32.lds += -P -C -Upowerpc
targets += vdso64.lds
-CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
+CPPFLAGS_vdso64.lds += -P -C
# link rule for the .so file, .lds has to be first
$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o FORCE
diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c
index 85846cadb9b5..27fa9098a5b7 100644
--- a/arch/powerpc/kexec/core.c
+++ b/arch/powerpc/kexec/core.c
@@ -75,6 +75,7 @@ void arch_crash_save_vmcoreinfo(void)
VMCOREINFO_OFFSET(mmu_psize_def, shift);
#endif
VMCOREINFO_SYMBOL(cur_cpu_spec);
+ VMCOREINFO_OFFSET(cpu_spec, cpu_features);
VMCOREINFO_OFFSET(cpu_spec, mmu_features);
vmcoreinfo_append_str("NUMBER(RADIX_MMU)=%d\n", early_radix_enabled());
vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 6cd20ab9e94e..8acec144120e 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -302,11 +302,11 @@ static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu,
switch (priority) {
case BOOK3S_IRQPRIO_DECREMENTER:
- deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
+ deliver = !kvmhv_is_nestedv2() && (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
vec = BOOK3S_INTERRUPT_DECREMENTER;
break;
case BOOK3S_IRQPRIO_EXTERNAL:
- deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
+ deliver = !kvmhv_is_nestedv2() && (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
vec = BOOK3S_INTERRUPT_EXTERNAL;
break;
case BOOK3S_IRQPRIO_SYSTEM_RESET:
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 175a8eb2681f..4a1abb9f7c05 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -40,6 +40,9 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
unsigned long quadrant, ret = n;
bool is_load = !!to;
+ if (kvmhv_is_nestedv2())
+ return H_UNSUPPORTED;
+
/* Can't access quadrants 1 or 2 in non-HV mode, call the HV to do it */
if (kvmhv_on_pseries())
return plpar_hcall_norets(H_COPY_TOFROM_GUEST, lpid, pid, eaddr,
@@ -97,7 +100,7 @@ static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
void *to, void *from, unsigned long n)
{
int lpid = vcpu->kvm->arch.lpid;
- int pid = kvmppc_get_pid(vcpu);
+ int pid;
/* This would cause a data segment intr so don't allow the access */
if (eaddr & (0x3FFUL << 52))
@@ -110,6 +113,8 @@ static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
/* If accessing quadrant 3 then pid is expected to be 0 */
if (((eaddr >> 62) & 0x3) == 0x3)
pid = 0;
+ else
+ pid = kvmppc_get_pid(vcpu);
eaddr &= ~(0xFFFUL << 52);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 1ed6ec140701..e48126a59ba7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -650,7 +650,8 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
return err;
}
-static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
+static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap,
+ struct kvmppc_vpa *old_vpap)
{
struct kvm *kvm = vcpu->kvm;
void *va;
@@ -690,9 +691,8 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
kvmppc_unpin_guest_page(kvm, va, gpa, false);
va = NULL;
}
- if (vpap->pinned_addr)
- kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa,
- vpap->dirty);
+ *old_vpap = *vpap;
+
vpap->gpa = gpa;
vpap->pinned_addr = va;
vpap->dirty = false;
@@ -702,6 +702,9 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvmppc_vpa old_vpa = { 0 };
+
if (!(vcpu->arch.vpa.update_pending ||
vcpu->arch.slb_shadow.update_pending ||
vcpu->arch.dtl.update_pending))
@@ -709,17 +712,34 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
spin_lock(&vcpu->arch.vpa_update_lock);
if (vcpu->arch.vpa.update_pending) {
- kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
- if (vcpu->arch.vpa.pinned_addr)
+ kvmppc_update_vpa(vcpu, &vcpu->arch.vpa, &old_vpa);
+ if (old_vpa.pinned_addr) {
+ if (kvmhv_is_nestedv2())
+ kvmhv_nestedv2_set_vpa(vcpu, ~0ull);
+ kvmppc_unpin_guest_page(kvm, old_vpa.pinned_addr, old_vpa.gpa,
+ old_vpa.dirty);
+ }
+ if (vcpu->arch.vpa.pinned_addr) {
init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
+ if (kvmhv_is_nestedv2())
+ kvmhv_nestedv2_set_vpa(vcpu, __pa(vcpu->arch.vpa.pinned_addr));
+ }
}
if (vcpu->arch.dtl.update_pending) {
- kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
+ kvmppc_update_vpa(vcpu, &vcpu->arch.dtl, &old_vpa);
+ if (old_vpa.pinned_addr)
+ kvmppc_unpin_guest_page(kvm, old_vpa.pinned_addr, old_vpa.gpa,
+ old_vpa.dirty);
vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
vcpu->arch.dtl_index = 0;
}
- if (vcpu->arch.slb_shadow.update_pending)
- kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow);
+ if (vcpu->arch.slb_shadow.update_pending) {
+ kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow, &old_vpa);
+ if (old_vpa.pinned_addr)
+ kvmppc_unpin_guest_page(kvm, old_vpa.pinned_addr, old_vpa.gpa,
+ old_vpa.dirty);
+ }
+
spin_unlock(&vcpu->arch.vpa_update_lock);
}
@@ -1597,7 +1617,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
* That can happen due to a bug, or due to a machine check
* occurring at just the wrong time.
*/
- if (__kvmppc_get_msr_hv(vcpu) & MSR_HV) {
+ if (!kvmhv_is_nestedv2() && (__kvmppc_get_msr_hv(vcpu) & MSR_HV)) {
printk(KERN_EMERG "KVM trap in HV mode!\n");
printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
vcpu->arch.trap, kvmppc_get_pc(vcpu),
@@ -1688,7 +1708,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
{
int i;
- if (unlikely(__kvmppc_get_msr_hv(vcpu) & MSR_PR)) {
+ if (!kvmhv_is_nestedv2() && unlikely(__kvmppc_get_msr_hv(vcpu) & MSR_PR)) {
/*
* Guest userspace executed sc 1. This can only be
* reached by the P9 path because the old path
@@ -4084,6 +4104,8 @@ static int kvmhv_vcpu_entry_nestedv2(struct kvm_vcpu *vcpu, u64 time_limit,
if (rc < 0)
return -EINVAL;
+ kvmppc_gse_put_u64(io->vcpu_run_input, KVMPPC_GSID_LPCR, lpcr);
+
accumulate_time(vcpu, &vcpu->arch.in_guest);
rc = plpar_guest_run_vcpu(0, vcpu->kvm->arch.lpid, vcpu->vcpu_id,
&trap, &i);
@@ -4736,13 +4758,19 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
if (!nested) {
kvmppc_core_prepare_to_enter(vcpu);
- if (__kvmppc_get_msr_hv(vcpu) & MSR_EE) {
- if (xive_interrupt_pending(vcpu))
+ if (test_bit(BOOK3S_IRQPRIO_EXTERNAL,
+ &vcpu->arch.pending_exceptions) ||
+ xive_interrupt_pending(vcpu)) {
+ /*
+ * For nested HV, don't synthesize but always pass MER,
+ * the L0 will be able to optimise that more
+ * effectively than manipulating registers directly.
+ */
+ if (!kvmhv_on_pseries() && (__kvmppc_get_msr_hv(vcpu) & MSR_EE))
kvmppc_inject_interrupt_hv(vcpu,
- BOOK3S_INTERRUPT_EXTERNAL, 0);
- } else if (test_bit(BOOK3S_IRQPRIO_EXTERNAL,
- &vcpu->arch.pending_exceptions)) {
- lpcr |= LPCR_MER;
+ BOOK3S_INTERRUPT_EXTERNAL, 0);
+ else
+ lpcr |= LPCR_MER;
}
} else if (vcpu->arch.pending_exceptions ||
vcpu->arch.doorbell_request ||
@@ -4806,7 +4834,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
* entering a nested guest in which case the decrementer is now owned
* by L2 and the L1 decrementer is provided in hdec_expires
*/
- if (kvmppc_core_pending_dec(vcpu) &&
+ if (!kvmhv_is_nestedv2() && kvmppc_core_pending_dec(vcpu) &&
((tb < kvmppc_dec_expires_host_tb(vcpu)) ||
(trap == BOOK3S_INTERRUPT_SYSCALL &&
kvmppc_get_gpr(vcpu, 3) == H_ENTER_NESTED)))
@@ -4949,7 +4977,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
if (run->exit_reason == KVM_EXIT_PAPR_HCALL) {
accumulate_time(vcpu, &vcpu->arch.hcall);
- if (WARN_ON_ONCE(__kvmppc_get_msr_hv(vcpu) & MSR_PR)) {
+ if (!kvmhv_is_nestedv2() && WARN_ON_ONCE(__kvmppc_get_msr_hv(vcpu) & MSR_PR)) {
/*
* These should have been caught reflected
* into the guest by now. Final sanity check:
@@ -5691,10 +5719,12 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
}
- if (kvmhv_is_nestedv2())
+ if (kvmhv_is_nestedv2()) {
+ kvmhv_flush_lpid(kvm->arch.lpid);
plpar_guest_delete(0, kvm->arch.lpid);
- else
+ } else {
kvmppc_free_lpid(kvm->arch.lpid);
+ }
kvmppc_free_pimap(kvm);
}
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 3b658b8696bc..5c375ec1a3c6 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -503,7 +503,7 @@ void kvmhv_nested_exit(void)
}
}
-static void kvmhv_flush_lpid(u64 lpid)
+void kvmhv_flush_lpid(u64 lpid)
{
long rc;
diff --git a/arch/powerpc/kvm/book3s_hv_nestedv2.c b/arch/powerpc/kvm/book3s_hv_nestedv2.c
index fd3c4f2d9480..5378eb40b162 100644
--- a/arch/powerpc/kvm/book3s_hv_nestedv2.c
+++ b/arch/powerpc/kvm/book3s_hv_nestedv2.c
@@ -856,6 +856,35 @@ free_gsb:
EXPORT_SYMBOL_GPL(kvmhv_nestedv2_set_ptbl_entry);
/**
+ * kvmhv_nestedv2_set_vpa() - register L2 VPA with L0
+ * @vcpu: vcpu
+ * @vpa: L1 logical real address
+ */
+int kvmhv_nestedv2_set_vpa(struct kvm_vcpu *vcpu, unsigned long vpa)
+{
+ struct kvmhv_nestedv2_io *io;
+ struct kvmppc_gs_buff *gsb;
+ int rc = 0;
+
+ io = &vcpu->arch.nestedv2_io;
+ gsb = io->vcpu_run_input;
+
+ kvmppc_gsb_reset(gsb);
+ rc = kvmppc_gse_put_u64(gsb, KVMPPC_GSID_VPA, vpa);
+ if (rc < 0)
+ goto out;
+
+ rc = kvmppc_gsb_send(gsb, 0);
+ if (rc < 0)
+ pr_err("KVM-NESTEDv2: couldn't register the L2 VPA (rc=%d)\n", rc);
+
+out:
+ kvmppc_gsb_reset(gsb);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(kvmhv_nestedv2_set_vpa);
+
+/**
* kvmhv_nestedv2_parse_output() - receive values from H_GUEST_RUN_VCPU output
* @vcpu: vcpu
*
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 9118242063fb..5b92619a05fd 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -604,6 +604,7 @@ static void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
case PVR_POWER8:
case PVR_POWER8E:
case PVR_POWER8NVL:
+ case PVR_HX_C2000:
case PVR_POWER9:
vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE |
BOOK3S_HFLAG_NEW_TLBIE;
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index 077fd88a0b68..ec60c7979718 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -93,7 +93,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
emulated = EMULATE_FAIL;
vcpu->arch.regs.msr = kvmppc_get_msr(vcpu);
- kvmhv_nestedv2_reload_ptregs(vcpu, &vcpu->arch.regs);
if (analyse_instr(&op, &vcpu->arch.regs, inst) == 0) {
int type = op.type & INSTR_TYPE_MASK;
int size = GETSIZE(op.type);
@@ -112,7 +111,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
op.reg, size, !instr_byte_swap);
if ((op.type & UPDATE) && (emulated != EMULATE_FAIL))
- kvmppc_set_gpr(vcpu, op.update_reg, op.ea);
+ kvmppc_set_gpr(vcpu, op.update_reg, vcpu->arch.vaddr_accessed);
break;
}
@@ -132,7 +131,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
KVM_MMIO_REG_FPR|op.reg, size, 1);
if ((op.type & UPDATE) && (emulated != EMULATE_FAIL))
- kvmppc_set_gpr(vcpu, op.update_reg, op.ea);
+ kvmppc_set_gpr(vcpu, op.update_reg, vcpu->arch.vaddr_accessed);
break;
#endif
@@ -224,16 +223,17 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
break;
}
#endif
- case STORE:
- /* if need byte reverse, op.val has been reversed by
- * analyse_instr().
- */
- emulated = kvmppc_handle_store(vcpu, op.val, size, 1);
+ case STORE: {
+ int instr_byte_swap = op.type & BYTEREV;
+
+ emulated = kvmppc_handle_store(vcpu, kvmppc_get_gpr(vcpu, op.reg),
+ size, !instr_byte_swap);
if ((op.type & UPDATE) && (emulated != EMULATE_FAIL))
- kvmppc_set_gpr(vcpu, op.update_reg, op.ea);
+ kvmppc_set_gpr(vcpu, op.update_reg, vcpu->arch.vaddr_accessed);
break;
+ }
#ifdef CONFIG_PPC_FPU
case STORE_FP:
if (kvmppc_check_fp_disabled(vcpu))
@@ -254,7 +254,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
kvmppc_get_fpr(vcpu, op.reg), size, 1);
if ((op.type & UPDATE) && (emulated != EMULATE_FAIL))
- kvmppc_set_gpr(vcpu, op.update_reg, op.ea);
+ kvmppc_set_gpr(vcpu, op.update_reg, vcpu->arch.vaddr_accessed);
break;
#endif
@@ -358,7 +358,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
}
trace_kvm_ppc_instr(ppc_inst_val(inst), kvmppc_get_pc(vcpu), emulated);
- kvmhv_nestedv2_mark_dirty_ptregs(vcpu, &vcpu->arch.regs);
/* Advance past emulated instruction. */
if (emulated != EMULATE_FAIL)
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 51ad0397c17a..6eac63e79a89 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -45,7 +45,7 @@ obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
# so it is only needed for modules, and only for older linkers which
# do not support --save-restore-funcs
ifndef CONFIG_LD_IS_BFD
-extra-$(CONFIG_PPC64) += crtsavres.o
+always-$(CONFIG_PPC64) += crtsavres.o
endif
obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index a4ab8625061a..5766180f5380 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -586,6 +586,8 @@ static int do_fp_load(struct instruction_op *op, unsigned long ea,
} u;
nb = GETSIZE(op->type);
+ if (nb > sizeof(u))
+ return -EINVAL;
if (!address_ok(regs, ea, nb))
return -EFAULT;
rn = op->reg;
@@ -636,6 +638,8 @@ static int do_fp_store(struct instruction_op *op, unsigned long ea,
} u;
nb = GETSIZE(op->type);
+ if (nb > sizeof(u))
+ return -EINVAL;
if (!address_ok(regs, ea, nb))
return -EFAULT;
rn = op->reg;
@@ -680,6 +684,9 @@ static nokprobe_inline int do_vec_load(int rn, unsigned long ea,
u8 b[sizeof(__vector128)];
} u = {};
+ if (size > sizeof(u))
+ return -EINVAL;
+
if (!address_ok(regs, ea & ~0xfUL, 16))
return -EFAULT;
/* align to multiple of size */
@@ -688,7 +695,7 @@ static nokprobe_inline int do_vec_load(int rn, unsigned long ea,
if (err)
return err;
if (unlikely(cross_endian))
- do_byte_reverse(&u.b[ea & 0xf], size);
+ do_byte_reverse(&u.b[ea & 0xf], min_t(size_t, size, sizeof(u)));
preempt_disable();
if (regs->msr & MSR_VEC)
put_vr(rn, &u.v);
@@ -707,6 +714,9 @@ static nokprobe_inline int do_vec_store(int rn, unsigned long ea,
u8 b[sizeof(__vector128)];
} u;
+ if (size > sizeof(u))
+ return -EINVAL;
+
if (!address_ok(regs, ea & ~0xfUL, 16))
return -EFAULT;
/* align to multiple of size */
@@ -719,7 +729,7 @@ static nokprobe_inline int do_vec_store(int rn, unsigned long ea,
u.v = current->thread.vr_state.vr[rn];
preempt_enable();
if (unlikely(cross_endian))
- do_byte_reverse(&u.b[ea & 0xf], size);
+ do_byte_reverse(&u.b[ea & 0xf], min_t(size_t, size, sizeof(u)));
return copy_mem_out(&u.b[ea & 0xf], ea, size, regs);
}
#endif /* CONFIG_ALTIVEC */
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index ad2afa08e62e..0626a25b0d72 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -310,9 +310,16 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags, unsigned long flags
else
rflags |= 0x3;
}
+ VM_WARN_ONCE(!(pteflags & _PAGE_RWX), "no-access mapping request");
} else {
if (pteflags & _PAGE_RWX)
rflags |= 0x2;
+ /*
+ * We should never hit this in normal fault handling because
+ * a permission check (check_pte_access()) will bubble this
+ * to higher level linux handler even for PAGE_NONE.
+ */
+ VM_WARN_ONCE(!(pteflags & _PAGE_RWX), "no-access mapping request");
if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY)))
rflags |= 0x1;
}
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index be229290a6a7..3438ab72c346 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -542,6 +542,7 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
set_pte_at(vma->vm_mm, addr, ptep, pte);
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* For hash translation mode, we use the deposited table to store hash slot
* information and they are stored at PTRS_PER_PMD offset from related pmd
@@ -563,6 +564,7 @@ int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
return true;
}
+#endif
/*
* Does the CPU support tlbie?
diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index 125733962033..a974baf8f327 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -89,7 +89,8 @@ static int __init scan_pkey_feature(void)
unsigned long pvr = mfspr(SPRN_PVR);
if (PVR_VER(pvr) == PVR_POWER8 || PVR_VER(pvr) == PVR_POWER8E ||
- PVR_VER(pvr) == PVR_POWER8NVL || PVR_VER(pvr) == PVR_POWER9)
+ PVR_VER(pvr) == PVR_POWER8NVL || PVR_VER(pvr) == PVR_POWER9 ||
+ PVR_VER(pvr) == PVR_HX_C2000)
pkeys_total = 32;
}
}
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 9e49ede2bc1c..53335ae21a40 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -497,6 +497,8 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
goto done;
}
count_vm_vma_lock_event(VMA_LOCK_RETRY);
+ if (fault & VM_FAULT_MAJOR)
+ flags |= FAULT_FLAG_TRIED;
if (fault_signal_pending(fault, regs))
return user_mode(regs) ? 0 : SIGBUS;
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index 119ef491f797..d3a7726ecf51 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -126,7 +126,7 @@ void pgtable_cache_add(unsigned int shift)
* as to leave enough 0 bits in the address to contain it. */
unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1,
HUGEPD_SHIFT_MASK + 1);
- struct kmem_cache *new;
+ struct kmem_cache *new = NULL;
/* It would be nice if this was a BUILD_BUG_ON(), but at the
* moment, gcc doesn't seem to recognize is_power_of_2 as a
@@ -139,7 +139,8 @@ void pgtable_cache_add(unsigned int shift)
align = max_t(unsigned long, align, minalign);
name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);
- new = kmem_cache_create(name, table_size, align, 0, ctor(shift));
+ if (name)
+ new = kmem_cache_create(name, table_size, align, 0, ctor(shift));
if (!new)
panic("Could not allocate pgtable cache for order %d", shift);
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 7f9ff0640124..72341b9fb552 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -181,3 +181,8 @@ static inline bool debug_pagealloc_enabled_or_kfence(void)
{
return IS_ENABLED(CONFIG_KFENCE) || debug_pagealloc_enabled();
}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+int create_section_mapping(unsigned long start, unsigned long end,
+ int nid, pgprot_t prot);
+#endif
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 10b946e9c6e7..b7ff680cde96 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2312,7 +2312,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
struct cpu_hw_events *cpuhw;
cpuhw = this_cpu_ptr(&cpu_hw_events);
power_pmu_bhrb_read(event, cpuhw);
- perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack);
+ perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack, NULL);
}
if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
index 39dbe6b348df..27f18119fda1 100644
--- a/arch/powerpc/perf/hv-gpci.c
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -534,6 +534,9 @@ static ssize_t affinity_domain_via_partition_show(struct device *dev, struct dev
if (!ret)
goto parse_result;
+ if (ret && (ret != H_PARAMETER))
+ goto out;
+
/*
* ret value as 'H_PARAMETER' implies that the current buffer size
* can't accommodate all the information, and a partial buffer
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 5d12ca386c1f..8664a7d297ad 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -299,6 +299,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
attr_group->attrs = attrs;
do {
ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i].value);
+ if (!ev_val_str)
+ continue;
dev_str = device_str_attr_create(pmu->events[i].name, ev_val_str);
if (!dev_str)
continue;
@@ -306,6 +308,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
attrs[j++] = dev_str;
if (pmu->events[i].scale) {
ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale", pmu->events[i].name);
+ if (!ev_scale_str)
+ continue;
dev_str = device_str_attr_create(ev_scale_str, pmu->events[i].scale);
if (!dev_str)
continue;
@@ -315,6 +319,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
if (pmu->events[i].unit) {
ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit", pmu->events[i].name);
+ if (!ev_unit_str)
+ continue;
dev_str = device_str_attr_create(ev_unit_str, pmu->events[i].unit);
if (!dev_str)
continue;
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index 1624ebf95497..35a1f4b9f827 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -173,6 +173,7 @@ config ISS4xx
config CURRITUCK
bool "IBM Currituck (476fpe) Support"
depends on PPC_47x
+ select I2C
select SWIOTLB
select 476FPE
select FORCE_PCI
diff --git a/arch/powerpc/platforms/44x/idle.c b/arch/powerpc/platforms/44x/idle.c
index f533b495e7db..e2eeef8dff78 100644
--- a/arch/powerpc/platforms/44x/idle.c
+++ b/arch/powerpc/platforms/44x/idle.c
@@ -27,7 +27,7 @@ static void ppc44x_idle(void)
isync();
}
-int __init ppc44x_idle_init(void)
+static int __init ppc44x_idle_init(void)
{
if (!mode_spin) {
/* If we are not setting spin mode
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
index 6f08d07aee3b..e995eb30bf09 100644
--- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
+++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
@@ -17,6 +17,8 @@
#include <linux/of_address.h>
#include <linux/of_irq.h>
+#include "mpc5121_ads.h"
+
static struct device_node *cpld_pic_node;
static struct irq_domain *cpld_pic_host;
diff --git a/arch/powerpc/platforms/512x/pdm360ng.c b/arch/powerpc/platforms/512x/pdm360ng.c
index ce51cfeeb066..8bbbf78bb42b 100644
--- a/arch/powerpc/platforms/512x/pdm360ng.c
+++ b/arch/powerpc/platforms/512x/pdm360ng.c
@@ -101,7 +101,7 @@ static inline void __init pdm360ng_touchscreen_init(void)
}
#endif /* CONFIG_TOUCHSCREEN_ADS7846 */
-void __init pdm360ng_init(void)
+static void __init pdm360ng_init(void)
{
mpc512x_init();
pdm360ng_touchscreen_init();
diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c
index 9833c36bda83..c9664e46b03d 100644
--- a/arch/powerpc/platforms/83xx/suspend.c
+++ b/arch/powerpc/platforms/83xx/suspend.c
@@ -261,9 +261,10 @@ static int mpc83xx_suspend_begin(suspend_state_t state)
static int agent_thread_fn(void *data)
{
+ set_freezable();
+
while (1) {
- wait_event_interruptible(agent_wq, pci_pm_state >= 2);
- try_to_freeze();
+ wait_event_freezable(agent_wq, pci_pm_state >= 2);
if (signal_pending(current) || pci_pm_state < 2)
continue;
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
index ec9f60fbebc7..e0cec670d8db 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
@@ -76,7 +76,7 @@ static void __init mpc85xx_rdb_setup_arch(void)
/* P1025 has pins muxed for QE and other functions. To
* enable QE UEC mode, we need to set bit QE0 for UCC1
* in Eth mode, QE0 and QE3 for UCC5 in Eth mode, QE9
- * and QE12 for QE MII management singals in PMUXCR
+ * and QE12 for QE MII management signals in PMUXCR
* register.
*/
setbits32(&guts->pmuxcr, MPC85xx_PMUXCR_QE(0) |
diff --git a/arch/powerpc/platforms/86xx/Kconfig b/arch/powerpc/platforms/86xx/Kconfig
index 67467cd6f34c..06b1e5c49d6f 100644
--- a/arch/powerpc/platforms/86xx/Kconfig
+++ b/arch/powerpc/platforms/86xx/Kconfig
@@ -52,10 +52,3 @@ config MPC8641
select MPIC
default y if GEF_SBC610 || GEF_SBC310 || GEF_PPC9A \
|| MVME7100
-
-config MPC8610
- bool
- select HAVE_PCI
- select FSL_PCI if PCI
- select PPC_UDBG_16550
- select MPIC
diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c
index ef985ba2bf21..0761d98e5be3 100644
--- a/arch/powerpc/platforms/pasemi/setup.c
+++ b/arch/powerpc/platforms/pasemi/setup.c
@@ -64,7 +64,7 @@ static void __noreturn pas_restart(char *cmd)
}
#ifdef CONFIG_PPC_PASEMI_NEMO
-void pas_shutdown(void)
+static void pas_shutdown(void)
{
/* Set the PLD bit that makes the SB600 think the power button is being pressed */
void __iomem *pld_map = ioremap(0xf5000000,4096);
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index c83d1e14077e..15644be31990 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -413,7 +413,7 @@ static void __init smp_psurge_setup_cpu(int cpu_nr)
printk(KERN_ERR "Couldn't get primary IPI interrupt");
}
-void __init smp_psurge_take_timebase(void)
+static void __init smp_psurge_take_timebase(void)
{
if (psurge_type != PSURGE_DUAL)
return;
@@ -429,7 +429,7 @@ void __init smp_psurge_take_timebase(void)
set_dec(tb_ticks_per_jiffy/2);
}
-void __init smp_psurge_give_timebase(void)
+static void __init smp_psurge_give_timebase(void)
{
/* Nothing to do here */
}
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
index f9a7001dacb7..56a1f7ce78d2 100644
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -275,6 +275,8 @@ int __init opal_event_init(void)
else
name = kasprintf(GFP_KERNEL, "opal");
+ if (!name)
+ continue;
/* Install interrupt handler */
rc = request_irq(r->start, opal_interrupt, r->flags & IRQD_TRIGGER_MASK,
name, NULL);
diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c b/arch/powerpc/platforms/powernv/opal-powercap.c
index 7bfe4cbeb35a..ea917266aa17 100644
--- a/arch/powerpc/platforms/powernv/opal-powercap.c
+++ b/arch/powerpc/platforms/powernv/opal-powercap.c
@@ -196,6 +196,12 @@ void __init opal_powercap_init(void)
j = 0;
pcaps[i].pg.name = kasprintf(GFP_KERNEL, "%pOFn", node);
+ if (!pcaps[i].pg.name) {
+ kfree(pcaps[i].pattrs);
+ kfree(pcaps[i].pg.attrs);
+ goto out_pcaps_pattrs;
+ }
+
if (has_min) {
powercap_add_attr(min, "powercap-min",
&pcaps[i].pattrs[j]);
diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
index 327e2f76905d..b66b06efcef1 100644
--- a/arch/powerpc/platforms/powernv/opal-prd.c
+++ b/arch/powerpc/platforms/powernv/opal-prd.c
@@ -66,6 +66,8 @@ static bool opal_prd_range_is_valid(uint64_t addr, uint64_t size)
const char *label;
addrp = of_get_address(node, 0, &range_size, NULL);
+ if (!addrp)
+ continue;
range_addr = of_read_number(addrp, 2);
range_end = range_addr + range_size;
diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c
index 262cd6fac907..748c2b97fa53 100644
--- a/arch/powerpc/platforms/powernv/opal-xscom.c
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -165,6 +165,11 @@ static int scom_debug_init_one(struct dentry *root, struct device_node *dn,
ent->chip = chip;
snprintf(ent->name, 16, "%08x", chip);
ent->path.data = (void *)kasprintf(GFP_KERNEL, "%pOF", dn);
+ if (!ent->path.data) {
+ kfree(ent);
+ return -ENOMEM;
+ }
+
ent->path.size = strlen((char *)ent->path.data);
dir = debugfs_create_dir(ent->name, root);
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index 191424468f10..393e747541fb 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -425,7 +425,8 @@ static int subcore_init(void)
if (pvr_ver != PVR_POWER8 &&
pvr_ver != PVR_POWER8E &&
- pvr_ver != PVR_POWER8NVL)
+ pvr_ver != PVR_POWER8NVL &&
+ pvr_ver != PVR_HX_C2000)
return 0;
/*
diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig
index a44869e5ea70..e9c1087dd42e 100644
--- a/arch/powerpc/platforms/ps3/Kconfig
+++ b/arch/powerpc/platforms/ps3/Kconfig
@@ -167,16 +167,4 @@ config PS3_LPM
profiling support of the Cell processor with programs like
perfmon2, then say Y or M, otherwise say N.
-config PS3GELIC_UDBG
- bool "PS3 udbg output via UDP broadcasts on Ethernet"
- depends on PPC_PS3
- help
- Enables udbg early debugging output by sending broadcast UDP
- via the Ethernet port (UDP port number 18194).
-
- This driver uses a trivial implementation and is independent
- from the main PS3 gelic network driver.
-
- If in doubt, say N here.
-
endmenu
diff --git a/arch/powerpc/platforms/ps3/Makefile b/arch/powerpc/platforms/ps3/Makefile
index 86bf2967a8d4..bc79bb124d1e 100644
--- a/arch/powerpc/platforms/ps3/Makefile
+++ b/arch/powerpc/platforms/ps3/Makefile
@@ -3,7 +3,7 @@ obj-y += setup.o mm.o time.o hvcall.o htab.o repository.o
obj-y += interrupt.o exports.o os-area.o
obj-y += system-bus.o
-obj-$(CONFIG_PS3GELIC_UDBG) += gelic_udbg.o
+obj-$(CONFIG_PPC_EARLY_DEBUG_PS3GELIC) += gelic_udbg.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SPU_BASE) += spu.o
obj-y += device-init.o
diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c
index e87360a0fb40..878bc160246e 100644
--- a/arch/powerpc/platforms/ps3/device-init.c
+++ b/arch/powerpc/platforms/ps3/device-init.c
@@ -827,6 +827,7 @@ static int ps3_probe_thread(void *data)
if (res)
goto fail_free_irq;
+ set_freezable();
/* Loop here processing the requested notification events. */
do {
try_to_freeze();
diff --git a/arch/powerpc/platforms/ps3/gelic_udbg.c b/arch/powerpc/platforms/ps3/gelic_udbg.c
index 6b298010fd84..a5202c18c236 100644
--- a/arch/powerpc/platforms/ps3/gelic_udbg.c
+++ b/arch/powerpc/platforms/ps3/gelic_udbg.c
@@ -14,6 +14,7 @@
#include <linux/ip.h>
#include <linux/udp.h>
+#include <asm/ps3.h>
#include <asm/io.h>
#include <asm/udbg.h>
#include <asm/lv1call.h>
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 1476c5e4433c..f936962a2946 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -4,6 +4,7 @@ ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG
obj-y := lpar.o hvCall.o nvram.o reconfig.o \
of_helpers.o rtas-work-area.o papr-sysparm.o \
+ papr-vpd.o \
setup.o iommu.o event_sources.o ras.o \
firmware.o power.o dlpar.o mobility.o rng.o \
pci.o pci_dlpar.o eeh_pseries.o msi.o \
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index def184da51cf..b1ae0c0d1187 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -252,7 +252,7 @@ static int pseries_eeh_cap_start(struct pci_dn *pdn)
if (!pdn)
return 0;
- rtas_read_config(pdn, PCI_STATUS, 2, &status);
+ rtas_pci_dn_read_config(pdn, PCI_STATUS, 2, &status);
if (!(status & PCI_STATUS_CAP_LIST))
return 0;
@@ -270,11 +270,11 @@ static int pseries_eeh_find_cap(struct pci_dn *pdn, int cap)
return 0;
while (cnt--) {
- rtas_read_config(pdn, pos, 1, &pos);
+ rtas_pci_dn_read_config(pdn, pos, 1, &pos);
if (pos < 0x40)
break;
pos &= ~3;
- rtas_read_config(pdn, pos + PCI_CAP_LIST_ID, 1, &id);
+ rtas_pci_dn_read_config(pdn, pos + PCI_CAP_LIST_ID, 1, &id);
if (id == 0xff)
break;
if (id == cap)
@@ -294,7 +294,7 @@ static int pseries_eeh_find_ecap(struct pci_dn *pdn, int cap)
if (!edev || !edev->pcie_cap)
return 0;
- if (rtas_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
+ if (rtas_pci_dn_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
return 0;
else if (!header)
return 0;
@@ -307,7 +307,7 @@ static int pseries_eeh_find_ecap(struct pci_dn *pdn, int cap)
if (pos < 256)
break;
- if (rtas_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
+ if (rtas_pci_dn_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
break;
}
@@ -412,8 +412,8 @@ static void pseries_eeh_init_edev(struct pci_dn *pdn)
if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
edev->mode |= EEH_DEV_BRIDGE;
if (edev->pcie_cap) {
- rtas_read_config(pdn, edev->pcie_cap + PCI_EXP_FLAGS,
- 2, &pcie_flags);
+ rtas_pci_dn_read_config(pdn, edev->pcie_cap + PCI_EXP_FLAGS,
+ 2, &pcie_flags);
pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4;
if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT)
edev->mode |= EEH_DEV_ROOT_PORT;
@@ -676,7 +676,7 @@ static int pseries_eeh_read_config(struct eeh_dev *edev, int where, int size, u3
{
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
- return rtas_read_config(pdn, where, size, val);
+ return rtas_pci_dn_read_config(pdn, where, size, val);
}
/**
@@ -692,7 +692,7 @@ static int pseries_eeh_write_config(struct eeh_dev *edev, int where, int size, u
{
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
- return rtas_write_config(pdn, where, size, val);
+ return rtas_pci_dn_write_config(pdn, where, size, val);
}
#ifdef CONFIG_PCI_IOV
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index a43bfb01720a..3fe3ddb30c04 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -208,8 +208,10 @@ static int dlpar_change_lmb_state(struct drmem_lmb *lmb, bool online)
int rc;
mem_block = lmb_to_memblock(lmb);
- if (!mem_block)
+ if (!mem_block) {
+ pr_err("Failed memory block lookup for LMB 0x%x\n", lmb->drc_index);
return -EINVAL;
+ }
if (online && mem_block->dev.offline)
rc = device_online(&mem_block->dev);
@@ -436,14 +438,15 @@ static int dlpar_memory_remove_by_index(u32 drc_index)
}
}
- if (!lmb_found)
+ if (!lmb_found) {
+ pr_debug("Failed to look up LMB for drc index %x\n", drc_index);
rc = -EINVAL;
-
- if (rc)
+ } else if (rc) {
pr_debug("Failed to hot-remove memory at %llx\n",
lmb->base_addr);
- else
+ } else {
pr_debug("Memory at %llx was hot-removed\n", lmb->base_addr);
+ }
return rc;
}
@@ -575,6 +578,7 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
rc = update_lmb_associativity_index(lmb);
if (rc) {
dlpar_release_drc(lmb->drc_index);
+ pr_err("Failed to configure LMB 0x%x\n", lmb->drc_index);
return rc;
}
@@ -588,12 +592,14 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
/* Add the memory */
rc = __add_memory(nid, lmb->base_addr, block_sz, MHP_MEMMAP_ON_MEMORY);
if (rc) {
+ pr_err("Failed to add LMB 0x%x to node %u", lmb->drc_index, nid);
invalidate_lmb_associativity_index(lmb);
return rc;
}
rc = dlpar_online_lmb(lmb);
if (rc) {
+ pr_err("Failed to online LMB 0x%x on node %u\n", lmb->drc_index, nid);
__remove_memory(lmb->base_addr, block_sz);
invalidate_lmb_associativity_index(lmb);
} else {
diff --git a/arch/powerpc/platforms/pseries/papr-sysparm.c b/arch/powerpc/platforms/pseries/papr-sysparm.c
index fedc61599e6c..7063ce8884e4 100644
--- a/arch/powerpc/platforms/pseries/papr-sysparm.c
+++ b/arch/powerpc/platforms/pseries/papr-sysparm.c
@@ -2,14 +2,20 @@
#define pr_fmt(fmt) "papr-sysparm: " fmt
+#include <linux/anon_inodes.h>
#include <linux/bug.h>
+#include <linux/file.h>
+#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/miscdevice.h>
#include <linux/printk.h>
#include <linux/slab.h>
-#include <asm/rtas.h>
+#include <linux/uaccess.h>
+#include <asm/machdep.h>
#include <asm/papr-sysparm.h>
#include <asm/rtas-work-area.h>
+#include <asm/rtas.h>
struct papr_sysparm_buf *papr_sysparm_buf_alloc(void)
{
@@ -23,6 +29,46 @@ void papr_sysparm_buf_free(struct papr_sysparm_buf *buf)
kfree(buf);
}
+static size_t papr_sysparm_buf_get_length(const struct papr_sysparm_buf *buf)
+{
+ return be16_to_cpu(buf->len);
+}
+
+static void papr_sysparm_buf_set_length(struct papr_sysparm_buf *buf, size_t length)
+{
+ WARN_ONCE(length > sizeof(buf->val),
+ "bogus length %zu, clamping to safe value", length);
+ length = min(sizeof(buf->val), length);
+ buf->len = cpu_to_be16(length);
+}
+
+/*
+ * For use on buffers returned from ibm,get-system-parameter before
+ * returning them to callers. Ensures the encoded length of valid data
+ * cannot overrun buf->val[].
+ */
+static void papr_sysparm_buf_clamp_length(struct papr_sysparm_buf *buf)
+{
+ papr_sysparm_buf_set_length(buf, papr_sysparm_buf_get_length(buf));
+}
+
+/*
+ * Perform some basic diligence on the system parameter buffer before
+ * submitting it to RTAS.
+ */
+static bool papr_sysparm_buf_can_submit(const struct papr_sysparm_buf *buf)
+{
+ /*
+ * Firmware ought to reject buffer lengths that exceed the
+ * maximum specified in PAPR, but there's no reason for the
+ * kernel to allow them either.
+ */
+ if (papr_sysparm_buf_get_length(buf) > sizeof(buf->val))
+ return false;
+
+ return true;
+}
+
/**
* papr_sysparm_get() - Retrieve the value of a PAPR system parameter.
* @param: PAPR system parameter token as described in
@@ -47,7 +93,6 @@ void papr_sysparm_buf_free(struct papr_sysparm_buf *buf)
*
* Return: 0 on success, -errno otherwise. @buf is unmodified on error.
*/
-
int papr_sysparm_get(papr_sysparm_t param, struct papr_sysparm_buf *buf)
{
const s32 token = rtas_function_token(RTAS_FN_IBM_GET_SYSTEM_PARAMETER);
@@ -63,6 +108,9 @@ int papr_sysparm_get(papr_sysparm_t param, struct papr_sysparm_buf *buf)
if (token == RTAS_UNKNOWN_SERVICE)
return -ENOENT;
+ if (!papr_sysparm_buf_can_submit(buf))
+ return -EINVAL;
+
work_area = rtas_work_area_alloc(sizeof(*buf));
memcpy(rtas_work_area_raw_buf(work_area), buf, sizeof(*buf));
@@ -77,6 +125,7 @@ int papr_sysparm_get(papr_sysparm_t param, struct papr_sysparm_buf *buf)
case 0:
ret = 0;
memcpy(buf, rtas_work_area_raw_buf(work_area), sizeof(*buf));
+ papr_sysparm_buf_clamp_length(buf);
break;
case -3: /* parameter not implemented */
ret = -EOPNOTSUPP;
@@ -115,6 +164,9 @@ int papr_sysparm_set(papr_sysparm_t param, const struct papr_sysparm_buf *buf)
if (token == RTAS_UNKNOWN_SERVICE)
return -ENOENT;
+ if (!papr_sysparm_buf_can_submit(buf))
+ return -EINVAL;
+
work_area = rtas_work_area_alloc(sizeof(*buf));
memcpy(rtas_work_area_raw_buf(work_area), buf, sizeof(*buf));
@@ -149,3 +201,152 @@ int papr_sysparm_set(papr_sysparm_t param, const struct papr_sysparm_buf *buf)
return ret;
}
+
+static struct papr_sysparm_buf *
+papr_sysparm_buf_from_user(const struct papr_sysparm_io_block __user *user_iob)
+{
+ struct papr_sysparm_buf *kern_spbuf;
+ long err;
+ u16 len;
+
+ /*
+ * The length of valid data that userspace claims to be in
+ * user_iob->data[].
+ */
+ if (get_user(len, &user_iob->length))
+ return ERR_PTR(-EFAULT);
+
+ static_assert(sizeof(user_iob->data) >= PAPR_SYSPARM_MAX_INPUT);
+ static_assert(sizeof(kern_spbuf->val) >= PAPR_SYSPARM_MAX_INPUT);
+
+ if (len > PAPR_SYSPARM_MAX_INPUT)
+ return ERR_PTR(-EINVAL);
+
+ kern_spbuf = papr_sysparm_buf_alloc();
+ if (!kern_spbuf)
+ return ERR_PTR(-ENOMEM);
+
+ papr_sysparm_buf_set_length(kern_spbuf, len);
+
+ if (len > 0 && copy_from_user(kern_spbuf->val, user_iob->data, len)) {
+ err = -EFAULT;
+ goto free_sysparm_buf;
+ }
+
+ return kern_spbuf;
+
+free_sysparm_buf:
+ papr_sysparm_buf_free(kern_spbuf);
+ return ERR_PTR(err);
+}
+
+static int papr_sysparm_buf_to_user(const struct papr_sysparm_buf *kern_spbuf,
+ struct papr_sysparm_io_block __user *user_iob)
+{
+ u16 len_out = papr_sysparm_buf_get_length(kern_spbuf);
+
+ if (put_user(len_out, &user_iob->length))
+ return -EFAULT;
+
+ static_assert(sizeof(user_iob->data) >= PAPR_SYSPARM_MAX_OUTPUT);
+ static_assert(sizeof(kern_spbuf->val) >= PAPR_SYSPARM_MAX_OUTPUT);
+
+ if (copy_to_user(user_iob->data, kern_spbuf->val, PAPR_SYSPARM_MAX_OUTPUT))
+ return -EFAULT;
+
+ return 0;
+}
+
+static long papr_sysparm_ioctl_get(struct papr_sysparm_io_block __user *user_iob)
+{
+ struct papr_sysparm_buf *kern_spbuf;
+ papr_sysparm_t param;
+ long ret;
+
+ if (get_user(param.token, &user_iob->parameter))
+ return -EFAULT;
+
+ kern_spbuf = papr_sysparm_buf_from_user(user_iob);
+ if (IS_ERR(kern_spbuf))
+ return PTR_ERR(kern_spbuf);
+
+ ret = papr_sysparm_get(param, kern_spbuf);
+ if (ret)
+ goto free_sysparm_buf;
+
+ ret = papr_sysparm_buf_to_user(kern_spbuf, user_iob);
+ if (ret)
+ goto free_sysparm_buf;
+
+ ret = 0;
+
+free_sysparm_buf:
+ papr_sysparm_buf_free(kern_spbuf);
+ return ret;
+}
+
+
+static long papr_sysparm_ioctl_set(struct papr_sysparm_io_block __user *user_iob)
+{
+ struct papr_sysparm_buf *kern_spbuf;
+ papr_sysparm_t param;
+ long ret;
+
+ if (get_user(param.token, &user_iob->parameter))
+ return -EFAULT;
+
+ kern_spbuf = papr_sysparm_buf_from_user(user_iob);
+ if (IS_ERR(kern_spbuf))
+ return PTR_ERR(kern_spbuf);
+
+ ret = papr_sysparm_set(param, kern_spbuf);
+ if (ret)
+ goto free_sysparm_buf;
+
+ ret = 0;
+
+free_sysparm_buf:
+ papr_sysparm_buf_free(kern_spbuf);
+ return ret;
+}
+
+static long papr_sysparm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
+{
+ void __user *argp = (__force void __user *)arg;
+ long ret;
+
+ switch (ioctl) {
+ case PAPR_SYSPARM_IOC_GET:
+ ret = papr_sysparm_ioctl_get(argp);
+ break;
+ case PAPR_SYSPARM_IOC_SET:
+ if (filp->f_mode & FMODE_WRITE)
+ ret = papr_sysparm_ioctl_set(argp);
+ else
+ ret = -EBADF;
+ break;
+ default:
+ ret = -ENOIOCTLCMD;
+ break;
+ }
+ return ret;
+}
+
+static const struct file_operations papr_sysparm_ops = {
+ .unlocked_ioctl = papr_sysparm_ioctl,
+};
+
+static struct miscdevice papr_sysparm_dev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "papr-sysparm",
+ .fops = &papr_sysparm_ops,
+};
+
+static __init int papr_sysparm_init(void)
+{
+ if (!rtas_function_implemented(RTAS_FN_IBM_GET_SYSTEM_PARAMETER))
+ return -ENODEV;
+
+ return misc_register(&papr_sysparm_dev);
+}
+machine_device_initcall(pseries, papr_sysparm_init);
diff --git a/arch/powerpc/platforms/pseries/papr-vpd.c b/arch/powerpc/platforms/pseries/papr-vpd.c
new file mode 100644
index 000000000000..c29e85db5f35
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr-vpd.c
@@ -0,0 +1,541 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define pr_fmt(fmt) "papr-vpd: " fmt
+
+#include <linux/anon_inodes.h>
+#include <linux/build_bug.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/lockdep.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/signal.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/string_helpers.h>
+#include <linux/uaccess.h>
+#include <asm/machdep.h>
+#include <asm/papr-vpd.h>
+#include <asm/rtas-work-area.h>
+#include <asm/rtas.h>
+#include <uapi/asm/papr-vpd.h>
+
+/*
+ * Function-specific return values for ibm,get-vpd, derived from PAPR+
+ * v2.13 7.3.20 "ibm,get-vpd RTAS Call".
+ */
+#define RTAS_IBM_GET_VPD_COMPLETE 0 /* All VPD has been retrieved. */
+#define RTAS_IBM_GET_VPD_MORE_DATA 1 /* More VPD is available. */
+#define RTAS_IBM_GET_VPD_START_OVER -4 /* VPD changed, restart call sequence. */
+
+/**
+ * struct rtas_ibm_get_vpd_params - Parameters (in and out) for ibm,get-vpd.
+ * @loc_code: In: Caller-provided location code buffer. Must be RTAS-addressable.
+ * @work_area: In: Caller-provided work area buffer for results.
+ * @sequence: In: Sequence number. Out: Next sequence number.
+ * @written: Out: Bytes written by ibm,get-vpd to @work_area.
+ * @status: Out: RTAS call status.
+ */
+struct rtas_ibm_get_vpd_params {
+ const struct papr_location_code *loc_code;
+ struct rtas_work_area *work_area;
+ u32 sequence;
+ u32 written;
+ s32 status;
+};
+
+/**
+ * rtas_ibm_get_vpd() - Call ibm,get-vpd to fill a work area buffer.
+ * @params: See &struct rtas_ibm_get_vpd_params.
+ *
+ * Calls ibm,get-vpd until it errors or successfully deposits data
+ * into the supplied work area. Handles RTAS retry statuses. Maps RTAS
+ * error statuses to reasonable errno values.
+ *
+ * The caller is expected to invoke rtas_ibm_get_vpd() multiple times
+ * to retrieve all the VPD for the provided location code. Only one
+ * sequence should be in progress at any time; starting a new sequence
+ * will disrupt any sequence already in progress. Serialization of VPD
+ * retrieval sequences is the responsibility of the caller.
+ *
+ * The caller should inspect @params.status to determine whether more
+ * calls are needed to complete the sequence.
+ *
+ * Context: May sleep.
+ * Return: -ve on error, 0 otherwise.
+ */
+static int rtas_ibm_get_vpd(struct rtas_ibm_get_vpd_params *params)
+{
+ const struct papr_location_code *loc_code = params->loc_code;
+ struct rtas_work_area *work_area = params->work_area;
+ u32 rets[2];
+ s32 fwrc;
+ int ret;
+
+ lockdep_assert_held(&rtas_ibm_get_vpd_lock);
+
+ do {
+ fwrc = rtas_call(rtas_function_token(RTAS_FN_IBM_GET_VPD), 4, 3,
+ rets,
+ __pa(loc_code),
+ rtas_work_area_phys(work_area),
+ rtas_work_area_size(work_area),
+ params->sequence);
+ } while (rtas_busy_delay(fwrc));
+
+ switch (fwrc) {
+ case RTAS_HARDWARE_ERROR:
+ ret = -EIO;
+ break;
+ case RTAS_INVALID_PARAMETER:
+ ret = -EINVAL;
+ break;
+ case RTAS_IBM_GET_VPD_START_OVER:
+ ret = -EAGAIN;
+ break;
+ case RTAS_IBM_GET_VPD_MORE_DATA:
+ params->sequence = rets[0];
+ fallthrough;
+ case RTAS_IBM_GET_VPD_COMPLETE:
+ params->written = rets[1];
+ /*
+ * Kernel or firmware bug, do not continue.
+ */
+ if (WARN(params->written > rtas_work_area_size(work_area),
+ "possible write beyond end of work area"))
+ ret = -EFAULT;
+ else
+ ret = 0;
+ break;
+ default:
+ ret = -EIO;
+ pr_err_ratelimited("unexpected ibm,get-vpd status %d\n", fwrc);
+ break;
+ }
+
+ params->status = fwrc;
+ return ret;
+}
+
+/*
+ * Internal VPD "blob" APIs for accumulating ibm,get-vpd results into
+ * an immutable buffer to be attached to a file descriptor.
+ */
+struct vpd_blob {
+ const char *data;
+ size_t len;
+};
+
+static bool vpd_blob_has_data(const struct vpd_blob *blob)
+{
+ return blob->data && blob->len;
+}
+
+static void vpd_blob_free(const struct vpd_blob *blob)
+{
+ if (blob) {
+ kvfree(blob->data);
+ kfree(blob);
+ }
+}
+
+/**
+ * vpd_blob_extend() - Append data to a &struct vpd_blob.
+ * @blob: The blob to extend.
+ * @data: The new data to append to @blob.
+ * @len: The length of @data.
+ *
+ * Context: May sleep.
+ * Return: -ENOMEM on allocation failure, 0 otherwise.
+ */
+static int vpd_blob_extend(struct vpd_blob *blob, const char *data, size_t len)
+{
+ const size_t new_len = blob->len + len;
+ const size_t old_len = blob->len;
+ const char *old_ptr = blob->data;
+ char *new_ptr;
+
+ new_ptr = old_ptr ?
+ kvrealloc(old_ptr, old_len, new_len, GFP_KERNEL_ACCOUNT) :
+ kvmalloc(len, GFP_KERNEL_ACCOUNT);
+
+ if (!new_ptr)
+ return -ENOMEM;
+
+ memcpy(&new_ptr[old_len], data, len);
+ blob->data = new_ptr;
+ blob->len = new_len;
+ return 0;
+}
+
+/**
+ * vpd_blob_generate() - Construct a new &struct vpd_blob.
+ * @generator: Function that supplies the blob data.
+ * @arg: Context pointer supplied by caller, passed to @generator.
+ *
+ * The @generator callback is invoked until it returns NULL. @arg is
+ * passed to @generator in its first argument on each call. When
+ * @generator returns data, it should store the data length in its
+ * second argument.
+ *
+ * Context: May sleep.
+ * Return: A completely populated &struct vpd_blob, or NULL on error.
+ */
+static const struct vpd_blob *
+vpd_blob_generate(const char * (*generator)(void *, size_t *), void *arg)
+{
+ struct vpd_blob *blob;
+ const char *buf;
+ size_t len;
+ int err = 0;
+
+ blob = kzalloc(sizeof(*blob), GFP_KERNEL_ACCOUNT);
+ if (!blob)
+ return NULL;
+
+ while (err == 0 && (buf = generator(arg, &len)))
+ err = vpd_blob_extend(blob, buf, len);
+
+ if (err != 0 || !vpd_blob_has_data(blob))
+ goto free_blob;
+
+ return blob;
+free_blob:
+ vpd_blob_free(blob);
+ return NULL;
+}
+
+/*
+ * Internal VPD sequence APIs. A VPD sequence is a series of calls to
+ * ibm,get-vpd for a given location code. The sequence ends when an
+ * error is encountered or all VPD for the location code has been
+ * returned.
+ */
+
+/**
+ * struct vpd_sequence - State for managing a VPD sequence.
+ * @error: Shall be zero as long as the sequence has not encountered an error,
+ * -ve errno otherwise. Use vpd_sequence_set_err() to update this.
+ * @params: Parameter block to pass to rtas_ibm_get_vpd().
+ */
+struct vpd_sequence {
+ int error;
+ struct rtas_ibm_get_vpd_params params;
+};
+
+/**
+ * vpd_sequence_begin() - Begin a VPD retrieval sequence.
+ * @seq: Uninitialized sequence state.
+ * @loc_code: Location code that defines the scope of the VPD to return.
+ *
+ * Initializes @seq with the resources necessary to carry out a VPD
+ * sequence. Callers must pass @seq to vpd_sequence_end() regardless
+ * of whether the sequence succeeds.
+ *
+ * Context: May sleep.
+ */
+static void vpd_sequence_begin(struct vpd_sequence *seq,
+ const struct papr_location_code *loc_code)
+{
+ /*
+ * Use a static data structure for the location code passed to
+ * RTAS to ensure it's in the RMA and avoid a separate work
+ * area allocation. Guarded by the function lock.
+ */
+ static struct papr_location_code static_loc_code;
+
+ /*
+ * We could allocate the work area before acquiring the
+ * function lock, but that would allow concurrent requests to
+ * exhaust the limited work area pool for no benefit. So
+ * allocate the work area under the lock.
+ */
+ mutex_lock(&rtas_ibm_get_vpd_lock);
+ static_loc_code = *loc_code;
+ *seq = (struct vpd_sequence) {
+ .params = {
+ .work_area = rtas_work_area_alloc(SZ_4K),
+ .loc_code = &static_loc_code,
+ .sequence = 1,
+ },
+ };
+}
+
+/**
+ * vpd_sequence_end() - Finalize a VPD retrieval sequence.
+ * @seq: Sequence state.
+ *
+ * Releases resources obtained by vpd_sequence_begin().
+ */
+static void vpd_sequence_end(struct vpd_sequence *seq)
+{
+ rtas_work_area_free(seq->params.work_area);
+ mutex_unlock(&rtas_ibm_get_vpd_lock);
+}
+
+/**
+ * vpd_sequence_should_stop() - Determine whether a VPD retrieval sequence
+ * should continue.
+ * @seq: VPD sequence state.
+ *
+ * Examines the sequence error state and outputs of the last call to
+ * ibm,get-vpd to determine whether the sequence in progress should
+ * continue or stop.
+ *
+ * Return: True if the sequence has encountered an error or if all VPD for
+ * this sequence has been retrieved. False otherwise.
+ */
+static bool vpd_sequence_should_stop(const struct vpd_sequence *seq)
+{
+ bool done;
+
+ if (seq->error)
+ return true;
+
+ switch (seq->params.status) {
+ case 0:
+ if (seq->params.written == 0)
+ done = false; /* Initial state. */
+ else
+ done = true; /* All data consumed. */
+ break;
+ case 1:
+ done = false; /* More data available. */
+ break;
+ default:
+ done = true; /* Error encountered. */
+ break;
+ }
+
+ return done;
+}
+
+static int vpd_sequence_set_err(struct vpd_sequence *seq, int err)
+{
+ /* Preserve the first error recorded. */
+ if (seq->error == 0)
+ seq->error = err;
+
+ return seq->error;
+}
+
+/*
+ * Generator function to be passed to vpd_blob_generate().
+ */
+static const char *vpd_sequence_fill_work_area(void *arg, size_t *len)
+{
+ struct vpd_sequence *seq = arg;
+ struct rtas_ibm_get_vpd_params *p = &seq->params;
+
+ if (vpd_sequence_should_stop(seq))
+ return NULL;
+ if (vpd_sequence_set_err(seq, rtas_ibm_get_vpd(p)))
+ return NULL;
+ *len = p->written;
+ return rtas_work_area_raw_buf(p->work_area);
+}
+
+/*
+ * Higher-level VPD retrieval code below. These functions use the
+ * vpd_blob_* and vpd_sequence_* APIs defined above to create fd-based
+ * VPD handles for consumption by user space.
+ */
+
+/**
+ * papr_vpd_run_sequence() - Run a single VPD retrieval sequence.
+ * @loc_code: Location code that defines the scope of VPD to return.
+ *
+ * Context: May sleep. Holds a mutex and an RTAS work area for its
+ * duration. Typically performs multiple sleepable slab
+ * allocations.
+ *
+ * Return: A populated &struct vpd_blob on success. Encoded error
+ * pointer otherwise.
+ */
+static const struct vpd_blob *papr_vpd_run_sequence(const struct papr_location_code *loc_code)
+{
+ const struct vpd_blob *blob;
+ struct vpd_sequence seq;
+
+ vpd_sequence_begin(&seq, loc_code);
+ blob = vpd_blob_generate(vpd_sequence_fill_work_area, &seq);
+ if (!blob)
+ vpd_sequence_set_err(&seq, -ENOMEM);
+ vpd_sequence_end(&seq);
+
+ if (seq.error) {
+ vpd_blob_free(blob);
+ return ERR_PTR(seq.error);
+ }
+
+ return blob;
+}
+
+/**
+ * papr_vpd_retrieve() - Return the VPD for a location code.
+ * @loc_code: Location code that defines the scope of VPD to return.
+ *
+ * Run VPD sequences against @loc_code until a blob is successfully
+ * instantiated, or a hard error is encountered, or a fatal signal is
+ * pending.
+ *
+ * Context: May sleep.
+ * Return: A fully populated VPD blob when successful. Encoded error
+ * pointer otherwise.
+ */
+static const struct vpd_blob *papr_vpd_retrieve(const struct papr_location_code *loc_code)
+{
+ const struct vpd_blob *blob;
+
+ /*
+ * EAGAIN means the sequence errored with a -4 (VPD changed)
+ * status from ibm,get-vpd, and we should attempt a new
+ * sequence. PAPR+ v2.13 R1–7.3.20–5 indicates that this
+ * should be a transient condition, not something that happens
+ * continuously. But we'll stop trying on a fatal signal.
+ */
+ do {
+ blob = papr_vpd_run_sequence(loc_code);
+ if (!IS_ERR(blob)) /* Success. */
+ break;
+ if (PTR_ERR(blob) != -EAGAIN) /* Hard error. */
+ break;
+ pr_info_ratelimited("VPD changed during retrieval, retrying\n");
+ cond_resched();
+ } while (!fatal_signal_pending(current));
+
+ return blob;
+}
+
+static ssize_t papr_vpd_handle_read(struct file *file, char __user *buf, size_t size, loff_t *off)
+{
+ const struct vpd_blob *blob = file->private_data;
+
+ /* bug: we should not instantiate a handle without any data attached. */
+ if (!vpd_blob_has_data(blob)) {
+ pr_err_once("handle without data\n");
+ return -EIO;
+ }
+
+ return simple_read_from_buffer(buf, size, off, blob->data, blob->len);
+}
+
+static int papr_vpd_handle_release(struct inode *inode, struct file *file)
+{
+ const struct vpd_blob *blob = file->private_data;
+
+ vpd_blob_free(blob);
+
+ return 0;
+}
+
+static loff_t papr_vpd_handle_seek(struct file *file, loff_t off, int whence)
+{
+ const struct vpd_blob *blob = file->private_data;
+
+ return fixed_size_llseek(file, off, whence, blob->len);
+}
+
+
+static const struct file_operations papr_vpd_handle_ops = {
+ .read = papr_vpd_handle_read,
+ .llseek = papr_vpd_handle_seek,
+ .release = papr_vpd_handle_release,
+};
+
+/**
+ * papr_vpd_create_handle() - Create a fd-based handle for reading VPD.
+ * @ulc: Location code in user memory; defines the scope of the VPD to
+ * retrieve.
+ *
+ * Handler for PAPR_VPD_IOC_CREATE_HANDLE ioctl command. Validates
+ * @ulc and instantiates an immutable VPD "blob" for it. The blob is
+ * attached to a file descriptor for reading by user space. The memory
+ * backing the blob is freed when the file is released.
+ *
+ * The entire requested VPD is retrieved by this call and all
+ * necessary RTAS interactions are performed before returning the fd
+ * to user space. This keeps the read handler simple and ensures that
+ * the kernel can prevent interleaving of ibm,get-vpd call sequences.
+ *
+ * Return: The installed fd number if successful, -ve errno otherwise.
+ */
+static long papr_vpd_create_handle(struct papr_location_code __user *ulc)
+{
+ struct papr_location_code klc;
+ const struct vpd_blob *blob;
+ struct file *file;
+ long err;
+ int fd;
+
+ if (copy_from_user(&klc, ulc, sizeof(klc)))
+ return -EFAULT;
+
+ if (!string_is_terminated(klc.str, ARRAY_SIZE(klc.str)))
+ return -EINVAL;
+
+ blob = papr_vpd_retrieve(&klc);
+ if (IS_ERR(blob))
+ return PTR_ERR(blob);
+
+ fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC);
+ if (fd < 0) {
+ err = fd;
+ goto free_blob;
+ }
+
+ file = anon_inode_getfile("[papr-vpd]", &papr_vpd_handle_ops,
+ (void *)blob, O_RDONLY);
+ if (IS_ERR(file)) {
+ err = PTR_ERR(file);
+ goto put_fd;
+ }
+
+ file->f_mode |= FMODE_LSEEK | FMODE_PREAD;
+ fd_install(fd, file);
+ return fd;
+put_fd:
+ put_unused_fd(fd);
+free_blob:
+ vpd_blob_free(blob);
+ return err;
+}
+
+/*
+ * Top-level ioctl handler for /dev/papr-vpd.
+ */
+static long papr_vpd_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
+{
+ void __user *argp = (__force void __user *)arg;
+ long ret;
+
+ switch (ioctl) {
+ case PAPR_VPD_IOC_CREATE_HANDLE:
+ ret = papr_vpd_create_handle(argp);
+ break;
+ default:
+ ret = -ENOIOCTLCMD;
+ break;
+ }
+ return ret;
+}
+
+static const struct file_operations papr_vpd_ops = {
+ .unlocked_ioctl = papr_vpd_dev_ioctl,
+};
+
+static struct miscdevice papr_vpd_dev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "papr-vpd",
+ .fops = &papr_vpd_ops,
+};
+
+static __init int papr_vpd_init(void)
+{
+ if (!rtas_function_implemented(RTAS_FN_IBM_GET_VPD))
+ return -ENODEV;
+
+ return misc_register(&papr_vpd_dev);
+}
+machine_device_initcall(pseries, papr_vpd_init);
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 8376f03f932a..bba4ad192b0f 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -55,6 +55,7 @@ extern int dlpar_detach_node(struct device_node *);
extern int dlpar_acquire_drc(u32 drc_index);
extern int dlpar_release_drc(u32 drc_index);
extern int dlpar_unisolate_drc(u32 drc_index);
+extern void post_mobility_fixup(void);
void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog);
int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_errlog);
diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c
index 5c43435472cc..382003dfdb9a 100644
--- a/arch/powerpc/platforms/pseries/suspend.c
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -13,6 +13,7 @@
#include <asm/mmu.h>
#include <asm/rtas.h>
#include <asm/topology.h>
+#include "pseries.h"
static struct device suspend_dev;
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index b1f25bac280b..71d52a670d95 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -385,11 +385,15 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
* same fault IRQ is not freed by the OS before.
*/
mutex_lock(&vas_pseries_mutex);
- if (migration_in_progress)
+ if (migration_in_progress) {
rc = -EBUSY;
- else
+ } else {
rc = allocate_setup_window(txwin, (u64 *)&domain[0],
cop_feat_caps->win_type);
+ if (!rc)
+ caps->nr_open_wins_progress++;
+ }
+
mutex_unlock(&vas_pseries_mutex);
if (rc)
goto out;
@@ -404,8 +408,17 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
goto out_free;
txwin->win_type = cop_feat_caps->win_type;
- mutex_lock(&vas_pseries_mutex);
+
/*
+ * The migration SUSPEND thread sets migration_in_progress and
+ * closes all open windows from the list. But the window is
+ * added to the list after open and modify HCALLs. So possible
+ * that migration_in_progress is set before modify HCALL which
+ * may cause some windows are still open when the hypervisor
+ * initiates the migration.
+ * So checks the migration_in_progress flag again and close all
+ * open windows.
+ *
* Possible to lose the acquired credit with DLPAR core
* removal after the window is opened. So if there are any
* closed windows (means with lost credits), do not give new
@@ -413,9 +426,11 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
* after the existing windows are reopened when credits are
* available.
*/
- if (!caps->nr_close_wins) {
+ mutex_lock(&vas_pseries_mutex);
+ if (!caps->nr_close_wins && !migration_in_progress) {
list_add(&txwin->win_list, &caps->list);
caps->nr_open_windows++;
+ caps->nr_open_wins_progress--;
mutex_unlock(&vas_pseries_mutex);
vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
return &txwin->vas_win;
@@ -433,6 +448,12 @@ out_free:
*/
free_irq_setup(txwin);
h_deallocate_vas_window(txwin->vas_win.winid);
+ /*
+ * Hold mutex and reduce nr_open_wins_progress counter.
+ */
+ mutex_lock(&vas_pseries_mutex);
+ caps->nr_open_wins_progress--;
+ mutex_unlock(&vas_pseries_mutex);
out:
atomic_dec(&cop_feat_caps->nr_used_credits);
kfree(txwin);
@@ -937,14 +958,14 @@ int vas_migration_handler(int action)
struct vas_caps *vcaps;
int i, rc = 0;
+ pr_info("VAS migration event %d\n", action);
+
/*
* NX-GZIP is not enabled. Nothing to do for migration.
*/
if (!copypaste_feat)
return rc;
- mutex_lock(&vas_pseries_mutex);
-
if (action == VAS_SUSPEND)
migration_in_progress = true;
else
@@ -990,12 +1011,27 @@ int vas_migration_handler(int action)
switch (action) {
case VAS_SUSPEND:
+ mutex_lock(&vas_pseries_mutex);
rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows,
true);
+ /*
+ * Windows are included in the list after successful
+ * open. So wait for closing these in-progress open
+ * windows in vas_allocate_window() which will be
+ * done if the migration_in_progress is set.
+ */
+ while (vcaps->nr_open_wins_progress) {
+ mutex_unlock(&vas_pseries_mutex);
+ msleep(10);
+ mutex_lock(&vas_pseries_mutex);
+ }
+ mutex_unlock(&vas_pseries_mutex);
break;
case VAS_RESUME:
+ mutex_lock(&vas_pseries_mutex);
atomic_set(&caps->nr_total_credits, new_nr_creds);
rc = reconfig_open_windows(vcaps, new_nr_creds, true);
+ mutex_unlock(&vas_pseries_mutex);
break;
default:
/* should not happen */
@@ -1011,8 +1047,9 @@ int vas_migration_handler(int action)
goto out;
}
+ pr_info("VAS migration event (%d) successful\n", action);
+
out:
- mutex_unlock(&vas_pseries_mutex);
return rc;
}
diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h
index 7115043ec488..45567cd13178 100644
--- a/arch/powerpc/platforms/pseries/vas.h
+++ b/arch/powerpc/platforms/pseries/vas.h
@@ -91,6 +91,8 @@ struct vas_cop_feat_caps {
struct vas_caps {
struct vas_cop_feat_caps caps;
struct list_head list; /* List of open windows */
+ int nr_open_wins_progress; /* Number of open windows in */
+ /* progress. Used in migration */
int nr_close_wins; /* closed windows in the hypervisor for DLPAR */
int nr_open_windows; /* Number of successful open windows */
u8 feat; /* Feature type */
diff --git a/arch/powerpc/sysdev/grackle.c b/arch/powerpc/sysdev/grackle.c
index fd2f94a884f0..7dce8278b71e 100644
--- a/arch/powerpc/sysdev/grackle.c
+++ b/arch/powerpc/sysdev/grackle.c
@@ -18,24 +18,8 @@
#define GRACKLE_CFA(b, d, o) (0x80 | ((b) << 8) | ((d) << 16) \
| (((o) & ~3) << 24))
-#define GRACKLE_PICR1_STG 0x00000040
#define GRACKLE_PICR1_LOOPSNOOP 0x00000010
-/* N.B. this is called before bridges is initialized, so we can't
- use grackle_pcibios_{read,write}_config_dword. */
-static inline void grackle_set_stg(struct pci_controller* bp, int enable)
-{
- unsigned int val;
-
- out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8));
- val = in_le32(bp->cfg_data);
- val = enable? (val | GRACKLE_PICR1_STG) :
- (val & ~GRACKLE_PICR1_STG);
- out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8));
- out_le32(bp->cfg_data, val);
- (void)in_le32(bp->cfg_data);
-}
-
static inline void grackle_set_loop_snoop(struct pci_controller *bp, int enable)
{
unsigned int val;
@@ -56,7 +40,4 @@ void __init setup_grackle(struct pci_controller *hose)
pci_add_flags(PCI_REASSIGN_ALL_BUS);
if (of_machine_is_compatible("AAPL,PowerBook1998"))
grackle_set_loop_snoop(hose, 1);
-#if 0 /* Disabled for now, HW problems ??? */
- grackle_set_stg(hose, 1);
-#endif
}
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index f6ec6dba92dc..700b67476a7d 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -236,6 +236,8 @@ static int __init icp_native_map_one_cpu(int hw_id, unsigned long addr,
rname = kasprintf(GFP_KERNEL, "CPU %d [0x%x] Interrupt Presentation",
cpu, hw_id);
+ if (!rname)
+ return -ENOMEM;
if (!request_mem_region(addr, size, rname)) {
pr_warn("icp_native: Could not reserve ICP MMIO for CPU %d, interrupt server #0x%x\n",
cpu, hw_id);
diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts
index 90b261114763..dce96f27cc89 100644
--- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts
+++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts
@@ -8,9 +8,6 @@
#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/leds/common.h>
-/* Clock frequency (in Hz) of the rtcclk */
-#define RTCCLK_FREQ 1000000
-
/ {
model = "Microchip PolarFire-SoC Icicle Kit";
compatible = "microchip,mpfs-icicle-reference-rtlv2210", "microchip,mpfs-icicle-kit",
@@ -29,10 +26,6 @@
stdout-path = "serial1:115200n8";
};
- cpus {
- timebase-frequency = <RTCCLK_FREQ>;
- };
-
leds {
compatible = "gpio-leds";
diff --git a/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts b/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts
index 184cb36a175e..a8d623ee9fa4 100644
--- a/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts
+++ b/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts
@@ -10,9 +10,6 @@
#include "mpfs.dtsi"
#include "mpfs-m100pfs-fabric.dtsi"
-/* Clock frequency (in Hz) of the rtcclk */
-#define MTIMER_FREQ 1000000
-
/ {
model = "Aries Embedded M100PFEVPS";
compatible = "aries,m100pfsevp", "microchip,mpfs";
@@ -33,10 +30,6 @@
stdout-path = "serial1:115200n8";
};
- cpus {
- timebase-frequency = <MTIMER_FREQ>;
- };
-
ddrc_cache_lo: memory@80000000 {
device_type = "memory";
reg = <0x0 0x80000000 0x0 0x40000000>;
diff --git a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts
index c87cc2d8fe29..ea0808ab1042 100644
--- a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts
+++ b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts
@@ -6,9 +6,6 @@
#include "mpfs.dtsi"
#include "mpfs-polarberry-fabric.dtsi"
-/* Clock frequency (in Hz) of the rtcclk */
-#define MTIMER_FREQ 1000000
-
/ {
model = "Sundance PolarBerry";
compatible = "sundance,polarberry", "microchip,mpfs";
@@ -22,10 +19,6 @@
stdout-path = "serial0:115200n8";
};
- cpus {
- timebase-frequency = <MTIMER_FREQ>;
- };
-
ddrc_cache_lo: memory@80000000 {
device_type = "memory";
reg = <0x0 0x80000000 0x0 0x2e000000>;
diff --git a/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts
index 013cb666c72d..f9a890579438 100644
--- a/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts
+++ b/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts
@@ -6,9 +6,6 @@
#include "mpfs.dtsi"
#include "mpfs-sev-kit-fabric.dtsi"
-/* Clock frequency (in Hz) of the rtcclk */
-#define MTIMER_FREQ 1000000
-
/ {
#address-cells = <2>;
#size-cells = <2>;
@@ -28,10 +25,6 @@
stdout-path = "serial1:115200n8";
};
- cpus {
- timebase-frequency = <MTIMER_FREQ>;
- };
-
reserved-memory {
#address-cells = <2>;
#size-cells = <2>;
diff --git a/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts b/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts
index e0797c7e1b35..d1120f5f2c01 100644
--- a/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts
+++ b/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts
@@ -11,9 +11,6 @@
#include "mpfs.dtsi"
#include "mpfs-tysom-m-fabric.dtsi"
-/* Clock frequency (in Hz) of the rtcclk */
-#define MTIMER_FREQ 1000000
-
/ {
model = "Aldec TySOM-M-MPFS250T-REV2";
compatible = "aldec,tysom-m-mpfs250t-rev2", "microchip,mpfs";
@@ -34,10 +31,6 @@
stdout-path = "serial1:115200n8";
};
- cpus {
- timebase-frequency = <MTIMER_FREQ>;
- };
-
ddrc_cache_lo: memory@80000000 {
device_type = "memory";
reg = <0x0 0x80000000 0x0 0x30000000>;
diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi
index a6faf24f1dba..266489d43912 100644
--- a/arch/riscv/boot/dts/microchip/mpfs.dtsi
+++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi
@@ -13,6 +13,7 @@
cpus {
#address-cells = <1>;
#size-cells = <0>;
+ timebase-frequency = <1000000>;
cpu0: cpu@0 {
compatible = "sifive,e51", "sifive,rocket0", "riscv";
diff --git a/arch/riscv/boot/dts/sophgo/cv1800b.dtsi b/arch/riscv/boot/dts/sophgo/cv1800b.dtsi
index df40e87ee063..aec6401a467b 100644
--- a/arch/riscv/boot/dts/sophgo/cv1800b.dtsi
+++ b/arch/riscv/boot/dts/sophgo/cv1800b.dtsi
@@ -34,7 +34,6 @@
cpu0_intc: interrupt-controller {
compatible = "riscv,cpu-intc";
interrupt-controller;
- #address-cells = <0>;
#interrupt-cells = <1>;
};
};
diff --git a/arch/riscv/errata/andes/errata.c b/arch/riscv/errata/andes/errata.c
index 197db68cc8da..17a904869724 100644
--- a/arch/riscv/errata/andes/errata.c
+++ b/arch/riscv/errata/andes/errata.c
@@ -38,29 +38,35 @@ static long ax45mp_iocp_sw_workaround(void)
return ret.error ? 0 : ret.value;
}
-static bool errata_probe_iocp(unsigned int stage, unsigned long arch_id, unsigned long impid)
+static void errata_probe_iocp(unsigned int stage, unsigned long arch_id, unsigned long impid)
{
+ static bool done;
+
if (!IS_ENABLED(CONFIG_ERRATA_ANDES_CMO))
- return false;
+ return;
+
+ if (done)
+ return;
+
+ done = true;
if (arch_id != ANDESTECH_AX45MP_MARCHID || impid != ANDESTECH_AX45MP_MIMPID)
- return false;
+ return;
if (!ax45mp_iocp_sw_workaround())
- return false;
+ return;
/* Set this just to make core cbo code happy */
riscv_cbom_block_size = 1;
riscv_noncoherent_supported();
-
- return true;
}
void __init_or_module andes_errata_patch_func(struct alt_entry *begin, struct alt_entry *end,
unsigned long archid, unsigned long impid,
unsigned int stage)
{
- errata_probe_iocp(stage, archid, impid);
+ if (stage == RISCV_ALTERNATIVES_BOOT)
+ errata_probe_iocp(stage, archid, impid);
/* we have nothing to patch here ATM so just return back */
}
diff --git a/arch/riscv/include/asm/syscall_wrapper.h b/arch/riscv/include/asm/syscall_wrapper.h
index 1d7942c8a6cb..eeec04b7dae6 100644
--- a/arch/riscv/include/asm/syscall_wrapper.h
+++ b/arch/riscv/include/asm/syscall_wrapper.h
@@ -46,9 +46,6 @@ asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *);
return sys_ni_syscall(); \
}
-#define COMPAT_SYS_NI(name) \
- SYSCALL_ALIAS(__riscv_compat_sys_##name, sys_ni_posix_timers);
-
#endif /* CONFIG_COMPAT */
#define __SYSCALL_DEFINEx(x, name, ...) \
@@ -82,6 +79,4 @@ asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *);
return sys_ni_syscall(); \
}
-#define SYS_NI(name) SYSCALL_ALIAS(__riscv_sys_##name, sys_ni_posix_timers);
-
#endif /* __ASM_SYSCALL_WRAPPER_H */
diff --git a/arch/riscv/include/asm/topology.h b/arch/riscv/include/asm/topology.h
index e316ab3b77f3..61183688bdd5 100644
--- a/arch/riscv/include/asm/topology.h
+++ b/arch/riscv/include/asm/topology.h
@@ -9,6 +9,7 @@
#define arch_set_freq_scale topology_set_freq_scale
#define arch_scale_freq_capacity topology_get_freq_scale
#define arch_scale_freq_invariant topology_scale_freq_invariant
+#define arch_scale_freq_ref topology_get_freq_ref
/* Replace task scheduler's default cpu-invariant accounting */
#define arch_scale_cpu_capacity topology_get_cpu_scale
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index b77397432403..76ace1e0b46f 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -154,7 +154,6 @@ secondary_start_sbi:
XIP_FIXUP_OFFSET a3
add a3, a3, a1
REG_L sp, (a3)
- scs_load_current
.Lsecondary_start_common:
@@ -165,6 +164,7 @@ secondary_start_sbi:
call relocate_enable_mmu
#endif
call .Lsetup_trap_vector
+ scs_load_current
tail smp_callin
#endif /* CONFIG_SMP */
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 56a8c78e9e21..aac019ed63b1 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -40,15 +40,6 @@ struct relocation_handlers {
long buffer);
};
-unsigned int initialize_relocation_hashtable(unsigned int num_relocations);
-void process_accumulated_relocations(struct module *me);
-int add_relocation_to_accumulate(struct module *me, int type, void *location,
- unsigned int hashtable_bits, Elf_Addr v);
-
-struct hlist_head *relocation_hashtable;
-
-struct list_head used_buckets_list;
-
/*
* The auipc+jalr instruction pair can reach any PC-relative offset
* in the range [-2^31 - 2^11, 2^31 - 2^11)
@@ -64,7 +55,7 @@ static bool riscv_insn_valid_32bit_offset(ptrdiff_t val)
static int riscv_insn_rmw(void *location, u32 keep, u32 set)
{
- u16 *parcel = location;
+ __le16 *parcel = location;
u32 insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16;
insn &= keep;
@@ -77,7 +68,7 @@ static int riscv_insn_rmw(void *location, u32 keep, u32 set)
static int riscv_insn_rvc_rmw(void *location, u16 keep, u16 set)
{
- u16 *parcel = location;
+ __le16 *parcel = location;
u16 insn = le16_to_cpu(*parcel);
insn &= keep;
@@ -604,7 +595,10 @@ static const struct relocation_handlers reloc_handlers[] = {
/* 192-255 nonstandard ABI extensions */
};
-void process_accumulated_relocations(struct module *me)
+static void
+process_accumulated_relocations(struct module *me,
+ struct hlist_head **relocation_hashtable,
+ struct list_head *used_buckets_list)
{
/*
* Only ADD/SUB/SET/ULEB128 should end up here.
@@ -624,18 +618,25 @@ void process_accumulated_relocations(struct module *me)
* - Each relocation entry for a location address
*/
struct used_bucket *bucket_iter;
+ struct used_bucket *bucket_iter_tmp;
struct relocation_head *rel_head_iter;
+ struct hlist_node *rel_head_iter_tmp;
struct relocation_entry *rel_entry_iter;
+ struct relocation_entry *rel_entry_iter_tmp;
int curr_type;
void *location;
long buffer;
- list_for_each_entry(bucket_iter, &used_buckets_list, head) {
- hlist_for_each_entry(rel_head_iter, bucket_iter->bucket, node) {
+ list_for_each_entry_safe(bucket_iter, bucket_iter_tmp,
+ used_buckets_list, head) {
+ hlist_for_each_entry_safe(rel_head_iter, rel_head_iter_tmp,
+ bucket_iter->bucket, node) {
buffer = 0;
location = rel_head_iter->location;
- list_for_each_entry(rel_entry_iter,
- rel_head_iter->rel_entry, head) {
+ list_for_each_entry_safe(rel_entry_iter,
+ rel_entry_iter_tmp,
+ rel_head_iter->rel_entry,
+ head) {
curr_type = rel_entry_iter->type;
reloc_handlers[curr_type].reloc_handler(
me, &buffer, rel_entry_iter->value);
@@ -648,11 +649,14 @@ void process_accumulated_relocations(struct module *me)
kfree(bucket_iter);
}
- kfree(relocation_hashtable);
+ kfree(*relocation_hashtable);
}
-int add_relocation_to_accumulate(struct module *me, int type, void *location,
- unsigned int hashtable_bits, Elf_Addr v)
+static int add_relocation_to_accumulate(struct module *me, int type,
+ void *location,
+ unsigned int hashtable_bits, Elf_Addr v,
+ struct hlist_head *relocation_hashtable,
+ struct list_head *used_buckets_list)
{
struct relocation_entry *entry;
struct relocation_head *rel_head;
@@ -661,6 +665,10 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location,
unsigned long hash;
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+
+ if (!entry)
+ return -ENOMEM;
+
INIT_LIST_HEAD(&entry->head);
entry->type = type;
entry->value = v;
@@ -669,7 +677,10 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location,
current_head = &relocation_hashtable[hash];
- /* Find matching location (if any) */
+ /*
+ * Search for the relocation_head for the relocations that happen at the
+ * provided location
+ */
bool found = false;
struct relocation_head *rel_head_iter;
@@ -681,19 +692,45 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location,
}
}
+ /*
+ * If there has not yet been any relocations at the provided location,
+ * create a relocation_head for that location and populate it with this
+ * relocation_entry.
+ */
if (!found) {
rel_head = kmalloc(sizeof(*rel_head), GFP_KERNEL);
+
+ if (!rel_head) {
+ kfree(entry);
+ return -ENOMEM;
+ }
+
rel_head->rel_entry =
kmalloc(sizeof(struct list_head), GFP_KERNEL);
+
+ if (!rel_head->rel_entry) {
+ kfree(entry);
+ kfree(rel_head);
+ return -ENOMEM;
+ }
+
INIT_LIST_HEAD(rel_head->rel_entry);
rel_head->location = location;
INIT_HLIST_NODE(&rel_head->node);
if (!current_head->first) {
bucket =
kmalloc(sizeof(struct used_bucket), GFP_KERNEL);
+
+ if (!bucket) {
+ kfree(entry);
+ kfree(rel_head);
+ kfree(rel_head->rel_entry);
+ return -ENOMEM;
+ }
+
INIT_LIST_HEAD(&bucket->head);
bucket->bucket = current_head;
- list_add(&bucket->head, &used_buckets_list);
+ list_add(&bucket->head, used_buckets_list);
}
hlist_add_head(&rel_head->node, current_head);
}
@@ -704,7 +741,9 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location,
return 0;
}
-unsigned int initialize_relocation_hashtable(unsigned int num_relocations)
+static unsigned int
+initialize_relocation_hashtable(unsigned int num_relocations,
+ struct hlist_head **relocation_hashtable)
{
/* Can safely assume that bits is not greater than sizeof(long) */
unsigned long hashtable_size = roundup_pow_of_two(num_relocations);
@@ -720,12 +759,13 @@ unsigned int initialize_relocation_hashtable(unsigned int num_relocations)
hashtable_size <<= should_double_size;
- relocation_hashtable = kmalloc_array(hashtable_size,
- sizeof(*relocation_hashtable),
- GFP_KERNEL);
- __hash_init(relocation_hashtable, hashtable_size);
+ *relocation_hashtable = kmalloc_array(hashtable_size,
+ sizeof(*relocation_hashtable),
+ GFP_KERNEL);
+ if (!*relocation_hashtable)
+ return -ENOMEM;
- INIT_LIST_HEAD(&used_buckets_list);
+ __hash_init(*relocation_hashtable, hashtable_size);
return hashtable_bits;
}
@@ -742,7 +782,17 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
Elf_Addr v;
int res;
unsigned int num_relocations = sechdrs[relsec].sh_size / sizeof(*rel);
- unsigned int hashtable_bits = initialize_relocation_hashtable(num_relocations);
+ struct hlist_head *relocation_hashtable;
+ struct list_head used_buckets_list;
+ unsigned int hashtable_bits;
+
+ hashtable_bits = initialize_relocation_hashtable(num_relocations,
+ &relocation_hashtable);
+
+ if (hashtable_bits < 0)
+ return hashtable_bits;
+
+ INIT_LIST_HEAD(&used_buckets_list);
pr_debug("Applying relocate section %u to %u\n", relsec,
sechdrs[relsec].sh_info);
@@ -823,14 +873,18 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
}
if (reloc_handlers[type].accumulate_handler)
- res = add_relocation_to_accumulate(me, type, location, hashtable_bits, v);
+ res = add_relocation_to_accumulate(me, type, location,
+ hashtable_bits, v,
+ relocation_hashtable,
+ &used_buckets_list);
else
res = handler(me, location, v);
if (res)
return res;
}
- process_accumulated_relocations(me);
+ process_accumulated_relocations(me, &relocation_hashtable,
+ &used_buckets_list);
return 0;
}
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index c712037dbe10..a2ca5b7756a5 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -169,7 +169,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
pair->value &= ~missing;
}
-static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext)
+static bool hwprobe_ext0_has(const struct cpumask *cpus, u64 ext)
{
struct riscv_hwprobe pair;
diff --git a/arch/riscv/kernel/tests/module_test/test_uleb128.S b/arch/riscv/kernel/tests/module_test/test_uleb128.S
index 90f22049d553..8515ed7cd8c1 100644
--- a/arch/riscv/kernel/tests/module_test/test_uleb128.S
+++ b/arch/riscv/kernel/tests/module_test/test_uleb128.S
@@ -6,13 +6,13 @@
.text
.global test_uleb_basic
test_uleb_basic:
- ld a0, second
+ lw a0, second
addi a0, a0, -127
ret
.global test_uleb_large
test_uleb_large:
- ld a0, fourth
+ lw a0, fourth
addi a0, a0, -0x07e8
ret
@@ -22,10 +22,10 @@ first:
second:
.reloc second, R_RISCV_SET_ULEB128, second
.reloc second, R_RISCV_SUB_ULEB128, first
- .dword 0
+ .word 0
third:
.space 1000
fourth:
.reloc fourth, R_RISCV_SET_ULEB128, fourth
.reloc fourth, R_RISCV_SUB_ULEB128, third
- .dword 0
+ .word 0
diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
index 5eba37147caa..5255f8134aef 100644
--- a/arch/riscv/kernel/traps_misaligned.c
+++ b/arch/riscv/kernel/traps_misaligned.c
@@ -550,16 +550,14 @@ int handle_misaligned_store(struct pt_regs *regs)
} else if ((insn & INSN_MASK_C_SD) == INSN_MATCH_C_SD) {
len = 8;
val.data_ulong = GET_RS2S(insn, regs);
- } else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP &&
- ((insn >> SH_RD) & 0x1f)) {
+ } else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP) {
len = 8;
val.data_ulong = GET_RS2C(insn, regs);
#endif
} else if ((insn & INSN_MASK_C_SW) == INSN_MATCH_C_SW) {
len = 4;
val.data_ulong = GET_RS2S(insn, regs);
- } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP &&
- ((insn >> SH_RD) & 0x1f)) {
+ } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP) {
len = 4;
val.data_ulong = GET_RS2C(insn, regs);
} else if ((insn & INSN_MASK_C_FSD) == INSN_MATCH_C_FSD) {
diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c
index 6cf23b8adb71..e808723a85f1 100644
--- a/arch/riscv/kvm/aia_imsic.c
+++ b/arch/riscv/kvm/aia_imsic.c
@@ -55,6 +55,7 @@ struct imsic {
/* IMSIC SW-file */
struct imsic_mrif *swfile;
phys_addr_t swfile_pa;
+ spinlock_t swfile_extirq_lock;
};
#define imsic_vs_csr_read(__c) \
@@ -613,12 +614,23 @@ static void imsic_swfile_extirq_update(struct kvm_vcpu *vcpu)
{
struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
struct imsic_mrif *mrif = imsic->swfile;
+ unsigned long flags;
+
+ /*
+ * The critical section is necessary during external interrupt
+ * updates to avoid the risk of losing interrupts due to potential
+ * interruptions between reading topei and updating pending status.
+ */
+
+ spin_lock_irqsave(&imsic->swfile_extirq_lock, flags);
if (imsic_mrif_atomic_read(mrif, &mrif->eidelivery) &&
imsic_mrif_topei(mrif, imsic->nr_eix, imsic->nr_msis))
kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT);
else
kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
+
+ spin_unlock_irqrestore(&imsic->swfile_extirq_lock, flags);
}
static void imsic_swfile_read(struct kvm_vcpu *vcpu, bool clear,
@@ -1039,6 +1051,7 @@ int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu)
}
imsic->swfile = page_to_virt(swfile_page);
imsic->swfile_pa = page_to_phys(swfile_page);
+ spin_lock_init(&imsic->swfile_extirq_lock);
/* Setup IO device */
kvm_iodevice_init(&imsic->iodev, &imsic_iodoev_ops);
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 90d4ba36d1d0..081339ddf47e 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -304,6 +304,8 @@ void handle_page_fault(struct pt_regs *regs)
goto done;
}
count_vm_vma_lock_event(VMA_LOCK_RETRY);
+ if (fault & VM_FAULT_MAJOR)
+ flags |= FAULT_FLAG_TRIED;
if (fault_signal_pending(fault, regs)) {
if (!user_mode(regs))
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index d5d8f99d1f25..959adae10f80 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -146,7 +146,7 @@ config S390
select GENERIC_TIME_VSYSCALL
select GENERIC_VDSO_TIME_NS
select GENERIC_IOREMAP if PCI
- select HAVE_ALIGNED_STRUCT_PAGE if SLUB
+ select HAVE_ALIGNED_STRUCT_PAGE
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 438cd92e6080..6de44ede4e14 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -44,8 +44,7 @@ CONFIG_KEXEC_FILE=y
CONFIG_KEXEC_SIG=y
CONFIG_CRASH_DUMP=y
CONFIG_LIVEPATCH=y
-CONFIG_MARCH_ZEC12=y
-CONFIG_TUNE_ZEC12=y
+CONFIG_MARCH_Z13=y
CONFIG_NR_CPUS=512
CONFIG_NUMA=y
CONFIG_HZ_100=y
@@ -76,7 +75,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
-CONFIG_MODULE_SIG_SHA256=y
CONFIG_BLK_DEV_THROTTLING=y
CONFIG_BLK_WBT=y
CONFIG_BLK_CGROUP_IOLATENCY=y
@@ -93,6 +91,7 @@ CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_IOSCHED_BFQ=y
CONFIG_BINFMT_MISC=m
CONFIG_ZSWAP=y
+CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y
CONFIG_ZSMALLOC_STAT=y
CONFIG_SLUB_STATS=y
# CONFIG_COMPAT_BRK is not set
@@ -619,6 +618,9 @@ CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_BTRFS_DEBUG=y
CONFIG_BTRFS_ASSERT=y
CONFIG_NILFS2_FS=m
+CONFIG_BCACHEFS_FS=y
+CONFIG_BCACHEFS_QUOTA=y
+CONFIG_BCACHEFS_POSIX_ACL=y
CONFIG_FS_DAX=y
CONFIG_EXPORTFS_BLOCK_OPS=y
CONFIG_FS_ENCRYPTION=y
@@ -691,7 +693,6 @@ CONFIG_PERSISTENT_KEYRINGS=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_KEY_NOTIFICATIONS=y
CONFIG_SECURITY=y
-CONFIG_SECURITY_NETWORK=y
CONFIG_HARDENED_USERCOPY=y
CONFIG_FORTIFY_SOURCE=y
CONFIG_SECURITY_SELINUX=y
@@ -834,7 +835,6 @@ CONFIG_DEBUG_IRQFLAGS=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
-CONFIG_DEBUG_CREDENTIALS=y
CONFIG_RCU_TORTURE_TEST=m
CONFIG_RCU_REF_SCALE_TEST=m
CONFIG_RCU_CPU_STALL_TIMEOUT=300
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 1b8150e50f6a..bcae47da6b7c 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -42,8 +42,7 @@ CONFIG_KEXEC_FILE=y
CONFIG_KEXEC_SIG=y
CONFIG_CRASH_DUMP=y
CONFIG_LIVEPATCH=y
-CONFIG_MARCH_ZEC12=y
-CONFIG_TUNE_ZEC12=y
+CONFIG_MARCH_Z13=y
CONFIG_NR_CPUS=512
CONFIG_NUMA=y
CONFIG_HZ_100=y
@@ -71,7 +70,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
-CONFIG_MODULE_SIG_SHA256=y
CONFIG_BLK_DEV_THROTTLING=y
CONFIG_BLK_WBT=y
CONFIG_BLK_CGROUP_IOLATENCY=y
@@ -88,6 +86,7 @@ CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_IOSCHED_BFQ=y
CONFIG_BINFMT_MISC=m
CONFIG_ZSWAP=y
+CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y
CONFIG_ZSMALLOC_STAT=y
# CONFIG_COMPAT_BRK is not set
CONFIG_MEMORY_HOTPLUG=y
@@ -605,6 +604,9 @@ CONFIG_OCFS2_FS=m
CONFIG_BTRFS_FS=y
CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_NILFS2_FS=m
+CONFIG_BCACHEFS_FS=m
+CONFIG_BCACHEFS_QUOTA=y
+CONFIG_BCACHEFS_POSIX_ACL=y
CONFIG_FS_DAX=y
CONFIG_EXPORTFS_BLOCK_OPS=y
CONFIG_FS_ENCRYPTION=y
@@ -677,7 +679,6 @@ CONFIG_PERSISTENT_KEYRINGS=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_KEY_NOTIFICATIONS=y
CONFIG_SECURITY=y
-CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_LOCKDOWN_LSM=y
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index b831083b4edd..47028450eee1 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -9,8 +9,7 @@ CONFIG_BPF_SYSCALL=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_CRASH_DUMP=y
-CONFIG_MARCH_ZEC12=y
-CONFIG_TUNE_ZEC12=y
+CONFIG_MARCH_Z13=y
# CONFIG_COMPAT is not set
CONFIG_NR_CPUS=2
CONFIG_HZ_100=y
diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
index b714ed0ef688..9acf48e53a87 100644
--- a/arch/s390/include/asm/fpu/api.h
+++ b/arch/s390/include/asm/fpu/api.h
@@ -79,7 +79,7 @@ static inline int test_fp_ctl(u32 fpc)
#define KERNEL_VXR_HIGH (KERNEL_VXR_V16V23|KERNEL_VXR_V24V31)
#define KERNEL_VXR (KERNEL_VXR_LOW|KERNEL_VXR_HIGH)
-#define KERNEL_FPR (KERNEL_FPC|KERNEL_VXR_V0V7)
+#define KERNEL_FPR (KERNEL_FPC|KERNEL_VXR_LOW)
struct kernel_fpu;
diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h
index 9286430fe729..35c1d1b860d8 100644
--- a/arch/s390/include/asm/syscall_wrapper.h
+++ b/arch/s390/include/asm/syscall_wrapper.h
@@ -63,10 +63,6 @@
cond_syscall(__s390x_sys_##name); \
cond_syscall(__s390_sys_##name)
-#define SYS_NI(name) \
- SYSCALL_ALIAS(__s390x_sys_##name, sys_ni_posix_timers); \
- SYSCALL_ALIAS(__s390_sys_##name, sys_ni_posix_timers)
-
#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \
long __s390_compat_sys##name(struct pt_regs *regs); \
ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO); \
@@ -85,15 +81,11 @@
/*
* As some compat syscalls may not be implemented, we need to expand
- * COND_SYSCALL_COMPAT in kernel/sys_ni.c and COMPAT_SYS_NI in
- * kernel/time/posix-stubs.c to cover this case as well.
+ * COND_SYSCALL_COMPAT in kernel/sys_ni.c to cover this case as well.
*/
#define COND_SYSCALL_COMPAT(name) \
cond_syscall(__s390_compat_sys_##name)
-#define COMPAT_SYS_NI(name) \
- SYSCALL_ALIAS(__s390_compat_sys_##name, sys_ni_posix_timers)
-
#define __S390_SYS_STUBx(x, name, ...) \
long __s390_sys##name(struct pt_regs *regs); \
ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \
@@ -124,9 +116,6 @@
#define COND_SYSCALL(name) \
cond_syscall(__s390x_sys_##name)
-#define SYS_NI(name) \
- SYSCALL_ALIAS(__s390x_sys_##name, sys_ni_posix_timers)
-
#define __S390_SYS_STUBx(x, fullname, name, ...)
#endif /* CONFIG_COMPAT */
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 86fec9b080f6..5f5cd20ebb34 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -459,3 +459,5 @@
454 common futex_wake sys_futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait sys_futex_wait
456 common futex_requeue sys_futex_requeue sys_futex_requeue
+457 common statmount sys_statmount sys_statmount
+458 common listmount sys_listmount sys_listmount
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 02dcbe82a8e5..8207a892bbe2 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -587,10 +587,6 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
if (!gmap_is_shadow(gmap))
return;
- if (start >= 1UL << 31)
- /* We are only interested in prefix pages */
- return;
-
/*
* Only new shadow blocks are added to the list during runtime,
* therefore we can safely reference them all the time.
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 249aefcf7c4e..ab4098886e56 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -337,6 +337,9 @@ static void do_exception(struct pt_regs *regs, int access)
return;
}
count_vm_vma_lock_event(VMA_LOCK_RETRY);
+ if (fault & VM_FAULT_MAJOR)
+ flags |= FAULT_FLAG_TRIED;
+
/* Quick path to respond to signals */
if (fault_signal_pending(fault, regs)) {
if (!user_mode(regs))
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 3bd2ab2a9a34..5cb92941540b 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -756,7 +756,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
pte_clear(mm, addr, ptep);
}
if (reset)
- pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
+ pgste_val(pgste) &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
pgste_set_unlock(ptep, pgste);
preempt_enable();
}
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index 363fae0fe9bf..3103ebd2e4cb 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -459,3 +459,5 @@
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait
456 common futex_requeue sys_futex_requeue
+457 common statmount sys_statmount
+458 common listmount sys_listmount
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index 7bcaa3d5ea44..ba147d7ad19a 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -502,3 +502,5 @@
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait
456 common futex_requeue sys_futex_requeue
+457 common statmount sys_statmount
+458 common listmount sys_listmount
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 04941a1ffc0a..53f2e7797b1d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -170,7 +170,7 @@ config X86
select HAS_IOPORT
select HAVE_ACPI_APEI if ACPI
select HAVE_ACPI_APEI_NMI if ACPI
- select HAVE_ALIGNED_STRUCT_PAGE if SLUB
+ select HAVE_ALIGNED_STRUCT_PAGE
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
select HAVE_ARCH_HUGE_VMALLOC if X86_64
@@ -385,10 +385,6 @@ config HAVE_INTEL_TXT
def_bool y
depends on INTEL_IOMMU && ACPI
-config X86_32_SMP
- def_bool y
- depends on X86_32 && SMP
-
config X86_64_SMP
def_bool y
depends on X86_64 && SMP
@@ -1416,7 +1412,7 @@ config HIGHMEM4G
config HIGHMEM64G
bool "64GB"
- depends on !M486SX && !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !MWINCHIP3D && !MK6
+ depends on X86_HAVE_PAE
select X86_PAE
help
Select this if you have a 32-bit processor and more than 4
@@ -1473,7 +1469,7 @@ config HIGHMEM
config X86_PAE
bool "PAE (Physical Address Extension) Support"
- depends on X86_32 && !HIGHMEM4G
+ depends on X86_32 && X86_HAVE_PAE
select PHYS_ADDR_T_64BIT
select SWIOTLB
help
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 00468adf180f..b9224cf2ee4d 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -362,9 +362,13 @@ config X86_TSC
def_bool y
depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64
+config X86_HAVE_PAE
+ def_bool y
+ depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC7 || MCORE2 || MATOM || X86_64
+
config X86_CMPXCHG64
def_bool y
- depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8
+ depends on X86_HAVE_PAE || M586TSC || M586MMX || MK6 || MK7
# this should be set for all -march=.. options where the compiler
# generates cmov.
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 71fc531b95b4..f19c038409aa 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -53,7 +53,7 @@ KBUILD_CFLAGS += -D__DISABLE_EXPORTS
KBUILD_CFLAGS += $(call cc-option,-Wa$(comma)-mrelax-relocations=no)
KBUILD_CFLAGS += -include $(srctree)/include/linux/hidden.h
-# sev.c indirectly inludes inat-table.h which is generated during
+# sev.c indirectly includes inat-table.h which is generated during
# compilation and stored in $(objtree). Add the directory to the includes so
# that the compiler finds it even with out-of-tree builds (make O=/some/path).
CFLAGS_sev.o += -I$(objtree)/arch/x86/lib/
diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c
index 473ba59b82a8..d040080d7edb 100644
--- a/arch/x86/boot/compressed/ident_map_64.c
+++ b/arch/x86/boot/compressed/ident_map_64.c
@@ -386,3 +386,8 @@ void do_boot_page_fault(struct pt_regs *regs, unsigned long error_code)
*/
kernel_add_identity_map(address, end);
}
+
+void do_boot_nmi_trap(struct pt_regs *regs, unsigned long error_code)
+{
+ /* Empty handler to ignore NMI during early boot */
+}
diff --git a/arch/x86/boot/compressed/idt_64.c b/arch/x86/boot/compressed/idt_64.c
index 3cdf94b41456..d100284bbef4 100644
--- a/arch/x86/boot/compressed/idt_64.c
+++ b/arch/x86/boot/compressed/idt_64.c
@@ -61,6 +61,7 @@ void load_stage2_idt(void)
boot_idt_desc.address = (unsigned long)boot_idt;
set_idt_entry(X86_TRAP_PF, boot_page_fault);
+ set_idt_entry(X86_TRAP_NMI, boot_nmi_trap);
#ifdef CONFIG_AMD_MEM_ENCRYPT
/*
diff --git a/arch/x86/boot/compressed/idt_handlers_64.S b/arch/x86/boot/compressed/idt_handlers_64.S
index 22890e199f5b..4d03c8562f63 100644
--- a/arch/x86/boot/compressed/idt_handlers_64.S
+++ b/arch/x86/boot/compressed/idt_handlers_64.S
@@ -70,6 +70,7 @@ SYM_FUNC_END(\name)
.code64
EXCEPTION_HANDLER boot_page_fault do_boot_page_fault error_code=1
+EXCEPTION_HANDLER boot_nmi_trap do_boot_nmi_trap error_code=0
#ifdef CONFIG_AMD_MEM_ENCRYPT
EXCEPTION_HANDLER boot_stage1_vc do_vc_no_ghcb error_code=1
diff --git a/arch/x86/boot/compressed/mem.c b/arch/x86/boot/compressed/mem.c
index b3c3a4be7471..dbba332e4a12 100644
--- a/arch/x86/boot/compressed/mem.c
+++ b/arch/x86/boot/compressed/mem.c
@@ -8,7 +8,7 @@
/*
* accept_memory() and process_unaccepted_memory() called from EFI stub which
- * runs before decompresser and its early_tdx_detect().
+ * runs before decompressor and its early_tdx_detect().
*
* Enumerate TDX directly from the early users.
*/
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index c0d502bd8716..bc2f0f17fb90 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -196,6 +196,7 @@ static inline void cleanup_exception_handling(void) { }
/* IDT Entry Points */
void boot_page_fault(void);
+void boot_nmi_trap(void);
void boot_stage1_vc(void);
void boot_stage2_vc(void);
diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c
index 40031a614712..5941f930f6c5 100644
--- a/arch/x86/boot/pm.c
+++ b/arch/x86/boot/pm.c
@@ -11,6 +11,7 @@
*/
#include "boot.h"
+#include <asm/desc_defs.h>
#include <asm/segment.h>
/*
@@ -67,13 +68,13 @@ static void setup_gdt(void)
being 8-byte unaligned. Intel recommends 16 byte alignment. */
static const u64 boot_gdt[] __attribute__((aligned(16))) = {
/* CS: code, read/execute, 4 GB, base 0 */
- [GDT_ENTRY_BOOT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff),
+ [GDT_ENTRY_BOOT_CS] = GDT_ENTRY(DESC_CODE32, 0, 0xfffff),
/* DS: data, read/write, 4 GB, base 0 */
- [GDT_ENTRY_BOOT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff),
+ [GDT_ENTRY_BOOT_DS] = GDT_ENTRY(DESC_DATA32, 0, 0xfffff),
/* TSS: 32-bit tss, 104 bytes, base 4096 */
/* We only have a TSS here to keep Intel VT happy;
we don't actually use it for anything. */
- [GDT_ENTRY_BOOT_TSS] = GDT_ENTRY(0x0089, 4096, 103),
+ [GDT_ENTRY_BOOT_TSS] = GDT_ENTRY(DESC_TSS32, 4096, 103),
};
/* Xen HVM incorrectly stores a pointer to the gdt_ptr, instead
of the gdt_ptr contents. Thus, make it static so it will
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 1c8541ae3b3a..c23f3b9c84fe 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -49,7 +49,7 @@ int strcmp(const char *str1, const char *str2)
{
const unsigned char *s1 = (const unsigned char *)str1;
const unsigned char *s2 = (const unsigned char *)str2;
- int delta = 0;
+ int delta;
while (*s1 || *s2) {
delta = *s1 - *s2;
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 1b5d17a9f70d..c1cb90369915 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -10,6 +10,7 @@
#include <asm/coco.h>
#include <asm/tdx.h>
#include <asm/vmx.h>
+#include <asm/ia32.h>
#include <asm/insn.h>
#include <asm/insn-eval.h>
#include <asm/pgtable.h>
@@ -886,7 +887,7 @@ void __init tdx_early_init(void)
* there.
*
* Intel-TDX has a secure RDMSR hypercall, but that needs to be
- * implemented seperately in the low level startup ASM code.
+ * implemented separately in the low level startup ASM code.
* Until that is in place, disable parallel bringup for TDX.
*/
x86_cpuinit.parallel_bringup = false;
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 187f913cc239..411d8c83e88a 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -666,7 +666,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
.ifc \operation, dec
movdqa %xmm1, %xmm3
- pxor %xmm1, %xmm9 # Cyphertext XOR E(K, Yn)
+ pxor %xmm1, %xmm9 # Ciphertext XOR E(K, Yn)
mov \PLAIN_CYPH_LEN, %r10
add %r13, %r10
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
index 74dd230973cf..8c9749ed0651 100644
--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -747,7 +747,7 @@ VARIABLE_OFFSET = 16*8
.if \ENC_DEC == DEC
vmovdqa %xmm1, %xmm3
- pxor %xmm1, %xmm9 # Cyphertext XOR E(K, Yn)
+ pxor %xmm1, %xmm9 # Ciphertext XOR E(K, Yn)
mov \PLAIN_CYPH_LEN, %r10
add %r13, %r10
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 81ce0f4db555..bbcff1fb78cb 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -184,7 +184,7 @@ SYM_FUNC_START(crc_pcl)
xor crc1,crc1
xor crc2,crc2
- # Fall thruogh into top of crc array (crc_128)
+ # Fall through into top of crc array (crc_128)
################################################################
## 3) CRC Array:
diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S
index d902b8ea0721..5bfce4b045fd 100644
--- a/arch/x86/crypto/sha512-avx-asm.S
+++ b/arch/x86/crypto/sha512-avx-asm.S
@@ -84,7 +84,7 @@ frame_size = frame_WK + WK_SIZE
# Useful QWORD "arrays" for simpler memory references
# MSG, DIGEST, K_t, W_t are arrays
-# WK_2(t) points to 1 of 2 qwords at frame.WK depdending on t being odd/even
+# WK_2(t) points to 1 of 2 qwords at frame.WK depending on t being odd/even
# Input message (arg1)
#define MSG(i) 8*i(msg)
diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S
index 65be30156816..30a2c4777f9d 100644
--- a/arch/x86/crypto/sha512-ssse3-asm.S
+++ b/arch/x86/crypto/sha512-ssse3-asm.S
@@ -82,7 +82,7 @@ frame_size = frame_WK + WK_SIZE
# Useful QWORD "arrays" for simpler memory references
# MSG, DIGEST, K_t, W_t are arrays
-# WK_2(t) points to 1 of 2 qwords at frame.WK depdending on t being odd/even
+# WK_2(t) points to 1 of 2 qwords at frame.WK depending on t being odd/even
# Input message (arg1)
#define MSG(i) 8*i(msg)
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index f6907627172b..9f1d94790a54 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -175,8 +175,7 @@ For 32-bit we have the following conventions - kernel is built with
#define THIS_CPU_user_pcid_flush_mask \
PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
-.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
- ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+.macro SWITCH_TO_USER_CR3 scratch_reg:req scratch_reg2:req
mov %cr3, \scratch_reg
ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
@@ -206,13 +205,20 @@ For 32-bit we have the following conventions - kernel is built with
/* Flip the PGD to the user version */
orq $(PTI_USER_PGTABLE_MASK), \scratch_reg
mov \scratch_reg, %cr3
+.endm
+
+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+ SWITCH_TO_USER_CR3 \scratch_reg \scratch_reg2
.Lend_\@:
.endm
.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
pushq %rax
- SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
+ SWITCH_TO_USER_CR3 scratch_reg=\scratch_reg scratch_reg2=%rax
popq %rax
+.Lend_\@:
.endm
.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index d813160b14d8..6356060caaf3 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -26,6 +26,7 @@
#include <xen/events.h>
#endif
+#include <asm/apic.h>
#include <asm/desc.h>
#include <asm/traps.h>
#include <asm/vdso.h>
@@ -167,7 +168,96 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
}
}
-/* Handles int $0x80 */
+#ifdef CONFIG_IA32_EMULATION
+static __always_inline bool int80_is_external(void)
+{
+ const unsigned int offs = (0x80 / 32) * 0x10;
+ const u32 bit = BIT(0x80 % 32);
+
+ /* The local APIC on XENPV guests is fake */
+ if (cpu_feature_enabled(X86_FEATURE_XENPV))
+ return false;
+
+ /*
+ * If vector 0x80 is set in the APIC ISR then this is an external
+ * interrupt. Either from broken hardware or injected by a VMM.
+ *
+ * Note: In guest mode this is only valid for secure guests where
+ * the secure module fully controls the vAPIC exposed to the guest.
+ */
+ return apic_read(APIC_ISR + offs) & bit;
+}
+
+/**
+ * int80_emulation - 32-bit legacy syscall entry
+ *
+ * This entry point can be used by 32-bit and 64-bit programs to perform
+ * 32-bit system calls. Instances of INT $0x80 can be found inline in
+ * various programs and libraries. It is also used by the vDSO's
+ * __kernel_vsyscall fallback for hardware that doesn't support a faster
+ * entry method. Restarted 32-bit system calls also fall back to INT
+ * $0x80 regardless of what instruction was originally used to do the
+ * system call.
+ *
+ * This is considered a slow path. It is not used by most libc
+ * implementations on modern hardware except during process startup.
+ *
+ * The arguments for the INT $0x80 based syscall are on stack in the
+ * pt_regs structure:
+ * eax: system call number
+ * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6
+ */
+DEFINE_IDTENTRY_RAW(int80_emulation)
+{
+ int nr;
+
+ /* Kernel does not use INT $0x80! */
+ if (unlikely(!user_mode(regs))) {
+ irqentry_enter(regs);
+ instrumentation_begin();
+ panic("Unexpected external interrupt 0x80\n");
+ }
+
+ /*
+ * Establish kernel context for instrumentation, including for
+ * int80_is_external() below which calls into the APIC driver.
+ * Identical for soft and external interrupts.
+ */
+ enter_from_user_mode(regs);
+
+ instrumentation_begin();
+ add_random_kstack_offset();
+
+ /* Validate that this is a soft interrupt to the extent possible */
+ if (unlikely(int80_is_external()))
+ panic("Unexpected external interrupt 0x80\n");
+
+ /*
+ * The low level idtentry code pushed -1 into regs::orig_ax
+ * and regs::ax contains the syscall number.
+ *
+ * User tracing code (ptrace or signal handlers) might assume
+ * that the regs::orig_ax contains a 32-bit number on invoking
+ * a 32-bit syscall.
+ *
+ * Establish the syscall convention by saving the 32bit truncated
+ * syscall number in regs::orig_ax and by invalidating regs::ax.
+ */
+ regs->orig_ax = regs->ax & GENMASK(31, 0);
+ regs->ax = -ENOSYS;
+
+ nr = syscall_32_enter(regs);
+
+ local_irq_enable();
+ nr = syscall_enter_from_user_mode_work(regs, nr);
+ do_syscall_32_irqs_on(regs, nr);
+
+ instrumentation_end();
+ syscall_exit_to_user_mode(regs);
+}
+#else /* CONFIG_IA32_EMULATION */
+
+/* Handles int $0x80 on a 32bit kernel */
__visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
{
int nr = syscall_32_enter(regs);
@@ -186,6 +276,7 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
instrumentation_end();
syscall_exit_to_user_mode(regs);
}
+#endif /* !CONFIG_IA32_EMULATION */
static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
{
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index de6469dffe3a..c40f89ab1b4c 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -559,17 +559,27 @@ __irqentry_text_end:
SYM_CODE_START_LOCAL(common_interrupt_return)
SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
IBRS_EXIT
-#ifdef CONFIG_DEBUG_ENTRY
- /* Assert that pt_regs indicates user mode. */
- testb $3, CS(%rsp)
- jnz 1f
- ud2
-1:
-#endif
#ifdef CONFIG_XEN_PV
ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
#endif
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ ALTERNATIVE "", "jmp .Lpti_restore_regs_and_return_to_usermode", X86_FEATURE_PTI
+#endif
+
+ STACKLEAK_ERASE
+ POP_REGS
+ add $8, %rsp /* orig_ax */
+ UNWIND_HINT_IRET_REGS
+
+.Lswapgs_and_iret:
+ swapgs
+ /* Assert that the IRET frame indicates user mode. */
+ testb $3, 8(%rsp)
+ jnz .Lnative_iret
+ ud2
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+.Lpti_restore_regs_and_return_to_usermode:
POP_REGS pop_rdi=0
/*
@@ -596,13 +606,14 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
*/
STACKLEAK_ERASE_NOCLOBBER
- SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+ push %rax
+ SWITCH_TO_USER_CR3 scratch_reg=%rdi scratch_reg2=%rax
+ pop %rax
/* Restore RDI. */
popq %rdi
- swapgs
- jmp .Lnative_iret
-
+ jmp .Lswapgs_and_iret
+#endif
SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL)
#ifdef CONFIG_DEBUG_ENTRY
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 27c05d08558a..de94e2e84ecc 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -275,80 +275,3 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
int3
SYM_CODE_END(entry_SYSCALL_compat)
-
-/*
- * 32-bit legacy system call entry.
- *
- * 32-bit x86 Linux system calls traditionally used the INT $0x80
- * instruction. INT $0x80 lands here.
- *
- * This entry point can be used by 32-bit and 64-bit programs to perform
- * 32-bit system calls. Instances of INT $0x80 can be found inline in
- * various programs and libraries. It is also used by the vDSO's
- * __kernel_vsyscall fallback for hardware that doesn't support a faster
- * entry method. Restarted 32-bit system calls also fall back to INT
- * $0x80 regardless of what instruction was originally used to do the
- * system call.
- *
- * This is considered a slow path. It is not used by most libc
- * implementations on modern hardware except during process startup.
- *
- * Arguments:
- * eax system call number
- * ebx arg1
- * ecx arg2
- * edx arg3
- * esi arg4
- * edi arg5
- * ebp arg6
- */
-SYM_CODE_START(entry_INT80_compat)
- UNWIND_HINT_ENTRY
- ENDBR
- /*
- * Interrupts are off on entry.
- */
- ASM_CLAC /* Do this early to minimize exposure */
- ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV
-
- /*
- * User tracing code (ptrace or signal handlers) might assume that
- * the saved RAX contains a 32-bit number when we're invoking a 32-bit
- * syscall. Just in case the high bits are nonzero, zero-extend
- * the syscall number. (This could almost certainly be deleted
- * with no ill effects.)
- */
- movl %eax, %eax
-
- /* switch to thread stack expects orig_ax and rdi to be pushed */
- pushq %rax /* pt_regs->orig_ax */
-
- /* Need to switch before accessing the thread stack. */
- SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
-
- /* In the Xen PV case we already run on the thread stack. */
- ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV
-
- movq %rsp, %rax
- movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
-
- pushq 5*8(%rax) /* regs->ss */
- pushq 4*8(%rax) /* regs->rsp */
- pushq 3*8(%rax) /* regs->eflags */
- pushq 2*8(%rax) /* regs->cs */
- pushq 1*8(%rax) /* regs->ip */
- pushq 0*8(%rax) /* regs->orig_ax */
-.Lint80_keep_stack:
-
- PUSH_AND_CLEAR_REGS rax=$-ENOSYS
- UNWIND_HINT_REGS
-
- cld
-
- IBRS_ENTER
- UNTRAIN_RET
-
- movq %rsp, %rdi
- call do_int80_syscall_32
- jmp swapgs_restore_regs_and_return_to_usermode
-SYM_CODE_END(entry_INT80_compat)
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index c8fac5205803..56e6c2f3ee9c 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -461,3 +461,5 @@
454 i386 futex_wake sys_futex_wake
455 i386 futex_wait sys_futex_wait
456 i386 futex_requeue sys_futex_requeue
+457 i386 statmount sys_statmount
+458 i386 listmount sys_listmount
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 8cb8bf68721c..3a22eef585c2 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -378,6 +378,8 @@
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait
456 common futex_requeue sys_futex_requeue
+457 common statmount sys_statmount
+458 common listmount sys_listmount
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c
index ed308719236c..780acd3dff22 100644
--- a/arch/x86/events/amd/brs.c
+++ b/arch/x86/events/amd/brs.c
@@ -125,7 +125,7 @@ int amd_brs_hw_config(struct perf_event *event)
* Where X is the number of taken branches due to interrupt
* skid. Skid is large.
*
- * Where Y is the occurences of the event while BRS is
+ * Where Y is the occurrences of the event while BRS is
* capturing the lbr_nr entries.
*
* By using retired taken branches, we limit the impact on the
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index e24976593a29..81f6d8275b6b 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -940,7 +940,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
continue;
if (has_branch_stack(event))
- perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
+ perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
@@ -1184,7 +1184,7 @@ static void amd_put_event_constraints_f17h(struct cpu_hw_events *cpuc,
* period of each one and given that the BRS saturates, it would not be possible
* to guarantee correlated content for all events. Therefore, in situations
* where multiple events want to use BRS, the kernel enforces mutual exclusion.
- * Exclusion is enforced by chosing only one counter for events using BRS.
+ * Exclusion is enforced by choosing only one counter for events using BRS.
* The event scheduling logic will then automatically multiplex the
* events and ensure that at most one event is actively using BRS.
*
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 6911c5399d02..e91970b01d62 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -287,6 +287,9 @@ static int perf_ibs_init(struct perf_event *event)
if (config & ~perf_ibs->config_mask)
return -EINVAL;
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
ret = validate_group(event);
if (ret)
return ret;
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 40ad1425ffa2..09050641ce5d 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -601,7 +601,7 @@ int x86_pmu_hw_config(struct perf_event *event)
}
}
- if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK)
+ if (branch_sample_call_stack(event))
event->attach_state |= PERF_ATTACH_TASK_DATA;
/*
@@ -1702,7 +1702,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
perf_sample_data_init(&data, 0, event->hw.last_period);
if (has_branch_stack(event))
- perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
+ perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index ce1c777227b4..3804f21ab049 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2527,9 +2527,14 @@ static void intel_pmu_assign_event(struct perf_event *event, int idx)
perf_report_aux_output_id(event, idx);
}
+static __always_inline bool intel_pmu_needs_branch_stack(struct perf_event *event)
+{
+ return event->hw.flags & PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+}
+
static void intel_pmu_del_event(struct perf_event *event)
{
- if (needs_branch_stack(event))
+ if (intel_pmu_needs_branch_stack(event))
intel_pmu_lbr_del(event);
if (event->attr.precise_ip)
intel_pmu_pebs_del(event);
@@ -2787,6 +2792,7 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
static void intel_pmu_enable_event(struct perf_event *event)
{
+ u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
@@ -2795,8 +2801,10 @@ static void intel_pmu_enable_event(struct perf_event *event)
switch (idx) {
case 0 ... INTEL_PMC_IDX_FIXED - 1:
+ if (branch_sample_counters(event))
+ enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
intel_set_masks(event, idx);
- __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+ __x86_pmu_enable_event(hwc, enable_mask);
break;
case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
@@ -2820,7 +2828,7 @@ static void intel_pmu_add_event(struct perf_event *event)
{
if (event->attr.precise_ip)
intel_pmu_pebs_add(event);
- if (needs_branch_stack(event))
+ if (intel_pmu_needs_branch_stack(event))
intel_pmu_lbr_add(event);
}
@@ -3047,7 +3055,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
perf_sample_data_init(&data, 0, event->hw.last_period);
if (has_branch_stack(event))
- perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
+ intel_pmu_lbr_save_brstack(&data, cpuc, event);
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
@@ -3612,6 +3620,13 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
if (cpuc->excl_cntrs)
return intel_get_excl_constraints(cpuc, event, idx, c2);
+ /* Not all counters support the branch counter feature. */
+ if (branch_sample_counters(event)) {
+ c2 = dyn_constraint(cpuc, c2, idx);
+ c2->idxmsk64 &= x86_pmu.lbr_counters;
+ c2->weight = hweight64(c2->idxmsk64);
+ }
+
return c2;
}
@@ -3897,7 +3912,62 @@ static int intel_pmu_hw_config(struct perf_event *event)
x86_pmu.pebs_aliases(event);
}
- if (needs_branch_stack(event)) {
+ if (needs_branch_stack(event) && is_sampling_event(event))
+ event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+
+ if (branch_sample_counters(event)) {
+ struct perf_event *leader, *sibling;
+ int num = 0;
+
+ if (!(x86_pmu.flags & PMU_FL_BR_CNTR) ||
+ (event->attr.config & ~INTEL_ARCH_EVENT_MASK))
+ return -EINVAL;
+
+ /*
+ * The branch counter logging is not supported in the call stack
+ * mode yet, since we cannot simply flush the LBR during e.g.,
+ * multiplexing. Also, there is no obvious usage with the call
+ * stack mode. Simply forbids it for now.
+ *
+ * If any events in the group enable the branch counter logging
+ * feature, the group is treated as a branch counter logging
+ * group, which requires the extra space to store the counters.
+ */
+ leader = event->group_leader;
+ if (branch_sample_call_stack(leader))
+ return -EINVAL;
+ if (branch_sample_counters(leader))
+ num++;
+ leader->hw.flags |= PERF_X86_EVENT_BRANCH_COUNTERS;
+
+ for_each_sibling_event(sibling, leader) {
+ if (branch_sample_call_stack(sibling))
+ return -EINVAL;
+ if (branch_sample_counters(sibling))
+ num++;
+ }
+
+ if (num > fls(x86_pmu.lbr_counters))
+ return -EINVAL;
+ /*
+ * Only applying the PERF_SAMPLE_BRANCH_COUNTERS doesn't
+ * require any branch stack setup.
+ * Clear the bit to avoid unnecessary branch stack setup.
+ */
+ if (0 == (event->attr.branch_sample_type &
+ ~(PERF_SAMPLE_BRANCH_PLM_ALL |
+ PERF_SAMPLE_BRANCH_COUNTERS)))
+ event->hw.flags &= ~PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+
+ /*
+ * Force the leader to be a LBR event. So LBRs can be reset
+ * with the leader event. See intel_pmu_lbr_del() for details.
+ */
+ if (!intel_pmu_needs_branch_stack(leader))
+ return -EINVAL;
+ }
+
+ if (intel_pmu_needs_branch_stack(event)) {
ret = intel_pmu_setup_lbr_filter(event);
if (ret)
return ret;
@@ -4027,7 +4097,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
/*
* Currently, the only caller of this function is the atomic_switch_perf_msrs().
- * The host perf conext helps to prepare the values of the real hardware for
+ * The host perf context helps to prepare the values of the real hardware for
* a set of msrs that need to be switched atomically in a vmx transaction.
*
* For example, the pseudocode needed to add a new msr should look like:
@@ -4051,12 +4121,17 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
u64 pebs_mask = cpuc->pebs_enabled & x86_pmu.pebs_capable;
int global_ctrl, pebs_enable;
+ /*
+ * In addition to obeying exclude_guest/exclude_host, remove bits being
+ * used for PEBS when running a guest, because PEBS writes to virtual
+ * addresses (not physical addresses).
+ */
*nr = 0;
global_ctrl = (*nr)++;
arr[global_ctrl] = (struct perf_guest_switch_msr){
.msr = MSR_CORE_PERF_GLOBAL_CTRL,
.host = intel_ctrl & ~cpuc->intel_ctrl_guest_mask,
- .guest = intel_ctrl & (~cpuc->intel_ctrl_host_mask | ~pebs_mask),
+ .guest = intel_ctrl & ~cpuc->intel_ctrl_host_mask & ~pebs_mask,
};
if (!x86_pmu.pebs)
@@ -4375,8 +4450,13 @@ cmt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
*/
if (event->attr.precise_ip == 3) {
/* Force instruction:ppp on PMC0, 1 and Fixed counter 0 */
- if (constraint_match(&fixed0_constraint, event->hw.config))
- return &fixed0_counter0_1_constraint;
+ if (constraint_match(&fixed0_constraint, event->hw.config)) {
+ /* The fixed counter 0 doesn't support LBR event logging. */
+ if (branch_sample_counters(event))
+ return &counter0_1_constraint;
+ else
+ return &fixed0_counter0_1_constraint;
+ }
switch (c->idxmsk64 & 0x3ull) {
case 0x1:
@@ -4555,7 +4635,7 @@ int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
goto err;
}
- if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA)) {
+ if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_BR_CNTR)) {
size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
@@ -5527,11 +5607,41 @@ static ssize_t branches_show(struct device *cdev,
static DEVICE_ATTR_RO(branches);
+static ssize_t branch_counter_nr_show(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", fls(x86_pmu.lbr_counters));
+}
+
+static DEVICE_ATTR_RO(branch_counter_nr);
+
+static ssize_t branch_counter_width_show(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", LBR_INFO_BR_CNTR_BITS);
+}
+
+static DEVICE_ATTR_RO(branch_counter_width);
+
static struct attribute *lbr_attrs[] = {
&dev_attr_branches.attr,
+ &dev_attr_branch_counter_nr.attr,
+ &dev_attr_branch_counter_width.attr,
NULL
};
+static umode_t
+lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ /* branches */
+ if (i == 0)
+ return x86_pmu.lbr_nr ? attr->mode : 0;
+
+ return (x86_pmu.flags & PMU_FL_BR_CNTR) ? attr->mode : 0;
+}
+
static char pmu_name_str[30];
static ssize_t pmu_name_show(struct device *cdev,
@@ -5559,6 +5669,15 @@ static struct attribute *intel_pmu_attrs[] = {
};
static umode_t
+default_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ if (attr == &dev_attr_allow_tsx_force_abort.attr)
+ return x86_pmu.flags & PMU_FL_TFA ? attr->mode : 0;
+
+ return attr->mode;
+}
+
+static umode_t
tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
return boot_cpu_has(X86_FEATURE_RTM) ? attr->mode : 0;
@@ -5580,26 +5699,11 @@ mem_is_visible(struct kobject *kobj, struct attribute *attr, int i)
}
static umode_t
-lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
-{
- return x86_pmu.lbr_nr ? attr->mode : 0;
-}
-
-static umode_t
exra_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
return x86_pmu.version >= 2 ? attr->mode : 0;
}
-static umode_t
-default_is_visible(struct kobject *kobj, struct attribute *attr, int i)
-{
- if (attr == &dev_attr_allow_tsx_force_abort.attr)
- return x86_pmu.flags & PMU_FL_TFA ? attr->mode : 0;
-
- return attr->mode;
-}
-
static struct attribute_group group_events_td = {
.name = "events",
};
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index cbeb6d2bf5b4..4b50a3a9818a 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -41,7 +41,7 @@
* MSR_CORE_C1_RES: CORE C1 Residency Counter
* perf code: 0x00
* Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL
- * MTL
+ * MTL,SRF,GRR
* Scope: Core (each processor core has a MSR)
* MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
* perf code: 0x01
@@ -52,7 +52,8 @@
* perf code: 0x02
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
* SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
- * TGL,TNT,RKL,ADL,RPL,SPR,MTL
+ * TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF,
+ * GRR
* Scope: Core
* MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
* perf code: 0x03
@@ -75,7 +76,7 @@
* perf code: 0x02
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
* SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
- * TGL,TNT,RKL,ADL,RPL,SPR,MTL
+ * TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF
* Scope: Package (physical package)
* MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
* perf code: 0x03
@@ -97,6 +98,10 @@
* Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL,
* TNT,RKL,ADL,RPL,MTL
* Scope: Package (physical package)
+ * MSR_MODULE_C6_RES_MS: Module C6 Residency Counter.
+ * perf code: 0x00
+ * Available model: SRF,GRR
+ * Scope: A cluster of cores shared L2 cache
*
*/
@@ -130,6 +135,7 @@ static ssize_t cstate_get_attr_cpumask(struct device *dev,
struct cstate_model {
unsigned long core_events;
unsigned long pkg_events;
+ unsigned long module_events;
unsigned long quirks;
};
@@ -189,20 +195,20 @@ static struct attribute *attrs_empty[] = {
* "events" group (with empty attrs) before updating
* it with detected events.
*/
-static struct attribute_group core_events_attr_group = {
+static struct attribute_group cstate_events_attr_group = {
.name = "events",
.attrs = attrs_empty,
};
-DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63");
-static struct attribute *core_format_attrs[] = {
- &format_attr_core_event.attr,
+DEFINE_CSTATE_FORMAT_ATTR(cstate_event, event, "config:0-63");
+static struct attribute *cstate_format_attrs[] = {
+ &format_attr_cstate_event.attr,
NULL,
};
-static struct attribute_group core_format_attr_group = {
+static struct attribute_group cstate_format_attr_group = {
.name = "format",
- .attrs = core_format_attrs,
+ .attrs = cstate_format_attrs,
};
static cpumask_t cstate_core_cpu_mask;
@@ -217,9 +223,9 @@ static struct attribute_group cpumask_attr_group = {
.attrs = cstate_cpumask_attrs,
};
-static const struct attribute_group *core_attr_groups[] = {
- &core_events_attr_group,
- &core_format_attr_group,
+static const struct attribute_group *cstate_attr_groups[] = {
+ &cstate_events_attr_group,
+ &cstate_format_attr_group,
&cpumask_attr_group,
NULL,
};
@@ -268,30 +274,30 @@ static struct perf_msr pkg_msr[] = {
[PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &group_cstate_pkg_c10, test_msr },
};
-static struct attribute_group pkg_events_attr_group = {
- .name = "events",
- .attrs = attrs_empty,
-};
+static cpumask_t cstate_pkg_cpu_mask;
-DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63");
-static struct attribute *pkg_format_attrs[] = {
- &format_attr_pkg_event.attr,
- NULL,
-};
-static struct attribute_group pkg_format_attr_group = {
- .name = "format",
- .attrs = pkg_format_attrs,
+/* cstate_module PMU */
+static struct pmu cstate_module_pmu;
+static bool has_cstate_module;
+
+enum perf_cstate_module_events {
+ PERF_CSTATE_MODULE_C6_RES = 0,
+
+ PERF_CSTATE_MODULE_EVENT_MAX,
};
-static cpumask_t cstate_pkg_cpu_mask;
+PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_module_c6, "event=0x00");
-static const struct attribute_group *pkg_attr_groups[] = {
- &pkg_events_attr_group,
- &pkg_format_attr_group,
- &cpumask_attr_group,
- NULL,
+static unsigned long module_msr_mask;
+
+PMU_EVENT_GROUP(events, cstate_module_c6);
+
+static struct perf_msr module_msr[] = {
+ [PERF_CSTATE_MODULE_C6_RES] = { MSR_MODULE_C6_RES_MS, &group_cstate_module_c6, test_msr },
};
+static cpumask_t cstate_module_cpu_mask;
+
static ssize_t cstate_get_attr_cpumask(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -302,6 +308,8 @@ static ssize_t cstate_get_attr_cpumask(struct device *dev,
return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask);
else if (pmu == &cstate_pkg_pmu)
return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask);
+ else if (pmu == &cstate_module_pmu)
+ return cpumap_print_to_pagebuf(true, buf, &cstate_module_cpu_mask);
else
return 0;
}
@@ -342,6 +350,15 @@ static int cstate_pmu_event_init(struct perf_event *event)
event->hw.event_base = pkg_msr[cfg].msr;
cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
topology_die_cpumask(event->cpu));
+ } else if (event->pmu == &cstate_module_pmu) {
+ if (cfg >= PERF_CSTATE_MODULE_EVENT_MAX)
+ return -EINVAL;
+ cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_MODULE_EVENT_MAX);
+ if (!(module_msr_mask & (1 << cfg)))
+ return -EINVAL;
+ event->hw.event_base = module_msr[cfg].msr;
+ cpu = cpumask_any_and(&cstate_module_cpu_mask,
+ topology_cluster_cpumask(event->cpu));
} else {
return -ENOENT;
}
@@ -429,6 +446,17 @@ static int cstate_cpu_exit(unsigned int cpu)
perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
}
}
+
+ if (has_cstate_module &&
+ cpumask_test_and_clear_cpu(cpu, &cstate_module_cpu_mask)) {
+
+ target = cpumask_any_but(topology_cluster_cpumask(cpu), cpu);
+ /* Migrate events if there is a valid target */
+ if (target < nr_cpu_ids) {
+ cpumask_set_cpu(target, &cstate_module_cpu_mask);
+ perf_pmu_migrate_context(&cstate_module_pmu, cpu, target);
+ }
+ }
return 0;
}
@@ -455,6 +483,15 @@ static int cstate_cpu_init(unsigned int cpu)
if (has_cstate_pkg && target >= nr_cpu_ids)
cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
+ /*
+ * If this is the first online thread of that cluster, set it
+ * in the cluster cpu mask as the designated reader.
+ */
+ target = cpumask_any_and(&cstate_module_cpu_mask,
+ topology_cluster_cpumask(cpu));
+ if (has_cstate_module && target >= nr_cpu_ids)
+ cpumask_set_cpu(cpu, &cstate_module_cpu_mask);
+
return 0;
}
@@ -477,8 +514,13 @@ static const struct attribute_group *pkg_attr_update[] = {
NULL,
};
+static const struct attribute_group *module_attr_update[] = {
+ &group_cstate_module_c6,
+ NULL
+};
+
static struct pmu cstate_core_pmu = {
- .attr_groups = core_attr_groups,
+ .attr_groups = cstate_attr_groups,
.attr_update = core_attr_update,
.name = "cstate_core",
.task_ctx_nr = perf_invalid_context,
@@ -493,7 +535,7 @@ static struct pmu cstate_core_pmu = {
};
static struct pmu cstate_pkg_pmu = {
- .attr_groups = pkg_attr_groups,
+ .attr_groups = cstate_attr_groups,
.attr_update = pkg_attr_update,
.name = "cstate_pkg",
.task_ctx_nr = perf_invalid_context,
@@ -507,6 +549,21 @@ static struct pmu cstate_pkg_pmu = {
.module = THIS_MODULE,
};
+static struct pmu cstate_module_pmu = {
+ .attr_groups = cstate_attr_groups,
+ .attr_update = module_attr_update,
+ .name = "cstate_module",
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = cstate_pmu_event_init,
+ .add = cstate_pmu_event_add,
+ .del = cstate_pmu_event_del,
+ .start = cstate_pmu_event_start,
+ .stop = cstate_pmu_event_stop,
+ .read = cstate_pmu_event_update,
+ .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
+ .module = THIS_MODULE,
+};
+
static const struct cstate_model nhm_cstates __initconst = {
.core_events = BIT(PERF_CSTATE_CORE_C3_RES) |
BIT(PERF_CSTATE_CORE_C6_RES),
@@ -621,6 +678,22 @@ static const struct cstate_model glm_cstates __initconst = {
BIT(PERF_CSTATE_PKG_C10_RES),
};
+static const struct cstate_model grr_cstates __initconst = {
+ .core_events = BIT(PERF_CSTATE_CORE_C1_RES) |
+ BIT(PERF_CSTATE_CORE_C6_RES),
+
+ .module_events = BIT(PERF_CSTATE_MODULE_C6_RES),
+};
+
+static const struct cstate_model srf_cstates __initconst = {
+ .core_events = BIT(PERF_CSTATE_CORE_C1_RES) |
+ BIT(PERF_CSTATE_CORE_C6_RES),
+
+ .pkg_events = BIT(PERF_CSTATE_PKG_C6_RES),
+
+ .module_events = BIT(PERF_CSTATE_MODULE_C6_RES),
+};
+
static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &nhm_cstates),
@@ -673,6 +746,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, &glm_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, &glm_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &adl_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X, &srf_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT, &grr_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_cstates),
@@ -714,10 +789,14 @@ static int __init cstate_probe(const struct cstate_model *cm)
pkg_msr_mask = perf_msr_probe(pkg_msr, PERF_CSTATE_PKG_EVENT_MAX,
true, (void *) &cm->pkg_events);
+ module_msr_mask = perf_msr_probe(module_msr, PERF_CSTATE_MODULE_EVENT_MAX,
+ true, (void *) &cm->module_events);
+
has_cstate_core = !!core_msr_mask;
has_cstate_pkg = !!pkg_msr_mask;
+ has_cstate_module = !!module_msr_mask;
- return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
+ return (has_cstate_core || has_cstate_pkg || has_cstate_module) ? 0 : -ENODEV;
}
static inline void cstate_cleanup(void)
@@ -730,6 +809,9 @@ static inline void cstate_cleanup(void)
if (has_cstate_pkg)
perf_pmu_unregister(&cstate_pkg_pmu);
+
+ if (has_cstate_module)
+ perf_pmu_unregister(&cstate_module_pmu);
}
static int __init cstate_init(void)
@@ -766,6 +848,16 @@ static int __init cstate_init(void)
return err;
}
}
+
+ if (has_cstate_module) {
+ err = perf_pmu_register(&cstate_module_pmu, cstate_module_pmu.name, -1);
+ if (err) {
+ has_cstate_module = false;
+ pr_info("Failed to register cstate cluster pmu\n");
+ cstate_cleanup();
+ return err;
+ }
+ }
return 0;
}
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index bf97ab904d40..d49d661ec0a7 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1755,7 +1755,7 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
setup_pebs_time(event, data, pebs->tsc);
if (has_branch_stack(event))
- perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
+ perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
}
static void adaptive_pebs_save_regs(struct pt_regs *regs,
@@ -1912,7 +1912,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
if (has_branch_stack(event)) {
intel_pmu_store_pebs_lbrs(lbr);
- perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
+ intel_pmu_lbr_save_brstack(data, cpuc, event);
}
}
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index c3b0d15a9841..78cd5084104e 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -676,6 +676,25 @@ void intel_pmu_lbr_del(struct perf_event *event)
WARN_ON_ONCE(cpuc->lbr_users < 0);
WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
perf_sched_cb_dec(event->pmu);
+
+ /*
+ * The logged occurrences information is only valid for the
+ * current LBR group. If another LBR group is scheduled in
+ * later, the information from the stale LBRs will be wrongly
+ * interpreted. Reset the LBRs here.
+ *
+ * Only clear once for a branch counter group with the leader
+ * event. Because
+ * - Cannot simply reset the LBRs with the !cpuc->lbr_users.
+ * Because it's possible that the last LBR user is not in a
+ * branch counter group, e.g., a branch_counters group +
+ * several normal LBR events.
+ * - The LBR reset can be done with any one of the events in a
+ * branch counter group, since they are always scheduled together.
+ * It's easy to force the leader event an LBR event.
+ */
+ if (is_branch_counters_group(event) && event == event->group_leader)
+ intel_pmu_lbr_reset();
}
static inline bool vlbr_exclude_host(void)
@@ -866,6 +885,8 @@ static __always_inline u16 get_lbr_cycles(u64 info)
return cycles;
}
+static_assert((64 - PERF_BRANCH_ENTRY_INFO_BITS_MAX) > LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS);
+
static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
struct lbr_entry *entries)
{
@@ -898,11 +919,67 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
e->abort = !!(info & LBR_INFO_ABORT);
e->cycles = get_lbr_cycles(info);
e->type = get_lbr_br_type(info);
+
+ /*
+ * Leverage the reserved field of cpuc->lbr_entries[i] to
+ * temporarily store the branch counters information.
+ * The later code will decide what content can be disclosed
+ * to the perf tool. Pleae see intel_pmu_lbr_counters_reorder().
+ */
+ e->reserved = (info >> LBR_INFO_BR_CNTR_OFFSET) & LBR_INFO_BR_CNTR_FULL_MASK;
}
cpuc->lbr_stack.nr = i;
}
+/*
+ * The enabled order may be different from the counter order.
+ * Update the lbr_counters with the enabled order.
+ */
+static void intel_pmu_lbr_counters_reorder(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ int i, j, pos = 0, order[X86_PMC_IDX_MAX];
+ struct perf_event *leader, *sibling;
+ u64 src, dst, cnt;
+
+ leader = event->group_leader;
+ if (branch_sample_counters(leader))
+ order[pos++] = leader->hw.idx;
+
+ for_each_sibling_event(sibling, leader) {
+ if (!branch_sample_counters(sibling))
+ continue;
+ order[pos++] = sibling->hw.idx;
+ }
+
+ WARN_ON_ONCE(!pos);
+
+ for (i = 0; i < cpuc->lbr_stack.nr; i++) {
+ src = cpuc->lbr_entries[i].reserved;
+ dst = 0;
+ for (j = 0; j < pos; j++) {
+ cnt = (src >> (order[j] * LBR_INFO_BR_CNTR_BITS)) & LBR_INFO_BR_CNTR_MASK;
+ dst |= cnt << j * LBR_INFO_BR_CNTR_BITS;
+ }
+ cpuc->lbr_counters[i] = dst;
+ cpuc->lbr_entries[i].reserved = 0;
+ }
+}
+
+void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
+ struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ if (is_branch_counters_group(event)) {
+ intel_pmu_lbr_counters_reorder(cpuc, event);
+ perf_sample_save_brstack(data, event, &cpuc->lbr_stack, cpuc->lbr_counters);
+ return;
+ }
+
+ perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
+}
+
static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc)
{
intel_pmu_store_lbr(cpuc, NULL);
@@ -1173,8 +1250,10 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
for (i = 0; i < cpuc->lbr_stack.nr; ) {
if (!cpuc->lbr_entries[i].from) {
j = i;
- while (++j < cpuc->lbr_stack.nr)
+ while (++j < cpuc->lbr_stack.nr) {
cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
+ cpuc->lbr_counters[j-1] = cpuc->lbr_counters[j];
+ }
cpuc->lbr_stack.nr--;
if (!cpuc->lbr_entries[i].from)
continue;
@@ -1525,8 +1604,12 @@ void __init intel_pmu_arch_lbr_init(void)
x86_pmu.lbr_mispred = ecx.split.lbr_mispred;
x86_pmu.lbr_timed_lbr = ecx.split.lbr_timed_lbr;
x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
+ x86_pmu.lbr_counters = ecx.split.lbr_counters;
x86_pmu.lbr_nr = lbr_nr;
+ if (!!x86_pmu.lbr_counters)
+ x86_pmu.flags |= PMU_FL_BR_CNTR;
+
if (x86_pmu.lbr_mispred)
static_branch_enable(&x86_lbr_mispred);
if (x86_pmu.lbr_timed_lbr)
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 01023aa5125b..7927c0b832fa 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1814,6 +1814,14 @@ static const struct intel_uncore_init_fun spr_uncore_init __initconst = {
.uncore_units_ignore = spr_uncore_units_ignore,
};
+static const struct intel_uncore_init_fun gnr_uncore_init __initconst = {
+ .cpu_init = gnr_uncore_cpu_init,
+ .pci_init = gnr_uncore_pci_init,
+ .mmio_init = gnr_uncore_mmio_init,
+ .use_discovery = true,
+ .uncore_units_ignore = gnr_uncore_units_ignore,
+};
+
static const struct intel_uncore_init_fun generic_uncore_init __initconst = {
.cpu_init = intel_uncore_generic_uncore_cpu_init,
.pci_init = intel_uncore_generic_uncore_pci_init,
@@ -1865,8 +1873,12 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &mtl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &spr_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_X, &gnr_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_D, &gnr_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &adl_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X, &gnr_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT, &gnr_uncore_init),
{},
};
MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index c30fb5bb1222..4838502d89ae 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -72,9 +72,9 @@ struct intel_uncore_type {
unsigned single_fixed:1;
unsigned pair_ctr_ctl:1;
union {
- unsigned *msr_offsets;
- unsigned *pci_offsets;
- unsigned *mmio_offsets;
+ u64 *msr_offsets;
+ u64 *pci_offsets;
+ u64 *mmio_offsets;
};
unsigned *box_ids;
struct event_constraint unconstrainted;
@@ -593,6 +593,7 @@ extern struct list_head pci2phy_map_head;
extern struct pci_extra_dev *uncore_extra_pci_dev;
extern struct event_constraint uncore_constraint_empty;
extern int spr_uncore_units_ignore[];
+extern int gnr_uncore_units_ignore[];
/* uncore_snb.c */
int snb_uncore_pci_init(void);
@@ -634,6 +635,9 @@ void icx_uncore_mmio_init(void);
int spr_uncore_pci_init(void);
void spr_uncore_cpu_init(void);
void spr_uncore_mmio_init(void);
+int gnr_uncore_pci_init(void);
+void gnr_uncore_cpu_init(void);
+void gnr_uncore_mmio_init(void);
/* uncore_nhmex.c */
void nhmex_uncore_cpu_init(void);
diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c
index cb488e41807c..9a698a92962a 100644
--- a/arch/x86/events/intel/uncore_discovery.c
+++ b/arch/x86/events/intel/uncore_discovery.c
@@ -125,7 +125,8 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit,
int die, bool parsed)
{
struct intel_uncore_discovery_type *type;
- unsigned int *box_offset, *ids;
+ unsigned int *ids;
+ u64 *box_offset;
int i;
if (!unit->ctl || !unit->ctl_offset || !unit->ctr_offset) {
@@ -153,7 +154,7 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit,
if (!type)
return;
- box_offset = kcalloc(type->num_boxes + 1, sizeof(unsigned int), GFP_KERNEL);
+ box_offset = kcalloc(type->num_boxes + 1, sizeof(u64), GFP_KERNEL);
if (!box_offset)
return;
diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h
index 6ee80ad3423e..22e769a81103 100644
--- a/arch/x86/events/intel/uncore_discovery.h
+++ b/arch/x86/events/intel/uncore_discovery.h
@@ -125,7 +125,7 @@ struct intel_uncore_discovery_type {
u8 ctr_offset; /* Counter 0 offset */
u16 num_boxes; /* number of boxes for the uncore block */
unsigned int *ids; /* Box IDs */
- unsigned int *box_offset; /* Box offset */
+ u64 *box_offset; /* Box offset */
};
bool intel_uncore_has_discovery_tables(int *ignore);
diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c
index 173e2674be6e..56eea2c66cfb 100644
--- a/arch/x86/events/intel/uncore_nhmex.c
+++ b/arch/x86/events/intel/uncore_nhmex.c
@@ -306,7 +306,7 @@ static const struct attribute_group nhmex_uncore_cbox_format_group = {
};
/* msr offset for each instance of cbox */
-static unsigned nhmex_cbox_msr_offsets[] = {
+static u64 nhmex_cbox_msr_offsets[] = {
0x0, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x240, 0x2c0,
};
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 8250f0f59c2b..a96496bef678 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1396,6 +1396,29 @@ err:
return ret;
}
+static int topology_gidnid_map(int nodeid, u32 gidnid)
+{
+ int i, die_id = -1;
+
+ /*
+ * every three bits in the Node ID mapping register maps
+ * to a particular node.
+ */
+ for (i = 0; i < 8; i++) {
+ if (nodeid == GIDNIDMAP(gidnid, i)) {
+ if (topology_max_die_per_package() > 1)
+ die_id = i;
+ else
+ die_id = topology_phys_to_logical_pkg(i);
+ if (die_id < 0)
+ die_id = -ENODEV;
+ break;
+ }
+ }
+
+ return die_id;
+}
+
/*
* build pci bus to socket mapping
*/
@@ -1435,22 +1458,7 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
break;
}
- /*
- * every three bits in the Node ID mapping register maps
- * to a particular node.
- */
- for (i = 0; i < 8; i++) {
- if (nodeid == GIDNIDMAP(config, i)) {
- if (topology_max_die_per_package() > 1)
- die_id = i;
- else
- die_id = topology_phys_to_logical_pkg(i);
- if (die_id < 0)
- die_id = -ENODEV;
- map->pbus_to_dieid[bus] = die_id;
- break;
- }
- }
+ map->pbus_to_dieid[bus] = topology_gidnid_map(nodeid, config);
raw_spin_unlock(&pci2phy_map_lock);
} else {
segment = pci_domain_nr(ubox_dev->bus);
@@ -5278,7 +5286,7 @@ void snr_uncore_mmio_init(void)
/* ICX uncore support */
-static unsigned icx_cha_msr_offsets[] = {
+static u64 icx_cha_msr_offsets[] = {
0x2a0, 0x2ae, 0x2bc, 0x2ca, 0x2d8, 0x2e6, 0x2f4, 0x302, 0x310,
0x31e, 0x32c, 0x33a, 0x348, 0x356, 0x364, 0x372, 0x380, 0x38e,
0x3aa, 0x3b8, 0x3c6, 0x3d4, 0x3e2, 0x3f0, 0x3fe, 0x40c, 0x41a,
@@ -5326,7 +5334,7 @@ static struct intel_uncore_type icx_uncore_chabox = {
.format_group = &snr_uncore_chabox_format_group,
};
-static unsigned icx_msr_offsets[] = {
+static u64 icx_msr_offsets[] = {
0x0, 0x20, 0x40, 0x90, 0xb0, 0xd0,
};
@@ -5596,7 +5604,7 @@ static int discover_upi_topology(struct intel_uncore_type *type, int ubox_did, i
struct pci_dev *ubox = NULL;
struct pci_dev *dev = NULL;
u32 nid, gid;
- int i, idx, ret = -EPERM;
+ int idx, lgc_pkg, ret = -EPERM;
struct intel_uncore_topology *upi;
unsigned int devfn;
@@ -5611,20 +5619,21 @@ static int discover_upi_topology(struct intel_uncore_type *type, int ubox_did, i
break;
}
- for (i = 0; i < 8; i++) {
- if (nid != GIDNIDMAP(gid, i))
- continue;
- for (idx = 0; idx < type->num_boxes; idx++) {
- upi = &type->topology[nid][idx];
- devfn = PCI_DEVFN(dev_link0 + idx, ICX_UPI_REGS_ADDR_FUNCTION);
- dev = pci_get_domain_bus_and_slot(pci_domain_nr(ubox->bus),
- ubox->bus->number,
- devfn);
- if (dev) {
- ret = upi_fill_topology(dev, upi, idx);
- if (ret)
- goto err;
- }
+ lgc_pkg = topology_gidnid_map(nid, gid);
+ if (lgc_pkg < 0) {
+ ret = -EPERM;
+ goto err;
+ }
+ for (idx = 0; idx < type->num_boxes; idx++) {
+ upi = &type->topology[lgc_pkg][idx];
+ devfn = PCI_DEVFN(dev_link0 + idx, ICX_UPI_REGS_ADDR_FUNCTION);
+ dev = pci_get_domain_bus_and_slot(pci_domain_nr(ubox->bus),
+ ubox->bus->number,
+ devfn);
+ if (dev) {
+ ret = upi_fill_topology(dev, upi, idx);
+ if (ret)
+ goto err;
}
}
}
@@ -6079,13 +6088,16 @@ static struct uncore_event_desc spr_uncore_imc_events[] = {
{ /* end: all zeroes */ },
};
+#define SPR_UNCORE_MMIO_COMMON_FORMAT() \
+ SPR_UNCORE_COMMON_FORMAT(), \
+ .ops = &spr_uncore_mmio_ops
+
static struct intel_uncore_type spr_uncore_imc = {
- SPR_UNCORE_COMMON_FORMAT(),
+ SPR_UNCORE_MMIO_COMMON_FORMAT(),
.name = "imc",
.fixed_ctr_bits = 48,
.fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR,
.fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL,
- .ops = &spr_uncore_mmio_ops,
.event_descs = spr_uncore_imc_events,
};
@@ -6181,7 +6193,7 @@ static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = {
*/
#define SPR_UNCORE_UPI_NUM_BOXES 4
-static unsigned int spr_upi_pci_offsets[SPR_UNCORE_UPI_NUM_BOXES] = {
+static u64 spr_upi_pci_offsets[SPR_UNCORE_UPI_NUM_BOXES] = {
0, 0x8000, 0x10000, 0x18000
};
@@ -6412,7 +6424,8 @@ static void uncore_type_customized_copy(struct intel_uncore_type *to_type,
static struct intel_uncore_type **
uncore_get_uncores(enum uncore_access_type type_id, int num_extra,
- struct intel_uncore_type **extra)
+ struct intel_uncore_type **extra, int max_num_types,
+ struct intel_uncore_type **uncores)
{
struct intel_uncore_type **types, **start_types;
int i;
@@ -6421,9 +6434,9 @@ uncore_get_uncores(enum uncore_access_type type_id, int num_extra,
/* Only copy the customized features */
for (; *types; types++) {
- if ((*types)->type_id >= UNCORE_SPR_NUM_UNCORE_TYPES)
+ if ((*types)->type_id >= max_num_types)
continue;
- uncore_type_customized_copy(*types, spr_uncores[(*types)->type_id]);
+ uncore_type_customized_copy(*types, uncores[(*types)->type_id]);
}
for (i = 0; i < num_extra; i++, types++)
@@ -6470,7 +6483,9 @@ void spr_uncore_cpu_init(void)
uncore_msr_uncores = uncore_get_uncores(UNCORE_ACCESS_MSR,
UNCORE_SPR_MSR_EXTRA_UNCORES,
- spr_msr_uncores);
+ spr_msr_uncores,
+ UNCORE_SPR_NUM_UNCORE_TYPES,
+ spr_uncores);
type = uncore_find_type_by_id(uncore_msr_uncores, UNCORE_SPR_CHA);
if (type) {
@@ -6552,7 +6567,9 @@ int spr_uncore_pci_init(void)
spr_update_device_location(UNCORE_SPR_M3UPI);
uncore_pci_uncores = uncore_get_uncores(UNCORE_ACCESS_PCI,
UNCORE_SPR_PCI_EXTRA_UNCORES,
- spr_pci_uncores);
+ spr_pci_uncores,
+ UNCORE_SPR_NUM_UNCORE_TYPES,
+ spr_uncores);
return 0;
}
@@ -6560,15 +6577,116 @@ void spr_uncore_mmio_init(void)
{
int ret = snbep_pci2phy_map_init(0x3250, SKX_CPUNODEID, SKX_GIDNIDMAP, true);
- if (ret)
- uncore_mmio_uncores = uncore_get_uncores(UNCORE_ACCESS_MMIO, 0, NULL);
- else {
+ if (ret) {
+ uncore_mmio_uncores = uncore_get_uncores(UNCORE_ACCESS_MMIO, 0, NULL,
+ UNCORE_SPR_NUM_UNCORE_TYPES,
+ spr_uncores);
+ } else {
uncore_mmio_uncores = uncore_get_uncores(UNCORE_ACCESS_MMIO,
UNCORE_SPR_MMIO_EXTRA_UNCORES,
- spr_mmio_uncores);
+ spr_mmio_uncores,
+ UNCORE_SPR_NUM_UNCORE_TYPES,
+ spr_uncores);
spr_uncore_imc_free_running.num_boxes = uncore_type_max_boxes(uncore_mmio_uncores, UNCORE_SPR_IMC) / 2;
}
}
/* end of SPR uncore support */
+
+/* GNR uncore support */
+
+#define UNCORE_GNR_NUM_UNCORE_TYPES 23
+#define UNCORE_GNR_TYPE_15 15
+#define UNCORE_GNR_B2UPI 18
+#define UNCORE_GNR_TYPE_21 21
+#define UNCORE_GNR_TYPE_22 22
+
+int gnr_uncore_units_ignore[] = {
+ UNCORE_SPR_UPI,
+ UNCORE_GNR_TYPE_15,
+ UNCORE_GNR_B2UPI,
+ UNCORE_GNR_TYPE_21,
+ UNCORE_GNR_TYPE_22,
+ UNCORE_IGNORE_END
+};
+
+static struct intel_uncore_type gnr_uncore_ubox = {
+ .name = "ubox",
+ .attr_update = uncore_alias_groups,
+};
+
+static struct intel_uncore_type gnr_uncore_b2cmi = {
+ SPR_UNCORE_PCI_COMMON_FORMAT(),
+ .name = "b2cmi",
+};
+
+static struct intel_uncore_type gnr_uncore_b2cxl = {
+ SPR_UNCORE_MMIO_COMMON_FORMAT(),
+ .name = "b2cxl",
+};
+
+static struct intel_uncore_type gnr_uncore_mdf_sbo = {
+ .name = "mdf_sbo",
+ .attr_update = uncore_alias_groups,
+};
+
+static struct intel_uncore_type *gnr_uncores[UNCORE_GNR_NUM_UNCORE_TYPES] = {
+ &spr_uncore_chabox,
+ &spr_uncore_iio,
+ &spr_uncore_irp,
+ NULL,
+ &spr_uncore_pcu,
+ &gnr_uncore_ubox,
+ &spr_uncore_imc,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ &gnr_uncore_b2cmi,
+ &gnr_uncore_b2cxl,
+ NULL,
+ NULL,
+ &gnr_uncore_mdf_sbo,
+ NULL,
+ NULL,
+};
+
+static struct freerunning_counters gnr_iio_freerunning[] = {
+ [SPR_IIO_MSR_IOCLK] = { 0x290e, 0x01, 0x10, 1, 48 },
+ [SPR_IIO_MSR_BW_IN] = { 0x360e, 0x10, 0x80, 8, 48 },
+ [SPR_IIO_MSR_BW_OUT] = { 0x2e0e, 0x10, 0x80, 8, 48 },
+};
+
+void gnr_uncore_cpu_init(void)
+{
+ uncore_msr_uncores = uncore_get_uncores(UNCORE_ACCESS_MSR,
+ UNCORE_SPR_MSR_EXTRA_UNCORES,
+ spr_msr_uncores,
+ UNCORE_GNR_NUM_UNCORE_TYPES,
+ gnr_uncores);
+ spr_uncore_iio_free_running.num_boxes = uncore_type_max_boxes(uncore_msr_uncores, UNCORE_SPR_IIO);
+ spr_uncore_iio_free_running.freerunning = gnr_iio_freerunning;
+}
+
+int gnr_uncore_pci_init(void)
+{
+ uncore_pci_uncores = uncore_get_uncores(UNCORE_ACCESS_PCI, 0, NULL,
+ UNCORE_GNR_NUM_UNCORE_TYPES,
+ gnr_uncores);
+ return 0;
+}
+
+void gnr_uncore_mmio_init(void)
+{
+ uncore_mmio_uncores = uncore_get_uncores(UNCORE_ACCESS_MMIO, 0, NULL,
+ UNCORE_GNR_NUM_UNCORE_TYPES,
+ gnr_uncores);
+}
+
+/* end of GNR uncore support */
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 53dd5d495ba6..fb56518356ec 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -110,6 +110,11 @@ static inline bool is_topdown_event(struct perf_event *event)
return is_metric_event(event) || is_slots_event(event);
}
+static inline bool is_branch_counters_group(struct perf_event *event)
+{
+ return event->group_leader->hw.flags & PERF_X86_EVENT_BRANCH_COUNTERS;
+}
+
struct amd_nb {
int nb_id; /* NorthBridge id */
int refcnt; /* reference count */
@@ -283,6 +288,7 @@ struct cpu_hw_events {
int lbr_pebs_users;
struct perf_branch_stack lbr_stack;
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
+ u64 lbr_counters[MAX_LBR_ENTRIES]; /* branch stack extra */
union {
struct er_account *lbr_sel;
struct er_account *lbr_ctl;
@@ -888,6 +894,7 @@ struct x86_pmu {
unsigned int lbr_mispred:1;
unsigned int lbr_timed_lbr:1;
unsigned int lbr_br_type:1;
+ unsigned int lbr_counters:4;
void (*lbr_reset)(void);
void (*lbr_read)(struct cpu_hw_events *cpuc);
@@ -1012,6 +1019,7 @@ do { \
#define PMU_FL_INSTR_LATENCY 0x80 /* Support Instruction Latency in PEBS Memory Info Record */
#define PMU_FL_MEM_LOADS_AUX 0x100 /* Require an auxiliary event for the complete memory info */
#define PMU_FL_RETIRE_LATENCY 0x200 /* Support Retire Latency in PEBS */
+#define PMU_FL_BR_CNTR 0x400 /* Support branch counter logging */
#define EVENT_VAR(_id) event_attr_##_id
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
@@ -1552,6 +1560,10 @@ void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);
void intel_ds_init(void);
+void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
+ struct cpu_hw_events *cpuc,
+ struct perf_event *event);
+
void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
struct perf_event_pmu_context *next_epc);
diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h
index 1dc19b9b4426..6c977c19f2cd 100644
--- a/arch/x86/events/perf_event_flags.h
+++ b/arch/x86/events/perf_event_flags.h
@@ -20,3 +20,5 @@ PERF_ARCH(TOPDOWN, 0x04000) /* Count Topdown slots/metrics events */
PERF_ARCH(PEBS_STLAT, 0x08000) /* st+stlat data address sampling */
PERF_ARCH(AMD_BRS, 0x10000) /* AMD Branch Sampling */
PERF_ARCH(PEBS_LAT_HYBRID, 0x20000) /* ld and st lat for hybrid */
+PERF_ARCH(NEEDS_BRANCH_STACK, 0x40000) /* require branch stack setup */
+PERF_ARCH(BRANCH_COUNTERS, 0x80000) /* logs the counters in the extra space of each branch */
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index 97bfe5f0531f..5fc45543e955 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -209,7 +209,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector,
/*
* This particular version of the IPI hypercall can
- * only target upto 64 CPUs.
+ * only target up to 64 CPUs.
*/
if (vcpu >= 64)
goto do_ex_hypercall;
diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c
index 42c70d28ef27..3215a4a07408 100644
--- a/arch/x86/hyperv/irqdomain.c
+++ b/arch/x86/hyperv/irqdomain.c
@@ -212,7 +212,7 @@ static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
* This interrupt is already mapped. Let's unmap first.
*
* We don't use retarget interrupt hypercalls here because
- * Microsoft Hypervisor doens't allow root to change the vector
+ * Microsoft Hypervisor doesn't allow root to change the vector
* or specify VPs outside of the set that is initially used
* during mapping.
*/
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 02e55237d919..7dcbf153ad72 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -144,7 +144,7 @@ void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason)
/* Tell the hypervisor what went wrong. */
val |= GHCB_SEV_TERM_REASON(set, reason);
- /* Request Guest Termination from Hypvervisor */
+ /* Request Guest Termination from Hypervisor */
wr_ghcb_msr(val);
VMGEXIT();
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 65f79092c9d9..fcd20c6dc7f9 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -10,6 +10,9 @@
#define ALT_FLAG_NOT (1 << 0)
#define ALT_NOT(feature) ((ALT_FLAG_NOT << ALT_FLAGS_SHIFT) | (feature))
+#define ALT_FLAG_DIRECT_CALL (1 << 1)
+#define ALT_DIRECT_CALL(feature) ((ALT_FLAG_DIRECT_CALL << ALT_FLAGS_SHIFT) | (feature))
+#define ALT_CALL_ALWAYS ALT_DIRECT_CALL(X86_FEATURE_ALWAYS)
#ifndef __ASSEMBLY__
@@ -86,6 +89,8 @@ struct alt_instr {
u8 replacementlen; /* length of new instruction */
} __packed;
+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+
/*
* Debug flag that can be tested to see whether alternative
* instructions were patched in already:
@@ -101,11 +106,10 @@ extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine,
s32 *start_cfi, s32 *end_cfi);
struct module;
-struct paravirt_patch_site;
struct callthunk_sites {
s32 *call_start, *call_end;
- struct paravirt_patch_site *pv_start, *pv_end;
+ struct alt_instr *alt_start, *alt_end;
};
#ifdef CONFIG_CALL_THUNKS
@@ -150,6 +154,8 @@ static inline int alternatives_text_reserved(void *start, void *end)
}
#endif /* CONFIG_SMP */
+#define ALT_CALL_INSTR "call BUG_func"
+
#define b_replacement(num) "664"#num
#define e_replacement(num) "665"#num
@@ -330,6 +336,22 @@ static inline int alternatives_text_reserved(void *start, void *end)
*/
#define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr
+/* Macro for creating assembler functions avoiding any C magic. */
+#define DEFINE_ASM_FUNC(func, instr, sec) \
+ asm (".pushsection " #sec ", \"ax\"\n" \
+ ".global " #func "\n\t" \
+ ".type " #func ", @function\n\t" \
+ ASM_FUNC_ALIGN "\n" \
+ #func ":\n\t" \
+ ASM_ENDBR \
+ instr "\n\t" \
+ ASM_RET \
+ ".size " #func ", . - " #func "\n\t" \
+ ".popsection")
+
+void BUG_func(void);
+void nop_func(void);
+
#else /* __ASSEMBLY__ */
#ifdef CONFIG_SMP
@@ -370,6 +392,10 @@ static inline int alternatives_text_reserved(void *start, void *end)
.byte \alt_len
.endm
+.macro ALT_CALL_INSTR
+ call BUG_func
+.endm
+
/*
* Define an alternative between two instructions. If @feature is
* present, early code in apply_alternatives() replaces @oldinstr with
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index ed0eaf65c437..5c37944c8a5e 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -104,7 +104,7 @@ static inline bool amd_gart_present(void)
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
return false;
- /* GART present only on Fam15h, upto model 0fh */
+ /* GART present only on Fam15h, up to model 0fh */
if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
(boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model < 0x10))
return true;
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index d21f48f1c242..9d159b771dc8 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -272,8 +272,6 @@ struct apic {
void (*send_IPI_all)(int vector);
void (*send_IPI_self)(int vector);
- enum apic_delivery_modes delivery_mode;
-
u32 disable_esr : 1,
dest_mode_logical : 1,
x2apic_set_max_apicid : 1,
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 4b125e5b3187..094106b6a538 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -20,6 +20,13 @@
*/
#define IO_APIC_SLOT_SIZE 1024
+#define APIC_DELIVERY_MODE_FIXED 0
+#define APIC_DELIVERY_MODE_LOWESTPRIO 1
+#define APIC_DELIVERY_MODE_SMI 2
+#define APIC_DELIVERY_MODE_NMI 4
+#define APIC_DELIVERY_MODE_INIT 5
+#define APIC_DELIVERY_MODE_EXTINT 7
+
#define APIC_ID 0x20
#define APIC_LVR 0x30
@@ -165,279 +172,10 @@
#define APIC_CPUID(apicid) ((apicid) & XAPIC_DEST_CPUS_MASK)
#define NUM_APIC_CLUSTERS ((BAD_APICID + 1) >> XAPIC_DEST_CPUS_SHIFT)
-#ifndef __ASSEMBLY__
-/*
- * the local APIC register structure, memory mapped. Not terribly well
- * tested, but we might eventually use this one in the future - the
- * problem why we cannot use it right now is the P5 APIC, it has an
- * errata which cannot take 8-bit reads and writes, only 32-bit ones ...
- */
-#define u32 unsigned int
-
-struct local_apic {
-
-/*000*/ struct { u32 __reserved[4]; } __reserved_01;
-
-/*010*/ struct { u32 __reserved[4]; } __reserved_02;
-
-/*020*/ struct { /* APIC ID Register */
- u32 __reserved_1 : 24,
- phys_apic_id : 4,
- __reserved_2 : 4;
- u32 __reserved[3];
- } id;
-
-/*030*/ const
- struct { /* APIC Version Register */
- u32 version : 8,
- __reserved_1 : 8,
- max_lvt : 8,
- __reserved_2 : 8;
- u32 __reserved[3];
- } version;
-
-/*040*/ struct { u32 __reserved[4]; } __reserved_03;
-
-/*050*/ struct { u32 __reserved[4]; } __reserved_04;
-
-/*060*/ struct { u32 __reserved[4]; } __reserved_05;
-
-/*070*/ struct { u32 __reserved[4]; } __reserved_06;
-
-/*080*/ struct { /* Task Priority Register */
- u32 priority : 8,
- __reserved_1 : 24;
- u32 __reserved_2[3];
- } tpr;
-
-/*090*/ const
- struct { /* Arbitration Priority Register */
- u32 priority : 8,
- __reserved_1 : 24;
- u32 __reserved_2[3];
- } apr;
-
-/*0A0*/ const
- struct { /* Processor Priority Register */
- u32 priority : 8,
- __reserved_1 : 24;
- u32 __reserved_2[3];
- } ppr;
-
-/*0B0*/ struct { /* End Of Interrupt Register */
- u32 eoi;
- u32 __reserved[3];
- } eoi;
-
-/*0C0*/ struct { u32 __reserved[4]; } __reserved_07;
-
-/*0D0*/ struct { /* Logical Destination Register */
- u32 __reserved_1 : 24,
- logical_dest : 8;
- u32 __reserved_2[3];
- } ldr;
-
-/*0E0*/ struct { /* Destination Format Register */
- u32 __reserved_1 : 28,
- model : 4;
- u32 __reserved_2[3];
- } dfr;
-
-/*0F0*/ struct { /* Spurious Interrupt Vector Register */
- u32 spurious_vector : 8,
- apic_enabled : 1,
- focus_cpu : 1,
- __reserved_2 : 22;
- u32 __reserved_3[3];
- } svr;
-
-/*100*/ struct { /* In Service Register */
-/*170*/ u32 bitfield;
- u32 __reserved[3];
- } isr [8];
-
-/*180*/ struct { /* Trigger Mode Register */
-/*1F0*/ u32 bitfield;
- u32 __reserved[3];
- } tmr [8];
-
-/*200*/ struct { /* Interrupt Request Register */
-/*270*/ u32 bitfield;
- u32 __reserved[3];
- } irr [8];
-
-/*280*/ union { /* Error Status Register */
- struct {
- u32 send_cs_error : 1,
- receive_cs_error : 1,
- send_accept_error : 1,
- receive_accept_error : 1,
- __reserved_1 : 1,
- send_illegal_vector : 1,
- receive_illegal_vector : 1,
- illegal_register_address : 1,
- __reserved_2 : 24;
- u32 __reserved_3[3];
- } error_bits;
- struct {
- u32 errors;
- u32 __reserved_3[3];
- } all_errors;
- } esr;
-
-/*290*/ struct { u32 __reserved[4]; } __reserved_08;
-
-/*2A0*/ struct { u32 __reserved[4]; } __reserved_09;
-
-/*2B0*/ struct { u32 __reserved[4]; } __reserved_10;
-
-/*2C0*/ struct { u32 __reserved[4]; } __reserved_11;
-
-/*2D0*/ struct { u32 __reserved[4]; } __reserved_12;
-
-/*2E0*/ struct { u32 __reserved[4]; } __reserved_13;
-
-/*2F0*/ struct { u32 __reserved[4]; } __reserved_14;
-
-/*300*/ struct { /* Interrupt Command Register 1 */
- u32 vector : 8,
- delivery_mode : 3,
- destination_mode : 1,
- delivery_status : 1,
- __reserved_1 : 1,
- level : 1,
- trigger : 1,
- __reserved_2 : 2,
- shorthand : 2,
- __reserved_3 : 12;
- u32 __reserved_4[3];
- } icr1;
-
-/*310*/ struct { /* Interrupt Command Register 2 */
- union {
- u32 __reserved_1 : 24,
- phys_dest : 4,
- __reserved_2 : 4;
- u32 __reserved_3 : 24,
- logical_dest : 8;
- } dest;
- u32 __reserved_4[3];
- } icr2;
-
-/*320*/ struct { /* LVT - Timer */
- u32 vector : 8,
- __reserved_1 : 4,
- delivery_status : 1,
- __reserved_2 : 3,
- mask : 1,
- timer_mode : 1,
- __reserved_3 : 14;
- u32 __reserved_4[3];
- } lvt_timer;
-
-/*330*/ struct { /* LVT - Thermal Sensor */
- u32 vector : 8,
- delivery_mode : 3,
- __reserved_1 : 1,
- delivery_status : 1,
- __reserved_2 : 3,
- mask : 1,
- __reserved_3 : 15;
- u32 __reserved_4[3];
- } lvt_thermal;
-
-/*340*/ struct { /* LVT - Performance Counter */
- u32 vector : 8,
- delivery_mode : 3,
- __reserved_1 : 1,
- delivery_status : 1,
- __reserved_2 : 3,
- mask : 1,
- __reserved_3 : 15;
- u32 __reserved_4[3];
- } lvt_pc;
-
-/*350*/ struct { /* LVT - LINT0 */
- u32 vector : 8,
- delivery_mode : 3,
- __reserved_1 : 1,
- delivery_status : 1,
- polarity : 1,
- remote_irr : 1,
- trigger : 1,
- mask : 1,
- __reserved_2 : 15;
- u32 __reserved_3[3];
- } lvt_lint0;
-
-/*360*/ struct { /* LVT - LINT1 */
- u32 vector : 8,
- delivery_mode : 3,
- __reserved_1 : 1,
- delivery_status : 1,
- polarity : 1,
- remote_irr : 1,
- trigger : 1,
- mask : 1,
- __reserved_2 : 15;
- u32 __reserved_3[3];
- } lvt_lint1;
-
-/*370*/ struct { /* LVT - Error */
- u32 vector : 8,
- __reserved_1 : 4,
- delivery_status : 1,
- __reserved_2 : 3,
- mask : 1,
- __reserved_3 : 15;
- u32 __reserved_4[3];
- } lvt_error;
-
-/*380*/ struct { /* Timer Initial Count Register */
- u32 initial_count;
- u32 __reserved_2[3];
- } timer_icr;
-
-/*390*/ const
- struct { /* Timer Current Count Register */
- u32 curr_count;
- u32 __reserved_2[3];
- } timer_ccr;
-
-/*3A0*/ struct { u32 __reserved[4]; } __reserved_16;
-
-/*3B0*/ struct { u32 __reserved[4]; } __reserved_17;
-
-/*3C0*/ struct { u32 __reserved[4]; } __reserved_18;
-
-/*3D0*/ struct { u32 __reserved[4]; } __reserved_19;
-
-/*3E0*/ struct { /* Timer Divide Configuration Register */
- u32 divisor : 4,
- __reserved_1 : 28;
- u32 __reserved_2[3];
- } timer_dcr;
-
-/*3F0*/ struct { u32 __reserved[4]; } __reserved_20;
-
-} __attribute__ ((packed));
-
-#undef u32
-
#ifdef CONFIG_X86_32
#define BAD_APICID 0xFFu
#else
#define BAD_APICID 0xFFFFu
#endif
-enum apic_delivery_modes {
- APIC_DELIVERY_MODE_FIXED = 0,
- APIC_DELIVERY_MODE_LOWESTPRIO = 1,
- APIC_DELIVERY_MODE_SMI = 2,
- APIC_DELIVERY_MODE_NMI = 4,
- APIC_DELIVERY_MODE_INIT = 5,
- APIC_DELIVERY_MODE_EXTINT = 7,
-};
-
-#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_APICDEF_H */
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 35389b2af88e..0216f63a366b 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -81,22 +81,4 @@ do { \
#include <asm-generic/barrier.h>
-/*
- * Make previous memory operations globally visible before
- * a WRMSR.
- *
- * MFENCE makes writes visible, but only affects load/store
- * instructions. WRMSR is unfortunately not a load/store
- * instruction and is unaffected by MFENCE. The LFENCE ensures
- * that the WRMSR is not reordered.
- *
- * Most WRMSRs are full serializing instructions themselves and
- * do not require this barrier. This is only required for the
- * IA32_TSC_DEADLINE and X2APIC MSRs.
- */
-static inline void weak_wrmsr_fence(void)
-{
- asm volatile("mfence; lfence" : : : "memory");
-}
-
#endif /* _ASM_X86_BARRIER_H */
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 4af140cf5719..632c26cdeeda 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -218,7 +218,7 @@
#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_ZEN (7*32+28) /* "" CPU based on Zen microarchitecture */
+#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */
#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */
#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */
@@ -308,10 +308,14 @@
#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */
#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */
#define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */
-
#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */
#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */
#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */
+#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */
+#define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */
+#define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */
+#define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */
+#define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
index f7e7099af595..d440a65af8f3 100644
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h
@@ -8,6 +8,56 @@
* archs.
*/
+/*
+ * Low-level interface mapping flags/field names to bits
+ */
+
+/* Flags for _DESC_S (non-system) descriptors */
+#define _DESC_ACCESSED 0x0001
+#define _DESC_DATA_WRITABLE 0x0002
+#define _DESC_CODE_READABLE 0x0002
+#define _DESC_DATA_EXPAND_DOWN 0x0004
+#define _DESC_CODE_CONFORMING 0x0004
+#define _DESC_CODE_EXECUTABLE 0x0008
+
+/* Common flags */
+#define _DESC_S 0x0010
+#define _DESC_DPL(dpl) ((dpl) << 5)
+#define _DESC_PRESENT 0x0080
+
+#define _DESC_LONG_CODE 0x2000
+#define _DESC_DB 0x4000
+#define _DESC_GRANULARITY_4K 0x8000
+
+/* System descriptors have a numeric "type" field instead of flags */
+#define _DESC_SYSTEM(code) (code)
+
+/*
+ * High-level interface mapping intended usage to low-level combinations
+ * of flags
+ */
+
+#define _DESC_DATA (_DESC_S | _DESC_PRESENT | _DESC_ACCESSED | \
+ _DESC_DATA_WRITABLE)
+#define _DESC_CODE (_DESC_S | _DESC_PRESENT | _DESC_ACCESSED | \
+ _DESC_CODE_READABLE | _DESC_CODE_EXECUTABLE)
+
+#define DESC_DATA16 (_DESC_DATA)
+#define DESC_CODE16 (_DESC_CODE)
+
+#define DESC_DATA32 (_DESC_DATA | _DESC_GRANULARITY_4K | _DESC_DB)
+#define DESC_DATA32_BIOS (_DESC_DATA | _DESC_DB)
+
+#define DESC_CODE32 (_DESC_CODE | _DESC_GRANULARITY_4K | _DESC_DB)
+#define DESC_CODE32_BIOS (_DESC_CODE | _DESC_DB)
+
+#define DESC_TSS32 (_DESC_SYSTEM(9) | _DESC_PRESENT)
+
+#define DESC_DATA64 (_DESC_DATA | _DESC_GRANULARITY_4K | _DESC_DB)
+#define DESC_CODE64 (_DESC_CODE | _DESC_GRANULARITY_4K | _DESC_LONG_CODE)
+
+#define DESC_USER (_DESC_DPL(3))
+
#ifndef __ASSEMBLY__
#include <linux/types.h>
@@ -22,19 +72,19 @@ struct desc_struct {
#define GDT_ENTRY_INIT(flags, base, limit) \
{ \
- .limit0 = (u16) (limit), \
- .limit1 = ((limit) >> 16) & 0x0F, \
- .base0 = (u16) (base), \
- .base1 = ((base) >> 16) & 0xFF, \
- .base2 = ((base) >> 24) & 0xFF, \
- .type = (flags & 0x0f), \
- .s = (flags >> 4) & 0x01, \
- .dpl = (flags >> 5) & 0x03, \
- .p = (flags >> 7) & 0x01, \
- .avl = (flags >> 12) & 0x01, \
- .l = (flags >> 13) & 0x01, \
- .d = (flags >> 14) & 0x01, \
- .g = (flags >> 15) & 0x01, \
+ .limit0 = ((limit) >> 0) & 0xFFFF, \
+ .limit1 = ((limit) >> 16) & 0x000F, \
+ .base0 = ((base) >> 0) & 0xFFFF, \
+ .base1 = ((base) >> 16) & 0x00FF, \
+ .base2 = ((base) >> 24) & 0x00FF, \
+ .type = ((flags) >> 0) & 0x000F, \
+ .s = ((flags) >> 4) & 0x0001, \
+ .dpl = ((flags) >> 5) & 0x0003, \
+ .p = ((flags) >> 7) & 0x0001, \
+ .avl = ((flags) >> 12) & 0x0001, \
+ .l = ((flags) >> 13) & 0x0001, \
+ .d = ((flags) >> 14) & 0x0001, \
+ .g = ((flags) >> 15) & 0x0001, \
}
enum {
@@ -94,6 +144,7 @@ struct gate_struct {
typedef struct gate_struct gate_desc;
+#ifndef _SETUP
static inline unsigned long gate_offset(const gate_desc *g)
{
#ifdef CONFIG_X86_64
@@ -108,6 +159,7 @@ static inline unsigned long gate_segment(const gate_desc *g)
{
return g->segment;
}
+#endif
struct desc_ptr {
unsigned short size;
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index a0234dfd1031..1e16bd5ac781 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -150,7 +150,7 @@ do { \
((x)->e_machine == EM_X86_64)
#define compat_elf_check_arch(x) \
- ((elf_check_arch_ia32(x) && ia32_enabled()) || \
+ ((elf_check_arch_ia32(x) && ia32_enabled_verbose()) || \
(IS_ENABLED(CONFIG_X86_X32_ABI) && (x)->e_machine == EM_X86_64))
static inline void elf_common_init(struct thread_struct *t,
diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h
index 991e31cfde94..fe6312045042 100644
--- a/arch/x86/include/asm/extable_fixup_types.h
+++ b/arch/x86/include/asm/extable_fixup_types.h
@@ -4,7 +4,7 @@
/*
* Our IMM is signed, as such it must live at the top end of the word. Also,
- * since C99 hex constants are of ambigious type, force cast the mask to 'int'
+ * since C99 hex constants are of ambiguous type, force cast the mask to 'int'
* so that FIELD_GET() will DTRT and sign extend the value when it extracts it.
*/
#define EX_DATA_TYPE_MASK ((int)0x000000FF)
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index eb810074f1e7..f1fadc318a88 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -415,7 +415,7 @@ struct fpu_state_perm {
*
* This master permission field is only to be used when
* task.fpu.fpstate based checks fail to validate whether the task
- * is allowed to expand it's xfeatures set which requires to
+ * is allowed to expand its xfeatures set which requires to
* allocate a larger sized fpstate buffer.
*
* Do not access this field directly. Use the provided helper
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index 5a2ae24b1204..c7ef6ea2fa99 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -2,7 +2,6 @@
#ifndef _ASM_X86_IA32_H
#define _ASM_X86_IA32_H
-
#ifdef CONFIG_IA32_EMULATION
#include <linux/compat.h>
@@ -75,6 +74,11 @@ static inline bool ia32_enabled(void)
return __ia32_enabled;
}
+static inline void ia32_disable(void)
+{
+ __ia32_enabled = false;
+}
+
#else /* !CONFIG_IA32_EMULATION */
static inline bool ia32_enabled(void)
@@ -82,6 +86,18 @@ static inline bool ia32_enabled(void)
return IS_ENABLED(CONFIG_X86_32);
}
+static inline void ia32_disable(void) {}
+
#endif
+static inline bool ia32_enabled_verbose(void)
+{
+ bool enabled = ia32_enabled();
+
+ if (IS_ENABLED(CONFIG_IA32_EMULATION) && !enabled)
+ pr_notice_once("32-bit emulation disabled. You can reenable with ia32_emulation=on\n");
+
+ return enabled;
+}
+
#endif /* _ASM_X86_IA32_H */
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 05fd175cec7d..13639e57e1f8 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -569,6 +569,10 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_UD, exc_invalid_op);
DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3);
DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault);
+#if defined(CONFIG_IA32_EMULATION)
+DECLARE_IDTENTRY_RAW(IA32_SYSCALL_VECTOR, int80_emulation);
+#endif
+
#ifdef CONFIG_X86_MCE
#ifdef CONFIG_X86_64
DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check);
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 76238842406a..3814a9263d64 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -242,7 +242,7 @@ static inline void slow_down_io(void)
#endif
-#define BUILDIO(bwl, bw, type) \
+#define BUILDIO(bwl, type) \
static inline void out##bwl##_p(type value, u16 port) \
{ \
out##bwl(value, port); \
@@ -288,9 +288,9 @@ static inline void ins##bwl(u16 port, void *addr, unsigned long count) \
} \
}
-BUILDIO(b, b, u8)
-BUILDIO(w, w, u16)
-BUILDIO(l, , u32)
+BUILDIO(b, u8)
+BUILDIO(w, u16)
+BUILDIO(l, u32)
#undef BUILDIO
#define inb_p inb_p
diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h
index a1911fea8739..af7541c11821 100644
--- a/arch/x86/include/asm/iosf_mbi.h
+++ b/arch/x86/include/asm/iosf_mbi.h
@@ -111,7 +111,7 @@ int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask);
* This function will block all kernel access to the PMIC I2C bus, so that the
* P-Unit can safely access the PMIC over the shared I2C bus.
*
- * Note on these systems the i2c-bus driver will request a sempahore from the
+ * Note on these systems the i2c-bus driver will request a semaphore from the
* P-Unit for exclusive access to the PMIC bus when i2c drivers are accessing
* it, but this does not appear to be sufficient, we still need to avoid making
* certain P-Unit requests during the access window to avoid problems.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d7036982332e..6711da000bb7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1652,7 +1652,7 @@ struct kvm_x86_ops {
/* Whether or not a virtual NMI is pending in hardware. */
bool (*is_vnmi_pending)(struct kvm_vcpu *vcpu);
/*
- * Attempt to pend a virtual NMI in harware. Returns %true on success
+ * Attempt to pend a virtual NMI in hardware. Returns %true on success
* to allow using static_call_ret0 as the fallback.
*/
bool (*set_vnmi_pending)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 6de6e1d95952..de3118305838 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -311,6 +311,7 @@ enum smca_bank_types {
SMCA_PIE, /* Power, Interrupts, etc. */
SMCA_UMC, /* Unified Memory Controller */
SMCA_UMC_V2,
+ SMCA_MA_LLC, /* Memory Attached Last Level Cache */
SMCA_PB, /* Parameter Block */
SMCA_PSP, /* Platform Security Processor */
SMCA_PSP_V2,
@@ -326,6 +327,8 @@ enum smca_bank_types {
SMCA_SHUB, /* System HUB Unit */
SMCA_SATA, /* SATA Unit */
SMCA_USB, /* USB Unit */
+ SMCA_USR_DP, /* Ultra Short Reach Data Plane Controller */
+ SMCA_USR_CP, /* Ultra Short Reach Control Plane Controller */
SMCA_GMI_PCS, /* GMI PCS Unit */
SMCA_XGMI_PHY, /* xGMI PHY Unit */
SMCA_WAFL_PHY, /* WAFL PHY Unit */
@@ -333,7 +336,6 @@ enum smca_bank_types {
N_SMCA_BANK_TYPES
};
-extern const char *smca_get_long_name(enum smca_bank_types t);
extern bool amd_mce_is_memory_error(struct mce *m);
extern int mce_threshold_create_device(unsigned int cpu);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 1d51e1850ed0..737a52b89e64 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -237,6 +237,11 @@
#define LBR_INFO_CYCLES 0xffff
#define LBR_INFO_BR_TYPE_OFFSET 56
#define LBR_INFO_BR_TYPE (0xfull << LBR_INFO_BR_TYPE_OFFSET)
+#define LBR_INFO_BR_CNTR_OFFSET 32
+#define LBR_INFO_BR_CNTR_NUM 4
+#define LBR_INFO_BR_CNTR_BITS 2
+#define LBR_INFO_BR_CNTR_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_BITS - 1, 0)
+#define LBR_INFO_BR_CNTR_FULL_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS - 1, 0)
#define MSR_ARCH_LBR_CTL 0x000014ce
#define ARCH_LBR_CTL_LBREN BIT(0)
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 778df05f8539..920426d691ce 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -87,6 +87,15 @@ static __always_inline void __mwaitx(unsigned long eax, unsigned long ebx,
:: "a" (eax), "b" (ebx), "c" (ecx));
}
+/*
+ * Re-enable interrupts right upon calling mwait in such a way that
+ * no interrupt can fire _before_ the execution of mwait, ie: no
+ * instruction must be placed between "sti" and "mwait".
+ *
+ * This is necessary because if an interrupt queues a timer before
+ * executing mwait, it would otherwise go unnoticed and the next tick
+ * would not be reprogrammed accordingly before mwait ever wakes up.
+ */
static __always_inline void __sti_mwait(unsigned long eax, unsigned long ecx)
{
mds_idle_clear_cpu_buffers();
@@ -115,8 +124,15 @@ static __always_inline void mwait_idle_with_hints(unsigned long eax, unsigned lo
}
__monitor((void *)&current_thread_info()->flags, 0, 0);
- if (!need_resched())
- __mwait(eax, ecx);
+
+ if (!need_resched()) {
+ if (ecx & 1) {
+ __mwait(eax, ecx);
+ } else {
+ __sti_mwait(eax, ecx);
+ raw_local_irq_disable();
+ }
+ }
}
current_clr_polling();
}
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index f93e9b96927a..262e65539f83 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -49,7 +49,7 @@
* but there is still a cushion vs. the RSB depth. The algorithm does not
* claim to be perfect and it can be speculated around by the CPU, but it
* is considered that it obfuscates the problem enough to make exploitation
- * extremly difficult.
+ * extremely difficult.
*/
#define RET_DEPTH_SHIFT 5
#define RSB_RET_STUFF_LOOPS 16
@@ -208,7 +208,7 @@
/*
* Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should
- * eventually turn into it's own annotation.
+ * eventually turn into its own annotation.
*/
.macro VALIDATE_UNRET_END
#if defined(CONFIG_NOINSTR_VALIDATION) && \
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 6c8ff12140ae..8bcf7584e7dd 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -142,8 +142,7 @@ static inline void write_cr0(unsigned long x)
static __always_inline unsigned long read_cr2(void)
{
return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2,
- "mov %%cr2, %%rax;",
- ALT_NOT(X86_FEATURE_XENPV));
+ "mov %%cr2, %%rax;", ALT_NOT_XEN);
}
static __always_inline void write_cr2(unsigned long x)
@@ -154,13 +153,12 @@ static __always_inline void write_cr2(unsigned long x)
static inline unsigned long __read_cr3(void)
{
return PVOP_ALT_CALL0(unsigned long, mmu.read_cr3,
- "mov %%cr3, %%rax;", ALT_NOT(X86_FEATURE_XENPV));
+ "mov %%cr3, %%rax;", ALT_NOT_XEN);
}
static inline void write_cr3(unsigned long x)
{
- PVOP_ALT_VCALL1(mmu.write_cr3, x,
- "mov %%rdi, %%cr3", ALT_NOT(X86_FEATURE_XENPV));
+ PVOP_ALT_VCALL1(mmu.write_cr3, x, "mov %%rdi, %%cr3", ALT_NOT_XEN);
}
static inline void __write_cr4(unsigned long x)
@@ -182,7 +180,7 @@ extern noinstr void pv_native_wbinvd(void);
static __always_inline void wbinvd(void)
{
- PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT(X86_FEATURE_XENPV));
+ PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT_XEN);
}
static inline u64 paravirt_read_msr(unsigned msr)
@@ -390,27 +388,25 @@ static inline void paravirt_release_p4d(unsigned long pfn)
static inline pte_t __pte(pteval_t val)
{
return (pte_t) { PVOP_ALT_CALLEE1(pteval_t, mmu.make_pte, val,
- "mov %%rdi, %%rax",
- ALT_NOT(X86_FEATURE_XENPV)) };
+ "mov %%rdi, %%rax", ALT_NOT_XEN) };
}
static inline pteval_t pte_val(pte_t pte)
{
return PVOP_ALT_CALLEE1(pteval_t, mmu.pte_val, pte.pte,
- "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
+ "mov %%rdi, %%rax", ALT_NOT_XEN);
}
static inline pgd_t __pgd(pgdval_t val)
{
return (pgd_t) { PVOP_ALT_CALLEE1(pgdval_t, mmu.make_pgd, val,
- "mov %%rdi, %%rax",
- ALT_NOT(X86_FEATURE_XENPV)) };
+ "mov %%rdi, %%rax", ALT_NOT_XEN) };
}
static inline pgdval_t pgd_val(pgd_t pgd)
{
return PVOP_ALT_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd,
- "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
+ "mov %%rdi, %%rax", ALT_NOT_XEN);
}
#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
@@ -444,14 +440,13 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
static inline pmd_t __pmd(pmdval_t val)
{
return (pmd_t) { PVOP_ALT_CALLEE1(pmdval_t, mmu.make_pmd, val,
- "mov %%rdi, %%rax",
- ALT_NOT(X86_FEATURE_XENPV)) };
+ "mov %%rdi, %%rax", ALT_NOT_XEN) };
}
static inline pmdval_t pmd_val(pmd_t pmd)
{
return PVOP_ALT_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd,
- "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
+ "mov %%rdi, %%rax", ALT_NOT_XEN);
}
static inline void set_pud(pud_t *pudp, pud_t pud)
@@ -464,7 +459,7 @@ static inline pud_t __pud(pudval_t val)
pudval_t ret;
ret = PVOP_ALT_CALLEE1(pudval_t, mmu.make_pud, val,
- "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
+ "mov %%rdi, %%rax", ALT_NOT_XEN);
return (pud_t) { ret };
}
@@ -472,7 +467,7 @@ static inline pud_t __pud(pudval_t val)
static inline pudval_t pud_val(pud_t pud)
{
return PVOP_ALT_CALLEE1(pudval_t, mmu.pud_val, pud.pud,
- "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
+ "mov %%rdi, %%rax", ALT_NOT_XEN);
}
static inline void pud_clear(pud_t *pudp)
@@ -492,8 +487,7 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
static inline p4d_t __p4d(p4dval_t val)
{
p4dval_t ret = PVOP_ALT_CALLEE1(p4dval_t, mmu.make_p4d, val,
- "mov %%rdi, %%rax",
- ALT_NOT(X86_FEATURE_XENPV));
+ "mov %%rdi, %%rax", ALT_NOT_XEN);
return (p4d_t) { ret };
}
@@ -501,7 +495,7 @@ static inline p4d_t __p4d(p4dval_t val)
static inline p4dval_t p4d_val(p4d_t p4d)
{
return PVOP_ALT_CALLEE1(p4dval_t, mmu.p4d_val, p4d.p4d,
- "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
+ "mov %%rdi, %%rax", ALT_NOT_XEN);
}
static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd)
@@ -687,17 +681,17 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
static __always_inline unsigned long arch_local_save_flags(void)
{
return PVOP_ALT_CALLEE0(unsigned long, irq.save_fl, "pushf; pop %%rax;",
- ALT_NOT(X86_FEATURE_XENPV));
+ ALT_NOT_XEN);
}
static __always_inline void arch_local_irq_disable(void)
{
- PVOP_ALT_VCALLEE0(irq.irq_disable, "cli;", ALT_NOT(X86_FEATURE_XENPV));
+ PVOP_ALT_VCALLEE0(irq.irq_disable, "cli;", ALT_NOT_XEN);
}
static __always_inline void arch_local_irq_enable(void)
{
- PVOP_ALT_VCALLEE0(irq.irq_enable, "sti;", ALT_NOT(X86_FEATURE_XENPV));
+ PVOP_ALT_VCALLEE0(irq.irq_enable, "sti;", ALT_NOT_XEN);
}
static __always_inline unsigned long arch_local_irq_save(void)
@@ -726,52 +720,25 @@ static __always_inline unsigned long arch_local_irq_save(void)
#undef PVOP_VCALL4
#undef PVOP_CALL4
-#define DEFINE_PARAVIRT_ASM(func, instr, sec) \
- asm (".pushsection " #sec ", \"ax\"\n" \
- ".global " #func "\n\t" \
- ".type " #func ", @function\n\t" \
- ASM_FUNC_ALIGN "\n" \
- #func ":\n\t" \
- ASM_ENDBR \
- instr "\n\t" \
- ASM_RET \
- ".size " #func ", . - " #func "\n\t" \
- ".popsection")
-
extern void default_banner(void);
void native_pv_lock_init(void) __init;
#else /* __ASSEMBLY__ */
-#define _PVSITE(ptype, ops, word, algn) \
-771:; \
- ops; \
-772:; \
- .pushsection .parainstructions,"a"; \
- .align algn; \
- word 771b; \
- .byte ptype; \
- .byte 772b-771b; \
- _ASM_ALIGN; \
- .popsection
-
-
#ifdef CONFIG_X86_64
#ifdef CONFIG_PARAVIRT_XXL
+#ifdef CONFIG_DEBUG_ENTRY
-#define PARA_PATCH(off) ((off) / 8)
-#define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .quad, 8)
#define PARA_INDIRECT(addr) *addr(%rip)
-#ifdef CONFIG_DEBUG_ENTRY
.macro PARA_IRQ_save_fl
- PARA_SITE(PARA_PATCH(PV_IRQ_save_fl),
- ANNOTATE_RETPOLINE_SAFE;
- call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl);)
+ ANNOTATE_RETPOLINE_SAFE;
+ call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl);
.endm
-#define SAVE_FLAGS ALTERNATIVE "PARA_IRQ_save_fl;", "pushf; pop %rax;", \
- ALT_NOT(X86_FEATURE_XENPV)
+#define SAVE_FLAGS ALTERNATIVE_2 "PARA_IRQ_save_fl;", \
+ "ALT_CALL_INSTR;", ALT_CALL_ALWAYS, \
+ "pushf; pop %rax;", ALT_NOT_XEN
#endif
#endif /* CONFIG_PARAVIRT_XXL */
#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 772d03487520..d8e85d2cf8d5 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -2,15 +2,6 @@
#ifndef _ASM_X86_PARAVIRT_TYPES_H
#define _ASM_X86_PARAVIRT_TYPES_H
-#ifndef __ASSEMBLY__
-/* These all sit in the .parainstructions section to tell us what to patch. */
-struct paravirt_patch_site {
- u8 *instr; /* original instructions */
- u8 type; /* type of this instruction */
- u8 len; /* length of original instruction */
-};
-#endif
-
#ifdef CONFIG_PARAVIRT
#ifndef __ASSEMBLY__
@@ -250,43 +241,11 @@ struct paravirt_patch_template {
extern struct pv_info pv_info;
extern struct paravirt_patch_template pv_ops;
-#define PARAVIRT_PATCH(x) \
- (offsetof(struct paravirt_patch_template, x) / sizeof(void *))
-
-#define paravirt_type(op) \
- [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \
- [paravirt_opptr] "m" (pv_ops.op)
-/*
- * Generate some code, and mark it as patchable by the
- * apply_paravirt() alternate instruction patcher.
- */
-#define _paravirt_alt(insn_string, type) \
- "771:\n\t" insn_string "\n" "772:\n" \
- ".pushsection .parainstructions,\"a\"\n" \
- _ASM_ALIGN "\n" \
- _ASM_PTR " 771b\n" \
- " .byte " type "\n" \
- " .byte 772b-771b\n" \
- _ASM_ALIGN "\n" \
- ".popsection\n"
-
-/* Generate patchable code, with the default asm parameters. */
-#define paravirt_alt(insn_string) \
- _paravirt_alt(insn_string, "%c[paravirt_typenum]")
-
-/* Simple instruction patching code. */
-#define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
-
-unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr, unsigned int len);
+#define paravirt_ptr(op) [paravirt_opptr] "m" (pv_ops.op)
int paravirt_disable_iospace(void);
-/*
- * This generates an indirect call based on the operation type number.
- * The type number, computed in PARAVIRT_PATCH, is derived from the
- * offset into the paravirt_patch_template structure, and can therefore be
- * freely converted back into a structure offset.
- */
+/* This generates an indirect call based on the operation type number. */
#define PARAVIRT_CALL \
ANNOTATE_RETPOLINE_SAFE \
"call *%[paravirt_opptr];"
@@ -319,12 +278,6 @@ int paravirt_disable_iospace(void);
* However, x86_64 also has to clobber all caller saved registers, which
* unfortunately, are quite a bit (r8 - r11)
*
- * The call instruction itself is marked by placing its start address
- * and size into the .parainstructions section, so that
- * apply_paravirt() in arch/i386/kernel/alternative.c can do the
- * appropriate patching under the control of the backend pv_init_ops
- * implementation.
- *
* Unfortunately there's no way to get gcc to generate the args setup
* for the call, and then allow the call itself to be generated by an
* inline asm. Because of this, we must do the complete arg setup and
@@ -423,14 +376,27 @@ int paravirt_disable_iospace(void);
__mask & __eax; \
})
-
+/*
+ * Use alternative patching for paravirt calls:
+ * - For replacing an indirect call with a direct one, use the "normal"
+ * ALTERNATIVE() macro with the indirect call as the initial code sequence,
+ * which will be replaced with the related direct call by using the
+ * ALT_FLAG_DIRECT_CALL special case and the "always on" feature.
+ * - In case the replacement is either a direct call or a short code sequence
+ * depending on a feature bit, the ALTERNATIVE_2() macro is being used.
+ * The indirect call is the initial code sequence again, while the special
+ * code sequence is selected with the specified feature bit. In case the
+ * feature is not active, the direct call is used as above via the
+ * ALT_FLAG_DIRECT_CALL special case and the "always on" feature.
+ */
#define ____PVOP_CALL(ret, op, call_clbr, extra_clbr, ...) \
({ \
PVOP_CALL_ARGS; \
PVOP_TEST_NULL(op); \
- asm volatile(paravirt_alt(PARAVIRT_CALL) \
+ asm volatile(ALTERNATIVE(PARAVIRT_CALL, ALT_CALL_INSTR, \
+ ALT_CALL_ALWAYS) \
: call_clbr, ASM_CALL_CONSTRAINT \
- : paravirt_type(op), \
+ : paravirt_ptr(op), \
##__VA_ARGS__ \
: "memory", "cc" extra_clbr); \
ret; \
@@ -441,10 +407,11 @@ int paravirt_disable_iospace(void);
({ \
PVOP_CALL_ARGS; \
PVOP_TEST_NULL(op); \
- asm volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL), \
- alt, cond) \
+ asm volatile(ALTERNATIVE_2(PARAVIRT_CALL, \
+ ALT_CALL_INSTR, ALT_CALL_ALWAYS, \
+ alt, cond) \
: call_clbr, ASM_CALL_CONSTRAINT \
- : paravirt_type(op), \
+ : paravirt_ptr(op), \
##__VA_ARGS__ \
: "memory", "cc" extra_clbr); \
ret; \
@@ -542,8 +509,6 @@ int paravirt_disable_iospace(void);
__PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
-void _paravirt_nop(void);
-void paravirt_BUG(void);
unsigned long paravirt_ret0(void);
#ifdef CONFIG_PARAVIRT_XXL
u64 _paravirt_ident_64(u64);
@@ -553,11 +518,11 @@ void pv_native_irq_enable(void);
unsigned long pv_native_read_cr2(void);
#endif
-#define paravirt_nop ((void *)_paravirt_nop)
-
-extern struct paravirt_patch_site __parainstructions[],
- __parainstructions_end[];
+#define paravirt_nop ((void *)nop_func)
#endif /* __ASSEMBLY__ */
+
+#define ALT_NOT_XEN ALT_NOT(X86_FEATURE_XENPV)
+
#endif /* CONFIG_PARAVIRT */
#endif /* _ASM_X86_PARAVIRT_TYPES_H */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 2618ec7c3d1d..3736b8a46c04 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -31,6 +31,7 @@
#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22)
#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
+#define ARCH_PERFMON_EVENTSEL_BR_CNTR (1ULL << 35)
#define INTEL_FIXED_BITS_MASK 0xFULL
#define INTEL_FIXED_BITS_STRIDE 4
@@ -223,6 +224,9 @@ union cpuid28_ecx {
unsigned int lbr_timed_lbr:1;
/* Branch Type Field Supported */
unsigned int lbr_br_type:1;
+ unsigned int reserved:13;
+ /* Branch counters (Event Logging) Supported */
+ unsigned int lbr_counters:4;
} split;
unsigned int full;
};
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index a629b1b9f65a..24af25b1551a 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -203,7 +203,7 @@ static inline void native_pgd_clear(pgd_t *pgd)
* F (2) in swp entry is used to record when a pagetable is
* writeprotected by userfaultfd WP support.
*
- * E (3) in swp entry is used to rememeber PG_anon_exclusive.
+ * E (3) in swp entry is used to remember PG_anon_exclusive.
*
* Bit 7 in swp entry should be 0 because pmd_present checks not only P,
* but also L and G.
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index ae81a7191c1c..26620d7642a9 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -749,4 +749,22 @@ enum mds_mitigations {
extern bool gds_ucode_mitigated(void);
+/*
+ * Make previous memory operations globally visible before
+ * a WRMSR.
+ *
+ * MFENCE makes writes visible, but only affects load/store
+ * instructions. WRMSR is unfortunately not a load/store
+ * instruction and is unaffected by MFENCE. The LFENCE ensures
+ * that the WRMSR is not reordered.
+ *
+ * Most WRMSRs are full serializing instructions themselves and
+ * do not require this barrier. This is only required for the
+ * IA32_TSC_DEADLINE and X2APIC MSRs.
+ */
+static inline void weak_wrmsr_fence(void)
+{
+ alternative("mfence; lfence", "", ALT_NOT(X86_FEATURE_APIC_MSRS_FENCE));
+}
+
#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 4d84122bd643..484f4f0131a5 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -32,10 +32,6 @@ void entry_SYSCALL_compat(void);
void entry_SYSCALL_compat_safe_stack(void);
void entry_SYSRETL_compat_unsafe_stack(void);
void entry_SYSRETL_compat_end(void);
-void entry_INT80_compat(void);
-#ifdef CONFIG_XEN_PV
-void xen_entry_INT80_compat(void);
-#endif
#else /* !CONFIG_IA32_EMULATION */
#define entry_SYSCALL_compat NULL
#define entry_SYSENTER_compat NULL
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
index 85b6e3609cb9..ef9697f20129 100644
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -56,8 +56,8 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
"pop %rdx\n\t" \
FRAME_END
-DEFINE_PARAVIRT_ASM(__raw_callee_save___pv_queued_spin_unlock,
- PV_UNLOCK_ASM, .spinlock.text);
+DEFINE_ASM_FUNC(__raw_callee_save___pv_queued_spin_unlock,
+ PV_UNLOCK_ASM, .spinlock.text);
#else /* CONFIG_64BIT */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index bf483fcb4e57..5c83729c8e71 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -31,8 +31,6 @@
#include <asm/bootparam.h>
#include <asm/x86_init.h>
-extern u64 relocated_ramdisk;
-
/* Interrupt control for vSMPowered x86_64 systems */
#ifdef CONFIG_X86_64
void vsmp_init(void);
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
index fd2669b1cb2d..21f9407be5d3 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -86,9 +86,6 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
return sys_ni_syscall(); \
}
-#define __SYS_NI(abi, name) \
- SYSCALL_ALIAS(__##abi##_##name, sys_ni_posix_timers);
-
#ifdef CONFIG_X86_64
#define __X64_SYS_STUB0(name) \
__SYS_STUB0(x64, sys_##name)
@@ -100,13 +97,10 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
#define __X64_COND_SYSCALL(name) \
__COND_SYSCALL(x64, sys_##name)
-#define __X64_SYS_NI(name) \
- __SYS_NI(x64, sys_##name)
#else /* CONFIG_X86_64 */
#define __X64_SYS_STUB0(name)
#define __X64_SYS_STUBx(x, name, ...)
#define __X64_COND_SYSCALL(name)
-#define __X64_SYS_NI(name)
#endif /* CONFIG_X86_64 */
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
@@ -120,13 +114,10 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
#define __IA32_COND_SYSCALL(name) \
__COND_SYSCALL(ia32, sys_##name)
-#define __IA32_SYS_NI(name) \
- __SYS_NI(ia32, sys_##name)
#else /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
#define __IA32_SYS_STUB0(name)
#define __IA32_SYS_STUBx(x, name, ...)
#define __IA32_COND_SYSCALL(name)
-#define __IA32_SYS_NI(name)
#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
#ifdef CONFIG_IA32_EMULATION
@@ -135,8 +126,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
* additional wrappers (aptly named __ia32_sys_xyzzy) which decode the
* ia32 regs in the proper order for shared or "common" syscalls. As some
* syscalls may not be implemented, we need to expand COND_SYSCALL in
- * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this
- * case as well.
+ * kernel/sys_ni.c to cover this case as well.
*/
#define __IA32_COMPAT_SYS_STUB0(name) \
__SYS_STUB0(ia32, compat_sys_##name)
@@ -148,14 +138,10 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
#define __IA32_COMPAT_COND_SYSCALL(name) \
__COND_SYSCALL(ia32, compat_sys_##name)
-#define __IA32_COMPAT_SYS_NI(name) \
- __SYS_NI(ia32, compat_sys_##name)
-
#else /* CONFIG_IA32_EMULATION */
#define __IA32_COMPAT_SYS_STUB0(name)
#define __IA32_COMPAT_SYS_STUBx(x, name, ...)
#define __IA32_COMPAT_COND_SYSCALL(name)
-#define __IA32_COMPAT_SYS_NI(name)
#endif /* CONFIG_IA32_EMULATION */
@@ -175,13 +161,10 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
#define __X32_COMPAT_COND_SYSCALL(name) \
__COND_SYSCALL(x64, compat_sys_##name)
-#define __X32_COMPAT_SYS_NI(name) \
- __SYS_NI(x64, compat_sys_##name)
#else /* CONFIG_X86_X32_ABI */
#define __X32_COMPAT_SYS_STUB0(name)
#define __X32_COMPAT_SYS_STUBx(x, name, ...)
#define __X32_COMPAT_COND_SYSCALL(name)
-#define __X32_COMPAT_SYS_NI(name)
#endif /* CONFIG_X86_X32_ABI */
@@ -212,17 +195,12 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
/*
* As some compat syscalls may not be implemented, we need to expand
- * COND_SYSCALL_COMPAT in kernel/sys_ni.c and COMPAT_SYS_NI in
- * kernel/time/posix-stubs.c to cover this case as well.
+ * COND_SYSCALL_COMPAT in kernel/sys_ni.c to cover this case as well.
*/
#define COND_SYSCALL_COMPAT(name) \
__IA32_COMPAT_COND_SYSCALL(name) \
__X32_COMPAT_COND_SYSCALL(name)
-#define COMPAT_SYS_NI(name) \
- __IA32_COMPAT_SYS_NI(name) \
- __X32_COMPAT_SYS_NI(name)
-
#endif /* CONFIG_COMPAT */
#define __SYSCALL_DEFINEx(x, name, ...) \
@@ -243,8 +221,8 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
* As the generic SYSCALL_DEFINE0() macro does not decode any parameters for
* obvious reasons, and passing struct pt_regs *regs to it in %rdi does not
* hurt, we only need to re-define it here to keep the naming congruent to
- * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() and SYS_NI()
- * macros to work correctly.
+ * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() macro
+ * to work correctly.
*/
#define SYSCALL_DEFINE0(sname) \
SYSCALL_METADATA(_##sname, 0); \
@@ -257,10 +235,6 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
__X64_COND_SYSCALL(name) \
__IA32_COND_SYSCALL(name)
-#define SYS_NI(name) \
- __X64_SYS_NI(name) \
- __IA32_SYS_NI(name)
-
/*
* For VSYSCALLS, we need to declare these three syscalls with the new
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 29832c338cdc..0b70653a98c1 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -6,18 +6,6 @@
#include <linux/stddef.h>
#include <asm/ptrace.h>
-struct paravirt_patch_site;
-#ifdef CONFIG_PARAVIRT
-void apply_paravirt(struct paravirt_patch_site *start,
- struct paravirt_patch_site *end);
-#else
-static inline void apply_paravirt(struct paravirt_patch_site *start,
- struct paravirt_patch_site *end)
-{}
-#define __parainstructions NULL
-#define __parainstructions_end NULL
-#endif
-
/*
* Currently, the max observed size in the kernel code is
* JUMP_LABEL_NOP_SIZE/RELATIVEJUMP_SIZE, which are 5.
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 5fa76c2ced51..ea877fd83114 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -653,7 +653,7 @@ static inline int uv_blade_to_node(int blade)
return uv_socket_to_node(blade);
}
-/* Blade number of current cpu. Numnbered 0 .. <#blades -1> */
+/* Blade number of current cpu. Numbered 0 .. <#blades -1> */
static inline int uv_numa_blade_id(void)
{
return uv_hub_info->numa_blade_id;
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
index c81858d903dc..923053f366d7 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -321,7 +321,7 @@ static __always_inline
u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
{
/*
- * Due to the MSB/Sign-bit being used as invald marker (see
+ * Due to the MSB/Sign-bit being used as invalid marker (see
* arch_vdso_cycles_valid() above), the effective mask is S64_MAX.
*/
u64 delta = (cycles - last) & S64_MAX;
diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h
index c599ec269a25..c10f279aae93 100644
--- a/arch/x86/include/asm/xen/interface_64.h
+++ b/arch/x86/include/asm/xen/interface_64.h
@@ -61,7 +61,7 @@
* RING1 -> RING3 kernel mode.
* RING2 -> RING3 kernel mode.
* RING3 -> RING3 user mode.
- * However RING0 indicates that the guest kernel should return to iteself
+ * However RING0 indicates that the guest kernel should return to itself
* directly with
* orb $3,1*8(%rsp)
* iretq
diff --git a/arch/x86/include/uapi/asm/amd_hsmp.h b/arch/x86/include/uapi/asm/amd_hsmp.h
index fce22686c834..e5d182c7373c 100644
--- a/arch/x86/include/uapi/asm/amd_hsmp.h
+++ b/arch/x86/include/uapi/asm/amd_hsmp.h
@@ -238,7 +238,7 @@ static const struct hsmp_msg_desc hsmp_msg_desc_table[] = {
/*
* HSMP_GET_DIMM_THERMAL, num_args = 1, response_sz = 1
* input: args[0] = DIMM address[7:0]
- * output: args[0] = temperature in degree celcius[31:21] + update rate in ms[16:8] +
+ * output: args[0] = temperature in degree celsius[31:21] + update rate in ms[16:8] +
* DIMM address[7:0]
*/
{1, 1, HSMP_GET},
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 1a0dd80d81ac..85a3ce2a3666 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -293,6 +293,7 @@ acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end)
processor->processor_id, /* ACPI ID */
processor->lapic_flags & ACPI_MADT_ENABLED);
+ has_lapic_cpus = true;
return 0;
}
@@ -1134,7 +1135,6 @@ static int __init acpi_parse_madt_lapic_entries(void)
if (!count) {
count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
acpi_parse_lapic, MAX_LOCAL_APIC);
- has_lapic_cpus = count > 0;
x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
acpi_parse_x2apic, MAX_LOCAL_APIC);
}
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 73be3931e4f0..55e205b21271 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -160,7 +160,6 @@ extern s32 __retpoline_sites[], __retpoline_sites_end[];
extern s32 __return_sites[], __return_sites_end[];
extern s32 __cfi_sites[], __cfi_sites_end[];
extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[];
-extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
void text_poke_early(void *addr, const void *opcode, size_t len);
@@ -255,6 +254,16 @@ static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
}
}
+static void __init_or_module noinline optimize_nops_inplace(u8 *instr, size_t len)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ optimize_nops(instr, len);
+ sync_core();
+ local_irq_restore(flags);
+}
+
/*
* In this context, "source" is where the instructions are placed in the
* section .altinstr_replacement, for example during kernel build by the
@@ -385,6 +394,63 @@ apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
}
}
+/* Low-level backend functions usable from alternative code replacements. */
+DEFINE_ASM_FUNC(nop_func, "", .entry.text);
+EXPORT_SYMBOL_GPL(nop_func);
+
+noinstr void BUG_func(void)
+{
+ BUG();
+}
+EXPORT_SYMBOL_GPL(BUG_func);
+
+#define CALL_RIP_REL_OPCODE 0xff
+#define CALL_RIP_REL_MODRM 0x15
+
+/*
+ * Rewrite the "call BUG_func" replacement to point to the target of the
+ * indirect pv_ops call "call *disp(%ip)".
+ */
+static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a)
+{
+ void *target, *bug = &BUG_func;
+ s32 disp;
+
+ if (a->replacementlen != 5 || insn_buff[0] != CALL_INSN_OPCODE) {
+ pr_err("ALT_FLAG_DIRECT_CALL set for a non-call replacement instruction\n");
+ BUG();
+ }
+
+ if (a->instrlen != 6 ||
+ instr[0] != CALL_RIP_REL_OPCODE ||
+ instr[1] != CALL_RIP_REL_MODRM) {
+ pr_err("ALT_FLAG_DIRECT_CALL set for unrecognized indirect call\n");
+ BUG();
+ }
+
+ /* Skip CALL_RIP_REL_OPCODE and CALL_RIP_REL_MODRM */
+ disp = *(s32 *)(instr + 2);
+#ifdef CONFIG_X86_64
+ /* ff 15 00 00 00 00 call *0x0(%rip) */
+ /* target address is stored at "next instruction + disp". */
+ target = *(void **)(instr + a->instrlen + disp);
+#else
+ /* ff 15 00 00 00 00 call *0x0 */
+ /* target address is stored at disp. */
+ target = *(void **)disp;
+#endif
+ if (!target)
+ target = bug;
+
+ /* (BUG_func - .) + (target - BUG_func) := target - . */
+ *(s32 *)(insn_buff + 1) += target - bug;
+
+ if (target == &nop_func)
+ return 0;
+
+ return 5;
+}
+
/*
* Replace instructions with better alternatives for this CPU type. This runs
* before SMP is initialized to avoid SMP problems with self modifying code.
@@ -438,20 +504,25 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
* patch if feature is *NOT* present.
*/
if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) {
- optimize_nops(instr, a->instrlen);
+ optimize_nops_inplace(instr, a->instrlen);
continue;
}
- DPRINTK(ALT, "feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d)",
- (a->flags & ALT_FLAG_NOT) ? "!" : "",
+ DPRINTK(ALT, "feat: %d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d) flags: 0x%x",
a->cpuid >> 5,
a->cpuid & 0x1f,
instr, instr, a->instrlen,
- replacement, a->replacementlen);
+ replacement, a->replacementlen, a->flags);
memcpy(insn_buff, replacement, a->replacementlen);
insn_buff_sz = a->replacementlen;
+ if (a->flags & ALT_FLAG_DIRECT_CALL) {
+ insn_buff_sz = alt_replace_call(instr, insn_buff, a);
+ if (insn_buff_sz < 0)
+ continue;
+ }
+
for (; insn_buff_sz < a->instrlen; insn_buff_sz++)
insn_buff[insn_buff_sz] = 0x90;
@@ -1411,46 +1482,6 @@ int alternatives_text_reserved(void *start, void *end)
}
#endif /* CONFIG_SMP */
-#ifdef CONFIG_PARAVIRT
-
-/* Use this to add nops to a buffer, then text_poke the whole buffer. */
-static void __init_or_module add_nops(void *insns, unsigned int len)
-{
- while (len > 0) {
- unsigned int noplen = len;
- if (noplen > ASM_NOP_MAX)
- noplen = ASM_NOP_MAX;
- memcpy(insns, x86_nops[noplen], noplen);
- insns += noplen;
- len -= noplen;
- }
-}
-
-void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
- struct paravirt_patch_site *end)
-{
- struct paravirt_patch_site *p;
- char insn_buff[MAX_PATCH_LEN];
-
- for (p = start; p < end; p++) {
- unsigned int used;
-
- BUG_ON(p->len > MAX_PATCH_LEN);
- /* prep the buffer with the original instructions */
- memcpy(insn_buff, p->instr, p->len);
- used = paravirt_patch(p->type, insn_buff, (unsigned long)p->instr, p->len);
-
- BUG_ON(used > p->len);
-
- /* Pad the rest with nops */
- add_nops(insn_buff + used, p->len - used);
- text_poke_early(p->instr, insn_buff, p->len);
- }
-}
-extern struct paravirt_patch_site __start_parainstructions[],
- __stop_parainstructions[];
-#endif /* CONFIG_PARAVIRT */
-
/*
* Self-test for the INT3 based CALL emulation code.
*
@@ -1586,28 +1617,11 @@ void __init alternative_instructions(void)
*/
/*
- * Paravirt patching and alternative patching can be combined to
- * replace a function call with a short direct code sequence (e.g.
- * by setting a constant return value instead of doing that in an
- * external function).
- * In order to make this work the following sequence is required:
- * 1. set (artificial) features depending on used paravirt
- * functions which can later influence alternative patching
- * 2. apply paravirt patching (generally replacing an indirect
- * function call with a direct one)
- * 3. apply alternative patching (e.g. replacing a direct function
- * call with a custom code sequence)
- * Doing paravirt patching after alternative patching would clobber
- * the optimization of the custom code with a function call again.
+ * Make sure to set (artificial) features depending on used paravirt
+ * functions which can later influence alternative patching.
*/
paravirt_set_cap();
- /*
- * First patch paravirt functions, such that we overwrite the indirect
- * call with the direct call.
- */
- apply_paravirt(__parainstructions, __parainstructions_end);
-
__apply_fineibt(__retpoline_sites, __retpoline_sites_end,
__cfi_sites, __cfi_sites_end, true);
@@ -1618,10 +1632,6 @@ void __init alternative_instructions(void)
apply_retpolines(__retpoline_sites, __retpoline_sites_end);
apply_returns(__return_sites, __return_sites_end);
- /*
- * Then patch alternatives, such that those paravirt calls that are in
- * alternatives can be overwritten by their immediate fragments.
- */
apply_alternatives(__alt_instructions, __alt_instructions_end);
/*
@@ -1685,8 +1695,8 @@ void __init_or_module text_poke_early(void *addr, const void *opcode,
} else {
local_irq_save(flags);
memcpy(addr, opcode, len);
- local_irq_restore(flags);
sync_core();
+ local_irq_restore(flags);
/*
* Could also do a CLFLUSH here to speed up CPU recovery; but
@@ -1896,7 +1906,7 @@ static void *__text_poke(text_poke_f func, void *addr, const void *src, size_t l
* Note that the caller must ensure that if the modified code is part of a
* module, the module would not be removed during poking. This can be achieved
* by registering a module notifier, and ordering module removal and patching
- * trough a mutex.
+ * through a mutex.
*/
void *text_poke(void *addr, const void *opcode, size_t len)
{
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 56a917df410d..2ae98f754e59 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -776,7 +776,7 @@ int __init gart_iommu_init(void)
iommu_size >> PAGE_SHIFT);
/*
* Tricky. The GART table remaps the physical memory range,
- * so the CPU wont notice potential aliases and if the memory
+ * so the CPU won't notice potential aliases and if the memory
* is remapped to UC later on, we might surprise the PCI devices
* with a stray writeout of a cacheline. So play it sure and
* do an explicit, full-scale wbinvd() _after_ having marked all
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 2ee867d796d9..3bf0487cf3b7 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -4,7 +4,7 @@
#
# Leads to non-deterministic coverage that is not a function of syscall inputs.
-# In particualr, smp_apic_timer_interrupt() is called in random places.
+# In particular, smp_apic_timer_interrupt() is called in random places.
KCOV_INSTRUMENT := n
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_common.o apic_noop.o ipi.o vector.o init.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 41093cf20acd..4667bc4b00ab 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -782,7 +782,7 @@ bool __init apic_needs_pit(void)
/*
* If interrupt delivery mode is legacy PIC or virtual wire without
- * configuration, the local APIC timer wont be set up. Make sure
+ * configuration, the local APIC timer won't be set up. Make sure
* that the PIT is initialized.
*/
if (apic_intr_mode == APIC_PIC ||
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 7139867d69cd..b295a056a4fc 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -82,7 +82,6 @@ static struct apic apic_flat __ro_after_init = {
.acpi_madt_oem_check = flat_acpi_madt_oem_check,
.apic_id_registered = default_apic_id_registered,
- .delivery_mode = APIC_DELIVERY_MODE_FIXED,
.dest_mode_logical = true,
.disable_esr = 0,
@@ -154,7 +153,6 @@ static struct apic apic_physflat __ro_after_init = {
.acpi_madt_oem_check = physflat_acpi_madt_oem_check,
.apic_id_registered = default_apic_id_registered,
- .delivery_mode = APIC_DELIVERY_MODE_FIXED,
.dest_mode_logical = false,
.disable_esr = 0,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index b00d52ae84fa..9f1d553eb48f 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -47,7 +47,6 @@ static void noop_apic_write(u32 reg, u32 val)
struct apic apic_noop __ro_after_init = {
.name = "noop",
- .delivery_mode = APIC_DELIVERY_MODE_FIXED,
.dest_mode_logical = true,
.disable_esr = 0,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 456a14c44f67..7d0c51b9d3bc 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -222,7 +222,6 @@ static const struct apic apic_numachip1 __refconst = {
.probe = numachip1_probe,
.acpi_madt_oem_check = numachip1_acpi_madt_oem_check,
- .delivery_mode = APIC_DELIVERY_MODE_FIXED,
.dest_mode_logical = false,
.disable_esr = 0,
@@ -259,7 +258,6 @@ static const struct apic apic_numachip2 __refconst = {
.probe = numachip2_probe,
.acpi_madt_oem_check = numachip2_acpi_madt_oem_check,
- .delivery_mode = APIC_DELIVERY_MODE_FIXED,
.dest_mode_logical = false,
.disable_esr = 0,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 7ee3c486cb33..5a0d60b38e6b 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -80,7 +80,6 @@ static struct apic apic_bigsmp __ro_after_init = {
.name = "bigsmp",
.probe = probe_bigsmp,
- .delivery_mode = APIC_DELIVERY_MODE_FIXED,
.dest_mode_logical = false,
.disable_esr = 1,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 00da6cf6b07d..40c7cf180c20 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -997,7 +997,7 @@ static int alloc_isa_irq_from_domain(struct irq_domain *domain,
/*
* Legacy ISA IRQ has already been allocated, just add pin to
* the pin list associated with this IRQ and program the IOAPIC
- * entry. The IOAPIC entry
+ * entry.
*/
if (irq_data && irq_data->parent_data) {
if (!mp_check_pin_attr(irq, info))
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 5eb3fbe472da..c0f78059f06a 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -45,7 +45,6 @@ static struct apic apic_default __ro_after_init = {
.probe = probe_default,
.apic_id_registered = default_apic_id_registered,
- .delivery_mode = APIC_DELIVERY_MODE_FIXED,
.dest_mode_logical = true,
.disable_esr = 0,
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 319448d87b99..185738c72766 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -738,8 +738,8 @@ int __init arch_probe_nr_irqs(void)
void lapic_assign_legacy_vector(unsigned int irq, bool replace)
{
/*
- * Use assign system here so it wont get accounted as allocated
- * and moveable in the cpu hotplug check and it prevents managed
+ * Use assign system here so it won't get accounted as allocated
+ * and movable in the cpu hotplug check and it prevents managed
* irq reservation from touching it.
*/
irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace);
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index a8306089c91b..28a7d3f2312d 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -227,7 +227,6 @@ static struct apic apic_x2apic_cluster __ro_after_init = {
.probe = x2apic_cluster_probe,
.acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
- .delivery_mode = APIC_DELIVERY_MODE_FIXED,
.dest_mode_logical = true,
.disable_esr = 0,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 558a4a8824f4..409815a40668 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -145,7 +145,6 @@ static struct apic apic_x2apic_phys __ro_after_init = {
.probe = x2apic_phys_probe,
.acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
- .delivery_mode = APIC_DELIVERY_MODE_FIXED,
.dest_mode_logical = false,
.disable_esr = 0,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 1b0d7336a28f..f1766b18dcd0 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -805,7 +805,6 @@ static struct apic apic_x2apic_uv_x __ro_after_init = {
.probe = uv_probe,
.acpi_madt_oem_check = uv_acpi_madt_oem_check,
- .delivery_mode = APIC_DELIVERY_MODE_FIXED,
.dest_mode_logical = false,
.disable_esr = 0,
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 5934ee5bc087..76a5ced278c2 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -420,7 +420,7 @@ static DEFINE_MUTEX(apm_mutex);
* This is for buggy BIOS's that refer to (real mode) segment 0x40
* even though they are called in protected mode.
*/
-static struct desc_struct bad_bios_desc = GDT_ENTRY_INIT(0x4092,
+static struct desc_struct bad_bios_desc = GDT_ENTRY_INIT(DESC_DATA32_BIOS,
(unsigned long)__va(0x400UL), PAGE_SIZE - 0x400 - 1);
static const char driver_version[] = "1.16ac"; /* no spaces */
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index e9ad518a5003..64ad2ddea121 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -233,14 +233,13 @@ patch_call_sites(s32 *start, s32 *end, const struct core_text *ct)
}
static __init_or_module void
-patch_paravirt_call_sites(struct paravirt_patch_site *start,
- struct paravirt_patch_site *end,
- const struct core_text *ct)
+patch_alt_call_sites(struct alt_instr *start, struct alt_instr *end,
+ const struct core_text *ct)
{
- struct paravirt_patch_site *p;
+ struct alt_instr *a;
- for (p = start; p < end; p++)
- patch_call(p->instr, ct);
+ for (a = start; a < end; a++)
+ patch_call((void *)&a->instr_offset + a->instr_offset, ct);
}
static __init_or_module void
@@ -248,7 +247,7 @@ callthunks_setup(struct callthunk_sites *cs, const struct core_text *ct)
{
prdbg("Patching call sites %s\n", ct->name);
patch_call_sites(cs->call_start, cs->call_end, ct);
- patch_paravirt_call_sites(cs->pv_start, cs->pv_end, ct);
+ patch_alt_call_sites(cs->alt_start, cs->alt_end, ct);
prdbg("Patching call sites done%s\n", ct->name);
}
@@ -257,8 +256,8 @@ void __init callthunks_patch_builtin_calls(void)
struct callthunk_sites cs = {
.call_start = __call_sites,
.call_end = __call_sites_end,
- .pv_start = __parainstructions,
- .pv_end = __parainstructions_end
+ .alt_start = __alt_instructions,
+ .alt_end = __alt_instructions_end
};
if (!cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index a7eab05e5f29..9f42d1c59e09 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -34,87 +34,6 @@
*/
static u32 nodes_per_socket = 1;
-/*
- * AMD errata checking
- *
- * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or
- * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that
- * have an OSVW id assigned, which it takes as first argument. Both take a
- * variable number of family-specific model-stepping ranges created by
- * AMD_MODEL_RANGE().
- *
- * Example:
- *
- * const int amd_erratum_319[] =
- * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2),
- * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0),
- * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0));
- */
-
-#define AMD_LEGACY_ERRATUM(...) { -1, __VA_ARGS__, 0 }
-#define AMD_OSVW_ERRATUM(osvw_id, ...) { osvw_id, __VA_ARGS__, 0 }
-#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
- ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
-#define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff)
-#define AMD_MODEL_RANGE_START(range) (((range) >> 12) & 0xfff)
-#define AMD_MODEL_RANGE_END(range) ((range) & 0xfff)
-
-static const int amd_erratum_400[] =
- AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
- AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
-
-static const int amd_erratum_383[] =
- AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));
-
-/* #1054: Instructions Retired Performance Counter May Be Inaccurate */
-static const int amd_erratum_1054[] =
- AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
-
-static const int amd_zenbleed[] =
- AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x30, 0x0, 0x4f, 0xf),
- AMD_MODEL_RANGE(0x17, 0x60, 0x0, 0x7f, 0xf),
- AMD_MODEL_RANGE(0x17, 0x90, 0x0, 0x91, 0xf),
- AMD_MODEL_RANGE(0x17, 0xa0, 0x0, 0xaf, 0xf));
-
-static const int amd_div0[] =
- AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf),
- AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf));
-
-static const int amd_erratum_1485[] =
- AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x19, 0x10, 0x0, 0x1f, 0xf),
- AMD_MODEL_RANGE(0x19, 0x60, 0x0, 0xaf, 0xf));
-
-static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
-{
- int osvw_id = *erratum++;
- u32 range;
- u32 ms;
-
- if (osvw_id >= 0 && osvw_id < 65536 &&
- cpu_has(cpu, X86_FEATURE_OSVW)) {
- u64 osvw_len;
-
- rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len);
- if (osvw_id < osvw_len) {
- u64 osvw_bits;
-
- rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6),
- osvw_bits);
- return osvw_bits & (1ULL << (osvw_id & 0x3f));
- }
- }
-
- /* OSVW unavailable or ID unknown, match family-model-stepping range */
- ms = (cpu->x86_model << 4) | cpu->x86_stepping;
- while ((range = *erratum++))
- if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
- (ms >= AMD_MODEL_RANGE_START(range)) &&
- (ms <= AMD_MODEL_RANGE_END(range)))
- return true;
-
- return false;
-}
-
static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
{
u32 gprs[8] = { 0 };
@@ -616,6 +535,49 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
}
resctrl_cpu_detect(c);
+
+ /* Figure out Zen generations: */
+ switch (c->x86) {
+ case 0x17: {
+ switch (c->x86_model) {
+ case 0x00 ... 0x2f:
+ case 0x50 ... 0x5f:
+ setup_force_cpu_cap(X86_FEATURE_ZEN1);
+ break;
+ case 0x30 ... 0x4f:
+ case 0x60 ... 0x7f:
+ case 0x90 ... 0x91:
+ case 0xa0 ... 0xaf:
+ setup_force_cpu_cap(X86_FEATURE_ZEN2);
+ break;
+ default:
+ goto warn;
+ }
+ break;
+ }
+ case 0x19: {
+ switch (c->x86_model) {
+ case 0x00 ... 0x0f:
+ case 0x20 ... 0x5f:
+ setup_force_cpu_cap(X86_FEATURE_ZEN3);
+ break;
+ case 0x10 ... 0x1f:
+ case 0x60 ... 0xaf:
+ setup_force_cpu_cap(X86_FEATURE_ZEN4);
+ break;
+ default:
+ goto warn;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ return;
+
+warn:
+ WARN_ONCE(1, "Family 0x%x, model: 0x%x??\n", c->x86, c->x86_model);
}
static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
@@ -739,15 +701,6 @@ static void early_init_amd(struct cpuinfo_x86 *c)
if (c->x86 == 0x16 && c->x86_model <= 0xf)
msr_set_bit(MSR_AMD64_LS_CFG, 15);
- /*
- * Check whether the machine is affected by erratum 400. This is
- * used to select the proper idle routine and to enable the check
- * whether the machine is affected in arch_post_acpi_init(), which
- * sets the X86_BUG_AMD_APIC_C1E bug depending on the MSR check.
- */
- if (cpu_has_amd_erratum(c, amd_erratum_400))
- set_cpu_bug(c, X86_BUG_AMD_E400);
-
early_detect_mem_encrypt(c);
/* Re-enable TopologyExtensions if switched off by BIOS */
@@ -814,6 +767,16 @@ static void init_amd_k8(struct cpuinfo_x86 *c)
msr_set_bit(MSR_K7_HWCR, 6);
#endif
set_cpu_bug(c, X86_BUG_SWAPGS_FENCE);
+
+ /*
+ * Check models and steppings affected by erratum 400. This is
+ * used to select the proper idle routine and to enable the
+ * check whether the machine is affected in arch_post_acpi_subsys_init()
+ * which sets the X86_BUG_AMD_APIC_C1E bug depending on the MSR check.
+ */
+ if (c->x86_model > 0x41 ||
+ (c->x86_model == 0x41 && c->x86_stepping >= 0x2))
+ setup_force_cpu_bug(X86_BUG_AMD_E400);
}
static void init_amd_gh(struct cpuinfo_x86 *c)
@@ -847,8 +810,17 @@ static void init_amd_gh(struct cpuinfo_x86 *c)
*/
msr_clear_bit(MSR_AMD64_BU_CFG2, 24);
- if (cpu_has_amd_erratum(c, amd_erratum_383))
- set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH);
+ set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH);
+
+ /*
+ * Check models and steppings affected by erratum 400. This is
+ * used to select the proper idle routine and to enable the
+ * check whether the machine is affected in arch_post_acpi_subsys_init()
+ * which sets the X86_BUG_AMD_APIC_C1E bug depending on the MSR check.
+ */
+ if (c->x86_model > 0x2 ||
+ (c->x86_model == 0x2 && c->x86_stepping >= 0x1))
+ setup_force_cpu_bug(X86_BUG_AMD_E400);
}
static void init_amd_ln(struct cpuinfo_x86 *c)
@@ -941,6 +913,19 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
clear_rdrand_cpuid_bit(c);
}
+static void fix_erratum_1386(struct cpuinfo_x86 *c)
+{
+ /*
+ * Work around Erratum 1386. The XSAVES instruction malfunctions in
+ * certain circumstances on Zen1/2 uarch, and not all parts have had
+ * updated microcode at the time of writing (March 2023).
+ *
+ * Affected parts all have no supervisor XSAVE states, meaning that
+ * the XSAVEC instruction (which works fine) is equivalent.
+ */
+ clear_cpu_cap(c, X86_FEATURE_XSAVES);
+}
+
void init_spectral_chicken(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_CPU_UNRET_ENTRY
@@ -951,34 +936,28 @@ void init_spectral_chicken(struct cpuinfo_x86 *c)
*
* This suppresses speculation from the middle of a basic block, i.e. it
* suppresses non-branch predictions.
- *
- * We use STIBP as a heuristic to filter out Zen2 from the rest of F17H
*/
- if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_AMD_STIBP)) {
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) {
if (!rdmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, &value)) {
value |= MSR_ZEN2_SPECTRAL_CHICKEN_BIT;
wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value);
}
}
#endif
- /*
- * Work around Erratum 1386. The XSAVES instruction malfunctions in
- * certain circumstances on Zen1/2 uarch, and not all parts have had
- * updated microcode at the time of writing (March 2023).
- *
- * Affected parts all have no supervisor XSAVE states, meaning that
- * the XSAVEC instruction (which works fine) is equivalent.
- */
- clear_cpu_cap(c, X86_FEATURE_XSAVES);
}
-static void init_amd_zn(struct cpuinfo_x86 *c)
+static void init_amd_zen_common(void)
{
- set_cpu_cap(c, X86_FEATURE_ZEN);
-
+ setup_force_cpu_cap(X86_FEATURE_ZEN);
#ifdef CONFIG_NUMA
node_reclaim_distance = 32;
#endif
+}
+
+static void init_amd_zen1(struct cpuinfo_x86 *c)
+{
+ init_amd_zen_common();
+ fix_erratum_1386(c);
/* Fix up CPUID bits, but only if not virtualised. */
if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) {
@@ -986,15 +965,10 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
/* Erratum 1076: CPB feature bit not being set in CPUID. */
if (!cpu_has(c, X86_FEATURE_CPB))
set_cpu_cap(c, X86_FEATURE_CPB);
-
- /*
- * Zen3 (Fam19 model < 0x10) parts are not susceptible to
- * Branch Type Confusion, but predate the allocation of the
- * BTC_NO bit.
- */
- if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO))
- set_cpu_cap(c, X86_FEATURE_BTC_NO);
}
+
+ pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n");
+ setup_force_cpu_bug(X86_BUG_DIV0);
}
static bool cpu_has_zenbleed_microcode(void)
@@ -1018,11 +992,8 @@ static bool cpu_has_zenbleed_microcode(void)
return true;
}
-static void zenbleed_check(struct cpuinfo_x86 *c)
+static void zen2_zenbleed_check(struct cpuinfo_x86 *c)
{
- if (!cpu_has_amd_erratum(c, amd_zenbleed))
- return;
-
if (cpu_has(c, X86_FEATURE_HYPERVISOR))
return;
@@ -1037,6 +1008,37 @@ static void zenbleed_check(struct cpuinfo_x86 *c)
}
}
+static void init_amd_zen2(struct cpuinfo_x86 *c)
+{
+ init_amd_zen_common();
+ init_spectral_chicken(c);
+ fix_erratum_1386(c);
+ zen2_zenbleed_check(c);
+}
+
+static void init_amd_zen3(struct cpuinfo_x86 *c)
+{
+ init_amd_zen_common();
+
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) {
+ /*
+ * Zen3 (Fam19 model < 0x10) parts are not susceptible to
+ * Branch Type Confusion, but predate the allocation of the
+ * BTC_NO bit.
+ */
+ if (!cpu_has(c, X86_FEATURE_BTC_NO))
+ set_cpu_cap(c, X86_FEATURE_BTC_NO);
+ }
+}
+
+static void init_amd_zen4(struct cpuinfo_x86 *c)
+{
+ init_amd_zen_common();
+
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR))
+ msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT);
+}
+
static void init_amd(struct cpuinfo_x86 *c)
{
u64 vm_cr;
@@ -1072,11 +1074,17 @@ static void init_amd(struct cpuinfo_x86 *c)
case 0x12: init_amd_ln(c); break;
case 0x15: init_amd_bd(c); break;
case 0x16: init_amd_jg(c); break;
- case 0x17: init_spectral_chicken(c);
- fallthrough;
- case 0x19: init_amd_zn(c); break;
}
+ if (boot_cpu_has(X86_FEATURE_ZEN1))
+ init_amd_zen1(c);
+ else if (boot_cpu_has(X86_FEATURE_ZEN2))
+ init_amd_zen2(c);
+ else if (boot_cpu_has(X86_FEATURE_ZEN3))
+ init_amd_zen3(c);
+ else if (boot_cpu_has(X86_FEATURE_ZEN4))
+ init_amd_zen4(c);
+
/*
* Enable workaround for FXSAVE leak on CPUs
* without a XSaveErPtr feature
@@ -1136,7 +1144,7 @@ static void init_amd(struct cpuinfo_x86 *c)
* Counter May Be Inaccurate".
*/
if (cpu_has(c, X86_FEATURE_IRPERF) &&
- !cpu_has_amd_erratum(c, amd_erratum_1054))
+ (boot_cpu_has(X86_FEATURE_ZEN1) && c->x86_model > 0x2f))
msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT);
check_null_seg_clears_base(c);
@@ -1152,16 +1160,8 @@ static void init_amd(struct cpuinfo_x86 *c)
cpu_has(c, X86_FEATURE_AUTOIBRS))
WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS));
- zenbleed_check(c);
-
- if (cpu_has_amd_erratum(c, amd_div0)) {
- pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n");
- setup_force_cpu_bug(X86_BUG_DIV0);
- }
-
- if (!cpu_has(c, X86_FEATURE_HYPERVISOR) &&
- cpu_has_amd_erratum(c, amd_erratum_1485))
- msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT);
+ /* AMD CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */
+ clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE);
}
#ifdef CONFIG_X86_32
@@ -1315,11 +1315,14 @@ static void zenbleed_check_cpu(void *unused)
{
struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
- zenbleed_check(c);
+ zen2_zenbleed_check(c);
}
void amd_check_microcode(void)
{
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+ return;
+
on_each_cpu(zenbleed_check_cpu, NULL, 1);
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b14fc8c1c953..94bff381ef20 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -188,45 +188,37 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
* TLS descriptors are currently at a different place compared to i386.
* Hopefully nobody expects them at a fixed place (Wine?)
*/
- [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
- [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
- [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff),
- [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff),
- [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff),
- [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff),
+ [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(DESC_CODE32, 0, 0xfffff),
+ [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(DESC_CODE64, 0, 0xfffff),
+ [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(DESC_DATA64, 0, 0xfffff),
+ [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(DESC_CODE32 | DESC_USER, 0, 0xfffff),
+ [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(DESC_DATA64 | DESC_USER, 0, 0xfffff),
+ [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(DESC_CODE64 | DESC_USER, 0, 0xfffff),
#else
- [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff),
- [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
- [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff),
- [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff),
+ [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(DESC_CODE32, 0, 0xfffff),
+ [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(DESC_DATA32, 0, 0xfffff),
+ [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(DESC_CODE32 | DESC_USER, 0, 0xfffff),
+ [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(DESC_DATA32 | DESC_USER, 0, 0xfffff),
/*
* Segments used for calling PnP BIOS have byte granularity.
* They code segments and data segments have fixed 64k limits,
* the transfer segment sizes are set at run time.
*/
- /* 32-bit code */
- [GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(0x409a, 0, 0xffff),
- /* 16-bit code */
- [GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(0x009a, 0, 0xffff),
- /* 16-bit data */
- [GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(0x0092, 0, 0xffff),
- /* 16-bit data */
- [GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(0x0092, 0, 0),
- /* 16-bit data */
- [GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(0x0092, 0, 0),
+ [GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(DESC_CODE32_BIOS, 0, 0xffff),
+ [GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(DESC_CODE16, 0, 0xffff),
+ [GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(DESC_DATA16, 0, 0xffff),
+ [GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(DESC_DATA16, 0, 0),
+ [GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(DESC_DATA16, 0, 0),
/*
* The APM segments have byte granularity and their bases
* are set at run time. All have 64k limits.
*/
- /* 32-bit code */
- [GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(0x409a, 0, 0xffff),
- /* 16-bit code */
- [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff),
- /* data */
- [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x4092, 0, 0xffff),
-
- [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
- [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
+ [GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(DESC_CODE32_BIOS, 0, 0xffff),
+ [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(DESC_CODE16, 0, 0xffff),
+ [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(DESC_DATA32_BIOS, 0, 0xffff),
+
+ [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(DESC_DATA32, 0, 0xfffff),
+ [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(DESC_DATA32, 0, 0xfffff),
#endif
} };
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
@@ -1856,6 +1848,13 @@ static void identify_cpu(struct cpuinfo_x86 *c)
c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0);
#endif
+
+ /*
+ * Set default APIC and TSC_DEADLINE MSR fencing flag. AMD and
+ * Hygon will clear it in ->c_init() below.
+ */
+ set_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE);
+
/*
* Vendor-specific initialization. In this section we
* canonicalize the feature flags, meaning if there are
diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
index 6f247d66758d..f0cd95502faa 100644
--- a/arch/x86/kernel/cpu/hygon.c
+++ b/arch/x86/kernel/cpu/hygon.c
@@ -354,6 +354,9 @@ static void init_hygon(struct cpuinfo_x86 *c)
set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
check_null_seg_clears_base(c);
+
+ /* Hygon CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */
+ clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE);
}
static void cpu_detect_tlb_hygon(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c
index e4c3ba91321c..f18d35fe27a9 100644
--- a/arch/x86/kernel/cpu/intel_epb.c
+++ b/arch/x86/kernel/cpu/intel_epb.c
@@ -237,4 +237,4 @@ err_out_online:
cpuhp_remove_state(CPUHP_AP_X86_INTEL_EPB_ONLINE);
return ret;
}
-subsys_initcall(intel_epb_init);
+late_initcall(intel_epb_init);
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index f3517b8a8e91..2b46eb0fdf3a 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -87,42 +87,40 @@ struct smca_bank {
static DEFINE_PER_CPU_READ_MOSTLY(struct smca_bank[MAX_NR_BANKS], smca_banks);
static DEFINE_PER_CPU_READ_MOSTLY(u8[N_SMCA_BANK_TYPES], smca_bank_counts);
-struct smca_bank_name {
- const char *name; /* Short name for sysfs */
- const char *long_name; /* Long name for pretty-printing */
-};
-
-static struct smca_bank_name smca_names[] = {
- [SMCA_LS ... SMCA_LS_V2] = { "load_store", "Load Store Unit" },
- [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" },
- [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" },
- [SMCA_DE] = { "decode_unit", "Decode Unit" },
- [SMCA_RESERVED] = { "reserved", "Reserved" },
- [SMCA_EX] = { "execution_unit", "Execution Unit" },
- [SMCA_FP] = { "floating_point", "Floating Point Unit" },
- [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
- [SMCA_CS ... SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" },
- [SMCA_PIE] = { "pie", "Power, Interrupts, etc." },
+static const char * const smca_names[] = {
+ [SMCA_LS ... SMCA_LS_V2] = "load_store",
+ [SMCA_IF] = "insn_fetch",
+ [SMCA_L2_CACHE] = "l2_cache",
+ [SMCA_DE] = "decode_unit",
+ [SMCA_RESERVED] = "reserved",
+ [SMCA_EX] = "execution_unit",
+ [SMCA_FP] = "floating_point",
+ [SMCA_L3_CACHE] = "l3_cache",
+ [SMCA_CS ... SMCA_CS_V2] = "coherent_slave",
+ [SMCA_PIE] = "pie",
/* UMC v2 is separate because both of them can exist in a single system. */
- [SMCA_UMC] = { "umc", "Unified Memory Controller" },
- [SMCA_UMC_V2] = { "umc_v2", "Unified Memory Controller v2" },
- [SMCA_PB] = { "param_block", "Parameter Block" },
- [SMCA_PSP ... SMCA_PSP_V2] = { "psp", "Platform Security Processor" },
- [SMCA_SMU ... SMCA_SMU_V2] = { "smu", "System Management Unit" },
- [SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" },
- [SMCA_MPDMA] = { "mpdma", "MPDMA Unit" },
- [SMCA_NBIO] = { "nbio", "Northbridge IO Unit" },
- [SMCA_PCIE ... SMCA_PCIE_V2] = { "pcie", "PCI Express Unit" },
- [SMCA_XGMI_PCS] = { "xgmi_pcs", "Ext Global Memory Interconnect PCS Unit" },
- [SMCA_NBIF] = { "nbif", "NBIF Unit" },
- [SMCA_SHUB] = { "shub", "System Hub Unit" },
- [SMCA_SATA] = { "sata", "SATA Unit" },
- [SMCA_USB] = { "usb", "USB Unit" },
- [SMCA_GMI_PCS] = { "gmi_pcs", "Global Memory Interconnect PCS Unit" },
- [SMCA_XGMI_PHY] = { "xgmi_phy", "Ext Global Memory Interconnect PHY Unit" },
- [SMCA_WAFL_PHY] = { "wafl_phy", "WAFL PHY Unit" },
- [SMCA_GMI_PHY] = { "gmi_phy", "Global Memory Interconnect PHY Unit" },
+ [SMCA_UMC] = "umc",
+ [SMCA_UMC_V2] = "umc_v2",
+ [SMCA_MA_LLC] = "ma_llc",
+ [SMCA_PB] = "param_block",
+ [SMCA_PSP ... SMCA_PSP_V2] = "psp",
+ [SMCA_SMU ... SMCA_SMU_V2] = "smu",
+ [SMCA_MP5] = "mp5",
+ [SMCA_MPDMA] = "mpdma",
+ [SMCA_NBIO] = "nbio",
+ [SMCA_PCIE ... SMCA_PCIE_V2] = "pcie",
+ [SMCA_XGMI_PCS] = "xgmi_pcs",
+ [SMCA_NBIF] = "nbif",
+ [SMCA_SHUB] = "shub",
+ [SMCA_SATA] = "sata",
+ [SMCA_USB] = "usb",
+ [SMCA_USR_DP] = "usr_dp",
+ [SMCA_USR_CP] = "usr_cp",
+ [SMCA_GMI_PCS] = "gmi_pcs",
+ [SMCA_XGMI_PHY] = "xgmi_phy",
+ [SMCA_WAFL_PHY] = "wafl_phy",
+ [SMCA_GMI_PHY] = "gmi_phy",
};
static const char *smca_get_name(enum smca_bank_types t)
@@ -130,17 +128,8 @@ static const char *smca_get_name(enum smca_bank_types t)
if (t >= N_SMCA_BANK_TYPES)
return NULL;
- return smca_names[t].name;
-}
-
-const char *smca_get_long_name(enum smca_bank_types t)
-{
- if (t >= N_SMCA_BANK_TYPES)
- return NULL;
-
- return smca_names[t].long_name;
+ return smca_names[t];
}
-EXPORT_SYMBOL_GPL(smca_get_long_name);
enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank)
{
@@ -178,6 +167,7 @@ static const struct smca_hwid smca_hwid_mcatypes[] = {
{ SMCA_CS, HWID_MCATYPE(0x2E, 0x0) },
{ SMCA_PIE, HWID_MCATYPE(0x2E, 0x1) },
{ SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2) },
+ { SMCA_MA_LLC, HWID_MCATYPE(0x2E, 0x4) },
/* Unified Memory Controller MCA type */
{ SMCA_UMC, HWID_MCATYPE(0x96, 0x0) },
@@ -212,6 +202,8 @@ static const struct smca_hwid smca_hwid_mcatypes[] = {
{ SMCA_SHUB, HWID_MCATYPE(0x80, 0x0) },
{ SMCA_SATA, HWID_MCATYPE(0xA8, 0x0) },
{ SMCA_USB, HWID_MCATYPE(0xAA, 0x0) },
+ { SMCA_USR_DP, HWID_MCATYPE(0x170, 0x0) },
+ { SMCA_USR_CP, HWID_MCATYPE(0x180, 0x0) },
{ SMCA_GMI_PCS, HWID_MCATYPE(0x241, 0x0) },
{ SMCA_XGMI_PHY, HWID_MCATYPE(0x259, 0x0) },
{ SMCA_WAFL_PHY, HWID_MCATYPE(0x267, 0x0) },
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 7b397370b4d6..fd5ce12c4f9a 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -44,6 +44,7 @@
#include <linux/sync_core.h>
#include <linux/task_work.h>
#include <linux/hardirq.h>
+#include <linux/kexec.h>
#include <asm/intel-family.h>
#include <asm/processor.h>
@@ -233,6 +234,7 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
struct llist_node *pending;
struct mce_evt_llist *l;
int apei_err = 0;
+ struct page *p;
/*
* Allow instrumentation around external facilities usage. Not that it
@@ -286,6 +288,20 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
if (!fake_panic) {
if (panic_timeout == 0)
panic_timeout = mca_cfg.panic_timeout;
+
+ /*
+ * Kdump skips the poisoned page in order to avoid
+ * touching the error bits again. Poison the page even
+ * if the error is fatal and the machine is about to
+ * panic.
+ */
+ if (kexec_crash_loaded()) {
+ if (final && (final->status & MCI_STATUS_ADDRV)) {
+ p = pfn_to_online_page(final->addr >> PAGE_SHIFT);
+ if (p)
+ SetPageHWPoison(p);
+ }
+ }
panic(msg);
} else
pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg);
@@ -670,6 +686,16 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
barrier();
m.status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
+ /*
+ * Update storm tracking here, before checking for the
+ * MCI_STATUS_VAL bit. Valid corrected errors count
+ * towards declaring, or maintaining, storm status. No
+ * error in a bank counts towards avoiding, or ending,
+ * storm status.
+ */
+ if (!mca_cfg.cmci_disabled)
+ mce_track_storm(&m);
+
/* If this entry is not valid, ignore it */
if (!(m.status & MCI_STATUS_VAL))
continue;
@@ -1601,13 +1627,6 @@ static unsigned long check_interval = INITIAL_CHECK_INTERVAL;
static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
static DEFINE_PER_CPU(struct timer_list, mce_timer);
-static unsigned long mce_adjust_timer_default(unsigned long interval)
-{
- return interval;
-}
-
-static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
-
static void __start_timer(struct timer_list *t, unsigned long interval)
{
unsigned long when = jiffies + interval;
@@ -1637,15 +1656,9 @@ static void mce_timer_fn(struct timer_list *t)
iv = __this_cpu_read(mce_next_interval);
- if (mce_available(this_cpu_ptr(&cpu_info))) {
+ if (mce_available(this_cpu_ptr(&cpu_info)))
mc_poll_banks();
- if (mce_intel_cmci_poll()) {
- iv = mce_adjust_timer(iv);
- goto done;
- }
- }
-
/*
* Alert userspace if needed. If we logged an MCE, reduce the polling
* interval, otherwise increase the polling interval.
@@ -1655,23 +1668,29 @@ static void mce_timer_fn(struct timer_list *t)
else
iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
-done:
- __this_cpu_write(mce_next_interval, iv);
- __start_timer(t, iv);
+ if (mce_get_storm_mode()) {
+ __start_timer(t, HZ);
+ } else {
+ __this_cpu_write(mce_next_interval, iv);
+ __start_timer(t, iv);
+ }
}
/*
- * Ensure that the timer is firing in @interval from now.
+ * When a storm starts on any bank on this CPU, switch to polling
+ * once per second. When the storm ends, revert to the default
+ * polling interval.
*/
-void mce_timer_kick(unsigned long interval)
+void mce_timer_kick(bool storm)
{
struct timer_list *t = this_cpu_ptr(&mce_timer);
- unsigned long iv = __this_cpu_read(mce_next_interval);
- __start_timer(t, interval);
+ mce_set_storm_mode(storm);
- if (interval < iv)
- __this_cpu_write(mce_next_interval, interval);
+ if (storm)
+ __start_timer(t, HZ);
+ else
+ __this_cpu_write(mce_next_interval, check_interval * HZ);
}
/* Must not be called in IRQ context where del_timer_sync() can deadlock */
@@ -1995,7 +2014,6 @@ static void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c)
intel_init_cmci();
intel_init_lmce();
- mce_adjust_timer = cmci_intel_adjust_timer;
}
static void mce_zhaoxin_feature_clear(struct cpuinfo_x86 *c)
@@ -2008,7 +2026,6 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
switch (c->x86_vendor) {
case X86_VENDOR_INTEL:
mce_intel_feature_init(c);
- mce_adjust_timer = cmci_intel_adjust_timer;
break;
case X86_VENDOR_AMD: {
@@ -2568,9 +2585,6 @@ static int mce_device_create(unsigned int cpu)
int err;
int i, j;
- if (!mce_available(&boot_cpu_data))
- return -EIO;
-
dev = per_cpu(mce_device, cpu);
if (dev)
return 0;
@@ -2665,8 +2679,6 @@ static void mce_reenable_cpu(void)
static int mce_cpu_dead(unsigned int cpu)
{
- mce_intel_hcpu_update(cpu);
-
/* intentionally ignoring frozen here */
if (!cpuhp_tasks_frozen)
cmci_rediscover();
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 4d8d4bcf915d..72f0695c3dc1 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -746,6 +746,7 @@ static void check_hw_inj_possible(void)
wrmsrl_safe(mca_msr_reg(bank, MCA_STATUS), status);
rdmsrl_safe(mca_msr_reg(bank, MCA_STATUS), &status);
+ wrmsrl_safe(mca_msr_reg(bank, MCA_STATUS), 0);
if (!status) {
hw_injection_possible = false;
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index 52bce533ddcc..399b62e223d2 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -42,15 +42,6 @@
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
/*
- * CMCI storm detection backoff counter
- *
- * During storm, we reset this counter to INITIAL_CHECK_INTERVAL in case we've
- * encountered an error. If not, we decrement it by one. We signal the end of
- * the CMCI storm when it reaches 0.
- */
-static DEFINE_PER_CPU(int, cmci_backoff_cnt);
-
-/*
* cmci_discover_lock protects against parallel discovery attempts
* which could race against each other.
*/
@@ -63,22 +54,26 @@ static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
*/
static DEFINE_SPINLOCK(cmci_poll_lock);
+/* Linux non-storm CMCI threshold (may be overridden by BIOS) */
#define CMCI_THRESHOLD 1
-#define CMCI_POLL_INTERVAL (30 * HZ)
-#define CMCI_STORM_INTERVAL (HZ)
-#define CMCI_STORM_THRESHOLD 15
-static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
-static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
-static DEFINE_PER_CPU(unsigned int, cmci_storm_state);
-
-enum {
- CMCI_STORM_NONE,
- CMCI_STORM_ACTIVE,
- CMCI_STORM_SUBSIDED,
-};
+/*
+ * MCi_CTL2 threshold for each bank when there is no storm.
+ * Default value for each bank may have been set by BIOS.
+ */
+static u16 cmci_threshold[MAX_NR_BANKS];
-static atomic_t cmci_storm_on_cpus;
+/*
+ * High threshold to limit CMCI rate during storms. Max supported is
+ * 0x7FFF. Use this slightly smaller value so it has a distinctive
+ * signature when some asks "Why am I not seeing all corrected errors?"
+ * A high threshold is used instead of just disabling CMCI for a
+ * bank because both corrected and uncorrected errors may be logged
+ * in the same bank and signalled with CMCI. The threshold only applies
+ * to corrected errors, so keeping CMCI enabled means that uncorrected
+ * errors will still be processed in a timely fashion.
+ */
+#define CMCI_STORM_THRESHOLD 32749
static int cmci_supported(int *banks)
{
@@ -134,204 +129,166 @@ static bool lmce_supported(void)
return tmp & FEAT_CTL_LMCE_ENABLED;
}
-bool mce_intel_cmci_poll(void)
+/*
+ * Set a new CMCI threshold value. Preserve the state of the
+ * MCI_CTL2_CMCI_EN bit in case this happens during a
+ * cmci_rediscover() operation.
+ */
+static void cmci_set_threshold(int bank, int thresh)
{
- if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
- return false;
-
- /*
- * Reset the counter if we've logged an error in the last poll
- * during the storm.
- */
- if (machine_check_poll(0, this_cpu_ptr(&mce_banks_owned)))
- this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
- else
- this_cpu_dec(cmci_backoff_cnt);
+ unsigned long flags;
+ u64 val;
- return true;
+ raw_spin_lock_irqsave(&cmci_discover_lock, flags);
+ rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
+ val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
+ wrmsrl(MSR_IA32_MCx_CTL2(bank), val | thresh);
+ raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
-void mce_intel_hcpu_update(unsigned long cpu)
+void mce_intel_handle_storm(int bank, bool on)
{
- if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
- atomic_dec(&cmci_storm_on_cpus);
+ if (on)
+ cmci_set_threshold(bank, CMCI_STORM_THRESHOLD);
+ else
+ cmci_set_threshold(bank, cmci_threshold[bank]);
+}
- per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
+/*
+ * The interrupt handler. This is called on every event.
+ * Just call the poller directly to log any events.
+ * This could in theory increase the threshold under high load,
+ * but doesn't for now.
+ */
+static void intel_threshold_interrupt(void)
+{
+ machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
}
-static void cmci_toggle_interrupt_mode(bool on)
+/*
+ * Check all the reasons why current CPU cannot claim
+ * ownership of a bank.
+ * 1: CPU already owns this bank
+ * 2: BIOS owns this bank
+ * 3: Some other CPU owns this bank
+ */
+static bool cmci_skip_bank(int bank, u64 *val)
{
- unsigned long flags, *owned;
- int bank;
- u64 val;
+ unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);
- raw_spin_lock_irqsave(&cmci_discover_lock, flags);
- owned = this_cpu_ptr(mce_banks_owned);
- for_each_set_bit(bank, owned, MAX_NR_BANKS) {
- rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
+ if (test_bit(bank, owned))
+ return true;
- if (on)
- val |= MCI_CTL2_CMCI_EN;
- else
- val &= ~MCI_CTL2_CMCI_EN;
+ /* Skip banks in firmware first mode */
+ if (test_bit(bank, mce_banks_ce_disabled))
+ return true;
- wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
- }
- raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
-}
+ rdmsrl(MSR_IA32_MCx_CTL2(bank), *val);
-unsigned long cmci_intel_adjust_timer(unsigned long interval)
-{
- if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
- (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) {
- mce_notify_irq();
- return CMCI_STORM_INTERVAL;
+ /* Already owned by someone else? */
+ if (*val & MCI_CTL2_CMCI_EN) {
+ clear_bit(bank, owned);
+ __clear_bit(bank, this_cpu_ptr(mce_poll_banks));
+ return true;
}
- switch (__this_cpu_read(cmci_storm_state)) {
- case CMCI_STORM_ACTIVE:
-
- /*
- * We switch back to interrupt mode once the poll timer has
- * silenced itself. That means no events recorded and the timer
- * interval is back to our poll interval.
- */
- __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
- if (!atomic_sub_return(1, &cmci_storm_on_cpus))
- pr_notice("CMCI storm subsided: switching to interrupt mode\n");
+ return false;
+}
- fallthrough;
+/*
+ * Decide which CMCI interrupt threshold to use:
+ * 1: If this bank is in storm mode from whichever CPU was
+ * the previous owner, stay in storm mode.
+ * 2: If ignoring any threshold set by BIOS, set Linux default
+ * 3: Try to honor BIOS threshold (unless buggy BIOS set it at zero).
+ */
+static u64 cmci_pick_threshold(u64 val, int *bios_zero_thresh)
+{
+ if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD)
+ return val;
- case CMCI_STORM_SUBSIDED:
+ if (!mca_cfg.bios_cmci_threshold) {
+ val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
+ val |= CMCI_THRESHOLD;
+ } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
/*
- * We wait for all CPUs to go back to SUBSIDED state. When that
- * happens we switch back to interrupt mode.
+ * If bios_cmci_threshold boot option was specified
+ * but the threshold is zero, we'll try to initialize
+ * it to 1.
*/
- if (!atomic_read(&cmci_storm_on_cpus)) {
- __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
- cmci_toggle_interrupt_mode(true);
- cmci_recheck();
- }
- return CMCI_POLL_INTERVAL;
- default:
-
- /* We have shiny weather. Let the poll do whatever it thinks. */
- return interval;
+ *bios_zero_thresh = 1;
+ val |= CMCI_THRESHOLD;
}
+
+ return val;
}
-static bool cmci_storm_detect(void)
+/*
+ * Try to claim ownership of a bank.
+ */
+static void cmci_claim_bank(int bank, u64 val, int bios_zero_thresh, int *bios_wrong_thresh)
{
- unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
- unsigned long ts = __this_cpu_read(cmci_time_stamp);
- unsigned long now = jiffies;
- int r;
+ struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc);
- if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
- return true;
+ val |= MCI_CTL2_CMCI_EN;
+ wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
+ rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
- if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
- cnt++;
- } else {
- cnt = 1;
- __this_cpu_write(cmci_time_stamp, now);
+ /* If the enable bit did not stick, this bank should be polled. */
+ if (!(val & MCI_CTL2_CMCI_EN)) {
+ WARN_ON(!test_bit(bank, this_cpu_ptr(mce_poll_banks)));
+ storm->banks[bank].poll_only = true;
+ return;
}
- __this_cpu_write(cmci_storm_cnt, cnt);
- if (cnt <= CMCI_STORM_THRESHOLD)
- return false;
-
- cmci_toggle_interrupt_mode(false);
- __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
- r = atomic_add_return(1, &cmci_storm_on_cpus);
- mce_timer_kick(CMCI_STORM_INTERVAL);
- this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
+ /* This CPU successfully set the enable bit. */
+ set_bit(bank, (void *)this_cpu_ptr(&mce_banks_owned));
- if (r == 1)
- pr_notice("CMCI storm detected: switching to poll mode\n");
- return true;
-}
+ if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD) {
+ pr_notice("CPU%d BANK%d CMCI inherited storm\n", smp_processor_id(), bank);
+ mce_inherit_storm(bank);
+ cmci_storm_begin(bank);
+ } else {
+ __clear_bit(bank, this_cpu_ptr(mce_poll_banks));
+ }
-/*
- * The interrupt handler. This is called on every event.
- * Just call the poller directly to log any events.
- * This could in theory increase the threshold under high load,
- * but doesn't for now.
- */
-static void intel_threshold_interrupt(void)
-{
- if (cmci_storm_detect())
- return;
+ /*
+ * We are able to set thresholds for some banks that
+ * had a threshold of 0. This means the BIOS has not
+ * set the thresholds properly or does not work with
+ * this boot option. Note down now and report later.
+ */
+ if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
+ (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
+ *bios_wrong_thresh = 1;
- machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
+ /* Save default threshold for each bank */
+ if (cmci_threshold[bank] == 0)
+ cmci_threshold[bank] = val & MCI_CTL2_CMCI_THRESHOLD_MASK;
}
/*
* Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
* on this CPU. Use the algorithm recommended in the SDM to discover shared
- * banks.
+ * banks. Called during initial bootstrap, and also for hotplug CPU operations
+ * to rediscover/reassign machine check banks.
*/
static void cmci_discover(int banks)
{
- unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);
+ int bios_wrong_thresh = 0;
unsigned long flags;
int i;
- int bios_wrong_thresh = 0;
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
for (i = 0; i < banks; i++) {
u64 val;
int bios_zero_thresh = 0;
- if (test_bit(i, owned))
+ if (cmci_skip_bank(i, &val))
continue;
- /* Skip banks in firmware first mode */
- if (test_bit(i, mce_banks_ce_disabled))
- continue;
-
- rdmsrl(MSR_IA32_MCx_CTL2(i), val);
-
- /* Already owned by someone else? */
- if (val & MCI_CTL2_CMCI_EN) {
- clear_bit(i, owned);
- __clear_bit(i, this_cpu_ptr(mce_poll_banks));
- continue;
- }
-
- if (!mca_cfg.bios_cmci_threshold) {
- val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
- val |= CMCI_THRESHOLD;
- } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
- /*
- * If bios_cmci_threshold boot option was specified
- * but the threshold is zero, we'll try to initialize
- * it to 1.
- */
- bios_zero_thresh = 1;
- val |= CMCI_THRESHOLD;
- }
-
- val |= MCI_CTL2_CMCI_EN;
- wrmsrl(MSR_IA32_MCx_CTL2(i), val);
- rdmsrl(MSR_IA32_MCx_CTL2(i), val);
-
- /* Did the enable bit stick? -- the bank supports CMCI */
- if (val & MCI_CTL2_CMCI_EN) {
- set_bit(i, owned);
- __clear_bit(i, this_cpu_ptr(mce_poll_banks));
- /*
- * We are able to set thresholds for some banks that
- * had a threshold of 0. This means the BIOS has not
- * set the thresholds properly or does not work with
- * this boot option. Note down now and report later.
- */
- if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
- (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
- bios_wrong_thresh = 1;
- } else {
- WARN_ON(!test_bit(i, this_cpu_ptr(mce_poll_banks)));
- }
+ val = cmci_pick_threshold(val, &bios_zero_thresh);
+ cmci_claim_bank(i, val, bios_zero_thresh, &bios_wrong_thresh);
}
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
@@ -370,6 +327,9 @@ static void __cmci_disable_bank(int bank)
val &= ~MCI_CTL2_CMCI_EN;
wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
__clear_bit(bank, this_cpu_ptr(mce_banks_owned));
+
+ if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD)
+ cmci_storm_end(bank);
}
/*
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index e13a26c9c0ac..01f8f03969e6 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -41,9 +41,7 @@ struct dentry *mce_get_debugfs_dir(void);
extern mce_banks_t mce_banks_ce_disabled;
#ifdef CONFIG_X86_MCE_INTEL
-unsigned long cmci_intel_adjust_timer(unsigned long interval);
-bool mce_intel_cmci_poll(void);
-void mce_intel_hcpu_update(unsigned long cpu);
+void mce_intel_handle_storm(int bank, bool on);
void cmci_disable_bank(int bank);
void intel_init_cmci(void);
void intel_init_lmce(void);
@@ -51,9 +49,7 @@ void intel_clear_lmce(void);
bool intel_filter_mce(struct mce *m);
bool intel_mce_usable_address(struct mce *m);
#else
-# define cmci_intel_adjust_timer mce_adjust_timer_default
-static inline bool mce_intel_cmci_poll(void) { return false; }
-static inline void mce_intel_hcpu_update(unsigned long cpu) { }
+static inline void mce_intel_handle_storm(int bank, bool on) { }
static inline void cmci_disable_bank(int bank) { }
static inline void intel_init_cmci(void) { }
static inline void intel_init_lmce(void) { }
@@ -62,7 +58,63 @@ static inline bool intel_filter_mce(struct mce *m) { return false; }
static inline bool intel_mce_usable_address(struct mce *m) { return false; }
#endif
-void mce_timer_kick(unsigned long interval);
+void mce_timer_kick(bool storm);
+
+#ifdef CONFIG_X86_MCE_THRESHOLD
+void cmci_storm_begin(unsigned int bank);
+void cmci_storm_end(unsigned int bank);
+void mce_track_storm(struct mce *mce);
+void mce_inherit_storm(unsigned int bank);
+bool mce_get_storm_mode(void);
+void mce_set_storm_mode(bool storm);
+#else
+static inline void cmci_storm_begin(unsigned int bank) {}
+static inline void cmci_storm_end(unsigned int bank) {}
+static inline void mce_track_storm(struct mce *mce) {}
+static inline void mce_inherit_storm(unsigned int bank) {}
+static inline bool mce_get_storm_mode(void) { return false; }
+static inline void mce_set_storm_mode(bool storm) {}
+#endif
+
+/*
+ * history: Bitmask tracking errors occurrence. Each set bit
+ * represents an error seen.
+ *
+ * timestamp: Last time (in jiffies) that the bank was polled.
+ * in_storm_mode: Is this bank in storm mode?
+ * poll_only: Bank does not support CMCI, skip storm tracking.
+ */
+struct storm_bank {
+ u64 history;
+ u64 timestamp;
+ bool in_storm_mode;
+ bool poll_only;
+};
+
+#define NUM_HISTORY_BITS (sizeof(u64) * BITS_PER_BYTE)
+
+/* How many errors within the history buffer mark the start of a storm. */
+#define STORM_BEGIN_THRESHOLD 5
+
+/*
+ * How many polls of machine check bank without an error before declaring
+ * the storm is over. Since it is tracked by the bitmasks in the history
+ * field of struct storm_bank the mask is 30 bits [0 ... 29].
+ */
+#define STORM_END_POLL_THRESHOLD 29
+
+/*
+ * banks: per-cpu, per-bank details
+ * stormy_bank_count: count of MC banks in storm state
+ * poll_mode: CPU is in poll mode
+ */
+struct mca_storm_desc {
+ struct storm_bank banks[MAX_NR_BANKS];
+ u8 stormy_bank_count;
+ bool poll_mode;
+};
+
+DECLARE_PER_CPU(struct mca_storm_desc, storm_desc);
#ifdef CONFIG_ACPI_APEI
int apei_write_mce(struct mce *m);
diff --git a/arch/x86/kernel/cpu/mce/threshold.c b/arch/x86/kernel/cpu/mce/threshold.c
index ef4e7bb5fd88..89e31e1e5c9c 100644
--- a/arch/x86/kernel/cpu/mce/threshold.c
+++ b/arch/x86/kernel/cpu/mce/threshold.c
@@ -29,3 +29,118 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_threshold)
trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
apic_eoi();
}
+
+DEFINE_PER_CPU(struct mca_storm_desc, storm_desc);
+
+void mce_inherit_storm(unsigned int bank)
+{
+ struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc);
+
+ /*
+ * Previous CPU owning this bank had put it into storm mode,
+ * but the precise history of that storm is unknown. Assume
+ * the worst (all recent polls of the bank found a valid error
+ * logged). This will avoid the new owner prematurely declaring
+ * the storm has ended.
+ */
+ storm->banks[bank].history = ~0ull;
+ storm->banks[bank].timestamp = jiffies;
+}
+
+bool mce_get_storm_mode(void)
+{
+ return __this_cpu_read(storm_desc.poll_mode);
+}
+
+void mce_set_storm_mode(bool storm)
+{
+ __this_cpu_write(storm_desc.poll_mode, storm);
+}
+
+static void mce_handle_storm(unsigned int bank, bool on)
+{
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_INTEL:
+ mce_intel_handle_storm(bank, on);
+ break;
+ }
+}
+
+void cmci_storm_begin(unsigned int bank)
+{
+ struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc);
+
+ __set_bit(bank, this_cpu_ptr(mce_poll_banks));
+ storm->banks[bank].in_storm_mode = true;
+
+ /*
+ * If this is the first bank on this CPU to enter storm mode
+ * start polling.
+ */
+ if (++storm->stormy_bank_count == 1)
+ mce_timer_kick(true);
+}
+
+void cmci_storm_end(unsigned int bank)
+{
+ struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc);
+
+ __clear_bit(bank, this_cpu_ptr(mce_poll_banks));
+ storm->banks[bank].history = 0;
+ storm->banks[bank].in_storm_mode = false;
+
+ /* If no banks left in storm mode, stop polling. */
+ if (!this_cpu_dec_return(storm_desc.stormy_bank_count))
+ mce_timer_kick(false);
+}
+
+void mce_track_storm(struct mce *mce)
+{
+ struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc);
+ unsigned long now = jiffies, delta;
+ unsigned int shift = 1;
+ u64 history = 0;
+
+ /* No tracking needed for banks that do not support CMCI */
+ if (storm->banks[mce->bank].poll_only)
+ return;
+
+ /*
+ * When a bank is in storm mode it is polled once per second and
+ * the history mask will record about the last minute of poll results.
+ * If it is not in storm mode, then the bank is only checked when
+ * there is a CMCI interrupt. Check how long it has been since
+ * this bank was last checked, and adjust the amount of "shift"
+ * to apply to history.
+ */
+ if (!storm->banks[mce->bank].in_storm_mode) {
+ delta = now - storm->banks[mce->bank].timestamp;
+ shift = (delta + HZ) / HZ;
+ }
+
+ /* If it has been a long time since the last poll, clear history. */
+ if (shift < NUM_HISTORY_BITS)
+ history = storm->banks[mce->bank].history << shift;
+
+ storm->banks[mce->bank].timestamp = now;
+
+ /* History keeps track of corrected errors. VAL=1 && UC=0 */
+ if ((mce->status & MCI_STATUS_VAL) && mce_is_correctable(mce))
+ history |= 1;
+
+ storm->banks[mce->bank].history = history;
+
+ if (storm->banks[mce->bank].in_storm_mode) {
+ if (history & GENMASK_ULL(STORM_END_POLL_THRESHOLD, 0))
+ return;
+ printk_deferred(KERN_NOTICE "CPU%d BANK%d CMCI storm subsided\n", smp_processor_id(), mce->bank);
+ mce_handle_storm(mce->bank, false);
+ cmci_storm_end(mce->bank);
+ } else {
+ if (hweight64(history) < STORM_BEGIN_THRESHOLD)
+ return;
+ printk_deferred(KERN_NOTICE "CPU%d BANK%d CMCI storm detected\n", smp_processor_id(), mce->bank);
+ mce_handle_storm(mce->bank, true);
+ cmci_storm_begin(mce->bank);
+ }
+}
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 070426b9895f..857e608af641 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -370,14 +370,14 @@ static __init struct microcode_intel *get_microcode_blob(struct ucode_cpu_info *
{
struct cpio_data cp;
+ intel_collect_cpu_info(&uci->cpu_sig);
+
if (!load_builtin_intel_microcode(&cp))
cp = find_microcode_in_initrd(ucode_path);
if (!(cp.data && cp.size))
return NULL;
- intel_collect_cpu_info(&uci->cpu_sig);
-
return scan_microcode(cp.data, cp.size, uci, save);
}
@@ -410,13 +410,13 @@ void __init load_ucode_intel_bsp(struct early_load_data *ed)
{
struct ucode_cpu_info uci;
- ed->old_rev = intel_get_microcode_revision();
-
uci.mc = get_microcode_blob(&uci, false);
- if (uci.mc && apply_microcode_early(&uci) == UCODE_UPDATED)
- ucode_patch_va = UCODE_BSP_LOADED;
+ ed->old_rev = uci.cpu_sig.rev;
- ed->new_rev = uci.cpu_sig.rev;
+ if (uci.mc && apply_microcode_early(&uci) == UCODE_UPDATED) {
+ ucode_patch_va = UCODE_BSP_LOADED;
+ ed->new_rev = uci.cpu_sig.rev;
+ }
}
void load_ucode_intel_ap(void)
@@ -457,12 +457,6 @@ static enum ucode_state apply_microcode_late(int cpu)
if (ret != UCODE_UPDATED && ret != UCODE_OK)
return ret;
- if (!cpu && uci->cpu_sig.rev != cur_rev) {
- pr_info("Updated to revision 0x%x, date = %04x-%02x-%02x\n",
- uci->cpu_sig.rev, mc->hdr.date & 0xffff, mc->hdr.date >> 24,
- (mc->hdr.date >> 16) & 0xff);
- }
-
cpu_data(cpu).microcode = uci->cpu_sig.rev;
if (!cpu)
boot_cpu_data.microcode = uci->cpu_sig.rev;
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 2d6aa5d2e3d7..d3524778a545 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -428,6 +428,10 @@ void __init mtrr_copy_map(void)
* from the x86_init.hyper.init_platform() hook. It can be called only once.
* The MTRR state can't be changed afterwards. To ensure that, X86_FEATURE_MTRR
* is cleared.
+ *
+ * @var: MTRR variable range array to use
+ * @num_var: length of the @var array
+ * @def_type: default caching type
*/
void mtrr_overwrite_state(struct mtrr_var_range *var, unsigned int num_var,
mtrr_type def_type)
@@ -492,13 +496,15 @@ static u8 type_merge(u8 type, u8 new_type, u8 *uniform)
/**
* mtrr_type_lookup - look up memory type in MTRR
*
+ * @start: Begin of the physical address range
+ * @end: End of the physical address range
+ * @uniform: output argument:
+ * - 1: the returned MTRR type is valid for the whole region
+ * - 0: otherwise
+ *
* Return Values:
* MTRR_TYPE_(type) - The effective MTRR type for the region
* MTRR_TYPE_INVALID - MTRR is disabled
- *
- * Output Argument:
- * uniform - Set to 1 when the returned MTRR type is valid for the whole
- * region, set to 0 else.
*/
u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform)
{
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 5d390df21440..b65ab214bdf5 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -581,7 +581,7 @@ err_out:
*
* Flush any outstanding enqueued EADD operations and perform EINIT. The
* Launch Enclave Public Key Hash MSRs are rewritten as necessary to match
- * the enclave's MRSIGNER, which is caculated from the provided sigstruct.
+ * the enclave's MRSIGNER, which is calculated from the provided sigstruct.
*
* Return:
* - 0: Success.
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index a21a4d0ecc34..520deb411a70 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -308,7 +308,7 @@ EXPORT_SYMBOL_GPL(fpu_update_guest_xfd);
* Must be invoked from KVM after a VMEXIT before enabling interrupts when
* XFD write emulation is disabled. This is required because the guest can
* freely modify XFD and the state at VMEXIT is not guaranteed to be the
- * same as the state on VMENTER. So software state has to be udpated before
+ * same as the state on VMENTER. So software state has to be updated before
* any operation which depends on it can take place.
*
* Note: It can be invoked unconditionally even when write emulation is
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 05a110c97111..dc0956067944 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -71,9 +71,9 @@ EXPORT_SYMBOL(vmemmap_base);
* GDT used on the boot CPU before switching to virtual addresses.
*/
static struct desc_struct startup_gdt[GDT_ENTRIES] __initdata = {
- [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
- [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
- [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff),
+ [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(DESC_CODE32, 0, 0xfffff),
+ [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(DESC_CODE64, 0, 0xfffff),
+ [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(DESC_DATA64, 0, 0xfffff),
};
/*
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 086a2c3aaaa0..d4918d03efb4 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -114,6 +114,28 @@ SYM_CODE_START_NOALIGN(startup_64)
/* Form the CR3 value being sure to include the CR3 modifier */
addq $(early_top_pgt - __START_KERNEL_map), %rax
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ mov %rax, %rdi
+ mov %rax, %r14
+
+ addq phys_base(%rip), %rdi
+
+ /*
+ * For SEV guests: Verify that the C-bit is correct. A malicious
+ * hypervisor could lie about the C-bit position to perform a ROP
+ * attack on the guest by writing to the unencrypted stack and wait for
+ * the next RET instruction.
+ */
+ call sev_verify_cbit
+
+ /*
+ * Restore CR3 value without the phys_base which will be added
+ * below, before writing %cr3.
+ */
+ mov %r14, %rax
+#endif
+
jmp 1f
SYM_CODE_END(startup_64)
@@ -182,7 +204,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
/* Enable PAE mode, PSE, PGE and LA57 */
orl $(X86_CR4_PAE | X86_CR4_PSE | X86_CR4_PGE), %ecx
#ifdef CONFIG_X86_5LEVEL
- testl $1, __pgtable_l5_enabled(%rip)
+ testb $1, __pgtable_l5_enabled(%rip)
jz 1f
orl $X86_CR4_LA57, %ecx
1:
@@ -193,21 +215,12 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
addq phys_base(%rip), %rax
/*
- * For SEV guests: Verify that the C-bit is correct. A malicious
- * hypervisor could lie about the C-bit position to perform a ROP
- * attack on the guest by writing to the unencrypted stack and wait for
- * the next RET instruction.
- */
- movq %rax, %rdi
- call sev_verify_cbit
-
- /*
* Switch to new page-table
*
* For the boot CPU this switches to early_top_pgt which still has the
- * indentity mappings present. The secondary CPUs will switch to the
+ * identity mappings present. The secondary CPUs will switch to the
* init_top_pgt here, away from the trampoline_pgd and unmap the
- * indentity mapped ranges.
+ * identity mapped ranges.
*/
movq %rax, %cr3
@@ -255,6 +268,22 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
testl $X2APIC_ENABLE, %eax
jnz .Lread_apicid_msr
+#ifdef CONFIG_X86_X2APIC
+ /*
+ * If system is in X2APIC mode then MMIO base might not be
+ * mapped causing the MMIO read below to fault. Faults can't
+ * be handled at that point.
+ */
+ cmpl $0, x2apic_mode(%rip)
+ jz .Lread_apicid_mmio
+
+ /* Force the AP into X2APIC mode. */
+ orl $X2APIC_ENABLE, %eax
+ wrmsr
+ jmp .Lread_apicid_msr
+#endif
+
+.Lread_apicid_mmio:
/* Read the APIC ID from the fix-mapped MMIO space. */
movq apic_mmio_base(%rip), %rcx
addq $APIC_ID, %rcx
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 41eecf180b7f..8ff2bf921519 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -707,7 +707,7 @@ static void __init hpet_select_clockevents(void)
hpet_base.nr_clockevents = 0;
- /* No point if MSI is disabled or CPU has an Always Runing APIC Timer */
+ /* No point if MSI is disabled or CPU has an Always Running APIC Timer */
if (hpet_msi_disable || boot_cpu_has(X86_FEATURE_ARAT))
return;
@@ -965,7 +965,7 @@ static bool __init mwait_pc10_supported(void)
* and per CPU timer interrupts.
*
* The probability that this problem is going to be solved in the
- * forseeable future is close to zero, so the kernel has to be cluttered
+ * foreseeable future is close to zero, so the kernel has to be cluttered
* with heuristics to keep up with the ever growing amount of hardware and
* firmware trainwrecks. Hopefully some day hardware people will understand
* that the approach of "This can be fixed in software" is not sustainable.
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 8857abc706e4..660b601f1d6c 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -121,7 +121,7 @@ static const __initconst struct idt_data def_idts[] = {
static const struct idt_data ia32_idt[] __initconst = {
#if defined(CONFIG_IA32_EMULATION)
- SYSG(IA32_SYSCALL_VECTOR, entry_INT80_compat),
+ SYSG(IA32_SYSCALL_VECTOR, asm_int80_emulation),
#elif defined(CONFIG_X86_32)
SYSG(IA32_SYSCALL_VECTOR, entry_INT80_32),
#endif
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index e8babebad7b8..a0ce46c0a2d8 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -576,7 +576,8 @@ static void kprobe_emulate_call_indirect(struct kprobe *p, struct pt_regs *regs)
{
unsigned long offs = addrmode_regoffs[p->ainsn.indirect.reg];
- int3_emulate_call(regs, regs_get_register(regs, offs));
+ int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + p->ainsn.size);
+ int3_emulate_jmp(regs, regs_get_register(regs, offs));
}
NOKPROBE_SYMBOL(kprobe_emulate_call_indirect);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 0ddb3bd0f1aa..dfe9945b9bec 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -803,8 +803,8 @@ extern bool __raw_callee_save___kvm_vcpu_is_preempted(long);
"cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax)\n\t" \
"setne %al\n\t"
-DEFINE_PARAVIRT_ASM(__raw_callee_save___kvm_vcpu_is_preempted,
- PV_VCPU_PREEMPTED_ASM, .text);
+DEFINE_ASM_FUNC(__raw_callee_save___kvm_vcpu_is_preempted,
+ PV_VCPU_PREEMPTED_ASM, .text);
#endif
static void __init kvm_guest_init(void)
@@ -942,7 +942,7 @@ static void __init kvm_init_platform(void)
* Reset the host's shared pages list related to kernel
* specific page encryption status settings before we load a
* new kernel by kexec. Reset the page encryption status
- * during early boot intead of just before kexec to avoid SMP
+ * during early boot instead of just before kexec to avoid SMP
* races during kvm_pv_guest_cpu_reboot().
* NOTE: We cannot reset the complete shared pages list
* here as we need to retain the UEFI/OVMF firmware
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index fb8f52149be9..a95d0900e8c6 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -42,7 +42,7 @@ static int __init parse_no_kvmclock_vsyscall(char *arg)
}
early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
-/* Aligned to page sizes to match whats mapped via vsyscalls to userspace */
+/* Aligned to page sizes to match what's mapped via vsyscalls to userspace */
#define HVC_BOOT_ARRAY_SIZE \
(PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info))
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index adc67f98819a..7a814b41402d 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -7,7 +7,7 @@
* This handles calls from both 32bit and 64bit mode.
*
* Lock order:
- * contex.ldt_usr_sem
+ * context.ldt_usr_sem
* mmap_lock
* context.lock
*/
@@ -49,7 +49,7 @@ void load_mm_ldt(struct mm_struct *mm)
/*
* Any change to mm->context.ldt is followed by an IPI to all
* CPUs with the mm active. The LDT will not be freed until
- * after the IPI is handled by all such CPUs. This means that,
+ * after the IPI is handled by all such CPUs. This means that
* if the ldt_struct changes before we return, the values we see
* will be safe, and the new values will be loaded before we run
* any user code.
@@ -685,7 +685,7 @@ SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
}
/*
* The SYSCALL_DEFINE() macros give us an 'unsigned long'
- * return type, but tht ABI for sys_modify_ldt() expects
+ * return type, but the ABI for sys_modify_ldt() expects
* 'int'. This cast gives us an int-sized value in %rax
* for the return code. The 'unsigned' is necessary so
* the compiler does not try to sign-extend the negative
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 5f71a0cf4399..e18914c0e38a 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -276,7 +276,7 @@ int module_finalize(const Elf_Ehdr *hdr,
struct module *me)
{
const Elf_Shdr *s, *alt = NULL, *locks = NULL,
- *para = NULL, *orc = NULL, *orc_ip = NULL,
+ *orc = NULL, *orc_ip = NULL,
*retpolines = NULL, *returns = NULL, *ibt_endbr = NULL,
*calls = NULL, *cfi = NULL;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
@@ -286,8 +286,6 @@ int module_finalize(const Elf_Ehdr *hdr,
alt = s;
if (!strcmp(".smp_locks", secstrings + s->sh_name))
locks = s;
- if (!strcmp(".parainstructions", secstrings + s->sh_name))
- para = s;
if (!strcmp(".orc_unwind", secstrings + s->sh_name))
orc = s;
if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name))
@@ -304,14 +302,6 @@ int module_finalize(const Elf_Ehdr *hdr,
ibt_endbr = s;
}
- /*
- * See alternative_instructions() for the ordering rules between the
- * various patching types.
- */
- if (para) {
- void *pseg = (void *)para->sh_addr;
- apply_paravirt(pseg, pseg + para->sh_size);
- }
if (retpolines || cfi) {
void *rseg = NULL, *cseg = NULL;
unsigned int rsize = 0, csize = 0;
@@ -341,7 +331,7 @@ int module_finalize(const Elf_Ehdr *hdr,
void *aseg = (void *)alt->sh_addr;
apply_alternatives(aseg, aseg + alt->sh_size);
}
- if (calls || para) {
+ if (calls || alt) {
struct callthunk_sites cs = {};
if (calls) {
@@ -349,9 +339,9 @@ int module_finalize(const Elf_Ehdr *hdr,
cs.call_end = (void *)calls->sh_addr + calls->sh_size;
}
- if (para) {
- cs.pv_start = (void *)para->sh_addr;
- cs.pv_end = (void *)para->sh_addr + para->sh_size;
+ if (alt) {
+ cs.alt_start = (void *)alt->sh_addr;
+ cs.alt_end = (void *)alt->sh_addr + alt->sh_size;
}
callthunks_patch_module_calls(&cs, me);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 97f1436c1a20..5358d43886ad 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -34,14 +34,8 @@
#include <asm/io_bitmap.h>
#include <asm/gsseg.h>
-/*
- * nop stub, which must not clobber anything *including the stack* to
- * avoid confusing the entry prologues.
- */
-DEFINE_PARAVIRT_ASM(_paravirt_nop, "", .entry.text);
-
/* stub always returning 0. */
-DEFINE_PARAVIRT_ASM(paravirt_ret0, "xor %eax,%eax", .entry.text);
+DEFINE_ASM_FUNC(paravirt_ret0, "xor %eax,%eax", .entry.text);
void __init default_banner(void)
{
@@ -49,26 +43,12 @@ void __init default_banner(void)
pv_info.name);
}
-/* Undefined instruction for dealing with missing ops pointers. */
-noinstr void paravirt_BUG(void)
-{
- BUG();
-}
-
-static unsigned paravirt_patch_call(void *insn_buff, const void *target,
- unsigned long addr, unsigned len)
-{
- __text_gen_insn(insn_buff, CALL_INSN_OPCODE,
- (void *)addr, target, CALL_INSN_SIZE);
- return CALL_INSN_SIZE;
-}
-
#ifdef CONFIG_PARAVIRT_XXL
-DEFINE_PARAVIRT_ASM(_paravirt_ident_64, "mov %rdi, %rax", .text);
-DEFINE_PARAVIRT_ASM(pv_native_save_fl, "pushf; pop %rax", .noinstr.text);
-DEFINE_PARAVIRT_ASM(pv_native_irq_disable, "cli", .noinstr.text);
-DEFINE_PARAVIRT_ASM(pv_native_irq_enable, "sti", .noinstr.text);
-DEFINE_PARAVIRT_ASM(pv_native_read_cr2, "mov %cr2, %rax", .noinstr.text);
+DEFINE_ASM_FUNC(_paravirt_ident_64, "mov %rdi, %rax", .text);
+DEFINE_ASM_FUNC(pv_native_save_fl, "pushf; pop %rax", .noinstr.text);
+DEFINE_ASM_FUNC(pv_native_irq_disable, "cli", .noinstr.text);
+DEFINE_ASM_FUNC(pv_native_irq_enable, "sti", .noinstr.text);
+DEFINE_ASM_FUNC(pv_native_read_cr2, "mov %cr2, %rax", .noinstr.text);
#endif
DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key);
@@ -85,28 +65,6 @@ static void native_tlb_remove_table(struct mmu_gather *tlb, void *table)
tlb_remove_page(tlb, table);
}
-unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr,
- unsigned int len)
-{
- /*
- * Neat trick to map patch type back to the call within the
- * corresponding structure.
- */
- void *opfunc = *((void **)&pv_ops + type);
- unsigned ret;
-
- if (opfunc == NULL)
- /* If there's no function, patch it with paravirt_BUG() */
- ret = paravirt_patch_call(insn_buff, paravirt_BUG, addr, len);
- else if (opfunc == _paravirt_nop)
- ret = 0;
- else
- /* Otherwise call the function. */
- ret = paravirt_patch_call(insn_buff, opfunc, addr, len);
-
- return ret;
-}
-
struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index b6f4e8399fca..ab49ade31b0d 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -477,7 +477,7 @@ void native_tss_update_io_bitmap(void)
/*
* Make sure that the TSS limit is covering the IO bitmap. It might have
* been cut down by a VMEXIT to 0x67 which would cause a subsequent I/O
- * access from user space to trigger a #GP because tbe bitmap is outside
+ * access from user space to trigger a #GP because the bitmap is outside
* the TSS limit.
*/
refresh_tss_limit();
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 1526747bedf2..ec2c21a1844e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -226,8 +226,6 @@ static void __init reserve_brk(void)
_brk_start = 0;
}
-u64 relocated_ramdisk;
-
#ifdef CONFIG_BLK_DEV_INITRD
static u64 __init get_ramdisk_image(void)
@@ -261,7 +259,7 @@ static void __init relocate_initrd(void)
u64 area_size = PAGE_ALIGN(ramdisk_size);
/* We need to move the initrd down into directly mapped mem */
- relocated_ramdisk = memblock_phys_alloc_range(area_size, PAGE_SIZE, 0,
+ u64 relocated_ramdisk = memblock_phys_alloc_range(area_size, PAGE_SIZE, 0,
PFN_PHYS(max_pfn_mapped));
if (!relocated_ramdisk)
panic("Cannot find place for new RAMDISK of size %lld\n",
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 2c97bf7b56ae..b30d6e180df7 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -106,8 +106,8 @@ void __init pcpu_populate_pte(unsigned long addr)
static inline void setup_percpu_segment(int cpu)
{
#ifdef CONFIG_X86_32
- struct desc_struct d = GDT_ENTRY_INIT(0x8092, per_cpu_offset(cpu),
- 0xFFFFF);
+ struct desc_struct d = GDT_ENTRY_INIT(DESC_DATA32,
+ per_cpu_offset(cpu), 0xFFFFF);
write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_PERCPU, &d, DESCTYPE_S);
#endif
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index ccb0915e84e1..1d24ec679915 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -96,7 +96,7 @@ static void __noreturn sev_es_terminate(unsigned int set, unsigned int reason)
/* Tell the hypervisor what went wrong. */
val |= GHCB_SEV_TERM_REASON(set, reason);
- /* Request Guest Termination from Hypvervisor */
+ /* Request Guest Termination from Hypervisor */
sev_es_wr_ghcb_msr(val);
VMGEXIT();
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 70472eebe719..c67285824e82 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -1234,10 +1234,6 @@ void setup_ghcb(void)
if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
return;
- /* First make sure the hypervisor talks a supported protocol. */
- if (!sev_es_negotiate_protocol())
- sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
-
/*
* Check whether the runtime #VC exception handler is active. It uses
* the per-CPU GHCB page which is set up by sev_es_init_vc_handling().
@@ -1255,6 +1251,13 @@ void setup_ghcb(void)
}
/*
+ * Make sure the hypervisor talks a supported protocol.
+ * This gets called only in the BSP boot phase.
+ */
+ if (!sev_es_negotiate_protocol())
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
+
+ /*
* Clear the boot_ghcb. The first exception comes in before the bss
* section is cleared.
*/
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 2cc2aa120b4b..3f57ce68a3f1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -757,6 +757,7 @@ const struct cpumask *cpu_clustergroup_mask(int cpu)
{
return cpu_l2c_shared_mask(cpu);
}
+EXPORT_SYMBOL_GPL(cpu_clustergroup_mask);
static void impress_friends(void)
{
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 54a5596adaa6..a349dbfc6d5a 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -267,19 +267,6 @@ SECTIONS
}
#endif
- /*
- * start address and size of operations which during runtime
- * can be patched with virtualization friendly instructions or
- * baremetal native ones. Think page table operations.
- * Details in paravirt_types.h
- */
- . = ALIGN(8);
- .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
- __parainstructions = .;
- *(.parainstructions)
- __parainstructions_end = .;
- }
-
#ifdef CONFIG_RETPOLINE
/*
* List of instructions that call/jmp/jcc to retpoline thunks
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index dda6fc4cfae8..42d3f47f4c07 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -105,7 +105,7 @@ static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
/*
* If the index isn't significant, use the first entry with a
- * matching function. It's userspace's responsibilty to not
+ * matching function. It's userspace's responsibility to not
* provide "duplicate" entries in all cases.
*/
if (!(e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) || e->index == index)
diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
index ee8c4c3496ed..eea6ea7f14af 100644
--- a/arch/x86/kvm/debugfs.c
+++ b/arch/x86/kvm/debugfs.c
@@ -182,6 +182,7 @@ static int kvm_mmu_rmaps_stat_release(struct inode *inode, struct file *file)
}
static const struct file_operations mmu_rmaps_stat_fops = {
+ .owner = THIS_MODULE,
.open = kvm_mmu_rmaps_stat_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 238afd7335e4..4943f6b2bbee 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2388,7 +2388,7 @@ static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *h
if (!eventfd)
return HV_STATUS_INVALID_PORT_ID;
- eventfd_signal(eventfd, 1);
+ eventfd_signal(eventfd);
return HV_STATUS_SUCCESS;
}
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index c57e181bba21..0b1f991b9a31 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -987,7 +987,7 @@ static void pte_list_desc_remove_entry(struct kvm *kvm,
/*
* The head descriptor is empty. If there are no tail descriptors,
- * nullify the rmap head to mark the list as emtpy, else point the rmap
+ * nullify the rmap head to mark the list as empty, else point the rmap
* head at the next descriptor, i.e. the new head.
*/
if (!head_desc->more)
@@ -6544,7 +6544,7 @@ void kvm_mmu_try_split_huge_pages(struct kvm *kvm,
kvm_tdp_mmu_try_split_huge_pages(kvm, memslot, start, end, target_level, false);
/*
- * A TLB flush is unnecessary at this point for the same resons as in
+ * A TLB flush is unnecessary at this point for the same reasons as in
* kvm_mmu_slot_try_split_huge_pages().
*/
}
diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c
index bd30ebfb2f2c..04c247bfe318 100644
--- a/arch/x86/kvm/mmu/tdp_iter.c
+++ b/arch/x86/kvm/mmu/tdp_iter.c
@@ -146,7 +146,7 @@ static bool try_step_up(struct tdp_iter *iter)
* Step to the next SPTE in a pre-order traversal of the paging structure.
* To get to the next SPTE, the iterator either steps down towards the goal
* GFN, if at a present, non-last-level SPTE, or over to a SPTE mapping a
- * highter GFN.
+ * higher GFN.
*
* The basic algorithm is as follows:
* 1. If the current SPTE is a non-last-level SPTE, step down into the page
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 4900c078045a..6ee925d66648 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2972,6 +2972,25 @@ static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm)
set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, v_tsc_aux, v_tsc_aux);
}
+
+ /*
+ * For SEV-ES, accesses to MSR_IA32_XSS should not be intercepted if
+ * the host/guest supports its use.
+ *
+ * guest_can_use() checks a number of requirements on the host/guest to
+ * ensure that MSR_IA32_XSS is available, but it might report true even
+ * if X86_FEATURE_XSAVES isn't configured in the guest to ensure host
+ * MSR_IA32_XSS is always properly restored. For SEV-ES, it is better
+ * to further check that the guest CPUID actually supports
+ * X86_FEATURE_XSAVES so that accesses to MSR_IA32_XSS by misbehaved
+ * guests will still get intercepted and caught in the normal
+ * kvm_emulate_rdmsr()/kvm_emulated_wrmsr() paths.
+ */
+ if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
+ guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 1, 1);
+ else
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 0, 0);
}
void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 712146312358..7fb51424fc74 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -103,6 +103,7 @@ static const struct svm_direct_access_msrs {
{ .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
{ .index = MSR_IA32_LASTINTFROMIP, .always = false },
{ .index = MSR_IA32_LASTINTTOIP, .always = false },
+ { .index = MSR_IA32_XSS, .always = false },
{ .index = MSR_EFER, .always = false },
{ .index = MSR_IA32_CR_PAT, .always = false },
{ .index = MSR_AMD64_SEV_ES_GHCB, .always = true },
@@ -1855,15 +1856,17 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
bool old_paging = is_paging(vcpu);
#ifdef CONFIG_X86_64
- if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) {
+ if (vcpu->arch.efer & EFER_LME) {
if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
vcpu->arch.efer |= EFER_LMA;
- svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
+ if (!vcpu->arch.guest_state_protected)
+ svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
}
if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
vcpu->arch.efer &= ~EFER_LMA;
- svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
+ if (!vcpu->arch.guest_state_protected)
+ svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
}
}
#endif
@@ -4741,7 +4744,7 @@ static int svm_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
* Emulation is possible for SEV guests if and only if a prefilled
* buffer containing the bytes of the intercepted instruction is
* available. SEV guest memory is encrypted with a guest specific key
- * and cannot be decrypted by KVM, i.e. KVM would read cyphertext and
+ * and cannot be decrypted by KVM, i.e. KVM would read ciphertext and
* decode garbage.
*
* If KVM is NOT trying to simply skip an instruction, inject #UD if
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index be67ab7fdd10..c409f934c377 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -30,7 +30,7 @@
#define IOPM_SIZE PAGE_SIZE * 3
#define MSRPM_SIZE PAGE_SIZE * 2
-#define MAX_DIRECT_ACCESS_MSRS 46
+#define MAX_DIRECT_ACCESS_MSRS 47
#define MSRPM_OFFSETS 32
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index c5ec0ef51ff7..65826fe23f33 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -6561,7 +6561,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
* code was changed such that flag signals vmcs12 should
* be copied into eVMCS in guest memory.
*
- * To preserve backwards compatability, allow user
+ * To preserve backwards compatibility, allow user
* to set this flag even when there is no VMXON region.
*/
if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index be20a60047b1..e0f86f11c345 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1809,7 +1809,7 @@ static void vmx_inject_exception(struct kvm_vcpu *vcpu)
* do generate error codes with bits 31:16 set, and so KVM's
* ABI lets userspace shove in arbitrary 32-bit values. Drop
* the upper bits to avoid VM-Fail, losing information that
- * does't really exist is preferable to killing the VM.
+ * doesn't really exist is preferable to killing the VM.
*/
vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, (u16)ex->error_code);
intr_info |= INTR_INFO_DELIVER_CODE_MASK;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2c924075f6f1..cec0fc2a4b1c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5518,8 +5518,8 @@ static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
{
- return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region,
- sizeof(guest_xsave->region));
+ kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region,
+ sizeof(guest_xsave->region));
}
static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
@@ -10165,7 +10165,7 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
*
* But, if a VM-Exit occurs during instruction execution, and KVM does NOT skip
* the instruction or inject an exception, then KVM can incorrecty inject a new
- * asynchrounous event if the event became pending after the CPU fetched the
+ * asynchronous event if the event became pending after the CPU fetched the
* instruction (in the guest). E.g. if a page fault (#PF, #NPF, EPT violation)
* occurs and is resolved by KVM, a coincident NMI, SMI, IRQ, etc... can be
* injected on the restarted instruction instead of being deferred until the
@@ -10186,7 +10186,7 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
int r;
/*
- * Process nested events first, as nested VM-Exit supercedes event
+ * Process nested events first, as nested VM-Exit supersedes event
* re-injection. If there's an event queued for re-injection, it will
* be saved into the appropriate vmc{b,s}12 fields on nested VM-Exit.
*/
@@ -10884,7 +10884,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
/*
* Assert that vCPU vs. VM APICv state is consistent. An APICv
* update must kick and wait for all vCPUs before toggling the
- * per-VM state, and responsing vCPUs must wait for the update
+ * per-VM state, and responding vCPUs must wait for the update
* to complete before servicing KVM_REQ_APICV_UPDATE.
*/
WARN_ON_ONCE((kvm_vcpu_apicv_activated(vcpu) != kvm_vcpu_apicv_active(vcpu)) &&
@@ -13031,7 +13031,10 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
if (vcpu->arch.guest_state_protected)
return true;
- return vcpu->arch.preempted_in_kernel;
+ if (vcpu != kvm_get_running_vcpu())
+ return vcpu->arch.preempted_in_kernel;
+
+ return static_call(kvm_x86_get_cpl)(vcpu) == 0;
}
unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index e53fad915a62..523bb6df5ac9 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -2088,7 +2088,7 @@ static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r)
if (ret < 0 && ret != -ENOTCONN)
return false;
} else {
- eventfd_signal(evtchnfd->deliver.eventfd.ctx, 1);
+ eventfd_signal(evtchnfd->deliver.eventfd.ctx);
}
*r = 0;
diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c
index cea25ca8b8cf..c9dae65ac01b 100644
--- a/arch/x86/lib/csum-partial_64.c
+++ b/arch/x86/lib/csum-partial_64.c
@@ -11,26 +11,23 @@
#include <asm/checksum.h>
#include <asm/word-at-a-time.h>
-static inline unsigned short from32to16(unsigned a)
+static inline __wsum csum_finalize_sum(u64 temp64)
{
- unsigned short b = a >> 16;
- asm("addw %w2,%w0\n\t"
- "adcw $0,%w0\n"
- : "=r" (b)
- : "0" (b), "r" (a));
- return b;
+ return (__force __wsum)((temp64 + ror64(temp64, 32)) >> 32);
}
-static inline __wsum csum_tail(u64 temp64, int odd)
+static inline unsigned long update_csum_40b(unsigned long sum, const unsigned long m[5])
{
- unsigned int result;
-
- result = add32_with_carry(temp64 >> 32, temp64 & 0xffffffff);
- if (unlikely(odd)) {
- result = from32to16(result);
- result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
- }
- return (__force __wsum)result;
+ asm("addq %1,%0\n\t"
+ "adcq %2,%0\n\t"
+ "adcq %3,%0\n\t"
+ "adcq %4,%0\n\t"
+ "adcq %5,%0\n\t"
+ "adcq $0,%0"
+ :"+r" (sum)
+ :"m" (m[0]), "m" (m[1]), "m" (m[2]),
+ "m" (m[3]), "m" (m[4]));
+ return sum;
}
/*
@@ -47,64 +44,32 @@ static inline __wsum csum_tail(u64 temp64, int odd)
__wsum csum_partial(const void *buff, int len, __wsum sum)
{
u64 temp64 = (__force u64)sum;
- unsigned odd;
- odd = 1 & (unsigned long) buff;
- if (unlikely(odd)) {
- if (unlikely(len == 0))
- return sum;
- temp64 = ror32((__force u32)sum, 8);
- temp64 += (*(unsigned char *)buff << 8);
- len--;
- buff++;
+ /* Do two 40-byte chunks in parallel to get better ILP */
+ if (likely(len >= 80)) {
+ u64 temp64_2 = 0;
+ do {
+ temp64 = update_csum_40b(temp64, buff);
+ temp64_2 = update_csum_40b(temp64_2, buff + 40);
+ buff += 80;
+ len -= 80;
+ } while (len >= 80);
+
+ asm("addq %1,%0\n\t"
+ "adcq $0,%0"
+ :"+r" (temp64): "r" (temp64_2));
}
/*
- * len == 40 is the hot case due to IPv6 headers, but annotating it likely()
- * has noticeable negative affect on codegen for all other cases with
- * minimal performance benefit here.
+ * len == 40 is the hot case due to IPv6 headers, so return
+ * early for that exact case without checking the tail bytes.
*/
- if (len == 40) {
- asm("addq 0*8(%[src]),%[res]\n\t"
- "adcq 1*8(%[src]),%[res]\n\t"
- "adcq 2*8(%[src]),%[res]\n\t"
- "adcq 3*8(%[src]),%[res]\n\t"
- "adcq 4*8(%[src]),%[res]\n\t"
- "adcq $0,%[res]"
- : [res] "+r"(temp64)
- : [src] "r"(buff), "m"(*(const char(*)[40])buff));
- return csum_tail(temp64, odd);
- }
- if (unlikely(len >= 64)) {
- /*
- * Extra accumulators for better ILP in the loop.
- */
- u64 tmp_accum, tmp_carries;
-
- asm("xorl %k[tmp_accum],%k[tmp_accum]\n\t"
- "xorl %k[tmp_carries],%k[tmp_carries]\n\t"
- "subl $64, %[len]\n\t"
- "1:\n\t"
- "addq 0*8(%[src]),%[res]\n\t"
- "adcq 1*8(%[src]),%[res]\n\t"
- "adcq 2*8(%[src]),%[res]\n\t"
- "adcq 3*8(%[src]),%[res]\n\t"
- "adcl $0,%k[tmp_carries]\n\t"
- "addq 4*8(%[src]),%[tmp_accum]\n\t"
- "adcq 5*8(%[src]),%[tmp_accum]\n\t"
- "adcq 6*8(%[src]),%[tmp_accum]\n\t"
- "adcq 7*8(%[src]),%[tmp_accum]\n\t"
- "adcl $0,%k[tmp_carries]\n\t"
- "addq $64, %[src]\n\t"
- "subl $64, %[len]\n\t"
- "jge 1b\n\t"
- "addq %[tmp_accum],%[res]\n\t"
- "adcq %[tmp_carries],%[res]\n\t"
- "adcq $0,%[res]"
- : [tmp_accum] "=&r"(tmp_accum),
- [tmp_carries] "=&r"(tmp_carries), [res] "+r"(temp64),
- [len] "+r"(len), [src] "+r"(buff)
- : "m"(*(const char *)buff));
+ if (len >= 40) {
+ temp64 = update_csum_40b(temp64, buff);
+ len -= 40;
+ if (!len)
+ return csum_finalize_sum(temp64);
+ buff += 40;
}
if (len & 32) {
@@ -143,7 +108,7 @@ __wsum csum_partial(const void *buff, int len, __wsum sum)
: [res] "+r"(temp64)
: [trail] "r"(trail));
}
- return csum_tail(temp64, odd);
+ return csum_finalize_sum(temp64);
}
EXPORT_SYMBOL(csum_partial);
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index 0e65d00e2339..23f81ca3f06b 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -128,7 +128,7 @@ static void delay_halt_mwaitx(u64 unused, u64 cycles)
delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles);
/*
- * Use cpu_tss_rw as a cacheline-aligned, seldomly accessed per-cpu
+ * Use cpu_tss_rw as a cacheline-aligned, seldom accessed per-cpu
* variable as the monitor target.
*/
__monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
diff --git a/arch/x86/lib/misc.c b/arch/x86/lib/misc.c
index 92cd8ecc3a2c..40b81c338ae5 100644
--- a/arch/x86/lib/misc.c
+++ b/arch/x86/lib/misc.c
@@ -8,7 +8,7 @@
*/
int num_digits(int val)
{
- int m = 10;
+ long long m = 10;
int d = 1;
if (val < 0) {
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index ab778eac1952..679b09cfe241 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1370,6 +1370,8 @@ void do_user_addr_fault(struct pt_regs *regs,
goto done;
}
count_vm_vma_lock_event(VMA_LOCK_RETRY);
+ if (fault & VM_FAULT_MAJOR)
+ flags |= FAULT_FLAG_TRIED;
/* Quick path to respond to signals */
if (fault_signal_pending(fault, regs)) {
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a190aae8ceaf..a0dffaca6d2b 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1013,7 +1013,7 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
return;
}
- /* free a pte talbe */
+ /* free a pte table */
free_pagetable(pmd_page(*pmd), 0);
spin_lock(&init_mm.page_table_lock);
pmd_clear(pmd);
@@ -1031,7 +1031,7 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
return;
}
- /* free a pmd talbe */
+ /* free a pmd table */
free_pagetable(pud_page(*pud), 0);
spin_lock(&init_mm.page_table_lock);
pud_clear(pud);
@@ -1049,7 +1049,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
return;
}
- /* free a pud talbe */
+ /* free a pud table */
free_pagetable(p4d_page(*p4d), 0);
spin_lock(&init_mm.page_table_lock);
p4d_clear(p4d);
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index a68f2dda0948..70b91de2e053 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -32,6 +32,7 @@
#include <asm/msr.h>
#include <asm/cmdline.h>
#include <asm/sev.h>
+#include <asm/ia32.h>
#include "mm_internal.h"
@@ -481,6 +482,16 @@ void __init sme_early_init(void)
*/
if (sev_status & MSR_AMD64_SEV_ES_ENABLED)
x86_cpuinit.parallel_bringup = false;
+
+ /*
+ * The VMM is capable of injecting interrupt 0x80 and triggering the
+ * compatibility syscall path.
+ *
+ * By default, the 32-bit emulation is disabled in order to ensure
+ * the safety of the VM.
+ */
+ if (sev_status & MSR_AMD64_SEV_ENABLED)
+ ia32_disable();
}
void __init mem_encrypt_free_decrypted_mem(void)
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index de10800cd4dd..0904d7e8e126 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -14,7 +14,7 @@
* memory ranges: uncached, write-combining, write-through, write-protected,
* and the most commonly used and default attribute: write-back caching.
*
- * PAT support supercedes and augments MTRR support in a compatible fashion: MTRR is
+ * PAT support supersedes and augments MTRR support in a compatible fashion: MTRR is
* a hardware interface to enumerate a limited number of physical memory ranges
* and set their caching attributes explicitly, programmed into the CPU via MSRs.
* Even modern CPUs have MTRRs enabled - but these are typically not touched
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index bda9f129835e..e9b448d1b1b7 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -1621,7 +1621,7 @@ repeat:
/*
* We need to keep the pfn from the existing PTE,
- * after all we're only going to change it's attributes
+ * after all we're only going to change its attributes
* not the memory it points to
*/
new_pte = pfn_pte(pfn, new_prot);
@@ -2447,7 +2447,7 @@ int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address,
/*
* The typical sequence for unmapping is to find a pte through
* lookup_address_in_pgd() (ideally, it should never return NULL because
- * the address is already mapped) and change it's protections. As pfn is
+ * the address is already mapped) and change its protections. As pfn is
* the *target* of a mapping, it's not useful while unmapping.
*/
struct cpa_data cpa = {
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 5dd733944629..669ba1c345b3 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -6,7 +6,7 @@
*
* https://github.com/IAIK/KAISER
*
- * The original work was written by and and signed off by for the Linux
+ * The original work was written by and signed off by for the Linux
* kernel by:
*
* Signed-off-by: Richard Fellner <richard.fellner@student.tugraz.at>
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 453ea95b667d..5768d386efab 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -355,7 +355,7 @@ static void l1d_flush_evaluate(unsigned long prev_mm, unsigned long next_mm,
/*
* Validate that it is not running on an SMT sibling as this would
- * make the excercise pointless because the siblings share L1D. If
+ * make the exercise pointless because the siblings share L1D. If
* it runs on a SMT sibling, notify it with SIGBUS on return to
* user/guest
*/
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 8c10d9abc239..144a5839acfe 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -2143,7 +2143,7 @@ static void save_args(const struct btf_func_model *m, u8 **prog,
} else {
/* Only copy the arguments on-stack to current
* 'stack_size' and ignore the regs, used to
- * prepare the arguments on-stack for orign call.
+ * prepare the arguments on-stack for origin call.
*/
if (for_call_origin) {
nr_regs += arg_regs;
@@ -3025,3 +3025,49 @@ void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp
#endif
WARN(1, "verification of programs using bpf_throw should have failed\n");
}
+
+void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
+ struct bpf_prog *new, struct bpf_prog *old)
+{
+ u8 *old_addr, *new_addr, *old_bypass_addr;
+ int ret;
+
+ old_bypass_addr = old ? NULL : poke->bypass_addr;
+ old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
+ new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;
+
+ /*
+ * On program loading or teardown, the program's kallsym entry
+ * might not be in place, so we use __bpf_arch_text_poke to skip
+ * the kallsyms check.
+ */
+ if (new) {
+ ret = __bpf_arch_text_poke(poke->tailcall_target,
+ BPF_MOD_JUMP,
+ old_addr, new_addr);
+ BUG_ON(ret < 0);
+ if (!old) {
+ ret = __bpf_arch_text_poke(poke->tailcall_bypass,
+ BPF_MOD_JUMP,
+ poke->bypass_addr,
+ NULL);
+ BUG_ON(ret < 0);
+ }
+ } else {
+ ret = __bpf_arch_text_poke(poke->tailcall_bypass,
+ BPF_MOD_JUMP,
+ old_bypass_addr,
+ poke->bypass_addr);
+ BUG_ON(ret < 0);
+ /* let other CPUs finish the execution of program
+ * so that it will not possible to expose them
+ * to invalid nop, stack unwind, nop state
+ */
+ if (!ret)
+ synchronize_rcu();
+ ret = __bpf_arch_text_poke(poke->tailcall_target,
+ BPF_MOD_JUMP,
+ old_addr, NULL);
+ BUG_ON(ret < 0);
+ }
+}
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 429a89c5468b..b18ce19981ec 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -1194,7 +1194,7 @@ struct jit_context {
#define PROLOGUE_SIZE 35
/*
- * Emit prologue code for BPF program and check it's size.
+ * Emit prologue code for BPF program and check its size.
* bpf_tail_call helper will skip it while jumping into another program.
*/
static void emit_prologue(u8 **pprog, u32 stack_depth)
diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c
index 761f3689f60a..84ba715f44d1 100644
--- a/arch/x86/platform/intel-quark/imr_selftest.c
+++ b/arch/x86/platform/intel-quark/imr_selftest.c
@@ -6,7 +6,7 @@
* Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
*
* IMR self test. The purpose of this module is to run a set of tests on the
- * IMR API to validate it's sanity. We check for overlapping, reserved
+ * IMR API to validate its sanity. We check for overlapping, reserved
* addresses and setup/teardown sanity.
*
*/
diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
index c4365a05ab83..f7235ef87bc3 100644
--- a/arch/x86/platform/pvh/head.S
+++ b/arch/x86/platform/pvh/head.S
@@ -11,6 +11,7 @@
#include <linux/elfnote.h>
#include <linux/init.h>
#include <linux/linkage.h>
+#include <asm/desc_defs.h>
#include <asm/segment.h>
#include <asm/asm.h>
#include <asm/boot.h>
@@ -41,7 +42,7 @@
* Bit 8 (TF) must be cleared. Other bits are all unspecified.
*
* All other processor registers and flag bits are unspecified. The OS is in
- * charge of setting up it's own stack, GDT and IDT.
+ * charge of setting up its own stack, GDT and IDT.
*/
#define PVH_GDT_ENTRY_CS 1
@@ -148,11 +149,11 @@ SYM_DATA_END(gdt)
SYM_DATA_START_LOCAL(gdt_start)
.quad 0x0000000000000000 /* NULL descriptor */
#ifdef CONFIG_X86_64
- .quad GDT_ENTRY(0xa09a, 0, 0xfffff) /* PVH_CS_SEL */
+ .quad GDT_ENTRY(DESC_CODE64, 0, 0xfffff) /* PVH_CS_SEL */
#else
- .quad GDT_ENTRY(0xc09a, 0, 0xfffff) /* PVH_CS_SEL */
+ .quad GDT_ENTRY(DESC_CODE32, 0, 0xfffff) /* PVH_CS_SEL */
#endif
- .quad GDT_ENTRY(0xc092, 0, 0xfffff) /* PVH_DS_SEL */
+ .quad GDT_ENTRY(DESC_DATA32, 0, 0xfffff) /* PVH_DS_SEL */
SYM_DATA_END_LABEL(gdt_start, SYM_L_LOCAL, gdt_end)
.balign 16
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index 4221259a5870..a379501b7a69 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -35,7 +35,7 @@ static void uv_program_mmr(struct irq_cfg *cfg, struct uv_irq_2_mmr_pnode *info)
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
entry->vector = cfg->vector;
- entry->delivery_mode = apic->delivery_mode;
+ entry->delivery_mode = APIC_DELIVERY_MODE_FIXED;
entry->dest_mode = apic->dest_mode_logical;
entry->polarity = 0;
entry->trigger = 0;
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index e03207de2880..5c50e550ab63 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -741,7 +741,7 @@ static void uv_nmi_dump_state_cpu(int cpu, struct pt_regs *regs)
this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_DUMP_DONE);
}
-/* Trigger a slave CPU to dump it's state */
+/* Trigger a slave CPU to dump its state */
static void uv_nmi_trigger_dump(int cpu)
{
int retry = uv_nmi_trigger_delay;
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index ff5afc8a5a41..3712afc3534d 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -270,7 +270,7 @@ static int uv_rtc_unset_timer(int cpu, int force)
* Read the RTC.
*
* Starting with HUB rev 2.0, the UV RTC register is replicated across all
- * cachelines of it's own page. This allows faster simultaneous reads
+ * cachelines of its own page. This allows faster simultaneous reads
* from a given socket.
*/
static u64 uv_read_rtc(struct clocksource *cs)
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 788e5559549f..f9bc444a3064 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -61,7 +61,7 @@ void __init reserve_real_mode(void)
set_real_mode_mem(mem);
/*
- * Unconditionally reserve the entire fisrt 1M, see comment in
+ * Unconditionally reserve the entire first 1M, see comment in
* setup_arch().
*/
memblock_reserve(0, SZ_1M);
diff --git a/arch/x86/realmode/rm/reboot.S b/arch/x86/realmode/rm/reboot.S
index f10515b10e0a..e714b4624e36 100644
--- a/arch/x86/realmode/rm/reboot.S
+++ b/arch/x86/realmode/rm/reboot.S
@@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
+#include <asm/desc_defs.h>
#include <asm/segment.h>
#include <asm/page_types.h>
#include <asm/processor-flags.h>
@@ -153,5 +154,5 @@ SYM_DATA_START(machine_real_restart_gdt)
* base value 0x100; since this is consistent with real mode
* semantics we don't have to reload the segments once CR0.PE = 0.
*/
- .quad GDT_ENTRY(0x0093, 0x100, 0xffff)
+ .quad GDT_ENTRY(DESC_DATA16, 0x100, 0xffff)
SYM_DATA_END(machine_real_restart_gdt)
diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile
index 90e820ac9771..7278e2545c35 100644
--- a/arch/x86/tools/Makefile
+++ b/arch/x86/tools/Makefile
@@ -17,7 +17,7 @@ reformatter = $(srctree)/arch/x86/tools/objdump_reformat.awk
chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk
quiet_cmd_posttest = TEST $@
- cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(reformatter) | $(obj)/insn_decoder_test $(posttest_64bit) $(posttest_verbose)
+ cmd_posttest = $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(reformatter) | $(obj)/insn_decoder_test $(posttest_64bit) $(posttest_verbose)
quiet_cmd_sanitytest = TEST $@
cmd_sanitytest = $(obj)/insn_sanity $(posttest_64bit) -m 1000000
diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk
deleted file mode 100644
index a4cf678cf5c8..000000000000
--- a/arch/x86/tools/chkobjdump.awk
+++ /dev/null
@@ -1,34 +0,0 @@
-# GNU objdump version checker
-#
-# Usage:
-# objdump -v | awk -f chkobjdump.awk
-BEGIN {
- # objdump version 2.19 or later is OK for the test.
- od_ver = 2;
- od_sver = 19;
-}
-
-/^GNU objdump/ {
- verstr = ""
- gsub(/\(.*\)/, "");
- for (i = 3; i <= NF; i++)
- if (match($(i), "^[0-9]")) {
- verstr = $(i);
- break;
- }
- if (verstr == "") {
- printf("Warning: Failed to find objdump version number.\n");
- exit 0;
- }
- split(verstr, ver, ".");
- if (ver[1] > od_ver ||
- (ver[1] == od_ver && ver[2] >= od_sver)) {
- exit 1;
- } else {
- printf("Warning: objdump version %s is older than %d.%d\n",
- verstr, od_ver, od_sver);
- print("Warning: Skipping posttest.");
- # Logic is inverted, because we just skip test without error.
- exit 0;
- }
-}
diff --git a/arch/x86/tools/objdump_reformat.awk b/arch/x86/tools/objdump_reformat.awk
index f418c91b71f0..20b08a6c4d33 100644
--- a/arch/x86/tools/objdump_reformat.awk
+++ b/arch/x86/tools/objdump_reformat.awk
@@ -11,8 +11,8 @@ BEGIN {
prev_addr = ""
prev_hex = ""
prev_mnemonic = ""
- bad_expr = "(\\(bad\\)|^rex|^.byte|^rep(z|nz)$|^lock$|^es$|^cs$|^ss$|^ds$|^fs$|^gs$|^data(16|32)$|^addr(16|32|64))"
- fwait_expr = "^9b "
+ bad_expr = "(\\(bad\\)|<unknown>|^rex|^.byte|^rep(z|nz)$|^lock$|^es$|^cs$|^ss$|^ds$|^fs$|^gs$|^data(16|32)$|^addr(16|32|64))"
+ fwait_expr = "^9b[ \t]*fwait"
fwait_str="9b\tfwait"
}
@@ -22,7 +22,7 @@ BEGIN {
}
/^ *[0-9a-f]+:/ {
- if (split($0, field, "\t") < 3) {
+ if (split($0, field, /: |\t/) < 3) {
# This is a continuation of the same insn.
prev_hex = prev_hex field[2]
} else {
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index d30949e25ebd..a3bae2b24626 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -66,7 +66,7 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = {
[S_REL] =
"^(__init_(begin|end)|"
"__x86_cpu_dev_(start|end)|"
- "(__parainstructions|__alt_instructions)(_end)?|"
+ "__alt_instructions(_end)?|"
"(__iommu_table|__apicdrivers|__smp_locks)(_end)?|"
"__(start|end)_pci_.*|"
#if CONFIG_FW_LOADER
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 9b1ec5d8c99c..a65fc2ae15b4 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -9,6 +9,7 @@ config XEN
select PARAVIRT_CLOCK
select X86_HV_CALLBACK_VECTOR
depends on X86_64 || (X86_32 && X86_PAE)
+ depends on X86_64 || (X86_GENERIC || MPENTIUM4 || MCORE2 || MATOM || MK8)
depends on X86_LOCAL_APIC && X86_TSC
help
This is the Linux Xen port. Enabling this will allow the
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index bbbfdd495ebd..aeb33e0a3f76 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -704,7 +704,7 @@ static struct trap_array_entry trap_array[] = {
TRAP_ENTRY(exc_int3, false ),
TRAP_ENTRY(exc_overflow, false ),
#ifdef CONFIG_IA32_EMULATION
- { entry_INT80_compat, xen_entry_INT80_compat, false },
+ TRAP_ENTRY(int80_emulation, false ),
#endif
TRAP_ENTRY(exc_page_fault, false ),
TRAP_ENTRY(exc_divide_error, false ),
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 6092fea7d651..39982f955cfe 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -45,7 +45,7 @@ static const typeof(pv_ops) xen_irq_ops __initconst = {
/* Initial interrupt flag handling only called while interrupts off. */
.save_fl = __PV_IS_CALLEE_SAVE(paravirt_ret0),
.irq_disable = __PV_IS_CALLEE_SAVE(paravirt_nop),
- .irq_enable = __PV_IS_CALLEE_SAVE(paravirt_BUG),
+ .irq_enable = __PV_IS_CALLEE_SAVE(BUG_func),
.safe_halt = xen_safe_halt,
.halt = xen_halt,
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index b6830554ff69..72af496a160c 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -34,7 +34,7 @@
* would need to validate the whole pagetable before going on.
* Naturally, this is quite slow. The solution is to "pin" a
* pagetable, which enforces all the constraints on the pagetable even
- * when it is not actively in use. This menas that Xen can be assured
+ * when it is not actively in use. This means that Xen can be assured
* that it is still valid when you do load it into %cr3, and doesn't
* need to revalidate it.
*
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 9e5e68008785..1a9cd18dfbd3 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -156,7 +156,7 @@ xen_pv_trap asm_xenpv_exc_machine_check
#endif /* CONFIG_X86_MCE */
xen_pv_trap asm_exc_simd_coprocessor_error
#ifdef CONFIG_IA32_EMULATION
-xen_pv_trap entry_INT80_compat
+xen_pv_trap asm_int80_emulation
#endif
xen_pv_trap asm_exc_xen_unknown_trap
xen_pv_trap asm_exc_xen_hypervisor_callback
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 06eefa9c1458..497b5d32f457 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -427,3 +427,5 @@
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait
456 common futex_requeue sys_futex_requeue
+457 common statmount sys_statmount
+458 common listmount sys_listmount